1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
31 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
46 #include "sched-int.h"
49 #include "target-def.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "tree-gimple.h"
/* Communication flag between ASM_OUTPUT_LABEL and ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;
/* Operands captured from the most recent compare operation, consumed
   later when the branch and scc insns are generated.  */
struct rtx_def *ia64_compare_op0;
struct rtx_def *ia64_compare_op1;
/* Names of the stacked general registers r32..r127, for use by
   ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{
  "r32",  "r33",  "r34",  "r35",  "r36",  "r37",  "r38",  "r39",
  "r40",  "r41",  "r42",  "r43",  "r44",  "r45",  "r46",  "r47",
  "r48",  "r49",  "r50",  "r51",  "r52",  "r53",  "r54",  "r55",
  "r56",  "r57",  "r58",  "r59",  "r60",  "r61",  "r62",  "r63",
  "r64",  "r65",  "r66",  "r67",  "r68",  "r69",  "r70",  "r71",
  "r72",  "r73",  "r74",  "r75",  "r76",  "r77",  "r78",  "r79",
  "r80",  "r81",  "r82",  "r83",  "r84",  "r85",  "r86",  "r87",
  "r88",  "r89",  "r90",  "r91",  "r92",  "r93",  "r94",  "r95",
  "r96",  "r97",  "r98",  "r99",  "r100", "r101", "r102", "r103",
  "r104", "r105", "r106", "r107", "r108", "r109", "r110", "r111",
  "r112", "r113", "r114", "r115", "r116", "r117", "r118", "r119",
  "r120", "r121", "r122", "r123", "r124", "r125", "r126", "r127"
};
/* Names of the eight input registers.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{
  "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7"
};
/* Names of the eighty local registers.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{
  "loc0",  "loc1",  "loc2",  "loc3",  "loc4",  "loc5",  "loc6",  "loc7",
  "loc8",  "loc9",  "loc10", "loc11", "loc12", "loc13", "loc14", "loc15",
  "loc16", "loc17", "loc18", "loc19", "loc20", "loc21", "loc22", "loc23",
  "loc24", "loc25", "loc26", "loc27", "loc28", "loc29", "loc30", "loc31",
  "loc32", "loc33", "loc34", "loc35", "loc36", "loc37", "loc38", "loc39",
  "loc40", "loc41", "loc42", "loc43", "loc44", "loc45", "loc46", "loc47",
  "loc48", "loc49", "loc50", "loc51", "loc52", "loc53", "loc54", "loc55",
  "loc56", "loc57", "loc58", "loc59", "loc60", "loc61", "loc62", "loc63",
  "loc64", "loc65", "loc66", "loc67", "loc68", "loc69", "loc70", "loc71",
  "loc72", "loc73", "loc74", "loc75", "loc76", "loc77", "loc78", "loc79"
};
/* Names of the eight output registers.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{
  "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7"
};
101 /* Which cpu are we scheduling for. */
102 enum processor_type ia64_tune
= PROCESSOR_ITANIUM2
;
/* Nonzero if our final scheduling pass should run.  The normal second
   scheduling pass is always avoided.  */
static int ia64_flag_schedule_insns2;
/* Determines whether we run variable tracking in machine dependent
   reorg (original comment truncated in this copy -- TODO confirm the
   exact wording).  */
static int ia64_flag_var_tracking;
/* Variables which are this size or smaller are put in the
   sdata/sbss sections.  */
unsigned int ia64_section_threshold;
117 /* The following variable is used by the DFA insn scheduler. The value is
118 TRUE if we do insn bundling instead of insn scheduling. */
121 /* Structure to be filled in by ia64_compute_frame_size with register
122 save masks and offsets for the current function. */
124 struct ia64_frame_info
126 HOST_WIDE_INT total_size
; /* size of the stack frame, not including
127 the caller's scratch area. */
128 HOST_WIDE_INT spill_cfa_off
; /* top of the reg spill area from the cfa. */
129 HOST_WIDE_INT spill_size
; /* size of the gr/br/fr spill area. */
130 HOST_WIDE_INT extra_spill_size
; /* size of spill area for others. */
131 HARD_REG_SET mask
; /* mask of saved registers. */
132 unsigned int gr_used_mask
; /* mask of registers in use as gr spill
133 registers or long-term scratches. */
134 int n_spilled
; /* number of spilled registers. */
135 int reg_fp
; /* register for fp. */
136 int reg_save_b0
; /* save register for b0. */
137 int reg_save_pr
; /* save register for prs. */
138 int reg_save_ar_pfs
; /* save register for ar.pfs. */
139 int reg_save_ar_unat
; /* save register for ar.unat. */
140 int reg_save_ar_lc
; /* save register for ar.lc. */
141 int reg_save_gp
; /* save register for gp. */
142 int n_input_regs
; /* number of input registers used. */
143 int n_local_regs
; /* number of local registers used. */
144 int n_output_regs
; /* number of output registers used. */
145 int n_rotate_regs
; /* number of rotating registers used. */
147 char need_regstk
; /* true if a .regstk directive needed. */
148 char initialized
; /* true if the data is finalized. */
151 /* Current frame information calculated by ia64_compute_frame_size. */
152 static struct ia64_frame_info current_frame_info
;
154 static int ia64_first_cycle_multipass_dfa_lookahead (void);
155 static void ia64_dependencies_evaluation_hook (rtx
, rtx
);
156 static void ia64_init_dfa_pre_cycle_insn (void);
157 static rtx
ia64_dfa_pre_cycle_insn (void);
158 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx
);
159 static int ia64_dfa_new_cycle (FILE *, int, rtx
, int, int, int *);
160 static rtx
gen_tls_get_addr (void);
161 static rtx
gen_thread_pointer (void);
162 static int find_gr_spill (int);
163 static int next_scratch_gr_reg (void);
164 static void mark_reg_gr_used_mask (rtx
, void *);
165 static void ia64_compute_frame_size (HOST_WIDE_INT
);
166 static void setup_spill_pointers (int, rtx
, HOST_WIDE_INT
);
167 static void finish_spill_pointers (void);
168 static rtx
spill_restore_mem (rtx
, HOST_WIDE_INT
);
169 static void do_spill (rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
, rtx
);
170 static void do_restore (rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
);
171 static rtx
gen_movdi_x (rtx
, rtx
, rtx
);
172 static rtx
gen_fr_spill_x (rtx
, rtx
, rtx
);
173 static rtx
gen_fr_restore_x (rtx
, rtx
, rtx
);
175 static enum machine_mode
hfa_element_mode (tree
, bool);
176 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
178 static bool ia64_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
180 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS
*, enum machine_mode
,
182 static bool ia64_function_ok_for_sibcall (tree
, tree
);
183 static bool ia64_return_in_memory (tree
, tree
);
184 static bool ia64_rtx_costs (rtx
, int, int, int *);
185 static void fix_range (const char *);
186 static bool ia64_handle_option (size_t, const char *, int);
187 static struct machine_function
* ia64_init_machine_status (void);
188 static void emit_insn_group_barriers (FILE *);
189 static void emit_all_insn_group_barriers (FILE *);
190 static void final_emit_insn_group_barriers (FILE *);
191 static void emit_predicate_relation_info (void);
192 static void ia64_reorg (void);
193 static bool ia64_in_small_data_p (tree
);
194 static void process_epilogue (void);
195 static int process_set (FILE *, rtx
);
197 static bool ia64_assemble_integer (rtx
, unsigned int, int);
198 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT
);
199 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT
);
200 static void ia64_output_function_end_prologue (FILE *);
202 static int ia64_issue_rate (void);
203 static int ia64_adjust_cost (rtx
, rtx
, rtx
, int);
204 static void ia64_sched_init (FILE *, int, int);
205 static void ia64_sched_finish (FILE *, int);
206 static int ia64_dfa_sched_reorder (FILE *, int, rtx
*, int *, int, int);
207 static int ia64_sched_reorder (FILE *, int, rtx
*, int *, int);
208 static int ia64_sched_reorder2 (FILE *, int, rtx
*, int *, int);
209 static int ia64_variable_issue (FILE *, int, rtx
, int);
211 static struct bundle_state
*get_free_bundle_state (void);
212 static void free_bundle_state (struct bundle_state
*);
213 static void initiate_bundle_states (void);
214 static void finish_bundle_states (void);
215 static unsigned bundle_state_hash (const void *);
216 static int bundle_state_eq_p (const void *, const void *);
217 static int insert_bundle_state (struct bundle_state
*);
218 static void initiate_bundle_state_table (void);
219 static void finish_bundle_state_table (void);
220 static int try_issue_nops (struct bundle_state
*, int);
221 static int try_issue_insn (struct bundle_state
*, rtx
);
222 static void issue_nops_and_insn (struct bundle_state
*, int, rtx
, int, int);
223 static int get_max_pos (state_t
);
224 static int get_template (state_t
, int);
226 static rtx
get_next_important_insn (rtx
, rtx
);
227 static void bundling (FILE *, int, rtx
, rtx
);
229 static void ia64_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
230 HOST_WIDE_INT
, tree
);
231 static void ia64_file_start (void);
233 static void ia64_select_rtx_section (enum machine_mode
, rtx
,
234 unsigned HOST_WIDE_INT
);
235 static void ia64_output_dwarf_dtprel (FILE *, int, rtx
)
237 static void ia64_rwreloc_select_section (tree
, int, unsigned HOST_WIDE_INT
)
239 static void ia64_rwreloc_unique_section (tree
, int)
241 static void ia64_rwreloc_select_rtx_section (enum machine_mode
, rtx
,
242 unsigned HOST_WIDE_INT
)
244 static unsigned int ia64_section_type_flags (tree
, const char *, int);
245 static void ia64_hpux_add_extern_decl (tree decl
)
247 static void ia64_hpux_file_end (void)
249 static void ia64_init_libfuncs (void)
251 static void ia64_hpux_init_libfuncs (void)
253 static void ia64_sysv4_init_libfuncs (void)
255 static void ia64_vms_init_libfuncs (void)
258 static tree
ia64_handle_model_attribute (tree
*, tree
, tree
, int, bool *);
259 static void ia64_encode_section_info (tree
, rtx
, int);
260 static rtx
ia64_struct_value_rtx (tree
, int);
261 static tree
ia64_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
262 static bool ia64_scalar_mode_supported_p (enum machine_mode mode
);
263 static bool ia64_vector_mode_supported_p (enum machine_mode mode
);
264 static bool ia64_cannot_force_const_mem (rtx
);
266 /* Table of valid machine attributes. */
267 static const struct attribute_spec ia64_attribute_table
[] =
269 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
270 { "syscall_linkage", 0, 0, false, true, true, NULL
},
271 { "model", 1, 1, true, false, false, ia64_handle_model_attribute
},
272 { NULL
, 0, 0, false, false, false, NULL
}
275 /* Initialize the GCC target structure. */
276 #undef TARGET_ATTRIBUTE_TABLE
277 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
279 #undef TARGET_INIT_BUILTINS
280 #define TARGET_INIT_BUILTINS ia64_init_builtins
282 #undef TARGET_EXPAND_BUILTIN
283 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
285 #undef TARGET_ASM_BYTE_OP
286 #define TARGET_ASM_BYTE_OP "\tdata1\t"
287 #undef TARGET_ASM_ALIGNED_HI_OP
288 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
289 #undef TARGET_ASM_ALIGNED_SI_OP
290 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
291 #undef TARGET_ASM_ALIGNED_DI_OP
292 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
293 #undef TARGET_ASM_UNALIGNED_HI_OP
294 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
295 #undef TARGET_ASM_UNALIGNED_SI_OP
296 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
297 #undef TARGET_ASM_UNALIGNED_DI_OP
298 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
299 #undef TARGET_ASM_INTEGER
300 #define TARGET_ASM_INTEGER ia64_assemble_integer
302 #undef TARGET_ASM_FUNCTION_PROLOGUE
303 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
304 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
305 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
306 #undef TARGET_ASM_FUNCTION_EPILOGUE
307 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
309 #undef TARGET_IN_SMALL_DATA_P
310 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
312 #undef TARGET_SCHED_ADJUST_COST
313 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
314 #undef TARGET_SCHED_ISSUE_RATE
315 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
316 #undef TARGET_SCHED_VARIABLE_ISSUE
317 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
318 #undef TARGET_SCHED_INIT
319 #define TARGET_SCHED_INIT ia64_sched_init
320 #undef TARGET_SCHED_FINISH
321 #define TARGET_SCHED_FINISH ia64_sched_finish
322 #undef TARGET_SCHED_REORDER
323 #define TARGET_SCHED_REORDER ia64_sched_reorder
324 #undef TARGET_SCHED_REORDER2
325 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
327 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
328 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
330 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
331 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
333 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
334 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
335 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
336 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
338 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
339 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
340 ia64_first_cycle_multipass_dfa_lookahead_guard
342 #undef TARGET_SCHED_DFA_NEW_CYCLE
343 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
345 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
346 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
347 #undef TARGET_PASS_BY_REFERENCE
348 #define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
349 #undef TARGET_ARG_PARTIAL_BYTES
350 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
352 #undef TARGET_ASM_OUTPUT_MI_THUNK
353 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
354 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
355 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
357 #undef TARGET_ASM_FILE_START
358 #define TARGET_ASM_FILE_START ia64_file_start
360 #undef TARGET_RTX_COSTS
361 #define TARGET_RTX_COSTS ia64_rtx_costs
362 #undef TARGET_ADDRESS_COST
363 #define TARGET_ADDRESS_COST hook_int_rtx_0
365 #undef TARGET_MACHINE_DEPENDENT_REORG
366 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
368 #undef TARGET_ENCODE_SECTION_INFO
369 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
371 #undef TARGET_SECTION_TYPE_FLAGS
372 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
375 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
376 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
379 /* ??? ABI doesn't allow us to define this. */
381 #undef TARGET_PROMOTE_FUNCTION_ARGS
382 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
385 /* ??? ABI doesn't allow us to define this. */
387 #undef TARGET_PROMOTE_FUNCTION_RETURN
388 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
391 /* ??? Investigate. */
393 #undef TARGET_PROMOTE_PROTOTYPES
394 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
397 #undef TARGET_STRUCT_VALUE_RTX
398 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
399 #undef TARGET_RETURN_IN_MEMORY
400 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
401 #undef TARGET_SETUP_INCOMING_VARARGS
402 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
403 #undef TARGET_STRICT_ARGUMENT_NAMING
404 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
405 #undef TARGET_MUST_PASS_IN_STACK
406 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
408 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
409 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
411 #undef TARGET_UNWIND_EMIT
412 #define TARGET_UNWIND_EMIT process_for_unwind_directive
414 #undef TARGET_SCALAR_MODE_SUPPORTED_P
415 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
416 #undef TARGET_VECTOR_MODE_SUPPORTED_P
417 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
419 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
420 in an order different from the specified program order. */
421 #undef TARGET_RELAXED_ORDERING
422 #define TARGET_RELAXED_ORDERING true
424 #undef TARGET_DEFAULT_TARGET_FLAGS
425 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
426 #undef TARGET_HANDLE_OPTION
427 #define TARGET_HANDLE_OPTION ia64_handle_option
429 #undef TARGET_CANNOT_FORCE_CONST_MEM
430 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
432 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* The two address areas a declaration can live in.  The type name is
   used below by ia64_get_addr_area and friends.  */
typedef enum
{
  ADDR_AREA_NORMAL,	/* normal address area */
  ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
} ia64_addr_area;
441 static GTY(()) tree small_ident1
;
442 static GTY(()) tree small_ident2
;
447 if (small_ident1
== 0)
449 small_ident1
= get_identifier ("small");
450 small_ident2
= get_identifier ("__small__");
454 /* Retrieve the address area that has been chosen for the given decl. */
456 static ia64_addr_area
457 ia64_get_addr_area (tree decl
)
461 model_attr
= lookup_attribute ("model", DECL_ATTRIBUTES (decl
));
467 id
= TREE_VALUE (TREE_VALUE (model_attr
));
468 if (id
== small_ident1
|| id
== small_ident2
)
469 return ADDR_AREA_SMALL
;
471 return ADDR_AREA_NORMAL
;
475 ia64_handle_model_attribute (tree
*node
, tree name
, tree args
,
476 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
478 ia64_addr_area addr_area
= ADDR_AREA_NORMAL
;
480 tree arg
, decl
= *node
;
483 arg
= TREE_VALUE (args
);
484 if (arg
== small_ident1
|| arg
== small_ident2
)
486 addr_area
= ADDR_AREA_SMALL
;
490 warning (OPT_Wattributes
, "invalid argument of %qs attribute",
491 IDENTIFIER_POINTER (name
));
492 *no_add_attrs
= true;
495 switch (TREE_CODE (decl
))
498 if ((DECL_CONTEXT (decl
) && TREE_CODE (DECL_CONTEXT (decl
))
500 && !TREE_STATIC (decl
))
502 error ("%Jan address area attribute cannot be specified for "
503 "local variables", decl
, decl
);
504 *no_add_attrs
= true;
506 area
= ia64_get_addr_area (decl
);
507 if (area
!= ADDR_AREA_NORMAL
&& addr_area
!= area
)
509 error ("%Jaddress area of '%s' conflicts with previous "
510 "declaration", decl
, decl
);
511 *no_add_attrs
= true;
516 error ("%Jaddress area attribute cannot be specified for functions",
518 *no_add_attrs
= true;
522 warning (OPT_Wattributes
, "%qs attribute ignored",
523 IDENTIFIER_POINTER (name
));
524 *no_add_attrs
= true;
532 ia64_encode_addr_area (tree decl
, rtx symbol
)
536 flags
= SYMBOL_REF_FLAGS (symbol
);
537 switch (ia64_get_addr_area (decl
))
539 case ADDR_AREA_NORMAL
: break;
540 case ADDR_AREA_SMALL
: flags
|= SYMBOL_FLAG_SMALL_ADDR
; break;
541 default: gcc_unreachable ();
543 SYMBOL_REF_FLAGS (symbol
) = flags
;
547 ia64_encode_section_info (tree decl
, rtx rtl
, int first
)
549 default_encode_section_info (decl
, rtl
, first
);
551 /* Careful not to prod global register variables. */
552 if (TREE_CODE (decl
) == VAR_DECL
553 && GET_CODE (DECL_RTL (decl
)) == MEM
554 && GET_CODE (XEXP (DECL_RTL (decl
), 0)) == SYMBOL_REF
555 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
)))
556 ia64_encode_addr_area (decl
, XEXP (rtl
, 0));
559 /* Implement CONST_OK_FOR_LETTER_P. */
562 ia64_const_ok_for_letter_p (HOST_WIDE_INT value
, char c
)
567 return CONST_OK_FOR_I (value
);
569 return CONST_OK_FOR_J (value
);
571 return CONST_OK_FOR_K (value
);
573 return CONST_OK_FOR_L (value
);
575 return CONST_OK_FOR_M (value
);
577 return CONST_OK_FOR_N (value
);
579 return CONST_OK_FOR_O (value
);
581 return CONST_OK_FOR_P (value
);
587 /* Implement CONST_DOUBLE_OK_FOR_LETTER_P. */
590 ia64_const_double_ok_for_letter_p (rtx value
, char c
)
595 return CONST_DOUBLE_OK_FOR_G (value
);
601 /* Implement EXTRA_CONSTRAINT. */
604 ia64_extra_constraint (rtx value
, char c
)
609 /* Non-volatile memory for FP_REG loads/stores. */
610 return memory_operand(value
, VOIDmode
) && !MEM_VOLATILE_P (value
);
613 /* 1..4 for shladd arguments. */
614 return (GET_CODE (value
) == CONST_INT
615 && INTVAL (value
) >= 1 && INTVAL (value
) <= 4);
618 /* Non-post-inc memory for asms and other unsavory creatures. */
619 return (GET_CODE (value
) == MEM
620 && GET_RTX_CLASS (GET_CODE (XEXP (value
, 0))) != RTX_AUTOINC
621 && (reload_in_progress
|| memory_operand (value
, VOIDmode
)));
624 /* Symbol ref to small-address-area. */
625 return small_addr_symbolic_operand (value
, VOIDmode
);
629 return value
== CONST0_RTX (GET_MODE (value
));
632 /* An integer vector, such that conversion to an integer yields a
633 value appropriate for an integer 'J' constraint. */
634 if (GET_CODE (value
) == CONST_VECTOR
635 && GET_MODE_CLASS (GET_MODE (value
)) == MODE_VECTOR_INT
)
637 value
= simplify_subreg (DImode
, value
, GET_MODE (value
), 0);
638 return ia64_const_ok_for_letter_p (INTVAL (value
), 'J');
643 /* A V2SF vector containing elements that satisfy 'G'. */
645 (GET_CODE (value
) == CONST_VECTOR
646 && GET_MODE (value
) == V2SFmode
647 && ia64_const_double_ok_for_letter_p (XVECEXP (value
, 0, 0), 'G')
648 && ia64_const_double_ok_for_letter_p (XVECEXP (value
, 0, 1), 'G'));
655 /* Return 1 if the operands of a move are ok. */
658 ia64_move_ok (rtx dst
, rtx src
)
660 /* If we're under init_recog_no_volatile, we'll not be able to use
661 memory_operand. So check the code directly and don't worry about
662 the validity of the underlying address, which should have been
663 checked elsewhere anyway. */
664 if (GET_CODE (dst
) != MEM
)
666 if (GET_CODE (src
) == MEM
)
668 if (register_operand (src
, VOIDmode
))
671 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
672 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
673 return src
== const0_rtx
;
675 return GET_CODE (src
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (src
);
679 addp4_optimize_ok (rtx op1
, rtx op2
)
681 return (basereg_operand (op1
, GET_MODE(op1
)) !=
682 basereg_operand (op2
, GET_MODE(op2
)));
685 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
686 Return the length of the field, or <= 0 on failure. */
689 ia64_depz_field_mask (rtx rop
, rtx rshift
)
691 unsigned HOST_WIDE_INT op
= INTVAL (rop
);
692 unsigned HOST_WIDE_INT shift
= INTVAL (rshift
);
694 /* Get rid of the zero bits we're shifting in. */
697 /* We must now have a solid block of 1's at bit 0. */
698 return exact_log2 (op
+ 1);
701 /* Return the TLS model to use for ADDR. */
703 static enum tls_model
704 tls_symbolic_operand_type (rtx addr
)
706 enum tls_model tls_kind
= 0;
708 if (GET_CODE (addr
) == CONST
)
710 if (GET_CODE (XEXP (addr
, 0)) == PLUS
711 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
)
712 tls_kind
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr
, 0), 0));
714 else if (GET_CODE (addr
) == SYMBOL_REF
)
715 tls_kind
= SYMBOL_REF_TLS_MODEL (addr
);
720 /* Return true if X is a constant that is valid for some immediate
721 field in an instruction. */
724 ia64_legitimate_constant_p (rtx x
)
726 switch (GET_CODE (x
))
733 if (GET_MODE (x
) == VOIDmode
)
735 return CONST_DOUBLE_OK_FOR_G (x
);
739 return tls_symbolic_operand_type (x
) == 0;
743 enum machine_mode mode
= GET_MODE (x
);
745 if (mode
== V2SFmode
)
746 return ia64_extra_constraint (x
, 'Y');
748 return (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
749 && GET_MODE_SIZE (mode
) <= 8);
757 /* Don't allow TLS addresses to get spilled to memory. */
760 ia64_cannot_force_const_mem (rtx x
)
762 return tls_symbolic_operand_type (x
) != 0;
765 /* Expand a symbolic constant load. */
768 ia64_expand_load_address (rtx dest
, rtx src
)
770 gcc_assert (GET_CODE (dest
) == REG
);
772 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
773 having to pointer-extend the value afterward. Other forms of address
774 computation below are also more natural to compute as 64-bit quantities.
775 If we've been given an SImode destination register, change it. */
776 if (GET_MODE (dest
) != Pmode
)
777 dest
= gen_rtx_REG_offset (dest
, Pmode
, REGNO (dest
), 0);
781 if (small_addr_symbolic_operand (src
, VOIDmode
))
785 emit_insn (gen_load_gprel64 (dest
, src
));
786 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (src
))
787 emit_insn (gen_load_fptr (dest
, src
));
788 else if (sdata_symbolic_operand (src
, VOIDmode
))
789 emit_insn (gen_load_gprel (dest
, src
));
792 HOST_WIDE_INT addend
= 0;
795 /* We did split constant offsets in ia64_expand_move, and we did try
796 to keep them split in move_operand, but we also allowed reload to
797 rematerialize arbitrary constants rather than spill the value to
798 the stack and reload it. So we have to be prepared here to split
800 if (GET_CODE (src
) == CONST
)
802 HOST_WIDE_INT hi
, lo
;
804 hi
= INTVAL (XEXP (XEXP (src
, 0), 1));
805 lo
= ((hi
& 0x3fff) ^ 0x2000) - 0x2000;
811 src
= plus_constant (XEXP (XEXP (src
, 0), 0), hi
);
815 tmp
= gen_rtx_HIGH (Pmode
, src
);
816 tmp
= gen_rtx_PLUS (Pmode
, tmp
, pic_offset_table_rtx
);
817 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
819 tmp
= gen_rtx_LO_SUM (Pmode
, dest
, src
);
820 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
824 tmp
= gen_rtx_PLUS (Pmode
, dest
, GEN_INT (addend
));
825 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
832 static GTY(()) rtx gen_tls_tga
;
834 gen_tls_get_addr (void)
837 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
841 static GTY(()) rtx thread_pointer_rtx
;
843 gen_thread_pointer (void)
845 if (!thread_pointer_rtx
)
846 thread_pointer_rtx
= gen_rtx_REG (Pmode
, 13);
847 return thread_pointer_rtx
;
851 ia64_expand_tls_address (enum tls_model tls_kind
, rtx op0
, rtx op1
,
852 HOST_WIDE_INT addend
)
854 rtx tga_op1
, tga_op2
, tga_ret
, tga_eqv
, tmp
, insns
;
855 rtx orig_op0
= op0
, orig_op1
= op1
;
856 HOST_WIDE_INT addend_lo
, addend_hi
;
858 addend_lo
= ((addend
& 0x3fff) ^ 0x2000) - 0x2000;
859 addend_hi
= addend
- addend_lo
;
863 case TLS_MODEL_GLOBAL_DYNAMIC
:
866 tga_op1
= gen_reg_rtx (Pmode
);
867 emit_insn (gen_load_dtpmod (tga_op1
, op1
));
869 tga_op2
= gen_reg_rtx (Pmode
);
870 emit_insn (gen_load_dtprel (tga_op2
, op1
));
872 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
873 LCT_CONST
, Pmode
, 2, tga_op1
,
874 Pmode
, tga_op2
, Pmode
);
876 insns
= get_insns ();
879 if (GET_MODE (op0
) != Pmode
)
881 emit_libcall_block (insns
, op0
, tga_ret
, op1
);
884 case TLS_MODEL_LOCAL_DYNAMIC
:
885 /* ??? This isn't the completely proper way to do local-dynamic
886 If the call to __tls_get_addr is used only by a single symbol,
887 then we should (somehow) move the dtprel to the second arg
888 to avoid the extra add. */
891 tga_op1
= gen_reg_rtx (Pmode
);
892 emit_insn (gen_load_dtpmod (tga_op1
, op1
));
894 tga_op2
= const0_rtx
;
896 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
897 LCT_CONST
, Pmode
, 2, tga_op1
,
898 Pmode
, tga_op2
, Pmode
);
900 insns
= get_insns ();
903 tga_eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
905 tmp
= gen_reg_rtx (Pmode
);
906 emit_libcall_block (insns
, tmp
, tga_ret
, tga_eqv
);
908 if (!register_operand (op0
, Pmode
))
909 op0
= gen_reg_rtx (Pmode
);
912 emit_insn (gen_load_dtprel (op0
, op1
));
913 emit_insn (gen_adddi3 (op0
, tmp
, op0
));
916 emit_insn (gen_add_dtprel (op0
, op1
, tmp
));
919 case TLS_MODEL_INITIAL_EXEC
:
920 op1
= plus_constant (op1
, addend_hi
);
923 tmp
= gen_reg_rtx (Pmode
);
924 emit_insn (gen_load_tprel (tmp
, op1
));
926 if (!register_operand (op0
, Pmode
))
927 op0
= gen_reg_rtx (Pmode
);
928 emit_insn (gen_adddi3 (op0
, tmp
, gen_thread_pointer ()));
931 case TLS_MODEL_LOCAL_EXEC
:
932 if (!register_operand (op0
, Pmode
))
933 op0
= gen_reg_rtx (Pmode
);
939 emit_insn (gen_load_tprel (op0
, op1
));
940 emit_insn (gen_adddi3 (op0
, op0
, gen_thread_pointer ()));
943 emit_insn (gen_add_tprel (op0
, op1
, gen_thread_pointer ()));
951 op0
= expand_simple_binop (Pmode
, PLUS
, op0
, GEN_INT (addend
),
952 orig_op0
, 1, OPTAB_DIRECT
);
955 if (GET_MODE (orig_op0
) == Pmode
)
957 return gen_lowpart (GET_MODE (orig_op0
), op0
);
961 ia64_expand_move (rtx op0
, rtx op1
)
963 enum machine_mode mode
= GET_MODE (op0
);
965 if (!reload_in_progress
&& !reload_completed
&& !ia64_move_ok (op0
, op1
))
966 op1
= force_reg (mode
, op1
);
968 if ((mode
== Pmode
|| mode
== ptr_mode
) && symbolic_operand (op1
, VOIDmode
))
970 HOST_WIDE_INT addend
= 0;
971 enum tls_model tls_kind
;
974 if (GET_CODE (op1
) == CONST
975 && GET_CODE (XEXP (op1
, 0)) == PLUS
976 && GET_CODE (XEXP (XEXP (op1
, 0), 1)) == CONST_INT
)
978 addend
= INTVAL (XEXP (XEXP (op1
, 0), 1));
979 sym
= XEXP (XEXP (op1
, 0), 0);
982 tls_kind
= tls_symbolic_operand_type (sym
);
984 return ia64_expand_tls_address (tls_kind
, op0
, sym
, addend
);
986 if (any_offset_symbol_operand (sym
, mode
))
988 else if (aligned_offset_symbol_operand (sym
, mode
))
990 HOST_WIDE_INT addend_lo
, addend_hi
;
992 addend_lo
= ((addend
& 0x3fff) ^ 0x2000) - 0x2000;
993 addend_hi
= addend
- addend_lo
;
997 op1
= plus_constant (sym
, addend_hi
);
1006 if (reload_completed
)
1008 /* We really should have taken care of this offset earlier. */
1009 gcc_assert (addend
== 0);
1010 if (ia64_expand_load_address (op0
, op1
))
1016 rtx subtarget
= no_new_pseudos
? op0
: gen_reg_rtx (mode
);
1018 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
, op1
));
1020 op1
= expand_simple_binop (mode
, PLUS
, subtarget
,
1021 GEN_INT (addend
), op0
, 1, OPTAB_DIRECT
);
1030 /* Split a move from OP1 to OP0 conditional on COND. */
1033 ia64_emit_cond_move (rtx op0
, rtx op1
, rtx cond
)
1035 rtx insn
, first
= get_last_insn ();
1037 emit_move_insn (op0
, op1
);
1039 for (insn
= get_last_insn (); insn
!= first
; insn
= PREV_INSN (insn
))
1041 PATTERN (insn
) = gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
),
1045 /* Split a post-reload TImode or TFmode reference into two DImode
1046 components. This is made extra difficult by the fact that we do
1047 not get any scratch registers to work with, because reload cannot
1048 be prevented from giving us a scratch that overlaps the register
1049 pair involved. So instead, when addressing memory, we tweak the
1050 pointer register up and back down with POST_INCs. Or up and not
1051 back down when we can get away with it.
1053 REVERSED is true when the loads must be done in reversed order
1054 (high word first) for correctness. DEAD is true when the pointer
1055 dies with the second insn we generate and therefore the second
1056 address must not carry a postmodify.
1058 May return an insn which is to be emitted after the moves. */
1061 ia64_split_tmode (rtx out
[2], rtx in
, bool reversed
, bool dead
)
1065 switch (GET_CODE (in
))
1068 out
[reversed
] = gen_rtx_REG (DImode
, REGNO (in
));
1069 out
[!reversed
] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
1074 /* Cannot occur reversed. */
1075 gcc_assert (!reversed
);
1077 if (GET_MODE (in
) != TFmode
)
1078 split_double (in
, &out
[0], &out
[1]);
1080 /* split_double does not understand how to split a TFmode
1081 quantity into a pair of DImode constants. */
1084 unsigned HOST_WIDE_INT p
[2];
1085 long l
[4]; /* TFmode is 128 bits */
1087 REAL_VALUE_FROM_CONST_DOUBLE (r
, in
);
1088 real_to_target (l
, &r
, TFmode
);
1090 if (FLOAT_WORDS_BIG_ENDIAN
)
1092 p
[0] = (((unsigned HOST_WIDE_INT
) l
[0]) << 32) + l
[1];
1093 p
[1] = (((unsigned HOST_WIDE_INT
) l
[2]) << 32) + l
[3];
1097 p
[0] = (((unsigned HOST_WIDE_INT
) l
[3]) << 32) + l
[2];
1098 p
[1] = (((unsigned HOST_WIDE_INT
) l
[1]) << 32) + l
[0];
1100 out
[0] = GEN_INT (p
[0]);
1101 out
[1] = GEN_INT (p
[1]);
1107 rtx base
= XEXP (in
, 0);
1110 switch (GET_CODE (base
))
1115 out
[0] = adjust_automodify_address
1116 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1117 out
[1] = adjust_automodify_address
1118 (in
, DImode
, dead
? 0 : gen_rtx_POST_DEC (Pmode
, base
), 8);
1122 /* Reversal requires a pre-increment, which can only
1123 be done as a separate insn. */
1124 emit_insn (gen_adddi3 (base
, base
, GEN_INT (8)));
1125 out
[0] = adjust_automodify_address
1126 (in
, DImode
, gen_rtx_POST_DEC (Pmode
, base
), 8);
1127 out
[1] = adjust_address (in
, DImode
, 0);
1132 gcc_assert (!reversed
&& !dead
);
1134 /* Just do the increment in two steps. */
1135 out
[0] = adjust_automodify_address (in
, DImode
, 0, 0);
1136 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
1140 gcc_assert (!reversed
&& !dead
);
1142 /* Add 8, subtract 24. */
1143 base
= XEXP (base
, 0);
1144 out
[0] = adjust_automodify_address
1145 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1146 out
[1] = adjust_automodify_address
1148 gen_rtx_POST_MODIFY (Pmode
, base
, plus_constant (base
, -24)),
1153 gcc_assert (!reversed
&& !dead
);
1155 /* Extract and adjust the modification. This case is
1156 trickier than the others, because we might have an
1157 index register, or we might have a combined offset that
1158 doesn't fit a signed 9-bit displacement field. We can
1159 assume the incoming expression is already legitimate. */
1160 offset
= XEXP (base
, 1);
1161 base
= XEXP (base
, 0);
1163 out
[0] = adjust_automodify_address
1164 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1166 if (GET_CODE (XEXP (offset
, 1)) == REG
)
1168 /* Can't adjust the postmodify to match. Emit the
1169 original, then a separate addition insn. */
1170 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
1171 fixup
= gen_adddi3 (base
, base
, GEN_INT (-8));
1175 gcc_assert (GET_CODE (XEXP (offset
, 1)) == CONST_INT
);
1176 if (INTVAL (XEXP (offset
, 1)) < -256 + 8)
1178 /* Again the postmodify cannot be made to match,
1179 but in this case it's more efficient to get rid
1180 of the postmodify entirely and fix up with an
1182 out
[1] = adjust_automodify_address (in
, DImode
, base
, 8);
1184 (base
, base
, GEN_INT (INTVAL (XEXP (offset
, 1)) - 8));
1188 /* Combined offset still fits in the displacement field.
1189 (We cannot overflow it at the high end.) */
1190 out
[1] = adjust_automodify_address
1191 (in
, DImode
, gen_rtx_POST_MODIFY
1192 (Pmode
, base
, gen_rtx_PLUS
1194 GEN_INT (INTVAL (XEXP (offset
, 1)) - 8))),
1213 /* Split a TImode or TFmode move instruction after reload.
1214 This is used by *movtf_internal and *movti_internal. */
1216 ia64_split_tmode_move (rtx operands
[])
1218 rtx in
[2], out
[2], insn
;
1221 bool reversed
= false;
1223 /* It is possible for reload to decide to overwrite a pointer with
1224 the value it points to. In that case we have to do the loads in
1225 the appropriate order so that the pointer is not destroyed too
1226 early. Also we must not generate a postmodify for that second
1227 load, or rws_access_regno will die. */
1228 if (GET_CODE (operands
[1]) == MEM
1229 && reg_overlap_mentioned_p (operands
[0], operands
[1]))
1231 rtx base
= XEXP (operands
[1], 0);
1232 while (GET_CODE (base
) != REG
)
1233 base
= XEXP (base
, 0);
1235 if (REGNO (base
) == REGNO (operands
[0]))
1239 /* Another reason to do the moves in reversed order is if the first
1240 element of the target register pair is also the second element of
1241 the source register pair. */
1242 if (GET_CODE (operands
[0]) == REG
&& GET_CODE (operands
[1]) == REG
1243 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
1246 fixup
[0] = ia64_split_tmode (in
, operands
[1], reversed
, dead
);
1247 fixup
[1] = ia64_split_tmode (out
, operands
[0], reversed
, dead
);
1249 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1250 if (GET_CODE (EXP) == MEM \
1251 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1252 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1253 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1254 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1255 XEXP (XEXP (EXP, 0), 0), \
1258 insn
= emit_insn (gen_rtx_SET (VOIDmode
, out
[0], in
[0]));
1259 MAYBE_ADD_REG_INC_NOTE (insn
, in
[0]);
1260 MAYBE_ADD_REG_INC_NOTE (insn
, out
[0]);
1262 insn
= emit_insn (gen_rtx_SET (VOIDmode
, out
[1], in
[1]));
1263 MAYBE_ADD_REG_INC_NOTE (insn
, in
[1]);
1264 MAYBE_ADD_REG_INC_NOTE (insn
, out
[1]);
1267 emit_insn (fixup
[0]);
1269 emit_insn (fixup
[1]);
1271 #undef MAYBE_ADD_REG_INC_NOTE
1274 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1275 through memory plus an extra GR scratch register. Except that you can
1276 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1277 SECONDARY_RELOAD_CLASS, but not both.
1279 We got into problems in the first place by allowing a construct like
1280 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1281 This solution attempts to prevent this situation from occurring. When
1282 we see something like the above, we spill the inner register to memory. */
1285 spill_xfmode_operand (rtx in
, int force
)
1287 if (GET_CODE (in
) == SUBREG
1288 && GET_MODE (SUBREG_REG (in
)) == TImode
1289 && GET_CODE (SUBREG_REG (in
)) == REG
)
1291 rtx memt
= assign_stack_temp (TImode
, 16, 0);
1292 emit_move_insn (memt
, SUBREG_REG (in
));
1293 return adjust_address (memt
, XFmode
, 0);
1295 else if (force
&& GET_CODE (in
) == REG
)
1297 rtx memx
= assign_stack_temp (XFmode
, 16, 0);
1298 emit_move_insn (memx
, in
);
1305 /* Emit comparison instruction if necessary, returning the expression
1306 that holds the compare result in the proper mode. */
1308 static GTY(()) rtx cmptf_libfunc
;
1311 ia64_expand_compare (enum rtx_code code
, enum machine_mode mode
)
1313 rtx op0
= ia64_compare_op0
, op1
= ia64_compare_op1
;
1316 /* If we have a BImode input, then we already have a compare result, and
1317 do not need to emit another comparison. */
1318 if (GET_MODE (op0
) == BImode
)
1320 gcc_assert ((code
== NE
|| code
== EQ
) && op1
== const0_rtx
);
1323 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1324 magic number as its third argument, that indicates what to do.
1325 The return value is an integer to be compared against zero. */
1326 else if (GET_MODE (op0
) == TFmode
)
1329 QCMP_INV
= 1, /* Raise FP_INVALID on SNaN as a side effect. */
1335 enum rtx_code ncode
;
1338 gcc_assert (cmptf_libfunc
&& GET_MODE (op1
) == TFmode
);
1341 /* 1 = equal, 0 = not equal. Equality operators do
1342 not raise FP_INVALID when given an SNaN operand. */
1343 case EQ
: magic
= QCMP_EQ
; ncode
= NE
; break;
1344 case NE
: magic
= QCMP_EQ
; ncode
= EQ
; break;
1345 /* isunordered() from C99. */
1346 case UNORDERED
: magic
= QCMP_UNORD
; ncode
= NE
; break;
1347 case ORDERED
: magic
= QCMP_UNORD
; ncode
= EQ
; break;
1348 /* Relational operators raise FP_INVALID when given
1350 case LT
: magic
= QCMP_LT
|QCMP_INV
; ncode
= NE
; break;
1351 case LE
: magic
= QCMP_LT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
1352 case GT
: magic
= QCMP_GT
|QCMP_INV
; ncode
= NE
; break;
1353 case GE
: magic
= QCMP_GT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
1354 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1355 Expanders for buneq etc. weuld have to be added to ia64.md
1356 for this to be useful. */
1357 default: gcc_unreachable ();
1362 ret
= emit_library_call_value (cmptf_libfunc
, 0, LCT_CONST
, DImode
, 3,
1363 op0
, TFmode
, op1
, TFmode
,
1364 GEN_INT (magic
), DImode
);
1365 cmp
= gen_reg_rtx (BImode
);
1366 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1367 gen_rtx_fmt_ee (ncode
, BImode
,
1370 insns
= get_insns ();
1373 emit_libcall_block (insns
, cmp
, cmp
,
1374 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
));
1379 cmp
= gen_reg_rtx (BImode
);
1380 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1381 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
)));
1385 return gen_rtx_fmt_ee (code
, mode
, cmp
, const0_rtx
);
1388 /* Generate an integral vector comparison. */
1391 ia64_expand_vecint_compare (enum rtx_code code
, enum machine_mode mode
,
1392 rtx dest
, rtx op0
, rtx op1
)
1394 bool negate
= false;
1429 rtx w0h
, w0l
, w1h
, w1l
, ch
, cl
;
1430 enum machine_mode wmode
;
1431 rtx (*unpack_l
) (rtx
, rtx
, rtx
);
1432 rtx (*unpack_h
) (rtx
, rtx
, rtx
);
1433 rtx (*pack
) (rtx
, rtx
, rtx
);
1435 /* We don't have native unsigned comparisons, but we can generate
1436 them better than generic code can. */
1438 gcc_assert (mode
!= V2SImode
);
1443 pack
= gen_pack2_sss
;
1444 unpack_l
= gen_unpack1_l
;
1445 unpack_h
= gen_unpack1_h
;
1450 pack
= gen_pack4_sss
;
1451 unpack_l
= gen_unpack2_l
;
1452 unpack_h
= gen_unpack2_h
;
1459 /* Unpack into wider vectors, zero extending the elements. */
1461 w0l
= gen_reg_rtx (wmode
);
1462 w0h
= gen_reg_rtx (wmode
);
1463 w1l
= gen_reg_rtx (wmode
);
1464 w1h
= gen_reg_rtx (wmode
);
1465 emit_insn (unpack_l (gen_lowpart (mode
, w0l
), op0
, CONST0_RTX (mode
)));
1466 emit_insn (unpack_h (gen_lowpart (mode
, w0h
), op0
, CONST0_RTX (mode
)));
1467 emit_insn (unpack_l (gen_lowpart (mode
, w1l
), op1
, CONST0_RTX (mode
)));
1468 emit_insn (unpack_h (gen_lowpart (mode
, w1h
), op1
, CONST0_RTX (mode
)));
1470 /* Compare in the wider mode. */
1472 cl
= gen_reg_rtx (wmode
);
1473 ch
= gen_reg_rtx (wmode
);
1474 code
= signed_condition (code
);
1475 ia64_expand_vecint_compare (code
, wmode
, cl
, w0l
, w1l
);
1476 negate
= ia64_expand_vecint_compare (code
, wmode
, ch
, w0h
, w1h
);
1478 /* Repack into a single narrower vector. */
1480 emit_insn (pack (dest
, cl
, ch
));
1488 x
= gen_rtx_fmt_ee (code
, mode
, op0
, op1
);
1489 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
1495 ia64_expand_vcondu_v2si (enum rtx_code code
, rtx operands
[])
1497 rtx dl
, dh
, bl
, bh
, op1l
, op1h
, op2l
, op2h
, op4l
, op4h
, op5l
, op5h
, x
;
1499 /* In this case, we extract the two SImode quantities and generate
1500 normal comparisons for each of them. */
1502 op1l
= gen_lowpart (SImode
, operands
[1]);
1503 op2l
= gen_lowpart (SImode
, operands
[2]);
1504 op4l
= gen_lowpart (SImode
, operands
[4]);
1505 op5l
= gen_lowpart (SImode
, operands
[5]);
1507 op1h
= gen_reg_rtx (SImode
);
1508 op2h
= gen_reg_rtx (SImode
);
1509 op4h
= gen_reg_rtx (SImode
);
1510 op5h
= gen_reg_rtx (SImode
);
1512 emit_insn (gen_lshrdi3 (gen_lowpart (DImode
, op1h
),
1513 gen_lowpart (DImode
, operands
[1]), GEN_INT (32)));
1514 emit_insn (gen_lshrdi3 (gen_lowpart (DImode
, op2h
),
1515 gen_lowpart (DImode
, operands
[2]), GEN_INT (32)));
1516 emit_insn (gen_lshrdi3 (gen_lowpart (DImode
, op4h
),
1517 gen_lowpart (DImode
, operands
[4]), GEN_INT (32)));
1518 emit_insn (gen_lshrdi3 (gen_lowpart (DImode
, op5h
),
1519 gen_lowpart (DImode
, operands
[5]), GEN_INT (32)));
1521 bl
= gen_reg_rtx (BImode
);
1522 x
= gen_rtx_fmt_ee (code
, BImode
, op4l
, op5l
);
1523 emit_insn (gen_rtx_SET (VOIDmode
, bl
, x
));
1525 bh
= gen_reg_rtx (BImode
);
1526 x
= gen_rtx_fmt_ee (code
, BImode
, op4h
, op5h
);
1527 emit_insn (gen_rtx_SET (VOIDmode
, bh
, x
));
1529 /* With the results of the comparisons, emit conditional moves. */
1531 dl
= gen_reg_rtx (SImode
);
1532 x
= gen_rtx_IF_THEN_ELSE (SImode
, bl
, op1l
, op2l
);
1533 emit_insn (gen_rtx_SET (VOIDmode
, dl
, x
));
1535 dh
= gen_reg_rtx (SImode
);
1536 x
= gen_rtx_IF_THEN_ELSE (SImode
, bh
, op1h
, op2h
);
1537 emit_insn (gen_rtx_SET (VOIDmode
, dh
, x
));
1539 /* Merge the two partial results back into a vector. */
1541 x
= gen_rtx_VEC_CONCAT (V2SImode
, dl
, dh
);
1542 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1545 /* Emit an integral vector conditional move. */
1548 ia64_expand_vecint_cmov (rtx operands
[])
1550 enum machine_mode mode
= GET_MODE (operands
[0]);
1551 enum rtx_code code
= GET_CODE (operands
[3]);
1555 /* Since we don't have unsigned V2SImode comparisons, it's more efficient
1556 to special-case them entirely. */
1557 if (mode
== V2SImode
1558 && (code
== GTU
|| code
== GEU
|| code
== LEU
|| code
== LTU
))
1560 ia64_expand_vcondu_v2si (code
, operands
);
1564 cmp
= gen_reg_rtx (mode
);
1565 negate
= ia64_expand_vecint_compare (code
, mode
, cmp
,
1566 operands
[4], operands
[5]);
1568 ot
= operands
[1+negate
];
1569 of
= operands
[2-negate
];
1571 if (ot
== CONST0_RTX (mode
))
1573 if (of
== CONST0_RTX (mode
))
1575 emit_move_insn (operands
[0], ot
);
1579 x
= gen_rtx_NOT (mode
, cmp
);
1580 x
= gen_rtx_AND (mode
, x
, of
);
1581 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1583 else if (of
== CONST0_RTX (mode
))
1585 x
= gen_rtx_AND (mode
, cmp
, ot
);
1586 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1592 t
= gen_reg_rtx (mode
);
1593 x
= gen_rtx_AND (mode
, cmp
, operands
[1+negate
]);
1594 emit_insn (gen_rtx_SET (VOIDmode
, t
, x
));
1596 f
= gen_reg_rtx (mode
);
1597 x
= gen_rtx_NOT (mode
, cmp
);
1598 x
= gen_rtx_AND (mode
, x
, operands
[2-negate
]);
1599 emit_insn (gen_rtx_SET (VOIDmode
, f
, x
));
1601 x
= gen_rtx_IOR (mode
, t
, f
);
1602 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1606 /* Emit an integral vector min or max operation. Return true if all done. */
1609 ia64_expand_vecint_minmax (enum rtx_code code
, enum machine_mode mode
,
1614 /* These four combinations are supported directly. */
1615 if (mode
== V8QImode
&& (code
== UMIN
|| code
== UMAX
))
1617 if (mode
== V4HImode
&& (code
== SMIN
|| code
== SMAX
))
1620 /* Everything else implemented via vector comparisons. */
1621 xops
[0] = operands
[0];
1622 xops
[4] = xops
[1] = operands
[1];
1623 xops
[5] = xops
[2] = operands
[2];
1642 xops
[3] = gen_rtx_fmt_ee (code
, VOIDmode
, operands
[1], operands
[2]);
1644 ia64_expand_vecint_cmov (xops
);
1648 /* Emit the appropriate sequence for a call. */
1651 ia64_expand_call (rtx retval
, rtx addr
, rtx nextarg ATTRIBUTE_UNUSED
,
1656 addr
= XEXP (addr
, 0);
1657 addr
= convert_memory_address (DImode
, addr
);
1658 b0
= gen_rtx_REG (DImode
, R_BR (0));
1660 /* ??? Should do this for functions known to bind local too. */
1661 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
1664 insn
= gen_sibcall_nogp (addr
);
1666 insn
= gen_call_nogp (addr
, b0
);
1668 insn
= gen_call_value_nogp (retval
, addr
, b0
);
1669 insn
= emit_call_insn (insn
);
1674 insn
= gen_sibcall_gp (addr
);
1676 insn
= gen_call_gp (addr
, b0
);
1678 insn
= gen_call_value_gp (retval
, addr
, b0
);
1679 insn
= emit_call_insn (insn
);
1681 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), pic_offset_table_rtx
);
1685 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), b0
);
1689 ia64_reload_gp (void)
1693 if (current_frame_info
.reg_save_gp
)
1694 tmp
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_gp
);
1697 HOST_WIDE_INT offset
;
1699 offset
= (current_frame_info
.spill_cfa_off
1700 + current_frame_info
.spill_size
);
1701 if (frame_pointer_needed
)
1703 tmp
= hard_frame_pointer_rtx
;
1708 tmp
= stack_pointer_rtx
;
1709 offset
= current_frame_info
.total_size
- offset
;
1712 if (CONST_OK_FOR_I (offset
))
1713 emit_insn (gen_adddi3 (pic_offset_table_rtx
,
1714 tmp
, GEN_INT (offset
)));
1717 emit_move_insn (pic_offset_table_rtx
, GEN_INT (offset
));
1718 emit_insn (gen_adddi3 (pic_offset_table_rtx
,
1719 pic_offset_table_rtx
, tmp
));
1722 tmp
= gen_rtx_MEM (DImode
, pic_offset_table_rtx
);
1725 emit_move_insn (pic_offset_table_rtx
, tmp
);
1729 ia64_split_call (rtx retval
, rtx addr
, rtx retaddr
, rtx scratch_r
,
1730 rtx scratch_b
, int noreturn_p
, int sibcall_p
)
1733 bool is_desc
= false;
1735 /* If we find we're calling through a register, then we're actually
1736 calling through a descriptor, so load up the values. */
1737 if (REG_P (addr
) && GR_REGNO_P (REGNO (addr
)))
1742 /* ??? We are currently constrained to *not* use peep2, because
1743 we can legitimately change the global lifetime of the GP
1744 (in the form of killing where previously live). This is
1745 because a call through a descriptor doesn't use the previous
1746 value of the GP, while a direct call does, and we do not
1747 commit to either form until the split here.
1749 That said, this means that we lack precise life info for
1750 whether ADDR is dead after this call. This is not terribly
1751 important, since we can fix things up essentially for free
1752 with the POST_DEC below, but it's nice to not use it when we
1753 can immediately tell it's not necessary. */
1754 addr_dead_p
= ((noreturn_p
|| sibcall_p
1755 || TEST_HARD_REG_BIT (regs_invalidated_by_call
,
1757 && !FUNCTION_ARG_REGNO_P (REGNO (addr
)));
1759 /* Load the code address into scratch_b. */
1760 tmp
= gen_rtx_POST_INC (Pmode
, addr
);
1761 tmp
= gen_rtx_MEM (Pmode
, tmp
);
1762 emit_move_insn (scratch_r
, tmp
);
1763 emit_move_insn (scratch_b
, scratch_r
);
1765 /* Load the GP address. If ADDR is not dead here, then we must
1766 revert the change made above via the POST_INCREMENT. */
1768 tmp
= gen_rtx_POST_DEC (Pmode
, addr
);
1771 tmp
= gen_rtx_MEM (Pmode
, tmp
);
1772 emit_move_insn (pic_offset_table_rtx
, tmp
);
1779 insn
= gen_sibcall_nogp (addr
);
1781 insn
= gen_call_value_nogp (retval
, addr
, retaddr
);
1783 insn
= gen_call_nogp (addr
, retaddr
);
1784 emit_call_insn (insn
);
1786 if ((!TARGET_CONST_GP
|| is_desc
) && !noreturn_p
&& !sibcall_p
)
1790 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
1792 This differs from the generic code in that we know about the zero-extending
1793 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
1794 also know that ld.acq+cmpxchg.rel equals a full barrier.
1796 The loop we want to generate looks like
1801 new_reg = cmp_reg op val;
1802 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
1803 if (cmp_reg != old_reg)
1806 Note that we only do the plain load from memory once. Subsequent
1807 iterations use the value loaded by the compare-and-swap pattern. */
1810 ia64_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
1811 rtx old_dst
, rtx new_dst
)
1813 enum machine_mode mode
= GET_MODE (mem
);
1814 rtx old_reg
, new_reg
, cmp_reg
, ar_ccv
, label
;
1815 enum insn_code icode
;
1817 /* Special case for using fetchadd. */
1818 if ((mode
== SImode
|| mode
== DImode
) && fetchadd_operand (val
, mode
))
1821 old_dst
= gen_reg_rtx (mode
);
1823 emit_insn (gen_memory_barrier ());
1826 icode
= CODE_FOR_fetchadd_acq_si
;
1828 icode
= CODE_FOR_fetchadd_acq_di
;
1829 emit_insn (GEN_FCN (icode
) (old_dst
, mem
, val
));
1833 new_reg
= expand_simple_binop (mode
, PLUS
, old_dst
, val
, new_dst
,
1835 if (new_reg
!= new_dst
)
1836 emit_move_insn (new_dst
, new_reg
);
1841 /* Because of the volatile mem read, we get an ld.acq, which is the
1842 front half of the full barrier. The end half is the cmpxchg.rel. */
1843 gcc_assert (MEM_VOLATILE_P (mem
));
1845 old_reg
= gen_reg_rtx (DImode
);
1846 cmp_reg
= gen_reg_rtx (DImode
);
1847 label
= gen_label_rtx ();
1851 val
= simplify_gen_subreg (DImode
, val
, mode
, 0);
1852 emit_insn (gen_extend_insn (cmp_reg
, mem
, DImode
, mode
, 1));
1855 emit_move_insn (cmp_reg
, mem
);
1859 ar_ccv
= gen_rtx_REG (DImode
, AR_CCV_REGNUM
);
1860 emit_move_insn (old_reg
, cmp_reg
);
1861 emit_move_insn (ar_ccv
, cmp_reg
);
1864 emit_move_insn (old_dst
, gen_lowpart (mode
, cmp_reg
));
1869 new_reg
= expand_simple_unop (DImode
, NOT
, new_reg
, NULL_RTX
, true);
1872 new_reg
= expand_simple_binop (DImode
, code
, new_reg
, val
, NULL_RTX
,
1873 true, OPTAB_DIRECT
);
1876 new_reg
= gen_lowpart (mode
, new_reg
);
1878 emit_move_insn (new_dst
, new_reg
);
1882 case QImode
: icode
= CODE_FOR_cmpxchg_rel_qi
; break;
1883 case HImode
: icode
= CODE_FOR_cmpxchg_rel_hi
; break;
1884 case SImode
: icode
= CODE_FOR_cmpxchg_rel_si
; break;
1885 case DImode
: icode
= CODE_FOR_cmpxchg_rel_di
; break;
1890 emit_insn (GEN_FCN (icode
) (cmp_reg
, mem
, ar_ccv
, new_reg
));
1892 emit_cmp_and_jump_insns (cmp_reg
, old_reg
, EQ
, NULL
, DImode
, true, label
);
1895 /* Begin the assembly file. */
1898 ia64_file_start (void)
1900 /* Variable tracking should be run after all optimizations which change order
1901 of insns. It also needs a valid CFG. This can't be done in
1902 ia64_override_options, because flag_var_tracking is finalized after
1904 ia64_flag_var_tracking
= flag_var_tracking
;
1905 flag_var_tracking
= 0;
1907 default_file_start ();
1908 emit_safe_across_calls ();
1912 emit_safe_across_calls (void)
1914 unsigned int rs
, re
;
1921 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
1925 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
1929 fputs ("\t.pred.safe_across_calls ", asm_out_file
);
1933 fputc (',', asm_out_file
);
1935 fprintf (asm_out_file
, "p%u", rs
);
1937 fprintf (asm_out_file
, "p%u-p%u", rs
, re
- 1);
1941 fputc ('\n', asm_out_file
);
1944 /* Helper function for ia64_compute_frame_size: find an appropriate general
1945 register to spill some special register to. SPECIAL_SPILL_MASK contains
1946 bits in GR0 to GR31 that have already been allocated by this routine.
1947 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1950 find_gr_spill (int try_locals
)
1954 /* If this is a leaf function, first try an otherwise unused
1955 call-clobbered register. */
1956 if (current_function_is_leaf
)
1958 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1959 if (! regs_ever_live
[regno
]
1960 && call_used_regs
[regno
]
1961 && ! fixed_regs
[regno
]
1962 && ! global_regs
[regno
]
1963 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1965 current_frame_info
.gr_used_mask
|= 1 << regno
;
1972 regno
= current_frame_info
.n_local_regs
;
1973 /* If there is a frame pointer, then we can't use loc79, because
1974 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1975 reg_name switching code in ia64_expand_prologue. */
1976 if (regno
< (80 - frame_pointer_needed
))
1978 current_frame_info
.n_local_regs
= regno
+ 1;
1979 return LOC_REG (0) + regno
;
1983 /* Failed to find a general register to spill to. Must use stack. */
1987 /* In order to make for nice schedules, we try to allocate every temporary
1988 to a different register. We must of course stay away from call-saved,
1989 fixed, and global registers. We must also stay away from registers
1990 allocated in current_frame_info.gr_used_mask, since those include regs
1991 used all through the prologue.
1993 Any register allocated here must be used immediately. The idea is to
1994 aid scheduling, not to solve data flow problems. */
1996 static int last_scratch_gr_reg
;
1999 next_scratch_gr_reg (void)
2003 for (i
= 0; i
< 32; ++i
)
2005 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
2006 if (call_used_regs
[regno
]
2007 && ! fixed_regs
[regno
]
2008 && ! global_regs
[regno
]
2009 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
2011 last_scratch_gr_reg
= regno
;
2016 /* There must be _something_ available. */
2020 /* Helper function for ia64_compute_frame_size, called through
2021 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2024 mark_reg_gr_used_mask (rtx reg
, void *data ATTRIBUTE_UNUSED
)
2026 unsigned int regno
= REGNO (reg
);
2029 unsigned int i
, n
= hard_regno_nregs
[regno
][GET_MODE (reg
)];
2030 for (i
= 0; i
< n
; ++i
)
2031 current_frame_info
.gr_used_mask
|= 1 << (regno
+ i
);
2035 /* Returns the number of bytes offset between the frame pointer and the stack
2036 pointer for the current function. SIZE is the number of bytes of space
2037 needed for local variables. */
2040 ia64_compute_frame_size (HOST_WIDE_INT size
)
2042 HOST_WIDE_INT total_size
;
2043 HOST_WIDE_INT spill_size
= 0;
2044 HOST_WIDE_INT extra_spill_size
= 0;
2045 HOST_WIDE_INT pretend_args_size
;
2048 int spilled_gr_p
= 0;
2049 int spilled_fr_p
= 0;
2053 if (current_frame_info
.initialized
)
2056 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
2057 CLEAR_HARD_REG_SET (mask
);
2059 /* Don't allocate scratches to the return register. */
2060 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
2062 /* Don't allocate scratches to the EH scratch registers. */
2063 if (cfun
->machine
->ia64_eh_epilogue_sp
)
2064 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
2065 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
2066 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
2068 /* Find the size of the register stack frame. We have only 80 local
2069 registers, because we reserve 8 for the inputs and 8 for the
2072 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2073 since we'll be adjusting that down later. */
2074 regno
= LOC_REG (78) + ! frame_pointer_needed
;
2075 for (; regno
>= LOC_REG (0); regno
--)
2076 if (regs_ever_live
[regno
])
2078 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
2080 /* For functions marked with the syscall_linkage attribute, we must mark
2081 all eight input registers as in use, so that locals aren't visible to
2084 if (cfun
->machine
->n_varargs
> 0
2085 || lookup_attribute ("syscall_linkage",
2086 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
2087 current_frame_info
.n_input_regs
= 8;
2090 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
2091 if (regs_ever_live
[regno
])
2093 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
2096 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
2097 if (regs_ever_live
[regno
])
2099 i
= regno
- OUT_REG (0) + 1;
2101 #ifndef PROFILE_HOOK
2102 /* When -p profiling, we need one output register for the mcount argument.
2103 Likewise for -a profiling for the bb_init_func argument. For -ax
2104 profiling, we need two output registers for the two bb_init_trace_func
2106 if (current_function_profile
)
2109 current_frame_info
.n_output_regs
= i
;
2111 /* ??? No rotating register support yet. */
2112 current_frame_info
.n_rotate_regs
= 0;
2114 /* Discover which registers need spilling, and how much room that
2115 will take. Begin with floating point and general registers,
2116 which will always wind up on the stack. */
2118 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
2119 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
2121 SET_HARD_REG_BIT (mask
, regno
);
2127 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
2128 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
2130 SET_HARD_REG_BIT (mask
, regno
);
2136 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
2137 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
2139 SET_HARD_REG_BIT (mask
, regno
);
2144 /* Now come all special registers that might get saved in other
2145 general registers. */
2147 if (frame_pointer_needed
)
2149 current_frame_info
.reg_fp
= find_gr_spill (1);
2150 /* If we did not get a register, then we take LOC79. This is guaranteed
2151 to be free, even if regs_ever_live is already set, because this is
2152 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2153 as we don't count loc79 above. */
2154 if (current_frame_info
.reg_fp
== 0)
2156 current_frame_info
.reg_fp
= LOC_REG (79);
2157 current_frame_info
.n_local_regs
++;
2161 if (! current_function_is_leaf
)
2163 /* Emit a save of BR0 if we call other functions. Do this even
2164 if this function doesn't return, as EH depends on this to be
2165 able to unwind the stack. */
2166 SET_HARD_REG_BIT (mask
, BR_REG (0));
2168 current_frame_info
.reg_save_b0
= find_gr_spill (1);
2169 if (current_frame_info
.reg_save_b0
== 0)
2175 /* Similarly for ar.pfs. */
2176 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2177 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
2178 if (current_frame_info
.reg_save_ar_pfs
== 0)
2180 extra_spill_size
+= 8;
2184 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2185 registers are clobbered, so we fall back to the stack. */
2186 current_frame_info
.reg_save_gp
2187 = (current_function_calls_setjmp
? 0 : find_gr_spill (1));
2188 if (current_frame_info
.reg_save_gp
== 0)
2190 SET_HARD_REG_BIT (mask
, GR_REG (1));
2197 if (regs_ever_live
[BR_REG (0)] && ! call_used_regs
[BR_REG (0)])
2199 SET_HARD_REG_BIT (mask
, BR_REG (0));
2204 if (regs_ever_live
[AR_PFS_REGNUM
])
2206 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2207 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
2208 if (current_frame_info
.reg_save_ar_pfs
== 0)
2210 extra_spill_size
+= 8;
2216 /* Unwind descriptor hackery: things are most efficient if we allocate
2217 consecutive GR save registers for RP, PFS, FP in that order. However,
2218 it is absolutely critical that FP get the only hard register that's
2219 guaranteed to be free, so we allocated it first. If all three did
2220 happen to be allocated hard regs, and are consecutive, rearrange them
2221 into the preferred order now. */
2222 if (current_frame_info
.reg_fp
!= 0
2223 && current_frame_info
.reg_save_b0
== current_frame_info
.reg_fp
+ 1
2224 && current_frame_info
.reg_save_ar_pfs
== current_frame_info
.reg_fp
+ 2)
2226 current_frame_info
.reg_save_b0
= current_frame_info
.reg_fp
;
2227 current_frame_info
.reg_save_ar_pfs
= current_frame_info
.reg_fp
+ 1;
2228 current_frame_info
.reg_fp
= current_frame_info
.reg_fp
+ 2;
2231 /* See if we need to store the predicate register block. */
2232 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2233 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
2235 if (regno
<= PR_REG (63))
2237 SET_HARD_REG_BIT (mask
, PR_REG (0));
2238 current_frame_info
.reg_save_pr
= find_gr_spill (1);
2239 if (current_frame_info
.reg_save_pr
== 0)
2241 extra_spill_size
+= 8;
2245 /* ??? Mark them all as used so that register renaming and such
2246 are free to use them. */
2247 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2248 regs_ever_live
[regno
] = 1;
2251 /* If we're forced to use st8.spill, we're forced to save and restore
2252 ar.unat as well. The check for existing liveness allows inline asm
2253 to touch ar.unat. */
2254 if (spilled_gr_p
|| cfun
->machine
->n_varargs
2255 || regs_ever_live
[AR_UNAT_REGNUM
])
2257 regs_ever_live
[AR_UNAT_REGNUM
] = 1;
2258 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
2259 current_frame_info
.reg_save_ar_unat
= find_gr_spill (spill_size
== 0);
2260 if (current_frame_info
.reg_save_ar_unat
== 0)
2262 extra_spill_size
+= 8;
2267 if (regs_ever_live
[AR_LC_REGNUM
])
2269 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
2270 current_frame_info
.reg_save_ar_lc
= find_gr_spill (spill_size
== 0);
2271 if (current_frame_info
.reg_save_ar_lc
== 0)
2273 extra_spill_size
+= 8;
2278 /* If we have an odd number of words of pretend arguments written to
2279 the stack, then the FR save area will be unaligned. We round the
2280 size of this area up to keep things 16 byte aligned. */
2282 pretend_args_size
= IA64_STACK_ALIGN (current_function_pretend_args_size
);
2284 pretend_args_size
= current_function_pretend_args_size
;
2286 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
2287 + current_function_outgoing_args_size
);
2288 total_size
= IA64_STACK_ALIGN (total_size
);
2290 /* We always use the 16-byte scratch area provided by the caller, but
2291 if we are a leaf function, there's no one to which we need to provide
2293 if (current_function_is_leaf
)
2294 total_size
= MAX (0, total_size
- 16);
2296 current_frame_info
.total_size
= total_size
;
2297 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
2298 current_frame_info
.spill_size
= spill_size
;
2299 current_frame_info
.extra_spill_size
= extra_spill_size
;
2300 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
2301 current_frame_info
.n_spilled
= n_spilled
;
2302 current_frame_info
.initialized
= reload_completed
;
2305 /* Compute the initial difference between the specified pair of registers. */
2308 ia64_initial_elimination_offset (int from
, int to
)
2310 HOST_WIDE_INT offset
;
2312 ia64_compute_frame_size (get_frame_size ());
2315 case FRAME_POINTER_REGNUM
:
2318 case HARD_FRAME_POINTER_REGNUM
:
2319 if (current_function_is_leaf
)
2320 offset
= -current_frame_info
.total_size
;
2322 offset
= -(current_frame_info
.total_size
2323 - current_function_outgoing_args_size
- 16);
2326 case STACK_POINTER_REGNUM
:
2327 if (current_function_is_leaf
)
2330 offset
= 16 + current_function_outgoing_args_size
;
2338 case ARG_POINTER_REGNUM
:
2339 /* Arguments start above the 16 byte save area, unless stdarg
2340 in which case we store through the 16 byte save area. */
2343 case HARD_FRAME_POINTER_REGNUM
:
2344 offset
= 16 - current_function_pretend_args_size
;
2347 case STACK_POINTER_REGNUM
:
2348 offset
= (current_frame_info
.total_size
2349 + 16 - current_function_pretend_args_size
);
2364 /* If there are more than a trivial number of register spills, we use
2365 two interleaved iterators so that we can get two memory references
2368 In order to simplify things in the prologue and epilogue expanders,
2369 we use helper functions to fix up the memory references after the
2370 fact with the appropriate offsets to a POST_MODIFY memory mode.
2371 The following data structure tracks the state of the two iterators
2372 while insns are being emitted. */
2374 struct spill_fill_data
2376 rtx init_after
; /* point at which to emit initializations */
2377 rtx init_reg
[2]; /* initial base register */
2378 rtx iter_reg
[2]; /* the iterator registers */
2379 rtx
*prev_addr
[2]; /* address of last memory use */
2380 rtx prev_insn
[2]; /* the insn corresponding to prev_addr */
2381 HOST_WIDE_INT prev_off
[2]; /* last offset */
2382 int n_iter
; /* number of iterators in use */
2383 int next_iter
; /* next iterator to use */
2384 unsigned int save_gr_used_mask
;
2387 static struct spill_fill_data spill_fill_data
;
2390 setup_spill_pointers (int n_spills
, rtx init_reg
, HOST_WIDE_INT cfa_off
)
2394 spill_fill_data
.init_after
= get_last_insn ();
2395 spill_fill_data
.init_reg
[0] = init_reg
;
2396 spill_fill_data
.init_reg
[1] = init_reg
;
2397 spill_fill_data
.prev_addr
[0] = NULL
;
2398 spill_fill_data
.prev_addr
[1] = NULL
;
2399 spill_fill_data
.prev_insn
[0] = NULL
;
2400 spill_fill_data
.prev_insn
[1] = NULL
;
2401 spill_fill_data
.prev_off
[0] = cfa_off
;
2402 spill_fill_data
.prev_off
[1] = cfa_off
;
2403 spill_fill_data
.next_iter
= 0;
2404 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
2406 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
2407 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
2409 int regno
= next_scratch_gr_reg ();
2410 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
2411 current_frame_info
.gr_used_mask
|= 1 << regno
;
2416 finish_spill_pointers (void)
2418 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
2422 spill_restore_mem (rtx reg
, HOST_WIDE_INT cfa_off
)
2424 int iter
= spill_fill_data
.next_iter
;
2425 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
2426 rtx disp_rtx
= GEN_INT (disp
);
2429 if (spill_fill_data
.prev_addr
[iter
])
2431 if (CONST_OK_FOR_N (disp
))
2433 *spill_fill_data
.prev_addr
[iter
]
2434 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
2435 gen_rtx_PLUS (DImode
,
2436 spill_fill_data
.iter_reg
[iter
],
2438 REG_NOTES (spill_fill_data
.prev_insn
[iter
])
2439 = gen_rtx_EXPR_LIST (REG_INC
, spill_fill_data
.iter_reg
[iter
],
2440 REG_NOTES (spill_fill_data
.prev_insn
[iter
]));
2444 /* ??? Could use register post_modify for loads. */
2445 if (! CONST_OK_FOR_I (disp
))
2447 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
2448 emit_move_insn (tmp
, disp_rtx
);
2451 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
2452 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
2455 /* Micro-optimization: if we've created a frame pointer, it's at
2456 CFA 0, which may allow the real iterator to be initialized lower,
2457 slightly increasing parallelism. Also, if there are few saves
2458 it may eliminate the iterator entirely. */
2460 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
2461 && frame_pointer_needed
)
2463 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
2464 set_mem_alias_set (mem
, get_varargs_alias_set ());
2472 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
2473 spill_fill_data
.init_reg
[iter
]);
2478 if (! CONST_OK_FOR_I (disp
))
2480 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
2481 emit_move_insn (tmp
, disp_rtx
);
2485 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
2486 spill_fill_data
.init_reg
[iter
],
2493 /* Careful for being the first insn in a sequence. */
2494 if (spill_fill_data
.init_after
)
2495 insn
= emit_insn_after (seq
, spill_fill_data
.init_after
);
2498 rtx first
= get_insns ();
2500 insn
= emit_insn_before (seq
, first
);
2502 insn
= emit_insn (seq
);
2504 spill_fill_data
.init_after
= insn
;
2506 /* If DISP is 0, we may or may not have a further adjustment
2507 afterward. If we do, then the load/store insn may be modified
2508 to be a post-modify. If we don't, then this copy may be
2509 eliminated by copyprop_hardreg_forward, which makes this
2510 insn garbage, which runs afoul of the sanity check in
2511 propagate_one_insn. So mark this insn as legal to delete. */
2513 REG_NOTES(insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
,
2517 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
2519 /* ??? Not all of the spills are for varargs, but some of them are.
2520 The rest of the spills belong in an alias set of their own. But
2521 it doesn't actually hurt to include them here. */
2522 set_mem_alias_set (mem
, get_varargs_alias_set ());
2524 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
2525 spill_fill_data
.prev_off
[iter
] = cfa_off
;
2527 if (++iter
>= spill_fill_data
.n_iter
)
2529 spill_fill_data
.next_iter
= iter
;
2535 do_spill (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
,
2538 int iter
= spill_fill_data
.next_iter
;
2541 mem
= spill_restore_mem (reg
, cfa_off
);
2542 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
2543 spill_fill_data
.prev_insn
[iter
] = insn
;
2550 RTX_FRAME_RELATED_P (insn
) = 1;
2552 /* Don't even pretend that the unwind code can intuit its way
2553 through a pair of interleaved post_modify iterators. Just
2554 provide the correct answer. */
2556 if (frame_pointer_needed
)
2558 base
= hard_frame_pointer_rtx
;
2563 base
= stack_pointer_rtx
;
2564 off
= current_frame_info
.total_size
- cfa_off
;
2568 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2569 gen_rtx_SET (VOIDmode
,
2570 gen_rtx_MEM (GET_MODE (reg
),
2571 plus_constant (base
, off
)),
2578 do_restore (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
)
2580 int iter
= spill_fill_data
.next_iter
;
2583 insn
= emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
2584 GEN_INT (cfa_off
)));
2585 spill_fill_data
.prev_insn
[iter
] = insn
;
2588 /* Wrapper functions that discards the CONST_INT spill offset. These
2589 exist so that we can give gr_spill/gr_fill the offset they need and
2590 use a consistent function interface. */
2593 gen_movdi_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
2595 return gen_movdi (dest
, src
);
2599 gen_fr_spill_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
2601 return gen_fr_spill (dest
, src
);
2605 gen_fr_restore_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
2607 return gen_fr_restore (dest
, src
);
2610 /* Called after register allocation to add any instructions needed for the
2611 prologue. Using a prologue insn is favored compared to putting all of the
2612 instructions in output_function_prologue(), since it allows the scheduler
2613 to intermix instructions with the saves of the caller saved registers. In
2614 some cases, it might be necessary to emit a barrier instruction as the last
2615 insn to prevent such scheduling.
2617 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2618 so that the debug info generation code can handle them properly.
2620 The register save area is layed out like so:
2622 [ varargs spill area ]
2623 [ fr register spill area ]
2624 [ br register spill area ]
2625 [ ar register spill area ]
2626 [ pr register spill area ]
2627 [ gr register spill area ] */
2629 /* ??? Get inefficient code when the frame size is larger than can fit in an
2630 adds instruction. */
2633 ia64_expand_prologue (void)
2635 rtx insn
, ar_pfs_save_reg
, ar_unat_save_reg
;
2636 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
2639 ia64_compute_frame_size (get_frame_size ());
2640 last_scratch_gr_reg
= 15;
2642 /* If there is no epilogue, then we don't need some prologue insns.
2643 We need to avoid emitting the dead prologue insns, because flow
2644 will complain about them. */
2650 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
2651 if ((e
->flags
& EDGE_FAKE
) == 0
2652 && (e
->flags
& EDGE_FALLTHRU
) != 0)
2654 epilogue_p
= (e
!= NULL
);
2659 /* Set the local, input, and output register names. We need to do this
2660 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2661 half. If we use in/loc/out register names, then we get assembler errors
2662 in crtn.S because there is no alloc insn or regstk directive in there. */
2663 if (! TARGET_REG_NAMES
)
2665 int inputs
= current_frame_info
.n_input_regs
;
2666 int locals
= current_frame_info
.n_local_regs
;
2667 int outputs
= current_frame_info
.n_output_regs
;
2669 for (i
= 0; i
< inputs
; i
++)
2670 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
2671 for (i
= 0; i
< locals
; i
++)
2672 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
2673 for (i
= 0; i
< outputs
; i
++)
2674 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
2677 /* Set the frame pointer register name. The regnum is logically loc79,
2678 but of course we'll not have allocated that many locals. Rather than
2679 worrying about renumbering the existing rtxs, we adjust the name. */
2680 /* ??? This code means that we can never use one local register when
2681 there is a frame pointer. loc79 gets wasted in this case, as it is
2682 renamed to a register that will never be used. See also the try_locals
2683 code in find_gr_spill. */
2684 if (current_frame_info
.reg_fp
)
2686 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2687 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2688 = reg_names
[current_frame_info
.reg_fp
];
2689 reg_names
[current_frame_info
.reg_fp
] = tmp
;
2692 /* We don't need an alloc instruction if we've used no outputs or locals. */
2693 if (current_frame_info
.n_local_regs
== 0
2694 && current_frame_info
.n_output_regs
== 0
2695 && current_frame_info
.n_input_regs
<= current_function_args_info
.int_regs
2696 && !TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
2698 /* If there is no alloc, but there are input registers used, then we
2699 need a .regstk directive. */
2700 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
2701 ar_pfs_save_reg
= NULL_RTX
;
2705 current_frame_info
.need_regstk
= 0;
2707 if (current_frame_info
.reg_save_ar_pfs
)
2708 regno
= current_frame_info
.reg_save_ar_pfs
;
2710 regno
= next_scratch_gr_reg ();
2711 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
2713 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
2714 GEN_INT (current_frame_info
.n_input_regs
),
2715 GEN_INT (current_frame_info
.n_local_regs
),
2716 GEN_INT (current_frame_info
.n_output_regs
),
2717 GEN_INT (current_frame_info
.n_rotate_regs
)));
2718 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_pfs
!= 0);
2721 /* Set up frame pointer, stack pointer, and spill iterators. */
2723 n_varargs
= cfun
->machine
->n_varargs
;
2724 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
2725 stack_pointer_rtx
, 0);
2727 if (frame_pointer_needed
)
2729 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
2730 RTX_FRAME_RELATED_P (insn
) = 1;
2733 if (current_frame_info
.total_size
!= 0)
2735 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
2738 if (CONST_OK_FOR_I (- current_frame_info
.total_size
))
2739 offset
= frame_size_rtx
;
2742 regno
= next_scratch_gr_reg ();
2743 offset
= gen_rtx_REG (DImode
, regno
);
2744 emit_move_insn (offset
, frame_size_rtx
);
2747 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
2748 stack_pointer_rtx
, offset
));
2750 if (! frame_pointer_needed
)
2752 RTX_FRAME_RELATED_P (insn
) = 1;
2753 if (GET_CODE (offset
) != CONST_INT
)
2756 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2757 gen_rtx_SET (VOIDmode
,
2759 gen_rtx_PLUS (DImode
,
2766 /* ??? At this point we must generate a magic insn that appears to
2767 modify the stack pointer, the frame pointer, and all spill
2768 iterators. This would allow the most scheduling freedom. For
2769 now, just hard stop. */
2770 emit_insn (gen_blockage ());
2773 /* Must copy out ar.unat before doing any integer spills. */
2774 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2776 if (current_frame_info
.reg_save_ar_unat
)
2778 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2781 alt_regno
= next_scratch_gr_reg ();
2782 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2783 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2786 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2787 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
2788 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_unat
!= 0);
2790 /* Even if we're not going to generate an epilogue, we still
2791 need to save the register so that EH works. */
2792 if (! epilogue_p
&& current_frame_info
.reg_save_ar_unat
)
2793 emit_insn (gen_prologue_use (ar_unat_save_reg
));
2796 ar_unat_save_reg
= NULL_RTX
;
2798 /* Spill all varargs registers. Do this before spilling any GR registers,
2799 since we want the UNAT bits for the GR registers to override the UNAT
2800 bits from varargs, which we don't care about. */
2803 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
2805 reg
= gen_rtx_REG (DImode
, regno
);
2806 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
2809 /* Locate the bottom of the register save area. */
2810 cfa_off
= (current_frame_info
.spill_cfa_off
2811 + current_frame_info
.spill_size
2812 + current_frame_info
.extra_spill_size
);
2814 /* Save the predicate register block either in a register or in memory. */
2815 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2817 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2818 if (current_frame_info
.reg_save_pr
!= 0)
2820 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2821 insn
= emit_move_insn (alt_reg
, reg
);
2823 /* ??? Denote pr spill/fill by a DImode move that modifies all
2824 64 hard registers. */
2825 RTX_FRAME_RELATED_P (insn
) = 1;
2827 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2828 gen_rtx_SET (VOIDmode
, alt_reg
, reg
),
2831 /* Even if we're not going to generate an epilogue, we still
2832 need to save the register so that EH works. */
2834 emit_insn (gen_prologue_use (alt_reg
));
2838 alt_regno
= next_scratch_gr_reg ();
2839 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2840 insn
= emit_move_insn (alt_reg
, reg
);
2841 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2846 /* Handle AR regs in numerical order. All of them get special handling. */
2847 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
2848 && current_frame_info
.reg_save_ar_unat
== 0)
2850 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2851 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
2855 /* The alloc insn already copied ar.pfs into a general register. The
2856 only thing we have to do now is copy that register to a stack slot
2857 if we'd not allocated a local register for the job. */
2858 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
)
2859 && current_frame_info
.reg_save_ar_pfs
== 0)
2861 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2862 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
2866 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2868 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2869 if (current_frame_info
.reg_save_ar_lc
!= 0)
2871 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2872 insn
= emit_move_insn (alt_reg
, reg
);
2873 RTX_FRAME_RELATED_P (insn
) = 1;
2875 /* Even if we're not going to generate an epilogue, we still
2876 need to save the register so that EH works. */
2878 emit_insn (gen_prologue_use (alt_reg
));
2882 alt_regno
= next_scratch_gr_reg ();
2883 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2884 emit_move_insn (alt_reg
, reg
);
2885 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2890 if (current_frame_info
.reg_save_gp
)
2892 insn
= emit_move_insn (gen_rtx_REG (DImode
,
2893 current_frame_info
.reg_save_gp
),
2894 pic_offset_table_rtx
);
2895 /* We don't know for sure yet if this is actually needed, since
2896 we've not split the PIC call patterns. If all of the calls
2897 are indirect, and not followed by any uses of the gp, then
2898 this save is dead. Allow it to go away. */
2900 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, REG_NOTES (insn
));
2903 /* We should now be at the base of the gr/br/fr spill area. */
2904 gcc_assert (cfa_off
== (current_frame_info
.spill_cfa_off
2905 + current_frame_info
.spill_size
));
2907 /* Spill all general registers. */
2908 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2909 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2911 reg
= gen_rtx_REG (DImode
, regno
);
2912 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
2916 /* Handle BR0 specially -- it may be getting stored permanently in
2917 some GR register. */
2918 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2920 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2921 if (current_frame_info
.reg_save_b0
!= 0)
2923 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2924 insn
= emit_move_insn (alt_reg
, reg
);
2925 RTX_FRAME_RELATED_P (insn
) = 1;
2927 /* Even if we're not going to generate an epilogue, we still
2928 need to save the register so that EH works. */
2930 emit_insn (gen_prologue_use (alt_reg
));
2934 alt_regno
= next_scratch_gr_reg ();
2935 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2936 emit_move_insn (alt_reg
, reg
);
2937 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2942 /* Spill the rest of the BR registers. */
2943 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2944 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2946 alt_regno
= next_scratch_gr_reg ();
2947 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2948 reg
= gen_rtx_REG (DImode
, regno
);
2949 emit_move_insn (alt_reg
, reg
);
2950 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2954 /* Align the frame and spill all FR registers. */
2955 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2956 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2958 gcc_assert (!(cfa_off
& 15));
2959 reg
= gen_rtx_REG (XFmode
, regno
);
2960 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
2964 gcc_assert (cfa_off
== current_frame_info
.spill_cfa_off
);
2966 finish_spill_pointers ();
2969 /* Called after register allocation to add any instructions needed for the
2970 epilogue. Using an epilogue insn is favored compared to putting all of the
2971 instructions in output_function_prologue(), since it allows the scheduler
2972 to intermix instructions with the saves of the caller saved registers. In
2973 some cases, it might be necessary to emit a barrier instruction as the last
2974 insn to prevent such scheduling. */
2977 ia64_expand_epilogue (int sibcall_p
)
2979 rtx insn
, reg
, alt_reg
, ar_unat_save_reg
;
2980 int regno
, alt_regno
, cfa_off
;
2982 ia64_compute_frame_size (get_frame_size ());
2984 /* If there is a frame pointer, then we use it instead of the stack
2985 pointer, so that the stack pointer does not need to be valid when
2986 the epilogue starts. See EXIT_IGNORE_STACK. */
2987 if (frame_pointer_needed
)
2988 setup_spill_pointers (current_frame_info
.n_spilled
,
2989 hard_frame_pointer_rtx
, 0);
2991 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
2992 current_frame_info
.total_size
);
2994 if (current_frame_info
.total_size
!= 0)
2996 /* ??? At this point we must generate a magic insn that appears to
2997 modify the spill iterators and the frame pointer. This would
2998 allow the most scheduling freedom. For now, just hard stop. */
2999 emit_insn (gen_blockage ());
3002 /* Locate the bottom of the register save area. */
3003 cfa_off
= (current_frame_info
.spill_cfa_off
3004 + current_frame_info
.spill_size
3005 + current_frame_info
.extra_spill_size
);
3007 /* Restore the predicate registers. */
3008 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
3010 if (current_frame_info
.reg_save_pr
!= 0)
3011 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
3014 alt_regno
= next_scratch_gr_reg ();
3015 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3016 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3019 reg
= gen_rtx_REG (DImode
, PR_REG (0));
3020 emit_move_insn (reg
, alt_reg
);
3023 /* Restore the application registers. */
3025 /* Load the saved unat from the stack, but do not restore it until
3026 after the GRs have been restored. */
3027 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3029 if (current_frame_info
.reg_save_ar_unat
!= 0)
3031 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
3034 alt_regno
= next_scratch_gr_reg ();
3035 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
3036 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
3037 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
3042 ar_unat_save_reg
= NULL_RTX
;
3044 if (current_frame_info
.reg_save_ar_pfs
!= 0)
3046 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_pfs
);
3047 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3048 emit_move_insn (reg
, alt_reg
);
3050 else if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
3052 alt_regno
= next_scratch_gr_reg ();
3053 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3054 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3056 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3057 emit_move_insn (reg
, alt_reg
);
3060 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
3062 if (current_frame_info
.reg_save_ar_lc
!= 0)
3063 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
3066 alt_regno
= next_scratch_gr_reg ();
3067 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3068 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3071 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
3072 emit_move_insn (reg
, alt_reg
);
3075 /* We should now be at the base of the gr/br/fr spill area. */
3076 gcc_assert (cfa_off
== (current_frame_info
.spill_cfa_off
3077 + current_frame_info
.spill_size
));
3079 /* The GP may be stored on the stack in the prologue, but it's
3080 never restored in the epilogue. Skip the stack slot. */
3081 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, GR_REG (1)))
3084 /* Restore all general registers. */
3085 for (regno
= GR_REG (2); regno
<= GR_REG (31); ++regno
)
3086 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3088 reg
= gen_rtx_REG (DImode
, regno
);
3089 do_restore (gen_gr_restore
, reg
, cfa_off
);
3093 /* Restore the branch registers. Handle B0 specially, as it may
3094 have gotten stored in some GR register. */
3095 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3097 if (current_frame_info
.reg_save_b0
!= 0)
3098 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
3101 alt_regno
= next_scratch_gr_reg ();
3102 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3103 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3106 reg
= gen_rtx_REG (DImode
, BR_REG (0));
3107 emit_move_insn (reg
, alt_reg
);
3110 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
3111 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3113 alt_regno
= next_scratch_gr_reg ();
3114 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3115 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3117 reg
= gen_rtx_REG (DImode
, regno
);
3118 emit_move_insn (reg
, alt_reg
);
3121 /* Restore floating point registers. */
3122 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
3123 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3125 gcc_assert (!(cfa_off
& 15));
3126 reg
= gen_rtx_REG (XFmode
, regno
);
3127 do_restore (gen_fr_restore_x
, reg
, cfa_off
);
3131 /* Restore ar.unat for real. */
3132 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3134 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3135 emit_move_insn (reg
, ar_unat_save_reg
);
3138 gcc_assert (cfa_off
== current_frame_info
.spill_cfa_off
);
3140 finish_spill_pointers ();
3142 if (current_frame_info
.total_size
|| cfun
->machine
->ia64_eh_epilogue_sp
)
3144 /* ??? At this point we must generate a magic insn that appears to
3145 modify the spill iterators, the stack pointer, and the frame
3146 pointer. This would allow the most scheduling freedom. For now,
3148 emit_insn (gen_blockage ());
3151 if (cfun
->machine
->ia64_eh_epilogue_sp
)
3152 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
3153 else if (frame_pointer_needed
)
3155 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
3156 RTX_FRAME_RELATED_P (insn
) = 1;
3158 else if (current_frame_info
.total_size
)
3160 rtx offset
, frame_size_rtx
;
3162 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
3163 if (CONST_OK_FOR_I (current_frame_info
.total_size
))
3164 offset
= frame_size_rtx
;
3167 regno
= next_scratch_gr_reg ();
3168 offset
= gen_rtx_REG (DImode
, regno
);
3169 emit_move_insn (offset
, frame_size_rtx
);
3172 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
3175 RTX_FRAME_RELATED_P (insn
) = 1;
3176 if (GET_CODE (offset
) != CONST_INT
)
3179 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
3180 gen_rtx_SET (VOIDmode
,
3182 gen_rtx_PLUS (DImode
,
3189 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
3190 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
3193 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
3196 int fp
= GR_REG (2);
3197 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
3198 first available call clobbered register. If there was a frame_pointer
3199 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3200 so we have to make sure we're using the string "r2" when emitting
3201 the register name for the assembler. */
3202 if (current_frame_info
.reg_fp
&& current_frame_info
.reg_fp
== GR_REG (2))
3203 fp
= HARD_FRAME_POINTER_REGNUM
;
3205 /* We must emit an alloc to force the input registers to become output
3206 registers. Otherwise, if the callee tries to pass its parameters
3207 through to another call without an intervening alloc, then these
3209 /* ??? We don't need to preserve all input registers. We only need to
3210 preserve those input registers used as arguments to the sibling call.
3211 It is unclear how to compute that number here. */
3212 if (current_frame_info
.n_input_regs
!= 0)
3214 rtx n_inputs
= GEN_INT (current_frame_info
.n_input_regs
);
3215 insn
= emit_insn (gen_alloc (gen_rtx_REG (DImode
, fp
),
3216 const0_rtx
, const0_rtx
,
3217 n_inputs
, const0_rtx
));
3218 RTX_FRAME_RELATED_P (insn
) = 1;
3223 /* Return 1 if br.ret can do all the work required to return from a
3227 ia64_direct_return (void)
3229 if (reload_completed
&& ! frame_pointer_needed
)
3231 ia64_compute_frame_size (get_frame_size ());
3233 return (current_frame_info
.total_size
== 0
3234 && current_frame_info
.n_spilled
== 0
3235 && current_frame_info
.reg_save_b0
== 0
3236 && current_frame_info
.reg_save_pr
== 0
3237 && current_frame_info
.reg_save_ar_pfs
== 0
3238 && current_frame_info
.reg_save_ar_unat
== 0
3239 && current_frame_info
.reg_save_ar_lc
== 0);
3244 /* Return the magic cookie that we use to hold the return address
3245 during early compilation. */
3248 ia64_return_addr_rtx (HOST_WIDE_INT count
, rtx frame ATTRIBUTE_UNUSED
)
3252 return gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_RET_ADDR
);
3255 /* Split this value after reload, now that we know where the return
3256 address is saved. */
3259 ia64_split_return_addr_rtx (rtx dest
)
3263 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3265 if (current_frame_info
.reg_save_b0
!= 0)
3266 src
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
3272 /* Compute offset from CFA for BR0. */
3273 /* ??? Must be kept in sync with ia64_expand_prologue. */
3274 off
= (current_frame_info
.spill_cfa_off
3275 + current_frame_info
.spill_size
);
3276 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
3277 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3280 /* Convert CFA offset to a register based offset. */
3281 if (frame_pointer_needed
)
3282 src
= hard_frame_pointer_rtx
;
3285 src
= stack_pointer_rtx
;
3286 off
+= current_frame_info
.total_size
;
3289 /* Load address into scratch register. */
3290 if (CONST_OK_FOR_I (off
))
3291 emit_insn (gen_adddi3 (dest
, src
, GEN_INT (off
)));
3294 emit_move_insn (dest
, GEN_INT (off
));
3295 emit_insn (gen_adddi3 (dest
, src
, dest
));
3298 src
= gen_rtx_MEM (Pmode
, dest
);
3302 src
= gen_rtx_REG (DImode
, BR_REG (0));
3304 emit_move_insn (dest
, src
);
3308 ia64_hard_regno_rename_ok (int from
, int to
)
3310 /* Don't clobber any of the registers we reserved for the prologue. */
3311 if (to
== current_frame_info
.reg_fp
3312 || to
== current_frame_info
.reg_save_b0
3313 || to
== current_frame_info
.reg_save_pr
3314 || to
== current_frame_info
.reg_save_ar_pfs
3315 || to
== current_frame_info
.reg_save_ar_unat
3316 || to
== current_frame_info
.reg_save_ar_lc
)
3319 if (from
== current_frame_info
.reg_fp
3320 || from
== current_frame_info
.reg_save_b0
3321 || from
== current_frame_info
.reg_save_pr
3322 || from
== current_frame_info
.reg_save_ar_pfs
3323 || from
== current_frame_info
.reg_save_ar_unat
3324 || from
== current_frame_info
.reg_save_ar_lc
)
3327 /* Don't use output registers outside the register frame. */
3328 if (OUT_REGNO_P (to
) && to
>= OUT_REG (current_frame_info
.n_output_regs
))
3331 /* Retain even/oddness on predicate register pairs. */
3332 if (PR_REGNO_P (from
) && PR_REGNO_P (to
))
3333 return (from
& 1) == (to
& 1);
3338 /* Target hook for assembling integer objects. Handle word-sized
3339 aligned objects and detect the cases when @fptr is needed. */
3342 ia64_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3344 if (size
== POINTER_SIZE
/ BITS_PER_UNIT
3345 && !(TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
3346 && GET_CODE (x
) == SYMBOL_REF
3347 && SYMBOL_REF_FUNCTION_P (x
))
3349 static const char * const directive
[2][2] = {
3350 /* 64-bit pointer */ /* 32-bit pointer */
3351 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3352 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3354 fputs (directive
[(aligned_p
!= 0)][POINTER_SIZE
== 32], asm_out_file
);
3355 output_addr_const (asm_out_file
, x
);
3356 fputs (")\n", asm_out_file
);
3359 return default_assemble_integer (x
, size
, aligned_p
);
3362 /* Emit the function prologue. */
3365 ia64_output_function_prologue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
3367 int mask
, grsave
, grsave_prev
;
3369 if (current_frame_info
.need_regstk
)
3370 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
3371 current_frame_info
.n_input_regs
,
3372 current_frame_info
.n_local_regs
,
3373 current_frame_info
.n_output_regs
,
3374 current_frame_info
.n_rotate_regs
);
3376 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
3379 /* Emit the .prologue directive. */
3382 grsave
= grsave_prev
= 0;
3383 if (current_frame_info
.reg_save_b0
!= 0)
3386 grsave
= grsave_prev
= current_frame_info
.reg_save_b0
;
3388 if (current_frame_info
.reg_save_ar_pfs
!= 0
3389 && (grsave_prev
== 0
3390 || current_frame_info
.reg_save_ar_pfs
== grsave_prev
+ 1))
3393 if (grsave_prev
== 0)
3394 grsave
= current_frame_info
.reg_save_ar_pfs
;
3395 grsave_prev
= current_frame_info
.reg_save_ar_pfs
;
3397 if (current_frame_info
.reg_fp
!= 0
3398 && (grsave_prev
== 0
3399 || current_frame_info
.reg_fp
== grsave_prev
+ 1))
3402 if (grsave_prev
== 0)
3403 grsave
= HARD_FRAME_POINTER_REGNUM
;
3404 grsave_prev
= current_frame_info
.reg_fp
;
3406 if (current_frame_info
.reg_save_pr
!= 0
3407 && (grsave_prev
== 0
3408 || current_frame_info
.reg_save_pr
== grsave_prev
+ 1))
3411 if (grsave_prev
== 0)
3412 grsave
= current_frame_info
.reg_save_pr
;
3415 if (mask
&& TARGET_GNU_AS
)
3416 fprintf (file
, "\t.prologue %d, %d\n", mask
,
3417 ia64_dbx_register_number (grsave
));
3419 fputs ("\t.prologue\n", file
);
3421 /* Emit a .spill directive, if necessary, to relocate the base of
3422 the register spill area. */
3423 if (current_frame_info
.spill_cfa_off
!= -16)
3424 fprintf (file
, "\t.spill %ld\n",
3425 (long) (current_frame_info
.spill_cfa_off
3426 + current_frame_info
.spill_size
));
3429 /* Emit the .body directive at the scheduled end of the prologue. */
3432 ia64_output_function_end_prologue (FILE *file
)
3434 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
3437 fputs ("\t.body\n", file
);
3440 /* Emit the function epilogue. */
3443 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
3444 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
3448 if (current_frame_info
.reg_fp
)
3450 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
3451 reg_names
[HARD_FRAME_POINTER_REGNUM
]
3452 = reg_names
[current_frame_info
.reg_fp
];
3453 reg_names
[current_frame_info
.reg_fp
] = tmp
;
3455 if (! TARGET_REG_NAMES
)
3457 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
3458 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
3459 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
3460 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
3461 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
3462 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
3465 current_frame_info
.initialized
= 0;
3469 ia64_dbx_register_number (int regno
)
3471 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3472 from its home at loc79 to something inside the register frame. We
3473 must perform the same renumbering here for the debug info. */
3474 if (current_frame_info
.reg_fp
)
3476 if (regno
== HARD_FRAME_POINTER_REGNUM
)
3477 regno
= current_frame_info
.reg_fp
;
3478 else if (regno
== current_frame_info
.reg_fp
)
3479 regno
= HARD_FRAME_POINTER_REGNUM
;
3482 if (IN_REGNO_P (regno
))
3483 return 32 + regno
- IN_REG (0);
3484 else if (LOC_REGNO_P (regno
))
3485 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
3486 else if (OUT_REGNO_P (regno
))
3487 return (32 + current_frame_info
.n_input_regs
3488 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
3494 ia64_initialize_trampoline (rtx addr
, rtx fnaddr
, rtx static_chain
)
3496 rtx addr_reg
, eight
= GEN_INT (8);
3498 /* The Intel assembler requires that the global __ia64_trampoline symbol
3499 be declared explicitly */
3502 static bool declared_ia64_trampoline
= false;
3504 if (!declared_ia64_trampoline
)
3506 declared_ia64_trampoline
= true;
3507 (*targetm
.asm_out
.globalize_label
) (asm_out_file
,
3508 "__ia64_trampoline");
3512 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3513 addr
= convert_memory_address (Pmode
, addr
);
3514 fnaddr
= convert_memory_address (Pmode
, fnaddr
);
3515 static_chain
= convert_memory_address (Pmode
, static_chain
);
3517 /* Load up our iterator. */
3518 addr_reg
= gen_reg_rtx (Pmode
);
3519 emit_move_insn (addr_reg
, addr
);
3521 /* The first two words are the fake descriptor:
3522 __ia64_trampoline, ADDR+16. */
3523 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
3524 gen_rtx_SYMBOL_REF (Pmode
, "__ia64_trampoline"));
3525 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
3527 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
3528 copy_to_reg (plus_constant (addr
, 16)));
3529 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
3531 /* The third word is the target descriptor. */
3532 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), fnaddr
);
3533 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
3535 /* The fourth word is the static chain. */
3536 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), static_chain
);
3539 /* Do any needed setup for a variadic function. CUM has not been updated
3540 for the last named argument which has type TYPE and mode MODE.
3542 We generate the actual spill instructions during prologue generation. */
3545 ia64_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3546 tree type
, int * pretend_size
,
3547 int second_time ATTRIBUTE_UNUSED
)
3549 CUMULATIVE_ARGS next_cum
= *cum
;
3551 /* Skip the current argument. */
3552 ia64_function_arg_advance (&next_cum
, mode
, type
, 1);
3554 if (next_cum
.words
< MAX_ARGUMENT_SLOTS
)
3556 int n
= MAX_ARGUMENT_SLOTS
- next_cum
.words
;
3557 *pretend_size
= n
* UNITS_PER_WORD
;
3558 cfun
->machine
->n_varargs
= n
;
3562 /* Check whether TYPE is a homogeneous floating point aggregate. If
3563 it is, return the mode of the floating point type that appears
3564 in all leafs. If it is not, return VOIDmode.
3566 An aggregate is a homogeneous floating point aggregate is if all
3567 fields/elements in it have the same floating point type (e.g,
3568 SFmode). 128-bit quad-precision floats are excluded.
3570 Variable sized aggregates should never arrive here, since we should
3571 have already decided to pass them by reference. Top-level zero-sized
3572 aggregates are excluded because our parallels crash the middle-end. */
3574 static enum machine_mode
3575 hfa_element_mode (tree type
, bool nested
)
3577 enum machine_mode element_mode
= VOIDmode
;
3578 enum machine_mode mode
;
3579 enum tree_code code
= TREE_CODE (type
);
3580 int know_element_mode
= 0;
3583 if (!nested
&& (!TYPE_SIZE (type
) || integer_zerop (TYPE_SIZE (type
))))
3588 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
3589 case BOOLEAN_TYPE
: case CHAR_TYPE
: case POINTER_TYPE
:
3590 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
3591 case LANG_TYPE
: case FUNCTION_TYPE
:
3594 /* Fortran complex types are supposed to be HFAs, so we need to handle
3595 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3598 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
3599 && TYPE_MODE (type
) != TCmode
)
3600 return GET_MODE_INNER (TYPE_MODE (type
));
3605 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3606 mode if this is contained within an aggregate. */
3607 if (nested
&& TYPE_MODE (type
) != TFmode
)
3608 return TYPE_MODE (type
);
3613 return hfa_element_mode (TREE_TYPE (type
), 1);
3617 case QUAL_UNION_TYPE
:
3618 for (t
= TYPE_FIELDS (type
); t
; t
= TREE_CHAIN (t
))
3620 if (TREE_CODE (t
) != FIELD_DECL
)
3623 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
3624 if (know_element_mode
)
3626 if (mode
!= element_mode
)
3629 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
3633 know_element_mode
= 1;
3634 element_mode
= mode
;
3637 return element_mode
;
3640 /* If we reach here, we probably have some front-end specific type
3641 that the backend doesn't know about. This can happen via the
3642 aggregate_value_p call in init_function_start. All we can do is
3643 ignore unknown tree types. */
3650 /* Return the number of words required to hold a quantity of TYPE and MODE
3651 when passed as an argument. */
3653 ia64_function_arg_words (tree type
, enum machine_mode mode
)
3657 if (mode
== BLKmode
)
3658 words
= int_size_in_bytes (type
);
3660 words
= GET_MODE_SIZE (mode
);
3662 return (words
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
; /* round up */
3665 /* Return the number of registers that should be skipped so the current
3666 argument (described by TYPE and WORDS) will be properly aligned.
3668 Integer and float arguments larger than 8 bytes start at the next
3669 even boundary. Aggregates larger than 8 bytes start at the next
3670 even boundary if the aggregate has 16 byte alignment. Note that
3671 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3672 but are still to be aligned in registers.
3674 ??? The ABI does not specify how to handle aggregates with
3675 alignment from 9 to 15 bytes, or greater than 16. We handle them
3676 all as if they had 16 byte alignment. Such aggregates can occur
3677 only if gcc extensions are used. */
3679 ia64_function_arg_offset (CUMULATIVE_ARGS
*cum
, tree type
, int words
)
3681 if ((cum
->words
& 1) == 0)
3685 && TREE_CODE (type
) != INTEGER_TYPE
3686 && TREE_CODE (type
) != REAL_TYPE
)
3687 return TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
;
3692 /* Return rtx for register where argument is passed, or zero if it is passed
3694 /* ??? 128-bit quad-precision floats are always passed in general
3698 ia64_function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
, tree type
,
3699 int named
, int incoming
)
3701 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
3702 int words
= ia64_function_arg_words (type
, mode
);
3703 int offset
= ia64_function_arg_offset (cum
, type
, words
);
3704 enum machine_mode hfa_mode
= VOIDmode
;
3706 /* If all argument slots are used, then it must go on the stack. */
3707 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
3710 /* Check for and handle homogeneous FP aggregates. */
3712 hfa_mode
= hfa_element_mode (type
, 0);
3714 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3715 and unprototyped hfas are passed specially. */
3716 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
3720 int fp_regs
= cum
->fp_regs
;
3721 int int_regs
= cum
->words
+ offset
;
3722 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3726 /* If prototyped, pass it in FR regs then GR regs.
3727 If not prototyped, pass it in both FR and GR regs.
3729 If this is an SFmode aggregate, then it is possible to run out of
3730 FR regs while GR regs are still left. In that case, we pass the
3731 remaining part in the GR regs. */
3733 /* Fill the FP regs. We do this always. We stop if we reach the end
3734 of the argument, the last FP register, or the last argument slot. */
3736 byte_size
= ((mode
== BLKmode
)
3737 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3738 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3740 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3741 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
3743 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3744 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
3748 args_byte_size
+= hfa_size
;
3752 /* If no prototype, then the whole thing must go in GR regs. */
3753 if (! cum
->prototype
)
3755 /* If this is an SFmode aggregate, then we might have some left over
3756 that needs to go in GR regs. */
3757 else if (byte_size
!= offset
)
3758 int_regs
+= offset
/ UNITS_PER_WORD
;
3760 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3762 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
3764 enum machine_mode gr_mode
= DImode
;
3765 unsigned int gr_size
;
3767 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3768 then this goes in a GR reg left adjusted/little endian, right
3769 adjusted/big endian. */
3770 /* ??? Currently this is handled wrong, because 4-byte hunks are
3771 always right adjusted/little endian. */
3774 /* If we have an even 4 byte hunk because the aggregate is a
3775 multiple of 4 bytes in size, then this goes in a GR reg right
3776 adjusted/little endian. */
3777 else if (byte_size
- offset
== 4)
3780 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3781 gen_rtx_REG (gr_mode
, (basereg
3785 gr_size
= GET_MODE_SIZE (gr_mode
);
3787 if (gr_size
== UNITS_PER_WORD
3788 || (gr_size
< UNITS_PER_WORD
&& offset
% UNITS_PER_WORD
== 0))
3790 else if (gr_size
> UNITS_PER_WORD
)
3791 int_regs
+= gr_size
/ UNITS_PER_WORD
;
3793 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3796 /* Integral and aggregates go in general registers. If we have run out of
3797 FR registers, then FP values must also go in general registers. This can
3798 happen when we have a SFmode HFA. */
3799 else if (mode
== TFmode
|| mode
== TCmode
3800 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
3802 int byte_size
= ((mode
== BLKmode
)
3803 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3804 if (BYTES_BIG_ENDIAN
3805 && (mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3806 && byte_size
< UNITS_PER_WORD
3809 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3810 gen_rtx_REG (DImode
,
3811 (basereg
+ cum
->words
3814 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3817 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
3821 /* If there is a prototype, then FP values go in a FR register when
3822 named, and in a GR register when unnamed. */
3823 else if (cum
->prototype
)
3826 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
3827 /* In big-endian mode, an anonymous SFmode value must be represented
3828 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3829 the value into the high half of the general register. */
3830 else if (BYTES_BIG_ENDIAN
&& mode
== SFmode
)
3831 return gen_rtx_PARALLEL (mode
,
3833 gen_rtx_EXPR_LIST (VOIDmode
,
3834 gen_rtx_REG (DImode
, basereg
+ cum
->words
+ offset
),
3836 /* Similarly, an anonymous XFmode value must be split into two
3837 registers and padded appropriately. */
3838 else if (BYTES_BIG_ENDIAN
&& mode
== XFmode
)
3841 loc
[0] = gen_rtx_EXPR_LIST (VOIDmode
,
3842 gen_rtx_REG (DImode
, basereg
+ cum
->words
+ offset
),
3844 loc
[1] = gen_rtx_EXPR_LIST (VOIDmode
,
3845 gen_rtx_REG (DImode
, basereg
+ cum
->words
+ offset
+ 1),
3846 GEN_INT (UNITS_PER_WORD
));
3847 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (2, loc
));
3850 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
3852 /* If there is no prototype, then FP values go in both FR and GR
3856 /* See comment above. */
3857 enum machine_mode inner_mode
=
3858 (BYTES_BIG_ENDIAN
&& mode
== SFmode
) ? DImode
: mode
;
3860 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3861 gen_rtx_REG (mode
, (FR_ARG_FIRST
3864 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3865 gen_rtx_REG (inner_mode
,
3866 (basereg
+ cum
->words
3870 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
3874 /* Return number of bytes, at the beginning of the argument, that must be
3875 put in registers. 0 is the argument is entirely in registers or entirely
3879 ia64_arg_partial_bytes (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3880 tree type
, bool named ATTRIBUTE_UNUSED
)
3882 int words
= ia64_function_arg_words (type
, mode
);
3883 int offset
= ia64_function_arg_offset (cum
, type
, words
);
3885 /* If all argument slots are used, then it must go on the stack. */
3886 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
3889 /* It doesn't matter whether the argument goes in FR or GR regs. If
3890 it fits within the 8 argument slots, then it goes entirely in
3891 registers. If it extends past the last argument slot, then the rest
3892 goes on the stack. */
3894 if (words
+ cum
->words
+ offset
<= MAX_ARGUMENT_SLOTS
)
3897 return (MAX_ARGUMENT_SLOTS
- cum
->words
- offset
) * UNITS_PER_WORD
;
3900 /* Update CUM to point after this argument. This is patterned after
3901 ia64_function_arg. */
3904 ia64_function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3905 tree type
, int named
)
3907 int words
= ia64_function_arg_words (type
, mode
);
3908 int offset
= ia64_function_arg_offset (cum
, type
, words
);
3909 enum machine_mode hfa_mode
= VOIDmode
;
3911 /* If all arg slots are already full, then there is nothing to do. */
3912 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
3915 cum
->words
+= words
+ offset
;
3917 /* Check for and handle homogeneous FP aggregates. */
3919 hfa_mode
= hfa_element_mode (type
, 0);
3921 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3922 and unprototyped hfas are passed specially. */
3923 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
3925 int fp_regs
= cum
->fp_regs
;
3926 /* This is the original value of cum->words + offset. */
3927 int int_regs
= cum
->words
- words
;
3928 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3932 /* If prototyped, pass it in FR regs then GR regs.
3933 If not prototyped, pass it in both FR and GR regs.
3935 If this is an SFmode aggregate, then it is possible to run out of
3936 FR regs while GR regs are still left. In that case, we pass the
3937 remaining part in the GR regs. */
3939 /* Fill the FP regs. We do this always. We stop if we reach the end
3940 of the argument, the last FP register, or the last argument slot. */
3942 byte_size
= ((mode
== BLKmode
)
3943 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3944 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3946 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3947 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
3950 args_byte_size
+= hfa_size
;
3954 cum
->fp_regs
= fp_regs
;
3957 /* Integral and aggregates go in general registers. So do TFmode FP values.
3958 If we have run out of FR registers, then other FP values must also go in
3959 general registers. This can happen when we have a SFmode HFA. */
3960 else if (mode
== TFmode
|| mode
== TCmode
3961 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
3962 cum
->int_regs
= cum
->words
;
3964 /* If there is a prototype, then FP values go in a FR register when
3965 named, and in a GR register when unnamed. */
3966 else if (cum
->prototype
)
3969 cum
->int_regs
= cum
->words
;
3971 /* ??? Complex types should not reach here. */
3972 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3974 /* If there is no prototype, then FP values go in both FR and GR
3978 /* ??? Complex types should not reach here. */
3979 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3980 cum
->int_regs
= cum
->words
;
3984 /* Arguments with alignment larger than 8 bytes start at the next even
3985 boundary. On ILP32 HPUX, TFmode arguments start on next even boundary
3986 even though their normal alignment is 8 bytes. See ia64_function_arg. */
3989 ia64_function_arg_boundary (enum machine_mode mode
, tree type
)
3992 if (mode
== TFmode
&& TARGET_HPUX
&& TARGET_ILP32
)
3993 return PARM_BOUNDARY
* 2;
3997 if (TYPE_ALIGN (type
) > PARM_BOUNDARY
)
3998 return PARM_BOUNDARY
* 2;
4000 return PARM_BOUNDARY
;
4003 if (GET_MODE_BITSIZE (mode
) > PARM_BOUNDARY
)
4004 return PARM_BOUNDARY
* 2;
4006 return PARM_BOUNDARY
;
4009 /* Variable sized types are passed by reference. */
4010 /* ??? At present this is a GCC extension to the IA-64 ABI. */
4013 ia64_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4014 enum machine_mode mode ATTRIBUTE_UNUSED
,
4015 tree type
, bool named ATTRIBUTE_UNUSED
)
4017 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
4020 /* True if it is OK to do sibling call optimization for the specified
4021 call expression EXP. DECL will be the called function, or NULL if
4022 this is an indirect call. */
4024 ia64_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
4026 /* We can't perform a sibcall if the current function has the syscall_linkage
4028 if (lookup_attribute ("syscall_linkage",
4029 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
4032 /* We must always return with our current GP. This means we can
4033 only sibcall to functions defined in the current module. */
4034 return decl
&& (*targetm
.binds_local_p
) (decl
);
4038 /* Implement va_arg. */
4041 ia64_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4043 /* Variable sized types are passed by reference. */
4044 if (pass_by_reference (NULL
, TYPE_MODE (type
), type
, false))
4046 tree ptrtype
= build_pointer_type (type
);
4047 tree addr
= std_gimplify_va_arg_expr (valist
, ptrtype
, pre_p
, post_p
);
4048 return build_va_arg_indirect_ref (addr
);
4051 /* Aggregate arguments with alignment larger than 8 bytes start at
4052 the next even boundary. Integer and floating point arguments
4053 do so if they are larger than 8 bytes, whether or not they are
4054 also aligned larger than 8 bytes. */
4055 if ((TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == INTEGER_TYPE
)
4056 ? int_size_in_bytes (type
) > 8 : TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
4058 tree t
= build (PLUS_EXPR
, TREE_TYPE (valist
), valist
,
4059 build_int_cst (NULL_TREE
, 2 * UNITS_PER_WORD
- 1));
4060 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4061 build_int_cst (NULL_TREE
, -2 * UNITS_PER_WORD
));
4062 t
= build (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
4063 gimplify_and_add (t
, pre_p
);
4066 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4069 /* Return 1 if function return value returned in memory. Return 0 if it is
4073 ia64_return_in_memory (tree valtype
, tree fntype ATTRIBUTE_UNUSED
)
4075 enum machine_mode mode
;
4076 enum machine_mode hfa_mode
;
4077 HOST_WIDE_INT byte_size
;
4079 mode
= TYPE_MODE (valtype
);
4080 byte_size
= GET_MODE_SIZE (mode
);
4081 if (mode
== BLKmode
)
4083 byte_size
= int_size_in_bytes (valtype
);
4088 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
4090 hfa_mode
= hfa_element_mode (valtype
, 0);
4091 if (hfa_mode
!= VOIDmode
)
4093 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
4095 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
4100 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
4106 /* Return rtx for register that holds the function return value. */
4109 ia64_function_value (tree valtype
, tree func ATTRIBUTE_UNUSED
)
4111 enum machine_mode mode
;
4112 enum machine_mode hfa_mode
;
4114 mode
= TYPE_MODE (valtype
);
4115 hfa_mode
= hfa_element_mode (valtype
, 0);
4117 if (hfa_mode
!= VOIDmode
)
4125 hfa_size
= GET_MODE_SIZE (hfa_mode
);
4126 byte_size
= ((mode
== BLKmode
)
4127 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
4129 for (i
= 0; offset
< byte_size
; i
++)
4131 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4132 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
4136 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
4138 else if (FLOAT_TYPE_P (valtype
) && mode
!= TFmode
&& mode
!= TCmode
)
4139 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
4142 bool need_parallel
= false;
4144 /* In big-endian mode, we need to manage the layout of aggregates
4145 in the registers so that we get the bits properly aligned in
4146 the highpart of the registers. */
4147 if (BYTES_BIG_ENDIAN
4148 && (mode
== BLKmode
|| (valtype
&& AGGREGATE_TYPE_P (valtype
))))
4149 need_parallel
= true;
4151 /* Something like struct S { long double x; char a[0] } is not an
4152 HFA structure, and therefore doesn't go in fp registers. But
4153 the middle-end will give it XFmode anyway, and XFmode values
4154 don't normally fit in integer registers. So we need to smuggle
4155 the value inside a parallel. */
4156 else if (mode
== XFmode
|| mode
== XCmode
)
4157 need_parallel
= true;
4167 bytesize
= int_size_in_bytes (valtype
);
4168 /* An empty PARALLEL is invalid here, but the return value
4169 doesn't matter for empty structs. */
4171 return gen_rtx_REG (mode
, GR_RET_FIRST
);
4172 for (i
= 0; offset
< bytesize
; i
++)
4174 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4175 gen_rtx_REG (DImode
,
4178 offset
+= UNITS_PER_WORD
;
4180 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
4183 return gen_rtx_REG (mode
, GR_RET_FIRST
);
4187 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4188 We need to emit DTP-relative relocations. */
4191 ia64_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
4193 gcc_assert (size
== 8);
4194 fputs ("\tdata8.ua\t@dtprel(", file
);
4195 output_addr_const (file
, x
);
4199 /* Print a memory address as an operand to reference that memory location. */
4201 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4202 also call this from ia64_print_operand for memory addresses. */
4205 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED
,
4206 rtx address ATTRIBUTE_UNUSED
)
4210 /* Print an operand to an assembler instruction.
4211 C Swap and print a comparison operator.
4212 D Print an FP comparison operator.
4213 E Print 32 - constant, for SImode shifts as extract.
4214 e Print 64 - constant, for DImode rotates.
4215 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4216 a floating point register emitted normally.
4217 I Invert a predicate register by adding 1.
4218 J Select the proper predicate register for a condition.
4219 j Select the inverse predicate register for a condition.
4220 O Append .acq for volatile load.
4221 P Postincrement of a MEM.
4222 Q Append .rel for volatile store.
4223 S Shift amount for shladd instruction.
4224 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4225 for Intel assembler.
4226 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4227 for Intel assembler.
4228 r Print register name, or constant 0 as r0. HP compatibility for
4230 v Print vector constant value as an 8-byte integer value. */
4233 ia64_print_operand (FILE * file
, rtx x
, int code
)
4240 /* Handled below. */
4245 enum rtx_code c
= swap_condition (GET_CODE (x
));
4246 fputs (GET_RTX_NAME (c
), file
);
4251 switch (GET_CODE (x
))
4263 str
= GET_RTX_NAME (GET_CODE (x
));
4270 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
4274 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
4278 if (x
== CONST0_RTX (GET_MODE (x
)))
4279 str
= reg_names
[FR_REG (0)];
4280 else if (x
== CONST1_RTX (GET_MODE (x
)))
4281 str
= reg_names
[FR_REG (1)];
4284 gcc_assert (GET_CODE (x
) == REG
);
4285 str
= reg_names
[REGNO (x
)];
4291 fputs (reg_names
[REGNO (x
) + 1], file
);
4297 unsigned int regno
= REGNO (XEXP (x
, 0));
4298 if (GET_CODE (x
) == EQ
)
4302 fputs (reg_names
[regno
], file
);
4307 if (MEM_VOLATILE_P (x
))
4308 fputs(".acq", file
);
4313 HOST_WIDE_INT value
;
4315 switch (GET_CODE (XEXP (x
, 0)))
4321 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
4322 if (GET_CODE (x
) == CONST_INT
)
4326 gcc_assert (GET_CODE (x
) == REG
);
4327 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
4333 value
= GET_MODE_SIZE (GET_MODE (x
));
4337 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
4341 fprintf (file
, ", " HOST_WIDE_INT_PRINT_DEC
, value
);
4346 if (MEM_VOLATILE_P (x
))
4347 fputs(".rel", file
);
4351 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
4355 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
4357 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
4363 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
4365 const char *prefix
= "0x";
4366 if (INTVAL (x
) & 0x80000000)
4368 fprintf (file
, "0xffffffff");
4371 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
4377 /* If this operand is the constant zero, write it as register zero.
4378 Any register, zero, or CONST_INT value is OK here. */
4379 if (GET_CODE (x
) == REG
)
4380 fputs (reg_names
[REGNO (x
)], file
);
4381 else if (x
== CONST0_RTX (GET_MODE (x
)))
4383 else if (GET_CODE (x
) == CONST_INT
)
4384 output_addr_const (file
, x
);
4386 output_operand_lossage ("invalid %%r value");
4390 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
4391 x
= simplify_subreg (DImode
, x
, GET_MODE (x
), 0);
4398 /* For conditional branches, returns or calls, substitute
4399 sptk, dptk, dpnt, or spnt for %s. */
4400 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
4403 int pred_val
= INTVAL (XEXP (x
, 0));
4405 /* Guess top and bottom 10% statically predicted. */
4406 if (pred_val
< REG_BR_PROB_BASE
/ 50)
4408 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
4410 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98)
4415 else if (GET_CODE (current_output_insn
) == CALL_INSN
)
4420 fputs (which
, file
);
4425 x
= current_insn_predicate
;
4428 unsigned int regno
= REGNO (XEXP (x
, 0));
4429 if (GET_CODE (x
) == EQ
)
4431 fprintf (file
, "(%s) ", reg_names
[regno
]);
4436 output_operand_lossage ("ia64_print_operand: unknown code");
4440 switch (GET_CODE (x
))
4442 /* This happens for the spill/restore instructions. */
4447 /* ... fall through ... */
4450 fputs (reg_names
[REGNO (x
)], file
);
4455 rtx addr
= XEXP (x
, 0);
4456 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
4457 addr
= XEXP (addr
, 0);
4458 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
4463 output_addr_const (file
, x
);
4470 /* Compute a (partial) cost for rtx X. Return true if the complete
4471 cost has been computed, and false if subexpressions should be
4472 scanned. In either case, *TOTAL contains the cost result. */
4473 /* ??? This is incomplete. */
4476 ia64_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
4484 *total
= CONST_OK_FOR_J (INTVAL (x
)) ? 0 : COSTS_N_INSNS (1);
4487 if (CONST_OK_FOR_I (INTVAL (x
)))
4489 else if (CONST_OK_FOR_J (INTVAL (x
)))
4492 *total
= COSTS_N_INSNS (1);
4495 if (CONST_OK_FOR_K (INTVAL (x
)) || CONST_OK_FOR_L (INTVAL (x
)))
4498 *total
= COSTS_N_INSNS (1);
4503 *total
= COSTS_N_INSNS (1);
4509 *total
= COSTS_N_INSNS (3);
4513 /* For multiplies wider than HImode, we have to go to the FPU,
4514 which normally involves copies. Plus there's the latency
4515 of the multiply itself, and the latency of the instructions to
4516 transfer integer regs to FP regs. */
4517 /* ??? Check for FP mode. */
4518 if (GET_MODE_SIZE (GET_MODE (x
)) > 2)
4519 *total
= COSTS_N_INSNS (10);
4521 *total
= COSTS_N_INSNS (2);
4529 *total
= COSTS_N_INSNS (1);
4536 /* We make divide expensive, so that divide-by-constant will be
4537 optimized to a multiply. */
4538 *total
= COSTS_N_INSNS (60);
4546 /* Calculate the cost of moving data from a register in class FROM to
4547 one in class TO, using MODE. */
4550 ia64_register_move_cost (enum machine_mode mode
, enum reg_class from
,
4553 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4554 if (to
== ADDL_REGS
)
4556 if (from
== ADDL_REGS
)
4559 /* All costs are symmetric, so reduce cases by putting the
4560 lower number class as the destination. */
4563 enum reg_class tmp
= to
;
4564 to
= from
, from
= tmp
;
4567 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4568 so that we get secondary memory reloads. Between FR_REGS,
4569 we have to make this at least as expensive as MEMORY_MOVE_COST
4570 to avoid spectacularly poor register class preferencing. */
4573 if (to
!= GR_REGS
|| from
!= GR_REGS
)
4574 return MEMORY_MOVE_COST (mode
, to
, 0);
4582 /* Moving between PR registers takes two insns. */
4583 if (from
== PR_REGS
)
4585 /* Moving between PR and anything but GR is impossible. */
4586 if (from
!= GR_REGS
)
4587 return MEMORY_MOVE_COST (mode
, to
, 0);
4591 /* Moving between BR and anything but GR is impossible. */
4592 if (from
!= GR_REGS
&& from
!= GR_AND_BR_REGS
)
4593 return MEMORY_MOVE_COST (mode
, to
, 0);
4598 /* Moving between AR and anything but GR is impossible. */
4599 if (from
!= GR_REGS
)
4600 return MEMORY_MOVE_COST (mode
, to
, 0);
4605 case GR_AND_FR_REGS
:
4606 case GR_AND_BR_REGS
:
4617 /* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on CLASS
4618 to use when copying X into that class. */
4621 ia64_preferred_reload_class (rtx x
, enum reg_class
class)
4626 /* Don't allow volatile mem reloads into floating point registers.
4627 This is defined to force reload to choose the r/m case instead
4628 of the f/f case when reloading (set (reg fX) (mem/v)). */
4629 if (MEM_P (x
) && MEM_VOLATILE_P (x
))
4632 /* Force all unrecognized constants into the constant pool. */
/* NOTE(review): mangled by extraction -- the local declarations, braces and
   the `return GR_REGS;'/`return NO_REGS;' style statements that follow each
   of the tests below were dropped, as was the switch-on-CLASS skeleton.
   Fragments are kept byte-identical; recover the dropped lines from the
   original file before compiling.  */
4650 /* This function returns the register class required for a secondary
4651 register when copying between one of the registers in CLASS, and X,
4652 using MODE. A return value of NO_REGS means that no secondary register
4656 ia64_secondary_reload_class (enum reg_class
class,
4657 enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
/* regno is the hard register (or -1) that X ultimately refers to.  */
4661 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
4662 regno
= true_regnum (x
);
4669 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4670 interaction. We end up with two pseudos with overlapping lifetimes
4671 both of which are equiv to the same constant, and both which need
4672 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4673 changes depending on the path length, which means the qty_first_reg
4674 check in make_regs_eqv can give different answers at different times.
4675 At some point I'll probably need a reload_indi pattern to handle
4678 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4679 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4680 non-general registers for good measure. */
4681 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
))
4684 /* This is needed if a pseudo used as a call_operand gets spilled to a
4686 if (GET_CODE (x
) == MEM
)
4691 /* Need to go through general registers to get to other class regs. */
4692 if (regno
>= 0 && ! (FR_REGNO_P (regno
) || GENERAL_REGNO_P (regno
)))
4695 /* This can happen when a paradoxical subreg is an operand to the
4697 /* ??? This shouldn't be necessary after instruction scheduling is
4698 enabled, because paradoxical subregs are not accepted by
4699 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4700 stop the paradoxical subreg stupidity in the *_operand functions
4702 if (GET_CODE (x
) == MEM
4703 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
4704 || GET_MODE (x
) == QImode
))
4707 /* This can happen because of the ior/and/etc patterns that accept FP
4708 registers as operands. If the third operand is a constant, then it
4709 needs to be reloaded into a FP register. */
4710 if (GET_CODE (x
) == CONST_INT
)
4713 /* This can happen because of register elimination in a muldi3 insn.
4714 E.g. `26107 * (unsigned long)&u'. */
4715 if (GET_CODE (x
) == PLUS
)
4720 /* ??? This happens if we cse/gcse a BImode value across a call,
4721 and the function has a nonlocal goto. This is because global
4722 does not allocate call crossing pseudos to hard registers when
4723 current_function_has_nonlocal_goto is true. This is relatively
4724 common for C++ programs that use exceptions. To reproduce,
4725 return NO_REGS and compile libstdc++. */
4726 if (GET_CODE (x
) == MEM
)
4729 /* This can happen when we take a BImode subreg of a DImode value,
4730 and that DImode value winds up in some non-GR register. */
4731 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
) && ! PR_REGNO_P (regno
))
/* NOTE(review): mangled by extraction -- the return type, braces, the
   leading `if (TARGET_GNU_AS ...' style guard (only its continuation lines
   at 4756-4757 survive) and several early returns were dropped.  Fragments
   kept byte-identical.  Purpose: declare undefined externals for assemblers
   (Intel/HP) that require explicit declarations, while avoiding references
   to builtins that may have no library equivalent.  */
4743 /* Emit text to declare externally defined variables and functions, because
4744 the Intel assembler does not support undefined externals. */
4747 ia64_asm_output_external (FILE *file
, tree decl
, const char *name
)
4749 int save_referenced
;
4751 /* GNU as does not need anything here, but the HP linker does need
4752 something for external functions. */
/* NOTE(review): the condition these `||' continuations belong to was
   dropped -- presumably a TARGET_HPUX_LD guard; confirm in the original.  */
4756 || TREE_CODE (decl
) != FUNCTION_DECL
4757 || strstr (name
, "__builtin_") == name
))
4760 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4761 the linker when we do this, so we need to be careful not to do this for
4762 builtin functions which have no library equivalent. Unfortunately, we
4763 can't tell here whether or not a function will actually be called by
4764 expand_expr, so we pull in library functions even if we may not need
4766 if (! strcmp (name
, "__builtin_next_arg")
4767 || ! strcmp (name
, "alloca")
4768 || ! strcmp (name
, "__builtin_constant_p")
4769 || ! strcmp (name
, "__builtin_args_info"))
4773 ia64_hpux_add_extern_decl (decl
);
/* Save/restore TREE_SYMBOL_REFERENCED around assemble_name so emitting the
   declaration does not mark the symbol as referenced.  */
4776 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4778 save_referenced
= TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
));
4779 if (TREE_CODE (decl
) == FUNCTION_DECL
)
4780 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
4781 (*targetm
.asm_out
.globalize_label
) (file
, name
);
4782 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
)) = save_referenced
;
/* NOTE(review): mangled by extraction -- the return type (static void),
   braces, the local declarations of `i', `first', `last', the dash/comma
   NUL-termination statements, the early `return;' after each warning, and
   the outer while-loop over comma-separated ranges were all dropped.
   Fragments kept byte-identical.  */
4786 /* Parse the -mfixed-range= option string. */
4789 fix_range (const char *const_str
)
4792 char *str
, *dash
, *comma
;
4794 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4795 REG2 are either register names or register numbers. The effect
4796 of this option is to mark the registers in the range from REG1 to
4797 REG2 as ``fixed'' so they won't be used by the compiler. This is
4798 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
/* Make a writable copy of the option string so '-'/',' can be replaced
   with NULs in place.  */
4800 i
= strlen (const_str
);
4801 str
= (char *) alloca (i
+ 1);
4802 memcpy (str
, const_str
, i
+ 1);
4806 dash
= strchr (str
, '-');
4809 warning (0, "value of -mfixed-range must have form REG1-REG2");
4814 comma
= strchr (dash
+ 1, ',');
4818 first
= decode_reg_name (str
);
4821 warning (0, "unknown register name: %s", str
);
4825 last
= decode_reg_name (dash
+ 1);
4828 warning (0, "unknown register name: %s", dash
+ 1);
4836 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
/* Mark every register in the range as fixed and call-used.  */
4840 for (i
= first
; i
<= last
; ++i
)
4841 fixed_regs
[i
] = call_used_regs
[i
] = 1;
/* NOTE(review): mangled by extraction -- the return type (static bool), the
   switch-on-CODE skeleton, the `return true/false;' statements, the struct
   tag line for the processor alias table, and the OPT_mtune_ case label were
   dropped.  Fragments kept byte-identical.  */
4851 /* Implement TARGET_HANDLE_OPTION. */
4854 ia64_handle_option (size_t code
, const char *arg
, int value
)
4858 case OPT_mfixed_range_
:
4862 case OPT_mtls_size_
:
4863 if (value
!= 14 && value
!= 22 && value
!= 64)
4864 error ("bad value %<%s%> for -mtls-size= switch", arg
)
;
/* Table mapping -mtune= names (including nicknames) to processor types;
   the enclosing case label (presumably OPT_mtune_) was dropped.  */
4871 const char *name
; /* processor name or nickname. */
4872 enum processor_type processor
;
4874 const processor_alias_table
[] =
4876 {"itanium", PROCESSOR_ITANIUM
},
4877 {"itanium1", PROCESSOR_ITANIUM
},
4878 {"merced", PROCESSOR_ITANIUM
},
4879 {"itanium2", PROCESSOR_ITANIUM2
},
4880 {"mckinley", PROCESSOR_ITANIUM2
},
4882 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
4885 for (i
= 0; i
< pta_size
; i
++)
4886 if (!strcmp (arg
, processor_alias_table
[i
].name
))
4888 ia64_tune
= processor_alias_table
[i
].processor
;
4892 error ("bad value %<%s%> for -mtune= switch", arg
);
4901 /* Implement OVERRIDE_OPTIONS. */
4904 ia64_override_options (void)
4906 if (TARGET_AUTO_PIC
)
4907 target_flags
|= MASK_CONST_GP
;
4909 if (TARGET_INLINE_SQRT
== INL_MIN_LAT
)
4911 warning (0, "not yet implemented: latency-optimized inline square root");
4912 TARGET_INLINE_SQRT
= INL_MAX_THR
;
4915 ia64_flag_schedule_insns2
= flag_schedule_insns_after_reload
;
4916 flag_schedule_insns_after_reload
= 0;
4918 ia64_section_threshold
= g_switch_set
? g_switch_value
: IA64_DEFAULT_GVALUE
;
4920 init_machine_status
= ia64_init_machine_status
;
4923 static struct machine_function
*
4924 ia64_init_machine_status (void)
4926 return ggc_alloc_cleared (sizeof (struct machine_function
));
4929 static enum attr_itanium_class
ia64_safe_itanium_class (rtx
);
4930 static enum attr_type
ia64_safe_type (rtx
);
4932 static enum attr_itanium_class
4933 ia64_safe_itanium_class (rtx insn
)
4935 if (recog_memoized (insn
) >= 0)
4936 return get_attr_itanium_class (insn
);
4938 return ITANIUM_CLASS_UNKNOWN
;
4941 static enum attr_type
4942 ia64_safe_type (rtx insn
)
4944 if (recog_memoized (insn
) >= 0)
4945 return get_attr_type (insn
);
4947 return TYPE_UNKNOWN
;
4950 /* The following collection of routines emit instruction group stop bits as
4951 necessary to avoid dependencies. */
4953 /* Need to track some additional registers as far as serialization is
4954 concerned so we can properly handle br.call and br.ret. We could
4955 make these registers visible to gcc, but since these registers are
4956 never explicitly used in gcc generated code, it seems wasteful to
4957 do so (plus it would make the call and return patterns needlessly
4959 #define REG_RP (BR_REG (0))
4960 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4961 /* This is used for volatile asms which may require a stop bit immediately
4962 before and after them. */
4963 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4964 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4965 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4967 /* For each register, we keep track of how it has been written in the
4968 current instruction group.
4970 If a register is written unconditionally (no qualifying predicate),
4971 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4973 If a register is written if its qualifying predicate P is true, we
4974 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4975 may be written again by the complement of P (P^1) and when this happens,
4976 WRITE_COUNT gets set to 2.
4978 The result of this is that whenever an insn attempts to write a register
4979 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4981 If a predicate register is written by a floating-point insn, we set
4982 WRITTEN_BY_FP to true.
4984 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4985 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
/* Per-register write-tracking state for the current instruction group; the
   meaning of each field is described in the long comment above.  (The
   opening and closing braces were dropped by extraction; field list is
   byte-for-byte the surviving fragments.)  */
struct reg_write_state
{
  unsigned int write_count : 2;   /* 0 = unwritten, 1 = predicated, 2 = done.  */
  unsigned int first_pred : 16;   /* Qualifying predicate of first write.  */
  unsigned int written_by_fp : 1;
  unsigned int written_by_and : 1;
  unsigned int written_by_or : 1;
};
4996 /* Cumulative info for the current instruction group. */
4997 struct reg_write_state rws_sum
[NUM_REGS
];
4998 /* Info for the current instruction. This gets copied to rws_sum after a
4999 stop bit is emitted. */
5000 struct reg_write_state rws_insn
[NUM_REGS
];
5002 /* Indicates whether this is the first instruction after a stop bit,
5003 in which case we don't need another stop bit. Without this,
5004 ia64_variable_issue will die when scheduling an alloc. */
5005 static int first_instruction
;
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  (The struct tag line was dropped by extraction;
   the tag name `reg_flags' is established by the prototypes that follow.)  */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};
5019 static void rws_update (struct reg_write_state
*, int, struct reg_flags
, int);
5020 static int rws_access_regno (int, struct reg_flags
, int);
5021 static int rws_access_reg (rtx
, struct reg_flags
, int);
5022 static void update_set_flags (rtx
, struct reg_flags
*);
5023 static int set_src_needs_barrier (rtx
, struct reg_flags
, int);
5024 static int rtx_needs_barrier (rtx
, struct reg_flags
, int);
5025 static void init_insn_group_barriers (void);
5026 static int group_barrier_needed (rtx
);
5027 static int safe_group_barrier_needed (rtx
);
5029 /* Update *RWS for REGNO, which is being written by the current instruction,
5030 with predicate PRED, and associated register flags in FLAGS. */
5033 rws_update (struct reg_write_state
*rws
, int regno
, struct reg_flags flags
, int pred
)
5036 rws
[regno
].write_count
++;
5038 rws
[regno
].write_count
= 2;
5039 rws
[regno
].written_by_fp
|= flags
.is_fp
;
5040 /* ??? Not tracking and/or across differing predicates. */
5041 rws
[regno
].written_by_and
= flags
.is_and
;
5042 rws
[regno
].written_by_or
= flags
.is_or
;
5043 rws
[regno
].first_pred
= pred
;
/* NOTE(review): mangled by extraction -- the return type, braces, the
   `if (flags.is_write)' / `else' split, the case labels (0/1/2) of both
   switches, `need_barrier = 1;' statements, `break;'s and the gcc_unreachable
   default were all dropped.  Fragments kept byte-identical; the surviving
   comments still describe each arm.  */
5046 /* Handle an access to register REGNO of type FLAGS using predicate register
5047 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
5048 a dependency with an earlier instruction in the same group. */
5051 rws_access_regno (int regno
, struct reg_flags flags
, int pred
)
5053 int need_barrier
= 0;
5055 gcc_assert (regno
< NUM_REGS
);
/* and/or tracking only applies to predicate registers.  */
5057 if (! PR_REGNO_P (regno
))
5058 flags
.is_and
= flags
.is_or
= 0;
/* Write path (presumably guarded by a dropped `if (flags.is_write)').  */
5064 /* One insn writes same reg multiple times? */
5065 gcc_assert (!rws_insn
[regno
].write_count
);
5067 /* Update info for current instruction. */
5068 rws_update (rws_insn
, regno
, flags
, pred
);
5069 write_count
= rws_sum
[regno
].write_count
;
5071 switch (write_count
)
5074 /* The register has not been written yet. */
5075 rws_update (rws_sum
, regno
, flags
, pred
);
5079 /* The register has been written via a predicate. If this is
5080 not a complementary predicate, then we need a barrier. */
5081 /* ??? This assumes that P and P+1 are always complementary
5082 predicates for P even. */
5083 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
5085 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
5087 else if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
5089 rws_update (rws_sum
, regno
, flags
, pred
);
5093 /* The register has been unconditionally written already. We
5095 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
5097 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
5101 rws_sum
[regno
].written_by_and
= flags
.is_and
;
5102 rws_sum
[regno
].written_by_or
= flags
.is_or
;
/* Read path: RAW hazards, with branch-specific exemptions.  */
5111 if (flags
.is_branch
)
5113 /* Branches have several RAW exceptions that allow to avoid
5116 if (REGNO_REG_CLASS (regno
) == BR_REGS
|| regno
== AR_PFS_REGNUM
)
5117 /* RAW dependencies on branch regs are permissible as long
5118 as the writer is a non-branch instruction. Since we
5119 never generate code that uses a branch register written
5120 by a branch instruction, handling this case is
5124 if (REGNO_REG_CLASS (regno
) == PR_REGS
5125 && ! rws_sum
[regno
].written_by_fp
)
5126 /* The predicates of a branch are available within the
5127 same insn group as long as the predicate was written by
5128 something other than a floating-point instruction. */
5132 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
5134 if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
5137 switch (rws_sum
[regno
].write_count
)
5140 /* The register has not been written yet. */
5144 /* The register has been written via a predicate. If this is
5145 not a complementary predicate, then we need a barrier. */
5146 /* ??? This assumes that P and P+1 are always complementary
5147 predicates for P even. */
5148 if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
5153 /* The register has been unconditionally written already. We
5163 return need_barrier
;
5167 rws_access_reg (rtx reg
, struct reg_flags flags
, int pred
)
5169 int regno
= REGNO (reg
);
5170 int n
= HARD_REGNO_NREGS (REGNO (reg
), GET_MODE (reg
));
5173 return rws_access_regno (regno
, flags
, pred
);
5176 int need_barrier
= 0;
5178 need_barrier
|= rws_access_regno (regno
+ n
, flags
, pred
);
5179 return need_barrier
;
5183 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
5184 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
5187 update_set_flags (rtx x
, struct reg_flags
*pflags
)
5189 rtx src
= SET_SRC (x
);
5191 switch (GET_CODE (src
))
5197 /* There are three cases here:
5198 (1) The destination is (pc), in which case this is a branch,
5199 nothing here applies.
5200 (2) The destination is ar.lc, in which case this is a
5201 doloop_end_internal,
5202 (3) The destination is an fp register, in which case this is
5203 an fselect instruction.
5204 In all cases, nothing we do in this function applies. */
5208 if (COMPARISON_P (src
)
5209 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src
, 0))))
5210 /* Set pflags->is_fp to 1 so that we know we're dealing
5211 with a floating point comparison when processing the
5212 destination of the SET. */
5215 /* Discover if this is a parallel comparison. We only handle
5216 and.orcm and or.andcm at present, since we must retain a
5217 strict inverse on the predicate pair. */
5218 else if (GET_CODE (src
) == AND
)
5220 else if (GET_CODE (src
) == IOR
)
5227 /* Subroutine of rtx_needs_barrier; this function determines whether the
5228 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5229 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5233 set_src_needs_barrier (rtx x
, struct reg_flags flags
, int pred
)
5235 int need_barrier
= 0;
5237 rtx src
= SET_SRC (x
);
5239 if (GET_CODE (src
) == CALL
)
5240 /* We don't need to worry about the result registers that
5241 get written by subroutine call. */
5242 return rtx_needs_barrier (src
, flags
, pred
);
5243 else if (SET_DEST (x
) == pc_rtx
)
5245 /* X is a conditional branch. */
5246 /* ??? This seems redundant, as the caller sets this bit for
5248 flags
.is_branch
= 1;
5249 return rtx_needs_barrier (src
, flags
, pred
);
5252 need_barrier
= rtx_needs_barrier (src
, flags
, pred
);
5255 if (GET_CODE (dst
) == ZERO_EXTRACT
)
5257 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 1), flags
, pred
);
5258 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 2), flags
, pred
);
5259 dst
= XEXP (dst
, 0);
5261 return need_barrier
;
/* NOTE(review): this large dispatch function was mangled by extraction --
   the return type, braces, most `case' labels of the outer GET_CODE switch
   (SET, CALL, COND_EXEC, CLOBBER, USE, ASM_OPERANDS, PARALLEL, SUBREG, REG,
   MEM, RETURN, UNSPEC, ...), the `break;'/`return'/`need_barrier = 1;'
   statements and the gcc_unreachable defaults were dropped.  A behavior-
   preserving rewrite is not safe from these fragments; they are kept
   byte-identical with orientation comments added.  */
5264 /* Handle an access to rtx X of type FLAGS using predicate register
5265 PRED. Return 1 if this access creates a dependency with an earlier
5266 instruction in the same group. */
5269 rtx_needs_barrier (rtx x
, struct reg_flags flags
, int pred
)
5272 int is_complemented
= 0;
5273 int need_barrier
= 0;
5274 const char *format_ptr
;
5275 struct reg_flags new_flags
;
5283 switch (GET_CODE (x
))
/* SET: check source, then destination as a write.  */
5286 update_set_flags (x
, &new_flags
);
5287 need_barrier
= set_src_needs_barrier (x
, new_flags
, pred
);
5288 if (GET_CODE (SET_SRC (x
)) != CALL
)
5290 new_flags
.is_write
= 1;
5291 need_barrier
|= rtx_needs_barrier (SET_DEST (x
), new_flags
, pred
);
/* CALL: reads AR_EC and writes RP/AR_PFS/AR_CFM.  */
5296 new_flags
.is_write
= 0;
5297 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
5299 /* Avoid multiple register writes, in case this is a pattern with
5300 multiple CALL rtx. This avoids a failure in rws_access_reg. */
5301 if (! flags
.is_sibcall
&& ! rws_insn
[REG_AR_CFM
].write_count
)
5303 new_flags
.is_write
= 1;
5304 need_barrier
|= rws_access_regno (REG_RP
, new_flags
, pred
);
5305 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, new_flags
, pred
);
5306 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
/* COND_EXEC: extract the qualifying predicate, then recurse on the body.  */
5311 /* X is a predicated instruction. */
5313 cond
= COND_EXEC_TEST (x
);
5315 need_barrier
= rtx_needs_barrier (cond
, flags
, 0);
5317 if (GET_CODE (cond
) == EQ
)
5318 is_complemented
= 1;
5319 cond
= XEXP (cond
, 0);
5320 gcc_assert (GET_CODE (cond
) == REG
5321 && REGNO_REG_CLASS (REGNO (cond
)) == PR_REGS
);
5322 pred
= REGNO (cond
);
5323 if (is_complemented
)
5326 need_barrier
|= rtx_needs_barrier (COND_EXEC_CODE (x
), flags
, pred
);
5327 return need_barrier
;
5331 /* Clobber & use are for earlier compiler-phases only. */
/* ASM handling: volatile asms touch the pseudo REG_VOLATILE register.  */
5336 /* We always emit stop bits for traditional asms. We emit stop bits
5337 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5338 if (GET_CODE (x
) != ASM_OPERANDS
5339 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
5341 /* Avoid writing the register multiple times if we have multiple
5342 asm outputs. This avoids a failure in rws_access_reg. */
5343 if (! rws_insn
[REG_VOLATILE
].write_count
)
5345 new_flags
.is_write
= 1;
5346 rws_access_regno (REG_VOLATILE
, new_flags
, pred
);
5351 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5352 We cannot just fall through here since then we would be confused
5353 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
5354 traditional asms unlike their normal usage. */
5356 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
5357 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
/* PARALLEL: process each element; SETs via set_src_needs_barrier.  */
5362 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
5364 rtx pat
= XVECEXP (x
, 0, i
);
5365 switch (GET_CODE (pat
))
5368 update_set_flags (pat
, &new_flags
);
5369 need_barrier
|= set_src_needs_barrier (pat
, new_flags
, pred
);
5375 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
/* Second pass over the PARALLEL for destination writes.  */
5386 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
5388 rtx pat
= XVECEXP (x
, 0, i
);
5389 if (GET_CODE (pat
) == SET
)
5391 if (GET_CODE (SET_SRC (pat
)) != CALL
)
5393 new_flags
.is_write
= 1;
5394 need_barrier
|= rtx_needs_barrier (SET_DEST (pat
), new_flags
,
5398 else if (GET_CODE (pat
) == CLOBBER
|| GET_CODE (pat
) == RETURN
)
5399 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
/* SUBREG / REG.  */
5404 need_barrier
|= rtx_needs_barrier (SUBREG_REG (x
), flags
, pred
);
5407 if (REGNO (x
) == AR_UNAT_REGNUM
)
5409 for (i
= 0; i
< 64; ++i
)
5410 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ i
, flags
, pred
);
5413 need_barrier
= rws_access_reg (x
, flags
, pred
);
/* MEM.  */
5417 /* Find the regs used in memory address computation. */
5418 new_flags
.is_write
= 0;
5419 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
5422 case CONST_INT
: case CONST_DOUBLE
: case CONST_VECTOR
:
5423 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
5426 /* Operators with side-effects. */
5427 case POST_INC
: case POST_DEC
:
5428 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
5430 new_flags
.is_write
= 0;
5431 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
5432 new_flags
.is_write
= 1;
5433 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
/* POST_MODIFY (case label dropped): base reg is read, addend read,
   base reg written.  */
5437 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
5439 new_flags
.is_write
= 0;
5440 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
5441 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
5442 new_flags
.is_write
= 1;
5443 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
5446 /* Handle common unary and binary ops for efficiency. */
5447 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
5448 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
5449 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
5450 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
5451 case NE
: case EQ
: case GE
: case GT
: case LE
:
5452 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
5453 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
5454 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
5457 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
5458 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
5459 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
5460 case SQRT
: case FFS
: case POPCOUNT
:
5461 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
5465 /* VEC_SELECT's second argument is a PARALLEL with integers that
5466 describe the elements selected. On ia64, those integers are
5467 always constants. Avoid walking the PARALLEL so that we don't
5468 get confused with "normal" parallels and then die. */
5469 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
/* UNSPEC: dispatch on the unspec number.  */
5473 switch (XINT (x
, 1))
5475 case UNSPEC_LTOFF_DTPMOD
:
5476 case UNSPEC_LTOFF_DTPREL
:
5478 case UNSPEC_LTOFF_TPREL
:
5480 case UNSPEC_PRED_REL_MUTEX
:
5481 case UNSPEC_PIC_CALL
:
5483 case UNSPEC_FETCHADD_ACQ
:
5484 case UNSPEC_BSP_VALUE
:
5485 case UNSPEC_FLUSHRS
:
5486 case UNSPEC_BUNDLE_SELECTOR
:
5489 case UNSPEC_GR_SPILL
:
5490 case UNSPEC_GR_RESTORE
:
/* GR spill/restore also touches the matching AR.UNAT bit.  */
5492 HOST_WIDE_INT offset
= INTVAL (XVECEXP (x
, 0, 1));
5493 HOST_WIDE_INT bit
= (offset
>> 3) & 63;
5495 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
5496 new_flags
.is_write
= (XINT (x
, 1) == UNSPEC_GR_SPILL
);
5497 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ bit
,
5502 case UNSPEC_FR_SPILL
:
5503 case UNSPEC_FR_RESTORE
:
5504 case UNSPEC_GETF_EXP
:
5505 case UNSPEC_SETF_EXP
:
5507 case UNSPEC_FR_SQRT_RECIP_APPROX
:
5508 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
5511 case UNSPEC_FR_RECIP_APPROX
:
5513 case UNSPEC_COPYSIGN
:
5514 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
5515 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
5518 case UNSPEC_CMPXCHG_ACQ
:
5519 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
5520 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 2), flags
, pred
);
5528 case UNSPEC_VOLATILE
:
5529 switch (XINT (x
, 1))
/* UNSPECV_ALLOC (case label dropped): alloc reads AR_PFS and writes
   AR_CFM, and must start a group.  */
5532 /* Alloc must always be the first instruction of a group.
5533 We force this by always returning true. */
5534 /* ??? We might get better scheduling if we explicitly check for
5535 input/local/output register dependencies, and modify the
5536 scheduler so that alloc is always reordered to the start of
5537 the current group. We could then eliminate all of the
5538 first_instruction code. */
5539 rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
5541 new_flags
.is_write
= 1;
5542 rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
5545 case UNSPECV_SET_BSP
:
5549 case UNSPECV_BLOCKAGE
:
5550 case UNSPECV_INSN_GROUP_BARRIER
:
5552 case UNSPECV_PSAC_ALL
:
5553 case UNSPECV_PSAC_NORMAL
:
/* RETURN (case label dropped): reads RP/AR_PFS, writes AR_EC/AR_CFM.  */
5562 new_flags
.is_write
= 0;
5563 need_barrier
= rws_access_regno (REG_RP
, flags
, pred
);
5564 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
5566 new_flags
.is_write
= 1;
5567 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
5568 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
/* Default: walk the rtx generically by format string.  */
5572 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
5573 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
5574 switch (format_ptr
[i
])
5576 case '0': /* unused field */
5577 case 'i': /* integer */
5578 case 'n': /* note */
5579 case 'w': /* wide integer */
5580 case 's': /* pointer to string */
5581 case 'S': /* optional pointer to string */
5585 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
5590 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
5591 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
5600 return need_barrier
;
5603 /* Clear out the state for group_barrier_needed at the start of a
5604 sequence of insns. */
5607 init_insn_group_barriers (void)
5609 memset (rws_sum
, 0, sizeof (rws_sum
));
5610 first_instruction
= 1;
/* NOTE(review): mangled by extraction -- the return type, braces, the
   outer switch's case labels (NOTE/BARRIER/CALL_INSN/JUMP_INSN/INSN...),
   the local `rtx pat;' declaration, several `break;'s and the final
   stop-bit bookkeeping between lines 5717 and 5720 were dropped.
   Fragments kept byte-identical.  */
5613 /* Given the current state, determine whether a group barrier (a stop bit) is
5614 necessary before INSN. Return nonzero if so. This modifies the state to
5615 include the effects of INSN as a side-effect. */
5618 group_barrier_needed (rtx insn
)
5621 int need_barrier
= 0;
5622 struct reg_flags flags
;
5624 memset (&flags
, 0, sizeof (flags
));
5625 switch (GET_CODE (insn
))
5631 /* A barrier doesn't imply an instruction group boundary. */
5635 memset (rws_insn
, 0, sizeof (rws_insn
));
/* CALL_INSN arm: calls are branches, and sibcalls are flagged.  */
5639 flags
.is_branch
= 1;
5640 flags
.is_sibcall
= SIBLING_CALL_P (insn
);
5641 memset (rws_insn
, 0, sizeof (rws_insn
));
5643 /* Don't bundle a call following another call. */
5644 if ((pat
= prev_active_insn (insn
))
5645 && GET_CODE (pat
) == CALL_INSN
)
5651 need_barrier
= rtx_needs_barrier (PATTERN (insn
), flags
, 0);
/* JUMP_INSN arm.  */
5655 flags
.is_branch
= 1;
5657 /* Don't bundle a jump following a call. */
5658 if ((pat
= prev_active_insn (insn
))
5659 && GET_CODE (pat
) == CALL_INSN
)
/* INSN arm.  */
5667 if (GET_CODE (PATTERN (insn
)) == USE
5668 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
5669 /* Don't care about USE and CLOBBER "insns"---those are used to
5670 indicate to the optimizer that it shouldn't get rid of
5671 certain operations. */
5674 pat
= PATTERN (insn
);
5676 /* Ug. Hack hacks hacked elsewhere. */
5677 switch (recog_memoized (insn
))
5679 /* We play dependency tricks with the epilogue in order
5680 to get proper schedules. Undo this for dv analysis. */
5681 case CODE_FOR_epilogue_deallocate_stack
:
5682 case CODE_FOR_prologue_allocate_stack
:
5683 pat
= XVECEXP (pat
, 0, 0);
5686 /* The pattern we use for br.cloop confuses the code above.
5687 The second element of the vector is representative. */
5688 case CODE_FOR_doloop_end_internal
:
5689 pat
= XVECEXP (pat
, 0, 1);
5692 /* Doesn't generate code. */
5693 case CODE_FOR_pred_rel_mutex
:
5694 case CODE_FOR_prologue_use
:
5701 memset (rws_insn
, 0, sizeof (rws_insn
));
5702 need_barrier
= rtx_needs_barrier (pat
, flags
, 0);
5704 /* Check to see if the previous instruction was a volatile
5707 need_barrier
= rws_access_regno (REG_VOLATILE
, flags
, 0);
/* Real first insn after a stop bit never needs another barrier.  */
5714 if (first_instruction
&& INSN_P (insn
)
5715 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
5716 && GET_CODE (PATTERN (insn
)) != USE
5717 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
5720 first_instruction
= 0;
5723 return need_barrier
;
5726 /* Like group_barrier_needed, but do not clobber the current state. */
5729 safe_group_barrier_needed (rtx insn
)
5731 struct reg_write_state rws_saved
[NUM_REGS
];
5732 int saved_first_instruction
;
5735 memcpy (rws_saved
, rws_sum
, NUM_REGS
* sizeof *rws_saved
);
5736 saved_first_instruction
= first_instruction
;
5738 t
= group_barrier_needed (insn
);
5740 memcpy (rws_sum
, rws_saved
, NUM_REGS
* sizeof *rws_saved
);
5741 first_instruction
= saved_first_instruction
;
/* NOTE(review): mangled by extraction -- the return type, braces, the
   `rtx insn;' and `rtx last_label = 0;' locals, the `last_label = insn;'
   bookkeeping in the label arms, and the function's tail were dropped.
   Fragments kept byte-identical.  */
5746 /* Scan the current function and insert stop bits as necessary to
5747 eliminate dependencies. This function assumes that a final
5748 instruction scheduling pass has been run which has already
5749 inserted most of the necessary stop bits. This function only
5750 inserts new ones at basic block boundaries, since these are
5751 invisible to the scheduler. */
5754 emit_insn_group_barriers (FILE *dump
)
5758 int insns_since_last_label
= 0;
5760 init_insn_group_barriers ();
5762 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
5764 if (GET_CODE (insn
) == CODE_LABEL
)
5766 if (insns_since_last_label
)
5768 insns_since_last_label
= 0;
5770 else if (GET_CODE (insn
) == NOTE
5771 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
5773 if (insns_since_last_label
)
5775 insns_since_last_label
= 0;
5777 else if (GET_CODE (insn
) == INSN
5778 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
5779 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
5781 init_insn_group_barriers ();
5784 else if (INSN_P (insn
))
5786 insns_since_last_label
= 1;
5788 if (group_barrier_needed (insn
))
/* When a barrier is needed it is emitted before the pending label
   (the `last_label' tracking lines were dropped).  */
5793 fprintf (dump
, "Emitting stop before label %d\n",
5794 INSN_UID (last_label
));
5795 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label
);
5798 init_insn_group_barriers ();
/* NOTE(review): mangled by extraction -- the return type, braces, the
   `rtx insn;' local and an `if (last)' guard before the JUMP_INSN test
   were dropped.  Fragments kept byte-identical.  */
5806 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5807 This function has to emit all necessary group barriers. */
5810 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED
)
5814 init_insn_group_barriers ();
5816 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
5818 if (GET_CODE (insn
) == BARRIER
)
5820 rtx last
= prev_active_insn (insn
);
/* Skip a jump-table ADDR_DIFF_VEC so the stop bit lands after real code.  */
5824 if (GET_CODE (last
) == JUMP_INSN
5825 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
5826 last
= prev_active_insn (last
);
5827 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
5828 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
5830 init_insn_group_barriers ();
5832 else if (INSN_P (insn
))
5834 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
5835 init_insn_group_barriers ();
5836 else if (group_barrier_needed (insn
))
5838 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5839 init_insn_group_barriers ();
/* Re-apply INSN to the freshly reset state so it is accounted for.  */
5840 group_barrier_needed (insn
);
/* NOTE(review): mangled by extraction -- the initializer list of
   bundle_name (the ".mii"/".mmi"/... strings inside the #if NR_BUNDLES ==
   10 region, and its closing #endif and braces) was dropped, as were a few
   declarations (e.g. the `clocks' array mentioned at 5915-5921).
   Fragments kept byte-identical.  */
5848 /* Instruction scheduling support. */
5850 #define NR_BUNDLES 10
5852 /* A list of names of all available bundles. */
5854 static const char *bundle_name
[NR_BUNDLES
] =
5860 #if NR_BUNDLES == 10
5870 /* Nonzero if we should insert stop bits into the schedule. */
5872 int ia64_final_schedule
= 0;
5874 /* Codes of the corresponding queried units: */
5876 static int _0mii_
, _0mmi_
, _0mfi_
, _0mmf_
;
5877 static int _0bbb_
, _0mbb_
, _0mib_
, _0mmb_
, _0mfb_
, _0mlx_
;
5879 static int _1mii_
, _1mmi_
, _1mfi_
, _1mmf_
;
5880 static int _1bbb_
, _1mbb_
, _1mib_
, _1mmb_
, _1mfb_
, _1mlx_
;
5882 static int pos_1
, pos_2
, pos_3
, pos_4
, pos_5
, pos_6
;
5884 /* The following variable value is an insn group barrier. */
5886 static rtx dfa_stop_insn
;
5888 /* The following variable value is the last issued insn. */
5890 static rtx last_scheduled_insn
;
5892 /* The following variable value is size of the DFA state. */
5894 static size_t dfa_state_size
;
5896 /* The following variable value is pointer to a DFA state used as
5897 temporary variable. */
5899 static state_t temp_dfa_state
= NULL
;
5901 /* The following variable value is DFA state after issuing the last
5904 static state_t prev_cycle_state
= NULL
;
5906 /* The following array element values are TRUE if the corresponding
5907 insn requires to add stop bits before it. */
5909 static char *stops_p
;
5911 /* The following variable is used to set up the mentioned above array. */
5913 static int stop_before_p
= 0;
5915 /* The following variable value is length of the arrays `clocks' and
5918 static int clocks_length
;
5920 /* The following array element values are cycles on which the
5921 corresponding insn will be issued. The array is used only for
5926 /* The following array element values are numbers of cycles should be
5927 added to improve insn scheduling for MM_insns for Itanium1. */
5929 static int *add_cycles
;
5931 static rtx
ia64_single_set (rtx
);
5932 static void ia64_emit_insn_before (rtx
, rtx
);
5934 /* Map a bundle number to its pseudo-op. */
5937 get_bundle_name (int b
)
5939 return bundle_name
[b
];
/* Return the maximum number of instructions a cpu can issue.
   IA-64 can issue up to two bundles (six slots) per clock.  */

static int
ia64_issue_rate (void)
{
  return 6;
}
5951 /* Helper function - like single_set, but look inside COND_EXEC. */
5954 ia64_single_set (rtx insn
)
5956 rtx x
= PATTERN (insn
), ret
;
5957 if (GET_CODE (x
) == COND_EXEC
)
5958 x
= COND_EXEC_CODE (x
);
5959 if (GET_CODE (x
) == SET
)
5962 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5963 Although they are not classical single set, the second set is there just
5964 to protect it from moving past FP-relative stack accesses. */
5965 switch (recog_memoized (insn
))
5967 case CODE_FOR_prologue_allocate_stack
:
5968 case CODE_FOR_epilogue_deallocate_stack
:
5969 ret
= XVECEXP (x
, 0, 0);
5973 ret
= single_set_2 (insn
, x
);
5980 /* Adjust the cost of a scheduling dependency. Return the new cost of
5981 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5984 ia64_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
5986 enum attr_itanium_class dep_class
;
5987 enum attr_itanium_class insn_class
;
5989 if (REG_NOTE_KIND (link
) != REG_DEP_OUTPUT
)
5992 insn_class
= ia64_safe_itanium_class (insn
);
5993 dep_class
= ia64_safe_itanium_class (dep_insn
);
5994 if (dep_class
== ITANIUM_CLASS_ST
|| dep_class
== ITANIUM_CLASS_STF
5995 || insn_class
== ITANIUM_CLASS_ST
|| insn_class
== ITANIUM_CLASS_STF
)
6001 /* Like emit_insn_before, but skip cycle_display notes.
6002 ??? When cycle display notes are implemented, update this. */
6005 ia64_emit_insn_before (rtx insn
, rtx before
)
6007 emit_insn_before (insn
, before
);
6010 /* The following function marks insns who produce addresses for load
6011 and store insns. Such insns will be placed into M slots because it
6012 decrease latency time for Itanium1 (see function
6013 `ia64_produce_address_p' and the DFA descriptions). */
6016 ia64_dependencies_evaluation_hook (rtx head
, rtx tail
)
6018 rtx insn
, link
, next
, next_tail
;
6020 /* Before reload, which_alternative is not set, which means that
6021 ia64_safe_itanium_class will produce wrong results for (at least)
6022 move instructions. */
6023 if (!reload_completed
)
6026 next_tail
= NEXT_INSN (tail
);
6027 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
6030 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
6032 && ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IALU
)
6034 for (link
= INSN_DEPEND (insn
); link
!= 0; link
= XEXP (link
, 1))
6036 if (REG_NOTE_KIND (link
) != REG_DEP_TRUE
)
6038 next
= XEXP (link
, 0);
6039 if ((ia64_safe_itanium_class (next
) == ITANIUM_CLASS_ST
6040 || ia64_safe_itanium_class (next
) == ITANIUM_CLASS_STF
)
6041 && ia64_st_address_bypass_p (insn
, next
))
6043 else if ((ia64_safe_itanium_class (next
) == ITANIUM_CLASS_LD
6044 || ia64_safe_itanium_class (next
)
6045 == ITANIUM_CLASS_FLD
)
6046 && ia64_ld_address_bypass_p (insn
, next
))
6049 insn
->call
= link
!= 0;
6053 /* We're beginning a new block. Initialize data structures as necessary. */
6056 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
6057 int sched_verbose ATTRIBUTE_UNUSED
,
6058 int max_ready ATTRIBUTE_UNUSED
)
6060 #ifdef ENABLE_CHECKING
6063 if (reload_completed
)
6064 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
6065 insn
!= current_sched_info
->next_tail
;
6066 insn
= NEXT_INSN (insn
))
6067 gcc_assert (!SCHED_GROUP_P (insn
));
6069 last_scheduled_insn
= NULL_RTX
;
6070 init_insn_group_barriers ();
6073 /* We are about to being issuing insns for this clock cycle.
6074 Override the default sort algorithm to better slot instructions. */
6077 ia64_dfa_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
,
6078 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
,
6082 int n_ready
= *pn_ready
;
6083 rtx
*e_ready
= ready
+ n_ready
;
6087 fprintf (dump
, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type
);
6089 if (reorder_type
== 0)
6091 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6093 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6094 if (insnp
< e_ready
)
6097 enum attr_type t
= ia64_safe_type (insn
);
6098 if (t
== TYPE_UNKNOWN
)
6100 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6101 || asm_noperands (PATTERN (insn
)) >= 0)
6103 rtx lowest
= ready
[n_asms
];
6104 ready
[n_asms
] = insn
;
6110 rtx highest
= ready
[n_ready
- 1];
6111 ready
[n_ready
- 1] = insn
;
6118 if (n_asms
< n_ready
)
6120 /* Some normal insns to process. Skip the asms. */
6124 else if (n_ready
> 0)
6128 if (ia64_final_schedule
)
6131 int nr_need_stop
= 0;
6133 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6134 if (safe_group_barrier_needed (*insnp
))
6137 if (reorder_type
== 1 && n_ready
== nr_need_stop
)
6139 if (reorder_type
== 0)
6142 /* Move down everything that needs a stop bit, preserving
6144 while (insnp
-- > ready
+ deleted
)
6145 while (insnp
>= ready
+ deleted
)
6148 if (! safe_group_barrier_needed (insn
))
6150 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
6161 /* We are about to being issuing insns for this clock cycle. Override
6162 the default sort algorithm to better slot instructions. */
6165 ia64_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
6168 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
,
6169 pn_ready
, clock_var
, 0);
6172 /* Like ia64_sched_reorder, but called after issuing each insn.
6173 Override the default sort algorithm to better slot instructions. */
6176 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED
,
6177 int sched_verbose ATTRIBUTE_UNUSED
, rtx
*ready
,
6178 int *pn_ready
, int clock_var
)
6180 if (ia64_tune
== PROCESSOR_ITANIUM
&& reload_completed
&& last_scheduled_insn
)
6181 clocks
[INSN_UID (last_scheduled_insn
)] = clock_var
;
6182 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
6186 /* We are about to issue INSN. Return the number of insns left on the
6187 ready queue that can be issued this cycle. */
6190 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED
,
6191 int sched_verbose ATTRIBUTE_UNUSED
,
6192 rtx insn ATTRIBUTE_UNUSED
,
6193 int can_issue_more ATTRIBUTE_UNUSED
)
6195 last_scheduled_insn
= insn
;
6196 memcpy (prev_cycle_state
, curr_state
, dfa_state_size
);
6197 if (reload_completed
)
6199 int needed
= group_barrier_needed (insn
);
6201 gcc_assert (!needed
);
6202 if (GET_CODE (insn
) == CALL_INSN
)
6203 init_insn_group_barriers ();
6204 stops_p
[INSN_UID (insn
)] = stop_before_p
;
6210 /* We are choosing insn from the ready queue. Return nonzero if INSN
6214 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn
)
6216 gcc_assert (insn
&& INSN_P (insn
));
6217 return (!reload_completed
6218 || !safe_group_barrier_needed (insn
));
6221 /* The following variable value is pseudo-insn used by the DFA insn
6222 scheduler to change the DFA state when the simulated clock is
6225 static rtx dfa_pre_cycle_insn
;
6227 /* We are about to being issuing INSN. Return nonzero if we cannot
6228 issue it on given cycle CLOCK and return zero if we should not sort
6229 the ready queue on the next clock start. */
6232 ia64_dfa_new_cycle (FILE *dump
, int verbose
, rtx insn
, int last_clock
,
6233 int clock
, int *sort_p
)
6235 int setup_clocks_p
= FALSE
;
6237 gcc_assert (insn
&& INSN_P (insn
));
6238 if ((reload_completed
&& safe_group_barrier_needed (insn
))
6239 || (last_scheduled_insn
6240 && (GET_CODE (last_scheduled_insn
) == CALL_INSN
6241 || GET_CODE (PATTERN (last_scheduled_insn
)) == ASM_INPUT
6242 || asm_noperands (PATTERN (last_scheduled_insn
)) >= 0)))
6244 init_insn_group_barriers ();
6245 if (verbose
&& dump
)
6246 fprintf (dump
, "// Stop should be before %d%s\n", INSN_UID (insn
),
6247 last_clock
== clock
? " + cycle advance" : "");
6249 if (last_clock
== clock
)
6251 state_transition (curr_state
, dfa_stop_insn
);
6252 if (TARGET_EARLY_STOP_BITS
)
6253 *sort_p
= (last_scheduled_insn
== NULL_RTX
6254 || GET_CODE (last_scheduled_insn
) != CALL_INSN
);
6259 else if (reload_completed
)
6260 setup_clocks_p
= TRUE
;
6261 if (GET_CODE (PATTERN (last_scheduled_insn
)) == ASM_INPUT
6262 || asm_noperands (PATTERN (last_scheduled_insn
)) >= 0)
6263 state_reset (curr_state
);
6266 memcpy (curr_state
, prev_cycle_state
, dfa_state_size
);
6267 state_transition (curr_state
, dfa_stop_insn
);
6268 state_transition (curr_state
, dfa_pre_cycle_insn
);
6269 state_transition (curr_state
, NULL
);
6272 else if (reload_completed
)
6273 setup_clocks_p
= TRUE
;
6274 if (setup_clocks_p
&& ia64_tune
== PROCESSOR_ITANIUM
6275 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
6276 && asm_noperands (PATTERN (insn
)) < 0)
6278 enum attr_itanium_class c
= ia64_safe_itanium_class (insn
);
6280 if (c
!= ITANIUM_CLASS_MMMUL
&& c
!= ITANIUM_CLASS_MMSHF
)
6285 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
6286 if (REG_NOTE_KIND (link
) == 0)
6288 enum attr_itanium_class dep_class
;
6289 rtx dep_insn
= XEXP (link
, 0);
6291 dep_class
= ia64_safe_itanium_class (dep_insn
);
6292 if ((dep_class
== ITANIUM_CLASS_MMMUL
6293 || dep_class
== ITANIUM_CLASS_MMSHF
)
6294 && last_clock
- clocks
[INSN_UID (dep_insn
)] < 4
6296 || last_clock
- clocks
[INSN_UID (dep_insn
)] < d
))
6297 d
= last_clock
- clocks
[INSN_UID (dep_insn
)];
6300 add_cycles
[INSN_UID (insn
)] = 3 - d
;
6308 /* The following page contains abstract data `bundle states' which are
6309 used for bundling insns (inserting nops and template generation). */
6311 /* The following describes state of insn bundling. */
6315 /* Unique bundle state number to identify them in the debugging
6318 rtx insn
; /* corresponding insn, NULL for the 1st and the last state */
6319 /* number nops before and after the insn */
6320 short before_nops_num
, after_nops_num
;
6321 int insn_num
; /* insn number (0 - for initial state, 1 - for the 1st
6323 int cost
; /* cost of the state in cycles */
6324 int accumulated_insns_num
; /* number of all previous insns including
6325 nops. L is considered as 2 insns */
6326 int branch_deviation
; /* deviation of previous branches from 3rd slots */
6327 struct bundle_state
*next
; /* next state with the same insn_num */
6328 struct bundle_state
*originator
; /* originator (previous insn state) */
6329 /* All bundle states are in the following chain. */
6330 struct bundle_state
*allocated_states_chain
;
6331 /* The DFA State after issuing the insn and the nops. */
6335 /* The following is map insn number to the corresponding bundle state. */
6337 static struct bundle_state
**index_to_bundle_states
;
6339 /* The unique number of next bundle state. */
6341 static int bundle_states_num
;
6343 /* All allocated bundle states are in the following chain. */
6345 static struct bundle_state
*allocated_bundle_states_chain
;
6347 /* All allocated but not used bundle states are in the following
6350 static struct bundle_state
*free_bundle_state_chain
;
6353 /* The following function returns a free bundle state. */
6355 static struct bundle_state
*
6356 get_free_bundle_state (void)
6358 struct bundle_state
*result
;
6360 if (free_bundle_state_chain
!= NULL
)
6362 result
= free_bundle_state_chain
;
6363 free_bundle_state_chain
= result
->next
;
6367 result
= xmalloc (sizeof (struct bundle_state
));
6368 result
->dfa_state
= xmalloc (dfa_state_size
);
6369 result
->allocated_states_chain
= allocated_bundle_states_chain
;
6370 allocated_bundle_states_chain
= result
;
6372 result
->unique_num
= bundle_states_num
++;
6377 /* The following function frees given bundle state. */
6380 free_bundle_state (struct bundle_state
*state
)
6382 state
->next
= free_bundle_state_chain
;
6383 free_bundle_state_chain
= state
;
6386 /* Start work with abstract data `bundle states'. */
6389 initiate_bundle_states (void)
6391 bundle_states_num
= 0;
6392 free_bundle_state_chain
= NULL
;
6393 allocated_bundle_states_chain
= NULL
;
6396 /* Finish work with abstract data `bundle states'. */
6399 finish_bundle_states (void)
6401 struct bundle_state
*curr_state
, *next_state
;
6403 for (curr_state
= allocated_bundle_states_chain
;
6405 curr_state
= next_state
)
6407 next_state
= curr_state
->allocated_states_chain
;
6408 free (curr_state
->dfa_state
);
6413 /* Hash table of the bundle states. The key is dfa_state and insn_num
6414 of the bundle states. */
6416 static htab_t bundle_state_table
;
6418 /* The function returns hash of BUNDLE_STATE. */
6421 bundle_state_hash (const void *bundle_state
)
6423 const struct bundle_state
*state
= (struct bundle_state
*) bundle_state
;
6426 for (result
= i
= 0; i
< dfa_state_size
; i
++)
6427 result
+= (((unsigned char *) state
->dfa_state
) [i
]
6428 << ((i
% CHAR_BIT
) * 3 + CHAR_BIT
));
6429 return result
+ state
->insn_num
;
6432 /* The function returns nonzero if the bundle state keys are equal. */
6435 bundle_state_eq_p (const void *bundle_state_1
, const void *bundle_state_2
)
6437 const struct bundle_state
* state1
= (struct bundle_state
*) bundle_state_1
;
6438 const struct bundle_state
* state2
= (struct bundle_state
*) bundle_state_2
;
6440 return (state1
->insn_num
== state2
->insn_num
6441 && memcmp (state1
->dfa_state
, state2
->dfa_state
,
6442 dfa_state_size
) == 0);
6445 /* The function inserts the BUNDLE_STATE into the hash table. The
6446 function returns nonzero if the bundle has been inserted into the
6447 table. The table contains the best bundle state with given key. */
6450 insert_bundle_state (struct bundle_state
*bundle_state
)
6454 entry_ptr
= htab_find_slot (bundle_state_table
, bundle_state
, 1);
6455 if (*entry_ptr
== NULL
)
6457 bundle_state
->next
= index_to_bundle_states
[bundle_state
->insn_num
];
6458 index_to_bundle_states
[bundle_state
->insn_num
] = bundle_state
;
6459 *entry_ptr
= (void *) bundle_state
;
6462 else if (bundle_state
->cost
< ((struct bundle_state
*) *entry_ptr
)->cost
6463 || (bundle_state
->cost
== ((struct bundle_state
*) *entry_ptr
)->cost
6464 && (((struct bundle_state
*)*entry_ptr
)->accumulated_insns_num
6465 > bundle_state
->accumulated_insns_num
6466 || (((struct bundle_state
*)
6467 *entry_ptr
)->accumulated_insns_num
6468 == bundle_state
->accumulated_insns_num
6469 && ((struct bundle_state
*)
6470 *entry_ptr
)->branch_deviation
6471 > bundle_state
->branch_deviation
))))
6474 struct bundle_state temp
;
6476 temp
= *(struct bundle_state
*) *entry_ptr
;
6477 *(struct bundle_state
*) *entry_ptr
= *bundle_state
;
6478 ((struct bundle_state
*) *entry_ptr
)->next
= temp
.next
;
6479 *bundle_state
= temp
;
6484 /* Start work with the hash table. */
6487 initiate_bundle_state_table (void)
6489 bundle_state_table
= htab_create (50, bundle_state_hash
, bundle_state_eq_p
,
6493 /* Finish work with the hash table. */
6496 finish_bundle_state_table (void)
6498 htab_delete (bundle_state_table
);
6503 /* The following variable is a insn `nop' used to check bundle states
6504 with different number of inserted nops. */
6506 static rtx ia64_nop
;
6508 /* The following function tries to issue NOPS_NUM nops for the current
6509 state without advancing processor cycle. If it failed, the
6510 function returns FALSE and frees the current state. */
6513 try_issue_nops (struct bundle_state
*curr_state
, int nops_num
)
6517 for (i
= 0; i
< nops_num
; i
++)
6518 if (state_transition (curr_state
->dfa_state
, ia64_nop
) >= 0)
6520 free_bundle_state (curr_state
);
6526 /* The following function tries to issue INSN for the current
6527 state without advancing processor cycle. If it failed, the
6528 function returns FALSE and frees the current state. */
6531 try_issue_insn (struct bundle_state
*curr_state
, rtx insn
)
6533 if (insn
&& state_transition (curr_state
->dfa_state
, insn
) >= 0)
6535 free_bundle_state (curr_state
);
6541 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6542 starting with ORIGINATOR without advancing processor cycle. If
6543 TRY_BUNDLE_END_P is TRUE, the function also/only (if
6544 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
6545 If it was successful, the function creates new bundle state and
6546 insert into the hash table and into `index_to_bundle_states'. */
6549 issue_nops_and_insn (struct bundle_state
*originator
, int before_nops_num
,
6550 rtx insn
, int try_bundle_end_p
, int only_bundle_end_p
)
6552 struct bundle_state
*curr_state
;
6554 curr_state
= get_free_bundle_state ();
6555 memcpy (curr_state
->dfa_state
, originator
->dfa_state
, dfa_state_size
);
6556 curr_state
->insn
= insn
;
6557 curr_state
->insn_num
= originator
->insn_num
+ 1;
6558 curr_state
->cost
= originator
->cost
;
6559 curr_state
->originator
= originator
;
6560 curr_state
->before_nops_num
= before_nops_num
;
6561 curr_state
->after_nops_num
= 0;
6562 curr_state
->accumulated_insns_num
6563 = originator
->accumulated_insns_num
+ before_nops_num
;
6564 curr_state
->branch_deviation
= originator
->branch_deviation
;
6566 if (INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
)
6568 gcc_assert (GET_MODE (insn
) != TImode
);
6569 if (!try_issue_nops (curr_state
, before_nops_num
))
6571 if (!try_issue_insn (curr_state
, insn
))
6573 memcpy (temp_dfa_state
, curr_state
->dfa_state
, dfa_state_size
);
6574 if (state_transition (temp_dfa_state
, dfa_pre_cycle_insn
) >= 0
6575 && curr_state
->accumulated_insns_num
% 3 != 0)
6577 free_bundle_state (curr_state
);
6581 else if (GET_MODE (insn
) != TImode
)
6583 if (!try_issue_nops (curr_state
, before_nops_num
))
6585 if (!try_issue_insn (curr_state
, insn
))
6587 curr_state
->accumulated_insns_num
++;
6588 gcc_assert (GET_CODE (PATTERN (insn
)) != ASM_INPUT
6589 && asm_noperands (PATTERN (insn
)) < 0);
6591 if (ia64_safe_type (insn
) == TYPE_L
)
6592 curr_state
->accumulated_insns_num
++;
6596 /* If this is an insn that must be first in a group, then don't allow
6597 nops to be emitted before it. Currently, alloc is the only such
6598 supported instruction. */
6599 /* ??? The bundling automatons should handle this for us, but they do
6600 not yet have support for the first_insn attribute. */
6601 if (before_nops_num
> 0 && get_attr_first_insn (insn
) == FIRST_INSN_YES
)
6603 free_bundle_state (curr_state
);
6607 state_transition (curr_state
->dfa_state
, dfa_pre_cycle_insn
);
6608 state_transition (curr_state
->dfa_state
, NULL
);
6610 if (!try_issue_nops (curr_state
, before_nops_num
))
6612 if (!try_issue_insn (curr_state
, insn
))
6614 curr_state
->accumulated_insns_num
++;
6615 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6616 || asm_noperands (PATTERN (insn
)) >= 0)
6618 /* Finish bundle containing asm insn. */
6619 curr_state
->after_nops_num
6620 = 3 - curr_state
->accumulated_insns_num
% 3;
6621 curr_state
->accumulated_insns_num
6622 += 3 - curr_state
->accumulated_insns_num
% 3;
6624 else if (ia64_safe_type (insn
) == TYPE_L
)
6625 curr_state
->accumulated_insns_num
++;
6627 if (ia64_safe_type (insn
) == TYPE_B
)
6628 curr_state
->branch_deviation
6629 += 2 - (curr_state
->accumulated_insns_num
- 1) % 3;
6630 if (try_bundle_end_p
&& curr_state
->accumulated_insns_num
% 3 != 0)
6632 if (!only_bundle_end_p
&& insert_bundle_state (curr_state
))
6635 struct bundle_state
*curr_state1
;
6636 struct bundle_state
*allocated_states_chain
;
6638 curr_state1
= get_free_bundle_state ();
6639 dfa_state
= curr_state1
->dfa_state
;
6640 allocated_states_chain
= curr_state1
->allocated_states_chain
;
6641 *curr_state1
= *curr_state
;
6642 curr_state1
->dfa_state
= dfa_state
;
6643 curr_state1
->allocated_states_chain
= allocated_states_chain
;
6644 memcpy (curr_state1
->dfa_state
, curr_state
->dfa_state
,
6646 curr_state
= curr_state1
;
6648 if (!try_issue_nops (curr_state
,
6649 3 - curr_state
->accumulated_insns_num
% 3))
6651 curr_state
->after_nops_num
6652 = 3 - curr_state
->accumulated_insns_num
% 3;
6653 curr_state
->accumulated_insns_num
6654 += 3 - curr_state
->accumulated_insns_num
% 3;
6656 if (!insert_bundle_state (curr_state
))
6657 free_bundle_state (curr_state
);
6661 /* The following function returns position in the two window bundle
6665 get_max_pos (state_t state
)
6667 if (cpu_unit_reservation_p (state
, pos_6
))
6669 else if (cpu_unit_reservation_p (state
, pos_5
))
6671 else if (cpu_unit_reservation_p (state
, pos_4
))
6673 else if (cpu_unit_reservation_p (state
, pos_3
))
6675 else if (cpu_unit_reservation_p (state
, pos_2
))
6677 else if (cpu_unit_reservation_p (state
, pos_1
))
6683 /* The function returns code of a possible template for given position
6684 and state. The function should be called only with 2 values of
6685 position equal to 3 or 6. We avoid generating F NOPs by putting
6686 templates containing F insns at the end of the template search
6687 because undocumented anomaly in McKinley derived cores which can
6688 cause stalls if an F-unit insn (including a NOP) is issued within a
6689 six-cycle window after reading certain application registers (such
6690 as ar.bsp). Furthermore, power-considerations also argue against
6691 the use of F-unit instructions unless they're really needed. */
6694 get_template (state_t state
, int pos
)
6699 if (cpu_unit_reservation_p (state
, _0mmi_
))
6701 else if (cpu_unit_reservation_p (state
, _0mii_
))
6703 else if (cpu_unit_reservation_p (state
, _0mmb_
))
6705 else if (cpu_unit_reservation_p (state
, _0mib_
))
6707 else if (cpu_unit_reservation_p (state
, _0mbb_
))
6709 else if (cpu_unit_reservation_p (state
, _0bbb_
))
6711 else if (cpu_unit_reservation_p (state
, _0mmf_
))
6713 else if (cpu_unit_reservation_p (state
, _0mfi_
))
6715 else if (cpu_unit_reservation_p (state
, _0mfb_
))
6717 else if (cpu_unit_reservation_p (state
, _0mlx_
))
6722 if (cpu_unit_reservation_p (state
, _1mmi_
))
6724 else if (cpu_unit_reservation_p (state
, _1mii_
))
6726 else if (cpu_unit_reservation_p (state
, _1mmb_
))
6728 else if (cpu_unit_reservation_p (state
, _1mib_
))
6730 else if (cpu_unit_reservation_p (state
, _1mbb_
))
6732 else if (cpu_unit_reservation_p (state
, _1bbb_
))
6734 else if (_1mmf_
>= 0 && cpu_unit_reservation_p (state
, _1mmf_
))
6736 else if (cpu_unit_reservation_p (state
, _1mfi_
))
6738 else if (cpu_unit_reservation_p (state
, _1mfb_
))
6740 else if (cpu_unit_reservation_p (state
, _1mlx_
))
6749 /* The following function returns an insn important for insn bundling
6750 followed by INSN and before TAIL. */
6753 get_next_important_insn (rtx insn
, rtx tail
)
6755 for (; insn
&& insn
!= tail
; insn
= NEXT_INSN (insn
))
6757 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
6758 && GET_CODE (PATTERN (insn
)) != USE
6759 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
6764 /* The following function does insn bundling. Bundling means
6765 inserting templates and nop insns to fit insn groups into permitted
6766 templates. Instruction scheduling uses NDFA (non-deterministic
6767 finite automata) encoding informations about the templates and the
6768 inserted nops. Nondeterminism of the automata permits follows
6769 all possible insn sequences very fast.
6771 Unfortunately it is not possible to get information about inserting
6772 nop insns and used templates from the automata states. The
6773 automata only says that we can issue an insn possibly inserting
6774 some nops before it and using some template. Therefore insn
6775 bundling in this function is implemented by using DFA
6776 (deterministic finite automata). We follows all possible insn
6777 sequences by inserting 0-2 nops (that is what the NDFA describe for
6778 insn scheduling) before/after each insn being bundled. We know the
6779 start of simulated processor cycle from insn scheduling (insn
6780 starting a new cycle has TImode).
6782 Simple implementation of insn bundling would create enormous
6783 number of possible insn sequences satisfying information about new
6784 cycle ticks taken from the insn scheduling. To make the algorithm
6785 practical we use dynamic programming. Each decision (about
6786 inserting nops and implicitly about previous decisions) is described
6787 by structure bundle_state (see above). If we generate the same
6788 bundle state (key is automaton state after issuing the insns and
6789 nops for it), we reuse already generated one. As consequence we
6790 reject some decisions which cannot improve the solution and
6791 reduce memory for the algorithm.
6793 When we reach the end of EBB (extended basic block), we choose the
6794 best sequence and then, moving back in EBB, insert templates for
6795 the best alternative. The templates are taken from querying
6796 automaton state for each insn in chosen bundle states.
6798 So the algorithm makes two (forward and backward) passes through
6799 EBB. There is an additional forward pass through EBB for Itanium1
6800 processor. This pass inserts more nops to make dependency between
6801 a producer insn and MMMUL/MMSHF at least 4 cycles long. */
6804 bundling (FILE *dump
, int verbose
, rtx prev_head_insn
, rtx tail
)
6806 struct bundle_state
*curr_state
, *next_state
, *best_state
;
6807 rtx insn
, next_insn
;
6809 int i
, bundle_end_p
, only_bundle_end_p
, asm_p
;
6810 int pos
= 0, max_pos
, template0
, template1
;
6813 enum attr_type type
;
6816 /* Count insns in the EBB. */
6817 for (insn
= NEXT_INSN (prev_head_insn
);
6818 insn
&& insn
!= tail
;
6819 insn
= NEXT_INSN (insn
))
6825 dfa_clean_insn_cache ();
6826 initiate_bundle_state_table ();
6827 index_to_bundle_states
= xmalloc ((insn_num
+ 2)
6828 * sizeof (struct bundle_state
*));
6829 /* First (forward) pass -- generation of bundle states. */
6830 curr_state
= get_free_bundle_state ();
6831 curr_state
->insn
= NULL
;
6832 curr_state
->before_nops_num
= 0;
6833 curr_state
->after_nops_num
= 0;
6834 curr_state
->insn_num
= 0;
6835 curr_state
->cost
= 0;
6836 curr_state
->accumulated_insns_num
= 0;
6837 curr_state
->branch_deviation
= 0;
6838 curr_state
->next
= NULL
;
6839 curr_state
->originator
= NULL
;
6840 state_reset (curr_state
->dfa_state
);
6841 index_to_bundle_states
[0] = curr_state
;
6843 /* Shift cycle mark if it is put on insn which could be ignored. */
6844 for (insn
= NEXT_INSN (prev_head_insn
);
6846 insn
= NEXT_INSN (insn
))
6848 && (ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
6849 || GET_CODE (PATTERN (insn
)) == USE
6850 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6851 && GET_MODE (insn
) == TImode
)
6853 PUT_MODE (insn
, VOIDmode
);
6854 for (next_insn
= NEXT_INSN (insn
);
6856 next_insn
= NEXT_INSN (next_insn
))
6857 if (INSN_P (next_insn
)
6858 && ia64_safe_itanium_class (next_insn
) != ITANIUM_CLASS_IGNORE
6859 && GET_CODE (PATTERN (next_insn
)) != USE
6860 && GET_CODE (PATTERN (next_insn
)) != CLOBBER
)
6862 PUT_MODE (next_insn
, TImode
);
6866 /* Froward pass: generation of bundle states. */
6867 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
6871 gcc_assert (INSN_P (insn
)
6872 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
6873 && GET_CODE (PATTERN (insn
)) != USE
6874 && GET_CODE (PATTERN (insn
)) != CLOBBER
);
6875 type
= ia64_safe_type (insn
);
6876 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
6878 index_to_bundle_states
[insn_num
] = NULL
;
6879 for (curr_state
= index_to_bundle_states
[insn_num
- 1];
6881 curr_state
= next_state
)
6883 pos
= curr_state
->accumulated_insns_num
% 3;
6884 next_state
= curr_state
->next
;
6885 /* We must fill up the current bundle in order to start a
6886 subsequent asm insn in a new bundle. Asm insn is always
6887 placed in a separate bundle. */
6889 = (next_insn
!= NULL_RTX
6890 && INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
6891 && ia64_safe_type (next_insn
) == TYPE_UNKNOWN
);
6892 /* We may fill up the current bundle if it is the cycle end
6893 without a group barrier. */
6895 = (only_bundle_end_p
|| next_insn
== NULL_RTX
6896 || (GET_MODE (next_insn
) == TImode
6897 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
));
6898 if (type
== TYPE_F
|| type
== TYPE_B
|| type
== TYPE_L
6900 /* We need to insert 2 nops for cases like M_MII. To
6901 guarantee issuing all insns on the same cycle for
6902 Itanium 1, we need to issue 2 nops after the first M
6903 insn (MnnMII where n is a nop insn). */
6904 || ((type
== TYPE_M
|| type
== TYPE_A
)
6905 && ia64_tune
== PROCESSOR_ITANIUM
6906 && !bundle_end_p
&& pos
== 1))
6907 issue_nops_and_insn (curr_state
, 2, insn
, bundle_end_p
,
6909 issue_nops_and_insn (curr_state
, 1, insn
, bundle_end_p
,
6911 issue_nops_and_insn (curr_state
, 0, insn
, bundle_end_p
,
6914 gcc_assert (index_to_bundle_states
[insn_num
]);
6915 for (curr_state
= index_to_bundle_states
[insn_num
];
6917 curr_state
= curr_state
->next
)
6918 if (verbose
>= 2 && dump
)
6920 /* This structure is taken from generated code of the
6921 pipeline hazard recognizer (see file insn-attrtab.c).
6922 Please don't forget to change the structure if a new
6923 automaton is added to .md file. */
6926 unsigned short one_automaton_state
;
6927 unsigned short oneb_automaton_state
;
6928 unsigned short two_automaton_state
;
6929 unsigned short twob_automaton_state
;
6934 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6935 curr_state
->unique_num
,
6936 (curr_state
->originator
== NULL
6937 ? -1 : curr_state
->originator
->unique_num
),
6939 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
6940 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
6941 (ia64_tune
== PROCESSOR_ITANIUM
6942 ? ((struct DFA_chip
*) curr_state
->dfa_state
)->oneb_automaton_state
6943 : ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
),
6948 /* We should find a solution because the 2nd insn scheduling has
6950 gcc_assert (index_to_bundle_states
[insn_num
]);
6951 /* Find a state corresponding to the best insn sequence. */
6953 for (curr_state
= index_to_bundle_states
[insn_num
];
6955 curr_state
= curr_state
->next
)
6956 /* We are just looking at the states with fully filled up last
6957 bundle. The first we prefer insn sequences with minimal cost
6958 then with minimal inserted nops and finally with branch insns
6959 placed in the 3rd slots. */
6960 if (curr_state
->accumulated_insns_num
% 3 == 0
6961 && (best_state
== NULL
|| best_state
->cost
> curr_state
->cost
6962 || (best_state
->cost
== curr_state
->cost
6963 && (curr_state
->accumulated_insns_num
6964 < best_state
->accumulated_insns_num
6965 || (curr_state
->accumulated_insns_num
6966 == best_state
->accumulated_insns_num
6967 && curr_state
->branch_deviation
6968 < best_state
->branch_deviation
)))))
6969 best_state
= curr_state
;
6970 /* Second (backward) pass: adding nops and templates. */
6971 insn_num
= best_state
->before_nops_num
;
6972 template0
= template1
= -1;
6973 for (curr_state
= best_state
;
6974 curr_state
->originator
!= NULL
;
6975 curr_state
= curr_state
->originator
)
6977 insn
= curr_state
->insn
;
6978 asm_p
= (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6979 || asm_noperands (PATTERN (insn
)) >= 0);
6981 if (verbose
>= 2 && dump
)
6985 unsigned short one_automaton_state
;
6986 unsigned short oneb_automaton_state
;
6987 unsigned short two_automaton_state
;
6988 unsigned short twob_automaton_state
;
6993 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6994 curr_state
->unique_num
,
6995 (curr_state
->originator
== NULL
6996 ? -1 : curr_state
->originator
->unique_num
),
6998 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
6999 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
7000 (ia64_tune
== PROCESSOR_ITANIUM
7001 ? ((struct DFA_chip
*) curr_state
->dfa_state
)->oneb_automaton_state
7002 : ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
),
7005 /* Find the position in the current bundle window. The window can
7006 contain at most two bundles. Two bundle window means that
7007 the processor will make two bundle rotation. */
7008 max_pos
= get_max_pos (curr_state
->dfa_state
);
7010 /* The following (negative template number) means that the
7011 processor did one bundle rotation. */
7012 || (max_pos
== 3 && template0
< 0))
7014 /* We are at the end of the window -- find template(s) for
7018 template0
= get_template (curr_state
->dfa_state
, 3);
7021 template1
= get_template (curr_state
->dfa_state
, 3);
7022 template0
= get_template (curr_state
->dfa_state
, 6);
7025 if (max_pos
> 3 && template1
< 0)
7026 /* It may happen when we have the stop inside a bundle. */
7028 gcc_assert (pos
<= 3);
7029 template1
= get_template (curr_state
->dfa_state
, 3);
7033 /* Emit nops after the current insn. */
7034 for (i
= 0; i
< curr_state
->after_nops_num
; i
++)
7037 emit_insn_after (nop
, insn
);
7039 gcc_assert (pos
>= 0);
7042 /* We are at the start of a bundle: emit the template
7043 (it should be defined). */
7044 gcc_assert (template0
>= 0);
7045 b
= gen_bundle_selector (GEN_INT (template0
));
7046 ia64_emit_insn_before (b
, nop
);
7047 /* If we have two bundle window, we make one bundle
7048 rotation. Otherwise template0 will be undefined
7049 (negative value). */
7050 template0
= template1
;
7054 /* Move the position backward in the window. Group barrier has
7055 no slot. Asm insn takes all bundle. */
7056 if (INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
7057 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
7058 && asm_noperands (PATTERN (insn
)) < 0)
7060 /* Long insn takes 2 slots. */
7061 if (ia64_safe_type (insn
) == TYPE_L
)
7063 gcc_assert (pos
>= 0);
7065 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
7066 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
7067 && asm_noperands (PATTERN (insn
)) < 0)
7069 /* The current insn is at the bundle start: emit the
7071 gcc_assert (template0
>= 0);
7072 b
= gen_bundle_selector (GEN_INT (template0
));
7073 ia64_emit_insn_before (b
, insn
);
7074 b
= PREV_INSN (insn
);
7076 /* See comment above in analogous place for emitting nops
7078 template0
= template1
;
7081 /* Emit nops after the current insn. */
7082 for (i
= 0; i
< curr_state
->before_nops_num
; i
++)
7085 ia64_emit_insn_before (nop
, insn
);
7086 nop
= PREV_INSN (insn
);
7089 gcc_assert (pos
>= 0);
7092 /* See comment above in analogous place for emitting nops
7094 gcc_assert (template0
>= 0);
7095 b
= gen_bundle_selector (GEN_INT (template0
));
7096 ia64_emit_insn_before (b
, insn
);
7097 b
= PREV_INSN (insn
);
7099 template0
= template1
;
7104 if (ia64_tune
== PROCESSOR_ITANIUM
)
7105 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
7106 Itanium1 has a strange design, if the distance between an insn
7107 and dependent MM-insn is less 4 then we have a 6 additional
7108 cycles stall. So we make the distance equal to 4 cycles if it
7110 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
7114 gcc_assert (INSN_P (insn
)
7115 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
7116 && GET_CODE (PATTERN (insn
)) != USE
7117 && GET_CODE (PATTERN (insn
)) != CLOBBER
);
7118 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
7119 if (INSN_UID (insn
) < clocks_length
&& add_cycles
[INSN_UID (insn
)])
7120 /* We found a MM-insn which needs additional cycles. */
7126 /* Now we are searching for a template of the bundle in
7127 which the MM-insn is placed and the position of the
7128 insn in the bundle (0, 1, 2). Also we are searching
7129 for that there is a stop before the insn. */
7130 last
= prev_active_insn (insn
);
7131 pred_stop_p
= recog_memoized (last
) == CODE_FOR_insn_group_barrier
;
7133 last
= prev_active_insn (last
);
7135 for (;; last
= prev_active_insn (last
))
7136 if (recog_memoized (last
) == CODE_FOR_bundle_selector
)
7138 template0
= XINT (XVECEXP (PATTERN (last
), 0, 0), 0);
7140 /* The insn is in MLX bundle. Change the template
7141 onto MFI because we will add nops before the
7142 insn. It simplifies subsequent code a lot. */
7144 = gen_bundle_selector (const2_rtx
); /* -> MFI */
7147 else if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
7148 && (ia64_safe_itanium_class (last
)
7149 != ITANIUM_CLASS_IGNORE
))
7151 /* Some check of correctness: the stop is not at the
7152 bundle start, there are no more 3 insns in the bundle,
7153 and the MM-insn is not at the start of bundle with
7155 gcc_assert ((!pred_stop_p
|| n
)
7157 && (template0
!= 9 || !n
));
7158 /* Put nops after the insn in the bundle. */
7159 for (j
= 3 - n
; j
> 0; j
--)
7160 ia64_emit_insn_before (gen_nop (), insn
);
7161 /* It takes into account that we will add more N nops
7162 before the insn lately -- please see code below. */
7163 add_cycles
[INSN_UID (insn
)]--;
7164 if (!pred_stop_p
|| add_cycles
[INSN_UID (insn
)])
7165 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7168 add_cycles
[INSN_UID (insn
)]--;
7169 for (i
= add_cycles
[INSN_UID (insn
)]; i
> 0; i
--)
7171 /* Insert "MII;" template. */
7172 ia64_emit_insn_before (gen_bundle_selector (const0_rtx
),
7174 ia64_emit_insn_before (gen_nop (), insn
);
7175 ia64_emit_insn_before (gen_nop (), insn
);
7178 /* To decrease code size, we use "MI;I;"
7180 ia64_emit_insn_before
7181 (gen_insn_group_barrier (GEN_INT (3)), insn
);
7184 ia64_emit_insn_before (gen_nop (), insn
);
7185 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7188 /* Put the MM-insn in the same slot of a bundle with the
7189 same template as the original one. */
7190 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0
)),
7192 /* To put the insn in the same slot, add necessary number
7194 for (j
= n
; j
> 0; j
--)
7195 ia64_emit_insn_before (gen_nop (), insn
);
7196 /* Put the stop if the original bundle had it. */
7198 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7202 free (index_to_bundle_states
);
7203 finish_bundle_state_table ();
7205 dfa_clean_insn_cache ();
7208 /* The following function is called at the end of scheduling BB or
7209 EBB. After reload, it inserts stop bits and does insn bundling. */
7212 ia64_sched_finish (FILE *dump
, int sched_verbose
)
7215 fprintf (dump
, "// Finishing schedule.\n");
7216 if (!reload_completed
)
7218 if (reload_completed
)
7220 final_emit_insn_group_barriers (dump
);
7221 bundling (dump
, sched_verbose
, current_sched_info
->prev_head
,
7222 current_sched_info
->next_tail
);
7223 if (sched_verbose
&& dump
)
7224 fprintf (dump
, "// finishing %d-%d\n",
7225 INSN_UID (NEXT_INSN (current_sched_info
->prev_head
)),
7226 INSN_UID (PREV_INSN (current_sched_info
->next_tail
)));
7232 /* The following function inserts stop bits in scheduled BB or EBB. */
7235 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED
)
7238 int need_barrier_p
= 0;
7239 rtx prev_insn
= NULL_RTX
;
7241 init_insn_group_barriers ();
7243 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
7244 insn
!= current_sched_info
->next_tail
;
7245 insn
= NEXT_INSN (insn
))
7247 if (GET_CODE (insn
) == BARRIER
)
7249 rtx last
= prev_active_insn (insn
);
7253 if (GET_CODE (last
) == JUMP_INSN
7254 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
7255 last
= prev_active_insn (last
);
7256 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
7257 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
7259 init_insn_group_barriers ();
7261 prev_insn
= NULL_RTX
;
7263 else if (INSN_P (insn
))
7265 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
7267 init_insn_group_barriers ();
7269 prev_insn
= NULL_RTX
;
7271 else if (need_barrier_p
|| group_barrier_needed (insn
))
7273 if (TARGET_EARLY_STOP_BITS
)
7278 last
!= current_sched_info
->prev_head
;
7279 last
= PREV_INSN (last
))
7280 if (INSN_P (last
) && GET_MODE (last
) == TImode
7281 && stops_p
[INSN_UID (last
)])
7283 if (last
== current_sched_info
->prev_head
)
7285 last
= prev_active_insn (last
);
7287 && recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
7288 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7290 init_insn_group_barriers ();
7291 for (last
= NEXT_INSN (last
);
7293 last
= NEXT_INSN (last
))
7295 group_barrier_needed (last
);
7299 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7301 init_insn_group_barriers ();
7303 group_barrier_needed (insn
);
7304 prev_insn
= NULL_RTX
;
7306 else if (recog_memoized (insn
) >= 0)
7308 need_barrier_p
= (GET_CODE (insn
) == CALL_INSN
7309 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
7310 || asm_noperands (PATTERN (insn
)) >= 0);
7317 /* If the following function returns TRUE, we will use the the DFA
7321 ia64_first_cycle_multipass_dfa_lookahead (void)
7323 return (reload_completed
? 6 : 4);
7326 /* The following function initiates variable `dfa_pre_cycle_insn'. */
7329 ia64_init_dfa_pre_cycle_insn (void)
7331 if (temp_dfa_state
== NULL
)
7333 dfa_state_size
= state_size ();
7334 temp_dfa_state
= xmalloc (dfa_state_size
);
7335 prev_cycle_state
= xmalloc (dfa_state_size
);
7337 dfa_pre_cycle_insn
= make_insn_raw (gen_pre_cycle ());
7338 PREV_INSN (dfa_pre_cycle_insn
) = NEXT_INSN (dfa_pre_cycle_insn
) = NULL_RTX
;
7339 recog_memoized (dfa_pre_cycle_insn
);
7340 dfa_stop_insn
= make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7341 PREV_INSN (dfa_stop_insn
) = NEXT_INSN (dfa_stop_insn
) = NULL_RTX
;
7342 recog_memoized (dfa_stop_insn
);
7345 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7346 used by the DFA insn scheduler. */
7349 ia64_dfa_pre_cycle_insn (void)
7351 return dfa_pre_cycle_insn
;
7354 /* The following function returns TRUE if PRODUCER (of type ilog or
7355 ld) produces address for CONSUMER (of type st or stf). */
7358 ia64_st_address_bypass_p (rtx producer
, rtx consumer
)
7362 gcc_assert (producer
&& consumer
);
7363 dest
= ia64_single_set (producer
);
7365 reg
= SET_DEST (dest
);
7367 if (GET_CODE (reg
) == SUBREG
)
7368 reg
= SUBREG_REG (reg
);
7369 gcc_assert (GET_CODE (reg
) == REG
);
7371 dest
= ia64_single_set (consumer
);
7373 mem
= SET_DEST (dest
);
7374 gcc_assert (mem
&& GET_CODE (mem
) == MEM
);
7375 return reg_mentioned_p (reg
, mem
);
7378 /* The following function returns TRUE if PRODUCER (of type ilog or
7379 ld) produces address for CONSUMER (of type ld or fld). */
7382 ia64_ld_address_bypass_p (rtx producer
, rtx consumer
)
7384 rtx dest
, src
, reg
, mem
;
7386 gcc_assert (producer
&& consumer
);
7387 dest
= ia64_single_set (producer
);
7389 reg
= SET_DEST (dest
);
7391 if (GET_CODE (reg
) == SUBREG
)
7392 reg
= SUBREG_REG (reg
);
7393 gcc_assert (GET_CODE (reg
) == REG
);
7395 src
= ia64_single_set (consumer
);
7397 mem
= SET_SRC (src
);
7399 if (GET_CODE (mem
) == UNSPEC
&& XVECLEN (mem
, 0) > 0)
7400 mem
= XVECEXP (mem
, 0, 0);
7401 while (GET_CODE (mem
) == SUBREG
|| GET_CODE (mem
) == ZERO_EXTEND
)
7402 mem
= XEXP (mem
, 0);
7404 /* Note that LO_SUM is used for GOT loads. */
7405 gcc_assert (GET_CODE (mem
) == LO_SUM
|| GET_CODE (mem
) == MEM
);
7407 return reg_mentioned_p (reg
, mem
);
7410 /* The following function returns TRUE if INSN produces address for a
7411 load/store insn. We will place such insns into M slot because it
7412 decreases its latency time. */
7415 ia64_produce_address_p (rtx insn
)
7421 /* Emit pseudo-ops for the assembler to describe predicate relations.
7422 At present this assumes that we only consider predicate pairs to
7423 be mutex, and that the assembler can deduce proper values from
7424 straight-line code. */
7427 emit_predicate_relation_info (void)
7431 FOR_EACH_BB_REVERSE (bb
)
7434 rtx head
= BB_HEAD (bb
);
7436 /* We only need such notes at code labels. */
7437 if (GET_CODE (head
) != CODE_LABEL
)
7439 if (GET_CODE (NEXT_INSN (head
)) == NOTE
7440 && NOTE_LINE_NUMBER (NEXT_INSN (head
)) == NOTE_INSN_BASIC_BLOCK
)
7441 head
= NEXT_INSN (head
);
7443 /* Skip p0, which may be thought to be live due to (reg:DI p0)
7444 grabbing the entire block of predicate registers. */
7445 for (r
= PR_REG (2); r
< PR_REG (64); r
+= 2)
7446 if (REGNO_REG_SET_P (bb
->il
.rtl
->global_live_at_start
, r
))
7448 rtx p
= gen_rtx_REG (BImode
, r
);
7449 rtx n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
7450 if (head
== BB_END (bb
))
7456 /* Look for conditional calls that do not return, and protect predicate
7457 relations around them. Otherwise the assembler will assume the call
7458 returns, and complain about uses of call-clobbered predicates after
7460 FOR_EACH_BB_REVERSE (bb
)
7462 rtx insn
= BB_HEAD (bb
);
7466 if (GET_CODE (insn
) == CALL_INSN
7467 && GET_CODE (PATTERN (insn
)) == COND_EXEC
7468 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
7470 rtx b
= emit_insn_before (gen_safe_across_calls_all (), insn
);
7471 rtx a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
7472 if (BB_HEAD (bb
) == insn
)
7474 if (BB_END (bb
) == insn
)
7478 if (insn
== BB_END (bb
))
7480 insn
= NEXT_INSN (insn
);
7485 /* Perform machine dependent operations on the rtl chain INSNS. */
7490 /* We are freeing block_for_insn in the toplev to keep compatibility
7491 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7492 compute_bb_for_insn ();
7494 /* If optimizing, we'll have split before scheduling. */
7496 split_all_insns (0);
7498 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7499 non-optimizing bootstrap. */
7500 update_life_info (NULL
, UPDATE_LIFE_GLOBAL_RM_NOTES
, PROP_DEATH_NOTES
);
7502 if (ia64_flag_schedule_insns2
)
7504 timevar_push (TV_SCHED2
);
7505 ia64_final_schedule
= 1;
7507 initiate_bundle_states ();
7508 ia64_nop
= make_insn_raw (gen_nop ());
7509 PREV_INSN (ia64_nop
) = NEXT_INSN (ia64_nop
) = NULL_RTX
;
7510 recog_memoized (ia64_nop
);
7511 clocks_length
= get_max_uid () + 1;
7512 stops_p
= xcalloc (1, clocks_length
);
7513 if (ia64_tune
== PROCESSOR_ITANIUM
)
7515 clocks
= xcalloc (clocks_length
, sizeof (int));
7516 add_cycles
= xcalloc (clocks_length
, sizeof (int));
7518 if (ia64_tune
== PROCESSOR_ITANIUM2
)
7520 pos_1
= get_cpu_unit_code ("2_1");
7521 pos_2
= get_cpu_unit_code ("2_2");
7522 pos_3
= get_cpu_unit_code ("2_3");
7523 pos_4
= get_cpu_unit_code ("2_4");
7524 pos_5
= get_cpu_unit_code ("2_5");
7525 pos_6
= get_cpu_unit_code ("2_6");
7526 _0mii_
= get_cpu_unit_code ("2b_0mii.");
7527 _0mmi_
= get_cpu_unit_code ("2b_0mmi.");
7528 _0mfi_
= get_cpu_unit_code ("2b_0mfi.");
7529 _0mmf_
= get_cpu_unit_code ("2b_0mmf.");
7530 _0bbb_
= get_cpu_unit_code ("2b_0bbb.");
7531 _0mbb_
= get_cpu_unit_code ("2b_0mbb.");
7532 _0mib_
= get_cpu_unit_code ("2b_0mib.");
7533 _0mmb_
= get_cpu_unit_code ("2b_0mmb.");
7534 _0mfb_
= get_cpu_unit_code ("2b_0mfb.");
7535 _0mlx_
= get_cpu_unit_code ("2b_0mlx.");
7536 _1mii_
= get_cpu_unit_code ("2b_1mii.");
7537 _1mmi_
= get_cpu_unit_code ("2b_1mmi.");
7538 _1mfi_
= get_cpu_unit_code ("2b_1mfi.");
7539 _1mmf_
= get_cpu_unit_code ("2b_1mmf.");
7540 _1bbb_
= get_cpu_unit_code ("2b_1bbb.");
7541 _1mbb_
= get_cpu_unit_code ("2b_1mbb.");
7542 _1mib_
= get_cpu_unit_code ("2b_1mib.");
7543 _1mmb_
= get_cpu_unit_code ("2b_1mmb.");
7544 _1mfb_
= get_cpu_unit_code ("2b_1mfb.");
7545 _1mlx_
= get_cpu_unit_code ("2b_1mlx.");
7549 pos_1
= get_cpu_unit_code ("1_1");
7550 pos_2
= get_cpu_unit_code ("1_2");
7551 pos_3
= get_cpu_unit_code ("1_3");
7552 pos_4
= get_cpu_unit_code ("1_4");
7553 pos_5
= get_cpu_unit_code ("1_5");
7554 pos_6
= get_cpu_unit_code ("1_6");
7555 _0mii_
= get_cpu_unit_code ("1b_0mii.");
7556 _0mmi_
= get_cpu_unit_code ("1b_0mmi.");
7557 _0mfi_
= get_cpu_unit_code ("1b_0mfi.");
7558 _0mmf_
= get_cpu_unit_code ("1b_0mmf.");
7559 _0bbb_
= get_cpu_unit_code ("1b_0bbb.");
7560 _0mbb_
= get_cpu_unit_code ("1b_0mbb.");
7561 _0mib_
= get_cpu_unit_code ("1b_0mib.");
7562 _0mmb_
= get_cpu_unit_code ("1b_0mmb.");
7563 _0mfb_
= get_cpu_unit_code ("1b_0mfb.");
7564 _0mlx_
= get_cpu_unit_code ("1b_0mlx.");
7565 _1mii_
= get_cpu_unit_code ("1b_1mii.");
7566 _1mmi_
= get_cpu_unit_code ("1b_1mmi.");
7567 _1mfi_
= get_cpu_unit_code ("1b_1mfi.");
7568 _1mmf_
= get_cpu_unit_code ("1b_1mmf.");
7569 _1bbb_
= get_cpu_unit_code ("1b_1bbb.");
7570 _1mbb_
= get_cpu_unit_code ("1b_1mbb.");
7571 _1mib_
= get_cpu_unit_code ("1b_1mib.");
7572 _1mmb_
= get_cpu_unit_code ("1b_1mmb.");
7573 _1mfb_
= get_cpu_unit_code ("1b_1mfb.");
7574 _1mlx_
= get_cpu_unit_code ("1b_1mlx.");
7576 schedule_ebbs (dump_file
);
7577 finish_bundle_states ();
7578 if (ia64_tune
== PROCESSOR_ITANIUM
)
7584 emit_insn_group_barriers (dump_file
);
7586 ia64_final_schedule
= 0;
7587 timevar_pop (TV_SCHED2
);
7590 emit_all_insn_group_barriers (dump_file
);
7592 /* A call must not be the last instruction in a function, so that the
7593 return address is still within the function, so that unwinding works
7594 properly. Note that IA-64 differs from dwarf2 on this point. */
7595 if (flag_unwind_tables
|| (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
7600 insn
= get_last_insn ();
7601 if (! INSN_P (insn
))
7602 insn
= prev_active_insn (insn
);
7603 /* Skip over insns that expand to nothing. */
7604 while (GET_CODE (insn
) == INSN
&& get_attr_empty (insn
) == EMPTY_YES
)
7606 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
7607 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
7609 insn
= prev_active_insn (insn
);
7611 if (GET_CODE (insn
) == CALL_INSN
)
7614 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7615 emit_insn (gen_break_f ());
7616 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7620 emit_predicate_relation_info ();
7622 if (ia64_flag_var_tracking
)
7624 timevar_push (TV_VAR_TRACKING
);
7625 variable_tracking_main ();
7626 timevar_pop (TV_VAR_TRACKING
);
7630 /* Return true if REGNO is used by the epilogue. */
7633 ia64_epilogue_uses (int regno
)
7638 /* With a call to a function in another module, we will write a new
7639 value to "gp". After returning from such a call, we need to make
7640 sure the function restores the original gp-value, even if the
7641 function itself does not use the gp anymore. */
7642 return !(TARGET_AUTO_PIC
|| TARGET_NO_PIC
);
7644 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7645 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7646 /* For functions defined with the syscall_linkage attribute, all
7647 input registers are marked as live at all function exits. This
7648 prevents the register allocator from using the input registers,
7649 which in turn makes it possible to restart a system call after
7650 an interrupt without having to save/restore the input registers.
7651 This also prevents kernel data from leaking to application code. */
7652 return lookup_attribute ("syscall_linkage",
7653 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))) != NULL
;
7656 /* Conditional return patterns can't represent the use of `b0' as
7657 the return address, so we force the value live this way. */
7661 /* Likewise for ar.pfs, which is used by br.ret. */
7669 /* Return true if REGNO is used by the frame unwinder. */
7672 ia64_eh_uses (int regno
)
7674 if (! reload_completed
)
7677 if (current_frame_info
.reg_save_b0
7678 && regno
== current_frame_info
.reg_save_b0
)
7680 if (current_frame_info
.reg_save_pr
7681 && regno
== current_frame_info
.reg_save_pr
)
7683 if (current_frame_info
.reg_save_ar_pfs
7684 && regno
== current_frame_info
.reg_save_ar_pfs
)
7686 if (current_frame_info
.reg_save_ar_unat
7687 && regno
== current_frame_info
.reg_save_ar_unat
)
7689 if (current_frame_info
.reg_save_ar_lc
7690 && regno
== current_frame_info
.reg_save_ar_lc
)
7696 /* Return true if this goes in small data/bss. */
7698 /* ??? We could also support own long data here. Generating movl/add/ld8
7699 instead of addl,ld8/ld8. This makes the code bigger, but should make the
7700 code faster because there is one less load. This also includes incomplete
7701 types which can't go in sdata/sbss. */
7704 ia64_in_small_data_p (tree exp
)
7706 if (TARGET_NO_SDATA
)
7709 /* We want to merge strings, so we never consider them small data. */
7710 if (TREE_CODE (exp
) == STRING_CST
)
7713 /* Functions are never small data. */
7714 if (TREE_CODE (exp
) == FUNCTION_DECL
)
7717 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
7719 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
7721 if (strcmp (section
, ".sdata") == 0
7722 || strncmp (section
, ".sdata.", 7) == 0
7723 || strncmp (section
, ".gnu.linkonce.s.", 16) == 0
7724 || strcmp (section
, ".sbss") == 0
7725 || strncmp (section
, ".sbss.", 6) == 0
7726 || strncmp (section
, ".gnu.linkonce.sb.", 17) == 0)
7731 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
7733 /* If this is an incomplete type with size 0, then we can't put it
7734 in sdata because it might be too big when completed. */
7735 if (size
> 0 && size
<= ia64_section_threshold
)
7742 /* Output assembly directives for prologue regions. */
7744 /* The current basic block number. */
7746 static bool last_block
;
7748 /* True if we need a copy_state command at the start of the next block. */
7750 static bool need_copy_state
;
7752 /* The function emits unwind directives for the start of an epilogue. */
7755 process_epilogue (void)
7757 /* If this isn't the last block of the function, then we need to label the
7758 current state, and copy it back in at the start of the next block. */
7762 fprintf (asm_out_file
, "\t.label_state %d\n",
7763 ++cfun
->machine
->state_num
);
7764 need_copy_state
= true;
7767 fprintf (asm_out_file
, "\t.restore sp\n");
7770 /* This function processes a SET pattern looking for specific patterns
7771 which result in emitting an assembly directive required for unwinding. */
7774 process_set (FILE *asm_out_file
, rtx pat
)
7776 rtx src
= SET_SRC (pat
);
7777 rtx dest
= SET_DEST (pat
);
7778 int src_regno
, dest_regno
;
7780 /* Look for the ALLOC insn. */
7781 if (GET_CODE (src
) == UNSPEC_VOLATILE
7782 && XINT (src
, 1) == UNSPECV_ALLOC
7783 && GET_CODE (dest
) == REG
)
7785 dest_regno
= REGNO (dest
);
7787 /* If this is the final destination for ar.pfs, then this must
7788 be the alloc in the prologue. */
7789 if (dest_regno
== current_frame_info
.reg_save_ar_pfs
)
7790 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
7791 ia64_dbx_register_number (dest_regno
));
7794 /* This must be an alloc before a sibcall. We must drop the
7795 old frame info. The easiest way to drop the old frame
7796 info is to ensure we had a ".restore sp" directive
7797 followed by a new prologue. If the procedure doesn't
7798 have a memory-stack frame, we'll issue a dummy ".restore
7800 if (current_frame_info
.total_size
== 0 && !frame_pointer_needed
)
7801 /* if haven't done process_epilogue() yet, do it now */
7802 process_epilogue ();
7803 fprintf (asm_out_file
, "\t.prologue\n");
7808 /* Look for SP = .... */
7809 if (GET_CODE (dest
) == REG
&& REGNO (dest
) == STACK_POINTER_REGNUM
)
7811 if (GET_CODE (src
) == PLUS
)
7813 rtx op0
= XEXP (src
, 0);
7814 rtx op1
= XEXP (src
, 1);
7816 gcc_assert (op0
== dest
&& GET_CODE (op1
) == CONST_INT
);
7818 if (INTVAL (op1
) < 0)
7819 fprintf (asm_out_file
, "\t.fframe "HOST_WIDE_INT_PRINT_DEC
"\n",
7822 process_epilogue ();
7826 gcc_assert (GET_CODE (src
) == REG
7827 && REGNO (src
) == HARD_FRAME_POINTER_REGNUM
);
7828 process_epilogue ();
7834 /* Register move we need to look at. */
7835 if (GET_CODE (dest
) == REG
&& GET_CODE (src
) == REG
)
7837 src_regno
= REGNO (src
);
7838 dest_regno
= REGNO (dest
);
7843 /* Saving return address pointer. */
7844 gcc_assert (dest_regno
== current_frame_info
.reg_save_b0
);
7845 fprintf (asm_out_file
, "\t.save rp, r%d\n",
7846 ia64_dbx_register_number (dest_regno
));
7850 gcc_assert (dest_regno
== current_frame_info
.reg_save_pr
);
7851 fprintf (asm_out_file
, "\t.save pr, r%d\n",
7852 ia64_dbx_register_number (dest_regno
));
7855 case AR_UNAT_REGNUM
:
7856 gcc_assert (dest_regno
== current_frame_info
.reg_save_ar_unat
);
7857 fprintf (asm_out_file
, "\t.save ar.unat, r%d\n",
7858 ia64_dbx_register_number (dest_regno
));
7862 gcc_assert (dest_regno
== current_frame_info
.reg_save_ar_lc
);
7863 fprintf (asm_out_file
, "\t.save ar.lc, r%d\n",
7864 ia64_dbx_register_number (dest_regno
));
7867 case STACK_POINTER_REGNUM
:
7868 gcc_assert (dest_regno
== HARD_FRAME_POINTER_REGNUM
7869 && frame_pointer_needed
);
7870 fprintf (asm_out_file
, "\t.vframe r%d\n",
7871 ia64_dbx_register_number (dest_regno
));
7875 /* Everything else should indicate being stored to memory. */
7880 /* Memory store we need to look at. */
7881 if (GET_CODE (dest
) == MEM
&& GET_CODE (src
) == REG
)
7887 if (GET_CODE (XEXP (dest
, 0)) == REG
)
7889 base
= XEXP (dest
, 0);
7894 gcc_assert (GET_CODE (XEXP (dest
, 0)) == PLUS
7895 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
);
7896 base
= XEXP (XEXP (dest
, 0), 0);
7897 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
7900 if (base
== hard_frame_pointer_rtx
)
7902 saveop
= ".savepsp";
7907 gcc_assert (base
== stack_pointer_rtx
);
7911 src_regno
= REGNO (src
);
7915 gcc_assert (!current_frame_info
.reg_save_b0
);
7916 fprintf (asm_out_file
, "\t%s rp, %ld\n", saveop
, off
);
7920 gcc_assert (!current_frame_info
.reg_save_pr
);
7921 fprintf (asm_out_file
, "\t%s pr, %ld\n", saveop
, off
);
7925 gcc_assert (!current_frame_info
.reg_save_ar_lc
);
7926 fprintf (asm_out_file
, "\t%s ar.lc, %ld\n", saveop
, off
);
7930 gcc_assert (!current_frame_info
.reg_save_ar_pfs
);
7931 fprintf (asm_out_file
, "\t%s ar.pfs, %ld\n", saveop
, off
);
7934 case AR_UNAT_REGNUM
:
7935 gcc_assert (!current_frame_info
.reg_save_ar_unat
);
7936 fprintf (asm_out_file
, "\t%s ar.unat, %ld\n", saveop
, off
);
7943 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
7944 1 << (src_regno
- GR_REG (4)));
7952 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
7953 1 << (src_regno
- BR_REG (1)));
7960 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
7961 1 << (src_regno
- FR_REG (2)));
7964 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7965 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7966 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7967 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7968 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
7969 1 << (src_regno
- FR_REG (12)));
7981 /* This function looks at a single insn and emits any directives
7982 required to unwind this insn. */
7984 process_for_unwind_directive (FILE *asm_out_file
, rtx insn
)
7986 if (flag_unwind_tables
7987 || (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
7991 if (GET_CODE (insn
) == NOTE
7992 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
7994 last_block
= NOTE_BASIC_BLOCK (insn
)->next_bb
== EXIT_BLOCK_PTR
;
7996 /* Restore unwind state from immediately before the epilogue. */
7997 if (need_copy_state
)
7999 fprintf (asm_out_file
, "\t.body\n");
8000 fprintf (asm_out_file
, "\t.copy_state %d\n",
8001 cfun
->machine
->state_num
);
8002 need_copy_state
= false;
8006 if (GET_CODE (insn
) == NOTE
|| ! RTX_FRAME_RELATED_P (insn
))
8009 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
8011 pat
= XEXP (pat
, 0);
8013 pat
= PATTERN (insn
);
8015 switch (GET_CODE (pat
))
8018 process_set (asm_out_file
, pat
);
8024 int limit
= XVECLEN (pat
, 0);
8025 for (par_index
= 0; par_index
< limit
; par_index
++)
8027 rtx x
= XVECEXP (pat
, 0, par_index
);
8028 if (GET_CODE (x
) == SET
)
8029 process_set (asm_out_file
, x
);
8044 IA64_BUILTIN_FLUSHRS
8048 ia64_init_builtins (void)
8053 /* The __fpreg type. */
8054 fpreg_type
= make_node (REAL_TYPE
);
8055 /* ??? The back end should know to load/save __fpreg variables using
8056 the ldf.fill and stf.spill instructions. */
8057 TYPE_PRECISION (fpreg_type
) = 80;
8058 layout_type (fpreg_type
);
8059 (*lang_hooks
.types
.register_builtin_type
) (fpreg_type
, "__fpreg");
8061 /* The __float80 type. */
8062 float80_type
= make_node (REAL_TYPE
);
8063 TYPE_PRECISION (float80_type
) = 80;
8064 layout_type (float80_type
);
8065 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
8067 /* The __float128 type. */
8070 tree float128_type
= make_node (REAL_TYPE
);
8071 TYPE_PRECISION (float128_type
) = 128;
8072 layout_type (float128_type
);
8073 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
8076 /* Under HPUX, this is a synonym for "long double". */
8077 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
8080 #define def_builtin(name, type, code) \
8081 lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
8084 def_builtin ("__builtin_ia64_bsp",
8085 build_function_type (ptr_type_node
, void_list_node
),
8088 def_builtin ("__builtin_ia64_flushrs",
8089 build_function_type (void_type_node
, void_list_node
),
8090 IA64_BUILTIN_FLUSHRS
);
8096 ia64_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
8097 enum machine_mode mode ATTRIBUTE_UNUSED
,
8098 int ignore ATTRIBUTE_UNUSED
)
8100 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
8101 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
8105 case IA64_BUILTIN_BSP
:
8106 if (! target
|| ! register_operand (target
, DImode
))
8107 target
= gen_reg_rtx (DImode
);
8108 emit_insn (gen_bsp_value (target
));
8109 #ifdef POINTERS_EXTEND_UNSIGNED
8110 target
= convert_memory_address (ptr_mode
, target
);
8114 case IA64_BUILTIN_FLUSHRS
:
8115 emit_insn (gen_flushrs ());
8125 /* For the HP-UX IA64 aggregate parameters are passed stored in the
8126 most significant bits of the stack slot. */
8129 ia64_hpux_function_arg_padding (enum machine_mode mode
, tree type
)
8131 /* Exception to normal case for structures/unions/etc. */
8133 if (type
&& AGGREGATE_TYPE_P (type
)
8134 && int_size_in_bytes (type
) < UNITS_PER_WORD
)
8137 /* Fall back to the default. */
8138 return DEFAULT_FUNCTION_ARG_PADDING (mode
, type
);
8141 /* Linked list of all external functions that are to be emitted by GCC.
8142 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8143 order to avoid putting out names that are never really used. */
8145 struct extern_func_list
GTY(())
8147 struct extern_func_list
*next
;
8151 static GTY(()) struct extern_func_list
*extern_func_head
;
8154 ia64_hpux_add_extern_decl (tree decl
)
8156 struct extern_func_list
*p
= ggc_alloc (sizeof (struct extern_func_list
));
8159 p
->next
= extern_func_head
;
8160 extern_func_head
= p
;
8163 /* Print out the list of used global functions. */
8166 ia64_hpux_file_end (void)
8168 struct extern_func_list
*p
;
8170 for (p
= extern_func_head
; p
; p
= p
->next
)
8172 tree decl
= p
->decl
;
8173 tree id
= DECL_ASSEMBLER_NAME (decl
);
8177 if (!TREE_ASM_WRITTEN (decl
) && TREE_SYMBOL_REFERENCED (id
))
8179 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
8181 TREE_ASM_WRITTEN (decl
) = 1;
8182 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
8183 fputs (TYPE_ASM_OP
, asm_out_file
);
8184 assemble_name (asm_out_file
, name
);
8185 fprintf (asm_out_file
, "," TYPE_OPERAND_FMT
"\n", "function");
8189 extern_func_head
= 0;
8192 /* Set SImode div/mod functions, init_integral_libfuncs only initializes
8193 modes of word_mode and larger. Rename the TFmode libfuncs using the
8194 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
8195 backward compatibility. */
8198 ia64_init_libfuncs (void)
8200 set_optab_libfunc (sdiv_optab
, SImode
, "__divsi3");
8201 set_optab_libfunc (udiv_optab
, SImode
, "__udivsi3");
8202 set_optab_libfunc (smod_optab
, SImode
, "__modsi3");
8203 set_optab_libfunc (umod_optab
, SImode
, "__umodsi3");
8205 set_optab_libfunc (add_optab
, TFmode
, "_U_Qfadd");
8206 set_optab_libfunc (sub_optab
, TFmode
, "_U_Qfsub");
8207 set_optab_libfunc (smul_optab
, TFmode
, "_U_Qfmpy");
8208 set_optab_libfunc (sdiv_optab
, TFmode
, "_U_Qfdiv");
8209 set_optab_libfunc (neg_optab
, TFmode
, "_U_Qfneg");
8211 set_conv_libfunc (sext_optab
, TFmode
, SFmode
, "_U_Qfcnvff_sgl_to_quad");
8212 set_conv_libfunc (sext_optab
, TFmode
, DFmode
, "_U_Qfcnvff_dbl_to_quad");
8213 set_conv_libfunc (sext_optab
, TFmode
, XFmode
, "_U_Qfcnvff_f80_to_quad");
8214 set_conv_libfunc (trunc_optab
, SFmode
, TFmode
, "_U_Qfcnvff_quad_to_sgl");
8215 set_conv_libfunc (trunc_optab
, DFmode
, TFmode
, "_U_Qfcnvff_quad_to_dbl");
8216 set_conv_libfunc (trunc_optab
, XFmode
, TFmode
, "_U_Qfcnvff_quad_to_f80");
8218 set_conv_libfunc (sfix_optab
, SImode
, TFmode
, "_U_Qfcnvfxt_quad_to_sgl");
8219 set_conv_libfunc (sfix_optab
, DImode
, TFmode
, "_U_Qfcnvfxt_quad_to_dbl");
8220 set_conv_libfunc (ufix_optab
, SImode
, TFmode
, "_U_Qfcnvfxut_quad_to_sgl");
8221 set_conv_libfunc (ufix_optab
, DImode
, TFmode
, "_U_Qfcnvfxut_quad_to_dbl");
8223 set_conv_libfunc (sfloat_optab
, TFmode
, SImode
, "_U_Qfcnvxf_sgl_to_quad");
8224 set_conv_libfunc (sfloat_optab
, TFmode
, DImode
, "_U_Qfcnvxf_dbl_to_quad");
8227 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8230 ia64_hpux_init_libfuncs (void)
8232 ia64_init_libfuncs ();
8234 set_optab_libfunc (smin_optab
, TFmode
, "_U_Qfmin");
8235 set_optab_libfunc (smax_optab
, TFmode
, "_U_Qfmax");
8236 set_optab_libfunc (abs_optab
, TFmode
, "_U_Qfabs");
8238 /* ia64_expand_compare uses this. */
8239 cmptf_libfunc
= init_one_libfunc ("_U_Qfcmp");
8241 /* These should never be used. */
8242 set_optab_libfunc (eq_optab
, TFmode
, 0);
8243 set_optab_libfunc (ne_optab
, TFmode
, 0);
8244 set_optab_libfunc (gt_optab
, TFmode
, 0);
8245 set_optab_libfunc (ge_optab
, TFmode
, 0);
8246 set_optab_libfunc (lt_optab
, TFmode
, 0);
8247 set_optab_libfunc (le_optab
, TFmode
, 0);
8250 /* Rename the division and modulus functions in VMS. */
8253 ia64_vms_init_libfuncs (void)
8255 set_optab_libfunc (sdiv_optab
, SImode
, "OTS$DIV_I");
8256 set_optab_libfunc (sdiv_optab
, DImode
, "OTS$DIV_L");
8257 set_optab_libfunc (udiv_optab
, SImode
, "OTS$DIV_UI");
8258 set_optab_libfunc (udiv_optab
, DImode
, "OTS$DIV_UL");
8259 set_optab_libfunc (smod_optab
, SImode
, "OTS$REM_I");
8260 set_optab_libfunc (smod_optab
, DImode
, "OTS$REM_L");
8261 set_optab_libfunc (umod_optab
, SImode
, "OTS$REM_UI");
8262 set_optab_libfunc (umod_optab
, DImode
, "OTS$REM_UL");
8265 /* Rename the TFmode libfuncs available from soft-fp in glibc using
8266 the HPUX conventions. */
8269 ia64_sysv4_init_libfuncs (void)
8271 ia64_init_libfuncs ();
8273 /* These functions are not part of the HPUX TFmode interface. We
8274 use them instead of _U_Qfcmp, which doesn't work the way we
8276 set_optab_libfunc (eq_optab
, TFmode
, "_U_Qfeq");
8277 set_optab_libfunc (ne_optab
, TFmode
, "_U_Qfne");
8278 set_optab_libfunc (gt_optab
, TFmode
, "_U_Qfgt");
8279 set_optab_libfunc (ge_optab
, TFmode
, "_U_Qfge");
8280 set_optab_libfunc (lt_optab
, TFmode
, "_U_Qflt");
8281 set_optab_libfunc (le_optab
, TFmode
, "_U_Qfle");
8283 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8284 glibc doesn't have them. */
8287 /* Switch to the section to which we should output X. The only thing
8288 special we do here is to honor small data. */
8291 ia64_select_rtx_section (enum machine_mode mode
, rtx x
,
8292 unsigned HOST_WIDE_INT align
)
8294 if (GET_MODE_SIZE (mode
) > 0
8295 && GET_MODE_SIZE (mode
) <= ia64_section_threshold
)
8298 default_elf_select_rtx_section (mode
, x
, align
);
8301 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8302 Pretend flag_pic is always set. */
8305 ia64_rwreloc_select_section (tree exp
, int reloc
, unsigned HOST_WIDE_INT align
)
8307 default_elf_select_section_1 (exp
, reloc
, align
, true);
8311 ia64_rwreloc_unique_section (tree decl
, int reloc
)
8313 default_unique_section_1 (decl
, reloc
, true);
8317 ia64_rwreloc_select_rtx_section (enum machine_mode mode
, rtx x
,
8318 unsigned HOST_WIDE_INT align
)
8320 int save_pic
= flag_pic
;
8322 ia64_select_rtx_section (mode
, x
, align
);
8323 flag_pic
= save_pic
;
/* Whether sections may contain relocations; targets that forbid them
   (HPUX/AIX) override this.  The closing #endif was lost in extraction.  */
#ifndef TARGET_RWRELOC
#define TARGET_RWRELOC flag_pic
#endif
8331 ia64_section_type_flags (tree decl
, const char *name
, int reloc
)
8333 unsigned int flags
= 0;
8335 if (strcmp (name
, ".sdata") == 0
8336 || strncmp (name
, ".sdata.", 7) == 0
8337 || strncmp (name
, ".gnu.linkonce.s.", 16) == 0
8338 || strncmp (name
, ".sdata2.", 8) == 0
8339 || strncmp (name
, ".gnu.linkonce.s2.", 17) == 0
8340 || strcmp (name
, ".sbss") == 0
8341 || strncmp (name
, ".sbss.", 6) == 0
8342 || strncmp (name
, ".gnu.linkonce.sb.", 17) == 0)
8343 flags
= SECTION_SMALL
;
8345 flags
|= default_section_type_flags_1 (decl
, name
, reloc
, TARGET_RWRELOC
);
8349 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8350 structure type and that the address of that type should be passed
8351 in out0, rather than in r8. */
8354 ia64_struct_retval_addr_is_first_parm_p (tree fntype
)
8356 tree ret_type
= TREE_TYPE (fntype
);
8358 /* The Itanium C++ ABI requires that out0, rather than r8, be used
8359 as the structure return address parameter, if the return value
8360 type has a non-trivial copy constructor or destructor. It is not
8361 clear if this same convention should be used for other
8362 programming languages. Until G++ 3.4, we incorrectly used r8 for
8363 these return values. */
8364 return (abi_version_at_least (2)
8366 && TYPE_MODE (ret_type
) == BLKmode
8367 && TREE_ADDRESSABLE (ret_type
)
8368 && strcmp (lang_hooks
.name
, "GNU C++") == 0);
8371 /* Output the assembler code for a thunk function. THUNK_DECL is the
8372 declaration for the thunk function itself, FUNCTION is the decl for
8373 the target function. DELTA is an immediate constant offset to be
8374 added to THIS. If VCALL_OFFSET is nonzero, the word at
8375 *(*this + vcall_offset) should be added to THIS. */
8378 ia64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
8379 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
8382 rtx
this, insn
, funexp
;
8383 unsigned int this_parmno
;
8384 unsigned int this_regno
;
8386 reload_completed
= 1;
8387 epilogue_completed
= 1;
8389 reset_block_changes ();
8391 /* Set things up as ia64_expand_prologue might. */
8392 last_scratch_gr_reg
= 15;
8394 memset (¤t_frame_info
, 0, sizeof (current_frame_info
));
8395 current_frame_info
.spill_cfa_off
= -16;
8396 current_frame_info
.n_input_regs
= 1;
8397 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
8399 /* Mark the end of the (empty) prologue. */
8400 emit_note (NOTE_INSN_PROLOGUE_END
);
8402 /* Figure out whether "this" will be the first parameter (the
8403 typical case) or the second parameter (as happens when the
8404 virtual function returns certain class objects). */
8406 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk
))
8408 this_regno
= IN_REG (this_parmno
);
8409 if (!TARGET_REG_NAMES
)
8410 reg_names
[this_regno
] = ia64_reg_numbers
[this_parmno
];
8412 this = gen_rtx_REG (Pmode
, this_regno
);
8415 rtx tmp
= gen_rtx_REG (ptr_mode
, this_regno
);
8416 REG_POINTER (tmp
) = 1;
8417 if (delta
&& CONST_OK_FOR_I (delta
))
8419 emit_insn (gen_ptr_extend_plus_imm (this, tmp
, GEN_INT (delta
)));
8423 emit_insn (gen_ptr_extend (this, tmp
));
8426 /* Apply the constant offset, if required. */
8429 rtx delta_rtx
= GEN_INT (delta
);
8431 if (!CONST_OK_FOR_I (delta
))
8433 rtx tmp
= gen_rtx_REG (Pmode
, 2);
8434 emit_move_insn (tmp
, delta_rtx
);
8437 emit_insn (gen_adddi3 (this, this, delta_rtx
));
8440 /* Apply the offset from the vtable, if required. */
8443 rtx vcall_offset_rtx
= GEN_INT (vcall_offset
);
8444 rtx tmp
= gen_rtx_REG (Pmode
, 2);
8448 rtx t
= gen_rtx_REG (ptr_mode
, 2);
8449 REG_POINTER (t
) = 1;
8450 emit_move_insn (t
, gen_rtx_MEM (ptr_mode
, this));
8451 if (CONST_OK_FOR_I (vcall_offset
))
8453 emit_insn (gen_ptr_extend_plus_imm (tmp
, t
,
8458 emit_insn (gen_ptr_extend (tmp
, t
));
8461 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, this));
8465 if (!CONST_OK_FOR_J (vcall_offset
))
8467 rtx tmp2
= gen_rtx_REG (Pmode
, next_scratch_gr_reg ());
8468 emit_move_insn (tmp2
, vcall_offset_rtx
);
8469 vcall_offset_rtx
= tmp2
;
8471 emit_insn (gen_adddi3 (tmp
, tmp
, vcall_offset_rtx
));
8475 emit_move_insn (gen_rtx_REG (ptr_mode
, 2),
8476 gen_rtx_MEM (ptr_mode
, tmp
));
8478 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, tmp
));
8480 emit_insn (gen_adddi3 (this, this, tmp
));
8483 /* Generate a tail call to the target function. */
8484 if (! TREE_USED (function
))
8486 assemble_external (function
);
8487 TREE_USED (function
) = 1;
8489 funexp
= XEXP (DECL_RTL (function
), 0);
8490 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
8491 ia64_expand_call (NULL_RTX
, funexp
, NULL_RTX
, 1);
8492 insn
= get_last_insn ();
8493 SIBLING_CALL_P (insn
) = 1;
8495 /* Code generation for calls relies on splitting. */
8496 reload_completed
= 1;
8497 epilogue_completed
= 1;
8498 try_split (PATTERN (insn
), insn
, 0);
8502 /* Run just enough of rest_of_compilation to get the insns emitted.
8503 There's not really enough bulk here to make other passes such as
8504 instruction scheduling worth while. Note that use_thunk calls
8505 assemble_start_function and assemble_end_function. */
8507 insn_locators_initialize ();
8508 emit_all_insn_group_barriers (NULL
);
8509 insn
= get_insns ();
8510 shorten_branches (insn
);
8511 final_start_function (insn
, file
, 1);
8512 final (insn
, file
, 1);
8513 final_end_function ();
8515 reload_completed
= 0;
8516 epilogue_completed
= 0;
8520 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
8523 ia64_struct_value_rtx (tree fntype
,
8524 int incoming ATTRIBUTE_UNUSED
)
8526 if (fntype
&& ia64_struct_retval_addr_is_first_parm_p (fntype
))
8528 return gen_rtx_REG (Pmode
, GR_REG (8));
8532 ia64_scalar_mode_supported_p (enum machine_mode mode
)
8557 ia64_vector_mode_supported_p (enum machine_mode mode
)
8575 ia64_output_function_profiler (FILE *file
, int labelno
)
8578 fputs ("\t.prologue 4, r40\n", file
);
8580 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file
);
8581 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file
);
8583 if (NO_PROFILE_COUNTERS
)
8584 fputs ("\tmov out3 = r0\n\t;;\n", file
);
8588 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
8590 if (TARGET_AUTO_PIC
)
8591 fputs ("\tmovl out3 = @gprel(", file
);
8593 fputs ("\taddl out3 = @ltoff(", file
);
8594 assemble_name (file
, buf
);
8595 if (TARGET_AUTO_PIC
)
8596 fputs (")\n\t;;\n", file
);
8598 fputs ("), r1\n\t;;\n", file
);
8601 fputs ("\t.save rp, r42\n", file
);
8602 fputs ("\tmov out2 = b0\n", file
);
8603 fputs ("\t.body\n", file
);
8604 fputs ("\tmov out1 = r1\n", file
);
8605 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file
);
8608 static GTY(()) rtx mcount_func_rtx
;
8610 gen_mcount_func_rtx (void)
8612 if (!mcount_func_rtx
)
8613 mcount_func_rtx
= init_one_libfunc ("_mcount");
8614 return mcount_func_rtx
;
8618 ia64_profile_hook (int labelno
)
8622 if (NO_PROFILE_COUNTERS
)
8627 const char *label_name
;
8628 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
8629 label_name
= (*targetm
.strip_name_encoding
) (ggc_strdup (buf
));
8630 label
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
8631 SYMBOL_REF_FLAGS (label
) = SYMBOL_FLAG_LOCAL
;
8633 ip
= gen_reg_rtx (Pmode
);
8634 emit_insn (gen_ip_value (ip
));
8635 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL
,
8637 gen_rtx_REG (Pmode
, BR_REG (0)), Pmode
,
8642 #include "gt-ia64.h"