1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
45 #include "sched-int.h"
48 #include "target-def.h"
51 #include "langhooks.h"
52 #include "cfglayout.h"
59 #include "tm-constrs.h"
61 /* This is used for communication between ASM_OUTPUT_LABEL and
62 ASM_OUTPUT_LABELREF.  Nonzero while a label definition is being
   emitted, so that the labelref path can react accordingly.
   NOTE(review): the exact handshake lives in the ia64.h macros,
   which are not visible in this chunk -- confirm there.  */
63 int ia64_asm_output_label
= 0;
65 /* Define the information needed to generate branch and scc insns.  This is
66 stored from the compare operation.  The two operands are stashed here
   by the compare expander and consumed later when the branch/scc
   pattern is generated.  */
67 struct rtx_def
* ia64_compare_op0
;
68 struct rtx_def
* ia64_compare_op1
;
70 /* Register names for ia64_expand_prologue.  These are the 96 stacked
   general registers r32-r127; index 0 corresponds to r32, so entry N
   names register r(32+N).  */
71 static const char * const ia64_reg_numbers
[96] =
72 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
73 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
74 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
75 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
76 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
77 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
78 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
79 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
80 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
81 "r104","r105","r106","r107","r108","r109","r110","r111",
82 "r112","r113","r114","r115","r116","r117","r118","r119",
83 "r120","r121","r122","r123","r124","r125","r126","r127"};
85 /* ??? These strings could be shared with REGISTER_NAMES.  */
86 /* Names for the eight incoming-argument registers, in0-in7.  */
86 static const char * const ia64_input_reg_names
[8] =
87 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
89 /* ??? These strings could be shared with REGISTER_NAMES.  */
90 /* Names for the eighty stacked local registers, loc0-loc79; entry N
   names register locN.  */
90 static const char * const ia64_local_reg_names
[80] =
91 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
92 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
93 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
94 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
95 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
96 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
97 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
98 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
99 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
100 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
102 /* ??? These strings could be shared with REGISTER_NAMES.  */
103 /* Names for the eight outgoing-argument registers, out0-out7.  */
103 static const char * const ia64_output_reg_names
[8] =
104 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
106 /* Which cpu are we scheduling for.  Defaults to Itanium 2; may be
    overridden by the -mtune= option handling (not visible in this
    chunk).  */
107 enum processor_type ia64_tune
= PROCESSOR_ITANIUM2
;
109 /* Determines whether we run our final scheduling pass or not.  We always
110 avoid the normal second scheduling pass.  */
111 static int ia64_flag_schedule_insns2
;
113 /* Determines whether we run variable tracking in machine dependent
    reorg.  */
115 static int ia64_flag_var_tracking
;
117 /* Variables which are this size or smaller are put in the sdata/sbss
    sections.  */
120 unsigned int ia64_section_threshold
;
122 /* The following variable is used by the DFA insn scheduler. The value is
123 TRUE if we do insn bundling instead of insn scheduling. */
135 number_of_ia64_frame_regs
138 /* Structure to be filled in by ia64_compute_frame_size with register
139 save masks and offsets for the current function. */
141 struct ia64_frame_info
143 HOST_WIDE_INT total_size
; /* size of the stack frame, not including
144 the caller's scratch area. */
145 HOST_WIDE_INT spill_cfa_off
; /* top of the reg spill area from the cfa. */
146 HOST_WIDE_INT spill_size
; /* size of the gr/br/fr spill area. */
147 HOST_WIDE_INT extra_spill_size
; /* size of spill area for others. */
148 HARD_REG_SET mask
; /* mask of saved registers. */
149 unsigned int gr_used_mask
; /* mask of registers in use as gr spill
150 registers or long-term scratches. */
151 int n_spilled
; /* number of spilled registers. */
152 int r
[number_of_ia64_frame_regs
]; /* Frame related registers. */
153 int n_input_regs
; /* number of input registers used. */
154 int n_local_regs
; /* number of local registers used. */
155 int n_output_regs
; /* number of output registers used. */
156 int n_rotate_regs
; /* number of rotating registers used. */
158 char need_regstk
; /* true if a .regstk directive needed. */
159 char initialized
; /* true if the data is finalized. */
162 /* Current frame information calculated by ia64_compute_frame_size. */
163 static struct ia64_frame_info current_frame_info
;
164 /* The actual registers that are emitted. */
165 static int emitted_frame_related_regs
[number_of_ia64_frame_regs
];
167 static int ia64_first_cycle_multipass_dfa_lookahead (void);
168 static void ia64_dependencies_evaluation_hook (rtx
, rtx
);
169 static void ia64_init_dfa_pre_cycle_insn (void);
170 static rtx
ia64_dfa_pre_cycle_insn (void);
171 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx
);
172 static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx
);
173 static int ia64_dfa_new_cycle (FILE *, int, rtx
, int, int, int *);
174 static void ia64_h_i_d_extended (void);
175 static int ia64_mode_to_int (enum machine_mode
);
176 static void ia64_set_sched_flags (spec_info_t
);
177 static int ia64_speculate_insn (rtx
, ds_t
, rtx
*);
178 static rtx
ia64_gen_spec_insn (rtx
, ds_t
, int, bool, bool);
179 static bool ia64_needs_block_p (const_rtx
);
180 static rtx
ia64_gen_check (rtx
, rtx
, bool);
181 static int ia64_spec_check_p (rtx
);
182 static int ia64_spec_check_src_p (rtx
);
183 static rtx
gen_tls_get_addr (void);
184 static rtx
gen_thread_pointer (void);
185 static int find_gr_spill (enum ia64_frame_regs
, int);
186 static int next_scratch_gr_reg (void);
187 static void mark_reg_gr_used_mask (rtx
, void *);
188 static void ia64_compute_frame_size (HOST_WIDE_INT
);
189 static void setup_spill_pointers (int, rtx
, HOST_WIDE_INT
);
190 static void finish_spill_pointers (void);
191 static rtx
spill_restore_mem (rtx
, HOST_WIDE_INT
);
192 static void do_spill (rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
, rtx
);
193 static void do_restore (rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
);
194 static rtx
gen_movdi_x (rtx
, rtx
, rtx
);
195 static rtx
gen_fr_spill_x (rtx
, rtx
, rtx
);
196 static rtx
gen_fr_restore_x (rtx
, rtx
, rtx
);
198 static enum machine_mode
hfa_element_mode (const_tree
, bool);
199 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
201 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS
*, enum machine_mode
,
203 static bool ia64_function_ok_for_sibcall (tree
, tree
);
204 static bool ia64_return_in_memory (const_tree
, const_tree
);
205 static bool ia64_rtx_costs (rtx
, int, int, int *);
206 static int ia64_unspec_may_trap_p (const_rtx
, unsigned);
207 static void fix_range (const char *);
208 static bool ia64_handle_option (size_t, const char *, int);
209 static struct machine_function
* ia64_init_machine_status (void);
210 static void emit_insn_group_barriers (FILE *);
211 static void emit_all_insn_group_barriers (FILE *);
212 static void final_emit_insn_group_barriers (FILE *);
213 static void emit_predicate_relation_info (void);
214 static void ia64_reorg (void);
215 static bool ia64_in_small_data_p (const_tree
);
216 static void process_epilogue (FILE *, rtx
, bool, bool);
217 static int process_set (FILE *, rtx
, rtx
, bool, bool);
219 static bool ia64_assemble_integer (rtx
, unsigned int, int);
220 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT
);
221 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT
);
222 static void ia64_output_function_end_prologue (FILE *);
224 static int ia64_issue_rate (void);
225 static int ia64_adjust_cost (rtx
, rtx
, rtx
, int);
226 static void ia64_sched_init (FILE *, int, int);
227 static void ia64_sched_init_global (FILE *, int, int);
228 static void ia64_sched_finish_global (FILE *, int);
229 static void ia64_sched_finish (FILE *, int);
230 static int ia64_dfa_sched_reorder (FILE *, int, rtx
*, int *, int, int);
231 static int ia64_sched_reorder (FILE *, int, rtx
*, int *, int);
232 static int ia64_sched_reorder2 (FILE *, int, rtx
*, int *, int);
233 static int ia64_variable_issue (FILE *, int, rtx
, int);
235 static struct bundle_state
*get_free_bundle_state (void);
236 static void free_bundle_state (struct bundle_state
*);
237 static void initiate_bundle_states (void);
238 static void finish_bundle_states (void);
239 static unsigned bundle_state_hash (const void *);
240 static int bundle_state_eq_p (const void *, const void *);
241 static int insert_bundle_state (struct bundle_state
*);
242 static void initiate_bundle_state_table (void);
243 static void finish_bundle_state_table (void);
244 static int try_issue_nops (struct bundle_state
*, int);
245 static int try_issue_insn (struct bundle_state
*, rtx
);
246 static void issue_nops_and_insn (struct bundle_state
*, int, rtx
, int, int);
247 static int get_max_pos (state_t
);
248 static int get_template (state_t
, int);
250 static rtx
get_next_important_insn (rtx
, rtx
);
251 static void bundling (FILE *, int, rtx
, rtx
);
253 static void ia64_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
254 HOST_WIDE_INT
, tree
);
255 static void ia64_file_start (void);
256 static void ia64_globalize_decl_name (FILE *, tree
);
258 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED
;
259 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED
;
260 static section
*ia64_select_rtx_section (enum machine_mode
, rtx
,
261 unsigned HOST_WIDE_INT
);
262 static void ia64_output_dwarf_dtprel (FILE *, int, rtx
)
264 static unsigned int ia64_section_type_flags (tree
, const char *, int);
265 static void ia64_init_libfuncs (void)
267 static void ia64_hpux_init_libfuncs (void)
269 static void ia64_sysv4_init_libfuncs (void)
271 static void ia64_vms_init_libfuncs (void)
274 static tree
ia64_handle_model_attribute (tree
*, tree
, tree
, int, bool *);
275 static tree
ia64_handle_version_id_attribute (tree
*, tree
, tree
, int, bool *);
276 static void ia64_encode_section_info (tree
, rtx
, int);
277 static rtx
ia64_struct_value_rtx (tree
, int);
278 static tree
ia64_gimplify_va_arg (tree
, tree
, gimple_seq
*, gimple_seq
*);
279 static bool ia64_scalar_mode_supported_p (enum machine_mode mode
);
280 static bool ia64_vector_mode_supported_p (enum machine_mode mode
);
281 static bool ia64_cannot_force_const_mem (rtx
);
282 static const char *ia64_mangle_type (const_tree
);
283 static const char *ia64_invalid_conversion (const_tree
, const_tree
);
284 static const char *ia64_invalid_unary_op (int, const_tree
);
285 static const char *ia64_invalid_binary_op (int, const_tree
, const_tree
);
286 static enum machine_mode
ia64_c_mode_for_suffix (char);
288 /* Table of valid machine attributes. */
289 static const struct attribute_spec ia64_attribute_table
[] =
291 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
292 { "syscall_linkage", 0, 0, false, true, true, NULL
},
293 { "model", 1, 1, true, false, false, ia64_handle_model_attribute
},
294 { "version_id", 1, 1, true, false, false,
295 ia64_handle_version_id_attribute
},
296 { NULL
, 0, 0, false, false, false, NULL
}
299 /* Initialize the GCC target structure. */
300 #undef TARGET_ATTRIBUTE_TABLE
301 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
303 #undef TARGET_INIT_BUILTINS
304 #define TARGET_INIT_BUILTINS ia64_init_builtins
306 #undef TARGET_EXPAND_BUILTIN
307 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
309 #undef TARGET_ASM_BYTE_OP
310 #define TARGET_ASM_BYTE_OP "\tdata1\t"
311 #undef TARGET_ASM_ALIGNED_HI_OP
312 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
313 #undef TARGET_ASM_ALIGNED_SI_OP
314 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
315 #undef TARGET_ASM_ALIGNED_DI_OP
316 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
317 #undef TARGET_ASM_UNALIGNED_HI_OP
318 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
319 #undef TARGET_ASM_UNALIGNED_SI_OP
320 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
321 #undef TARGET_ASM_UNALIGNED_DI_OP
322 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
323 #undef TARGET_ASM_INTEGER
324 #define TARGET_ASM_INTEGER ia64_assemble_integer
326 #undef TARGET_ASM_FUNCTION_PROLOGUE
327 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
328 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
329 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
330 #undef TARGET_ASM_FUNCTION_EPILOGUE
331 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
333 #undef TARGET_IN_SMALL_DATA_P
334 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
336 #undef TARGET_SCHED_ADJUST_COST
337 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
338 #undef TARGET_SCHED_ISSUE_RATE
339 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
340 #undef TARGET_SCHED_VARIABLE_ISSUE
341 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
342 #undef TARGET_SCHED_INIT
343 #define TARGET_SCHED_INIT ia64_sched_init
344 #undef TARGET_SCHED_FINISH
345 #define TARGET_SCHED_FINISH ia64_sched_finish
346 #undef TARGET_SCHED_INIT_GLOBAL
347 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
348 #undef TARGET_SCHED_FINISH_GLOBAL
349 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
350 #undef TARGET_SCHED_REORDER
351 #define TARGET_SCHED_REORDER ia64_sched_reorder
352 #undef TARGET_SCHED_REORDER2
353 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
355 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
356 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
358 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
359 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
361 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
362 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
363 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
364 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
366 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
367 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
368 ia64_first_cycle_multipass_dfa_lookahead_guard
370 #undef TARGET_SCHED_DFA_NEW_CYCLE
371 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
373 #undef TARGET_SCHED_H_I_D_EXTENDED
374 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
376 #undef TARGET_SCHED_SET_SCHED_FLAGS
377 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
379 #undef TARGET_SCHED_SPECULATE_INSN
380 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
382 #undef TARGET_SCHED_NEEDS_BLOCK_P
383 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
385 #undef TARGET_SCHED_GEN_CHECK
386 #define TARGET_SCHED_GEN_CHECK ia64_gen_check
388 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
389 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
390 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
392 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
393 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
394 #undef TARGET_ARG_PARTIAL_BYTES
395 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
397 #undef TARGET_ASM_OUTPUT_MI_THUNK
398 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
399 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
400 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
402 #undef TARGET_ASM_FILE_START
403 #define TARGET_ASM_FILE_START ia64_file_start
405 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
406 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
408 #undef TARGET_RTX_COSTS
409 #define TARGET_RTX_COSTS ia64_rtx_costs
410 #undef TARGET_ADDRESS_COST
411 #define TARGET_ADDRESS_COST hook_int_rtx_0
413 #undef TARGET_UNSPEC_MAY_TRAP_P
414 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
416 #undef TARGET_MACHINE_DEPENDENT_REORG
417 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
419 #undef TARGET_ENCODE_SECTION_INFO
420 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
422 #undef TARGET_SECTION_TYPE_FLAGS
423 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
426 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
427 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
430 /* ??? ABI doesn't allow us to define this. */
432 #undef TARGET_PROMOTE_FUNCTION_ARGS
433 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
436 /* ??? ABI doesn't allow us to define this. */
438 #undef TARGET_PROMOTE_FUNCTION_RETURN
439 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
442 /* ??? Investigate. */
444 #undef TARGET_PROMOTE_PROTOTYPES
445 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
448 #undef TARGET_STRUCT_VALUE_RTX
449 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
450 #undef TARGET_RETURN_IN_MEMORY
451 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
452 #undef TARGET_SETUP_INCOMING_VARARGS
453 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
454 #undef TARGET_STRICT_ARGUMENT_NAMING
455 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
456 #undef TARGET_MUST_PASS_IN_STACK
457 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
459 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
460 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
462 #undef TARGET_UNWIND_EMIT
463 #define TARGET_UNWIND_EMIT process_for_unwind_directive
465 #undef TARGET_SCALAR_MODE_SUPPORTED_P
466 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
467 #undef TARGET_VECTOR_MODE_SUPPORTED_P
468 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
470 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
471 in an order different from the specified program order. */
472 #undef TARGET_RELAXED_ORDERING
473 #define TARGET_RELAXED_ORDERING true
475 #undef TARGET_DEFAULT_TARGET_FLAGS
476 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
477 #undef TARGET_HANDLE_OPTION
478 #define TARGET_HANDLE_OPTION ia64_handle_option
480 #undef TARGET_CANNOT_FORCE_CONST_MEM
481 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
483 #undef TARGET_MANGLE_TYPE
484 #define TARGET_MANGLE_TYPE ia64_mangle_type
486 #undef TARGET_INVALID_CONVERSION
487 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
488 #undef TARGET_INVALID_UNARY_OP
489 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
490 #undef TARGET_INVALID_BINARY_OP
491 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
493 #undef TARGET_C_MODE_FOR_SUFFIX
494 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
496 #undef TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION
497 #define TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION true
499 #undef TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION
500 #define TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION true
502 /* The exported target hook vector for the IA-64 back end, assembled
    from the TARGET_* macro overrides defined above.  */
502 struct gcc_target targetm
= TARGET_INITIALIZER
;
506 ADDR_AREA_NORMAL
, /* normal address area */
507 ADDR_AREA_SMALL
/* addressable by "addl" (-2MB < addr < 2MB) */
511 static GTY(()) tree small_ident1
;
512 static GTY(()) tree small_ident2
;
517 if (small_ident1
== 0)
519 small_ident1
= get_identifier ("small");
520 small_ident2
= get_identifier ("__small__");
524 /* Retrieve the address area that has been chosen for the given decl. */
526 static ia64_addr_area
527 ia64_get_addr_area (tree decl
)
531 model_attr
= lookup_attribute ("model", DECL_ATTRIBUTES (decl
));
537 id
= TREE_VALUE (TREE_VALUE (model_attr
));
538 if (id
== small_ident1
|| id
== small_ident2
)
539 return ADDR_AREA_SMALL
;
541 return ADDR_AREA_NORMAL
;
545 ia64_handle_model_attribute (tree
*node
, tree name
, tree args
,
546 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
548 ia64_addr_area addr_area
= ADDR_AREA_NORMAL
;
550 tree arg
, decl
= *node
;
553 arg
= TREE_VALUE (args
);
554 if (arg
== small_ident1
|| arg
== small_ident2
)
556 addr_area
= ADDR_AREA_SMALL
;
560 warning (OPT_Wattributes
, "invalid argument of %qs attribute",
561 IDENTIFIER_POINTER (name
));
562 *no_add_attrs
= true;
565 switch (TREE_CODE (decl
))
568 if ((DECL_CONTEXT (decl
) && TREE_CODE (DECL_CONTEXT (decl
))
570 && !TREE_STATIC (decl
))
572 error ("%Jan address area attribute cannot be specified for "
573 "local variables", decl
);
574 *no_add_attrs
= true;
576 area
= ia64_get_addr_area (decl
);
577 if (area
!= ADDR_AREA_NORMAL
&& addr_area
!= area
)
579 error ("address area of %q+D conflicts with previous "
580 "declaration", decl
);
581 *no_add_attrs
= true;
586 error ("%Jaddress area attribute cannot be specified for functions",
588 *no_add_attrs
= true;
592 warning (OPT_Wattributes
, "%qs attribute ignored",
593 IDENTIFIER_POINTER (name
));
594 *no_add_attrs
= true;
602 ia64_encode_addr_area (tree decl
, rtx symbol
)
606 flags
= SYMBOL_REF_FLAGS (symbol
);
607 switch (ia64_get_addr_area (decl
))
609 case ADDR_AREA_NORMAL
: break;
610 case ADDR_AREA_SMALL
: flags
|= SYMBOL_FLAG_SMALL_ADDR
; break;
611 default: gcc_unreachable ();
613 SYMBOL_REF_FLAGS (symbol
) = flags
;
617 ia64_encode_section_info (tree decl
, rtx rtl
, int first
)
619 default_encode_section_info (decl
, rtl
, first
);
621 /* Careful not to prod global register variables. */
622 if (TREE_CODE (decl
) == VAR_DECL
623 && GET_CODE (DECL_RTL (decl
)) == MEM
624 && GET_CODE (XEXP (DECL_RTL (decl
), 0)) == SYMBOL_REF
625 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
)))
626 ia64_encode_addr_area (decl
, XEXP (rtl
, 0));
629 /* Return 1 if the operands of a move are ok. */
632 ia64_move_ok (rtx dst
, rtx src
)
634 /* If we're under init_recog_no_volatile, we'll not be able to use
635 memory_operand. So check the code directly and don't worry about
636 the validity of the underlying address, which should have been
637 checked elsewhere anyway. */
638 if (GET_CODE (dst
) != MEM
)
640 if (GET_CODE (src
) == MEM
)
642 if (register_operand (src
, VOIDmode
))
645 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
646 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
647 return src
== const0_rtx
;
649 return satisfies_constraint_G (src
);
652 /* Return 1 if the operands are ok for a floating point load pair. */
655 ia64_load_pair_ok (rtx dst
, rtx src
)
657 if (GET_CODE (dst
) != REG
|| !FP_REGNO_P (REGNO (dst
)))
659 if (GET_CODE (src
) != MEM
|| MEM_VOLATILE_P (src
))
661 switch (GET_CODE (XEXP (src
, 0)))
670 rtx adjust
= XEXP (XEXP (XEXP (src
, 0), 1), 1);
672 if (GET_CODE (adjust
) != CONST_INT
673 || INTVAL (adjust
) != GET_MODE_SIZE (GET_MODE (src
)))
684 addp4_optimize_ok (rtx op1
, rtx op2
)
686 return (basereg_operand (op1
, GET_MODE(op1
)) !=
687 basereg_operand (op2
, GET_MODE(op2
)));
690 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
691 Return the length of the field, or <= 0 on failure. */
694 ia64_depz_field_mask (rtx rop
, rtx rshift
)
696 unsigned HOST_WIDE_INT op
= INTVAL (rop
);
697 unsigned HOST_WIDE_INT shift
= INTVAL (rshift
);
699 /* Get rid of the zero bits we're shifting in. */
702 /* We must now have a solid block of 1's at bit 0. */
703 return exact_log2 (op
+ 1);
706 /* Return the TLS model to use for ADDR. */
708 static enum tls_model
709 tls_symbolic_operand_type (rtx addr
)
711 enum tls_model tls_kind
= 0;
713 if (GET_CODE (addr
) == CONST
)
715 if (GET_CODE (XEXP (addr
, 0)) == PLUS
716 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
)
717 tls_kind
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr
, 0), 0));
719 else if (GET_CODE (addr
) == SYMBOL_REF
)
720 tls_kind
= SYMBOL_REF_TLS_MODEL (addr
);
725 /* Return true if X is a constant that is valid for some immediate
726 field in an instruction. */
729 ia64_legitimate_constant_p (rtx x
)
731 switch (GET_CODE (x
))
738 if (GET_MODE (x
) == VOIDmode
)
740 return satisfies_constraint_G (x
);
744 /* ??? Short term workaround for PR 28490. We must make the code here
745 match the code in ia64_expand_move and move_operand, even though they
746 are both technically wrong. */
747 if (tls_symbolic_operand_type (x
) == 0)
749 HOST_WIDE_INT addend
= 0;
752 if (GET_CODE (op
) == CONST
753 && GET_CODE (XEXP (op
, 0)) == PLUS
754 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
756 addend
= INTVAL (XEXP (XEXP (op
, 0), 1));
757 op
= XEXP (XEXP (op
, 0), 0);
760 if (any_offset_symbol_operand (op
, GET_MODE (op
))
761 || function_operand (op
, GET_MODE (op
)))
763 if (aligned_offset_symbol_operand (op
, GET_MODE (op
)))
764 return (addend
& 0x3fff) == 0;
771 enum machine_mode mode
= GET_MODE (x
);
773 if (mode
== V2SFmode
)
774 return satisfies_constraint_Y (x
);
776 return (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
777 && GET_MODE_SIZE (mode
) <= 8);
785 /* Don't allow TLS addresses to get spilled to memory. */
788 ia64_cannot_force_const_mem (rtx x
)
790 if (GET_MODE (x
) == RFmode
)
792 return tls_symbolic_operand_type (x
) != 0;
795 /* Expand a symbolic constant load. */
798 ia64_expand_load_address (rtx dest
, rtx src
)
800 gcc_assert (GET_CODE (dest
) == REG
);
802 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
803 having to pointer-extend the value afterward. Other forms of address
804 computation below are also more natural to compute as 64-bit quantities.
805 If we've been given an SImode destination register, change it. */
806 if (GET_MODE (dest
) != Pmode
)
807 dest
= gen_rtx_REG_offset (dest
, Pmode
, REGNO (dest
),
808 byte_lowpart_offset (Pmode
, GET_MODE (dest
)));
812 if (small_addr_symbolic_operand (src
, VOIDmode
))
816 emit_insn (gen_load_gprel64 (dest
, src
));
817 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (src
))
818 emit_insn (gen_load_fptr (dest
, src
));
819 else if (sdata_symbolic_operand (src
, VOIDmode
))
820 emit_insn (gen_load_gprel (dest
, src
));
823 HOST_WIDE_INT addend
= 0;
826 /* We did split constant offsets in ia64_expand_move, and we did try
827 to keep them split in move_operand, but we also allowed reload to
828 rematerialize arbitrary constants rather than spill the value to
829 the stack and reload it. So we have to be prepared here to split
831 if (GET_CODE (src
) == CONST
)
833 HOST_WIDE_INT hi
, lo
;
835 hi
= INTVAL (XEXP (XEXP (src
, 0), 1));
836 lo
= ((hi
& 0x3fff) ^ 0x2000) - 0x2000;
842 src
= plus_constant (XEXP (XEXP (src
, 0), 0), hi
);
846 tmp
= gen_rtx_HIGH (Pmode
, src
);
847 tmp
= gen_rtx_PLUS (Pmode
, tmp
, pic_offset_table_rtx
);
848 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
850 tmp
= gen_rtx_LO_SUM (Pmode
, dest
, src
);
851 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
855 tmp
= gen_rtx_PLUS (Pmode
, dest
, GEN_INT (addend
));
856 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
863 static GTY(()) rtx gen_tls_tga
;
865 gen_tls_get_addr (void)
868 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
872 static GTY(()) rtx thread_pointer_rtx
;
874 gen_thread_pointer (void)
876 if (!thread_pointer_rtx
)
877 thread_pointer_rtx
= gen_rtx_REG (Pmode
, 13);
878 return thread_pointer_rtx
;
882 ia64_expand_tls_address (enum tls_model tls_kind
, rtx op0
, rtx op1
,
883 rtx orig_op1
, HOST_WIDE_INT addend
)
885 rtx tga_op1
, tga_op2
, tga_ret
, tga_eqv
, tmp
, insns
;
887 HOST_WIDE_INT addend_lo
, addend_hi
;
891 case TLS_MODEL_GLOBAL_DYNAMIC
:
894 tga_op1
= gen_reg_rtx (Pmode
);
895 emit_insn (gen_load_dtpmod (tga_op1
, op1
));
897 tga_op2
= gen_reg_rtx (Pmode
);
898 emit_insn (gen_load_dtprel (tga_op2
, op1
));
900 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
901 LCT_CONST
, Pmode
, 2, tga_op1
,
902 Pmode
, tga_op2
, Pmode
);
904 insns
= get_insns ();
907 if (GET_MODE (op0
) != Pmode
)
909 emit_libcall_block (insns
, op0
, tga_ret
, op1
);
912 case TLS_MODEL_LOCAL_DYNAMIC
:
913 /* ??? This isn't the completely proper way to do local-dynamic
914 If the call to __tls_get_addr is used only by a single symbol,
915 then we should (somehow) move the dtprel to the second arg
916 to avoid the extra add. */
919 tga_op1
= gen_reg_rtx (Pmode
);
920 emit_insn (gen_load_dtpmod (tga_op1
, op1
));
922 tga_op2
= const0_rtx
;
924 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
925 LCT_CONST
, Pmode
, 2, tga_op1
,
926 Pmode
, tga_op2
, Pmode
);
928 insns
= get_insns ();
931 tga_eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
933 tmp
= gen_reg_rtx (Pmode
);
934 emit_libcall_block (insns
, tmp
, tga_ret
, tga_eqv
);
936 if (!register_operand (op0
, Pmode
))
937 op0
= gen_reg_rtx (Pmode
);
940 emit_insn (gen_load_dtprel (op0
, op1
));
941 emit_insn (gen_adddi3 (op0
, tmp
, op0
));
944 emit_insn (gen_add_dtprel (op0
, op1
, tmp
));
947 case TLS_MODEL_INITIAL_EXEC
:
948 addend_lo
= ((addend
& 0x3fff) ^ 0x2000) - 0x2000;
949 addend_hi
= addend
- addend_lo
;
951 op1
= plus_constant (op1
, addend_hi
);
954 tmp
= gen_reg_rtx (Pmode
);
955 emit_insn (gen_load_tprel (tmp
, op1
));
957 if (!register_operand (op0
, Pmode
))
958 op0
= gen_reg_rtx (Pmode
);
959 emit_insn (gen_adddi3 (op0
, tmp
, gen_thread_pointer ()));
962 case TLS_MODEL_LOCAL_EXEC
:
963 if (!register_operand (op0
, Pmode
))
964 op0
= gen_reg_rtx (Pmode
);
970 emit_insn (gen_load_tprel (op0
, op1
));
971 emit_insn (gen_adddi3 (op0
, op0
, gen_thread_pointer ()));
974 emit_insn (gen_add_tprel (op0
, op1
, gen_thread_pointer ()));
982 op0
= expand_simple_binop (Pmode
, PLUS
, op0
, GEN_INT (addend
),
983 orig_op0
, 1, OPTAB_DIRECT
);
986 if (GET_MODE (orig_op0
) == Pmode
)
988 return gen_lowpart (GET_MODE (orig_op0
), op0
);
992 ia64_expand_move (rtx op0
, rtx op1
)
994 enum machine_mode mode
= GET_MODE (op0
);
996 if (!reload_in_progress
&& !reload_completed
&& !ia64_move_ok (op0
, op1
))
997 op1
= force_reg (mode
, op1
);
999 if ((mode
== Pmode
|| mode
== ptr_mode
) && symbolic_operand (op1
, VOIDmode
))
1001 HOST_WIDE_INT addend
= 0;
1002 enum tls_model tls_kind
;
1005 if (GET_CODE (op1
) == CONST
1006 && GET_CODE (XEXP (op1
, 0)) == PLUS
1007 && GET_CODE (XEXP (XEXP (op1
, 0), 1)) == CONST_INT
)
1009 addend
= INTVAL (XEXP (XEXP (op1
, 0), 1));
1010 sym
= XEXP (XEXP (op1
, 0), 0);
1013 tls_kind
= tls_symbolic_operand_type (sym
);
1015 return ia64_expand_tls_address (tls_kind
, op0
, sym
, op1
, addend
);
1017 if (any_offset_symbol_operand (sym
, mode
))
1019 else if (aligned_offset_symbol_operand (sym
, mode
))
1021 HOST_WIDE_INT addend_lo
, addend_hi
;
1023 addend_lo
= ((addend
& 0x3fff) ^ 0x2000) - 0x2000;
1024 addend_hi
= addend
- addend_lo
;
1028 op1
= plus_constant (sym
, addend_hi
);
1037 if (reload_completed
)
1039 /* We really should have taken care of this offset earlier. */
1040 gcc_assert (addend
== 0);
1041 if (ia64_expand_load_address (op0
, op1
))
1047 rtx subtarget
= !can_create_pseudo_p () ? op0
: gen_reg_rtx (mode
);
1049 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
, op1
));
1051 op1
= expand_simple_binop (mode
, PLUS
, subtarget
,
1052 GEN_INT (addend
), op0
, 1, OPTAB_DIRECT
);
1061 /* Split a move from OP1 to OP0 conditional on COND. */
1064 ia64_emit_cond_move (rtx op0
, rtx op1
, rtx cond
)
1066 rtx insn
, first
= get_last_insn ();
1068 emit_move_insn (op0
, op1
);
1070 for (insn
= get_last_insn (); insn
!= first
; insn
= PREV_INSN (insn
))
1072 PATTERN (insn
) = gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
),
1076 /* Split a post-reload TImode or TFmode reference into two DImode
1077 components. This is made extra difficult by the fact that we do
1078 not get any scratch registers to work with, because reload cannot
1079 be prevented from giving us a scratch that overlaps the register
1080 pair involved. So instead, when addressing memory, we tweak the
1081 pointer register up and back down with POST_INCs. Or up and not
1082 back down when we can get away with it.
1084 REVERSED is true when the loads must be done in reversed order
1085 (high word first) for correctness. DEAD is true when the pointer
1086 dies with the second insn we generate and therefore the second
1087 address must not carry a postmodify.
1089 May return an insn which is to be emitted after the moves. */
1092 ia64_split_tmode (rtx out
[2], rtx in
, bool reversed
, bool dead
)
1096 switch (GET_CODE (in
))
1099 out
[reversed
] = gen_rtx_REG (DImode
, REGNO (in
));
1100 out
[!reversed
] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
1105 /* Cannot occur reversed. */
1106 gcc_assert (!reversed
);
1108 if (GET_MODE (in
) != TFmode
)
1109 split_double (in
, &out
[0], &out
[1]);
1111 /* split_double does not understand how to split a TFmode
1112 quantity into a pair of DImode constants. */
1115 unsigned HOST_WIDE_INT p
[2];
1116 long l
[4]; /* TFmode is 128 bits */
1118 REAL_VALUE_FROM_CONST_DOUBLE (r
, in
);
1119 real_to_target (l
, &r
, TFmode
);
1121 if (FLOAT_WORDS_BIG_ENDIAN
)
1123 p
[0] = (((unsigned HOST_WIDE_INT
) l
[0]) << 32) + l
[1];
1124 p
[1] = (((unsigned HOST_WIDE_INT
) l
[2]) << 32) + l
[3];
1128 p
[0] = (((unsigned HOST_WIDE_INT
) l
[1]) << 32) + l
[0];
1129 p
[1] = (((unsigned HOST_WIDE_INT
) l
[3]) << 32) + l
[2];
1131 out
[0] = GEN_INT (p
[0]);
1132 out
[1] = GEN_INT (p
[1]);
1138 rtx base
= XEXP (in
, 0);
1141 switch (GET_CODE (base
))
1146 out
[0] = adjust_automodify_address
1147 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1148 out
[1] = adjust_automodify_address
1149 (in
, DImode
, dead
? 0 : gen_rtx_POST_DEC (Pmode
, base
), 8);
1153 /* Reversal requires a pre-increment, which can only
1154 be done as a separate insn. */
1155 emit_insn (gen_adddi3 (base
, base
, GEN_INT (8)));
1156 out
[0] = adjust_automodify_address
1157 (in
, DImode
, gen_rtx_POST_DEC (Pmode
, base
), 8);
1158 out
[1] = adjust_address (in
, DImode
, 0);
1163 gcc_assert (!reversed
&& !dead
);
1165 /* Just do the increment in two steps. */
1166 out
[0] = adjust_automodify_address (in
, DImode
, 0, 0);
1167 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
1171 gcc_assert (!reversed
&& !dead
);
1173 /* Add 8, subtract 24. */
1174 base
= XEXP (base
, 0);
1175 out
[0] = adjust_automodify_address
1176 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1177 out
[1] = adjust_automodify_address
1179 gen_rtx_POST_MODIFY (Pmode
, base
, plus_constant (base
, -24)),
1184 gcc_assert (!reversed
&& !dead
);
1186 /* Extract and adjust the modification. This case is
1187 trickier than the others, because we might have an
1188 index register, or we might have a combined offset that
1189 doesn't fit a signed 9-bit displacement field. We can
1190 assume the incoming expression is already legitimate. */
1191 offset
= XEXP (base
, 1);
1192 base
= XEXP (base
, 0);
1194 out
[0] = adjust_automodify_address
1195 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1197 if (GET_CODE (XEXP (offset
, 1)) == REG
)
1199 /* Can't adjust the postmodify to match. Emit the
1200 original, then a separate addition insn. */
1201 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
1202 fixup
= gen_adddi3 (base
, base
, GEN_INT (-8));
1206 gcc_assert (GET_CODE (XEXP (offset
, 1)) == CONST_INT
);
1207 if (INTVAL (XEXP (offset
, 1)) < -256 + 8)
1209 /* Again the postmodify cannot be made to match,
1210 but in this case it's more efficient to get rid
1211 of the postmodify entirely and fix up with an
1213 out
[1] = adjust_automodify_address (in
, DImode
, base
, 8);
1215 (base
, base
, GEN_INT (INTVAL (XEXP (offset
, 1)) - 8));
1219 /* Combined offset still fits in the displacement field.
1220 (We cannot overflow it at the high end.) */
1221 out
[1] = adjust_automodify_address
1222 (in
, DImode
, gen_rtx_POST_MODIFY
1223 (Pmode
, base
, gen_rtx_PLUS
1225 GEN_INT (INTVAL (XEXP (offset
, 1)) - 8))),
1244 /* Split a TImode or TFmode move instruction after reload.
1245 This is used by *movtf_internal and *movti_internal. */
1247 ia64_split_tmode_move (rtx operands
[])
1249 rtx in
[2], out
[2], insn
;
1252 bool reversed
= false;
1254 /* It is possible for reload to decide to overwrite a pointer with
1255 the value it points to. In that case we have to do the loads in
1256 the appropriate order so that the pointer is not destroyed too
1257 early. Also we must not generate a postmodify for that second
1258 load, or rws_access_regno will die. */
1259 if (GET_CODE (operands
[1]) == MEM
1260 && reg_overlap_mentioned_p (operands
[0], operands
[1]))
1262 rtx base
= XEXP (operands
[1], 0);
1263 while (GET_CODE (base
) != REG
)
1264 base
= XEXP (base
, 0);
1266 if (REGNO (base
) == REGNO (operands
[0]))
1270 /* Another reason to do the moves in reversed order is if the first
1271 element of the target register pair is also the second element of
1272 the source register pair. */
1273 if (GET_CODE (operands
[0]) == REG
&& GET_CODE (operands
[1]) == REG
1274 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
1277 fixup
[0] = ia64_split_tmode (in
, operands
[1], reversed
, dead
);
1278 fixup
[1] = ia64_split_tmode (out
, operands
[0], reversed
, dead
);
1280 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1281 if (GET_CODE (EXP) == MEM \
1282 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1283 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1284 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1285 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1286 XEXP (XEXP (EXP, 0), 0), \
1289 insn
= emit_insn (gen_rtx_SET (VOIDmode
, out
[0], in
[0]));
1290 MAYBE_ADD_REG_INC_NOTE (insn
, in
[0]);
1291 MAYBE_ADD_REG_INC_NOTE (insn
, out
[0]);
1293 insn
= emit_insn (gen_rtx_SET (VOIDmode
, out
[1], in
[1]));
1294 MAYBE_ADD_REG_INC_NOTE (insn
, in
[1]);
1295 MAYBE_ADD_REG_INC_NOTE (insn
, out
[1]);
1298 emit_insn (fixup
[0]);
1300 emit_insn (fixup
[1]);
1302 #undef MAYBE_ADD_REG_INC_NOTE
1305 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1306 through memory plus an extra GR scratch register. Except that you can
1307 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1308 SECONDARY_RELOAD_CLASS, but not both.
1310 We got into problems in the first place by allowing a construct like
1311 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1312 This solution attempts to prevent this situation from occurring. When
1313 we see something like the above, we spill the inner register to memory. */
1316 spill_xfmode_rfmode_operand (rtx in
, int force
, enum machine_mode mode
)
1318 if (GET_CODE (in
) == SUBREG
1319 && GET_MODE (SUBREG_REG (in
)) == TImode
1320 && GET_CODE (SUBREG_REG (in
)) == REG
)
1322 rtx memt
= assign_stack_temp (TImode
, 16, 0);
1323 emit_move_insn (memt
, SUBREG_REG (in
));
1324 return adjust_address (memt
, mode
, 0);
1326 else if (force
&& GET_CODE (in
) == REG
)
1328 rtx memx
= assign_stack_temp (mode
, 16, 0);
1329 emit_move_insn (memx
, in
);
1336 /* Expand the movxf or movrf pattern (MODE says which) with the given
1337 OPERANDS, returning true if the pattern should then invoke
1341 ia64_expand_movxf_movrf (enum machine_mode mode
, rtx operands
[])
1343 rtx op0
= operands
[0];
1345 if (GET_CODE (op0
) == SUBREG
)
1346 op0
= SUBREG_REG (op0
);
1348 /* We must support XFmode loads into general registers for stdarg/vararg,
1349 unprototyped calls, and a rare case where a long double is passed as
1350 an argument after a float HFA fills the FP registers. We split them into
1351 DImode loads for convenience. We also need to support XFmode stores
1352 for the last case. This case does not happen for stdarg/vararg routines,
1353 because we do a block store to memory of unnamed arguments. */
1355 if (GET_CODE (op0
) == REG
&& GR_REGNO_P (REGNO (op0
)))
1359 /* We're hoping to transform everything that deals with XFmode
1360 quantities and GR registers early in the compiler. */
1361 gcc_assert (can_create_pseudo_p ());
1363 /* Struct to register can just use TImode instead. */
1364 if ((GET_CODE (operands
[1]) == SUBREG
1365 && GET_MODE (SUBREG_REG (operands
[1])) == TImode
)
1366 || (GET_CODE (operands
[1]) == REG
1367 && GR_REGNO_P (REGNO (operands
[1]))))
1369 rtx op1
= operands
[1];
1371 if (GET_CODE (op1
) == SUBREG
)
1372 op1
= SUBREG_REG (op1
);
1374 op1
= gen_rtx_REG (TImode
, REGNO (op1
));
1376 emit_move_insn (gen_rtx_REG (TImode
, REGNO (op0
)), op1
);
1380 if (GET_CODE (operands
[1]) == CONST_DOUBLE
)
1382 /* Don't word-swap when reading in the constant. */
1383 emit_move_insn (gen_rtx_REG (DImode
, REGNO (op0
)),
1384 operand_subword (operands
[1], WORDS_BIG_ENDIAN
,
1386 emit_move_insn (gen_rtx_REG (DImode
, REGNO (op0
) + 1),
1387 operand_subword (operands
[1], !WORDS_BIG_ENDIAN
,
1392 /* If the quantity is in a register not known to be GR, spill it. */
1393 if (register_operand (operands
[1], mode
))
1394 operands
[1] = spill_xfmode_rfmode_operand (operands
[1], 1, mode
);
1396 gcc_assert (GET_CODE (operands
[1]) == MEM
);
1398 /* Don't word-swap when reading in the value. */
1399 out
[0] = gen_rtx_REG (DImode
, REGNO (op0
));
1400 out
[1] = gen_rtx_REG (DImode
, REGNO (op0
) + 1);
1402 emit_move_insn (out
[0], adjust_address (operands
[1], DImode
, 0));
1403 emit_move_insn (out
[1], adjust_address (operands
[1], DImode
, 8));
1407 if (GET_CODE (operands
[1]) == REG
&& GR_REGNO_P (REGNO (operands
[1])))
1409 /* We're hoping to transform everything that deals with XFmode
1410 quantities and GR registers early in the compiler. */
1411 gcc_assert (can_create_pseudo_p ());
1413 /* Op0 can't be a GR_REG here, as that case is handled above.
1414 If op0 is a register, then we spill op1, so that we now have a
1415 MEM operand. This requires creating an XFmode subreg of a TImode reg
1416 to force the spill. */
1417 if (register_operand (operands
[0], mode
))
1419 rtx op1
= gen_rtx_REG (TImode
, REGNO (operands
[1]));
1420 op1
= gen_rtx_SUBREG (mode
, op1
, 0);
1421 operands
[1] = spill_xfmode_rfmode_operand (op1
, 0, mode
);
1428 gcc_assert (GET_CODE (operands
[0]) == MEM
);
1430 /* Don't word-swap when writing out the value. */
1431 in
[0] = gen_rtx_REG (DImode
, REGNO (operands
[1]));
1432 in
[1] = gen_rtx_REG (DImode
, REGNO (operands
[1]) + 1);
1434 emit_move_insn (adjust_address (operands
[0], DImode
, 0), in
[0]);
1435 emit_move_insn (adjust_address (operands
[0], DImode
, 8), in
[1]);
1440 if (!reload_in_progress
&& !reload_completed
)
1442 operands
[1] = spill_xfmode_rfmode_operand (operands
[1], 0, mode
);
1444 if (GET_MODE (op0
) == TImode
&& GET_CODE (op0
) == REG
)
1446 rtx memt
, memx
, in
= operands
[1];
1447 if (CONSTANT_P (in
))
1448 in
= validize_mem (force_const_mem (mode
, in
));
1449 if (GET_CODE (in
) == MEM
)
1450 memt
= adjust_address (in
, TImode
, 0);
1453 memt
= assign_stack_temp (TImode
, 16, 0);
1454 memx
= adjust_address (memt
, mode
, 0);
1455 emit_move_insn (memx
, in
);
1457 emit_move_insn (op0
, memt
);
1461 if (!ia64_move_ok (operands
[0], operands
[1]))
1462 operands
[1] = force_reg (mode
, operands
[1]);
1468 /* Emit comparison instruction if necessary, returning the expression
1469 that holds the compare result in the proper mode. */
1471 static GTY(()) rtx cmptf_libfunc
;
1474 ia64_expand_compare (enum rtx_code code
, enum machine_mode mode
)
1476 rtx op0
= ia64_compare_op0
, op1
= ia64_compare_op1
;
1479 /* If we have a BImode input, then we already have a compare result, and
1480 do not need to emit another comparison. */
1481 if (GET_MODE (op0
) == BImode
)
1483 gcc_assert ((code
== NE
|| code
== EQ
) && op1
== const0_rtx
);
1486 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1487 magic number as its third argument, that indicates what to do.
1488 The return value is an integer to be compared against zero. */
1489 else if (GET_MODE (op0
) == TFmode
)
1492 QCMP_INV
= 1, /* Raise FP_INVALID on SNaN as a side effect. */
1498 enum rtx_code ncode
;
1501 gcc_assert (cmptf_libfunc
&& GET_MODE (op1
) == TFmode
);
1504 /* 1 = equal, 0 = not equal. Equality operators do
1505 not raise FP_INVALID when given an SNaN operand. */
1506 case EQ
: magic
= QCMP_EQ
; ncode
= NE
; break;
1507 case NE
: magic
= QCMP_EQ
; ncode
= EQ
; break;
1508 /* isunordered() from C99. */
1509 case UNORDERED
: magic
= QCMP_UNORD
; ncode
= NE
; break;
1510 case ORDERED
: magic
= QCMP_UNORD
; ncode
= EQ
; break;
1511 /* Relational operators raise FP_INVALID when given
1513 case LT
: magic
= QCMP_LT
|QCMP_INV
; ncode
= NE
; break;
1514 case LE
: magic
= QCMP_LT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
1515 case GT
: magic
= QCMP_GT
|QCMP_INV
; ncode
= NE
; break;
1516 case GE
: magic
= QCMP_GT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
1517 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1518 Expanders for buneq etc. weuld have to be added to ia64.md
1519 for this to be useful. */
1520 default: gcc_unreachable ();
1525 ret
= emit_library_call_value (cmptf_libfunc
, 0, LCT_CONST
, DImode
, 3,
1526 op0
, TFmode
, op1
, TFmode
,
1527 GEN_INT (magic
), DImode
);
1528 cmp
= gen_reg_rtx (BImode
);
1529 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1530 gen_rtx_fmt_ee (ncode
, BImode
,
1533 insns
= get_insns ();
1536 emit_libcall_block (insns
, cmp
, cmp
,
1537 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
));
1542 cmp
= gen_reg_rtx (BImode
);
1543 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1544 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
)));
1548 return gen_rtx_fmt_ee (code
, mode
, cmp
, const0_rtx
);
1551 /* Generate an integral vector comparison. Return true if the condition has
1552 been reversed, and so the sense of the comparison should be inverted. */
1555 ia64_expand_vecint_compare (enum rtx_code code
, enum machine_mode mode
,
1556 rtx dest
, rtx op0
, rtx op1
)
1558 bool negate
= false;
1561 /* Canonicalize the comparison to EQ, GT, GTU. */
1572 code
= reverse_condition (code
);
1578 code
= reverse_condition (code
);
1584 code
= swap_condition (code
);
1585 x
= op0
, op0
= op1
, op1
= x
;
1592 /* Unsigned parallel compare is not supported by the hardware. Play some
1593 tricks to turn this into a signed comparison against 0. */
1602 /* Perform a parallel modulo subtraction. */
1603 t1
= gen_reg_rtx (V2SImode
);
1604 emit_insn (gen_subv2si3 (t1
, op0
, op1
));
1606 /* Extract the original sign bit of op0. */
1607 mask
= GEN_INT (-0x80000000);
1608 mask
= gen_rtx_CONST_VECTOR (V2SImode
, gen_rtvec (2, mask
, mask
));
1609 mask
= force_reg (V2SImode
, mask
);
1610 t2
= gen_reg_rtx (V2SImode
);
1611 emit_insn (gen_andv2si3 (t2
, op0
, mask
));
1613 /* XOR it back into the result of the subtraction. This results
1614 in the sign bit set iff we saw unsigned underflow. */
1615 x
= gen_reg_rtx (V2SImode
);
1616 emit_insn (gen_xorv2si3 (x
, t1
, t2
));
1620 op1
= CONST0_RTX (mode
);
1626 /* Perform a parallel unsigned saturating subtraction. */
1627 x
= gen_reg_rtx (mode
);
1628 emit_insn (gen_rtx_SET (VOIDmode
, x
,
1629 gen_rtx_US_MINUS (mode
, op0
, op1
)));
1633 op1
= CONST0_RTX (mode
);
1642 x
= gen_rtx_fmt_ee (code
, mode
, op0
, op1
);
1643 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
1648 /* Emit an integral vector conditional move. */
1651 ia64_expand_vecint_cmov (rtx operands
[])
1653 enum machine_mode mode
= GET_MODE (operands
[0]);
1654 enum rtx_code code
= GET_CODE (operands
[3]);
1658 cmp
= gen_reg_rtx (mode
);
1659 negate
= ia64_expand_vecint_compare (code
, mode
, cmp
,
1660 operands
[4], operands
[5]);
1662 ot
= operands
[1+negate
];
1663 of
= operands
[2-negate
];
1665 if (ot
== CONST0_RTX (mode
))
1667 if (of
== CONST0_RTX (mode
))
1669 emit_move_insn (operands
[0], ot
);
1673 x
= gen_rtx_NOT (mode
, cmp
);
1674 x
= gen_rtx_AND (mode
, x
, of
);
1675 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1677 else if (of
== CONST0_RTX (mode
))
1679 x
= gen_rtx_AND (mode
, cmp
, ot
);
1680 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1686 t
= gen_reg_rtx (mode
);
1687 x
= gen_rtx_AND (mode
, cmp
, operands
[1+negate
]);
1688 emit_insn (gen_rtx_SET (VOIDmode
, t
, x
));
1690 f
= gen_reg_rtx (mode
);
1691 x
= gen_rtx_NOT (mode
, cmp
);
1692 x
= gen_rtx_AND (mode
, x
, operands
[2-negate
]);
1693 emit_insn (gen_rtx_SET (VOIDmode
, f
, x
));
1695 x
= gen_rtx_IOR (mode
, t
, f
);
1696 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1700 /* Emit an integral vector min or max operation. Return true if all done. */
1703 ia64_expand_vecint_minmax (enum rtx_code code
, enum machine_mode mode
,
1708 /* These four combinations are supported directly. */
1709 if (mode
== V8QImode
&& (code
== UMIN
|| code
== UMAX
))
1711 if (mode
== V4HImode
&& (code
== SMIN
|| code
== SMAX
))
1714 /* This combination can be implemented with only saturating subtraction. */
1715 if (mode
== V4HImode
&& code
== UMAX
)
1717 rtx x
, tmp
= gen_reg_rtx (mode
);
1719 x
= gen_rtx_US_MINUS (mode
, operands
[1], operands
[2]);
1720 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, x
));
1722 emit_insn (gen_addv4hi3 (operands
[0], tmp
, operands
[2]));
1726 /* Everything else implemented via vector comparisons. */
1727 xops
[0] = operands
[0];
1728 xops
[4] = xops
[1] = operands
[1];
1729 xops
[5] = xops
[2] = operands
[2];
1748 xops
[3] = gen_rtx_fmt_ee (code
, VOIDmode
, operands
[1], operands
[2]);
1750 ia64_expand_vecint_cmov (xops
);
1754 /* Emit an integral vector widening sum operations. */
1757 ia64_expand_widen_sum (rtx operands
[3], bool unsignedp
)
1760 enum machine_mode wmode
, mode
;
1761 rtx (*unpack_l
) (rtx
, rtx
, rtx
);
1762 rtx (*unpack_h
) (rtx
, rtx
, rtx
);
1763 rtx (*plus
) (rtx
, rtx
, rtx
);
1765 wmode
= GET_MODE (operands
[0]);
1766 mode
= GET_MODE (operands
[1]);
1771 unpack_l
= gen_unpack1_l
;
1772 unpack_h
= gen_unpack1_h
;
1773 plus
= gen_addv4hi3
;
1776 unpack_l
= gen_unpack2_l
;
1777 unpack_h
= gen_unpack2_h
;
1778 plus
= gen_addv2si3
;
1784 /* Fill in x with the sign extension of each element in op1. */
1786 x
= CONST0_RTX (mode
);
1791 x
= gen_reg_rtx (mode
);
1793 neg
= ia64_expand_vecint_compare (LT
, mode
, x
, operands
[1],
1798 l
= gen_reg_rtx (wmode
);
1799 h
= gen_reg_rtx (wmode
);
1800 s
= gen_reg_rtx (wmode
);
1802 emit_insn (unpack_l (gen_lowpart (mode
, l
), operands
[1], x
));
1803 emit_insn (unpack_h (gen_lowpart (mode
, h
), operands
[1], x
));
1804 emit_insn (plus (s
, l
, operands
[2]));
1805 emit_insn (plus (operands
[0], h
, s
));
1808 /* Emit a signed or unsigned V8QI dot product operation. */
1811 ia64_expand_dot_prod_v8qi (rtx operands
[4], bool unsignedp
)
1813 rtx l1
, l2
, h1
, h2
, x1
, x2
, p1
, p2
, p3
, p4
, s1
, s2
, s3
;
1815 /* Fill in x1 and x2 with the sign extension of each element. */
1817 x1
= x2
= CONST0_RTX (V8QImode
);
1822 x1
= gen_reg_rtx (V8QImode
);
1823 x2
= gen_reg_rtx (V8QImode
);
1825 neg
= ia64_expand_vecint_compare (LT
, V8QImode
, x1
, operands
[1],
1826 CONST0_RTX (V8QImode
));
1828 neg
= ia64_expand_vecint_compare (LT
, V8QImode
, x2
, operands
[2],
1829 CONST0_RTX (V8QImode
));
1833 l1
= gen_reg_rtx (V4HImode
);
1834 l2
= gen_reg_rtx (V4HImode
);
1835 h1
= gen_reg_rtx (V4HImode
);
1836 h2
= gen_reg_rtx (V4HImode
);
1838 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode
, l1
), operands
[1], x1
));
1839 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode
, l2
), operands
[2], x2
));
1840 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode
, h1
), operands
[1], x1
));
1841 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode
, h2
), operands
[2], x2
));
1843 p1
= gen_reg_rtx (V2SImode
);
1844 p2
= gen_reg_rtx (V2SImode
);
1845 p3
= gen_reg_rtx (V2SImode
);
1846 p4
= gen_reg_rtx (V2SImode
);
1847 emit_insn (gen_pmpy2_r (p1
, l1
, l2
));
1848 emit_insn (gen_pmpy2_l (p2
, l1
, l2
));
1849 emit_insn (gen_pmpy2_r (p3
, h1
, h2
));
1850 emit_insn (gen_pmpy2_l (p4
, h1
, h2
));
1852 s1
= gen_reg_rtx (V2SImode
);
1853 s2
= gen_reg_rtx (V2SImode
);
1854 s3
= gen_reg_rtx (V2SImode
);
1855 emit_insn (gen_addv2si3 (s1
, p1
, p2
));
1856 emit_insn (gen_addv2si3 (s2
, p3
, p4
));
1857 emit_insn (gen_addv2si3 (s3
, s1
, operands
[3]));
1858 emit_insn (gen_addv2si3 (operands
[0], s2
, s3
));
1861 /* Emit the appropriate sequence for a call. */
1864 ia64_expand_call (rtx retval
, rtx addr
, rtx nextarg ATTRIBUTE_UNUSED
,
1869 addr
= XEXP (addr
, 0);
1870 addr
= convert_memory_address (DImode
, addr
);
1871 b0
= gen_rtx_REG (DImode
, R_BR (0));
1873 /* ??? Should do this for functions known to bind local too. */
1874 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
1877 insn
= gen_sibcall_nogp (addr
);
1879 insn
= gen_call_nogp (addr
, b0
);
1881 insn
= gen_call_value_nogp (retval
, addr
, b0
);
1882 insn
= emit_call_insn (insn
);
1887 insn
= gen_sibcall_gp (addr
);
1889 insn
= gen_call_gp (addr
, b0
);
1891 insn
= gen_call_value_gp (retval
, addr
, b0
);
1892 insn
= emit_call_insn (insn
);
1894 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), pic_offset_table_rtx
);
1898 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), b0
);
1902 reg_emitted (enum ia64_frame_regs r
)
1904 if (emitted_frame_related_regs
[r
] == 0)
1905 emitted_frame_related_regs
[r
] = current_frame_info
.r
[r
];
1907 gcc_assert (emitted_frame_related_regs
[r
] == current_frame_info
.r
[r
]);
1911 get_reg (enum ia64_frame_regs r
)
1914 return current_frame_info
.r
[r
];
1918 is_emitted (int regno
)
1920 enum ia64_frame_regs r
;
1922 for (r
= reg_fp
; r
< number_of_ia64_frame_regs
; r
++)
1923 if (emitted_frame_related_regs
[r
] == regno
)
1929 ia64_reload_gp (void)
1933 if (current_frame_info
.r
[reg_save_gp
])
1935 tmp
= gen_rtx_REG (DImode
, get_reg (reg_save_gp
));
1939 HOST_WIDE_INT offset
;
1942 offset
= (current_frame_info
.spill_cfa_off
1943 + current_frame_info
.spill_size
);
1944 if (frame_pointer_needed
)
1946 tmp
= hard_frame_pointer_rtx
;
1951 tmp
= stack_pointer_rtx
;
1952 offset
= current_frame_info
.total_size
- offset
;
1955 offset_r
= GEN_INT (offset
);
1956 if (satisfies_constraint_I (offset_r
))
1957 emit_insn (gen_adddi3 (pic_offset_table_rtx
, tmp
, offset_r
));
1960 emit_move_insn (pic_offset_table_rtx
, offset_r
);
1961 emit_insn (gen_adddi3 (pic_offset_table_rtx
,
1962 pic_offset_table_rtx
, tmp
));
1965 tmp
= gen_rtx_MEM (DImode
, pic_offset_table_rtx
);
1968 emit_move_insn (pic_offset_table_rtx
, tmp
);
1972 ia64_split_call (rtx retval
, rtx addr
, rtx retaddr
, rtx scratch_r
,
1973 rtx scratch_b
, int noreturn_p
, int sibcall_p
)
1976 bool is_desc
= false;
1978 /* If we find we're calling through a register, then we're actually
1979 calling through a descriptor, so load up the values. */
1980 if (REG_P (addr
) && GR_REGNO_P (REGNO (addr
)))
1985 /* ??? We are currently constrained to *not* use peep2, because
1986 we can legitimately change the global lifetime of the GP
1987 (in the form of killing where previously live). This is
1988 because a call through a descriptor doesn't use the previous
1989 value of the GP, while a direct call does, and we do not
1990 commit to either form until the split here.
1992 That said, this means that we lack precise life info for
1993 whether ADDR is dead after this call. This is not terribly
1994 important, since we can fix things up essentially for free
1995 with the POST_DEC below, but it's nice to not use it when we
1996 can immediately tell it's not necessary. */
1997 addr_dead_p
= ((noreturn_p
|| sibcall_p
1998 || TEST_HARD_REG_BIT (regs_invalidated_by_call
,
2000 && !FUNCTION_ARG_REGNO_P (REGNO (addr
)));
2002 /* Load the code address into scratch_b. */
2003 tmp
= gen_rtx_POST_INC (Pmode
, addr
);
2004 tmp
= gen_rtx_MEM (Pmode
, tmp
);
2005 emit_move_insn (scratch_r
, tmp
);
2006 emit_move_insn (scratch_b
, scratch_r
);
2008 /* Load the GP address. If ADDR is not dead here, then we must
2009 revert the change made above via the POST_INCREMENT. */
2011 tmp
= gen_rtx_POST_DEC (Pmode
, addr
);
2014 tmp
= gen_rtx_MEM (Pmode
, tmp
);
2015 emit_move_insn (pic_offset_table_rtx
, tmp
);
2022 insn
= gen_sibcall_nogp (addr
);
2024 insn
= gen_call_value_nogp (retval
, addr
, retaddr
);
2026 insn
= gen_call_nogp (addr
, retaddr
);
2027 emit_call_insn (insn
);
2029 if ((!TARGET_CONST_GP
|| is_desc
) && !noreturn_p
&& !sibcall_p
)
2033 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2035 This differs from the generic code in that we know about the zero-extending
2036 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2037 also know that ld.acq+cmpxchg.rel equals a full barrier.
2039 The loop we want to generate looks like
2044 new_reg = cmp_reg op val;
2045 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2046 if (cmp_reg != old_reg)
2049 Note that we only do the plain load from memory once. Subsequent
2050 iterations use the value loaded by the compare-and-swap pattern. */
2053 ia64_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
2054 rtx old_dst
, rtx new_dst
)
2056 enum machine_mode mode
= GET_MODE (mem
);
2057 rtx old_reg
, new_reg
, cmp_reg
, ar_ccv
, label
;
2058 enum insn_code icode
;
2060 /* Special case for using fetchadd. */
2061 if ((mode
== SImode
|| mode
== DImode
)
2062 && (code
== PLUS
|| code
== MINUS
)
2063 && fetchadd_operand (val
, mode
))
2066 val
= GEN_INT (-INTVAL (val
));
2069 old_dst
= gen_reg_rtx (mode
);
2071 emit_insn (gen_memory_barrier ());
2074 icode
= CODE_FOR_fetchadd_acq_si
;
2076 icode
= CODE_FOR_fetchadd_acq_di
;
2077 emit_insn (GEN_FCN (icode
) (old_dst
, mem
, val
));
2081 new_reg
= expand_simple_binop (mode
, PLUS
, old_dst
, val
, new_dst
,
2083 if (new_reg
!= new_dst
)
2084 emit_move_insn (new_dst
, new_reg
);
2089 /* Because of the volatile mem read, we get an ld.acq, which is the
2090 front half of the full barrier. The end half is the cmpxchg.rel. */
2091 gcc_assert (MEM_VOLATILE_P (mem
));
2093 old_reg
= gen_reg_rtx (DImode
);
2094 cmp_reg
= gen_reg_rtx (DImode
);
2095 label
= gen_label_rtx ();
2099 val
= simplify_gen_subreg (DImode
, val
, mode
, 0);
2100 emit_insn (gen_extend_insn (cmp_reg
, mem
, DImode
, mode
, 1));
2103 emit_move_insn (cmp_reg
, mem
);
2107 ar_ccv
= gen_rtx_REG (DImode
, AR_CCV_REGNUM
);
2108 emit_move_insn (old_reg
, cmp_reg
);
2109 emit_move_insn (ar_ccv
, cmp_reg
);
2112 emit_move_insn (old_dst
, gen_lowpart (mode
, cmp_reg
));
2117 new_reg
= expand_simple_unop (DImode
, NOT
, new_reg
, NULL_RTX
, true);
2120 new_reg
= expand_simple_binop (DImode
, code
, new_reg
, val
, NULL_RTX
,
2121 true, OPTAB_DIRECT
);
2124 new_reg
= gen_lowpart (mode
, new_reg
);
2126 emit_move_insn (new_dst
, new_reg
);
2130 case QImode
: icode
= CODE_FOR_cmpxchg_rel_qi
; break;
2131 case HImode
: icode
= CODE_FOR_cmpxchg_rel_hi
; break;
2132 case SImode
: icode
= CODE_FOR_cmpxchg_rel_si
; break;
2133 case DImode
: icode
= CODE_FOR_cmpxchg_rel_di
; break;
2138 emit_insn (GEN_FCN (icode
) (cmp_reg
, mem
, ar_ccv
, new_reg
));
2140 emit_cmp_and_jump_insns (cmp_reg
, old_reg
, NE
, NULL
, DImode
, true, label
);
2143 /* Begin the assembly file. */
2146 ia64_file_start (void)
2148 /* Variable tracking should be run after all optimizations which change order
2149 of insns. It also needs a valid CFG. This can't be done in
2150 ia64_override_options, because flag_var_tracking is finalized after
2152 ia64_flag_var_tracking
= flag_var_tracking
;
2153 flag_var_tracking
= 0;
2155 default_file_start ();
2156 emit_safe_across_calls ();
2160 emit_safe_across_calls (void)
2162 unsigned int rs
, re
;
2169 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
2173 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
2177 fputs ("\t.pred.safe_across_calls ", asm_out_file
);
2181 fputc (',', asm_out_file
);
2183 fprintf (asm_out_file
, "p%u", rs
);
2185 fprintf (asm_out_file
, "p%u-p%u", rs
, re
- 1);
2189 fputc ('\n', asm_out_file
);
2192 /* Globalize a declaration. */
2195 ia64_globalize_decl_name (FILE * stream
, tree decl
)
2197 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
2198 tree version_attr
= lookup_attribute ("version_id", DECL_ATTRIBUTES (decl
));
2201 tree v
= TREE_VALUE (TREE_VALUE (version_attr
));
2202 const char *p
= TREE_STRING_POINTER (v
);
2203 fprintf (stream
, "\t.alias %s#, \"%s{%s}\"\n", name
, name
, p
);
2205 targetm
.asm_out
.globalize_label (stream
, name
);
2206 if (TREE_CODE (decl
) == FUNCTION_DECL
)
2207 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "function");
2210 /* Helper function for ia64_compute_frame_size: find an appropriate general
2211 register to spill some special register to. SPECIAL_SPILL_MASK contains
2212 bits in GR0 to GR31 that have already been allocated by this routine.
2213 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2216 find_gr_spill (enum ia64_frame_regs r
, int try_locals
)
2220 if (emitted_frame_related_regs
[r
] != 0)
2222 regno
= emitted_frame_related_regs
[r
];
2223 if (regno
>= LOC_REG (0) && regno
< LOC_REG (80 - frame_pointer_needed
)
2224 && current_frame_info
.n_local_regs
< regno
- LOC_REG (0) + 1)
2225 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
2226 else if (current_function_is_leaf
2227 && regno
>= GR_REG (1) && regno
<= GR_REG (31))
2228 current_frame_info
.gr_used_mask
|= 1 << regno
;
2233 /* If this is a leaf function, first try an otherwise unused
2234 call-clobbered register. */
2235 if (current_function_is_leaf
)
2237 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
2238 if (! df_regs_ever_live_p (regno
)
2239 && call_used_regs
[regno
]
2240 && ! fixed_regs
[regno
]
2241 && ! global_regs
[regno
]
2242 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0
2243 && ! is_emitted (regno
))
2245 current_frame_info
.gr_used_mask
|= 1 << regno
;
2252 regno
= current_frame_info
.n_local_regs
;
2253 /* If there is a frame pointer, then we can't use loc79, because
2254 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2255 reg_name switching code in ia64_expand_prologue. */
2256 while (regno
< (80 - frame_pointer_needed
))
2257 if (! is_emitted (LOC_REG (regno
++)))
2259 current_frame_info
.n_local_regs
= regno
;
2260 return LOC_REG (regno
- 1);
2264 /* Failed to find a general register to spill to. Must use stack. */
2268 /* In order to make for nice schedules, we try to allocate every temporary
2269 to a different register. We must of course stay away from call-saved,
2270 fixed, and global registers. We must also stay away from registers
2271 allocated in current_frame_info.gr_used_mask, since those include regs
2272 used all through the prologue.
2274 Any register allocated here must be used immediately. The idea is to
2275 aid scheduling, not to solve data flow problems. */
2277 static int last_scratch_gr_reg
;
2280 next_scratch_gr_reg (void)
2284 for (i
= 0; i
< 32; ++i
)
2286 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
2287 if (call_used_regs
[regno
]
2288 && ! fixed_regs
[regno
]
2289 && ! global_regs
[regno
]
2290 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
2292 last_scratch_gr_reg
= regno
;
2297 /* There must be _something_ available. */
2301 /* Helper function for ia64_compute_frame_size, called through
2302 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2305 mark_reg_gr_used_mask (rtx reg
, void *data ATTRIBUTE_UNUSED
)
2307 unsigned int regno
= REGNO (reg
);
2310 unsigned int i
, n
= hard_regno_nregs
[regno
][GET_MODE (reg
)];
2311 for (i
= 0; i
< n
; ++i
)
2312 current_frame_info
.gr_used_mask
|= 1 << (regno
+ i
);
2317 /* Returns the number of bytes offset between the frame pointer and the stack
2318 pointer for the current function. SIZE is the number of bytes of space
2319 needed for local variables. */
2322 ia64_compute_frame_size (HOST_WIDE_INT size
)
2324 HOST_WIDE_INT total_size
;
2325 HOST_WIDE_INT spill_size
= 0;
2326 HOST_WIDE_INT extra_spill_size
= 0;
2327 HOST_WIDE_INT pretend_args_size
;
2330 int spilled_gr_p
= 0;
2331 int spilled_fr_p
= 0;
2337 if (current_frame_info
.initialized
)
2340 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
2341 CLEAR_HARD_REG_SET (mask
);
2343 /* Don't allocate scratches to the return register. */
2344 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
2346 /* Don't allocate scratches to the EH scratch registers. */
2347 if (cfun
->machine
->ia64_eh_epilogue_sp
)
2348 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
2349 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
2350 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
2352 /* Find the size of the register stack frame. We have only 80 local
2353 registers, because we reserve 8 for the inputs and 8 for the
2356 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2357 since we'll be adjusting that down later. */
2358 regno
= LOC_REG (78) + ! frame_pointer_needed
;
2359 for (; regno
>= LOC_REG (0); regno
--)
2360 if (df_regs_ever_live_p (regno
) && !is_emitted (regno
))
2362 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
2364 /* For functions marked with the syscall_linkage attribute, we must mark
2365 all eight input registers as in use, so that locals aren't visible to
2368 if (cfun
->machine
->n_varargs
> 0
2369 || lookup_attribute ("syscall_linkage",
2370 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
2371 current_frame_info
.n_input_regs
= 8;
2374 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
2375 if (df_regs_ever_live_p (regno
))
2377 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
2380 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
2381 if (df_regs_ever_live_p (regno
))
2383 i
= regno
- OUT_REG (0) + 1;
2385 #ifndef PROFILE_HOOK
2386 /* When -p profiling, we need one output register for the mcount argument.
2387 Likewise for -a profiling for the bb_init_func argument. For -ax
2388 profiling, we need two output registers for the two bb_init_trace_func
2393 current_frame_info
.n_output_regs
= i
;
2395 /* ??? No rotating register support yet. */
2396 current_frame_info
.n_rotate_regs
= 0;
2398 /* Discover which registers need spilling, and how much room that
2399 will take. Begin with floating point and general registers,
2400 which will always wind up on the stack. */
2402 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
2403 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2405 SET_HARD_REG_BIT (mask
, regno
);
2411 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
2412 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2414 SET_HARD_REG_BIT (mask
, regno
);
2420 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
2421 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2423 SET_HARD_REG_BIT (mask
, regno
);
2428 /* Now come all special registers that might get saved in other
2429 general registers. */
2431 if (frame_pointer_needed
)
2433 current_frame_info
.r
[reg_fp
] = find_gr_spill (reg_fp
, 1);
2434 /* If we did not get a register, then we take LOC79. This is guaranteed
2435 to be free, even if regs_ever_live is already set, because this is
2436 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2437 as we don't count loc79 above. */
2438 if (current_frame_info
.r
[reg_fp
] == 0)
2440 current_frame_info
.r
[reg_fp
] = LOC_REG (79);
2441 current_frame_info
.n_local_regs
= LOC_REG (79) - LOC_REG (0) + 1;
2445 if (! current_function_is_leaf
)
2447 /* Emit a save of BR0 if we call other functions. Do this even
2448 if this function doesn't return, as EH depends on this to be
2449 able to unwind the stack. */
2450 SET_HARD_REG_BIT (mask
, BR_REG (0));
2452 current_frame_info
.r
[reg_save_b0
] = find_gr_spill (reg_save_b0
, 1);
2453 if (current_frame_info
.r
[reg_save_b0
] == 0)
2455 extra_spill_size
+= 8;
2459 /* Similarly for ar.pfs. */
2460 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2461 current_frame_info
.r
[reg_save_ar_pfs
] = find_gr_spill (reg_save_ar_pfs
, 1);
2462 if (current_frame_info
.r
[reg_save_ar_pfs
] == 0)
2464 extra_spill_size
+= 8;
2468 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2469 registers are clobbered, so we fall back to the stack. */
2470 current_frame_info
.r
[reg_save_gp
]
2471 = (cfun
->calls_setjmp
? 0 : find_gr_spill (reg_save_gp
, 1));
2472 if (current_frame_info
.r
[reg_save_gp
] == 0)
2474 SET_HARD_REG_BIT (mask
, GR_REG (1));
2481 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs
[BR_REG (0)])
2483 SET_HARD_REG_BIT (mask
, BR_REG (0));
2484 extra_spill_size
+= 8;
2488 if (df_regs_ever_live_p (AR_PFS_REGNUM
))
2490 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2491 current_frame_info
.r
[reg_save_ar_pfs
]
2492 = find_gr_spill (reg_save_ar_pfs
, 1);
2493 if (current_frame_info
.r
[reg_save_ar_pfs
] == 0)
2495 extra_spill_size
+= 8;
2501 /* Unwind descriptor hackery: things are most efficient if we allocate
2502 consecutive GR save registers for RP, PFS, FP in that order. However,
2503 it is absolutely critical that FP get the only hard register that's
2504 guaranteed to be free, so we allocated it first. If all three did
2505 happen to be allocated hard regs, and are consecutive, rearrange them
2506 into the preferred order now.
2508 If we have already emitted code for any of those registers,
2509 then it's already too late to change. */
2510 min_regno
= MIN (current_frame_info
.r
[reg_fp
],
2511 MIN (current_frame_info
.r
[reg_save_b0
],
2512 current_frame_info
.r
[reg_save_ar_pfs
]));
2513 max_regno
= MAX (current_frame_info
.r
[reg_fp
],
2514 MAX (current_frame_info
.r
[reg_save_b0
],
2515 current_frame_info
.r
[reg_save_ar_pfs
]));
2517 && min_regno
+ 2 == max_regno
2518 && (current_frame_info
.r
[reg_fp
] == min_regno
+ 1
2519 || current_frame_info
.r
[reg_save_b0
] == min_regno
+ 1
2520 || current_frame_info
.r
[reg_save_ar_pfs
] == min_regno
+ 1)
2521 && (emitted_frame_related_regs
[reg_save_b0
] == 0
2522 || emitted_frame_related_regs
[reg_save_b0
] == min_regno
)
2523 && (emitted_frame_related_regs
[reg_save_ar_pfs
] == 0
2524 || emitted_frame_related_regs
[reg_save_ar_pfs
] == min_regno
+ 1)
2525 && (emitted_frame_related_regs
[reg_fp
] == 0
2526 || emitted_frame_related_regs
[reg_fp
] == min_regno
+ 2))
2528 current_frame_info
.r
[reg_save_b0
] = min_regno
;
2529 current_frame_info
.r
[reg_save_ar_pfs
] = min_regno
+ 1;
2530 current_frame_info
.r
[reg_fp
] = min_regno
+ 2;
2533 /* See if we need to store the predicate register block. */
2534 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2535 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2537 if (regno
<= PR_REG (63))
2539 SET_HARD_REG_BIT (mask
, PR_REG (0));
2540 current_frame_info
.r
[reg_save_pr
] = find_gr_spill (reg_save_pr
, 1);
2541 if (current_frame_info
.r
[reg_save_pr
] == 0)
2543 extra_spill_size
+= 8;
2547 /* ??? Mark them all as used so that register renaming and such
2548 are free to use them. */
2549 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2550 df_set_regs_ever_live (regno
, true);
2553 /* If we're forced to use st8.spill, we're forced to save and restore
2554 ar.unat as well. The check for existing liveness allows inline asm
2555 to touch ar.unat. */
2556 if (spilled_gr_p
|| cfun
->machine
->n_varargs
2557 || df_regs_ever_live_p (AR_UNAT_REGNUM
))
2559 df_set_regs_ever_live (AR_UNAT_REGNUM
, true);
2560 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
2561 current_frame_info
.r
[reg_save_ar_unat
]
2562 = find_gr_spill (reg_save_ar_unat
, spill_size
== 0);
2563 if (current_frame_info
.r
[reg_save_ar_unat
] == 0)
2565 extra_spill_size
+= 8;
2570 if (df_regs_ever_live_p (AR_LC_REGNUM
))
2572 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
2573 current_frame_info
.r
[reg_save_ar_lc
]
2574 = find_gr_spill (reg_save_ar_lc
, spill_size
== 0);
2575 if (current_frame_info
.r
[reg_save_ar_lc
] == 0)
2577 extra_spill_size
+= 8;
2582 /* If we have an odd number of words of pretend arguments written to
2583 the stack, then the FR save area will be unaligned. We round the
2584 size of this area up to keep things 16 byte aligned. */
2586 pretend_args_size
= IA64_STACK_ALIGN (crtl
->args
.pretend_args_size
);
2588 pretend_args_size
= crtl
->args
.pretend_args_size
;
2590 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
2591 + crtl
->outgoing_args_size
);
2592 total_size
= IA64_STACK_ALIGN (total_size
);
2594 /* We always use the 16-byte scratch area provided by the caller, but
2595 if we are a leaf function, there's no one to which we need to provide
2597 if (current_function_is_leaf
)
2598 total_size
= MAX (0, total_size
- 16);
2600 current_frame_info
.total_size
= total_size
;
2601 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
2602 current_frame_info
.spill_size
= spill_size
;
2603 current_frame_info
.extra_spill_size
= extra_spill_size
;
2604 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
2605 current_frame_info
.n_spilled
= n_spilled
;
2606 current_frame_info
.initialized
= reload_completed
;
2609 /* Compute the initial difference between the specified pair of registers. */
2612 ia64_initial_elimination_offset (int from
, int to
)
2614 HOST_WIDE_INT offset
;
2616 ia64_compute_frame_size (get_frame_size ());
2619 case FRAME_POINTER_REGNUM
:
2622 case HARD_FRAME_POINTER_REGNUM
:
2623 if (current_function_is_leaf
)
2624 offset
= -current_frame_info
.total_size
;
2626 offset
= -(current_frame_info
.total_size
2627 - crtl
->outgoing_args_size
- 16);
2630 case STACK_POINTER_REGNUM
:
2631 if (current_function_is_leaf
)
2634 offset
= 16 + crtl
->outgoing_args_size
;
2642 case ARG_POINTER_REGNUM
:
2643 /* Arguments start above the 16 byte save area, unless stdarg
2644 in which case we store through the 16 byte save area. */
2647 case HARD_FRAME_POINTER_REGNUM
:
2648 offset
= 16 - crtl
->args
.pretend_args_size
;
2651 case STACK_POINTER_REGNUM
:
2652 offset
= (current_frame_info
.total_size
2653 + 16 - crtl
->args
.pretend_args_size
);
2668 /* If there are more than a trivial number of register spills, we use
2669 two interleaved iterators so that we can get two memory references
2672 In order to simplify things in the prologue and epilogue expanders,
2673 we use helper functions to fix up the memory references after the
2674 fact with the appropriate offsets to a POST_MODIFY memory mode.
2675 The following data structure tracks the state of the two iterators
2676 while insns are being emitted. */
2678 struct spill_fill_data
2680 rtx init_after
; /* point at which to emit initializations */
2681 rtx init_reg
[2]; /* initial base register */
2682 rtx iter_reg
[2]; /* the iterator registers */
2683 rtx
*prev_addr
[2]; /* address of last memory use */
2684 rtx prev_insn
[2]; /* the insn corresponding to prev_addr */
2685 HOST_WIDE_INT prev_off
[2]; /* last offset */
2686 int n_iter
; /* number of iterators in use */
2687 int next_iter
; /* next iterator to use */
2688 unsigned int save_gr_used_mask
;
2691 static struct spill_fill_data spill_fill_data
;
2694 setup_spill_pointers (int n_spills
, rtx init_reg
, HOST_WIDE_INT cfa_off
)
2698 spill_fill_data
.init_after
= get_last_insn ();
2699 spill_fill_data
.init_reg
[0] = init_reg
;
2700 spill_fill_data
.init_reg
[1] = init_reg
;
2701 spill_fill_data
.prev_addr
[0] = NULL
;
2702 spill_fill_data
.prev_addr
[1] = NULL
;
2703 spill_fill_data
.prev_insn
[0] = NULL
;
2704 spill_fill_data
.prev_insn
[1] = NULL
;
2705 spill_fill_data
.prev_off
[0] = cfa_off
;
2706 spill_fill_data
.prev_off
[1] = cfa_off
;
2707 spill_fill_data
.next_iter
= 0;
2708 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
2710 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
2711 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
2713 int regno
= next_scratch_gr_reg ();
2714 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
2715 current_frame_info
.gr_used_mask
|= 1 << regno
;
2720 finish_spill_pointers (void)
2722 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
2726 spill_restore_mem (rtx reg
, HOST_WIDE_INT cfa_off
)
2728 int iter
= spill_fill_data
.next_iter
;
2729 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
2730 rtx disp_rtx
= GEN_INT (disp
);
2733 if (spill_fill_data
.prev_addr
[iter
])
2735 if (satisfies_constraint_N (disp_rtx
))
2737 *spill_fill_data
.prev_addr
[iter
]
2738 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
2739 gen_rtx_PLUS (DImode
,
2740 spill_fill_data
.iter_reg
[iter
],
2742 REG_NOTES (spill_fill_data
.prev_insn
[iter
])
2743 = gen_rtx_EXPR_LIST (REG_INC
, spill_fill_data
.iter_reg
[iter
],
2744 REG_NOTES (spill_fill_data
.prev_insn
[iter
]));
2748 /* ??? Could use register post_modify for loads. */
2749 if (!satisfies_constraint_I (disp_rtx
))
2751 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
2752 emit_move_insn (tmp
, disp_rtx
);
2755 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
2756 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
2759 /* Micro-optimization: if we've created a frame pointer, it's at
2760 CFA 0, which may allow the real iterator to be initialized lower,
2761 slightly increasing parallelism. Also, if there are few saves
2762 it may eliminate the iterator entirely. */
2764 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
2765 && frame_pointer_needed
)
2767 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
2768 set_mem_alias_set (mem
, get_varargs_alias_set ());
2776 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
2777 spill_fill_data
.init_reg
[iter
]);
2782 if (!satisfies_constraint_I (disp_rtx
))
2784 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
2785 emit_move_insn (tmp
, disp_rtx
);
2789 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
2790 spill_fill_data
.init_reg
[iter
],
2797 /* Careful for being the first insn in a sequence. */
2798 if (spill_fill_data
.init_after
)
2799 insn
= emit_insn_after (seq
, spill_fill_data
.init_after
);
2802 rtx first
= get_insns ();
2804 insn
= emit_insn_before (seq
, first
);
2806 insn
= emit_insn (seq
);
2808 spill_fill_data
.init_after
= insn
;
2811 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
2813 /* ??? Not all of the spills are for varargs, but some of them are.
2814 The rest of the spills belong in an alias set of their own. But
2815 it doesn't actually hurt to include them here. */
2816 set_mem_alias_set (mem
, get_varargs_alias_set ());
2818 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
2819 spill_fill_data
.prev_off
[iter
] = cfa_off
;
2821 if (++iter
>= spill_fill_data
.n_iter
)
2823 spill_fill_data
.next_iter
= iter
;
2829 do_spill (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
,
2832 int iter
= spill_fill_data
.next_iter
;
2835 mem
= spill_restore_mem (reg
, cfa_off
);
2836 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
2837 spill_fill_data
.prev_insn
[iter
] = insn
;
2844 RTX_FRAME_RELATED_P (insn
) = 1;
2846 /* Don't even pretend that the unwind code can intuit its way
2847 through a pair of interleaved post_modify iterators. Just
2848 provide the correct answer. */
2850 if (frame_pointer_needed
)
2852 base
= hard_frame_pointer_rtx
;
2857 base
= stack_pointer_rtx
;
2858 off
= current_frame_info
.total_size
- cfa_off
;
2862 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2863 gen_rtx_SET (VOIDmode
,
2864 gen_rtx_MEM (GET_MODE (reg
),
2865 plus_constant (base
, off
)),
2872 do_restore (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
)
2874 int iter
= spill_fill_data
.next_iter
;
2877 insn
= emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
2878 GEN_INT (cfa_off
)));
2879 spill_fill_data
.prev_insn
[iter
] = insn
;
2882 /* Wrapper functions that discards the CONST_INT spill offset. These
2883 exist so that we can give gr_spill/gr_fill the offset they need and
2884 use a consistent function interface. */
2887 gen_movdi_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
2889 return gen_movdi (dest
, src
);
2893 gen_fr_spill_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
2895 return gen_fr_spill (dest
, src
);
2899 gen_fr_restore_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
2901 return gen_fr_restore (dest
, src
);
2904 /* Called after register allocation to add any instructions needed for the
2905 prologue. Using a prologue insn is favored compared to putting all of the
2906 instructions in output_function_prologue(), since it allows the scheduler
2907 to intermix instructions with the saves of the caller saved registers. In
2908 some cases, it might be necessary to emit a barrier instruction as the last
2909 insn to prevent such scheduling.
2911 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2912 so that the debug info generation code can handle them properly.
2914 The register save area is layed out like so:
2916 [ varargs spill area ]
2917 [ fr register spill area ]
2918 [ br register spill area ]
2919 [ ar register spill area ]
2920 [ pr register spill area ]
2921 [ gr register spill area ] */
2923 /* ??? Get inefficient code when the frame size is larger than can fit in an
2924 adds instruction. */
2927 ia64_expand_prologue (void)
2929 rtx insn
, ar_pfs_save_reg
, ar_unat_save_reg
;
2930 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
2933 ia64_compute_frame_size (get_frame_size ());
2934 last_scratch_gr_reg
= 15;
2938 fprintf (dump_file
, "ia64 frame related registers "
2939 "recorded in current_frame_info.r[]:\n");
2940 #define PRINTREG(a) if (current_frame_info.r[a]) \
2941 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
2943 PRINTREG(reg_save_b0
);
2944 PRINTREG(reg_save_pr
);
2945 PRINTREG(reg_save_ar_pfs
);
2946 PRINTREG(reg_save_ar_unat
);
2947 PRINTREG(reg_save_ar_lc
);
2948 PRINTREG(reg_save_gp
);
2952 /* If there is no epilogue, then we don't need some prologue insns.
2953 We need to avoid emitting the dead prologue insns, because flow
2954 will complain about them. */
2960 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
2961 if ((e
->flags
& EDGE_FAKE
) == 0
2962 && (e
->flags
& EDGE_FALLTHRU
) != 0)
2964 epilogue_p
= (e
!= NULL
);
2969 /* Set the local, input, and output register names. We need to do this
2970 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2971 half. If we use in/loc/out register names, then we get assembler errors
2972 in crtn.S because there is no alloc insn or regstk directive in there. */
2973 if (! TARGET_REG_NAMES
)
2975 int inputs
= current_frame_info
.n_input_regs
;
2976 int locals
= current_frame_info
.n_local_regs
;
2977 int outputs
= current_frame_info
.n_output_regs
;
2979 for (i
= 0; i
< inputs
; i
++)
2980 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
2981 for (i
= 0; i
< locals
; i
++)
2982 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
2983 for (i
= 0; i
< outputs
; i
++)
2984 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
2987 /* Set the frame pointer register name. The regnum is logically loc79,
2988 but of course we'll not have allocated that many locals. Rather than
2989 worrying about renumbering the existing rtxs, we adjust the name. */
2990 /* ??? This code means that we can never use one local register when
2991 there is a frame pointer. loc79 gets wasted in this case, as it is
2992 renamed to a register that will never be used. See also the try_locals
2993 code in find_gr_spill. */
2994 if (current_frame_info
.r
[reg_fp
])
2996 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2997 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2998 = reg_names
[current_frame_info
.r
[reg_fp
]];
2999 reg_names
[current_frame_info
.r
[reg_fp
]] = tmp
;
3002 /* We don't need an alloc instruction if we've used no outputs or locals. */
3003 if (current_frame_info
.n_local_regs
== 0
3004 && current_frame_info
.n_output_regs
== 0
3005 && current_frame_info
.n_input_regs
<= crtl
->args
.info
.int_regs
3006 && !TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
3008 /* If there is no alloc, but there are input registers used, then we
3009 need a .regstk directive. */
3010 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
3011 ar_pfs_save_reg
= NULL_RTX
;
3015 current_frame_info
.need_regstk
= 0;
3017 if (current_frame_info
.r
[reg_save_ar_pfs
])
3019 regno
= current_frame_info
.r
[reg_save_ar_pfs
];
3020 reg_emitted (reg_save_ar_pfs
);
3023 regno
= next_scratch_gr_reg ();
3024 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
3026 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
3027 GEN_INT (current_frame_info
.n_input_regs
),
3028 GEN_INT (current_frame_info
.n_local_regs
),
3029 GEN_INT (current_frame_info
.n_output_regs
),
3030 GEN_INT (current_frame_info
.n_rotate_regs
)));
3031 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.r
[reg_save_ar_pfs
] != 0);
3034 /* Set up frame pointer, stack pointer, and spill iterators. */
3036 n_varargs
= cfun
->machine
->n_varargs
;
3037 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
3038 stack_pointer_rtx
, 0);
3040 if (frame_pointer_needed
)
3042 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3043 RTX_FRAME_RELATED_P (insn
) = 1;
3046 if (current_frame_info
.total_size
!= 0)
3048 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
3051 if (satisfies_constraint_I (frame_size_rtx
))
3052 offset
= frame_size_rtx
;
3055 regno
= next_scratch_gr_reg ();
3056 offset
= gen_rtx_REG (DImode
, regno
);
3057 emit_move_insn (offset
, frame_size_rtx
);
3060 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
3061 stack_pointer_rtx
, offset
));
3063 if (! frame_pointer_needed
)
3065 RTX_FRAME_RELATED_P (insn
) = 1;
3066 if (GET_CODE (offset
) != CONST_INT
)
3069 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
3070 gen_rtx_SET (VOIDmode
,
3072 gen_rtx_PLUS (DImode
,
3079 /* ??? At this point we must generate a magic insn that appears to
3080 modify the stack pointer, the frame pointer, and all spill
3081 iterators. This would allow the most scheduling freedom. For
3082 now, just hard stop. */
3083 emit_insn (gen_blockage ());
3086 /* Must copy out ar.unat before doing any integer spills. */
3087 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3089 if (current_frame_info
.r
[reg_save_ar_unat
])
3092 = gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_unat
]);
3093 reg_emitted (reg_save_ar_unat
);
3097 alt_regno
= next_scratch_gr_reg ();
3098 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
3099 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
3102 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3103 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
3104 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.r
[reg_save_ar_unat
] != 0);
3106 /* Even if we're not going to generate an epilogue, we still
3107 need to save the register so that EH works. */
3108 if (! epilogue_p
&& current_frame_info
.r
[reg_save_ar_unat
])
3109 emit_insn (gen_prologue_use (ar_unat_save_reg
));
3112 ar_unat_save_reg
= NULL_RTX
;
3114 /* Spill all varargs registers. Do this before spilling any GR registers,
3115 since we want the UNAT bits for the GR registers to override the UNAT
3116 bits from varargs, which we don't care about. */
3119 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
3121 reg
= gen_rtx_REG (DImode
, regno
);
3122 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
3125 /* Locate the bottom of the register save area. */
3126 cfa_off
= (current_frame_info
.spill_cfa_off
3127 + current_frame_info
.spill_size
3128 + current_frame_info
.extra_spill_size
);
3130 /* Save the predicate register block either in a register or in memory. */
3131 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
3133 reg
= gen_rtx_REG (DImode
, PR_REG (0));
3134 if (current_frame_info
.r
[reg_save_pr
] != 0)
3136 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_pr
]);
3137 reg_emitted (reg_save_pr
);
3138 insn
= emit_move_insn (alt_reg
, reg
);
3140 /* ??? Denote pr spill/fill by a DImode move that modifies all
3141 64 hard registers. */
3142 RTX_FRAME_RELATED_P (insn
) = 1;
3144 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
3145 gen_rtx_SET (VOIDmode
, alt_reg
, reg
),
3148 /* Even if we're not going to generate an epilogue, we still
3149 need to save the register so that EH works. */
3151 emit_insn (gen_prologue_use (alt_reg
));
3155 alt_regno
= next_scratch_gr_reg ();
3156 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3157 insn
= emit_move_insn (alt_reg
, reg
);
3158 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3163 /* Handle AR regs in numerical order. All of them get special handling. */
3164 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
3165 && current_frame_info
.r
[reg_save_ar_unat
] == 0)
3167 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3168 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
3172 /* The alloc insn already copied ar.pfs into a general register. The
3173 only thing we have to do now is copy that register to a stack slot
3174 if we'd not allocated a local register for the job. */
3175 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
)
3176 && current_frame_info
.r
[reg_save_ar_pfs
] == 0)
3178 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3179 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
3183 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
3185 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
3186 if (current_frame_info
.r
[reg_save_ar_lc
] != 0)
3188 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_lc
]);
3189 reg_emitted (reg_save_ar_lc
);
3190 insn
= emit_move_insn (alt_reg
, reg
);
3191 RTX_FRAME_RELATED_P (insn
) = 1;
3193 /* Even if we're not going to generate an epilogue, we still
3194 need to save the register so that EH works. */
3196 emit_insn (gen_prologue_use (alt_reg
));
3200 alt_regno
= next_scratch_gr_reg ();
3201 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3202 emit_move_insn (alt_reg
, reg
);
3203 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3208 /* Save the return pointer. */
3209 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3211 reg
= gen_rtx_REG (DImode
, BR_REG (0));
3212 if (current_frame_info
.r
[reg_save_b0
] != 0)
3214 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
3215 reg_emitted (reg_save_b0
);
3216 insn
= emit_move_insn (alt_reg
, reg
);
3217 RTX_FRAME_RELATED_P (insn
) = 1;
3219 /* Even if we're not going to generate an epilogue, we still
3220 need to save the register so that EH works. */
3222 emit_insn (gen_prologue_use (alt_reg
));
3226 alt_regno
= next_scratch_gr_reg ();
3227 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3228 emit_move_insn (alt_reg
, reg
);
3229 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3234 if (current_frame_info
.r
[reg_save_gp
])
3236 reg_emitted (reg_save_gp
);
3237 insn
= emit_move_insn (gen_rtx_REG (DImode
,
3238 current_frame_info
.r
[reg_save_gp
]),
3239 pic_offset_table_rtx
);
3242 /* We should now be at the base of the gr/br/fr spill area. */
3243 gcc_assert (cfa_off
== (current_frame_info
.spill_cfa_off
3244 + current_frame_info
.spill_size
));
3246 /* Spill all general registers. */
3247 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
3248 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3250 reg
= gen_rtx_REG (DImode
, regno
);
3251 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
3255 /* Spill the rest of the BR registers. */
3256 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
3257 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3259 alt_regno
= next_scratch_gr_reg ();
3260 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3261 reg
= gen_rtx_REG (DImode
, regno
);
3262 emit_move_insn (alt_reg
, reg
);
3263 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3267 /* Align the frame and spill all FR registers. */
3268 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
3269 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3271 gcc_assert (!(cfa_off
& 15));
3272 reg
= gen_rtx_REG (XFmode
, regno
);
3273 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
3277 gcc_assert (cfa_off
== current_frame_info
.spill_cfa_off
);
3279 finish_spill_pointers ();
3282 /* Called after register allocation to add any instructions needed for the
3283 epilogue. Using an epilogue insn is favored compared to putting all of the
3284 instructions in output_function_prologue(), since it allows the scheduler
3285 to intermix instructions with the saves of the caller saved registers. In
3286 some cases, it might be necessary to emit a barrier instruction as the last
3287 insn to prevent such scheduling. */
3290 ia64_expand_epilogue (int sibcall_p
)
3292 rtx insn
, reg
, alt_reg
, ar_unat_save_reg
;
3293 int regno
, alt_regno
, cfa_off
;
3295 ia64_compute_frame_size (get_frame_size ());
3297 /* If there is a frame pointer, then we use it instead of the stack
3298 pointer, so that the stack pointer does not need to be valid when
3299 the epilogue starts. See EXIT_IGNORE_STACK. */
3300 if (frame_pointer_needed
)
3301 setup_spill_pointers (current_frame_info
.n_spilled
,
3302 hard_frame_pointer_rtx
, 0);
3304 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
3305 current_frame_info
.total_size
);
3307 if (current_frame_info
.total_size
!= 0)
3309 /* ??? At this point we must generate a magic insn that appears to
3310 modify the spill iterators and the frame pointer. This would
3311 allow the most scheduling freedom. For now, just hard stop. */
3312 emit_insn (gen_blockage ());
3315 /* Locate the bottom of the register save area. */
3316 cfa_off
= (current_frame_info
.spill_cfa_off
3317 + current_frame_info
.spill_size
3318 + current_frame_info
.extra_spill_size
);
3320 /* Restore the predicate registers. */
3321 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
3323 if (current_frame_info
.r
[reg_save_pr
] != 0)
3325 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_pr
]);
3326 reg_emitted (reg_save_pr
);
3330 alt_regno
= next_scratch_gr_reg ();
3331 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3332 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3335 reg
= gen_rtx_REG (DImode
, PR_REG (0));
3336 emit_move_insn (reg
, alt_reg
);
3339 /* Restore the application registers. */
3341 /* Load the saved unat from the stack, but do not restore it until
3342 after the GRs have been restored. */
3343 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3345 if (current_frame_info
.r
[reg_save_ar_unat
] != 0)
3348 = gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_unat
]);
3349 reg_emitted (reg_save_ar_unat
);
3353 alt_regno
= next_scratch_gr_reg ();
3354 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
3355 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
3356 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
3361 ar_unat_save_reg
= NULL_RTX
;
3363 if (current_frame_info
.r
[reg_save_ar_pfs
] != 0)
3365 reg_emitted (reg_save_ar_pfs
);
3366 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_pfs
]);
3367 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3368 emit_move_insn (reg
, alt_reg
);
3370 else if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
3372 alt_regno
= next_scratch_gr_reg ();
3373 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3374 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3376 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3377 emit_move_insn (reg
, alt_reg
);
3380 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
3382 if (current_frame_info
.r
[reg_save_ar_lc
] != 0)
3384 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_lc
]);
3385 reg_emitted (reg_save_ar_lc
);
3389 alt_regno
= next_scratch_gr_reg ();
3390 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3391 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3394 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
3395 emit_move_insn (reg
, alt_reg
);
3398 /* Restore the return pointer. */
3399 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3401 if (current_frame_info
.r
[reg_save_b0
] != 0)
3403 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
3404 reg_emitted (reg_save_b0
);
3408 alt_regno
= next_scratch_gr_reg ();
3409 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3410 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3413 reg
= gen_rtx_REG (DImode
, BR_REG (0));
3414 emit_move_insn (reg
, alt_reg
);
3417 /* We should now be at the base of the gr/br/fr spill area. */
3418 gcc_assert (cfa_off
== (current_frame_info
.spill_cfa_off
3419 + current_frame_info
.spill_size
));
3421 /* The GP may be stored on the stack in the prologue, but it's
3422 never restored in the epilogue. Skip the stack slot. */
3423 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, GR_REG (1)))
3426 /* Restore all general registers. */
3427 for (regno
= GR_REG (2); regno
<= GR_REG (31); ++regno
)
3428 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3430 reg
= gen_rtx_REG (DImode
, regno
);
3431 do_restore (gen_gr_restore
, reg
, cfa_off
);
3435 /* Restore the branch registers. */
3436 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
3437 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3439 alt_regno
= next_scratch_gr_reg ();
3440 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3441 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3443 reg
= gen_rtx_REG (DImode
, regno
);
3444 emit_move_insn (reg
, alt_reg
);
3447 /* Restore floating point registers. */
3448 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
3449 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3451 gcc_assert (!(cfa_off
& 15));
3452 reg
= gen_rtx_REG (XFmode
, regno
);
3453 do_restore (gen_fr_restore_x
, reg
, cfa_off
);
3457 /* Restore ar.unat for real. */
3458 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3460 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3461 emit_move_insn (reg
, ar_unat_save_reg
);
3464 gcc_assert (cfa_off
== current_frame_info
.spill_cfa_off
);
3466 finish_spill_pointers ();
3468 if (current_frame_info
.total_size
3469 || cfun
->machine
->ia64_eh_epilogue_sp
3470 || frame_pointer_needed
)
3472 /* ??? At this point we must generate a magic insn that appears to
3473 modify the spill iterators, the stack pointer, and the frame
3474 pointer. This would allow the most scheduling freedom. For now,
3476 emit_insn (gen_blockage ());
3479 if (cfun
->machine
->ia64_eh_epilogue_sp
)
3480 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
3481 else if (frame_pointer_needed
)
3483 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
3484 RTX_FRAME_RELATED_P (insn
) = 1;
3486 else if (current_frame_info
.total_size
)
3488 rtx offset
, frame_size_rtx
;
3490 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
3491 if (satisfies_constraint_I (frame_size_rtx
))
3492 offset
= frame_size_rtx
;
3495 regno
= next_scratch_gr_reg ();
3496 offset
= gen_rtx_REG (DImode
, regno
);
3497 emit_move_insn (offset
, frame_size_rtx
);
3500 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
3503 RTX_FRAME_RELATED_P (insn
) = 1;
3504 if (GET_CODE (offset
) != CONST_INT
)
3507 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
3508 gen_rtx_SET (VOIDmode
,
3510 gen_rtx_PLUS (DImode
,
3517 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
3518 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
3521 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
3524 int fp
= GR_REG (2);
3525 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
3526 first available call clobbered register. If there was a frame_pointer
3527 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3528 so we have to make sure we're using the string "r2" when emitting
3529 the register name for the assembler. */
3530 if (current_frame_info
.r
[reg_fp
]
3531 && current_frame_info
.r
[reg_fp
] == GR_REG (2))
3532 fp
= HARD_FRAME_POINTER_REGNUM
;
3534 /* We must emit an alloc to force the input registers to become output
3535 registers. Otherwise, if the callee tries to pass its parameters
3536 through to another call without an intervening alloc, then these
3538 /* ??? We don't need to preserve all input registers. We only need to
3539 preserve those input registers used as arguments to the sibling call.
3540 It is unclear how to compute that number here. */
3541 if (current_frame_info
.n_input_regs
!= 0)
3543 rtx n_inputs
= GEN_INT (current_frame_info
.n_input_regs
);
3544 insn
= emit_insn (gen_alloc (gen_rtx_REG (DImode
, fp
),
3545 const0_rtx
, const0_rtx
,
3546 n_inputs
, const0_rtx
));
3547 RTX_FRAME_RELATED_P (insn
) = 1;
3552 /* Return 1 if br.ret can do all the work required to return from a
3556 ia64_direct_return (void)
3558 if (reload_completed
&& ! frame_pointer_needed
)
3560 ia64_compute_frame_size (get_frame_size ());
3562 return (current_frame_info
.total_size
== 0
3563 && current_frame_info
.n_spilled
== 0
3564 && current_frame_info
.r
[reg_save_b0
] == 0
3565 && current_frame_info
.r
[reg_save_pr
] == 0
3566 && current_frame_info
.r
[reg_save_ar_pfs
] == 0
3567 && current_frame_info
.r
[reg_save_ar_unat
] == 0
3568 && current_frame_info
.r
[reg_save_ar_lc
] == 0);
3573 /* Return the magic cookie that we use to hold the return address
3574 during early compilation. */
3577 ia64_return_addr_rtx (HOST_WIDE_INT count
, rtx frame ATTRIBUTE_UNUSED
)
3581 return gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_RET_ADDR
);
3584 /* Split this value after reload, now that we know where the return
3585 address is saved. */
3588 ia64_split_return_addr_rtx (rtx dest
)
3592 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3594 if (current_frame_info
.r
[reg_save_b0
] != 0)
3596 src
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
3597 reg_emitted (reg_save_b0
);
3605 /* Compute offset from CFA for BR0. */
3606 /* ??? Must be kept in sync with ia64_expand_prologue. */
3607 off
= (current_frame_info
.spill_cfa_off
3608 + current_frame_info
.spill_size
);
3609 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
3610 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3613 /* Convert CFA offset to a register based offset. */
3614 if (frame_pointer_needed
)
3615 src
= hard_frame_pointer_rtx
;
3618 src
= stack_pointer_rtx
;
3619 off
+= current_frame_info
.total_size
;
3622 /* Load address into scratch register. */
3623 off_r
= GEN_INT (off
);
3624 if (satisfies_constraint_I (off_r
))
3625 emit_insn (gen_adddi3 (dest
, src
, off_r
));
3628 emit_move_insn (dest
, off_r
);
3629 emit_insn (gen_adddi3 (dest
, src
, dest
));
3632 src
= gen_rtx_MEM (Pmode
, dest
);
3636 src
= gen_rtx_REG (DImode
, BR_REG (0));
3638 emit_move_insn (dest
, src
);
3642 ia64_hard_regno_rename_ok (int from
, int to
)
3644 /* Don't clobber any of the registers we reserved for the prologue. */
3645 enum ia64_frame_regs r
;
3647 for (r
= reg_fp
; r
<= reg_save_ar_lc
; r
++)
3648 if (to
== current_frame_info
.r
[r
]
3649 || from
== current_frame_info
.r
[r
]
3650 || to
== emitted_frame_related_regs
[r
]
3651 || from
== emitted_frame_related_regs
[r
])
3654 /* Don't use output registers outside the register frame. */
3655 if (OUT_REGNO_P (to
) && to
>= OUT_REG (current_frame_info
.n_output_regs
))
3658 /* Retain even/oddness on predicate register pairs. */
3659 if (PR_REGNO_P (from
) && PR_REGNO_P (to
))
3660 return (from
& 1) == (to
& 1);
3665 /* Target hook for assembling integer objects. Handle word-sized
3666 aligned objects and detect the cases when @fptr is needed. */
3669 ia64_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3671 if (size
== POINTER_SIZE
/ BITS_PER_UNIT
3672 && !(TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
3673 && GET_CODE (x
) == SYMBOL_REF
3674 && SYMBOL_REF_FUNCTION_P (x
))
3676 static const char * const directive
[2][2] = {
3677 /* 64-bit pointer */ /* 32-bit pointer */
3678 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3679 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3681 fputs (directive
[(aligned_p
!= 0)][POINTER_SIZE
== 32], asm_out_file
);
3682 output_addr_const (asm_out_file
, x
);
3683 fputs (")\n", asm_out_file
);
3686 return default_assemble_integer (x
, size
, aligned_p
);
3689 /* Emit the function prologue. */
3692 ia64_output_function_prologue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
3694 int mask
, grsave
, grsave_prev
;
3696 if (current_frame_info
.need_regstk
)
3697 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
3698 current_frame_info
.n_input_regs
,
3699 current_frame_info
.n_local_regs
,
3700 current_frame_info
.n_output_regs
,
3701 current_frame_info
.n_rotate_regs
);
3703 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
3706 /* Emit the .prologue directive. */
3709 grsave
= grsave_prev
= 0;
3710 if (current_frame_info
.r
[reg_save_b0
] != 0)
3713 grsave
= grsave_prev
= current_frame_info
.r
[reg_save_b0
];
3715 if (current_frame_info
.r
[reg_save_ar_pfs
] != 0
3716 && (grsave_prev
== 0
3717 || current_frame_info
.r
[reg_save_ar_pfs
] == grsave_prev
+ 1))
3720 if (grsave_prev
== 0)
3721 grsave
= current_frame_info
.r
[reg_save_ar_pfs
];
3722 grsave_prev
= current_frame_info
.r
[reg_save_ar_pfs
];
3724 if (current_frame_info
.r
[reg_fp
] != 0
3725 && (grsave_prev
== 0
3726 || current_frame_info
.r
[reg_fp
] == grsave_prev
+ 1))
3729 if (grsave_prev
== 0)
3730 grsave
= HARD_FRAME_POINTER_REGNUM
;
3731 grsave_prev
= current_frame_info
.r
[reg_fp
];
3733 if (current_frame_info
.r
[reg_save_pr
] != 0
3734 && (grsave_prev
== 0
3735 || current_frame_info
.r
[reg_save_pr
] == grsave_prev
+ 1))
3738 if (grsave_prev
== 0)
3739 grsave
= current_frame_info
.r
[reg_save_pr
];
3742 if (mask
&& TARGET_GNU_AS
)
3743 fprintf (file
, "\t.prologue %d, %d\n", mask
,
3744 ia64_dbx_register_number (grsave
));
3746 fputs ("\t.prologue\n", file
);
3748 /* Emit a .spill directive, if necessary, to relocate the base of
3749 the register spill area. */
3750 if (current_frame_info
.spill_cfa_off
!= -16)
3751 fprintf (file
, "\t.spill %ld\n",
3752 (long) (current_frame_info
.spill_cfa_off
3753 + current_frame_info
.spill_size
));
3756 /* Emit the .body directive at the scheduled end of the prologue. */
3759 ia64_output_function_end_prologue (FILE *file
)
3761 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
3764 fputs ("\t.body\n", file
);
3767 /* Emit the function epilogue. */
3770 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
3771 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
3775 if (current_frame_info
.r
[reg_fp
])
3777 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
3778 reg_names
[HARD_FRAME_POINTER_REGNUM
]
3779 = reg_names
[current_frame_info
.r
[reg_fp
]];
3780 reg_names
[current_frame_info
.r
[reg_fp
]] = tmp
;
3781 reg_emitted (reg_fp
);
3783 if (! TARGET_REG_NAMES
)
3785 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
3786 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
3787 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
3788 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
3789 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
3790 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
3793 current_frame_info
.initialized
= 0;
3797 ia64_dbx_register_number (int regno
)
3799 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3800 from its home at loc79 to something inside the register frame. We
3801 must perform the same renumbering here for the debug info. */
3802 if (current_frame_info
.r
[reg_fp
])
3804 if (regno
== HARD_FRAME_POINTER_REGNUM
)
3805 regno
= current_frame_info
.r
[reg_fp
];
3806 else if (regno
== current_frame_info
.r
[reg_fp
])
3807 regno
= HARD_FRAME_POINTER_REGNUM
;
3810 if (IN_REGNO_P (regno
))
3811 return 32 + regno
- IN_REG (0);
3812 else if (LOC_REGNO_P (regno
))
3813 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
3814 else if (OUT_REGNO_P (regno
))
3815 return (32 + current_frame_info
.n_input_regs
3816 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
3822 ia64_initialize_trampoline (rtx addr
, rtx fnaddr
, rtx static_chain
)
3824 rtx addr_reg
, eight
= GEN_INT (8);
3826 /* The Intel assembler requires that the global __ia64_trampoline symbol
3827 be declared explicitly */
3830 static bool declared_ia64_trampoline
= false;
3832 if (!declared_ia64_trampoline
)
3834 declared_ia64_trampoline
= true;
3835 (*targetm
.asm_out
.globalize_label
) (asm_out_file
,
3836 "__ia64_trampoline");
3840 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3841 addr
= convert_memory_address (Pmode
, addr
);
3842 fnaddr
= convert_memory_address (Pmode
, fnaddr
);
3843 static_chain
= convert_memory_address (Pmode
, static_chain
);
3845 /* Load up our iterator. */
3846 addr_reg
= gen_reg_rtx (Pmode
);
3847 emit_move_insn (addr_reg
, addr
);
3849 /* The first two words are the fake descriptor:
3850 __ia64_trampoline, ADDR+16. */
3851 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
3852 gen_rtx_SYMBOL_REF (Pmode
, "__ia64_trampoline"));
3853 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
3855 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
3856 copy_to_reg (plus_constant (addr
, 16)));
3857 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
3859 /* The third word is the target descriptor. */
3860 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), fnaddr
);
3861 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
3863 /* The fourth word is the static chain. */
3864 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), static_chain
);
3867 /* Do any needed setup for a variadic function. CUM has not been updated
3868 for the last named argument which has type TYPE and mode MODE.
3870 We generate the actual spill instructions during prologue generation. */
3873 ia64_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3874 tree type
, int * pretend_size
,
3875 int second_time ATTRIBUTE_UNUSED
)
3877 CUMULATIVE_ARGS next_cum
= *cum
;
3879 /* Skip the current argument. */
3880 ia64_function_arg_advance (&next_cum
, mode
, type
, 1);
3882 if (next_cum
.words
< MAX_ARGUMENT_SLOTS
)
3884 int n
= MAX_ARGUMENT_SLOTS
- next_cum
.words
;
3885 *pretend_size
= n
* UNITS_PER_WORD
;
3886 cfun
->machine
->n_varargs
= n
;
3890 /* Check whether TYPE is a homogeneous floating point aggregate. If
3891 it is, return the mode of the floating point type that appears
3892 in all leafs. If it is not, return VOIDmode.
3894 An aggregate is a homogeneous floating point aggregate is if all
3895 fields/elements in it have the same floating point type (e.g,
3896 SFmode). 128-bit quad-precision floats are excluded.
3898 Variable sized aggregates should never arrive here, since we should
3899 have already decided to pass them by reference. Top-level zero-sized
3900 aggregates are excluded because our parallels crash the middle-end. */
3902 static enum machine_mode
3903 hfa_element_mode (const_tree type
, bool nested
)
3905 enum machine_mode element_mode
= VOIDmode
;
3906 enum machine_mode mode
;
3907 enum tree_code code
= TREE_CODE (type
);
3908 int know_element_mode
= 0;
3911 if (!nested
&& (!TYPE_SIZE (type
) || integer_zerop (TYPE_SIZE (type
))))
3916 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
3917 case BOOLEAN_TYPE
: case POINTER_TYPE
:
3918 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
3919 case LANG_TYPE
: case FUNCTION_TYPE
:
3922 /* Fortran complex types are supposed to be HFAs, so we need to handle
3923 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3926 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
3927 && TYPE_MODE (type
) != TCmode
)
3928 return GET_MODE_INNER (TYPE_MODE (type
));
3933 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3934 mode if this is contained within an aggregate. */
3935 if (nested
&& TYPE_MODE (type
) != TFmode
)
3936 return TYPE_MODE (type
);
3941 return hfa_element_mode (TREE_TYPE (type
), 1);
3945 case QUAL_UNION_TYPE
:
3946 for (t
= TYPE_FIELDS (type
); t
; t
= TREE_CHAIN (t
))
3948 if (TREE_CODE (t
) != FIELD_DECL
)
3951 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
3952 if (know_element_mode
)
3954 if (mode
!= element_mode
)
3957 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
3961 know_element_mode
= 1;
3962 element_mode
= mode
;
3965 return element_mode
;
3968 /* If we reach here, we probably have some front-end specific type
3969 that the backend doesn't know about. This can happen via the
3970 aggregate_value_p call in init_function_start. All we can do is
3971 ignore unknown tree types. */
3978 /* Return the number of words required to hold a quantity of TYPE and MODE
3979 when passed as an argument. */
3981 ia64_function_arg_words (tree type
, enum machine_mode mode
)
3985 if (mode
== BLKmode
)
3986 words
= int_size_in_bytes (type
);
3988 words
= GET_MODE_SIZE (mode
);
3990 return (words
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
; /* round up */
3993 /* Return the number of registers that should be skipped so the current
3994 argument (described by TYPE and WORDS) will be properly aligned.
3996 Integer and float arguments larger than 8 bytes start at the next
3997 even boundary. Aggregates larger than 8 bytes start at the next
3998 even boundary if the aggregate has 16 byte alignment. Note that
3999 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4000 but are still to be aligned in registers.
4002 ??? The ABI does not specify how to handle aggregates with
4003 alignment from 9 to 15 bytes, or greater than 16. We handle them
4004 all as if they had 16 byte alignment. Such aggregates can occur
4005 only if gcc extensions are used. */
4007 ia64_function_arg_offset (CUMULATIVE_ARGS
*cum
, tree type
, int words
)
4009 if ((cum
->words
& 1) == 0)
4013 && TREE_CODE (type
) != INTEGER_TYPE
4014 && TREE_CODE (type
) != REAL_TYPE
)
4015 return TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
;
4020 /* Return rtx for register where argument is passed, or zero if it is passed
4022 /* ??? 128-bit quad-precision floats are always passed in general
4026 ia64_function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
, tree type
,
4027 int named
, int incoming
)
4029 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
4030 int words
= ia64_function_arg_words (type
, mode
);
4031 int offset
= ia64_function_arg_offset (cum
, type
, words
);
4032 enum machine_mode hfa_mode
= VOIDmode
;
4034 /* If all argument slots are used, then it must go on the stack. */
4035 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
4038 /* Check for and handle homogeneous FP aggregates. */
4040 hfa_mode
= hfa_element_mode (type
, 0);
4042 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4043 and unprototyped hfas are passed specially. */
4044 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
4048 int fp_regs
= cum
->fp_regs
;
4049 int int_regs
= cum
->words
+ offset
;
4050 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
4054 /* If prototyped, pass it in FR regs then GR regs.
4055 If not prototyped, pass it in both FR and GR regs.
4057 If this is an SFmode aggregate, then it is possible to run out of
4058 FR regs while GR regs are still left. In that case, we pass the
4059 remaining part in the GR regs. */
4061 /* Fill the FP regs. We do this always. We stop if we reach the end
4062 of the argument, the last FP register, or the last argument slot. */
4064 byte_size
= ((mode
== BLKmode
)
4065 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
4066 args_byte_size
= int_regs
* UNITS_PER_WORD
;
4068 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
4069 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
4071 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4072 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
4076 args_byte_size
+= hfa_size
;
4080 /* If no prototype, then the whole thing must go in GR regs. */
4081 if (! cum
->prototype
)
4083 /* If this is an SFmode aggregate, then we might have some left over
4084 that needs to go in GR regs. */
4085 else if (byte_size
!= offset
)
4086 int_regs
+= offset
/ UNITS_PER_WORD
;
4088 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4090 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
4092 enum machine_mode gr_mode
= DImode
;
4093 unsigned int gr_size
;
4095 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4096 then this goes in a GR reg left adjusted/little endian, right
4097 adjusted/big endian. */
4098 /* ??? Currently this is handled wrong, because 4-byte hunks are
4099 always right adjusted/little endian. */
4102 /* If we have an even 4 byte hunk because the aggregate is a
4103 multiple of 4 bytes in size, then this goes in a GR reg right
4104 adjusted/little endian. */
4105 else if (byte_size
- offset
== 4)
4108 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4109 gen_rtx_REG (gr_mode
, (basereg
4113 gr_size
= GET_MODE_SIZE (gr_mode
);
4115 if (gr_size
== UNITS_PER_WORD
4116 || (gr_size
< UNITS_PER_WORD
&& offset
% UNITS_PER_WORD
== 0))
4118 else if (gr_size
> UNITS_PER_WORD
)
4119 int_regs
+= gr_size
/ UNITS_PER_WORD
;
4121 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
4124 /* Integral and aggregates go in general registers. If we have run out of
4125 FR registers, then FP values must also go in general registers. This can
4126 happen when we have a SFmode HFA. */
4127 else if (mode
== TFmode
|| mode
== TCmode
4128 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
4130 int byte_size
= ((mode
== BLKmode
)
4131 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
4132 if (BYTES_BIG_ENDIAN
4133 && (mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
4134 && byte_size
< UNITS_PER_WORD
4137 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4138 gen_rtx_REG (DImode
,
4139 (basereg
+ cum
->words
4142 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
4145 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
4149 /* If there is a prototype, then FP values go in a FR register when
4150 named, and in a GR register when unnamed. */
4151 else if (cum
->prototype
)
4154 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
4155 /* In big-endian mode, an anonymous SFmode value must be represented
4156 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4157 the value into the high half of the general register. */
4158 else if (BYTES_BIG_ENDIAN
&& mode
== SFmode
)
4159 return gen_rtx_PARALLEL (mode
,
4161 gen_rtx_EXPR_LIST (VOIDmode
,
4162 gen_rtx_REG (DImode
, basereg
+ cum
->words
+ offset
),
4165 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
4167 /* If there is no prototype, then FP values go in both FR and GR
4171 /* See comment above. */
4172 enum machine_mode inner_mode
=
4173 (BYTES_BIG_ENDIAN
&& mode
== SFmode
) ? DImode
: mode
;
4175 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4176 gen_rtx_REG (mode
, (FR_ARG_FIRST
4179 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4180 gen_rtx_REG (inner_mode
,
4181 (basereg
+ cum
->words
4185 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
4189 /* Return number of bytes, at the beginning of the argument, that must be
4190 put in registers. 0 is the argument is entirely in registers or entirely
4194 ia64_arg_partial_bytes (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4195 tree type
, bool named ATTRIBUTE_UNUSED
)
4197 int words
= ia64_function_arg_words (type
, mode
);
4198 int offset
= ia64_function_arg_offset (cum
, type
, words
);
4200 /* If all argument slots are used, then it must go on the stack. */
4201 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
4204 /* It doesn't matter whether the argument goes in FR or GR regs. If
4205 it fits within the 8 argument slots, then it goes entirely in
4206 registers. If it extends past the last argument slot, then the rest
4207 goes on the stack. */
4209 if (words
+ cum
->words
+ offset
<= MAX_ARGUMENT_SLOTS
)
4212 return (MAX_ARGUMENT_SLOTS
- cum
->words
- offset
) * UNITS_PER_WORD
;
4215 /* Update CUM to point after this argument. This is patterned after
4216 ia64_function_arg. */
4219 ia64_function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4220 tree type
, int named
)
4222 int words
= ia64_function_arg_words (type
, mode
);
4223 int offset
= ia64_function_arg_offset (cum
, type
, words
);
4224 enum machine_mode hfa_mode
= VOIDmode
;
4226 /* If all arg slots are already full, then there is nothing to do. */
4227 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
4230 cum
->words
+= words
+ offset
;
4232 /* Check for and handle homogeneous FP aggregates. */
4234 hfa_mode
= hfa_element_mode (type
, 0);
4236 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4237 and unprototyped hfas are passed specially. */
4238 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
4240 int fp_regs
= cum
->fp_regs
;
4241 /* This is the original value of cum->words + offset. */
4242 int int_regs
= cum
->words
- words
;
4243 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
4247 /* If prototyped, pass it in FR regs then GR regs.
4248 If not prototyped, pass it in both FR and GR regs.
4250 If this is an SFmode aggregate, then it is possible to run out of
4251 FR regs while GR regs are still left. In that case, we pass the
4252 remaining part in the GR regs. */
4254 /* Fill the FP regs. We do this always. We stop if we reach the end
4255 of the argument, the last FP register, or the last argument slot. */
4257 byte_size
= ((mode
== BLKmode
)
4258 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
4259 args_byte_size
= int_regs
* UNITS_PER_WORD
;
4261 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
4262 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
4265 args_byte_size
+= hfa_size
;
4269 cum
->fp_regs
= fp_regs
;
4272 /* Integral and aggregates go in general registers. So do TFmode FP values.
4273 If we have run out of FR registers, then other FP values must also go in
4274 general registers. This can happen when we have a SFmode HFA. */
4275 else if (mode
== TFmode
|| mode
== TCmode
4276 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
4277 cum
->int_regs
= cum
->words
;
4279 /* If there is a prototype, then FP values go in a FR register when
4280 named, and in a GR register when unnamed. */
4281 else if (cum
->prototype
)
4284 cum
->int_regs
= cum
->words
;
4286 /* ??? Complex types should not reach here. */
4287 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
4289 /* If there is no prototype, then FP values go in both FR and GR
4293 /* ??? Complex types should not reach here. */
4294 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
4295 cum
->int_regs
= cum
->words
;
4299 /* Arguments with alignment larger than 8 bytes start at the next even
4300 boundary. On ILP32 HPUX, TFmode arguments start on next even boundary
4301 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4304 ia64_function_arg_boundary (enum machine_mode mode
, tree type
)
4307 if (mode
== TFmode
&& TARGET_HPUX
&& TARGET_ILP32
)
4308 return PARM_BOUNDARY
* 2;
4312 if (TYPE_ALIGN (type
) > PARM_BOUNDARY
)
4313 return PARM_BOUNDARY
* 2;
4315 return PARM_BOUNDARY
;
4318 if (GET_MODE_BITSIZE (mode
) > PARM_BOUNDARY
)
4319 return PARM_BOUNDARY
* 2;
4321 return PARM_BOUNDARY
;
4324 /* True if it is OK to do sibling call optimization for the specified
4325 call expression EXP. DECL will be the called function, or NULL if
4326 this is an indirect call. */
4328 ia64_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
4330 /* We can't perform a sibcall if the current function has the syscall_linkage
4332 if (lookup_attribute ("syscall_linkage",
4333 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
4336 /* We must always return with our current GP. This means we can
4337 only sibcall to functions defined in the current module. */
4338 return decl
&& (*targetm
.binds_local_p
) (decl
);
4342 /* Implement va_arg. */
4345 ia64_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
4348 /* Variable sized types are passed by reference. */
4349 if (pass_by_reference (NULL
, TYPE_MODE (type
), type
, false))
4351 tree ptrtype
= build_pointer_type (type
);
4352 tree addr
= std_gimplify_va_arg_expr (valist
, ptrtype
, pre_p
, post_p
);
4353 return build_va_arg_indirect_ref (addr
);
4356 /* Aggregate arguments with alignment larger than 8 bytes start at
4357 the next even boundary. Integer and floating point arguments
4358 do so if they are larger than 8 bytes, whether or not they are
4359 also aligned larger than 8 bytes. */
4360 if ((TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == INTEGER_TYPE
)
4361 ? int_size_in_bytes (type
) > 8 : TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
4363 tree t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (valist
), valist
,
4364 size_int (2 * UNITS_PER_WORD
- 1));
4365 t
= fold_convert (sizetype
, t
);
4366 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4367 size_int (-2 * UNITS_PER_WORD
));
4368 t
= fold_convert (TREE_TYPE (valist
), t
);
4369 gimplify_assign (unshare_expr (valist
), t
, pre_p
);
4372 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4375 /* Return 1 if function return value returned in memory. Return 0 if it is
4379 ia64_return_in_memory (const_tree valtype
, const_tree fntype ATTRIBUTE_UNUSED
)
4381 enum machine_mode mode
;
4382 enum machine_mode hfa_mode
;
4383 HOST_WIDE_INT byte_size
;
4385 mode
= TYPE_MODE (valtype
);
4386 byte_size
= GET_MODE_SIZE (mode
);
4387 if (mode
== BLKmode
)
4389 byte_size
= int_size_in_bytes (valtype
);
4394 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
4396 hfa_mode
= hfa_element_mode (valtype
, 0);
4397 if (hfa_mode
!= VOIDmode
)
4399 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
4401 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
4406 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
4412 /* Return rtx for register that holds the function return value. */
4415 ia64_function_value (const_tree valtype
, const_tree func ATTRIBUTE_UNUSED
)
4417 enum machine_mode mode
;
4418 enum machine_mode hfa_mode
;
4420 mode
= TYPE_MODE (valtype
);
4421 hfa_mode
= hfa_element_mode (valtype
, 0);
4423 if (hfa_mode
!= VOIDmode
)
4431 hfa_size
= GET_MODE_SIZE (hfa_mode
);
4432 byte_size
= ((mode
== BLKmode
)
4433 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
4435 for (i
= 0; offset
< byte_size
; i
++)
4437 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4438 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
4442 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
4444 else if (FLOAT_TYPE_P (valtype
) && mode
!= TFmode
&& mode
!= TCmode
)
4445 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
4448 bool need_parallel
= false;
4450 /* In big-endian mode, we need to manage the layout of aggregates
4451 in the registers so that we get the bits properly aligned in
4452 the highpart of the registers. */
4453 if (BYTES_BIG_ENDIAN
4454 && (mode
== BLKmode
|| (valtype
&& AGGREGATE_TYPE_P (valtype
))))
4455 need_parallel
= true;
4457 /* Something like struct S { long double x; char a[0] } is not an
4458 HFA structure, and therefore doesn't go in fp registers. But
4459 the middle-end will give it XFmode anyway, and XFmode values
4460 don't normally fit in integer registers. So we need to smuggle
4461 the value inside a parallel. */
4462 else if (mode
== XFmode
|| mode
== XCmode
|| mode
== RFmode
)
4463 need_parallel
= true;
4473 bytesize
= int_size_in_bytes (valtype
);
4474 /* An empty PARALLEL is invalid here, but the return value
4475 doesn't matter for empty structs. */
4477 return gen_rtx_REG (mode
, GR_RET_FIRST
);
4478 for (i
= 0; offset
< bytesize
; i
++)
4480 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4481 gen_rtx_REG (DImode
,
4484 offset
+= UNITS_PER_WORD
;
4486 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
4489 return gen_rtx_REG (mode
, GR_RET_FIRST
);
4493 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4494 We need to emit DTP-relative relocations. */
4497 ia64_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
4499 gcc_assert (size
== 4 || size
== 8);
4501 fputs ("\tdata4.ua\t@dtprel(", file
);
4503 fputs ("\tdata8.ua\t@dtprel(", file
);
4504 output_addr_const (file
, x
);
4508 /* Print a memory address as an operand to reference that memory location. */
4510 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4511 also call this from ia64_print_operand for memory addresses. */
4514 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED
,
4515 rtx address ATTRIBUTE_UNUSED
)
4519 /* Print an operand to an assembler instruction.
4520 C Swap and print a comparison operator.
4521 D Print an FP comparison operator.
4522 E Print 32 - constant, for SImode shifts as extract.
4523 e Print 64 - constant, for DImode rotates.
4524 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4525 a floating point register emitted normally.
4526 I Invert a predicate register by adding 1.
4527 J Select the proper predicate register for a condition.
4528 j Select the inverse predicate register for a condition.
4529 O Append .acq for volatile load.
4530 P Postincrement of a MEM.
4531 Q Append .rel for volatile store.
4532 R Print .s .d or nothing for a single, double or no truncation.
4533 S Shift amount for shladd instruction.
4534 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4535 for Intel assembler.
4536 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4537 for Intel assembler.
4538 X A pair of floating point registers.
4539 r Print register name, or constant 0 as r0. HP compatibility for
4541 v Print vector constant value as an 8-byte integer value. */
4544 ia64_print_operand (FILE * file
, rtx x
, int code
)
4551 /* Handled below. */
4556 enum rtx_code c
= swap_condition (GET_CODE (x
));
4557 fputs (GET_RTX_NAME (c
), file
);
4562 switch (GET_CODE (x
))
4586 str
= GET_RTX_NAME (GET_CODE (x
));
4593 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
4597 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
4601 if (x
== CONST0_RTX (GET_MODE (x
)))
4602 str
= reg_names
[FR_REG (0)];
4603 else if (x
== CONST1_RTX (GET_MODE (x
)))
4604 str
= reg_names
[FR_REG (1)];
4607 gcc_assert (GET_CODE (x
) == REG
);
4608 str
= reg_names
[REGNO (x
)];
4614 fputs (reg_names
[REGNO (x
) + 1], file
);
4620 unsigned int regno
= REGNO (XEXP (x
, 0));
4621 if (GET_CODE (x
) == EQ
)
4625 fputs (reg_names
[regno
], file
);
4630 if (MEM_VOLATILE_P (x
))
4631 fputs(".acq", file
);
4636 HOST_WIDE_INT value
;
4638 switch (GET_CODE (XEXP (x
, 0)))
4644 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
4645 if (GET_CODE (x
) == CONST_INT
)
4649 gcc_assert (GET_CODE (x
) == REG
);
4650 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
4656 value
= GET_MODE_SIZE (GET_MODE (x
));
4660 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
4664 fprintf (file
, ", " HOST_WIDE_INT_PRINT_DEC
, value
);
4669 if (MEM_VOLATILE_P (x
))
4670 fputs(".rel", file
);
4674 if (x
== CONST0_RTX (GET_MODE (x
)))
4676 else if (x
== CONST1_RTX (GET_MODE (x
)))
4678 else if (x
== CONST2_RTX (GET_MODE (x
)))
4681 output_operand_lossage ("invalid %%R value");
4685 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
4689 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
4691 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
4697 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
4699 const char *prefix
= "0x";
4700 if (INTVAL (x
) & 0x80000000)
4702 fprintf (file
, "0xffffffff");
4705 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
4712 unsigned int regno
= REGNO (x
);
4713 fprintf (file
, "%s, %s", reg_names
[regno
], reg_names
[regno
+ 1]);
4718 /* If this operand is the constant zero, write it as register zero.
4719 Any register, zero, or CONST_INT value is OK here. */
4720 if (GET_CODE (x
) == REG
)
4721 fputs (reg_names
[REGNO (x
)], file
);
4722 else if (x
== CONST0_RTX (GET_MODE (x
)))
4724 else if (GET_CODE (x
) == CONST_INT
)
4725 output_addr_const (file
, x
);
4727 output_operand_lossage ("invalid %%r value");
4731 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
4732 x
= simplify_subreg (DImode
, x
, GET_MODE (x
), 0);
4739 /* For conditional branches, returns or calls, substitute
4740 sptk, dptk, dpnt, or spnt for %s. */
4741 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
4744 int pred_val
= INTVAL (XEXP (x
, 0));
4746 /* Guess top and bottom 10% statically predicted. */
4747 if (pred_val
< REG_BR_PROB_BASE
/ 50
4748 && br_prob_note_reliable_p (x
))
4750 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
4752 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98
4753 || !br_prob_note_reliable_p (x
))
4758 else if (GET_CODE (current_output_insn
) == CALL_INSN
)
4763 fputs (which
, file
);
4768 x
= current_insn_predicate
;
4771 unsigned int regno
= REGNO (XEXP (x
, 0));
4772 if (GET_CODE (x
) == EQ
)
4774 fprintf (file
, "(%s) ", reg_names
[regno
]);
4779 output_operand_lossage ("ia64_print_operand: unknown code");
4783 switch (GET_CODE (x
))
4785 /* This happens for the spill/restore instructions. */
4790 /* ... fall through ... */
4793 fputs (reg_names
[REGNO (x
)], file
);
4798 rtx addr
= XEXP (x
, 0);
4799 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
4800 addr
= XEXP (addr
, 0);
4801 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
4806 output_addr_const (file
, x
);
4813 /* Compute a (partial) cost for rtx X. Return true if the complete
4814 cost has been computed, and false if subexpressions should be
4815 scanned. In either case, *TOTAL contains the cost result. */
4816 /* ??? This is incomplete. */
4819 ia64_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
4827 *total
= satisfies_constraint_J (x
) ? 0 : COSTS_N_INSNS (1);
4830 if (satisfies_constraint_I (x
))
4832 else if (satisfies_constraint_J (x
))
4835 *total
= COSTS_N_INSNS (1);
4838 if (satisfies_constraint_K (x
) || satisfies_constraint_L (x
))
4841 *total
= COSTS_N_INSNS (1);
4846 *total
= COSTS_N_INSNS (1);
4852 *total
= COSTS_N_INSNS (3);
4856 /* For multiplies wider than HImode, we have to go to the FPU,
4857 which normally involves copies. Plus there's the latency
4858 of the multiply itself, and the latency of the instructions to
4859 transfer integer regs to FP regs. */
4860 /* ??? Check for FP mode. */
4861 if (GET_MODE_SIZE (GET_MODE (x
)) > 2)
4862 *total
= COSTS_N_INSNS (10);
4864 *total
= COSTS_N_INSNS (2);
4872 *total
= COSTS_N_INSNS (1);
4879 /* We make divide expensive, so that divide-by-constant will be
4880 optimized to a multiply. */
4881 *total
= COSTS_N_INSNS (60);
4889 /* Calculate the cost of moving data from a register in class FROM to
4890 one in class TO, using MODE. */
4893 ia64_register_move_cost (enum machine_mode mode
, enum reg_class from
,
4896 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4897 if (to
== ADDL_REGS
)
4899 if (from
== ADDL_REGS
)
4902 /* All costs are symmetric, so reduce cases by putting the
4903 lower number class as the destination. */
4906 enum reg_class tmp
= to
;
4907 to
= from
, from
= tmp
;
4910 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4911 so that we get secondary memory reloads. Between FR_REGS,
4912 we have to make this at least as expensive as MEMORY_MOVE_COST
4913 to avoid spectacularly poor register class preferencing. */
4914 if (mode
== XFmode
|| mode
== RFmode
)
4916 if (to
!= GR_REGS
|| from
!= GR_REGS
)
4917 return MEMORY_MOVE_COST (mode
, to
, 0);
4925 /* Moving between PR registers takes two insns. */
4926 if (from
== PR_REGS
)
4928 /* Moving between PR and anything but GR is impossible. */
4929 if (from
!= GR_REGS
)
4930 return MEMORY_MOVE_COST (mode
, to
, 0);
4934 /* Moving between BR and anything but GR is impossible. */
4935 if (from
!= GR_REGS
&& from
!= GR_AND_BR_REGS
)
4936 return MEMORY_MOVE_COST (mode
, to
, 0);
4941 /* Moving between AR and anything but GR is impossible. */
4942 if (from
!= GR_REGS
)
4943 return MEMORY_MOVE_COST (mode
, to
, 0);
4949 case GR_AND_FR_REGS
:
4950 case GR_AND_BR_REGS
:
4961 /* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on RCLASS
4962 to use when copying X into that class. */
4965 ia64_preferred_reload_class (rtx x
, enum reg_class rclass
)
4971 /* Don't allow volatile mem reloads into floating point registers.
4972 This is defined to force reload to choose the r/m case instead
4973 of the f/f case when reloading (set (reg fX) (mem/v)). */
4974 if (MEM_P (x
) && MEM_VOLATILE_P (x
))
4977 /* Force all unrecognized constants into the constant pool. */
4995 /* This function returns the register class required for a secondary
4996 register when copying between one of the registers in RCLASS, and X,
4997 using MODE. A return value of NO_REGS means that no secondary register
5001 ia64_secondary_reload_class (enum reg_class rclass
,
5002 enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
5006 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
5007 regno
= true_regnum (x
);
5014 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5015 interaction. We end up with two pseudos with overlapping lifetimes
5016 both of which are equiv to the same constant, and both which need
5017 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5018 changes depending on the path length, which means the qty_first_reg
5019 check in make_regs_eqv can give different answers at different times.
5020 At some point I'll probably need a reload_indi pattern to handle
5023 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5024 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5025 non-general registers for good measure. */
5026 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
))
5029 /* This is needed if a pseudo used as a call_operand gets spilled to a
5031 if (GET_CODE (x
) == MEM
)
5037 /* Need to go through general registers to get to other class regs. */
5038 if (regno
>= 0 && ! (FR_REGNO_P (regno
) || GENERAL_REGNO_P (regno
)))
5041 /* This can happen when a paradoxical subreg is an operand to the
5043 /* ??? This shouldn't be necessary after instruction scheduling is
5044 enabled, because paradoxical subregs are not accepted by
5045 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5046 stop the paradoxical subreg stupidity in the *_operand functions
5048 if (GET_CODE (x
) == MEM
5049 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
5050 || GET_MODE (x
) == QImode
))
5053 /* This can happen because of the ior/and/etc patterns that accept FP
5054 registers as operands. If the third operand is a constant, then it
5055 needs to be reloaded into a FP register. */
5056 if (GET_CODE (x
) == CONST_INT
)
5059 /* This can happen because of register elimination in a muldi3 insn.
5060 E.g. `26107 * (unsigned long)&u'. */
5061 if (GET_CODE (x
) == PLUS
)
5066 /* ??? This happens if we cse/gcse a BImode value across a call,
5067 and the function has a nonlocal goto. This is because global
5068 does not allocate call crossing pseudos to hard registers when
5069 crtl->has_nonlocal_goto is true. This is relatively
5070 common for C++ programs that use exceptions. To reproduce,
5071 return NO_REGS and compile libstdc++. */
5072 if (GET_CODE (x
) == MEM
)
5075 /* This can happen when we take a BImode subreg of a DImode value,
5076 and that DImode value winds up in some non-GR register. */
5077 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
) && ! PR_REGNO_P (regno
))
5089 /* Implement targetm.unspec_may_trap_p hook. */
5091 ia64_unspec_may_trap_p (const_rtx x
, unsigned flags
)
5093 if (GET_CODE (x
) == UNSPEC
)
5095 switch (XINT (x
, 1))
5101 case UNSPEC_CHKACLR
:
5103 /* These unspecs are just wrappers. */
5104 return may_trap_p_1 (XVECEXP (x
, 0, 0), flags
);
5108 return default_unspec_may_trap_p (x
, flags
);
5112 /* Parse the -mfixed-range= option string. */
5115 fix_range (const char *const_str
)
5118 char *str
, *dash
, *comma
;
5120 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
5121 REG2 are either register names or register numbers. The effect
5122 of this option is to mark the registers in the range from REG1 to
5123 REG2 as ``fixed'' so they won't be used by the compiler. This is
5124 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5126 i
= strlen (const_str
);
5127 str
= (char *) alloca (i
+ 1);
5128 memcpy (str
, const_str
, i
+ 1);
5132 dash
= strchr (str
, '-');
5135 warning (0, "value of -mfixed-range must have form REG1-REG2");
5140 comma
= strchr (dash
+ 1, ',');
5144 first
= decode_reg_name (str
);
5147 warning (0, "unknown register name: %s", str
);
5151 last
= decode_reg_name (dash
+ 1);
5154 warning (0, "unknown register name: %s", dash
+ 1);
5162 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
5166 for (i
= first
; i
<= last
; ++i
)
5167 fixed_regs
[i
] = call_used_regs
[i
] = 1;
5177 /* Implement TARGET_HANDLE_OPTION. */
5180 ia64_handle_option (size_t code
, const char *arg
, int value
)
5184 case OPT_mfixed_range_
:
5188 case OPT_mtls_size_
:
5189 if (value
!= 14 && value
!= 22 && value
!= 64)
5190 error ("bad value %<%s%> for -mtls-size= switch", arg
);
5197 const char *name
; /* processor name or nickname. */
5198 enum processor_type processor
;
5200 const processor_alias_table
[] =
5202 {"itanium", PROCESSOR_ITANIUM
},
5203 {"itanium1", PROCESSOR_ITANIUM
},
5204 {"merced", PROCESSOR_ITANIUM
},
5205 {"itanium2", PROCESSOR_ITANIUM2
},
5206 {"mckinley", PROCESSOR_ITANIUM2
},
5208 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
5211 for (i
= 0; i
< pta_size
; i
++)
5212 if (!strcmp (arg
, processor_alias_table
[i
].name
))
5214 ia64_tune
= processor_alias_table
[i
].processor
;
5218 error ("bad value %<%s%> for -mtune= switch", arg
);
5227 /* Implement OVERRIDE_OPTIONS. */
5230 ia64_override_options (void)
5232 if (TARGET_AUTO_PIC
)
5233 target_flags
|= MASK_CONST_GP
;
5235 if (TARGET_INLINE_SQRT
== INL_MIN_LAT
)
5237 warning (0, "not yet implemented: latency-optimized inline square root");
5238 TARGET_INLINE_SQRT
= INL_MAX_THR
;
5241 ia64_section_threshold
= g_switch_set
? g_switch_value
: IA64_DEFAULT_GVALUE
;
5243 init_machine_status
= ia64_init_machine_status
;
5246 /* Initialize the record of emitted frame related registers. */
5248 void ia64_init_expanders (void)
5250 memset (&emitted_frame_related_regs
, 0, sizeof (emitted_frame_related_regs
));
5253 static struct machine_function
*
5254 ia64_init_machine_status (void)
5256 return GGC_CNEW (struct machine_function
);
5259 static enum attr_itanium_class
ia64_safe_itanium_class (rtx
);
5260 static enum attr_type
ia64_safe_type (rtx
);
5262 static enum attr_itanium_class
5263 ia64_safe_itanium_class (rtx insn
)
5265 if (recog_memoized (insn
) >= 0)
5266 return get_attr_itanium_class (insn
);
5268 return ITANIUM_CLASS_UNKNOWN
;
5271 static enum attr_type
5272 ia64_safe_type (rtx insn
)
5274 if (recog_memoized (insn
) >= 0)
5275 return get_attr_type (insn
);
5277 return TYPE_UNKNOWN
;
5280 /* The following collection of routines emit instruction group stop bits as
5281 necessary to avoid dependencies. */
5283 /* Need to track some additional registers as far as serialization is
5284 concerned so we can properly handle br.call and br.ret. We could
5285 make these registers visible to gcc, but since these registers are
5286 never explicitly used in gcc generated code, it seems wasteful to
5287 do so (plus it would make the call and return patterns needlessly
5289 #define REG_RP (BR_REG (0))
5290 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
5291 /* This is used for volatile asms which may require a stop bit immediately
5292 before and after them. */
5293 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
5294 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5295 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
5297 /* For each register, we keep track of how it has been written in the
5298 current instruction group.
5300 If a register is written unconditionally (no qualifying predicate),
5301 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5303 If a register is written if its qualifying predicate P is true, we
5304 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5305 may be written again by the complement of P (P^1) and when this happens,
5306 WRITE_COUNT gets set to 2.
5308 The result of this is that whenever an insn attempts to write a register
5309 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5311 If a predicate register is written by a floating-point insn, we set
5312 WRITTEN_BY_FP to true.
5314 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5315 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
5317 #if GCC_VERSION >= 4000
5318 #define RWS_FIELD_TYPE __extension__ unsigned short
5320 #define RWS_FIELD_TYPE unsigned int
5322 struct reg_write_state
5324 RWS_FIELD_TYPE write_count
: 2;
5325 RWS_FIELD_TYPE first_pred
: 10;
5326 RWS_FIELD_TYPE written_by_fp
: 1;
5327 RWS_FIELD_TYPE written_by_and
: 1;
5328 RWS_FIELD_TYPE written_by_or
: 1;
5331 /* Cumulative info for the current instruction group. */
5332 struct reg_write_state rws_sum
[NUM_REGS
];
5333 #ifdef ENABLE_CHECKING
5334 /* Bitmap whether a register has been written in the current insn. */
5335 HARD_REG_ELT_TYPE rws_insn
[(NUM_REGS
+ HOST_BITS_PER_WIDEST_FAST_INT
- 1)
5336 / HOST_BITS_PER_WIDEST_FAST_INT
];
5339 rws_insn_set (int regno
)
5341 gcc_assert (!TEST_HARD_REG_BIT (rws_insn
, regno
));
5342 SET_HARD_REG_BIT (rws_insn
, regno
);
5346 rws_insn_test (int regno
)
5348 return TEST_HARD_REG_BIT (rws_insn
, regno
);
5351 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5352 unsigned char rws_insn
[2];
5355 rws_insn_set (int regno
)
5357 if (regno
== REG_AR_CFM
)
5359 else if (regno
== REG_VOLATILE
)
5364 rws_insn_test (int regno
)
5366 if (regno
== REG_AR_CFM
)
5368 if (regno
== REG_VOLATILE
)
5374 /* Indicates whether this is the first instruction after a stop bit,
5375 in which case we don't need another stop bit. Without this,
5376 ia64_variable_issue will die when scheduling an alloc. */
5377 static int first_instruction
;
5379 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5380 RTL for one instruction. */
5383 unsigned int is_write
: 1; /* Is register being written? */
5384 unsigned int is_fp
: 1; /* Is register used as part of an fp op? */
5385 unsigned int is_branch
: 1; /* Is register used as part of a branch? */
5386 unsigned int is_and
: 1; /* Is register used as part of and.orcm? */
5387 unsigned int is_or
: 1; /* Is register used as part of or.andcm? */
5388 unsigned int is_sibcall
: 1; /* Is this a sibling or normal call? */
5391 static void rws_update (int, struct reg_flags
, int);
5392 static int rws_access_regno (int, struct reg_flags
, int);
5393 static int rws_access_reg (rtx
, struct reg_flags
, int);
5394 static void update_set_flags (rtx
, struct reg_flags
*);
5395 static int set_src_needs_barrier (rtx
, struct reg_flags
, int);
5396 static int rtx_needs_barrier (rtx
, struct reg_flags
, int);
5397 static void init_insn_group_barriers (void);
5398 static int group_barrier_needed (rtx
);
5399 static int safe_group_barrier_needed (rtx
);
5400 static int in_safe_group_barrier
;
5402 /* Update *RWS for REGNO, which is being written by the current instruction,
5403 with predicate PRED, and associated register flags in FLAGS. */
5406 rws_update (int regno
, struct reg_flags flags
, int pred
)
5409 rws_sum
[regno
].write_count
++;
5411 rws_sum
[regno
].write_count
= 2;
5412 rws_sum
[regno
].written_by_fp
|= flags
.is_fp
;
5413 /* ??? Not tracking and/or across differing predicates. */
5414 rws_sum
[regno
].written_by_and
= flags
.is_and
;
5415 rws_sum
[regno
].written_by_or
= flags
.is_or
;
5416 rws_sum
[regno
].first_pred
= pred
;
5419 /* Handle an access to register REGNO of type FLAGS using predicate register
5420 PRED. Update rws_sum array. Return 1 if this access creates
5421 a dependency with an earlier instruction in the same group. */
5424 rws_access_regno (int regno
, struct reg_flags flags
, int pred
)
5426 int need_barrier
= 0;
5428 gcc_assert (regno
< NUM_REGS
);
5430 if (! PR_REGNO_P (regno
))
5431 flags
.is_and
= flags
.is_or
= 0;
5437 rws_insn_set (regno
);
5438 write_count
= rws_sum
[regno
].write_count
;
5440 switch (write_count
)
5443 /* The register has not been written yet. */
5444 if (!in_safe_group_barrier
)
5445 rws_update (regno
, flags
, pred
);
5449 /* The register has been written via a predicate. If this is
5450 not a complementary predicate, then we need a barrier. */
5451 /* ??? This assumes that P and P+1 are always complementary
5452 predicates for P even. */
5453 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
5455 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
5457 else if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
5459 if (!in_safe_group_barrier
)
5460 rws_update (regno
, flags
, pred
);
5464 /* The register has been unconditionally written already. We
5466 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
5468 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
5472 if (!in_safe_group_barrier
)
5474 rws_sum
[regno
].written_by_and
= flags
.is_and
;
5475 rws_sum
[regno
].written_by_or
= flags
.is_or
;
5485 if (flags
.is_branch
)
5487 /* Branches have several RAW exceptions that allow to avoid
5490 if (REGNO_REG_CLASS (regno
) == BR_REGS
|| regno
== AR_PFS_REGNUM
)
5491 /* RAW dependencies on branch regs are permissible as long
5492 as the writer is a non-branch instruction. Since we
5493 never generate code that uses a branch register written
5494 by a branch instruction, handling this case is
5498 if (REGNO_REG_CLASS (regno
) == PR_REGS
5499 && ! rws_sum
[regno
].written_by_fp
)
5500 /* The predicates of a branch are available within the
5501 same insn group as long as the predicate was written by
5502 something other than a floating-point instruction. */
5506 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
5508 if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
5511 switch (rws_sum
[regno
].write_count
)
5514 /* The register has not been written yet. */
5518 /* The register has been written via a predicate. If this is
5519 not a complementary predicate, then we need a barrier. */
5520 /* ??? This assumes that P and P+1 are always complementary
5521 predicates for P even. */
5522 if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
5527 /* The register has been unconditionally written already. We
5537 return need_barrier
;
5541 rws_access_reg (rtx reg
, struct reg_flags flags
, int pred
)
5543 int regno
= REGNO (reg
);
5544 int n
= HARD_REGNO_NREGS (REGNO (reg
), GET_MODE (reg
));
5547 return rws_access_regno (regno
, flags
, pred
);
5550 int need_barrier
= 0;
5552 need_barrier
|= rws_access_regno (regno
+ n
, flags
, pred
);
5553 return need_barrier
;
5557 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
5558 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
5561 update_set_flags (rtx x
, struct reg_flags
*pflags
)
5563 rtx src
= SET_SRC (x
);
5565 switch (GET_CODE (src
))
5571 /* There are four cases here:
5572 (1) The destination is (pc), in which case this is a branch,
5573 nothing here applies.
5574 (2) The destination is ar.lc, in which case this is a
5575 doloop_end_internal,
5576 (3) The destination is an fp register, in which case this is
5577 an fselect instruction.
5578 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5579 this is a check load.
5580 In all cases, nothing we do in this function applies. */
5584 if (COMPARISON_P (src
)
5585 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src
, 0))))
5586 /* Set pflags->is_fp to 1 so that we know we're dealing
5587 with a floating point comparison when processing the
5588 destination of the SET. */
5591 /* Discover if this is a parallel comparison. We only handle
5592 and.orcm and or.andcm at present, since we must retain a
5593 strict inverse on the predicate pair. */
5594 else if (GET_CODE (src
) == AND
)
5596 else if (GET_CODE (src
) == IOR
)
5603 /* Subroutine of rtx_needs_barrier; this function determines whether the
5604 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5605 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5609 set_src_needs_barrier (rtx x
, struct reg_flags flags
, int pred
)
5611 int need_barrier
= 0;
5613 rtx src
= SET_SRC (x
);
5615 if (GET_CODE (src
) == CALL
)
5616 /* We don't need to worry about the result registers that
5617 get written by subroutine call. */
5618 return rtx_needs_barrier (src
, flags
, pred
);
5619 else if (SET_DEST (x
) == pc_rtx
)
5621 /* X is a conditional branch. */
5622 /* ??? This seems redundant, as the caller sets this bit for
5624 if (!ia64_spec_check_src_p (src
))
5625 flags
.is_branch
= 1;
5626 return rtx_needs_barrier (src
, flags
, pred
);
5629 if (ia64_spec_check_src_p (src
))
5630 /* Avoid checking one register twice (in condition
5631 and in 'then' section) for ldc pattern. */
5633 gcc_assert (REG_P (XEXP (src
, 2)));
5634 need_barrier
= rtx_needs_barrier (XEXP (src
, 2), flags
, pred
);
5636 /* We process MEM below. */
5637 src
= XEXP (src
, 1);
5640 need_barrier
|= rtx_needs_barrier (src
, flags
, pred
);
5643 if (GET_CODE (dst
) == ZERO_EXTRACT
)
5645 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 1), flags
, pred
);
5646 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 2), flags
, pred
);
5648 return need_barrier
;
5651 /* Handle an access to rtx X of type FLAGS using predicate register
5652 PRED. Return 1 if this access creates a dependency with an earlier
5653 instruction in the same group. */
5656 rtx_needs_barrier (rtx x
, struct reg_flags flags
, int pred
)
5659 int is_complemented
= 0;
5660 int need_barrier
= 0;
5661 const char *format_ptr
;
5662 struct reg_flags new_flags
;
5670 switch (GET_CODE (x
))
5673 update_set_flags (x
, &new_flags
);
5674 need_barrier
= set_src_needs_barrier (x
, new_flags
, pred
);
5675 if (GET_CODE (SET_SRC (x
)) != CALL
)
5677 new_flags
.is_write
= 1;
5678 need_barrier
|= rtx_needs_barrier (SET_DEST (x
), new_flags
, pred
);
5683 new_flags
.is_write
= 0;
5684 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
5686 /* Avoid multiple register writes, in case this is a pattern with
5687 multiple CALL rtx. This avoids a failure in rws_access_reg. */
5688 if (! flags
.is_sibcall
&& ! rws_insn_test (REG_AR_CFM
))
5690 new_flags
.is_write
= 1;
5691 need_barrier
|= rws_access_regno (REG_RP
, new_flags
, pred
);
5692 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, new_flags
, pred
);
5693 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
5698 /* X is a predicated instruction. */
5700 cond
= COND_EXEC_TEST (x
);
5702 need_barrier
= rtx_needs_barrier (cond
, flags
, 0);
5704 if (GET_CODE (cond
) == EQ
)
5705 is_complemented
= 1;
5706 cond
= XEXP (cond
, 0);
5707 gcc_assert (GET_CODE (cond
) == REG
5708 && REGNO_REG_CLASS (REGNO (cond
)) == PR_REGS
);
5709 pred
= REGNO (cond
);
5710 if (is_complemented
)
5713 need_barrier
|= rtx_needs_barrier (COND_EXEC_CODE (x
), flags
, pred
);
5714 return need_barrier
;
5718 /* Clobber & use are for earlier compiler-phases only. */
5723 /* We always emit stop bits for traditional asms. We emit stop bits
5724 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5725 if (GET_CODE (x
) != ASM_OPERANDS
5726 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
5728 /* Avoid writing the register multiple times if we have multiple
5729 asm outputs. This avoids a failure in rws_access_reg. */
5730 if (! rws_insn_test (REG_VOLATILE
))
5732 new_flags
.is_write
= 1;
5733 rws_access_regno (REG_VOLATILE
, new_flags
, pred
);
5738 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5739 We cannot just fall through here since then we would be confused
5740 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
5741 traditional asms unlike their normal usage. */
5743 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
5744 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
5749 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
5751 rtx pat
= XVECEXP (x
, 0, i
);
5752 switch (GET_CODE (pat
))
5755 update_set_flags (pat
, &new_flags
);
5756 need_barrier
|= set_src_needs_barrier (pat
, new_flags
, pred
);
5762 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
5773 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
5775 rtx pat
= XVECEXP (x
, 0, i
);
5776 if (GET_CODE (pat
) == SET
)
5778 if (GET_CODE (SET_SRC (pat
)) != CALL
)
5780 new_flags
.is_write
= 1;
5781 need_barrier
|= rtx_needs_barrier (SET_DEST (pat
), new_flags
,
5785 else if (GET_CODE (pat
) == CLOBBER
|| GET_CODE (pat
) == RETURN
)
5786 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
5791 need_barrier
|= rtx_needs_barrier (SUBREG_REG (x
), flags
, pred
);
5794 if (REGNO (x
) == AR_UNAT_REGNUM
)
5796 for (i
= 0; i
< 64; ++i
)
5797 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ i
, flags
, pred
);
5800 need_barrier
= rws_access_reg (x
, flags
, pred
);
5804 /* Find the regs used in memory address computation. */
5805 new_flags
.is_write
= 0;
5806 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
5809 case CONST_INT
: case CONST_DOUBLE
: case CONST_VECTOR
:
5810 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
5813 /* Operators with side-effects. */
5814 case POST_INC
: case POST_DEC
:
5815 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
5817 new_flags
.is_write
= 0;
5818 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
5819 new_flags
.is_write
= 1;
5820 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
5824 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
5826 new_flags
.is_write
= 0;
5827 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
5828 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
5829 new_flags
.is_write
= 1;
5830 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
5833 /* Handle common unary and binary ops for efficiency. */
5834 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
5835 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
5836 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
5837 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
5838 case NE
: case EQ
: case GE
: case GT
: case LE
:
5839 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
5840 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
5841 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
5844 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
5845 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
5846 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
5847 case SQRT
: case FFS
: case POPCOUNT
:
5848 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
5852 /* VEC_SELECT's second argument is a PARALLEL with integers that
5853 describe the elements selected. On ia64, those integers are
5854 always constants. Avoid walking the PARALLEL so that we don't
5855 get confused with "normal" parallels and then die. */
5856 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
5860 switch (XINT (x
, 1))
5862 case UNSPEC_LTOFF_DTPMOD
:
5863 case UNSPEC_LTOFF_DTPREL
:
5865 case UNSPEC_LTOFF_TPREL
:
5867 case UNSPEC_PRED_REL_MUTEX
:
5868 case UNSPEC_PIC_CALL
:
5870 case UNSPEC_FETCHADD_ACQ
:
5871 case UNSPEC_BSP_VALUE
:
5872 case UNSPEC_FLUSHRS
:
5873 case UNSPEC_BUNDLE_SELECTOR
:
5876 case UNSPEC_GR_SPILL
:
5877 case UNSPEC_GR_RESTORE
:
5879 HOST_WIDE_INT offset
= INTVAL (XVECEXP (x
, 0, 1));
5880 HOST_WIDE_INT bit
= (offset
>> 3) & 63;
5882 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
5883 new_flags
.is_write
= (XINT (x
, 1) == UNSPEC_GR_SPILL
);
5884 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ bit
,
5889 case UNSPEC_FR_SPILL
:
5890 case UNSPEC_FR_RESTORE
:
5891 case UNSPEC_GETF_EXP
:
5892 case UNSPEC_SETF_EXP
:
5894 case UNSPEC_FR_SQRT_RECIP_APPROX
:
5895 case UNSPEC_FR_SQRT_RECIP_APPROX_RES
:
5899 case UNSPEC_CHKACLR
:
5901 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
5904 case UNSPEC_FR_RECIP_APPROX
:
5906 case UNSPEC_COPYSIGN
:
5907 case UNSPEC_FR_RECIP_APPROX_RES
:
5908 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
5909 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
5912 case UNSPEC_CMPXCHG_ACQ
:
5913 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
5914 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 2), flags
, pred
);
5922 case UNSPEC_VOLATILE
:
5923 switch (XINT (x
, 1))
5926 /* Alloc must always be the first instruction of a group.
5927 We force this by always returning true. */
5928 /* ??? We might get better scheduling if we explicitly check for
5929 input/local/output register dependencies, and modify the
5930 scheduler so that alloc is always reordered to the start of
5931 the current group. We could then eliminate all of the
5932 first_instruction code. */
5933 rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
5935 new_flags
.is_write
= 1;
5936 rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
5939 case UNSPECV_SET_BSP
:
5943 case UNSPECV_BLOCKAGE
:
5944 case UNSPECV_INSN_GROUP_BARRIER
:
5946 case UNSPECV_PSAC_ALL
:
5947 case UNSPECV_PSAC_NORMAL
:
5956 new_flags
.is_write
= 0;
5957 need_barrier
= rws_access_regno (REG_RP
, flags
, pred
);
5958 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
5960 new_flags
.is_write
= 1;
5961 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
5962 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
5966 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
5967 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
5968 switch (format_ptr
[i
])
5970 case '0': /* unused field */
5971 case 'i': /* integer */
5972 case 'n': /* note */
5973 case 'w': /* wide integer */
5974 case 's': /* pointer to string */
5975 case 'S': /* optional pointer to string */
5979 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
5984 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
5985 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
5994 return need_barrier
;
5997 /* Clear out the state for group_barrier_needed at the start of a
5998 sequence of insns. */
6001 init_insn_group_barriers (void)
6003 memset (rws_sum
, 0, sizeof (rws_sum
));
6004 first_instruction
= 1;
6007 /* Given the current state, determine whether a group barrier (a stop bit) is
6008 necessary before INSN. Return nonzero if so. This modifies the state to
6009 include the effects of INSN as a side-effect. */
6012 group_barrier_needed (rtx insn
)
6015 int need_barrier
= 0;
6016 struct reg_flags flags
;
6018 memset (&flags
, 0, sizeof (flags
));
6019 switch (GET_CODE (insn
))
6025 /* A barrier doesn't imply an instruction group boundary. */
6029 memset (rws_insn
, 0, sizeof (rws_insn
));
6033 flags
.is_branch
= 1;
6034 flags
.is_sibcall
= SIBLING_CALL_P (insn
);
6035 memset (rws_insn
, 0, sizeof (rws_insn
));
6037 /* Don't bundle a call following another call. */
6038 if ((pat
= prev_active_insn (insn
))
6039 && GET_CODE (pat
) == CALL_INSN
)
6045 need_barrier
= rtx_needs_barrier (PATTERN (insn
), flags
, 0);
6049 if (!ia64_spec_check_p (insn
))
6050 flags
.is_branch
= 1;
6052 /* Don't bundle a jump following a call. */
6053 if ((pat
= prev_active_insn (insn
))
6054 && GET_CODE (pat
) == CALL_INSN
)
6062 if (GET_CODE (PATTERN (insn
)) == USE
6063 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6064 /* Don't care about USE and CLOBBER "insns"---those are used to
6065 indicate to the optimizer that it shouldn't get rid of
6066 certain operations. */
6069 pat
= PATTERN (insn
);
6071 /* Ug. Hack hacks hacked elsewhere. */
6072 switch (recog_memoized (insn
))
6074 /* We play dependency tricks with the epilogue in order
6075 to get proper schedules. Undo this for dv analysis. */
6076 case CODE_FOR_epilogue_deallocate_stack
:
6077 case CODE_FOR_prologue_allocate_stack
:
6078 pat
= XVECEXP (pat
, 0, 0);
6081 /* The pattern we use for br.cloop confuses the code above.
6082 The second element of the vector is representative. */
6083 case CODE_FOR_doloop_end_internal
:
6084 pat
= XVECEXP (pat
, 0, 1);
6087 /* Doesn't generate code. */
6088 case CODE_FOR_pred_rel_mutex
:
6089 case CODE_FOR_prologue_use
:
6096 memset (rws_insn
, 0, sizeof (rws_insn
));
6097 need_barrier
= rtx_needs_barrier (pat
, flags
, 0);
6099 /* Check to see if the previous instruction was a volatile
6102 need_barrier
= rws_access_regno (REG_VOLATILE
, flags
, 0);
6109 if (first_instruction
&& INSN_P (insn
)
6110 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
6111 && GET_CODE (PATTERN (insn
)) != USE
6112 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
6115 first_instruction
= 0;
6118 return need_barrier
;
6121 /* Like group_barrier_needed, but do not clobber the current state. */
6124 safe_group_barrier_needed (rtx insn
)
6126 int saved_first_instruction
;
6129 saved_first_instruction
= first_instruction
;
6130 in_safe_group_barrier
= 1;
6132 t
= group_barrier_needed (insn
);
6134 first_instruction
= saved_first_instruction
;
6135 in_safe_group_barrier
= 0;
6140 /* Scan the current function and insert stop bits as necessary to
6141 eliminate dependencies. This function assumes that a final
6142 instruction scheduling pass has been run which has already
6143 inserted most of the necessary stop bits. This function only
6144 inserts new ones at basic block boundaries, since these are
6145 invisible to the scheduler. */
6148 emit_insn_group_barriers (FILE *dump
)
6152 int insns_since_last_label
= 0;
6154 init_insn_group_barriers ();
6156 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6158 if (GET_CODE (insn
) == CODE_LABEL
)
6160 if (insns_since_last_label
)
6162 insns_since_last_label
= 0;
6164 else if (GET_CODE (insn
) == NOTE
6165 && NOTE_KIND (insn
) == NOTE_INSN_BASIC_BLOCK
)
6167 if (insns_since_last_label
)
6169 insns_since_last_label
= 0;
6171 else if (GET_CODE (insn
) == INSN
6172 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
6173 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
6175 init_insn_group_barriers ();
6178 else if (INSN_P (insn
))
6180 insns_since_last_label
= 1;
6182 if (group_barrier_needed (insn
))
6187 fprintf (dump
, "Emitting stop before label %d\n",
6188 INSN_UID (last_label
));
6189 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label
);
6192 init_insn_group_barriers ();
6200 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6201 This function has to emit all necessary group barriers. */
6204 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED
)
6208 init_insn_group_barriers ();
6210 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6212 if (GET_CODE (insn
) == BARRIER
)
6214 rtx last
= prev_active_insn (insn
);
6218 if (GET_CODE (last
) == JUMP_INSN
6219 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
6220 last
= prev_active_insn (last
);
6221 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
6222 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
6224 init_insn_group_barriers ();
6226 else if (INSN_P (insn
))
6228 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
6229 init_insn_group_barriers ();
6230 else if (group_barrier_needed (insn
))
6232 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
6233 init_insn_group_barriers ();
6234 group_barrier_needed (insn
);
6242 /* Instruction scheduling support. */
6244 #define NR_BUNDLES 10
6246 /* A list of names of all available bundles. */
6248 static const char *bundle_name
[NR_BUNDLES
] =
6254 #if NR_BUNDLES == 10
6264 /* Nonzero if we should insert stop bits into the schedule. */
6266 int ia64_final_schedule
= 0;
6268 /* Codes of the corresponding queried units: */
6270 static int _0mii_
, _0mmi_
, _0mfi_
, _0mmf_
;
6271 static int _0bbb_
, _0mbb_
, _0mib_
, _0mmb_
, _0mfb_
, _0mlx_
;
6273 static int _1mii_
, _1mmi_
, _1mfi_
, _1mmf_
;
6274 static int _1bbb_
, _1mbb_
, _1mib_
, _1mmb_
, _1mfb_
, _1mlx_
;
6276 static int pos_1
, pos_2
, pos_3
, pos_4
, pos_5
, pos_6
;
6278 /* The following variable value is an insn group barrier. */
6280 static rtx dfa_stop_insn
;
6282 /* The following variable value is the last issued insn. */
6284 static rtx last_scheduled_insn
;
6286 /* The following variable value is size of the DFA state. */
6288 static size_t dfa_state_size
;
6290 /* The following variable value is pointer to a DFA state used as
6291 temporary variable. */
6293 static state_t temp_dfa_state
= NULL
;
6295 /* The following variable value is DFA state after issuing the last
6298 static state_t prev_cycle_state
= NULL
;
6300 /* The following array element values are TRUE if the corresponding
6301 insn requires to add stop bits before it. */
6303 static char *stops_p
= NULL
;
6305 /* The following array element values are ZERO for non-speculative
6306 instructions and hold corresponding speculation check number for
6307 speculative instructions. */
6308 static int *spec_check_no
= NULL
;
6310 /* Size of spec_check_no array. */
6311 static int max_uid
= 0;
6313 /* The following variable is used to set up the mentioned above array. */
6315 static int stop_before_p
= 0;
6317 /* The following variable value is length of the arrays `clocks' and
6320 static int clocks_length
;
6322 /* The following array element values are cycles on which the
6323 corresponding insn will be issued. The array is used only for
6328 /* The following array element values are numbers of cycles should be
6329 added to improve insn scheduling for MM_insns for Itanium1. */
6331 static int *add_cycles
;
6333 /* The following variable value is number of data speculations in progress. */
6334 static int pending_data_specs
= 0;
6336 static rtx
ia64_single_set (rtx
);
6337 static void ia64_emit_insn_before (rtx
, rtx
);
6339 /* Map a bundle number to its pseudo-op. */
6342 get_bundle_name (int b
)
6344 return bundle_name
[b
];
6348 /* Return the maximum number of instructions a cpu can issue. */
6351 ia64_issue_rate (void)
6356 /* Helper function - like single_set, but look inside COND_EXEC. */
6359 ia64_single_set (rtx insn
)
6361 rtx x
= PATTERN (insn
), ret
;
6362 if (GET_CODE (x
) == COND_EXEC
)
6363 x
= COND_EXEC_CODE (x
);
6364 if (GET_CODE (x
) == SET
)
6367 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6368 Although they are not classical single set, the second set is there just
6369 to protect it from moving past FP-relative stack accesses. */
6370 switch (recog_memoized (insn
))
6372 case CODE_FOR_prologue_allocate_stack
:
6373 case CODE_FOR_epilogue_deallocate_stack
:
6374 ret
= XVECEXP (x
, 0, 0);
6378 ret
= single_set_2 (insn
, x
);
6385 /* Adjust the cost of a scheduling dependency. Return the new cost of
6386 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
6389 ia64_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
6391 enum attr_itanium_class dep_class
;
6392 enum attr_itanium_class insn_class
;
6394 if (REG_NOTE_KIND (link
) != REG_DEP_OUTPUT
)
6397 insn_class
= ia64_safe_itanium_class (insn
);
6398 dep_class
= ia64_safe_itanium_class (dep_insn
);
6399 if (dep_class
== ITANIUM_CLASS_ST
|| dep_class
== ITANIUM_CLASS_STF
6400 || insn_class
== ITANIUM_CLASS_ST
|| insn_class
== ITANIUM_CLASS_STF
)
6406 /* Like emit_insn_before, but skip cycle_display notes.
6407 ??? When cycle display notes are implemented, update this. */
6410 ia64_emit_insn_before (rtx insn
, rtx before
)
6412 emit_insn_before (insn
, before
);
6415 /* The following function marks insns who produce addresses for load
6416 and store insns. Such insns will be placed into M slots because it
6417 decrease latency time for Itanium1 (see function
6418 `ia64_produce_address_p' and the DFA descriptions). */
6421 ia64_dependencies_evaluation_hook (rtx head
, rtx tail
)
6423 rtx insn
, next
, next_tail
;
6425 /* Before reload, which_alternative is not set, which means that
6426 ia64_safe_itanium_class will produce wrong results for (at least)
6427 move instructions. */
6428 if (!reload_completed
)
6431 next_tail
= NEXT_INSN (tail
);
6432 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
6435 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
6437 && ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IALU
)
6439 sd_iterator_def sd_it
;
6441 bool has_mem_op_consumer_p
= false;
6443 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
6445 enum attr_itanium_class c
;
6447 if (DEP_TYPE (dep
) != REG_DEP_TRUE
)
6450 next
= DEP_CON (dep
);
6451 c
= ia64_safe_itanium_class (next
);
6452 if ((c
== ITANIUM_CLASS_ST
6453 || c
== ITANIUM_CLASS_STF
)
6454 && ia64_st_address_bypass_p (insn
, next
))
6456 has_mem_op_consumer_p
= true;
6459 else if ((c
== ITANIUM_CLASS_LD
6460 || c
== ITANIUM_CLASS_FLD
6461 || c
== ITANIUM_CLASS_FLDP
)
6462 && ia64_ld_address_bypass_p (insn
, next
))
6464 has_mem_op_consumer_p
= true;
6469 insn
->call
= has_mem_op_consumer_p
;
6473 /* We're beginning a new block. Initialize data structures as necessary. */
6476 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
6477 int sched_verbose ATTRIBUTE_UNUSED
,
6478 int max_ready ATTRIBUTE_UNUSED
)
6480 #ifdef ENABLE_CHECKING
6483 if (reload_completed
)
6484 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
6485 insn
!= current_sched_info
->next_tail
;
6486 insn
= NEXT_INSN (insn
))
6487 gcc_assert (!SCHED_GROUP_P (insn
));
6489 last_scheduled_insn
= NULL_RTX
;
6490 init_insn_group_barriers ();
6493 /* We're beginning a scheduling pass. Check assertion. */
6496 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
6497 int sched_verbose ATTRIBUTE_UNUSED
,
6498 int max_ready ATTRIBUTE_UNUSED
)
6500 gcc_assert (!pending_data_specs
);
6503 /* Scheduling pass is now finished. Free/reset static variable. */
6505 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED
,
6506 int sched_verbose ATTRIBUTE_UNUSED
)
6508 free (spec_check_no
);
6513 /* We are about to being issuing insns for this clock cycle.
6514 Override the default sort algorithm to better slot instructions. */
6517 ia64_dfa_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
,
6518 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
,
6522 int n_ready
= *pn_ready
;
6523 rtx
*e_ready
= ready
+ n_ready
;
6527 fprintf (dump
, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type
);
6529 if (reorder_type
== 0)
6531 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6533 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6534 if (insnp
< e_ready
)
6537 enum attr_type t
= ia64_safe_type (insn
);
6538 if (t
== TYPE_UNKNOWN
)
6540 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6541 || asm_noperands (PATTERN (insn
)) >= 0)
6543 rtx lowest
= ready
[n_asms
];
6544 ready
[n_asms
] = insn
;
6550 rtx highest
= ready
[n_ready
- 1];
6551 ready
[n_ready
- 1] = insn
;
6558 if (n_asms
< n_ready
)
6560 /* Some normal insns to process. Skip the asms. */
6564 else if (n_ready
> 0)
6568 if (ia64_final_schedule
)
6571 int nr_need_stop
= 0;
6573 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6574 if (safe_group_barrier_needed (*insnp
))
6577 if (reorder_type
== 1 && n_ready
== nr_need_stop
)
6579 if (reorder_type
== 0)
6582 /* Move down everything that needs a stop bit, preserving
6584 while (insnp
-- > ready
+ deleted
)
6585 while (insnp
>= ready
+ deleted
)
6588 if (! safe_group_barrier_needed (insn
))
6590 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
6601 /* We are about to being issuing insns for this clock cycle. Override
6602 the default sort algorithm to better slot instructions. */
6605 ia64_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
6608 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
,
6609 pn_ready
, clock_var
, 0);
6612 /* Like ia64_sched_reorder, but called after issuing each insn.
6613 Override the default sort algorithm to better slot instructions. */
6616 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED
,
6617 int sched_verbose ATTRIBUTE_UNUSED
, rtx
*ready
,
6618 int *pn_ready
, int clock_var
)
6620 if (ia64_tune
== PROCESSOR_ITANIUM
&& reload_completed
&& last_scheduled_insn
)
6621 clocks
[INSN_UID (last_scheduled_insn
)] = clock_var
;
6622 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
6626 /* We are about to issue INSN. Return the number of insns left on the
6627 ready queue that can be issued this cycle. */
6630 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED
,
6631 int sched_verbose ATTRIBUTE_UNUSED
,
6632 rtx insn ATTRIBUTE_UNUSED
,
6633 int can_issue_more ATTRIBUTE_UNUSED
)
6635 if (current_sched_info
->flags
& DO_SPECULATION
)
6636 /* Modulo scheduling does not extend h_i_d when emitting
6637 new instructions. Deal with it. */
6639 if (DONE_SPEC (insn
) & BEGIN_DATA
)
6640 pending_data_specs
++;
6641 if (CHECK_SPEC (insn
) & BEGIN_DATA
)
6642 pending_data_specs
--;
6645 last_scheduled_insn
= insn
;
6646 memcpy (prev_cycle_state
, curr_state
, dfa_state_size
);
6647 if (reload_completed
)
6649 int needed
= group_barrier_needed (insn
);
6651 gcc_assert (!needed
);
6652 if (GET_CODE (insn
) == CALL_INSN
)
6653 init_insn_group_barriers ();
6654 stops_p
[INSN_UID (insn
)] = stop_before_p
;
6660 /* We are choosing insn from the ready queue. Return nonzero if INSN
6664 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn
)
6666 gcc_assert (insn
&& INSN_P (insn
));
6667 return ((!reload_completed
6668 || !safe_group_barrier_needed (insn
))
6669 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn
));
6672 /* We are choosing insn from the ready queue. Return nonzero if INSN
6676 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn
)
6678 gcc_assert (insn
&& INSN_P (insn
));
6679 /* Size of ALAT is 32. As far as we perform conservative data speculation,
6680 we keep ALAT half-empty. */
6681 return (pending_data_specs
< 16
6682 || !(TODO_SPEC (insn
) & BEGIN_DATA
));
6685 /* The following variable value is pseudo-insn used by the DFA insn
6686 scheduler to change the DFA state when the simulated clock is
6689 static rtx dfa_pre_cycle_insn
;
6691 /* We are about to being issuing INSN. Return nonzero if we cannot
6692 issue it on given cycle CLOCK and return zero if we should not sort
6693 the ready queue on the next clock start. */
6696 ia64_dfa_new_cycle (FILE *dump
, int verbose
, rtx insn
, int last_clock
,
6697 int clock
, int *sort_p
)
6699 int setup_clocks_p
= FALSE
;
6701 gcc_assert (insn
&& INSN_P (insn
));
6702 if ((reload_completed
&& safe_group_barrier_needed (insn
))
6703 || (last_scheduled_insn
6704 && (GET_CODE (last_scheduled_insn
) == CALL_INSN
6705 || GET_CODE (PATTERN (last_scheduled_insn
)) == ASM_INPUT
6706 || asm_noperands (PATTERN (last_scheduled_insn
)) >= 0)))
6708 init_insn_group_barriers ();
6709 if (verbose
&& dump
)
6710 fprintf (dump
, "// Stop should be before %d%s\n", INSN_UID (insn
),
6711 last_clock
== clock
? " + cycle advance" : "");
6713 if (last_clock
== clock
)
6715 state_transition (curr_state
, dfa_stop_insn
);
6716 if (TARGET_EARLY_STOP_BITS
)
6717 *sort_p
= (last_scheduled_insn
== NULL_RTX
6718 || GET_CODE (last_scheduled_insn
) != CALL_INSN
);
6723 else if (reload_completed
)
6724 setup_clocks_p
= TRUE
;
6725 if (GET_CODE (PATTERN (last_scheduled_insn
)) == ASM_INPUT
6726 || asm_noperands (PATTERN (last_scheduled_insn
)) >= 0)
6727 state_reset (curr_state
);
6730 memcpy (curr_state
, prev_cycle_state
, dfa_state_size
);
6731 state_transition (curr_state
, dfa_stop_insn
);
6732 state_transition (curr_state
, dfa_pre_cycle_insn
);
6733 state_transition (curr_state
, NULL
);
6736 else if (reload_completed
)
6737 setup_clocks_p
= TRUE
;
6738 if (setup_clocks_p
&& ia64_tune
== PROCESSOR_ITANIUM
6739 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
6740 && asm_noperands (PATTERN (insn
)) < 0)
6742 enum attr_itanium_class c
= ia64_safe_itanium_class (insn
);
6744 if (c
!= ITANIUM_CLASS_MMMUL
&& c
!= ITANIUM_CLASS_MMSHF
)
6746 sd_iterator_def sd_it
;
6750 FOR_EACH_DEP (insn
, SD_LIST_BACK
, sd_it
, dep
)
6751 if (DEP_TYPE (dep
) == REG_DEP_TRUE
)
6753 enum attr_itanium_class dep_class
;
6754 rtx dep_insn
= DEP_PRO (dep
);
6756 dep_class
= ia64_safe_itanium_class (dep_insn
);
6757 if ((dep_class
== ITANIUM_CLASS_MMMUL
6758 || dep_class
== ITANIUM_CLASS_MMSHF
)
6759 && last_clock
- clocks
[INSN_UID (dep_insn
)] < 4
6761 || last_clock
- clocks
[INSN_UID (dep_insn
)] < d
))
6762 d
= last_clock
- clocks
[INSN_UID (dep_insn
)];
6765 add_cycles
[INSN_UID (insn
)] = 3 - d
;
6771 /* Implement targetm.sched.h_i_d_extended hook.
6772 Extend internal data structures. */
6774 ia64_h_i_d_extended (void)
6776 if (current_sched_info
->flags
& DO_SPECULATION
)
6778 int new_max_uid
= get_max_uid () + 1;
6780 spec_check_no
= (int *) xrecalloc (spec_check_no
, new_max_uid
,
6781 max_uid
, sizeof (*spec_check_no
));
6782 max_uid
= new_max_uid
;
6785 if (stops_p
!= NULL
)
6787 int new_clocks_length
= get_max_uid () + 1;
6789 stops_p
= (char *) xrecalloc (stops_p
, new_clocks_length
, clocks_length
, 1);
6791 if (ia64_tune
== PROCESSOR_ITANIUM
)
6793 clocks
= (int *) xrecalloc (clocks
, new_clocks_length
, clocks_length
,
6795 add_cycles
= (int *) xrecalloc (add_cycles
, new_clocks_length
,
6796 clocks_length
, sizeof (int));
6799 clocks_length
= new_clocks_length
;
6803 /* Constants that help mapping 'enum machine_mode' to int. */
6806 SPEC_MODE_INVALID
= -1,
6807 SPEC_MODE_FIRST
= 0,
6808 SPEC_MODE_FOR_EXTEND_FIRST
= 1,
6809 SPEC_MODE_FOR_EXTEND_LAST
= 3,
6813 /* Return index of the MODE. */
6815 ia64_mode_to_int (enum machine_mode mode
)
6819 case BImode
: return 0; /* SPEC_MODE_FIRST */
6820 case QImode
: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
6821 case HImode
: return 2;
6822 case SImode
: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
6823 case DImode
: return 4;
6824 case SFmode
: return 5;
6825 case DFmode
: return 6;
6826 case XFmode
: return 7;
6828 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
6829 mentioned in itanium[12].md. Predicate fp_register_operand also
6830 needs to be defined. Bottom line: better disable for now. */
6831 return SPEC_MODE_INVALID
;
6832 default: return SPEC_MODE_INVALID
;
6836 /* Provide information about speculation capabilities. */
6838 ia64_set_sched_flags (spec_info_t spec_info
)
6840 unsigned int *flags
= &(current_sched_info
->flags
);
6842 if (*flags
& SCHED_RGN
6843 || *flags
& SCHED_EBB
)
6847 if ((mflag_sched_br_data_spec
&& !reload_completed
&& optimize
> 0)
6848 || (mflag_sched_ar_data_spec
&& reload_completed
))
6852 if ((mflag_sched_br_in_data_spec
&& !reload_completed
)
6853 || (mflag_sched_ar_in_data_spec
&& reload_completed
))
6857 if (mflag_sched_control_spec
)
6859 mask
|= BEGIN_CONTROL
;
6861 if (mflag_sched_in_control_spec
)
6862 mask
|= BE_IN_CONTROL
;
6867 *flags
|= USE_DEPS_LIST
| DO_SPECULATION
;
6869 if (mask
& BE_IN_SPEC
)
6872 spec_info
->mask
= mask
;
6873 spec_info
->flags
= 0;
6875 if ((mask
& DATA_SPEC
) && mflag_sched_prefer_non_data_spec_insns
)
6876 spec_info
->flags
|= PREFER_NON_DATA_SPEC
;
6878 if ((mask
& CONTROL_SPEC
)
6879 && mflag_sched_prefer_non_control_spec_insns
)
6880 spec_info
->flags
|= PREFER_NON_CONTROL_SPEC
;
6882 if (mflag_sched_spec_verbose
)
6884 if (sched_verbose
>= 1)
6885 spec_info
->dump
= sched_dump
;
6887 spec_info
->dump
= stderr
;
6890 spec_info
->dump
= 0;
6892 if (mflag_sched_count_spec_in_critical_path
)
6893 spec_info
->flags
|= COUNT_SPEC_IN_CRITICAL_PATH
;
6898 /* Implement targetm.sched.speculate_insn hook.
6899 Check if the INSN can be TS speculative.
6900 If 'no' - return -1.
6901 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
6902 If current pattern of the INSN already provides TS speculation, return 0. */
6904 ia64_speculate_insn (rtx insn
, ds_t ts
, rtx
*new_pat
)
6906 rtx pat
, reg
, mem
, mem_reg
;
6907 int mode_no
, gen_p
= 1;
6910 gcc_assert (!(ts
& ~BEGIN_SPEC
) && ts
);
6912 pat
= PATTERN (insn
);
6914 if (GET_CODE (pat
) == COND_EXEC
)
6915 pat
= COND_EXEC_CODE (pat
);
6917 /* This should be a SET ... */
6918 if (GET_CODE (pat
) != SET
)
6921 reg
= SET_DEST (pat
);
6922 /* ... to the general/fp register ... */
6923 if (!REG_P (reg
) || !(GR_REGNO_P (REGNO (reg
)) || FP_REGNO_P (REGNO (reg
))))
6926 /* ... from the mem ... */
6927 mem
= SET_SRC (pat
);
6929 /* ... that can, possibly, be a zero_extend ... */
6930 if (GET_CODE (mem
) == ZERO_EXTEND
)
6932 mem
= XEXP (mem
, 0);
6938 /* ... or a speculative load. */
6939 if (GET_CODE (mem
) == UNSPEC
)
6943 code
= XINT (mem
, 1);
6944 if (code
!= UNSPEC_LDA
&& code
!= UNSPEC_LDS
&& code
!= UNSPEC_LDSA
)
6947 if ((code
== UNSPEC_LDA
&& !(ts
& BEGIN_CONTROL
))
6948 || (code
== UNSPEC_LDS
&& !(ts
& BEGIN_DATA
))
6949 || code
== UNSPEC_LDSA
)
6952 mem
= XVECEXP (mem
, 0, 0);
6953 gcc_assert (MEM_P (mem
));
6956 /* Source should be a mem ... */
6960 /* ... addressed by a register. */
6961 mem_reg
= XEXP (mem
, 0);
6962 if (!REG_P (mem_reg
))
6965 /* We should use MEM's mode since REG's mode in presence of ZERO_EXTEND
6966 will always be DImode. */
6967 mode_no
= ia64_mode_to_int (GET_MODE (mem
));
6969 if (mode_no
== SPEC_MODE_INVALID
6971 && !(SPEC_MODE_FOR_EXTEND_FIRST
<= mode_no
6972 && mode_no
<= SPEC_MODE_FOR_EXTEND_LAST
)))
6975 extract_insn_cached (insn
);
6976 gcc_assert (reg
== recog_data
.operand
[0] && mem
== recog_data
.operand
[1]);
6978 *new_pat
= ia64_gen_spec_insn (insn
, ts
, mode_no
, gen_p
!= 0, extend_p
);
6985 /* Offset to reach ZERO_EXTEND patterns. */
6986 SPEC_GEN_EXTEND_OFFSET
= SPEC_MODE_LAST
- SPEC_MODE_FOR_EXTEND_FIRST
+ 1,
6987 /* Number of patterns for each speculation mode. */
6988 SPEC_N
= (SPEC_MODE_LAST
6989 + SPEC_MODE_FOR_EXTEND_LAST
- SPEC_MODE_FOR_EXTEND_FIRST
+ 2)
6992 enum SPEC_GEN_LD_MAP
6994 /* Offset to ld.a patterns. */
6995 SPEC_GEN_A
= 0 * SPEC_N
,
6996 /* Offset to ld.s patterns. */
6997 SPEC_GEN_S
= 1 * SPEC_N
,
6998 /* Offset to ld.sa patterns. */
6999 SPEC_GEN_SA
= 2 * SPEC_N
,
7000 /* Offset to ld.sa patterns. For this patterns corresponding ld.c will
7002 SPEC_GEN_SA_FOR_S
= 3 * SPEC_N
7005 /* These offsets are used to get (4 * SPEC_N). */
7006 enum SPEC_GEN_CHECK_OFFSET
7008 SPEC_GEN_CHKA_FOR_A_OFFSET
= 4 * SPEC_N
- SPEC_GEN_A
,
7009 SPEC_GEN_CHKA_FOR_SA_OFFSET
= 4 * SPEC_N
- SPEC_GEN_SA
7012 /* If GEN_P is true, calculate the index of needed speculation check and return
7013 speculative pattern for INSN with speculative mode TS, machine mode
7014 MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
7015 If GEN_P is false, just calculate the index of needed speculation check. */
7017 ia64_gen_spec_insn (rtx insn
, ds_t ts
, int mode_no
, bool gen_p
, bool extend_p
)
7023 static rtx (* const gen_load
[]) (rtx
, rtx
) = {
7033 gen_zero_extendqidi2_advanced
,
7034 gen_zero_extendhidi2_advanced
,
7035 gen_zero_extendsidi2_advanced
,
7037 gen_movbi_speculative
,
7038 gen_movqi_speculative
,
7039 gen_movhi_speculative
,
7040 gen_movsi_speculative
,
7041 gen_movdi_speculative
,
7042 gen_movsf_speculative
,
7043 gen_movdf_speculative
,
7044 gen_movxf_speculative
,
7045 gen_movti_speculative
,
7046 gen_zero_extendqidi2_speculative
,
7047 gen_zero_extendhidi2_speculative
,
7048 gen_zero_extendsidi2_speculative
,
7050 gen_movbi_speculative_advanced
,
7051 gen_movqi_speculative_advanced
,
7052 gen_movhi_speculative_advanced
,
7053 gen_movsi_speculative_advanced
,
7054 gen_movdi_speculative_advanced
,
7055 gen_movsf_speculative_advanced
,
7056 gen_movdf_speculative_advanced
,
7057 gen_movxf_speculative_advanced
,
7058 gen_movti_speculative_advanced
,
7059 gen_zero_extendqidi2_speculative_advanced
,
7060 gen_zero_extendhidi2_speculative_advanced
,
7061 gen_zero_extendsidi2_speculative_advanced
,
7063 gen_movbi_speculative_advanced
,
7064 gen_movqi_speculative_advanced
,
7065 gen_movhi_speculative_advanced
,
7066 gen_movsi_speculative_advanced
,
7067 gen_movdi_speculative_advanced
,
7068 gen_movsf_speculative_advanced
,
7069 gen_movdf_speculative_advanced
,
7070 gen_movxf_speculative_advanced
,
7071 gen_movti_speculative_advanced
,
7072 gen_zero_extendqidi2_speculative_advanced
,
7073 gen_zero_extendhidi2_speculative_advanced
,
7074 gen_zero_extendsidi2_speculative_advanced
7077 load_no
= extend_p
? mode_no
+ SPEC_GEN_EXTEND_OFFSET
: mode_no
;
7079 if (ts
& BEGIN_DATA
)
7081 /* We don't need recovery because even if this is ld.sa
7082 ALAT entry will be allocated only if NAT bit is set to zero.
7083 So it is enough to use ld.c here. */
7085 if (ts
& BEGIN_CONTROL
)
7087 load_no
+= SPEC_GEN_SA
;
7089 if (!mflag_sched_ldc
)
7090 shift
= SPEC_GEN_CHKA_FOR_SA_OFFSET
;
7094 load_no
+= SPEC_GEN_A
;
7096 if (!mflag_sched_ldc
)
7097 shift
= SPEC_GEN_CHKA_FOR_A_OFFSET
;
7100 else if (ts
& BEGIN_CONTROL
)
7102 /* ld.sa can be used instead of ld.s to avoid basic block splitting. */
7103 if (!mflag_control_ldc
)
7104 load_no
+= SPEC_GEN_S
;
7107 gcc_assert (mflag_sched_ldc
);
7108 load_no
+= SPEC_GEN_SA_FOR_S
;
7114 /* Set the desired check index. We add '1', because zero element in this
7115 array means, that instruction with such uid is non-speculative. */
7116 spec_check_no
[INSN_UID (insn
)] = load_no
+ shift
+ 1;
7121 new_pat
= gen_load
[load_no
] (copy_rtx (recog_data
.operand
[0]),
7122 copy_rtx (recog_data
.operand
[1]));
7124 pat
= PATTERN (insn
);
7125 if (GET_CODE (pat
) == COND_EXEC
)
7126 new_pat
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx
7127 (COND_EXEC_TEST (pat
)), new_pat
);
7132 /* Offset to branchy checks. */
7133 enum { SPEC_GEN_CHECK_MUTATION_OFFSET
= 5 * SPEC_N
};
7135 /* Return nonzero, if INSN needs branchy recovery check. */
7137 ia64_needs_block_p (const_rtx insn
)
7141 check_no
= spec_check_no
[INSN_UID(insn
)] - 1;
7142 gcc_assert (0 <= check_no
&& check_no
< SPEC_GEN_CHECK_MUTATION_OFFSET
);
7144 return ((SPEC_GEN_S
<= check_no
&& check_no
< SPEC_GEN_S
+ SPEC_N
)
7145 || (4 * SPEC_N
<= check_no
&& check_no
< 4 * SPEC_N
+ SPEC_N
));
7148 /* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN.
7149 If (LABEL != 0 || MUTATE_P), generate branchy recovery check.
7150 Otherwise, generate a simple check. */
7152 ia64_gen_check (rtx insn
, rtx label
, bool mutate_p
)
7154 rtx op1
, pat
, check_pat
;
7156 static rtx (* const gen_check
[]) (rtx
, rtx
) = {
7166 gen_zero_extendqidi2_clr
,
7167 gen_zero_extendhidi2_clr
,
7168 gen_zero_extendsidi2_clr
,
7170 gen_speculation_check_bi
,
7171 gen_speculation_check_qi
,
7172 gen_speculation_check_hi
,
7173 gen_speculation_check_si
,
7174 gen_speculation_check_di
,
7175 gen_speculation_check_sf
,
7176 gen_speculation_check_df
,
7177 gen_speculation_check_xf
,
7178 gen_speculation_check_ti
,
7179 gen_speculation_check_di
,
7180 gen_speculation_check_di
,
7181 gen_speculation_check_di
,
7192 gen_zero_extendqidi2_clr
,
7193 gen_zero_extendhidi2_clr
,
7194 gen_zero_extendsidi2_clr
,
7205 gen_zero_extendqidi2_clr
,
7206 gen_zero_extendhidi2_clr
,
7207 gen_zero_extendsidi2_clr
,
7209 gen_advanced_load_check_clr_bi
,
7210 gen_advanced_load_check_clr_qi
,
7211 gen_advanced_load_check_clr_hi
,
7212 gen_advanced_load_check_clr_si
,
7213 gen_advanced_load_check_clr_di
,
7214 gen_advanced_load_check_clr_sf
,
7215 gen_advanced_load_check_clr_df
,
7216 gen_advanced_load_check_clr_xf
,
7217 gen_advanced_load_check_clr_ti
,
7218 gen_advanced_load_check_clr_di
,
7219 gen_advanced_load_check_clr_di
,
7220 gen_advanced_load_check_clr_di
,
7222 /* Following checks are generated during mutation. */
7223 gen_advanced_load_check_clr_bi
,
7224 gen_advanced_load_check_clr_qi
,
7225 gen_advanced_load_check_clr_hi
,
7226 gen_advanced_load_check_clr_si
,
7227 gen_advanced_load_check_clr_di
,
7228 gen_advanced_load_check_clr_sf
,
7229 gen_advanced_load_check_clr_df
,
7230 gen_advanced_load_check_clr_xf
,
7231 gen_advanced_load_check_clr_ti
,
7232 gen_advanced_load_check_clr_di
,
7233 gen_advanced_load_check_clr_di
,
7234 gen_advanced_load_check_clr_di
,
7236 0,0,0,0,0,0,0,0,0,0,0,0,
7238 gen_advanced_load_check_clr_bi
,
7239 gen_advanced_load_check_clr_qi
,
7240 gen_advanced_load_check_clr_hi
,
7241 gen_advanced_load_check_clr_si
,
7242 gen_advanced_load_check_clr_di
,
7243 gen_advanced_load_check_clr_sf
,
7244 gen_advanced_load_check_clr_df
,
7245 gen_advanced_load_check_clr_xf
,
7246 gen_advanced_load_check_clr_ti
,
7247 gen_advanced_load_check_clr_di
,
7248 gen_advanced_load_check_clr_di
,
7249 gen_advanced_load_check_clr_di
,
7251 gen_speculation_check_bi
,
7252 gen_speculation_check_qi
,
7253 gen_speculation_check_hi
,
7254 gen_speculation_check_si
,
7255 gen_speculation_check_di
,
7256 gen_speculation_check_sf
,
7257 gen_speculation_check_df
,
7258 gen_speculation_check_xf
,
7259 gen_speculation_check_ti
,
7260 gen_speculation_check_di
,
7261 gen_speculation_check_di
,
7262 gen_speculation_check_di
7265 extract_insn_cached (insn
);
7269 gcc_assert (mutate_p
|| ia64_needs_block_p (insn
));
7274 gcc_assert (!mutate_p
&& !ia64_needs_block_p (insn
));
7275 op1
= copy_rtx (recog_data
.operand
[1]);
7280 Find the speculation check number by searching for original
7281 speculative load in the RESOLVED_DEPS list of INSN.
7282 As long as patterns are unique for each instruction, this can be
7283 accomplished by matching ORIG_PAT fields. */
7285 sd_iterator_def sd_it
;
7288 rtx orig_pat
= ORIG_PAT (insn
);
7290 FOR_EACH_DEP (insn
, SD_LIST_RES_BACK
, sd_it
, dep
)
7292 rtx x
= DEP_PRO (dep
);
7294 if (ORIG_PAT (x
) == orig_pat
)
7295 check_no
= spec_check_no
[INSN_UID (x
)];
7297 gcc_assert (check_no
);
7299 spec_check_no
[INSN_UID (insn
)] = (check_no
7300 + SPEC_GEN_CHECK_MUTATION_OFFSET
);
7303 check_pat
= (gen_check
[spec_check_no
[INSN_UID (insn
)] - 1]
7304 (copy_rtx (recog_data
.operand
[0]), op1
));
7306 pat
= PATTERN (insn
);
7307 if (GET_CODE (pat
) == COND_EXEC
)
7308 check_pat
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (COND_EXEC_TEST (pat
)),
7314 /* Return nonzero, if X is branchy recovery check. */
7316 ia64_spec_check_p (rtx x
)
7319 if (GET_CODE (x
) == COND_EXEC
)
7320 x
= COND_EXEC_CODE (x
);
7321 if (GET_CODE (x
) == SET
)
7322 return ia64_spec_check_src_p (SET_SRC (x
));
7326 /* Return nonzero, if SRC belongs to recovery check. */
7328 ia64_spec_check_src_p (rtx src
)
7330 if (GET_CODE (src
) == IF_THEN_ELSE
)
7335 if (GET_CODE (t
) == NE
)
7339 if (GET_CODE (t
) == UNSPEC
)
7345 if (code
== UNSPEC_CHKACLR
7346 || code
== UNSPEC_CHKS
7347 || code
== UNSPEC_LDCCLR
)
7349 gcc_assert (code
!= 0);
7359 /* The following page contains abstract data `bundle states' which are
7360 used for bundling insns (inserting nops and template generation). */
7362 /* The following describes state of insn bundling. */
7366 /* Unique bundle state number to identify them in the debugging
7369 rtx insn
; /* corresponding insn, NULL for the 1st and the last state */
7370 /* number nops before and after the insn */
7371 short before_nops_num
, after_nops_num
;
7372 int insn_num
; /* insn number (0 - for initial state, 1 - for the 1st
7374 int cost
; /* cost of the state in cycles */
7375 int accumulated_insns_num
; /* number of all previous insns including
7376 nops. L is considered as 2 insns */
7377 int branch_deviation
; /* deviation of previous branches from 3rd slots */
7378 struct bundle_state
*next
; /* next state with the same insn_num */
7379 struct bundle_state
*originator
; /* originator (previous insn state) */
7380 /* All bundle states are in the following chain. */
7381 struct bundle_state
*allocated_states_chain
;
7382 /* The DFA State after issuing the insn and the nops. */
/* The following is map insn number to the corresponding bundle state.  */

static struct bundle_state **index_to_bundle_states;

/* The unique number of next bundle state.  */

static int bundle_states_num;

/* All allocated bundle states are in the following chain.  */

static struct bundle_state *allocated_bundle_states_chain;

/* All allocated but not used bundle states are in the following
   chain.  */

static struct bundle_state *free_bundle_state_chain;
7404 /* The following function returns a free bundle state. */
7406 static struct bundle_state
*
7407 get_free_bundle_state (void)
7409 struct bundle_state
*result
;
7411 if (free_bundle_state_chain
!= NULL
)
7413 result
= free_bundle_state_chain
;
7414 free_bundle_state_chain
= result
->next
;
7418 result
= XNEW (struct bundle_state
);
7419 result
->dfa_state
= xmalloc (dfa_state_size
);
7420 result
->allocated_states_chain
= allocated_bundle_states_chain
;
7421 allocated_bundle_states_chain
= result
;
7423 result
->unique_num
= bundle_states_num
++;
7428 /* The following function frees given bundle state. */
7431 free_bundle_state (struct bundle_state
*state
)
7433 state
->next
= free_bundle_state_chain
;
7434 free_bundle_state_chain
= state
;
7437 /* Start work with abstract data `bundle states'. */
7440 initiate_bundle_states (void)
7442 bundle_states_num
= 0;
7443 free_bundle_state_chain
= NULL
;
7444 allocated_bundle_states_chain
= NULL
;
7447 /* Finish work with abstract data `bundle states'. */
7450 finish_bundle_states (void)
7452 struct bundle_state
*curr_state
, *next_state
;
7454 for (curr_state
= allocated_bundle_states_chain
;
7456 curr_state
= next_state
)
7458 next_state
= curr_state
->allocated_states_chain
;
7459 free (curr_state
->dfa_state
);
7464 /* Hash table of the bundle states. The key is dfa_state and insn_num
7465 of the bundle states. */
7467 static htab_t bundle_state_table
;
7469 /* The function returns hash of BUNDLE_STATE. */
7472 bundle_state_hash (const void *bundle_state
)
7474 const struct bundle_state
*const state
7475 = (const struct bundle_state
*) bundle_state
;
7478 for (result
= i
= 0; i
< dfa_state_size
; i
++)
7479 result
+= (((unsigned char *) state
->dfa_state
) [i
]
7480 << ((i
% CHAR_BIT
) * 3 + CHAR_BIT
));
7481 return result
+ state
->insn_num
;
7484 /* The function returns nonzero if the bundle state keys are equal. */
7487 bundle_state_eq_p (const void *bundle_state_1
, const void *bundle_state_2
)
7489 const struct bundle_state
*const state1
7490 = (const struct bundle_state
*) bundle_state_1
;
7491 const struct bundle_state
*const state2
7492 = (const struct bundle_state
*) bundle_state_2
;
7494 return (state1
->insn_num
== state2
->insn_num
7495 && memcmp (state1
->dfa_state
, state2
->dfa_state
,
7496 dfa_state_size
) == 0);
7499 /* The function inserts the BUNDLE_STATE into the hash table. The
7500 function returns nonzero if the bundle has been inserted into the
7501 table. The table contains the best bundle state with given key. */
7504 insert_bundle_state (struct bundle_state
*bundle_state
)
7508 entry_ptr
= htab_find_slot (bundle_state_table
, bundle_state
, 1);
7509 if (*entry_ptr
== NULL
)
7511 bundle_state
->next
= index_to_bundle_states
[bundle_state
->insn_num
];
7512 index_to_bundle_states
[bundle_state
->insn_num
] = bundle_state
;
7513 *entry_ptr
= (void *) bundle_state
;
7516 else if (bundle_state
->cost
< ((struct bundle_state
*) *entry_ptr
)->cost
7517 || (bundle_state
->cost
== ((struct bundle_state
*) *entry_ptr
)->cost
7518 && (((struct bundle_state
*)*entry_ptr
)->accumulated_insns_num
7519 > bundle_state
->accumulated_insns_num
7520 || (((struct bundle_state
*)
7521 *entry_ptr
)->accumulated_insns_num
7522 == bundle_state
->accumulated_insns_num
7523 && ((struct bundle_state
*)
7524 *entry_ptr
)->branch_deviation
7525 > bundle_state
->branch_deviation
))))
7528 struct bundle_state temp
;
7530 temp
= *(struct bundle_state
*) *entry_ptr
;
7531 *(struct bundle_state
*) *entry_ptr
= *bundle_state
;
7532 ((struct bundle_state
*) *entry_ptr
)->next
= temp
.next
;
7533 *bundle_state
= temp
;
7538 /* Start work with the hash table. */
7541 initiate_bundle_state_table (void)
7543 bundle_state_table
= htab_create (50, bundle_state_hash
, bundle_state_eq_p
,
7547 /* Finish work with the hash table. */
7550 finish_bundle_state_table (void)
7552 htab_delete (bundle_state_table
);
7557 /* The following variable is a insn `nop' used to check bundle states
7558 with different number of inserted nops. */
7560 static rtx ia64_nop
;
7562 /* The following function tries to issue NOPS_NUM nops for the current
7563 state without advancing processor cycle. If it failed, the
7564 function returns FALSE and frees the current state. */
7567 try_issue_nops (struct bundle_state
*curr_state
, int nops_num
)
7571 for (i
= 0; i
< nops_num
; i
++)
7572 if (state_transition (curr_state
->dfa_state
, ia64_nop
) >= 0)
7574 free_bundle_state (curr_state
);
7580 /* The following function tries to issue INSN for the current
7581 state without advancing processor cycle. If it failed, the
7582 function returns FALSE and frees the current state. */
7585 try_issue_insn (struct bundle_state
*curr_state
, rtx insn
)
7587 if (insn
&& state_transition (curr_state
->dfa_state
, insn
) >= 0)
7589 free_bundle_state (curr_state
);
7595 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
7596 starting with ORIGINATOR without advancing processor cycle. If
7597 TRY_BUNDLE_END_P is TRUE, the function also/only (if
7598 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
7599 If it was successful, the function creates new bundle state and
7600 insert into the hash table and into `index_to_bundle_states'. */
7603 issue_nops_and_insn (struct bundle_state
*originator
, int before_nops_num
,
7604 rtx insn
, int try_bundle_end_p
, int only_bundle_end_p
)
7606 struct bundle_state
*curr_state
;
7608 curr_state
= get_free_bundle_state ();
7609 memcpy (curr_state
->dfa_state
, originator
->dfa_state
, dfa_state_size
);
7610 curr_state
->insn
= insn
;
7611 curr_state
->insn_num
= originator
->insn_num
+ 1;
7612 curr_state
->cost
= originator
->cost
;
7613 curr_state
->originator
= originator
;
7614 curr_state
->before_nops_num
= before_nops_num
;
7615 curr_state
->after_nops_num
= 0;
7616 curr_state
->accumulated_insns_num
7617 = originator
->accumulated_insns_num
+ before_nops_num
;
7618 curr_state
->branch_deviation
= originator
->branch_deviation
;
7620 if (INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
)
7622 gcc_assert (GET_MODE (insn
) != TImode
);
7623 if (!try_issue_nops (curr_state
, before_nops_num
))
7625 if (!try_issue_insn (curr_state
, insn
))
7627 memcpy (temp_dfa_state
, curr_state
->dfa_state
, dfa_state_size
);
7628 if (state_transition (temp_dfa_state
, dfa_pre_cycle_insn
) >= 0
7629 && curr_state
->accumulated_insns_num
% 3 != 0)
7631 free_bundle_state (curr_state
);
7635 else if (GET_MODE (insn
) != TImode
)
7637 if (!try_issue_nops (curr_state
, before_nops_num
))
7639 if (!try_issue_insn (curr_state
, insn
))
7641 curr_state
->accumulated_insns_num
++;
7642 gcc_assert (GET_CODE (PATTERN (insn
)) != ASM_INPUT
7643 && asm_noperands (PATTERN (insn
)) < 0);
7645 if (ia64_safe_type (insn
) == TYPE_L
)
7646 curr_state
->accumulated_insns_num
++;
7650 /* If this is an insn that must be first in a group, then don't allow
7651 nops to be emitted before it. Currently, alloc is the only such
7652 supported instruction. */
7653 /* ??? The bundling automatons should handle this for us, but they do
7654 not yet have support for the first_insn attribute. */
7655 if (before_nops_num
> 0 && get_attr_first_insn (insn
) == FIRST_INSN_YES
)
7657 free_bundle_state (curr_state
);
7661 state_transition (curr_state
->dfa_state
, dfa_pre_cycle_insn
);
7662 state_transition (curr_state
->dfa_state
, NULL
);
7664 if (!try_issue_nops (curr_state
, before_nops_num
))
7666 if (!try_issue_insn (curr_state
, insn
))
7668 curr_state
->accumulated_insns_num
++;
7669 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
7670 || asm_noperands (PATTERN (insn
)) >= 0)
7672 /* Finish bundle containing asm insn. */
7673 curr_state
->after_nops_num
7674 = 3 - curr_state
->accumulated_insns_num
% 3;
7675 curr_state
->accumulated_insns_num
7676 += 3 - curr_state
->accumulated_insns_num
% 3;
7678 else if (ia64_safe_type (insn
) == TYPE_L
)
7679 curr_state
->accumulated_insns_num
++;
7681 if (ia64_safe_type (insn
) == TYPE_B
)
7682 curr_state
->branch_deviation
7683 += 2 - (curr_state
->accumulated_insns_num
- 1) % 3;
7684 if (try_bundle_end_p
&& curr_state
->accumulated_insns_num
% 3 != 0)
7686 if (!only_bundle_end_p
&& insert_bundle_state (curr_state
))
7689 struct bundle_state
*curr_state1
;
7690 struct bundle_state
*allocated_states_chain
;
7692 curr_state1
= get_free_bundle_state ();
7693 dfa_state
= curr_state1
->dfa_state
;
7694 allocated_states_chain
= curr_state1
->allocated_states_chain
;
7695 *curr_state1
= *curr_state
;
7696 curr_state1
->dfa_state
= dfa_state
;
7697 curr_state1
->allocated_states_chain
= allocated_states_chain
;
7698 memcpy (curr_state1
->dfa_state
, curr_state
->dfa_state
,
7700 curr_state
= curr_state1
;
7702 if (!try_issue_nops (curr_state
,
7703 3 - curr_state
->accumulated_insns_num
% 3))
7705 curr_state
->after_nops_num
7706 = 3 - curr_state
->accumulated_insns_num
% 3;
7707 curr_state
->accumulated_insns_num
7708 += 3 - curr_state
->accumulated_insns_num
% 3;
7710 if (!insert_bundle_state (curr_state
))
7711 free_bundle_state (curr_state
);
7715 /* The following function returns position in the two window bundle
7719 get_max_pos (state_t state
)
7721 if (cpu_unit_reservation_p (state
, pos_6
))
7723 else if (cpu_unit_reservation_p (state
, pos_5
))
7725 else if (cpu_unit_reservation_p (state
, pos_4
))
7727 else if (cpu_unit_reservation_p (state
, pos_3
))
7729 else if (cpu_unit_reservation_p (state
, pos_2
))
7731 else if (cpu_unit_reservation_p (state
, pos_1
))
7737 /* The function returns code of a possible template for given position
7738 and state. The function should be called only with 2 values of
7739 position equal to 3 or 6. We avoid generating F NOPs by putting
7740 templates containing F insns at the end of the template search
7741 because undocumented anomaly in McKinley derived cores which can
7742 cause stalls if an F-unit insn (including a NOP) is issued within a
7743 six-cycle window after reading certain application registers (such
7744 as ar.bsp). Furthermore, power-considerations also argue against
7745 the use of F-unit instructions unless they're really needed. */
7748 get_template (state_t state
, int pos
)
7753 if (cpu_unit_reservation_p (state
, _0mmi_
))
7755 else if (cpu_unit_reservation_p (state
, _0mii_
))
7757 else if (cpu_unit_reservation_p (state
, _0mmb_
))
7759 else if (cpu_unit_reservation_p (state
, _0mib_
))
7761 else if (cpu_unit_reservation_p (state
, _0mbb_
))
7763 else if (cpu_unit_reservation_p (state
, _0bbb_
))
7765 else if (cpu_unit_reservation_p (state
, _0mmf_
))
7767 else if (cpu_unit_reservation_p (state
, _0mfi_
))
7769 else if (cpu_unit_reservation_p (state
, _0mfb_
))
7771 else if (cpu_unit_reservation_p (state
, _0mlx_
))
7776 if (cpu_unit_reservation_p (state
, _1mmi_
))
7778 else if (cpu_unit_reservation_p (state
, _1mii_
))
7780 else if (cpu_unit_reservation_p (state
, _1mmb_
))
7782 else if (cpu_unit_reservation_p (state
, _1mib_
))
7784 else if (cpu_unit_reservation_p (state
, _1mbb_
))
7786 else if (cpu_unit_reservation_p (state
, _1bbb_
))
7788 else if (_1mmf_
>= 0 && cpu_unit_reservation_p (state
, _1mmf_
))
7790 else if (cpu_unit_reservation_p (state
, _1mfi_
))
7792 else if (cpu_unit_reservation_p (state
, _1mfb_
))
7794 else if (cpu_unit_reservation_p (state
, _1mlx_
))
7803 /* The following function returns an insn important for insn bundling
7804 followed by INSN and before TAIL. */
7807 get_next_important_insn (rtx insn
, rtx tail
)
7809 for (; insn
&& insn
!= tail
; insn
= NEXT_INSN (insn
))
7811 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
7812 && GET_CODE (PATTERN (insn
)) != USE
7813 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
7818 /* Add a bundle selector TEMPLATE0 before INSN. */
7821 ia64_add_bundle_selector_before (int template0
, rtx insn
)
7823 rtx b
= gen_bundle_selector (GEN_INT (template0
));
7825 ia64_emit_insn_before (b
, insn
);
7826 #if NR_BUNDLES == 10
7827 if ((template0
== 4 || template0
== 5)
7828 && (flag_unwind_tables
|| (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
)))
7831 rtx note
= NULL_RTX
;
7833 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
7834 first or second slot. If it is and has REG_EH_NOTE set, copy it
7835 to following nops, as br.call sets rp to the address of following
7836 bundle and therefore an EH region end must be on a bundle
7838 insn
= PREV_INSN (insn
);
7839 for (i
= 0; i
< 3; i
++)
7842 insn
= next_active_insn (insn
);
7843 while (GET_CODE (insn
) == INSN
7844 && get_attr_empty (insn
) == EMPTY_YES
);
7845 if (GET_CODE (insn
) == CALL_INSN
)
7846 note
= find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
);
7851 gcc_assert ((code
= recog_memoized (insn
)) == CODE_FOR_nop
7852 || code
== CODE_FOR_nop_b
);
7853 if (find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
))
7857 = gen_rtx_EXPR_LIST (REG_EH_REGION
, XEXP (note
, 0),
7865 /* The following function does insn bundling. Bundling means
7866 inserting templates and nop insns to fit insn groups into permitted
7867 templates. Instruction scheduling uses NDFA (non-deterministic
7868 finite automata) encoding informations about the templates and the
7869 inserted nops. Nondeterminism of the automata permits follows
7870 all possible insn sequences very fast.
7872 Unfortunately it is not possible to get information about inserting
7873 nop insns and used templates from the automata states. The
7874 automata only says that we can issue an insn possibly inserting
7875 some nops before it and using some template. Therefore insn
7876 bundling in this function is implemented by using DFA
7877 (deterministic finite automata). We follow all possible insn
7878 sequences by inserting 0-2 nops (that is what the NDFA describe for
7879 insn scheduling) before/after each insn being bundled. We know the
7880 start of simulated processor cycle from insn scheduling (insn
7881 starting a new cycle has TImode).
7883 Simple implementation of insn bundling would create enormous
7884 number of possible insn sequences satisfying information about new
7885 cycle ticks taken from the insn scheduling. To make the algorithm
7886 practical we use dynamic programming. Each decision (about
7887 inserting nops and implicitly about previous decisions) is described
7888 by structure bundle_state (see above). If we generate the same
7889 bundle state (key is automaton state after issuing the insns and
7890 nops for it), we reuse already generated one. As consequence we
7891 reject some decisions which cannot improve the solution and
7892 reduce memory for the algorithm.
7894 When we reach the end of EBB (extended basic block), we choose the
7895 best sequence and then, moving back in EBB, insert templates for
7896 the best alternative. The templates are taken from querying
7897 automaton state for each insn in chosen bundle states.
7899 So the algorithm makes two (forward and backward) passes through
7900 EBB. There is an additional forward pass through EBB for Itanium1
7901 processor. This pass inserts more nops to make dependency between
7902 a producer insn and MMMUL/MMSHF at least 4 cycles long. */
7905 bundling (FILE *dump
, int verbose
, rtx prev_head_insn
, rtx tail
)
7907 struct bundle_state
*curr_state
, *next_state
, *best_state
;
7908 rtx insn
, next_insn
;
7910 int i
, bundle_end_p
, only_bundle_end_p
, asm_p
;
7911 int pos
= 0, max_pos
, template0
, template1
;
7914 enum attr_type type
;
7917 /* Count insns in the EBB. */
7918 for (insn
= NEXT_INSN (prev_head_insn
);
7919 insn
&& insn
!= tail
;
7920 insn
= NEXT_INSN (insn
))
7926 dfa_clean_insn_cache ();
7927 initiate_bundle_state_table ();
7928 index_to_bundle_states
= XNEWVEC (struct bundle_state
*, insn_num
+ 2);
7929 /* First (forward) pass -- generation of bundle states. */
7930 curr_state
= get_free_bundle_state ();
7931 curr_state
->insn
= NULL
;
7932 curr_state
->before_nops_num
= 0;
7933 curr_state
->after_nops_num
= 0;
7934 curr_state
->insn_num
= 0;
7935 curr_state
->cost
= 0;
7936 curr_state
->accumulated_insns_num
= 0;
7937 curr_state
->branch_deviation
= 0;
7938 curr_state
->next
= NULL
;
7939 curr_state
->originator
= NULL
;
7940 state_reset (curr_state
->dfa_state
);
7941 index_to_bundle_states
[0] = curr_state
;
7943 /* Shift cycle mark if it is put on insn which could be ignored. */
7944 for (insn
= NEXT_INSN (prev_head_insn
);
7946 insn
= NEXT_INSN (insn
))
7948 && (ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
7949 || GET_CODE (PATTERN (insn
)) == USE
7950 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
7951 && GET_MODE (insn
) == TImode
)
7953 PUT_MODE (insn
, VOIDmode
);
7954 for (next_insn
= NEXT_INSN (insn
);
7956 next_insn
= NEXT_INSN (next_insn
))
7957 if (INSN_P (next_insn
)
7958 && ia64_safe_itanium_class (next_insn
) != ITANIUM_CLASS_IGNORE
7959 && GET_CODE (PATTERN (next_insn
)) != USE
7960 && GET_CODE (PATTERN (next_insn
)) != CLOBBER
)
7962 PUT_MODE (next_insn
, TImode
);
7966 /* Forward pass: generation of bundle states. */
7967 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
7971 gcc_assert (INSN_P (insn
)
7972 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
7973 && GET_CODE (PATTERN (insn
)) != USE
7974 && GET_CODE (PATTERN (insn
)) != CLOBBER
);
7975 type
= ia64_safe_type (insn
);
7976 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
7978 index_to_bundle_states
[insn_num
] = NULL
;
7979 for (curr_state
= index_to_bundle_states
[insn_num
- 1];
7981 curr_state
= next_state
)
7983 pos
= curr_state
->accumulated_insns_num
% 3;
7984 next_state
= curr_state
->next
;
7985 /* We must fill up the current bundle in order to start a
7986 subsequent asm insn in a new bundle. Asm insn is always
7987 placed in a separate bundle. */
7989 = (next_insn
!= NULL_RTX
7990 && INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
7991 && ia64_safe_type (next_insn
) == TYPE_UNKNOWN
);
7992 /* We may fill up the current bundle if it is the cycle end
7993 without a group barrier. */
7995 = (only_bundle_end_p
|| next_insn
== NULL_RTX
7996 || (GET_MODE (next_insn
) == TImode
7997 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
));
7998 if (type
== TYPE_F
|| type
== TYPE_B
|| type
== TYPE_L
8000 /* We need to insert 2 nops for cases like M_MII. To
8001 guarantee issuing all insns on the same cycle for
8002 Itanium 1, we need to issue 2 nops after the first M
8003 insn (MnnMII where n is a nop insn). */
8004 || ((type
== TYPE_M
|| type
== TYPE_A
)
8005 && ia64_tune
== PROCESSOR_ITANIUM
8006 && !bundle_end_p
&& pos
== 1))
8007 issue_nops_and_insn (curr_state
, 2, insn
, bundle_end_p
,
8009 issue_nops_and_insn (curr_state
, 1, insn
, bundle_end_p
,
8011 issue_nops_and_insn (curr_state
, 0, insn
, bundle_end_p
,
8014 gcc_assert (index_to_bundle_states
[insn_num
]);
8015 for (curr_state
= index_to_bundle_states
[insn_num
];
8017 curr_state
= curr_state
->next
)
8018 if (verbose
>= 2 && dump
)
8020 /* This structure is taken from generated code of the
8021 pipeline hazard recognizer (see file insn-attrtab.c).
8022 Please don't forget to change the structure if a new
8023 automaton is added to .md file. */
8026 unsigned short one_automaton_state
;
8027 unsigned short oneb_automaton_state
;
8028 unsigned short two_automaton_state
;
8029 unsigned short twob_automaton_state
;
8034 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
8035 curr_state
->unique_num
,
8036 (curr_state
->originator
== NULL
8037 ? -1 : curr_state
->originator
->unique_num
),
8039 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
8040 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
8041 (ia64_tune
== PROCESSOR_ITANIUM
8042 ? ((struct DFA_chip
*) curr_state
->dfa_state
)->oneb_automaton_state
8043 : ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
),
8048 /* We should find a solution because the 2nd insn scheduling has
8050 gcc_assert (index_to_bundle_states
[insn_num
]);
8051 /* Find a state corresponding to the best insn sequence. */
8053 for (curr_state
= index_to_bundle_states
[insn_num
];
8055 curr_state
= curr_state
->next
)
8056 /* We are just looking at the states with fully filled up last
8057 bundle. The first we prefer insn sequences with minimal cost
8058 then with minimal inserted nops and finally with branch insns
8059 placed in the 3rd slots. */
8060 if (curr_state
->accumulated_insns_num
% 3 == 0
8061 && (best_state
== NULL
|| best_state
->cost
> curr_state
->cost
8062 || (best_state
->cost
== curr_state
->cost
8063 && (curr_state
->accumulated_insns_num
8064 < best_state
->accumulated_insns_num
8065 || (curr_state
->accumulated_insns_num
8066 == best_state
->accumulated_insns_num
8067 && curr_state
->branch_deviation
8068 < best_state
->branch_deviation
)))))
8069 best_state
= curr_state
;
8070 /* Second (backward) pass: adding nops and templates. */
8071 insn_num
= best_state
->before_nops_num
;
8072 template0
= template1
= -1;
8073 for (curr_state
= best_state
;
8074 curr_state
->originator
!= NULL
;
8075 curr_state
= curr_state
->originator
)
8077 insn
= curr_state
->insn
;
8078 asm_p
= (GET_CODE (PATTERN (insn
)) == ASM_INPUT
8079 || asm_noperands (PATTERN (insn
)) >= 0);
8081 if (verbose
>= 2 && dump
)
8085 unsigned short one_automaton_state
;
8086 unsigned short oneb_automaton_state
;
8087 unsigned short two_automaton_state
;
8088 unsigned short twob_automaton_state
;
8093 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
8094 curr_state
->unique_num
,
8095 (curr_state
->originator
== NULL
8096 ? -1 : curr_state
->originator
->unique_num
),
8098 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
8099 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
8100 (ia64_tune
== PROCESSOR_ITANIUM
8101 ? ((struct DFA_chip
*) curr_state
->dfa_state
)->oneb_automaton_state
8102 : ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
),
8105 /* Find the position in the current bundle window. The window can
8106 contain at most two bundles. Two bundle window means that
8107 the processor will make two bundle rotation. */
8108 max_pos
= get_max_pos (curr_state
->dfa_state
);
8110 /* The following (negative template number) means that the
8111 processor did one bundle rotation. */
8112 || (max_pos
== 3 && template0
< 0))
8114 /* We are at the end of the window -- find template(s) for
8118 template0
= get_template (curr_state
->dfa_state
, 3);
8121 template1
= get_template (curr_state
->dfa_state
, 3);
8122 template0
= get_template (curr_state
->dfa_state
, 6);
8125 if (max_pos
> 3 && template1
< 0)
8126 /* It may happen when we have the stop inside a bundle. */
8128 gcc_assert (pos
<= 3);
8129 template1
= get_template (curr_state
->dfa_state
, 3);
8133 /* Emit nops after the current insn. */
8134 for (i
= 0; i
< curr_state
->after_nops_num
; i
++)
8137 emit_insn_after (nop
, insn
);
8139 gcc_assert (pos
>= 0);
8142 /* We are at the start of a bundle: emit the template
8143 (it should be defined). */
8144 gcc_assert (template0
>= 0);
8145 ia64_add_bundle_selector_before (template0
, nop
);
8146 /* If we have two bundle window, we make one bundle
8147 rotation. Otherwise template0 will be undefined
8148 (negative value). */
8149 template0
= template1
;
8153 /* Move the position backward in the window. Group barrier has
8154 no slot. Asm insn takes all bundle. */
8155 if (INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
8156 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
8157 && asm_noperands (PATTERN (insn
)) < 0)
8159 /* Long insn takes 2 slots. */
8160 if (ia64_safe_type (insn
) == TYPE_L
)
8162 gcc_assert (pos
>= 0);
8164 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
8165 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
8166 && asm_noperands (PATTERN (insn
)) < 0)
8168 /* The current insn is at the bundle start: emit the
8170 gcc_assert (template0
>= 0);
8171 ia64_add_bundle_selector_before (template0
, insn
);
8172 b
= PREV_INSN (insn
);
8174 /* See comment above in analogous place for emitting nops
8176 template0
= template1
;
8179 /* Emit nops after the current insn. */
8180 for (i
= 0; i
< curr_state
->before_nops_num
; i
++)
8183 ia64_emit_insn_before (nop
, insn
);
8184 nop
= PREV_INSN (insn
);
8187 gcc_assert (pos
>= 0);
8190 /* See comment above in analogous place for emitting nops
8192 gcc_assert (template0
>= 0);
8193 ia64_add_bundle_selector_before (template0
, insn
);
8194 b
= PREV_INSN (insn
);
8196 template0
= template1
;
8201 if (ia64_tune
== PROCESSOR_ITANIUM
)
8202 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
8203 Itanium1 has a strange design, if the distance between an insn
8204 and dependent MM-insn is less 4 then we have a 6 additional
8205 cycles stall. So we make the distance equal to 4 cycles if it
8207 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
8211 gcc_assert (INSN_P (insn
)
8212 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
8213 && GET_CODE (PATTERN (insn
)) != USE
8214 && GET_CODE (PATTERN (insn
)) != CLOBBER
);
8215 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
8216 if (INSN_UID (insn
) < clocks_length
&& add_cycles
[INSN_UID (insn
)])
8217 /* We found a MM-insn which needs additional cycles. */
8223 /* Now we are searching for a template of the bundle in
8224 which the MM-insn is placed and the position of the
8225 insn in the bundle (0, 1, 2). Also we are searching
8226 for that there is a stop before the insn. */
8227 last
= prev_active_insn (insn
);
8228 pred_stop_p
= recog_memoized (last
) == CODE_FOR_insn_group_barrier
;
8230 last
= prev_active_insn (last
);
8232 for (;; last
= prev_active_insn (last
))
8233 if (recog_memoized (last
) == CODE_FOR_bundle_selector
)
8235 template0
= XINT (XVECEXP (PATTERN (last
), 0, 0), 0);
8237 /* The insn is in MLX bundle. Change the template
8238 onto MFI because we will add nops before the
8239 insn. It simplifies subsequent code a lot. */
8241 = gen_bundle_selector (const2_rtx
); /* -> MFI */
8244 else if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
8245 && (ia64_safe_itanium_class (last
)
8246 != ITANIUM_CLASS_IGNORE
))
8248 /* Some check of correctness: the stop is not at the
8249 bundle start, there are no more 3 insns in the bundle,
8250 and the MM-insn is not at the start of bundle with
8252 gcc_assert ((!pred_stop_p
|| n
)
8254 && (template0
!= 9 || !n
));
8255 /* Put nops after the insn in the bundle. */
8256 for (j
= 3 - n
; j
> 0; j
--)
8257 ia64_emit_insn_before (gen_nop (), insn
);
8258 /* It takes into account that we will add more N nops
8259 before the insn lately -- please see code below. */
8260 add_cycles
[INSN_UID (insn
)]--;
8261 if (!pred_stop_p
|| add_cycles
[INSN_UID (insn
)])
8262 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8265 add_cycles
[INSN_UID (insn
)]--;
8266 for (i
= add_cycles
[INSN_UID (insn
)]; i
> 0; i
--)
8268 /* Insert "MII;" template. */
8269 ia64_emit_insn_before (gen_bundle_selector (const0_rtx
),
8271 ia64_emit_insn_before (gen_nop (), insn
);
8272 ia64_emit_insn_before (gen_nop (), insn
);
8275 /* To decrease code size, we use "MI;I;"
8277 ia64_emit_insn_before
8278 (gen_insn_group_barrier (GEN_INT (3)), insn
);
8281 ia64_emit_insn_before (gen_nop (), insn
);
8282 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8285 /* Put the MM-insn in the same slot of a bundle with the
8286 same template as the original one. */
8287 ia64_add_bundle_selector_before (template0
, insn
);
8288 /* To put the insn in the same slot, add necessary number
8290 for (j
= n
; j
> 0; j
--)
8291 ia64_emit_insn_before (gen_nop (), insn
);
8292 /* Put the stop if the original bundle had it. */
8294 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8298 free (index_to_bundle_states
);
8299 finish_bundle_state_table ();
8301 dfa_clean_insn_cache ();
8304 /* The following function is called at the end of scheduling BB or
8305 EBB. After reload, it inserts stop bits and does insn bundling. */
8308 ia64_sched_finish (FILE *dump
, int sched_verbose
)
8311 fprintf (dump
, "// Finishing schedule.\n");
8312 if (!reload_completed
)
8314 if (reload_completed
)
8316 final_emit_insn_group_barriers (dump
);
8317 bundling (dump
, sched_verbose
, current_sched_info
->prev_head
,
8318 current_sched_info
->next_tail
);
8319 if (sched_verbose
&& dump
)
8320 fprintf (dump
, "// finishing %d-%d\n",
8321 INSN_UID (NEXT_INSN (current_sched_info
->prev_head
)),
8322 INSN_UID (PREV_INSN (current_sched_info
->next_tail
)));
8328 /* The following function inserts stop bits in scheduled BB or EBB. */
8331 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED
)
8334 int need_barrier_p
= 0;
8335 rtx prev_insn
= NULL_RTX
;
8337 init_insn_group_barriers ();
8339 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
8340 insn
!= current_sched_info
->next_tail
;
8341 insn
= NEXT_INSN (insn
))
8343 if (GET_CODE (insn
) == BARRIER
)
8345 rtx last
= prev_active_insn (insn
);
8349 if (GET_CODE (last
) == JUMP_INSN
8350 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
8351 last
= prev_active_insn (last
);
8352 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
8353 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
8355 init_insn_group_barriers ();
8357 prev_insn
= NULL_RTX
;
8359 else if (INSN_P (insn
))
8361 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
8363 init_insn_group_barriers ();
8365 prev_insn
= NULL_RTX
;
8367 else if (need_barrier_p
|| group_barrier_needed (insn
))
8369 if (TARGET_EARLY_STOP_BITS
)
8374 last
!= current_sched_info
->prev_head
;
8375 last
= PREV_INSN (last
))
8376 if (INSN_P (last
) && GET_MODE (last
) == TImode
8377 && stops_p
[INSN_UID (last
)])
8379 if (last
== current_sched_info
->prev_head
)
8381 last
= prev_active_insn (last
);
8383 && recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
8384 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8386 init_insn_group_barriers ();
8387 for (last
= NEXT_INSN (last
);
8389 last
= NEXT_INSN (last
))
8391 group_barrier_needed (last
);
8395 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8397 init_insn_group_barriers ();
8399 group_barrier_needed (insn
);
8400 prev_insn
= NULL_RTX
;
8402 else if (recog_memoized (insn
) >= 0)
8404 need_barrier_p
= (GET_CODE (insn
) == CALL_INSN
8405 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
8406 || asm_noperands (PATTERN (insn
)) >= 0);
8413 /* If the following function returns TRUE, we will use the DFA
8417 ia64_first_cycle_multipass_dfa_lookahead (void)
8419 return (reload_completed
? 6 : 4);
8422 /* The following function initiates variable `dfa_pre_cycle_insn'. */
8425 ia64_init_dfa_pre_cycle_insn (void)
8427 if (temp_dfa_state
== NULL
)
8429 dfa_state_size
= state_size ();
8430 temp_dfa_state
= xmalloc (dfa_state_size
);
8431 prev_cycle_state
= xmalloc (dfa_state_size
);
8433 dfa_pre_cycle_insn
= make_insn_raw (gen_pre_cycle ());
8434 PREV_INSN (dfa_pre_cycle_insn
) = NEXT_INSN (dfa_pre_cycle_insn
) = NULL_RTX
;
8435 recog_memoized (dfa_pre_cycle_insn
);
8436 dfa_stop_insn
= make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
8437 PREV_INSN (dfa_stop_insn
) = NEXT_INSN (dfa_stop_insn
) = NULL_RTX
;
8438 recog_memoized (dfa_stop_insn
);
8441 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
8442 used by the DFA insn scheduler. */
8445 ia64_dfa_pre_cycle_insn (void)
8447 return dfa_pre_cycle_insn
;
8450 /* The following function returns TRUE if PRODUCER (of type ilog or
8451 ld) produces address for CONSUMER (of type st or stf). */
8454 ia64_st_address_bypass_p (rtx producer
, rtx consumer
)
8458 gcc_assert (producer
&& consumer
);
8459 dest
= ia64_single_set (producer
);
8461 reg
= SET_DEST (dest
);
8463 if (GET_CODE (reg
) == SUBREG
)
8464 reg
= SUBREG_REG (reg
);
8465 gcc_assert (GET_CODE (reg
) == REG
);
8467 dest
= ia64_single_set (consumer
);
8469 mem
= SET_DEST (dest
);
8470 gcc_assert (mem
&& GET_CODE (mem
) == MEM
);
8471 return reg_mentioned_p (reg
, mem
);
8474 /* The following function returns TRUE if PRODUCER (of type ilog or
8475 ld) produces address for CONSUMER (of type ld or fld). */
8478 ia64_ld_address_bypass_p (rtx producer
, rtx consumer
)
8480 rtx dest
, src
, reg
, mem
;
8482 gcc_assert (producer
&& consumer
);
8483 dest
= ia64_single_set (producer
);
8485 reg
= SET_DEST (dest
);
8487 if (GET_CODE (reg
) == SUBREG
)
8488 reg
= SUBREG_REG (reg
);
8489 gcc_assert (GET_CODE (reg
) == REG
);
8491 src
= ia64_single_set (consumer
);
8493 mem
= SET_SRC (src
);
8496 if (GET_CODE (mem
) == UNSPEC
&& XVECLEN (mem
, 0) > 0)
8497 mem
= XVECEXP (mem
, 0, 0);
8498 else if (GET_CODE (mem
) == IF_THEN_ELSE
)
8499 /* ??? Is this bypass necessary for ld.c? */
8501 gcc_assert (XINT (XEXP (XEXP (mem
, 0), 0), 1) == UNSPEC_LDCCLR
);
8502 mem
= XEXP (mem
, 1);
8505 while (GET_CODE (mem
) == SUBREG
|| GET_CODE (mem
) == ZERO_EXTEND
)
8506 mem
= XEXP (mem
, 0);
8508 if (GET_CODE (mem
) == UNSPEC
)
8510 int c
= XINT (mem
, 1);
8512 gcc_assert (c
== UNSPEC_LDA
|| c
== UNSPEC_LDS
|| c
== UNSPEC_LDSA
);
8513 mem
= XVECEXP (mem
, 0, 0);
8516 /* Note that LO_SUM is used for GOT loads. */
8517 gcc_assert (GET_CODE (mem
) == LO_SUM
|| GET_CODE (mem
) == MEM
);
8519 return reg_mentioned_p (reg
, mem
);
8522 /* The following function returns TRUE if INSN produces address for a
8523 load/store insn. We will place such insns into M slot because it
8524 decreases its latency time. */
8527 ia64_produce_address_p (rtx insn
)
8533 /* Emit pseudo-ops for the assembler to describe predicate relations.
8534 At present this assumes that we only consider predicate pairs to
8535 be mutex, and that the assembler can deduce proper values from
8536 straight-line code. */
8539 emit_predicate_relation_info (void)
8543 FOR_EACH_BB_REVERSE (bb
)
8546 rtx head
= BB_HEAD (bb
);
8548 /* We only need such notes at code labels. */
8549 if (GET_CODE (head
) != CODE_LABEL
)
8551 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head
)))
8552 head
= NEXT_INSN (head
);
8554 /* Skip p0, which may be thought to be live due to (reg:DI p0)
8555 grabbing the entire block of predicate registers. */
8556 for (r
= PR_REG (2); r
< PR_REG (64); r
+= 2)
8557 if (REGNO_REG_SET_P (df_get_live_in (bb
), r
))
8559 rtx p
= gen_rtx_REG (BImode
, r
);
8560 rtx n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
8561 if (head
== BB_END (bb
))
8567 /* Look for conditional calls that do not return, and protect predicate
8568 relations around them. Otherwise the assembler will assume the call
8569 returns, and complain about uses of call-clobbered predicates after
8571 FOR_EACH_BB_REVERSE (bb
)
8573 rtx insn
= BB_HEAD (bb
);
8577 if (GET_CODE (insn
) == CALL_INSN
8578 && GET_CODE (PATTERN (insn
)) == COND_EXEC
8579 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
8581 rtx b
= emit_insn_before (gen_safe_across_calls_all (), insn
);
8582 rtx a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
8583 if (BB_HEAD (bb
) == insn
)
8585 if (BB_END (bb
) == insn
)
8589 if (insn
== BB_END (bb
))
8591 insn
= NEXT_INSN (insn
);
8596 /* Perform machine dependent operations on the rtl chain INSNS. */
8601 /* We are freeing block_for_insn in the toplev to keep compatibility
8602 with old MDEP_REORGS that are not CFG based. Recompute it now. */
8603 compute_bb_for_insn ();
8605 /* If optimizing, we'll have split before scheduling. */
8609 if (optimize
&& ia64_flag_schedule_insns2
&& dbg_cnt (ia64_sched2
))
8611 timevar_push (TV_SCHED2
);
8612 ia64_final_schedule
= 1;
8614 initiate_bundle_states ();
8615 ia64_nop
= make_insn_raw (gen_nop ());
8616 PREV_INSN (ia64_nop
) = NEXT_INSN (ia64_nop
) = NULL_RTX
;
8617 recog_memoized (ia64_nop
);
8618 clocks_length
= get_max_uid () + 1;
8619 stops_p
= XCNEWVEC (char, clocks_length
);
8620 if (ia64_tune
== PROCESSOR_ITANIUM
)
8622 clocks
= XCNEWVEC (int, clocks_length
);
8623 add_cycles
= XCNEWVEC (int, clocks_length
);
8625 if (ia64_tune
== PROCESSOR_ITANIUM2
)
8627 pos_1
= get_cpu_unit_code ("2_1");
8628 pos_2
= get_cpu_unit_code ("2_2");
8629 pos_3
= get_cpu_unit_code ("2_3");
8630 pos_4
= get_cpu_unit_code ("2_4");
8631 pos_5
= get_cpu_unit_code ("2_5");
8632 pos_6
= get_cpu_unit_code ("2_6");
8633 _0mii_
= get_cpu_unit_code ("2b_0mii.");
8634 _0mmi_
= get_cpu_unit_code ("2b_0mmi.");
8635 _0mfi_
= get_cpu_unit_code ("2b_0mfi.");
8636 _0mmf_
= get_cpu_unit_code ("2b_0mmf.");
8637 _0bbb_
= get_cpu_unit_code ("2b_0bbb.");
8638 _0mbb_
= get_cpu_unit_code ("2b_0mbb.");
8639 _0mib_
= get_cpu_unit_code ("2b_0mib.");
8640 _0mmb_
= get_cpu_unit_code ("2b_0mmb.");
8641 _0mfb_
= get_cpu_unit_code ("2b_0mfb.");
8642 _0mlx_
= get_cpu_unit_code ("2b_0mlx.");
8643 _1mii_
= get_cpu_unit_code ("2b_1mii.");
8644 _1mmi_
= get_cpu_unit_code ("2b_1mmi.");
8645 _1mfi_
= get_cpu_unit_code ("2b_1mfi.");
8646 _1mmf_
= get_cpu_unit_code ("2b_1mmf.");
8647 _1bbb_
= get_cpu_unit_code ("2b_1bbb.");
8648 _1mbb_
= get_cpu_unit_code ("2b_1mbb.");
8649 _1mib_
= get_cpu_unit_code ("2b_1mib.");
8650 _1mmb_
= get_cpu_unit_code ("2b_1mmb.");
8651 _1mfb_
= get_cpu_unit_code ("2b_1mfb.");
8652 _1mlx_
= get_cpu_unit_code ("2b_1mlx.");
8656 pos_1
= get_cpu_unit_code ("1_1");
8657 pos_2
= get_cpu_unit_code ("1_2");
8658 pos_3
= get_cpu_unit_code ("1_3");
8659 pos_4
= get_cpu_unit_code ("1_4");
8660 pos_5
= get_cpu_unit_code ("1_5");
8661 pos_6
= get_cpu_unit_code ("1_6");
8662 _0mii_
= get_cpu_unit_code ("1b_0mii.");
8663 _0mmi_
= get_cpu_unit_code ("1b_0mmi.");
8664 _0mfi_
= get_cpu_unit_code ("1b_0mfi.");
8665 _0mmf_
= get_cpu_unit_code ("1b_0mmf.");
8666 _0bbb_
= get_cpu_unit_code ("1b_0bbb.");
8667 _0mbb_
= get_cpu_unit_code ("1b_0mbb.");
8668 _0mib_
= get_cpu_unit_code ("1b_0mib.");
8669 _0mmb_
= get_cpu_unit_code ("1b_0mmb.");
8670 _0mfb_
= get_cpu_unit_code ("1b_0mfb.");
8671 _0mlx_
= get_cpu_unit_code ("1b_0mlx.");
8672 _1mii_
= get_cpu_unit_code ("1b_1mii.");
8673 _1mmi_
= get_cpu_unit_code ("1b_1mmi.");
8674 _1mfi_
= get_cpu_unit_code ("1b_1mfi.");
8675 _1mmf_
= get_cpu_unit_code ("1b_1mmf.");
8676 _1bbb_
= get_cpu_unit_code ("1b_1bbb.");
8677 _1mbb_
= get_cpu_unit_code ("1b_1mbb.");
8678 _1mib_
= get_cpu_unit_code ("1b_1mib.");
8679 _1mmb_
= get_cpu_unit_code ("1b_1mmb.");
8680 _1mfb_
= get_cpu_unit_code ("1b_1mfb.");
8681 _1mlx_
= get_cpu_unit_code ("1b_1mlx.");
8684 /* We cannot reuse this one because it has been corrupted by the
8686 finish_bundle_states ();
8687 if (ia64_tune
== PROCESSOR_ITANIUM
)
8694 emit_insn_group_barriers (dump_file
);
8696 ia64_final_schedule
= 0;
8697 timevar_pop (TV_SCHED2
);
8700 emit_all_insn_group_barriers (dump_file
);
8704 /* A call must not be the last instruction in a function, so that the
8705 return address is still within the function, so that unwinding works
8706 properly. Note that IA-64 differs from dwarf2 on this point. */
8707 if (flag_unwind_tables
|| (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
8712 insn
= get_last_insn ();
8713 if (! INSN_P (insn
))
8714 insn
= prev_active_insn (insn
);
8715 /* Skip over insns that expand to nothing. */
8716 while (GET_CODE (insn
) == INSN
&& get_attr_empty (insn
) == EMPTY_YES
)
8718 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
8719 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
8721 insn
= prev_active_insn (insn
);
8723 if (GET_CODE (insn
) == CALL_INSN
)
8726 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8727 emit_insn (gen_break_f ());
8728 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8732 emit_predicate_relation_info ();
8734 if (ia64_flag_var_tracking
)
8736 timevar_push (TV_VAR_TRACKING
);
8737 variable_tracking_main ();
8738 timevar_pop (TV_VAR_TRACKING
);
8740 df_finish_pass (false);
8743 /* Return true if REGNO is used by the epilogue. */
8746 ia64_epilogue_uses (int regno
)
8751 /* With a call to a function in another module, we will write a new
8752 value to "gp". After returning from such a call, we need to make
8753 sure the function restores the original gp-value, even if the
8754 function itself does not use the gp anymore. */
8755 return !(TARGET_AUTO_PIC
|| TARGET_NO_PIC
);
8757 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
8758 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
8759 /* For functions defined with the syscall_linkage attribute, all
8760 input registers are marked as live at all function exits. This
8761 prevents the register allocator from using the input registers,
8762 which in turn makes it possible to restart a system call after
8763 an interrupt without having to save/restore the input registers.
8764 This also prevents kernel data from leaking to application code. */
8765 return lookup_attribute ("syscall_linkage",
8766 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))) != NULL
;
8769 /* Conditional return patterns can't represent the use of `b0' as
8770 the return address, so we force the value live this way. */
8774 /* Likewise for ar.pfs, which is used by br.ret. */
8782 /* Return true if REGNO is used by the frame unwinder. */
8785 ia64_eh_uses (int regno
)
8787 enum ia64_frame_regs r
;
8789 if (! reload_completed
)
8795 for (r
= reg_save_b0
; r
<= reg_save_ar_lc
; r
++)
8796 if (regno
== current_frame_info
.r
[r
]
8797 || regno
== emitted_frame_related_regs
[r
])
8803 /* Return true if this goes in small data/bss. */
8805 /* ??? We could also support own long data here. Generating movl/add/ld8
8806 instead of addl,ld8/ld8. This makes the code bigger, but should make the
8807 code faster because there is one less load. This also includes incomplete
8808 types which can't go in sdata/sbss. */
8811 ia64_in_small_data_p (const_tree exp
)
8813 if (TARGET_NO_SDATA
)
8816 /* We want to merge strings, so we never consider them small data. */
8817 if (TREE_CODE (exp
) == STRING_CST
)
8820 /* Functions are never small data. */
8821 if (TREE_CODE (exp
) == FUNCTION_DECL
)
8824 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
8826 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
8828 if (strcmp (section
, ".sdata") == 0
8829 || strncmp (section
, ".sdata.", 7) == 0
8830 || strncmp (section
, ".gnu.linkonce.s.", 16) == 0
8831 || strcmp (section
, ".sbss") == 0
8832 || strncmp (section
, ".sbss.", 6) == 0
8833 || strncmp (section
, ".gnu.linkonce.sb.", 17) == 0)
8838 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
8840 /* If this is an incomplete type with size 0, then we can't put it
8841 in sdata because it might be too big when completed. */
8842 if (size
> 0 && size
<= ia64_section_threshold
)
8849 /* Output assembly directives for prologue regions. */
8851 /* The current basic block number. */
8853 static bool last_block
;
8855 /* True if we need a copy_state command at the start of the next block. */
8857 static bool need_copy_state
;
8859 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
8860 # define MAX_ARTIFICIAL_LABEL_BYTES 30
8863 /* Emit a debugging label after a call-frame-related insn. We'd
8864 rather output the label right away, but we'd have to output it
8865 after, not before, the instruction, and the instruction has not
8866 been output yet. So we emit the label after the insn, delete it to
8867 avoid introducing basic blocks, and mark it as preserved, such that
8868 it is still output, given that it is referenced in debug info. */
8871 ia64_emit_deleted_label_after_insn (rtx insn
)
8873 char label
[MAX_ARTIFICIAL_LABEL_BYTES
];
8874 rtx lb
= gen_label_rtx ();
8875 rtx label_insn
= emit_label_after (lb
, insn
);
8877 LABEL_PRESERVE_P (lb
) = 1;
8879 delete_insn (label_insn
);
8881 ASM_GENERATE_INTERNAL_LABEL (label
, "L", CODE_LABEL_NUMBER (label_insn
));
8883 return xstrdup (label
);
8886 /* Define the CFA after INSN with the steady-state definition. */
8889 ia64_dwarf2out_def_steady_cfa (rtx insn
)
8891 rtx fp
= frame_pointer_needed
8892 ? hard_frame_pointer_rtx
8893 : stack_pointer_rtx
;
8896 (ia64_emit_deleted_label_after_insn (insn
),
8898 ia64_initial_elimination_offset
8899 (REGNO (arg_pointer_rtx
), REGNO (fp
))
8900 + ARG_POINTER_CFA_OFFSET (current_function_decl
));
8903 /* The generic dwarf2 frame debug info generator does not define a
8904 separate region for the very end of the epilogue, so refrain from
8905 doing so in the IA64-specific code as well. */
8907 #define IA64_CHANGE_CFA_IN_EPILOGUE 0
8909 /* The function emits unwind directives for the start of an epilogue. */
8912 process_epilogue (FILE *asm_out_file
, rtx insn
, bool unwind
, bool frame
)
8914 /* If this isn't the last block of the function, then we need to label the
8915 current state, and copy it back in at the start of the next block. */
8920 fprintf (asm_out_file
, "\t.label_state %d\n",
8921 ++cfun
->machine
->state_num
);
8922 need_copy_state
= true;
8926 fprintf (asm_out_file
, "\t.restore sp\n");
8927 if (IA64_CHANGE_CFA_IN_EPILOGUE
&& frame
)
8928 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn
),
8929 STACK_POINTER_REGNUM
, INCOMING_FRAME_SP_OFFSET
);
8932 /* This function processes a SET pattern looking for specific patterns
8933 which result in emitting an assembly directive required for unwinding. */
8936 process_set (FILE *asm_out_file
, rtx pat
, rtx insn
, bool unwind
, bool frame
)
8938 rtx src
= SET_SRC (pat
);
8939 rtx dest
= SET_DEST (pat
);
8940 int src_regno
, dest_regno
;
8942 /* Look for the ALLOC insn. */
8943 if (GET_CODE (src
) == UNSPEC_VOLATILE
8944 && XINT (src
, 1) == UNSPECV_ALLOC
8945 && GET_CODE (dest
) == REG
)
8947 dest_regno
= REGNO (dest
);
8949 /* If this is the final destination for ar.pfs, then this must
8950 be the alloc in the prologue. */
8951 if (dest_regno
== current_frame_info
.r
[reg_save_ar_pfs
])
8954 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
8955 ia64_dbx_register_number (dest_regno
));
8959 /* This must be an alloc before a sibcall. We must drop the
8960 old frame info. The easiest way to drop the old frame
8961 info is to ensure we had a ".restore sp" directive
8962 followed by a new prologue. If the procedure doesn't
8963 have a memory-stack frame, we'll issue a dummy ".restore
8965 if (current_frame_info
.total_size
== 0 && !frame_pointer_needed
)
8966 /* if haven't done process_epilogue() yet, do it now */
8967 process_epilogue (asm_out_file
, insn
, unwind
, frame
);
8969 fprintf (asm_out_file
, "\t.prologue\n");
8974 /* Look for SP = .... */
8975 if (GET_CODE (dest
) == REG
&& REGNO (dest
) == STACK_POINTER_REGNUM
)
8977 if (GET_CODE (src
) == PLUS
)
8979 rtx op0
= XEXP (src
, 0);
8980 rtx op1
= XEXP (src
, 1);
8982 gcc_assert (op0
== dest
&& GET_CODE (op1
) == CONST_INT
);
8984 if (INTVAL (op1
) < 0)
8986 gcc_assert (!frame_pointer_needed
);
8988 fprintf (asm_out_file
, "\t.fframe "HOST_WIDE_INT_PRINT_DEC
"\n",
8991 ia64_dwarf2out_def_steady_cfa (insn
);
8994 process_epilogue (asm_out_file
, insn
, unwind
, frame
);
8998 gcc_assert (GET_CODE (src
) == REG
8999 && REGNO (src
) == HARD_FRAME_POINTER_REGNUM
);
9000 process_epilogue (asm_out_file
, insn
, unwind
, frame
);
9006 /* Register move we need to look at. */
9007 if (GET_CODE (dest
) == REG
&& GET_CODE (src
) == REG
)
9009 src_regno
= REGNO (src
);
9010 dest_regno
= REGNO (dest
);
9015 /* Saving return address pointer. */
9016 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_b0
]);
9018 fprintf (asm_out_file
, "\t.save rp, r%d\n",
9019 ia64_dbx_register_number (dest_regno
));
9023 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_pr
]);
9025 fprintf (asm_out_file
, "\t.save pr, r%d\n",
9026 ia64_dbx_register_number (dest_regno
));
9029 case AR_UNAT_REGNUM
:
9030 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_ar_unat
]);
9032 fprintf (asm_out_file
, "\t.save ar.unat, r%d\n",
9033 ia64_dbx_register_number (dest_regno
));
9037 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_ar_lc
]);
9039 fprintf (asm_out_file
, "\t.save ar.lc, r%d\n",
9040 ia64_dbx_register_number (dest_regno
));
9043 case STACK_POINTER_REGNUM
:
9044 gcc_assert (dest_regno
== HARD_FRAME_POINTER_REGNUM
9045 && frame_pointer_needed
);
9047 fprintf (asm_out_file
, "\t.vframe r%d\n",
9048 ia64_dbx_register_number (dest_regno
));
9050 ia64_dwarf2out_def_steady_cfa (insn
);
9054 /* Everything else should indicate being stored to memory. */
9059 /* Memory store we need to look at. */
9060 if (GET_CODE (dest
) == MEM
&& GET_CODE (src
) == REG
)
9066 if (GET_CODE (XEXP (dest
, 0)) == REG
)
9068 base
= XEXP (dest
, 0);
9073 gcc_assert (GET_CODE (XEXP (dest
, 0)) == PLUS
9074 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
);
9075 base
= XEXP (XEXP (dest
, 0), 0);
9076 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
9079 if (base
== hard_frame_pointer_rtx
)
9081 saveop
= ".savepsp";
9086 gcc_assert (base
== stack_pointer_rtx
);
9090 src_regno
= REGNO (src
);
9094 gcc_assert (!current_frame_info
.r
[reg_save_b0
]);
9096 fprintf (asm_out_file
, "\t%s rp, %ld\n", saveop
, off
);
9100 gcc_assert (!current_frame_info
.r
[reg_save_pr
]);
9102 fprintf (asm_out_file
, "\t%s pr, %ld\n", saveop
, off
);
9106 gcc_assert (!current_frame_info
.r
[reg_save_ar_lc
]);
9108 fprintf (asm_out_file
, "\t%s ar.lc, %ld\n", saveop
, off
);
9112 gcc_assert (!current_frame_info
.r
[reg_save_ar_pfs
]);
9114 fprintf (asm_out_file
, "\t%s ar.pfs, %ld\n", saveop
, off
);
9117 case AR_UNAT_REGNUM
:
9118 gcc_assert (!current_frame_info
.r
[reg_save_ar_unat
]);
9120 fprintf (asm_out_file
, "\t%s ar.unat, %ld\n", saveop
, off
);
9128 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
9129 1 << (src_regno
- GR_REG (4)));
9138 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
9139 1 << (src_regno
- BR_REG (1)));
9147 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
9148 1 << (src_regno
- FR_REG (2)));
9151 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9152 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9153 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9154 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9156 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
9157 1 << (src_regno
- FR_REG (12)));
9169 /* This function looks at a single insn and emits any directives
9170 required to unwind this insn. */
9172 process_for_unwind_directive (FILE *asm_out_file
, rtx insn
)
9174 bool unwind
= (flag_unwind_tables
9175 || (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
));
9176 bool frame
= dwarf2out_do_frame ();
9178 if (unwind
|| frame
)
9182 if (NOTE_INSN_BASIC_BLOCK_P (insn
))
9184 last_block
= NOTE_BASIC_BLOCK (insn
)->next_bb
== EXIT_BLOCK_PTR
;
9186 /* Restore unwind state from immediately before the epilogue. */
9187 if (need_copy_state
)
9191 fprintf (asm_out_file
, "\t.body\n");
9192 fprintf (asm_out_file
, "\t.copy_state %d\n",
9193 cfun
->machine
->state_num
);
9195 if (IA64_CHANGE_CFA_IN_EPILOGUE
&& frame
)
9196 ia64_dwarf2out_def_steady_cfa (insn
);
9197 need_copy_state
= false;
9201 if (GET_CODE (insn
) == NOTE
|| ! RTX_FRAME_RELATED_P (insn
))
9204 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
9206 pat
= XEXP (pat
, 0);
9208 pat
= PATTERN (insn
);
9210 switch (GET_CODE (pat
))
9213 process_set (asm_out_file
, pat
, insn
, unwind
, frame
);
9219 int limit
= XVECLEN (pat
, 0);
9220 for (par_index
= 0; par_index
< limit
; par_index
++)
9222 rtx x
= XVECEXP (pat
, 0, par_index
);
9223 if (GET_CODE (x
) == SET
)
9224 process_set (asm_out_file
, x
, insn
, unwind
, frame
);
9239 IA64_BUILTIN_FLUSHRS
9243 ia64_init_builtins (void)
9248 /* The __fpreg type. */
9249 fpreg_type
= make_node (REAL_TYPE
);
9250 TYPE_PRECISION (fpreg_type
) = 82;
9251 layout_type (fpreg_type
);
9252 (*lang_hooks
.types
.register_builtin_type
) (fpreg_type
, "__fpreg");
9254 /* The __float80 type. */
9255 float80_type
= make_node (REAL_TYPE
);
9256 TYPE_PRECISION (float80_type
) = 80;
9257 layout_type (float80_type
);
9258 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
9260 /* The __float128 type. */
9263 tree float128_type
= make_node (REAL_TYPE
);
9264 TYPE_PRECISION (float128_type
) = 128;
9265 layout_type (float128_type
);
9266 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
9269 /* Under HPUX, this is a synonym for "long double". */
9270 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
9273 #define def_builtin(name, type, code) \
9274 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
9277 def_builtin ("__builtin_ia64_bsp",
9278 build_function_type (ptr_type_node
, void_list_node
),
9281 def_builtin ("__builtin_ia64_flushrs",
9282 build_function_type (void_type_node
, void_list_node
),
9283 IA64_BUILTIN_FLUSHRS
);
9289 if (built_in_decls
[BUILT_IN_FINITE
])
9290 set_user_assembler_name (built_in_decls
[BUILT_IN_FINITE
],
9292 if (built_in_decls
[BUILT_IN_FINITEF
])
9293 set_user_assembler_name (built_in_decls
[BUILT_IN_FINITEF
],
9295 if (built_in_decls
[BUILT_IN_FINITEL
])
9296 set_user_assembler_name (built_in_decls
[BUILT_IN_FINITEL
],
9302 ia64_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
9303 enum machine_mode mode ATTRIBUTE_UNUSED
,
9304 int ignore ATTRIBUTE_UNUSED
)
9306 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
9307 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
9311 case IA64_BUILTIN_BSP
:
9312 if (! target
|| ! register_operand (target
, DImode
))
9313 target
= gen_reg_rtx (DImode
);
9314 emit_insn (gen_bsp_value (target
));
9315 #ifdef POINTERS_EXTEND_UNSIGNED
9316 target
= convert_memory_address (ptr_mode
, target
);
9320 case IA64_BUILTIN_FLUSHRS
:
9321 emit_insn (gen_flushrs ());
9331 /* For the HP-UX IA64 aggregate parameters are passed stored in the
9332 most significant bits of the stack slot. */
9335 ia64_hpux_function_arg_padding (enum machine_mode mode
, const_tree type
)
9337 /* Exception to normal case for structures/unions/etc. */
9339 if (type
&& AGGREGATE_TYPE_P (type
)
9340 && int_size_in_bytes (type
) < UNITS_PER_WORD
)
9343 /* Fall back to the default. */
9344 return DEFAULT_FUNCTION_ARG_PADDING (mode
, type
);
9347 /* Emit text to declare externally defined variables and functions, because
9348 the Intel assembler does not support undefined externals. */
9351 ia64_asm_output_external (FILE *file
, tree decl
, const char *name
)
9353 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
9354 set in order to avoid putting out names that are never really
9356 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
)))
9358 /* maybe_assemble_visibility will return 1 if the assembler
9359 visibility directive is output. */
9360 int need_visibility
= ((*targetm
.binds_local_p
) (decl
)
9361 && maybe_assemble_visibility (decl
));
9363 /* GNU as does not need anything here, but the HP linker does
9364 need something for external functions. */
9365 if ((TARGET_HPUX_LD
|| !TARGET_GNU_AS
)
9366 && TREE_CODE (decl
) == FUNCTION_DECL
)
9367 (*targetm
.asm_out
.globalize_decl_name
) (file
, decl
);
9368 else if (need_visibility
&& !TARGET_GNU_AS
)
9369 (*targetm
.asm_out
.globalize_label
) (file
, name
);
9373 /* Set SImode div/mod functions, init_integral_libfuncs only initializes
9374 modes of word_mode and larger. Rename the TFmode libfuncs using the
9375 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
9376 backward compatibility. */
9379 ia64_init_libfuncs (void)
9381 set_optab_libfunc (sdiv_optab
, SImode
, "__divsi3");
9382 set_optab_libfunc (udiv_optab
, SImode
, "__udivsi3");
9383 set_optab_libfunc (smod_optab
, SImode
, "__modsi3");
9384 set_optab_libfunc (umod_optab
, SImode
, "__umodsi3");
9386 set_optab_libfunc (add_optab
, TFmode
, "_U_Qfadd");
9387 set_optab_libfunc (sub_optab
, TFmode
, "_U_Qfsub");
9388 set_optab_libfunc (smul_optab
, TFmode
, "_U_Qfmpy");
9389 set_optab_libfunc (sdiv_optab
, TFmode
, "_U_Qfdiv");
9390 set_optab_libfunc (neg_optab
, TFmode
, "_U_Qfneg");
9392 set_conv_libfunc (sext_optab
, TFmode
, SFmode
, "_U_Qfcnvff_sgl_to_quad");
9393 set_conv_libfunc (sext_optab
, TFmode
, DFmode
, "_U_Qfcnvff_dbl_to_quad");
9394 set_conv_libfunc (sext_optab
, TFmode
, XFmode
, "_U_Qfcnvff_f80_to_quad");
9395 set_conv_libfunc (trunc_optab
, SFmode
, TFmode
, "_U_Qfcnvff_quad_to_sgl");
9396 set_conv_libfunc (trunc_optab
, DFmode
, TFmode
, "_U_Qfcnvff_quad_to_dbl");
9397 set_conv_libfunc (trunc_optab
, XFmode
, TFmode
, "_U_Qfcnvff_quad_to_f80");
9399 set_conv_libfunc (sfix_optab
, SImode
, TFmode
, "_U_Qfcnvfxt_quad_to_sgl");
9400 set_conv_libfunc (sfix_optab
, DImode
, TFmode
, "_U_Qfcnvfxt_quad_to_dbl");
9401 set_conv_libfunc (sfix_optab
, TImode
, TFmode
, "_U_Qfcnvfxt_quad_to_quad");
9402 set_conv_libfunc (ufix_optab
, SImode
, TFmode
, "_U_Qfcnvfxut_quad_to_sgl");
9403 set_conv_libfunc (ufix_optab
, DImode
, TFmode
, "_U_Qfcnvfxut_quad_to_dbl");
9405 set_conv_libfunc (sfloat_optab
, TFmode
, SImode
, "_U_Qfcnvxf_sgl_to_quad");
9406 set_conv_libfunc (sfloat_optab
, TFmode
, DImode
, "_U_Qfcnvxf_dbl_to_quad");
9407 set_conv_libfunc (sfloat_optab
, TFmode
, TImode
, "_U_Qfcnvxf_quad_to_quad");
9408 /* HP-UX 11.23 libc does not have a function for unsigned
9409 SImode-to-TFmode conversion. */
9410 set_conv_libfunc (ufloat_optab
, TFmode
, DImode
, "_U_Qfcnvxuf_dbl_to_quad");
9413 /* Rename all the TFmode libfuncs using the HPUX conventions. */
9416 ia64_hpux_init_libfuncs (void)
9418 ia64_init_libfuncs ();
9420 /* The HP SI millicode division and mod functions expect DI arguments.
9421 By turning them off completely we avoid using both libgcc and the
9422 non-standard millicode routines and use the HP DI millicode routines
9425 set_optab_libfunc (sdiv_optab
, SImode
, 0);
9426 set_optab_libfunc (udiv_optab
, SImode
, 0);
9427 set_optab_libfunc (smod_optab
, SImode
, 0);
9428 set_optab_libfunc (umod_optab
, SImode
, 0);
9430 set_optab_libfunc (sdiv_optab
, DImode
, "__milli_divI");
9431 set_optab_libfunc (udiv_optab
, DImode
, "__milli_divU");
9432 set_optab_libfunc (smod_optab
, DImode
, "__milli_remI");
9433 set_optab_libfunc (umod_optab
, DImode
, "__milli_remU");
9435 /* HP-UX libc has TF min/max/abs routines in it. */
9436 set_optab_libfunc (smin_optab
, TFmode
, "_U_Qfmin");
9437 set_optab_libfunc (smax_optab
, TFmode
, "_U_Qfmax");
9438 set_optab_libfunc (abs_optab
, TFmode
, "_U_Qfabs");
9440 /* ia64_expand_compare uses this. */
9441 cmptf_libfunc
= init_one_libfunc ("_U_Qfcmp");
9443 /* These should never be used. */
9444 set_optab_libfunc (eq_optab
, TFmode
, 0);
9445 set_optab_libfunc (ne_optab
, TFmode
, 0);
9446 set_optab_libfunc (gt_optab
, TFmode
, 0);
9447 set_optab_libfunc (ge_optab
, TFmode
, 0);
9448 set_optab_libfunc (lt_optab
, TFmode
, 0);
9449 set_optab_libfunc (le_optab
, TFmode
, 0);
9452 /* Rename the division and modulus functions in VMS. */
9455 ia64_vms_init_libfuncs (void)
9457 set_optab_libfunc (sdiv_optab
, SImode
, "OTS$DIV_I");
9458 set_optab_libfunc (sdiv_optab
, DImode
, "OTS$DIV_L");
9459 set_optab_libfunc (udiv_optab
, SImode
, "OTS$DIV_UI");
9460 set_optab_libfunc (udiv_optab
, DImode
, "OTS$DIV_UL");
9461 set_optab_libfunc (smod_optab
, SImode
, "OTS$REM_I");
9462 set_optab_libfunc (smod_optab
, DImode
, "OTS$REM_L");
9463 set_optab_libfunc (umod_optab
, SImode
, "OTS$REM_UI");
9464 set_optab_libfunc (umod_optab
, DImode
, "OTS$REM_UL");
9467 /* Rename the TFmode libfuncs available from soft-fp in glibc using
9468 the HPUX conventions. */
9471 ia64_sysv4_init_libfuncs (void)
9473 ia64_init_libfuncs ();
9475 /* These functions are not part of the HPUX TFmode interface. We
9476 use them instead of _U_Qfcmp, which doesn't work the way we
9478 set_optab_libfunc (eq_optab
, TFmode
, "_U_Qfeq");
9479 set_optab_libfunc (ne_optab
, TFmode
, "_U_Qfne");
9480 set_optab_libfunc (gt_optab
, TFmode
, "_U_Qfgt");
9481 set_optab_libfunc (ge_optab
, TFmode
, "_U_Qfge");
9482 set_optab_libfunc (lt_optab
, TFmode
, "_U_Qflt");
9483 set_optab_libfunc (le_optab
, TFmode
, "_U_Qfle");
9485 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
9486 glibc doesn't have them. */
9489 /* For HPUX, it is illegal to have relocations in shared segments. */
/* NOTE(review): only the declarator line of this function survives in
   this copy; its return type and body were lost in extraction.  Given
   the comment above and the sibling ia64_reloc_rw_mask below, this is
   presumably the HP-UX reloc_rw_mask target hook — restore the body
   from the upstream GCC sources before building.  */
9492 ia64_hpux_reloc_rw_mask (void)
9497 /* For others, relax this so that relocations to local data goes in
9498 read-only segments, but we still cannot allow global relocations
9499 in read-only segments. */
9502 ia64_reloc_rw_mask (void)
9504 return flag_pic
? 3 : 2;
9507 /* Return the section to use for X. The only special thing we do here
9508 is to honor small data. */
9511 ia64_select_rtx_section (enum machine_mode mode
, rtx x
,
9512 unsigned HOST_WIDE_INT align
)
9514 if (GET_MODE_SIZE (mode
) > 0
9515 && GET_MODE_SIZE (mode
) <= ia64_section_threshold
9516 && !TARGET_NO_SDATA
)
9517 return sdata_section
;
9519 return default_elf_select_rtx_section (mode
, x
, align
);
9523 ia64_section_type_flags (tree decl
, const char *name
, int reloc
)
9525 unsigned int flags
= 0;
9527 if (strcmp (name
, ".sdata") == 0
9528 || strncmp (name
, ".sdata.", 7) == 0
9529 || strncmp (name
, ".gnu.linkonce.s.", 16) == 0
9530 || strncmp (name
, ".sdata2.", 8) == 0
9531 || strncmp (name
, ".gnu.linkonce.s2.", 17) == 0
9532 || strcmp (name
, ".sbss") == 0
9533 || strncmp (name
, ".sbss.", 6) == 0
9534 || strncmp (name
, ".gnu.linkonce.sb.", 17) == 0)
9535 flags
= SECTION_SMALL
;
9537 flags
|= default_section_type_flags (decl
, name
, reloc
);
9541 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
9542 structure type and that the address of that type should be passed
9543 in out0, rather than in r8. */
9546 ia64_struct_retval_addr_is_first_parm_p (tree fntype
)
9548 tree ret_type
= TREE_TYPE (fntype
);
9550 /* The Itanium C++ ABI requires that out0, rather than r8, be used
9551 as the structure return address parameter, if the return value
9552 type has a non-trivial copy constructor or destructor. It is not
9553 clear if this same convention should be used for other
9554 programming languages. Until G++ 3.4, we incorrectly used r8 for
9555 these return values. */
9556 return (abi_version_at_least (2)
9558 && TYPE_MODE (ret_type
) == BLKmode
9559 && TREE_ADDRESSABLE (ret_type
)
9560 && strcmp (lang_hooks
.name
, "GNU C++") == 0);
9563 /* Output the assembler code for a thunk function. THUNK_DECL is the
9564 declaration for the thunk function itself, FUNCTION is the decl for
9565 the target function. DELTA is an immediate constant offset to be
9566 added to THIS. If VCALL_OFFSET is nonzero, the word at
9567 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): this copy of the function is badly garbled by
   extraction — statements are split across lines and several
   structural lines (the `tree function' parameter line, local `rtx
   delta_rtx' declaration, the TARGET_ILP32 conditionals, `else'
   keywords, braces and the `if (delta)' / `if (vcall_offset)' guards)
   are missing.  Only comments are added here; recover the full body
   from upstream GCC before building.  */
9570 ia64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
9571 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
9574 rtx this_rtx
, insn
, funexp
;
9575 unsigned int this_parmno
;
9576 unsigned int this_regno
;
/* Pretend reload/epilogue have run so the RTL we emit is treated as
   final-form code.  */
9579 reload_completed
= 1;
9580 epilogue_completed
= 1;
9582 /* Set things up as ia64_expand_prologue might. */
9583 last_scratch_gr_reg
= 15;
9585 memset (¤t_frame_info
, 0, sizeof (current_frame_info
));
9586 current_frame_info
.spill_cfa_off
= -16;
9587 current_frame_info
.n_input_regs
= 1;
9588 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
9590 /* Mark the end of the (empty) prologue. */
9591 emit_note (NOTE_INSN_PROLOGUE_END
);
9593 /* Figure out whether "this" will be the first parameter (the
9594 typical case) or the second parameter (as happens when the
9595 virtual function returns certain class objects). */
9597 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk
))
9599 this_regno
= IN_REG (this_parmno
);
9600 if (!TARGET_REG_NAMES
)
9601 reg_names
[this_regno
] = ia64_reg_numbers
[this_parmno
];
9603 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
9605 /* Apply the constant offset, if required. */
9606 delta_rtx
= GEN_INT (delta
);
/* ILP32 path (guard missing here): extend the 32-bit pointer,
   folding DELTA in when it fits the `I' immediate constraint.  */
9609 rtx tmp
= gen_rtx_REG (ptr_mode
, this_regno
);
9610 REG_POINTER (tmp
) = 1;
9611 if (delta
&& satisfies_constraint_I (delta_rtx
))
9613 emit_insn (gen_ptr_extend_plus_imm (this_rtx
, tmp
, delta_rtx
));
9617 emit_insn (gen_ptr_extend (this_rtx
, tmp
));
/* Otherwise materialize DELTA in a scratch register when it does not
   fit an add immediate, then add it to `this'.  */
9621 if (!satisfies_constraint_I (delta_rtx
))
9623 rtx tmp
= gen_rtx_REG (Pmode
, 2);
9624 emit_move_insn (tmp
, delta_rtx
);
9627 emit_insn (gen_adddi3 (this_rtx
, this_rtx
, delta_rtx
));
9630 /* Apply the offset from the vtable, if required. */
9633 rtx vcall_offset_rtx
= GEN_INT (vcall_offset
);
9634 rtx tmp
= gen_rtx_REG (Pmode
, 2);
/* Load the vtable pointer *this (ILP32 variant uses ptr_mode with an
   explicit extend; the guard for that path is missing here).  */
9638 rtx t
= gen_rtx_REG (ptr_mode
, 2);
9639 REG_POINTER (t
) = 1;
9640 emit_move_insn (t
, gen_rtx_MEM (ptr_mode
, this_rtx
));
9641 if (satisfies_constraint_I (vcall_offset_rtx
))
9643 emit_insn (gen_ptr_extend_plus_imm (tmp
, t
, vcall_offset_rtx
));
9647 emit_insn (gen_ptr_extend (tmp
, t
));
9650 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, this_rtx
));
/* Add VCALL_OFFSET (via a scratch if it exceeds the `J' constraint)
   and load the adjustment word from the vtable.  */
9654 if (!satisfies_constraint_J (vcall_offset_rtx
))
9656 rtx tmp2
= gen_rtx_REG (Pmode
, next_scratch_gr_reg ());
9657 emit_move_insn (tmp2
, vcall_offset_rtx
);
9658 vcall_offset_rtx
= tmp2
;
9660 emit_insn (gen_adddi3 (tmp
, tmp
, vcall_offset_rtx
));
9664 emit_insn (gen_zero_extendsidi2 (tmp
, gen_rtx_MEM (ptr_mode
, tmp
)));
9666 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, tmp
));
9668 emit_insn (gen_adddi3 (this_rtx
, this_rtx
, tmp
));
9671 /* Generate a tail call to the target function. */
9672 if (! TREE_USED (function
))
9674 assemble_external (function
);
9675 TREE_USED (function
) = 1;
9677 funexp
= XEXP (DECL_RTL (function
), 0);
9678 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
9679 ia64_expand_call (NULL_RTX
, funexp
, NULL_RTX
, 1);
9680 insn
= get_last_insn ();
9681 SIBLING_CALL_P (insn
) = 1;
9683 /* Code generation for calls relies on splitting. */
9684 reload_completed
= 1;
9685 epilogue_completed
= 1;
9686 try_split (PATTERN (insn
), insn
, 0);
9690 /* Run just enough of rest_of_compilation to get the insns emitted.
9691 There's not really enough bulk here to make other passes such as
9692 instruction scheduling worth while. Note that use_thunk calls
9693 assemble_start_function and assemble_end_function. */
9695 insn_locators_alloc ();
9696 emit_all_insn_group_barriers (NULL
);
9697 insn
= get_insns ();
9698 shorten_branches (insn
);
9699 final_start_function (insn
, file
, 1);
9700 final (insn
, file
, 1);
9701 final_end_function ();
9702 free_after_compilation (cfun
);
/* Undo the reload/epilogue pretense set up at entry.  */
9704 reload_completed
= 0;
9705 epilogue_completed
= 0;
9708 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9711 ia64_struct_value_rtx (tree fntype
,
9712 int incoming ATTRIBUTE_UNUSED
)
9714 if (fntype
&& ia64_struct_retval_addr_is_first_parm_p (fntype
))
9716 return gen_rtx_REG (Pmode
, GR_REG (8));
/* NOTE(review): only the declarator survives here; the return type and
   the entire body (original lines 9721-9745) were lost in extraction.
   Presumably the TARGET_SCALAR_MODE_SUPPORTED_P hook — restore from
   upstream GCC.  */
9720 ia64_scalar_mode_supported_p (enum machine_mode mode
)
/* NOTE(review): only the declarator survives here; the return type and
   the entire body (original lines 9747-9762) were lost in extraction.
   Presumably the TARGET_VECTOR_MODE_SUPPORTED_P hook — restore from
   upstream GCC.  */
9746 ia64_vector_mode_supported_p (enum machine_mode mode
)
9763 /* Implement the FUNCTION_PROFILER macro. */
/* NOTE(review): garbled extraction — the return type, local
   declarations (`indirect_call', `buf'), several if/else guards and
   braces are missing from this copy.  Only comments are added; the
   full body must be restored from upstream GCC.  Emits the _mcount
   call sequence (direct, or indirect through the function descriptor
   when the static chain register r15 must be preserved).  */
9766 ia64_output_function_profiler (FILE *file
, int labelno
)
9770 /* If the function needs a static chain and the static chain
9771 register is r15, we use an indirect call so as to bypass
9772 the PLT stub in case the executable is dynamically linked,
9773 because the stub clobbers r15 as per 5.3.6 of the psABI.
9774 We don't need to do that in non canonical PIC mode. */
9776 if (cfun
->static_chain_decl
&& !TARGET_NO_PIC
&& !TARGET_AUTO_PIC
)
9778 gcc_assert (STATIC_CHAIN_REGNUM
== 15);
9779 indirect_call
= true;
9782 indirect_call
= false;
/* Prologue directives and register-stack allocation for the call.  */
9785 fputs ("\t.prologue 4, r40\n", file
);
9787 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file
);
9788 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file
);
9790 if (NO_PROFILE_COUNTERS
)
9791 fputs ("\tmov out3 = r0\n", file
);
/* Otherwise pass the address of the per-call-site counter label
   (gp-relative under AUTO_PIC, via the linkage table otherwise).  */
9795 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
9797 if (TARGET_AUTO_PIC
)
9798 fputs ("\tmovl out3 = @gprel(", file
);
9800 fputs ("\taddl out3 = @ltoff(", file
);
9801 assemble_name (file
, buf
);
9802 if (TARGET_AUTO_PIC
)
9803 fputs (")\n", file
);
9805 fputs ("), r1\n", file
);
/* Indirect-call path: load _mcount's function descriptor address.  */
9809 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file
);
9810 fputs ("\t;;\n", file
);
9812 fputs ("\t.save rp, r42\n", file
);
9813 fputs ("\tmov out2 = b0\n", file
);
9815 fputs ("\tld8 r14 = [r14]\n\t;;\n", file
);
9816 fputs ("\t.body\n", file
);
9817 fputs ("\tmov out1 = r1\n", file
);
/* Load entry point and gp from the descriptor, then call through b6;
   the direct path simply calls _mcount.  */
9820 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file
);
9821 fputs ("\tmov b6 = r16\n", file
);
9822 fputs ("\tld8 r1 = [r14]\n", file
);
9823 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file
);
9826 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file
);
9829 static GTY(()) rtx mcount_func_rtx
;
9831 gen_mcount_func_rtx (void)
9833 if (!mcount_func_rtx
)
9834 mcount_func_rtx
= init_one_libfunc ("_mcount");
9835 return mcount_func_rtx
;
/* NOTE(review): garbled extraction — the return type, local
   declarations (`label', `ip', `buf'), the NO_PROFILE_COUNTERS
   else-branch structure, and the tail of the emit_library_call
   argument list are missing from this copy.  Only comments are added;
   restore the full body from upstream GCC.  Emits an RTL call to
   _mcount passing the return branch register, the current IP, and
   (when counters are enabled) a per-site local label.  */
9839 ia64_profile_hook (int labelno
)
9843 if (NO_PROFILE_COUNTERS
)
/* Build a local symbol for the counter label "LP<labelno>".  */
9848 const char *label_name
;
9849 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
9850 label_name
= (*targetm
.strip_name_encoding
) (ggc_strdup (buf
));
9851 label
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
9852 SYMBOL_REF_FLAGS (label
) = SYMBOL_FLAG_LOCAL
;
/* Capture the current instruction pointer into a pseudo.  */
9854 ip
= gen_reg_rtx (Pmode
);
9855 emit_insn (gen_ip_value (ip
));
9856 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL
,
9858 gen_rtx_REG (Pmode
, BR_REG (0)), Pmode
,
9863 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* NOTE(review): garbled extraction — the `static const char *' header,
   braces, and several return statements (the early NULL return for
   non-arithmetic types, the __float128 mangling return, the RFmode
   return, and the final NULL) are missing from this copy.  Only
   comments are added; restore the body from upstream GCC.  */
9866 ia64_mangle_type (const_tree type
)
9868 type
= TYPE_MAIN_VARIANT (type
);
/* Only void/bool/integer/real types can be extended fundamental
   types; everything else gets no special mangling.  */
9870 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
9871 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
9874 /* On HP-UX, "long double" is mangled as "e" so __float128 is
9876 if (!TARGET_HPUX
&& TYPE_MODE (type
) == TFmode
)
9878 /* On HP-UX, "e" is not available as a mangling of __float80 so use
9879 an extended mangling. Elsewhere, "e" is available since long
9880 double is 80 bits. */
9881 if (TYPE_MODE (type
) == XFmode
)
9882 return TARGET_HPUX
? "u9__float80" : "e";
9883 if (TYPE_MODE (type
) == RFmode
)
9888 /* Return the diagnostic message string if conversion from FROMTYPE to
9889 TOTYPE is not allowed, NULL otherwise. */
9891 ia64_invalid_conversion (const_tree fromtype
, const_tree totype
)
9893 /* Reject nontrivial conversion to or from __fpreg. */
9894 if (TYPE_MODE (fromtype
) == RFmode
9895 && TYPE_MODE (totype
) != RFmode
9896 && TYPE_MODE (totype
) != VOIDmode
)
9897 return N_("invalid conversion from %<__fpreg%>");
9898 if (TYPE_MODE (totype
) == RFmode
9899 && TYPE_MODE (fromtype
) != RFmode
)
9900 return N_("invalid conversion to %<__fpreg%>");
9904 /* Return the diagnostic message string if the unary operation OP is
9905 not permitted on TYPE, NULL otherwise. */
9907 ia64_invalid_unary_op (int op
, const_tree type
)
9909 /* Reject operations on __fpreg other than unary + or &. */
9910 if (TYPE_MODE (type
) == RFmode
9911 && op
!= CONVERT_EXPR
9913 return N_("invalid operation on %<__fpreg%>");
9917 /* Return the diagnostic message string if the binary operation OP is
9918 not permitted on TYPE1 and TYPE2, NULL otherwise. */
9920 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED
, const_tree type1
, const_tree type2
)
9922 /* Reject operations on __fpreg. */
9923 if (TYPE_MODE (type1
) == RFmode
|| TYPE_MODE (type2
) == RFmode
)
9924 return N_("invalid operation on %<__fpreg%>");
9928 /* Implement overriding of the optimization options. */
9930 ia64_optimization_options (int level ATTRIBUTE_UNUSED
,
9931 int size ATTRIBUTE_UNUSED
)
9933 /* Disable the second machine independent scheduling pass and use one for the
9934 IA-64. This needs to be here instead of in OVERRIDE_OPTIONS because this
9935 is done whenever the optimization is changed via #pragma GCC optimize or
9936 attribute((optimize(...))). */
9937 ia64_flag_schedule_insns2
= flag_schedule_insns_after_reload
;
9938 flag_schedule_insns_after_reload
= 0;
9940 /* Let the scheduler form additional regions. */
9941 set_param_value ("max-sched-extend-regions-iters", 2);
9943 /* Set the default values for cache-related parameters. */
9944 set_param_value ("simultaneous-prefetches", 6);
9945 set_param_value ("l1-cache-line-size", 32);
9949 /* HP-UX version_id attribute.
9950 For object foo, if the version_id is set to 1234 put out an alias
9951 of '.alias foo "foo{1234}" We can't use "foo{1234}" in anything
9952 other than an alias statement because it is an illegal symbol name. */
/* NOTE(review): garbled extraction — the `static tree' header, the
   `tree args' and `bool *no_add_attrs' parameter lines, braces, and
   the NULL_TREE returns are missing from this copy.  Only comments are
   added; restore the full attribute-handler body from upstream GCC.  */
9955 ia64_handle_version_id_attribute (tree
*node ATTRIBUTE_UNUSED
,
9956 tree name ATTRIBUTE_UNUSED
,
9958 int flags ATTRIBUTE_UNUSED
,
/* Validate that the single attribute argument is a string constant;
   otherwise diagnose and suppress the attribute.  */
9961 tree arg
= TREE_VALUE (args
);
9963 if (TREE_CODE (arg
) != STRING_CST
)
9965 error("version attribute is not a string");
9966 *no_add_attrs
= true;
9972 /* Target hook for c_mode_for_suffix. */
/* NOTE(review): the body of this function (original lines 9976-9984)
   was lost in extraction; only the signature remains.  Restore from
   upstream GCC before building.  */
9974 static enum machine_mode
9975 ia64_c_mode_for_suffix (char suffix
)
9985 #include "gt-ia64.h"