alpha.c (alpha_preferred_reload_class, [...]): Avoid C++ keywords.
[gcc.git] / gcc / config / ia64 / ia64.c
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "except.h"
41 #include "function.h"
42 #include "ggc.h"
43 #include "basic-block.h"
44 #include "toplev.h"
45 #include "sched-int.h"
46 #include "timevar.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "tm_p.h"
50 #include "hashtab.h"
51 #include "langhooks.h"
52 #include "cfglayout.h"
53 #include "gimple.h"
54 #include "intl.h"
55 #include "df.h"
56 #include "debug.h"
57 #include "params.h"
58 #include "dbgcnt.h"
59 #include "tm-constrs.h"
60
61 /* This is used for communication between ASM_OUTPUT_LABEL and
62 ASM_OUTPUT_LABELREF. */
63 int ia64_asm_output_label = 0;
64
65 /* Define the information needed to generate branch and scc insns. This is
66 stored from the compare operation. */
67 struct rtx_def * ia64_compare_op0;
68 struct rtx_def * ia64_compare_op1;
69
70 /* Register names for ia64_expand_prologue. */
71 static const char * const ia64_reg_numbers[96] =
72 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
73 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
74 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
75 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
76 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
77 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
78 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
79 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
80 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
81 "r104","r105","r106","r107","r108","r109","r110","r111",
82 "r112","r113","r114","r115","r116","r117","r118","r119",
83 "r120","r121","r122","r123","r124","r125","r126","r127"};
84
85 /* ??? These strings could be shared with REGISTER_NAMES. */
86 static const char * const ia64_input_reg_names[8] =
87 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
88
89 /* ??? These strings could be shared with REGISTER_NAMES. */
90 static const char * const ia64_local_reg_names[80] =
91 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
92 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
93 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
94 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
95 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
96 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
97 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
98 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
99 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
100 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
101
102 /* ??? These strings could be shared with REGISTER_NAMES. */
103 static const char * const ia64_output_reg_names[8] =
104 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
105
106 /* Which cpu are we scheduling for. */
107 enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
108
109 /* Determines whether we run our final scheduling pass or not. We always
110 avoid the normal second scheduling pass. */
111 static int ia64_flag_schedule_insns2;
112
113 /* Determines whether we run variable tracking in machine dependent
114 reorganization. */
115 static int ia64_flag_var_tracking;
116
117 /* Variables which are this size or smaller are put in the sdata/sbss
118 sections. */
119
120 unsigned int ia64_section_threshold;
121
122 /* The following variable is used by the DFA insn scheduler. The value is
123 TRUE if we do insn bundling instead of insn scheduling. */
124 int bundling_p = 0;
125
126 enum ia64_frame_regs
127 {
128 reg_fp,
129 reg_save_b0,
130 reg_save_pr,
131 reg_save_ar_pfs,
132 reg_save_ar_unat,
133 reg_save_ar_lc,
134 reg_save_gp,
135 number_of_ia64_frame_regs
136 };
137
138 /* Structure to be filled in by ia64_compute_frame_size with register
139 save masks and offsets for the current function. */
140
141 struct ia64_frame_info
142 {
143 HOST_WIDE_INT total_size; /* size of the stack frame, not including
144 the caller's scratch area. */
145 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
146 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
147 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
148 HARD_REG_SET mask; /* mask of saved registers. */
149 unsigned int gr_used_mask; /* mask of registers in use as gr spill
150 registers or long-term scratches. */
151 int n_spilled; /* number of spilled registers. */
152 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
153 int n_input_regs; /* number of input registers used. */
154 int n_local_regs; /* number of local registers used. */
155 int n_output_regs; /* number of output registers used. */
156 int n_rotate_regs; /* number of rotating registers used. */
157
158 char need_regstk; /* true if a .regstk directive needed. */
159 char initialized; /* true if the data is finalized. */
160 };
161
162 /* Current frame information calculated by ia64_compute_frame_size. */
163 static struct ia64_frame_info current_frame_info;
164 /* The actual registers that are emitted. */
165 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
166 \f
167 static int ia64_first_cycle_multipass_dfa_lookahead (void);
168 static void ia64_dependencies_evaluation_hook (rtx, rtx);
169 static void ia64_init_dfa_pre_cycle_insn (void);
170 static rtx ia64_dfa_pre_cycle_insn (void);
171 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
172 static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
173 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
174 static void ia64_h_i_d_extended (void);
175 static int ia64_mode_to_int (enum machine_mode);
176 static void ia64_set_sched_flags (spec_info_t);
177 static int ia64_speculate_insn (rtx, ds_t, rtx *);
178 static rtx ia64_gen_spec_insn (rtx, ds_t, int, bool, bool);
179 static bool ia64_needs_block_p (const_rtx);
180 static rtx ia64_gen_check (rtx, rtx, bool);
181 static int ia64_spec_check_p (rtx);
182 static int ia64_spec_check_src_p (rtx);
183 static rtx gen_tls_get_addr (void);
184 static rtx gen_thread_pointer (void);
185 static int find_gr_spill (enum ia64_frame_regs, int);
186 static int next_scratch_gr_reg (void);
187 static void mark_reg_gr_used_mask (rtx, void *);
188 static void ia64_compute_frame_size (HOST_WIDE_INT);
189 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
190 static void finish_spill_pointers (void);
191 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
192 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
193 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
194 static rtx gen_movdi_x (rtx, rtx, rtx);
195 static rtx gen_fr_spill_x (rtx, rtx, rtx);
196 static rtx gen_fr_restore_x (rtx, rtx, rtx);
197
198 static enum machine_mode hfa_element_mode (const_tree, bool);
199 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
200 tree, int *, int);
201 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
202 tree, bool);
203 static bool ia64_function_ok_for_sibcall (tree, tree);
204 static bool ia64_return_in_memory (const_tree, const_tree);
205 static bool ia64_rtx_costs (rtx, int, int, int *);
206 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
207 static void fix_range (const char *);
208 static bool ia64_handle_option (size_t, const char *, int);
209 static struct machine_function * ia64_init_machine_status (void);
210 static void emit_insn_group_barriers (FILE *);
211 static void emit_all_insn_group_barriers (FILE *);
212 static void final_emit_insn_group_barriers (FILE *);
213 static void emit_predicate_relation_info (void);
214 static void ia64_reorg (void);
215 static bool ia64_in_small_data_p (const_tree);
216 static void process_epilogue (FILE *, rtx, bool, bool);
217 static int process_set (FILE *, rtx, rtx, bool, bool);
218
219 static bool ia64_assemble_integer (rtx, unsigned int, int);
220 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
221 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
222 static void ia64_output_function_end_prologue (FILE *);
223
224 static int ia64_issue_rate (void);
225 static int ia64_adjust_cost (rtx, rtx, rtx, int);
226 static void ia64_sched_init (FILE *, int, int);
227 static void ia64_sched_init_global (FILE *, int, int);
228 static void ia64_sched_finish_global (FILE *, int);
229 static void ia64_sched_finish (FILE *, int);
230 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
231 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
232 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
233 static int ia64_variable_issue (FILE *, int, rtx, int);
234
235 static struct bundle_state *get_free_bundle_state (void);
236 static void free_bundle_state (struct bundle_state *);
237 static void initiate_bundle_states (void);
238 static void finish_bundle_states (void);
239 static unsigned bundle_state_hash (const void *);
240 static int bundle_state_eq_p (const void *, const void *);
241 static int insert_bundle_state (struct bundle_state *);
242 static void initiate_bundle_state_table (void);
243 static void finish_bundle_state_table (void);
244 static int try_issue_nops (struct bundle_state *, int);
245 static int try_issue_insn (struct bundle_state *, rtx);
246 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
247 static int get_max_pos (state_t);
248 static int get_template (state_t, int);
249
250 static rtx get_next_important_insn (rtx, rtx);
251 static void bundling (FILE *, int, rtx, rtx);
252
253 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
254 HOST_WIDE_INT, tree);
255 static void ia64_file_start (void);
256 static void ia64_globalize_decl_name (FILE *, tree);
257
258 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
259 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
260 static section *ia64_select_rtx_section (enum machine_mode, rtx,
261 unsigned HOST_WIDE_INT);
262 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
263 ATTRIBUTE_UNUSED;
264 static unsigned int ia64_section_type_flags (tree, const char *, int);
265 static void ia64_init_libfuncs (void)
266 ATTRIBUTE_UNUSED;
267 static void ia64_hpux_init_libfuncs (void)
268 ATTRIBUTE_UNUSED;
269 static void ia64_sysv4_init_libfuncs (void)
270 ATTRIBUTE_UNUSED;
271 static void ia64_vms_init_libfuncs (void)
272 ATTRIBUTE_UNUSED;
273
274 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
275 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
276 static void ia64_encode_section_info (tree, rtx, int);
277 static rtx ia64_struct_value_rtx (tree, int);
278 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
279 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
280 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
281 static bool ia64_cannot_force_const_mem (rtx);
282 static const char *ia64_mangle_type (const_tree);
283 static const char *ia64_invalid_conversion (const_tree, const_tree);
284 static const char *ia64_invalid_unary_op (int, const_tree);
285 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
286 static enum machine_mode ia64_c_mode_for_suffix (char);
287 \f
288 /* Table of valid machine attributes. */
289 static const struct attribute_spec ia64_attribute_table[] =
290 {
291 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
292 { "syscall_linkage", 0, 0, false, true, true, NULL },
293 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
294 { "version_id", 1, 1, true, false, false,
295 ia64_handle_version_id_attribute },
296 { NULL, 0, 0, false, false, false, NULL }
297 };
298
299 /* Initialize the GCC target structure. */
300 #undef TARGET_ATTRIBUTE_TABLE
301 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
302
303 #undef TARGET_INIT_BUILTINS
304 #define TARGET_INIT_BUILTINS ia64_init_builtins
305
306 #undef TARGET_EXPAND_BUILTIN
307 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
308
309 #undef TARGET_ASM_BYTE_OP
310 #define TARGET_ASM_BYTE_OP "\tdata1\t"
311 #undef TARGET_ASM_ALIGNED_HI_OP
312 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
313 #undef TARGET_ASM_ALIGNED_SI_OP
314 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
315 #undef TARGET_ASM_ALIGNED_DI_OP
316 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
317 #undef TARGET_ASM_UNALIGNED_HI_OP
318 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
319 #undef TARGET_ASM_UNALIGNED_SI_OP
320 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
321 #undef TARGET_ASM_UNALIGNED_DI_OP
322 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
323 #undef TARGET_ASM_INTEGER
324 #define TARGET_ASM_INTEGER ia64_assemble_integer
325
326 #undef TARGET_ASM_FUNCTION_PROLOGUE
327 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
328 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
329 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
330 #undef TARGET_ASM_FUNCTION_EPILOGUE
331 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
332
333 #undef TARGET_IN_SMALL_DATA_P
334 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
335
336 #undef TARGET_SCHED_ADJUST_COST
337 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
338 #undef TARGET_SCHED_ISSUE_RATE
339 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
340 #undef TARGET_SCHED_VARIABLE_ISSUE
341 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
342 #undef TARGET_SCHED_INIT
343 #define TARGET_SCHED_INIT ia64_sched_init
344 #undef TARGET_SCHED_FINISH
345 #define TARGET_SCHED_FINISH ia64_sched_finish
346 #undef TARGET_SCHED_INIT_GLOBAL
347 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
348 #undef TARGET_SCHED_FINISH_GLOBAL
349 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
350 #undef TARGET_SCHED_REORDER
351 #define TARGET_SCHED_REORDER ia64_sched_reorder
352 #undef TARGET_SCHED_REORDER2
353 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
354
355 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
356 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
357
358 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
359 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
360
361 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
362 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
363 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
364 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
365
366 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
367 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
368 ia64_first_cycle_multipass_dfa_lookahead_guard
369
370 #undef TARGET_SCHED_DFA_NEW_CYCLE
371 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
372
373 #undef TARGET_SCHED_H_I_D_EXTENDED
374 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
375
376 #undef TARGET_SCHED_SET_SCHED_FLAGS
377 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
378
379 #undef TARGET_SCHED_SPECULATE_INSN
380 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
381
382 #undef TARGET_SCHED_NEEDS_BLOCK_P
383 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
384
385 #undef TARGET_SCHED_GEN_CHECK
386 #define TARGET_SCHED_GEN_CHECK ia64_gen_check
387
388 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
389 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
390 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
391
392 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
393 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
394 #undef TARGET_ARG_PARTIAL_BYTES
395 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
396
397 #undef TARGET_ASM_OUTPUT_MI_THUNK
398 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
399 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
400 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
401
402 #undef TARGET_ASM_FILE_START
403 #define TARGET_ASM_FILE_START ia64_file_start
404
405 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
406 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
407
408 #undef TARGET_RTX_COSTS
409 #define TARGET_RTX_COSTS ia64_rtx_costs
410 #undef TARGET_ADDRESS_COST
411 #define TARGET_ADDRESS_COST hook_int_rtx_0
412
413 #undef TARGET_UNSPEC_MAY_TRAP_P
414 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
415
416 #undef TARGET_MACHINE_DEPENDENT_REORG
417 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
418
419 #undef TARGET_ENCODE_SECTION_INFO
420 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
421
422 #undef TARGET_SECTION_TYPE_FLAGS
423 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
424
425 #ifdef HAVE_AS_TLS
426 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
427 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
428 #endif
429
430 /* ??? ABI doesn't allow us to define this. */
431 #if 0
432 #undef TARGET_PROMOTE_FUNCTION_ARGS
433 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
434 #endif
435
436 /* ??? ABI doesn't allow us to define this. */
437 #if 0
438 #undef TARGET_PROMOTE_FUNCTION_RETURN
439 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
440 #endif
441
442 /* ??? Investigate. */
443 #if 0
444 #undef TARGET_PROMOTE_PROTOTYPES
445 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
446 #endif
447
448 #undef TARGET_STRUCT_VALUE_RTX
449 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
450 #undef TARGET_RETURN_IN_MEMORY
451 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
452 #undef TARGET_SETUP_INCOMING_VARARGS
453 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
454 #undef TARGET_STRICT_ARGUMENT_NAMING
455 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
456 #undef TARGET_MUST_PASS_IN_STACK
457 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
458
459 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
460 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
461
462 #undef TARGET_UNWIND_EMIT
463 #define TARGET_UNWIND_EMIT process_for_unwind_directive
464
465 #undef TARGET_SCALAR_MODE_SUPPORTED_P
466 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
467 #undef TARGET_VECTOR_MODE_SUPPORTED_P
468 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
469
470 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
471 in an order different from the specified program order. */
472 #undef TARGET_RELAXED_ORDERING
473 #define TARGET_RELAXED_ORDERING true
474
475 #undef TARGET_DEFAULT_TARGET_FLAGS
476 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
477 #undef TARGET_HANDLE_OPTION
478 #define TARGET_HANDLE_OPTION ia64_handle_option
479
480 #undef TARGET_CANNOT_FORCE_CONST_MEM
481 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
482
483 #undef TARGET_MANGLE_TYPE
484 #define TARGET_MANGLE_TYPE ia64_mangle_type
485
486 #undef TARGET_INVALID_CONVERSION
487 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
488 #undef TARGET_INVALID_UNARY_OP
489 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
490 #undef TARGET_INVALID_BINARY_OP
491 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
492
493 #undef TARGET_C_MODE_FOR_SUFFIX
494 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
495
496 #undef TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION
497 #define TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION true
498
499 #undef TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION
500 #define TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION true
501
502 struct gcc_target targetm = TARGET_INITIALIZER;
503 \f
504 typedef enum
505 {
506 ADDR_AREA_NORMAL, /* normal address area */
507 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
508 }
509 ia64_addr_area;
510
511 static GTY(()) tree small_ident1;
512 static GTY(()) tree small_ident2;
513
514 static void
515 init_idents (void)
516 {
517 if (small_ident1 == 0)
518 {
519 small_ident1 = get_identifier ("small");
520 small_ident2 = get_identifier ("__small__");
521 }
522 }
523
524 /* Retrieve the address area that has been chosen for the given decl. */
525
526 static ia64_addr_area
527 ia64_get_addr_area (tree decl)
528 {
529 tree model_attr;
530
531 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
532 if (model_attr)
533 {
534 tree id;
535
536 init_idents ();
537 id = TREE_VALUE (TREE_VALUE (model_attr));
538 if (id == small_ident1 || id == small_ident2)
539 return ADDR_AREA_SMALL;
540 }
541 return ADDR_AREA_NORMAL;
542 }
543
544 static tree
545 ia64_handle_model_attribute (tree *node, tree name, tree args,
546 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
547 {
548 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
549 ia64_addr_area area;
550 tree arg, decl = *node;
551
552 init_idents ();
553 arg = TREE_VALUE (args);
554 if (arg == small_ident1 || arg == small_ident2)
555 {
556 addr_area = ADDR_AREA_SMALL;
557 }
558 else
559 {
560 warning (OPT_Wattributes, "invalid argument of %qs attribute",
561 IDENTIFIER_POINTER (name));
562 *no_add_attrs = true;
563 }
564
565 switch (TREE_CODE (decl))
566 {
567 case VAR_DECL:
568 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
569 == FUNCTION_DECL)
570 && !TREE_STATIC (decl))
571 {
572 error ("%Jan address area attribute cannot be specified for "
573 "local variables", decl);
574 *no_add_attrs = true;
575 }
576 area = ia64_get_addr_area (decl);
577 if (area != ADDR_AREA_NORMAL && addr_area != area)
578 {
579 error ("address area of %q+D conflicts with previous "
580 "declaration", decl);
581 *no_add_attrs = true;
582 }
583 break;
584
585 case FUNCTION_DECL:
586 error ("%Jaddress area attribute cannot be specified for functions",
587 decl);
588 *no_add_attrs = true;
589 break;
590
591 default:
592 warning (OPT_Wattributes, "%qs attribute ignored",
593 IDENTIFIER_POINTER (name));
594 *no_add_attrs = true;
595 break;
596 }
597
598 return NULL_TREE;
599 }
600
601 static void
602 ia64_encode_addr_area (tree decl, rtx symbol)
603 {
604 int flags;
605
606 flags = SYMBOL_REF_FLAGS (symbol);
607 switch (ia64_get_addr_area (decl))
608 {
609 case ADDR_AREA_NORMAL: break;
610 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
611 default: gcc_unreachable ();
612 }
613 SYMBOL_REF_FLAGS (symbol) = flags;
614 }
615
616 static void
617 ia64_encode_section_info (tree decl, rtx rtl, int first)
618 {
619 default_encode_section_info (decl, rtl, first);
620
621 /* Careful not to prod global register variables. */
622 if (TREE_CODE (decl) == VAR_DECL
623 && GET_CODE (DECL_RTL (decl)) == MEM
624 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
625 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
626 ia64_encode_addr_area (decl, XEXP (rtl, 0));
627 }
628 \f
629 /* Return 1 if the operands of a move are ok. */
630
631 int
632 ia64_move_ok (rtx dst, rtx src)
633 {
634 /* If we're under init_recog_no_volatile, we'll not be able to use
635 memory_operand. So check the code directly and don't worry about
636 the validity of the underlying address, which should have been
637 checked elsewhere anyway. */
638 if (GET_CODE (dst) != MEM)
639 return 1;
640 if (GET_CODE (src) == MEM)
641 return 0;
642 if (register_operand (src, VOIDmode))
643 return 1;
644
645 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
646 if (INTEGRAL_MODE_P (GET_MODE (dst)))
647 return src == const0_rtx;
648 else
649 return satisfies_constraint_G (src);
650 }
651
652 /* Return 1 if the operands are ok for a floating point load pair. */
653
654 int
655 ia64_load_pair_ok (rtx dst, rtx src)
656 {
657 if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
658 return 0;
659 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
660 return 0;
661 switch (GET_CODE (XEXP (src, 0)))
662 {
663 case REG:
664 case POST_INC:
665 break;
666 case POST_DEC:
667 return 0;
668 case POST_MODIFY:
669 {
670 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
671
672 if (GET_CODE (adjust) != CONST_INT
673 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
674 return 0;
675 }
676 break;
677 default:
678 abort ();
679 }
680 return 1;
681 }
682
683 int
684 addp4_optimize_ok (rtx op1, rtx op2)
685 {
686 return (basereg_operand (op1, GET_MODE(op1)) !=
687 basereg_operand (op2, GET_MODE(op2)));
688 }
689
690 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
691 Return the length of the field, or <= 0 on failure. */
692
693 int
694 ia64_depz_field_mask (rtx rop, rtx rshift)
695 {
696 unsigned HOST_WIDE_INT op = INTVAL (rop);
697 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
698
699 /* Get rid of the zero bits we're shifting in. */
700 op >>= shift;
701
702 /* We must now have a solid block of 1's at bit 0. */
703 return exact_log2 (op + 1);
704 }
705
706 /* Return the TLS model to use for ADDR. */
707
708 static enum tls_model
709 tls_symbolic_operand_type (rtx addr)
710 {
711 enum tls_model tls_kind = 0;
712
713 if (GET_CODE (addr) == CONST)
714 {
715 if (GET_CODE (XEXP (addr, 0)) == PLUS
716 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
717 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
718 }
719 else if (GET_CODE (addr) == SYMBOL_REF)
720 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
721
722 return tls_kind;
723 }
724
725 /* Return true if X is a constant that is valid for some immediate
726 field in an instruction. */
727
728 bool
729 ia64_legitimate_constant_p (rtx x)
730 {
731 switch (GET_CODE (x))
732 {
733 case CONST_INT:
734 case LABEL_REF:
735 return true;
736
737 case CONST_DOUBLE:
738 if (GET_MODE (x) == VOIDmode)
739 return true;
740 return satisfies_constraint_G (x);
741
742 case CONST:
743 case SYMBOL_REF:
744 /* ??? Short term workaround for PR 28490. We must make the code here
745 match the code in ia64_expand_move and move_operand, even though they
746 are both technically wrong. */
747 if (tls_symbolic_operand_type (x) == 0)
748 {
749 HOST_WIDE_INT addend = 0;
750 rtx op = x;
751
752 if (GET_CODE (op) == CONST
753 && GET_CODE (XEXP (op, 0)) == PLUS
754 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
755 {
756 addend = INTVAL (XEXP (XEXP (op, 0), 1));
757 op = XEXP (XEXP (op, 0), 0);
758 }
759
760 if (any_offset_symbol_operand (op, GET_MODE (op))
761 || function_operand (op, GET_MODE (op)))
762 return true;
763 if (aligned_offset_symbol_operand (op, GET_MODE (op)))
764 return (addend & 0x3fff) == 0;
765 return false;
766 }
767 return false;
768
769 case CONST_VECTOR:
770 {
771 enum machine_mode mode = GET_MODE (x);
772
773 if (mode == V2SFmode)
774 return satisfies_constraint_Y (x);
775
776 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
777 && GET_MODE_SIZE (mode) <= 8);
778 }
779
780 default:
781 return false;
782 }
783 }
784
785 /* Don't allow TLS addresses to get spilled to memory. */
786
787 static bool
788 ia64_cannot_force_const_mem (rtx x)
789 {
790 if (GET_MODE (x) == RFmode)
791 return true;
792 return tls_symbolic_operand_type (x) != 0;
793 }
794
795 /* Expand a symbolic constant load. */
796
797 bool
798 ia64_expand_load_address (rtx dest, rtx src)
799 {
800 gcc_assert (GET_CODE (dest) == REG);
801
802 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
803 having to pointer-extend the value afterward. Other forms of address
804 computation below are also more natural to compute as 64-bit quantities.
805 If we've been given an SImode destination register, change it. */
806 if (GET_MODE (dest) != Pmode)
807 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
808 byte_lowpart_offset (Pmode, GET_MODE (dest)));
809
810 if (TARGET_NO_PIC)
811 return false;
812 if (small_addr_symbolic_operand (src, VOIDmode))
813 return false;
814
815 if (TARGET_AUTO_PIC)
816 emit_insn (gen_load_gprel64 (dest, src));
817 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
818 emit_insn (gen_load_fptr (dest, src));
819 else if (sdata_symbolic_operand (src, VOIDmode))
820 emit_insn (gen_load_gprel (dest, src));
821 else
822 {
823 HOST_WIDE_INT addend = 0;
824 rtx tmp;
825
826 /* We did split constant offsets in ia64_expand_move, and we did try
827 to keep them split in move_operand, but we also allowed reload to
828 rematerialize arbitrary constants rather than spill the value to
829 the stack and reload it. So we have to be prepared here to split
830 them apart again. */
831 if (GET_CODE (src) == CONST)
832 {
833 HOST_WIDE_INT hi, lo;
834
835 hi = INTVAL (XEXP (XEXP (src, 0), 1));
836 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
837 hi = hi - lo;
838
839 if (lo != 0)
840 {
841 addend = lo;
842 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
843 }
844 }
845
846 tmp = gen_rtx_HIGH (Pmode, src);
847 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
848 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
849
850 tmp = gen_rtx_LO_SUM (Pmode, dest, src);
851 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
852
853 if (addend)
854 {
855 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
856 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
857 }
858 }
859
860 return true;
861 }
862
863 static GTY(()) rtx gen_tls_tga;
864 static rtx
865 gen_tls_get_addr (void)
866 {
867 if (!gen_tls_tga)
868 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
869 return gen_tls_tga;
870 }
871
872 static GTY(()) rtx thread_pointer_rtx;
873 static rtx
874 gen_thread_pointer (void)
875 {
876 if (!thread_pointer_rtx)
877 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
878 return thread_pointer_rtx;
879 }
880
881 static rtx
882 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
883 rtx orig_op1, HOST_WIDE_INT addend)
884 {
885 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
886 rtx orig_op0 = op0;
887 HOST_WIDE_INT addend_lo, addend_hi;
888
889 switch (tls_kind)
890 {
891 case TLS_MODEL_GLOBAL_DYNAMIC:
892 start_sequence ();
893
894 tga_op1 = gen_reg_rtx (Pmode);
895 emit_insn (gen_load_dtpmod (tga_op1, op1));
896
897 tga_op2 = gen_reg_rtx (Pmode);
898 emit_insn (gen_load_dtprel (tga_op2, op1));
899
900 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
901 LCT_CONST, Pmode, 2, tga_op1,
902 Pmode, tga_op2, Pmode);
903
904 insns = get_insns ();
905 end_sequence ();
906
907 if (GET_MODE (op0) != Pmode)
908 op0 = tga_ret;
909 emit_libcall_block (insns, op0, tga_ret, op1);
910 break;
911
912 case TLS_MODEL_LOCAL_DYNAMIC:
913 /* ??? This isn't the completely proper way to do local-dynamic
914 If the call to __tls_get_addr is used only by a single symbol,
915 then we should (somehow) move the dtprel to the second arg
916 to avoid the extra add. */
917 start_sequence ();
918
919 tga_op1 = gen_reg_rtx (Pmode);
920 emit_insn (gen_load_dtpmod (tga_op1, op1));
921
922 tga_op2 = const0_rtx;
923
924 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
925 LCT_CONST, Pmode, 2, tga_op1,
926 Pmode, tga_op2, Pmode);
927
928 insns = get_insns ();
929 end_sequence ();
930
931 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
932 UNSPEC_LD_BASE);
933 tmp = gen_reg_rtx (Pmode);
934 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
935
936 if (!register_operand (op0, Pmode))
937 op0 = gen_reg_rtx (Pmode);
938 if (TARGET_TLS64)
939 {
940 emit_insn (gen_load_dtprel (op0, op1));
941 emit_insn (gen_adddi3 (op0, tmp, op0));
942 }
943 else
944 emit_insn (gen_add_dtprel (op0, op1, tmp));
945 break;
946
947 case TLS_MODEL_INITIAL_EXEC:
948 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
949 addend_hi = addend - addend_lo;
950
951 op1 = plus_constant (op1, addend_hi);
952 addend = addend_lo;
953
954 tmp = gen_reg_rtx (Pmode);
955 emit_insn (gen_load_tprel (tmp, op1));
956
957 if (!register_operand (op0, Pmode))
958 op0 = gen_reg_rtx (Pmode);
959 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
960 break;
961
962 case TLS_MODEL_LOCAL_EXEC:
963 if (!register_operand (op0, Pmode))
964 op0 = gen_reg_rtx (Pmode);
965
966 op1 = orig_op1;
967 addend = 0;
968 if (TARGET_TLS64)
969 {
970 emit_insn (gen_load_tprel (op0, op1));
971 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
972 }
973 else
974 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
975 break;
976
977 default:
978 gcc_unreachable ();
979 }
980
981 if (addend)
982 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
983 orig_op0, 1, OPTAB_DIRECT);
984 if (orig_op0 == op0)
985 return NULL_RTX;
986 if (GET_MODE (orig_op0) == Pmode)
987 return op0;
988 return gen_lowpart (GET_MODE (orig_op0), op0);
989 }
990
991 rtx
992 ia64_expand_move (rtx op0, rtx op1)
993 {
994 enum machine_mode mode = GET_MODE (op0);
995
996 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
997 op1 = force_reg (mode, op1);
998
999 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1000 {
1001 HOST_WIDE_INT addend = 0;
1002 enum tls_model tls_kind;
1003 rtx sym = op1;
1004
1005 if (GET_CODE (op1) == CONST
1006 && GET_CODE (XEXP (op1, 0)) == PLUS
1007 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1008 {
1009 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1010 sym = XEXP (XEXP (op1, 0), 0);
1011 }
1012
1013 tls_kind = tls_symbolic_operand_type (sym);
1014 if (tls_kind)
1015 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1016
1017 if (any_offset_symbol_operand (sym, mode))
1018 addend = 0;
1019 else if (aligned_offset_symbol_operand (sym, mode))
1020 {
1021 HOST_WIDE_INT addend_lo, addend_hi;
1022
1023 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1024 addend_hi = addend - addend_lo;
1025
1026 if (addend_lo != 0)
1027 {
1028 op1 = plus_constant (sym, addend_hi);
1029 addend = addend_lo;
1030 }
1031 else
1032 addend = 0;
1033 }
1034 else
1035 op1 = sym;
1036
1037 if (reload_completed)
1038 {
1039 /* We really should have taken care of this offset earlier. */
1040 gcc_assert (addend == 0);
1041 if (ia64_expand_load_address (op0, op1))
1042 return NULL_RTX;
1043 }
1044
1045 if (addend)
1046 {
1047 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1048
1049 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1050
1051 op1 = expand_simple_binop (mode, PLUS, subtarget,
1052 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1053 if (op0 == op1)
1054 return NULL_RTX;
1055 }
1056 }
1057
1058 return op1;
1059 }
1060
1061 /* Split a move from OP1 to OP0 conditional on COND. */
1062
1063 void
1064 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1065 {
1066 rtx insn, first = get_last_insn ();
1067
1068 emit_move_insn (op0, op1);
1069
1070 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1071 if (INSN_P (insn))
1072 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1073 PATTERN (insn));
1074 }
1075
1076 /* Split a post-reload TImode or TFmode reference into two DImode
1077 components. This is made extra difficult by the fact that we do
1078 not get any scratch registers to work with, because reload cannot
1079 be prevented from giving us a scratch that overlaps the register
1080 pair involved. So instead, when addressing memory, we tweak the
1081 pointer register up and back down with POST_INCs. Or up and not
1082 back down when we can get away with it.
1083
1084 REVERSED is true when the loads must be done in reversed order
1085 (high word first) for correctness. DEAD is true when the pointer
1086 dies with the second insn we generate and therefore the second
1087 address must not carry a postmodify.
1088
1089 May return an insn which is to be emitted after the moves. */
1090
1091 static rtx
1092 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1093 {
1094 rtx fixup = 0;
1095
1096 switch (GET_CODE (in))
1097 {
1098 case REG:
1099 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1100 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1101 break;
1102
1103 case CONST_INT:
1104 case CONST_DOUBLE:
1105 /* Cannot occur reversed. */
1106 gcc_assert (!reversed);
1107
1108 if (GET_MODE (in) != TFmode)
1109 split_double (in, &out[0], &out[1]);
1110 else
1111 /* split_double does not understand how to split a TFmode
1112 quantity into a pair of DImode constants. */
1113 {
1114 REAL_VALUE_TYPE r;
1115 unsigned HOST_WIDE_INT p[2];
1116 long l[4]; /* TFmode is 128 bits */
1117
1118 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1119 real_to_target (l, &r, TFmode);
1120
1121 if (FLOAT_WORDS_BIG_ENDIAN)
1122 {
1123 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1124 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1125 }
1126 else
1127 {
1128 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1129 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1130 }
1131 out[0] = GEN_INT (p[0]);
1132 out[1] = GEN_INT (p[1]);
1133 }
1134 break;
1135
1136 case MEM:
1137 {
1138 rtx base = XEXP (in, 0);
1139 rtx offset;
1140
1141 switch (GET_CODE (base))
1142 {
1143 case REG:
1144 if (!reversed)
1145 {
1146 out[0] = adjust_automodify_address
1147 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1148 out[1] = adjust_automodify_address
1149 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1150 }
1151 else
1152 {
1153 /* Reversal requires a pre-increment, which can only
1154 be done as a separate insn. */
1155 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1156 out[0] = adjust_automodify_address
1157 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1158 out[1] = adjust_address (in, DImode, 0);
1159 }
1160 break;
1161
1162 case POST_INC:
1163 gcc_assert (!reversed && !dead);
1164
1165 /* Just do the increment in two steps. */
1166 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1167 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1168 break;
1169
1170 case POST_DEC:
1171 gcc_assert (!reversed && !dead);
1172
1173 /* Add 8, subtract 24. */
1174 base = XEXP (base, 0);
1175 out[0] = adjust_automodify_address
1176 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1177 out[1] = adjust_automodify_address
1178 (in, DImode,
1179 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1180 8);
1181 break;
1182
1183 case POST_MODIFY:
1184 gcc_assert (!reversed && !dead);
1185
1186 /* Extract and adjust the modification. This case is
1187 trickier than the others, because we might have an
1188 index register, or we might have a combined offset that
1189 doesn't fit a signed 9-bit displacement field. We can
1190 assume the incoming expression is already legitimate. */
1191 offset = XEXP (base, 1);
1192 base = XEXP (base, 0);
1193
1194 out[0] = adjust_automodify_address
1195 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1196
1197 if (GET_CODE (XEXP (offset, 1)) == REG)
1198 {
1199 /* Can't adjust the postmodify to match. Emit the
1200 original, then a separate addition insn. */
1201 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1202 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1203 }
1204 else
1205 {
1206 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1207 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1208 {
1209 /* Again the postmodify cannot be made to match,
1210 but in this case it's more efficient to get rid
1211 of the postmodify entirely and fix up with an
1212 add insn. */
1213 out[1] = adjust_automodify_address (in, DImode, base, 8);
1214 fixup = gen_adddi3
1215 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1216 }
1217 else
1218 {
1219 /* Combined offset still fits in the displacement field.
1220 (We cannot overflow it at the high end.) */
1221 out[1] = adjust_automodify_address
1222 (in, DImode, gen_rtx_POST_MODIFY
1223 (Pmode, base, gen_rtx_PLUS
1224 (Pmode, base,
1225 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1226 8);
1227 }
1228 }
1229 break;
1230
1231 default:
1232 gcc_unreachable ();
1233 }
1234 break;
1235 }
1236
1237 default:
1238 gcc_unreachable ();
1239 }
1240
1241 return fixup;
1242 }
1243
1244 /* Split a TImode or TFmode move instruction after reload.
1245 This is used by *movtf_internal and *movti_internal. */
1246 void
1247 ia64_split_tmode_move (rtx operands[])
1248 {
1249 rtx in[2], out[2], insn;
1250 rtx fixup[2];
1251 bool dead = false;
1252 bool reversed = false;
1253
1254 /* It is possible for reload to decide to overwrite a pointer with
1255 the value it points to. In that case we have to do the loads in
1256 the appropriate order so that the pointer is not destroyed too
1257 early. Also we must not generate a postmodify for that second
1258 load, or rws_access_regno will die. */
1259 if (GET_CODE (operands[1]) == MEM
1260 && reg_overlap_mentioned_p (operands[0], operands[1]))
1261 {
1262 rtx base = XEXP (operands[1], 0);
1263 while (GET_CODE (base) != REG)
1264 base = XEXP (base, 0);
1265
1266 if (REGNO (base) == REGNO (operands[0]))
1267 reversed = true;
1268 dead = true;
1269 }
1270 /* Another reason to do the moves in reversed order is if the first
1271 element of the target register pair is also the second element of
1272 the source register pair. */
1273 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1274 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1275 reversed = true;
1276
1277 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1278 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1279
1280 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1281 if (GET_CODE (EXP) == MEM \
1282 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1283 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1284 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1285 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1286 XEXP (XEXP (EXP, 0), 0), \
1287 REG_NOTES (INSN))
1288
1289 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1290 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1291 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1292
1293 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1294 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1295 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1296
1297 if (fixup[0])
1298 emit_insn (fixup[0]);
1299 if (fixup[1])
1300 emit_insn (fixup[1]);
1301
1302 #undef MAYBE_ADD_REG_INC_NOTE
1303 }
1304
1305 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1306 through memory plus an extra GR scratch register. Except that you can
1307 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1308 SECONDARY_RELOAD_CLASS, but not both.
1309
1310 We got into problems in the first place by allowing a construct like
1311 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1312 This solution attempts to prevent this situation from occurring. When
1313 we see something like the above, we spill the inner register to memory. */
1314
1315 static rtx
1316 spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1317 {
1318 if (GET_CODE (in) == SUBREG
1319 && GET_MODE (SUBREG_REG (in)) == TImode
1320 && GET_CODE (SUBREG_REG (in)) == REG)
1321 {
1322 rtx memt = assign_stack_temp (TImode, 16, 0);
1323 emit_move_insn (memt, SUBREG_REG (in));
1324 return adjust_address (memt, mode, 0);
1325 }
1326 else if (force && GET_CODE (in) == REG)
1327 {
1328 rtx memx = assign_stack_temp (mode, 16, 0);
1329 emit_move_insn (memx, in);
1330 return memx;
1331 }
1332 else
1333 return in;
1334 }
1335
1336 /* Expand the movxf or movrf pattern (MODE says which) with the given
1337 OPERANDS, returning true if the pattern should then invoke
1338 DONE. */
1339
1340 bool
1341 ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1342 {
1343 rtx op0 = operands[0];
1344
1345 if (GET_CODE (op0) == SUBREG)
1346 op0 = SUBREG_REG (op0);
1347
1348 /* We must support XFmode loads into general registers for stdarg/vararg,
1349 unprototyped calls, and a rare case where a long double is passed as
1350 an argument after a float HFA fills the FP registers. We split them into
1351 DImode loads for convenience. We also need to support XFmode stores
1352 for the last case. This case does not happen for stdarg/vararg routines,
1353 because we do a block store to memory of unnamed arguments. */
1354
1355 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1356 {
1357 rtx out[2];
1358
1359 /* We're hoping to transform everything that deals with XFmode
1360 quantities and GR registers early in the compiler. */
1361 gcc_assert (can_create_pseudo_p ());
1362
1363 /* Struct to register can just use TImode instead. */
1364 if ((GET_CODE (operands[1]) == SUBREG
1365 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1366 || (GET_CODE (operands[1]) == REG
1367 && GR_REGNO_P (REGNO (operands[1]))))
1368 {
1369 rtx op1 = operands[1];
1370
1371 if (GET_CODE (op1) == SUBREG)
1372 op1 = SUBREG_REG (op1);
1373 else
1374 op1 = gen_rtx_REG (TImode, REGNO (op1));
1375
1376 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1377 return true;
1378 }
1379
1380 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1381 {
1382 /* Don't word-swap when reading in the constant. */
1383 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1384 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1385 0, mode));
1386 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1387 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1388 0, mode));
1389 return true;
1390 }
1391
1392 /* If the quantity is in a register not known to be GR, spill it. */
1393 if (register_operand (operands[1], mode))
1394 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1395
1396 gcc_assert (GET_CODE (operands[1]) == MEM);
1397
1398 /* Don't word-swap when reading in the value. */
1399 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1400 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1401
1402 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1403 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1404 return true;
1405 }
1406
1407 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1408 {
1409 /* We're hoping to transform everything that deals with XFmode
1410 quantities and GR registers early in the compiler. */
1411 gcc_assert (can_create_pseudo_p ());
1412
1413 /* Op0 can't be a GR_REG here, as that case is handled above.
1414 If op0 is a register, then we spill op1, so that we now have a
1415 MEM operand. This requires creating an XFmode subreg of a TImode reg
1416 to force the spill. */
1417 if (register_operand (operands[0], mode))
1418 {
1419 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1420 op1 = gen_rtx_SUBREG (mode, op1, 0);
1421 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1422 }
1423
1424 else
1425 {
1426 rtx in[2];
1427
1428 gcc_assert (GET_CODE (operands[0]) == MEM);
1429
1430 /* Don't word-swap when writing out the value. */
1431 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1432 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1433
1434 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1435 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1436 return true;
1437 }
1438 }
1439
1440 if (!reload_in_progress && !reload_completed)
1441 {
1442 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1443
1444 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1445 {
1446 rtx memt, memx, in = operands[1];
1447 if (CONSTANT_P (in))
1448 in = validize_mem (force_const_mem (mode, in));
1449 if (GET_CODE (in) == MEM)
1450 memt = adjust_address (in, TImode, 0);
1451 else
1452 {
1453 memt = assign_stack_temp (TImode, 16, 0);
1454 memx = adjust_address (memt, mode, 0);
1455 emit_move_insn (memx, in);
1456 }
1457 emit_move_insn (op0, memt);
1458 return true;
1459 }
1460
1461 if (!ia64_move_ok (operands[0], operands[1]))
1462 operands[1] = force_reg (mode, operands[1]);
1463 }
1464
1465 return false;
1466 }
1467
1468 /* Emit comparison instruction if necessary, returning the expression
1469 that holds the compare result in the proper mode. */
1470
1471 static GTY(()) rtx cmptf_libfunc;
1472
1473 rtx
1474 ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1475 {
1476 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1477 rtx cmp;
1478
1479 /* If we have a BImode input, then we already have a compare result, and
1480 do not need to emit another comparison. */
1481 if (GET_MODE (op0) == BImode)
1482 {
1483 gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
1484 cmp = op0;
1485 }
1486 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1487 magic number as its third argument, that indicates what to do.
1488 The return value is an integer to be compared against zero. */
1489 else if (GET_MODE (op0) == TFmode)
1490 {
1491 enum qfcmp_magic {
1492 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1493 QCMP_UNORD = 2,
1494 QCMP_EQ = 4,
1495 QCMP_LT = 8,
1496 QCMP_GT = 16
1497 } magic;
1498 enum rtx_code ncode;
1499 rtx ret, insns;
1500
1501 gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
1502 switch (code)
1503 {
1504 /* 1 = equal, 0 = not equal. Equality operators do
1505 not raise FP_INVALID when given an SNaN operand. */
1506 case EQ: magic = QCMP_EQ; ncode = NE; break;
1507 case NE: magic = QCMP_EQ; ncode = EQ; break;
1508 /* isunordered() from C99. */
1509 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1510 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1511 /* Relational operators raise FP_INVALID when given
1512 an SNaN operand. */
1513 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1514 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1515 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1516 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1517 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1518 Expanders for buneq etc. weuld have to be added to ia64.md
1519 for this to be useful. */
1520 default: gcc_unreachable ();
1521 }
1522
1523 start_sequence ();
1524
1525 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1526 op0, TFmode, op1, TFmode,
1527 GEN_INT (magic), DImode);
1528 cmp = gen_reg_rtx (BImode);
1529 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1530 gen_rtx_fmt_ee (ncode, BImode,
1531 ret, const0_rtx)));
1532
1533 insns = get_insns ();
1534 end_sequence ();
1535
1536 emit_libcall_block (insns, cmp, cmp,
1537 gen_rtx_fmt_ee (code, BImode, op0, op1));
1538 code = NE;
1539 }
1540 else
1541 {
1542 cmp = gen_reg_rtx (BImode);
1543 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1544 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1545 code = NE;
1546 }
1547
1548 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1549 }
1550
1551 /* Generate an integral vector comparison. Return true if the condition has
1552 been reversed, and so the sense of the comparison should be inverted. */
1553
1554 static bool
1555 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1556 rtx dest, rtx op0, rtx op1)
1557 {
1558 bool negate = false;
1559 rtx x;
1560
1561 /* Canonicalize the comparison to EQ, GT, GTU. */
1562 switch (code)
1563 {
1564 case EQ:
1565 case GT:
1566 case GTU:
1567 break;
1568
1569 case NE:
1570 case LE:
1571 case LEU:
1572 code = reverse_condition (code);
1573 negate = true;
1574 break;
1575
1576 case GE:
1577 case GEU:
1578 code = reverse_condition (code);
1579 negate = true;
1580 /* FALLTHRU */
1581
1582 case LT:
1583 case LTU:
1584 code = swap_condition (code);
1585 x = op0, op0 = op1, op1 = x;
1586 break;
1587
1588 default:
1589 gcc_unreachable ();
1590 }
1591
1592 /* Unsigned parallel compare is not supported by the hardware. Play some
1593 tricks to turn this into a signed comparison against 0. */
1594 if (code == GTU)
1595 {
1596 switch (mode)
1597 {
1598 case V2SImode:
1599 {
1600 rtx t1, t2, mask;
1601
1602 /* Perform a parallel modulo subtraction. */
1603 t1 = gen_reg_rtx (V2SImode);
1604 emit_insn (gen_subv2si3 (t1, op0, op1));
1605
1606 /* Extract the original sign bit of op0. */
1607 mask = GEN_INT (-0x80000000);
1608 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1609 mask = force_reg (V2SImode, mask);
1610 t2 = gen_reg_rtx (V2SImode);
1611 emit_insn (gen_andv2si3 (t2, op0, mask));
1612
1613 /* XOR it back into the result of the subtraction. This results
1614 in the sign bit set iff we saw unsigned underflow. */
1615 x = gen_reg_rtx (V2SImode);
1616 emit_insn (gen_xorv2si3 (x, t1, t2));
1617
1618 code = GT;
1619 op0 = x;
1620 op1 = CONST0_RTX (mode);
1621 }
1622 break;
1623
1624 case V8QImode:
1625 case V4HImode:
1626 /* Perform a parallel unsigned saturating subtraction. */
1627 x = gen_reg_rtx (mode);
1628 emit_insn (gen_rtx_SET (VOIDmode, x,
1629 gen_rtx_US_MINUS (mode, op0, op1)));
1630
1631 code = EQ;
1632 op0 = x;
1633 op1 = CONST0_RTX (mode);
1634 negate = !negate;
1635 break;
1636
1637 default:
1638 gcc_unreachable ();
1639 }
1640 }
1641
1642 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1643 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1644
1645 return negate;
1646 }
1647
1648 /* Emit an integral vector conditional move. */
1649
1650 void
1651 ia64_expand_vecint_cmov (rtx operands[])
1652 {
1653 enum machine_mode mode = GET_MODE (operands[0]);
1654 enum rtx_code code = GET_CODE (operands[3]);
1655 bool negate;
1656 rtx cmp, x, ot, of;
1657
1658 cmp = gen_reg_rtx (mode);
1659 negate = ia64_expand_vecint_compare (code, mode, cmp,
1660 operands[4], operands[5]);
1661
1662 ot = operands[1+negate];
1663 of = operands[2-negate];
1664
1665 if (ot == CONST0_RTX (mode))
1666 {
1667 if (of == CONST0_RTX (mode))
1668 {
1669 emit_move_insn (operands[0], ot);
1670 return;
1671 }
1672
1673 x = gen_rtx_NOT (mode, cmp);
1674 x = gen_rtx_AND (mode, x, of);
1675 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1676 }
1677 else if (of == CONST0_RTX (mode))
1678 {
1679 x = gen_rtx_AND (mode, cmp, ot);
1680 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1681 }
1682 else
1683 {
1684 rtx t, f;
1685
1686 t = gen_reg_rtx (mode);
1687 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1688 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1689
1690 f = gen_reg_rtx (mode);
1691 x = gen_rtx_NOT (mode, cmp);
1692 x = gen_rtx_AND (mode, x, operands[2-negate]);
1693 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1694
1695 x = gen_rtx_IOR (mode, t, f);
1696 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1697 }
1698 }
1699
1700 /* Emit an integral vector min or max operation. Return true if all done. */
1701
1702 bool
1703 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1704 rtx operands[])
1705 {
1706 rtx xops[6];
1707
1708 /* These four combinations are supported directly. */
1709 if (mode == V8QImode && (code == UMIN || code == UMAX))
1710 return false;
1711 if (mode == V4HImode && (code == SMIN || code == SMAX))
1712 return false;
1713
1714 /* This combination can be implemented with only saturating subtraction. */
1715 if (mode == V4HImode && code == UMAX)
1716 {
1717 rtx x, tmp = gen_reg_rtx (mode);
1718
1719 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1720 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1721
1722 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1723 return true;
1724 }
1725
1726 /* Everything else implemented via vector comparisons. */
1727 xops[0] = operands[0];
1728 xops[4] = xops[1] = operands[1];
1729 xops[5] = xops[2] = operands[2];
1730
1731 switch (code)
1732 {
1733 case UMIN:
1734 code = LTU;
1735 break;
1736 case UMAX:
1737 code = GTU;
1738 break;
1739 case SMIN:
1740 code = LT;
1741 break;
1742 case SMAX:
1743 code = GT;
1744 break;
1745 default:
1746 gcc_unreachable ();
1747 }
1748 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1749
1750 ia64_expand_vecint_cmov (xops);
1751 return true;
1752 }
1753
1754 /* Emit an integral vector widening sum operations. */
1755
1756 void
1757 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1758 {
1759 rtx l, h, x, s;
1760 enum machine_mode wmode, mode;
1761 rtx (*unpack_l) (rtx, rtx, rtx);
1762 rtx (*unpack_h) (rtx, rtx, rtx);
1763 rtx (*plus) (rtx, rtx, rtx);
1764
1765 wmode = GET_MODE (operands[0]);
1766 mode = GET_MODE (operands[1]);
1767
1768 switch (mode)
1769 {
1770 case V8QImode:
1771 unpack_l = gen_unpack1_l;
1772 unpack_h = gen_unpack1_h;
1773 plus = gen_addv4hi3;
1774 break;
1775 case V4HImode:
1776 unpack_l = gen_unpack2_l;
1777 unpack_h = gen_unpack2_h;
1778 plus = gen_addv2si3;
1779 break;
1780 default:
1781 gcc_unreachable ();
1782 }
1783
1784 /* Fill in x with the sign extension of each element in op1. */
1785 if (unsignedp)
1786 x = CONST0_RTX (mode);
1787 else
1788 {
1789 bool neg;
1790
1791 x = gen_reg_rtx (mode);
1792
1793 neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1794 CONST0_RTX (mode));
1795 gcc_assert (!neg);
1796 }
1797
1798 l = gen_reg_rtx (wmode);
1799 h = gen_reg_rtx (wmode);
1800 s = gen_reg_rtx (wmode);
1801
1802 emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1803 emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1804 emit_insn (plus (s, l, operands[2]));
1805 emit_insn (plus (operands[0], h, s));
1806 }
1807
1808 /* Emit a signed or unsigned V8QI dot product operation. */
1809
1810 void
1811 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1812 {
1813 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1814
1815 /* Fill in x1 and x2 with the sign extension of each element. */
1816 if (unsignedp)
1817 x1 = x2 = CONST0_RTX (V8QImode);
1818 else
1819 {
1820 bool neg;
1821
1822 x1 = gen_reg_rtx (V8QImode);
1823 x2 = gen_reg_rtx (V8QImode);
1824
1825 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1826 CONST0_RTX (V8QImode));
1827 gcc_assert (!neg);
1828 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1829 CONST0_RTX (V8QImode));
1830 gcc_assert (!neg);
1831 }
1832
1833 l1 = gen_reg_rtx (V4HImode);
1834 l2 = gen_reg_rtx (V4HImode);
1835 h1 = gen_reg_rtx (V4HImode);
1836 h2 = gen_reg_rtx (V4HImode);
1837
1838 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1839 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1840 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1841 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1842
1843 p1 = gen_reg_rtx (V2SImode);
1844 p2 = gen_reg_rtx (V2SImode);
1845 p3 = gen_reg_rtx (V2SImode);
1846 p4 = gen_reg_rtx (V2SImode);
1847 emit_insn (gen_pmpy2_r (p1, l1, l2));
1848 emit_insn (gen_pmpy2_l (p2, l1, l2));
1849 emit_insn (gen_pmpy2_r (p3, h1, h2));
1850 emit_insn (gen_pmpy2_l (p4, h1, h2));
1851
1852 s1 = gen_reg_rtx (V2SImode);
1853 s2 = gen_reg_rtx (V2SImode);
1854 s3 = gen_reg_rtx (V2SImode);
1855 emit_insn (gen_addv2si3 (s1, p1, p2));
1856 emit_insn (gen_addv2si3 (s2, p3, p4));
1857 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1858 emit_insn (gen_addv2si3 (operands[0], s2, s3));
1859 }
1860
1861 /* Emit the appropriate sequence for a call. */
1862
1863 void
1864 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1865 int sibcall_p)
1866 {
1867 rtx insn, b0;
1868
1869 addr = XEXP (addr, 0);
1870 addr = convert_memory_address (DImode, addr);
1871 b0 = gen_rtx_REG (DImode, R_BR (0));
1872
1873 /* ??? Should do this for functions known to bind local too. */
1874 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1875 {
1876 if (sibcall_p)
1877 insn = gen_sibcall_nogp (addr);
1878 else if (! retval)
1879 insn = gen_call_nogp (addr, b0);
1880 else
1881 insn = gen_call_value_nogp (retval, addr, b0);
1882 insn = emit_call_insn (insn);
1883 }
1884 else
1885 {
1886 if (sibcall_p)
1887 insn = gen_sibcall_gp (addr);
1888 else if (! retval)
1889 insn = gen_call_gp (addr, b0);
1890 else
1891 insn = gen_call_value_gp (retval, addr, b0);
1892 insn = emit_call_insn (insn);
1893
1894 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1895 }
1896
1897 if (sibcall_p)
1898 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1899 }
1900
1901 static void
1902 reg_emitted (enum ia64_frame_regs r)
1903 {
1904 if (emitted_frame_related_regs[r] == 0)
1905 emitted_frame_related_regs[r] = current_frame_info.r[r];
1906 else
1907 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
1908 }
1909
1910 static int
1911 get_reg (enum ia64_frame_regs r)
1912 {
1913 reg_emitted (r);
1914 return current_frame_info.r[r];
1915 }
1916
1917 static bool
1918 is_emitted (int regno)
1919 {
1920 enum ia64_frame_regs r;
1921
1922 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
1923 if (emitted_frame_related_regs[r] == regno)
1924 return true;
1925 return false;
1926 }
1927
1928 void
1929 ia64_reload_gp (void)
1930 {
1931 rtx tmp;
1932
1933 if (current_frame_info.r[reg_save_gp])
1934 {
1935 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
1936 }
1937 else
1938 {
1939 HOST_WIDE_INT offset;
1940 rtx offset_r;
1941
1942 offset = (current_frame_info.spill_cfa_off
1943 + current_frame_info.spill_size);
1944 if (frame_pointer_needed)
1945 {
1946 tmp = hard_frame_pointer_rtx;
1947 offset = -offset;
1948 }
1949 else
1950 {
1951 tmp = stack_pointer_rtx;
1952 offset = current_frame_info.total_size - offset;
1953 }
1954
1955 offset_r = GEN_INT (offset);
1956 if (satisfies_constraint_I (offset_r))
1957 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
1958 else
1959 {
1960 emit_move_insn (pic_offset_table_rtx, offset_r);
1961 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1962 pic_offset_table_rtx, tmp));
1963 }
1964
1965 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1966 }
1967
1968 emit_move_insn (pic_offset_table_rtx, tmp);
1969 }
1970
1971 void
1972 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1973 rtx scratch_b, int noreturn_p, int sibcall_p)
1974 {
1975 rtx insn;
1976 bool is_desc = false;
1977
1978 /* If we find we're calling through a register, then we're actually
1979 calling through a descriptor, so load up the values. */
1980 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1981 {
1982 rtx tmp;
1983 bool addr_dead_p;
1984
1985 /* ??? We are currently constrained to *not* use peep2, because
1986 we can legitimately change the global lifetime of the GP
1987 (in the form of killing where previously live). This is
1988 because a call through a descriptor doesn't use the previous
1989 value of the GP, while a direct call does, and we do not
1990 commit to either form until the split here.
1991
1992 That said, this means that we lack precise life info for
1993 whether ADDR is dead after this call. This is not terribly
1994 important, since we can fix things up essentially for free
1995 with the POST_DEC below, but it's nice to not use it when we
1996 can immediately tell it's not necessary. */
1997 addr_dead_p = ((noreturn_p || sibcall_p
1998 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1999 REGNO (addr)))
2000 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2001
2002 /* Load the code address into scratch_b. */
2003 tmp = gen_rtx_POST_INC (Pmode, addr);
2004 tmp = gen_rtx_MEM (Pmode, tmp);
2005 emit_move_insn (scratch_r, tmp);
2006 emit_move_insn (scratch_b, scratch_r);
2007
2008 /* Load the GP address. If ADDR is not dead here, then we must
2009 revert the change made above via the POST_INCREMENT. */
2010 if (!addr_dead_p)
2011 tmp = gen_rtx_POST_DEC (Pmode, addr);
2012 else
2013 tmp = addr;
2014 tmp = gen_rtx_MEM (Pmode, tmp);
2015 emit_move_insn (pic_offset_table_rtx, tmp);
2016
2017 is_desc = true;
2018 addr = scratch_b;
2019 }
2020
2021 if (sibcall_p)
2022 insn = gen_sibcall_nogp (addr);
2023 else if (retval)
2024 insn = gen_call_value_nogp (retval, addr, retaddr);
2025 else
2026 insn = gen_call_nogp (addr, retaddr);
2027 emit_call_insn (insn);
2028
2029 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2030 ia64_reload_gp ();
2031 }
2032
2033 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2034
2035 This differs from the generic code in that we know about the zero-extending
2036 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2037 also know that ld.acq+cmpxchg.rel equals a full barrier.
2038
2039 The loop we want to generate looks like
2040
2041 cmp_reg = mem;
2042 label:
2043 old_reg = cmp_reg;
2044 new_reg = cmp_reg op val;
2045 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2046 if (cmp_reg != old_reg)
2047 goto label;
2048
2049 Note that we only do the plain load from memory once. Subsequent
2050 iterations use the value loaded by the compare-and-swap pattern. */
2051
2052 void
2053 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2054 rtx old_dst, rtx new_dst)
2055 {
2056 enum machine_mode mode = GET_MODE (mem);
2057 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2058 enum insn_code icode;
2059
2060 /* Special case for using fetchadd. */
2061 if ((mode == SImode || mode == DImode)
2062 && (code == PLUS || code == MINUS)
2063 && fetchadd_operand (val, mode))
2064 {
2065 if (code == MINUS)
2066 val = GEN_INT (-INTVAL (val));
2067
2068 if (!old_dst)
2069 old_dst = gen_reg_rtx (mode);
2070
2071 emit_insn (gen_memory_barrier ());
2072
2073 if (mode == SImode)
2074 icode = CODE_FOR_fetchadd_acq_si;
2075 else
2076 icode = CODE_FOR_fetchadd_acq_di;
2077 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2078
2079 if (new_dst)
2080 {
2081 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2082 true, OPTAB_WIDEN);
2083 if (new_reg != new_dst)
2084 emit_move_insn (new_dst, new_reg);
2085 }
2086 return;
2087 }
2088
2089 /* Because of the volatile mem read, we get an ld.acq, which is the
2090 front half of the full barrier. The end half is the cmpxchg.rel. */
2091 gcc_assert (MEM_VOLATILE_P (mem));
2092
2093 old_reg = gen_reg_rtx (DImode);
2094 cmp_reg = gen_reg_rtx (DImode);
2095 label = gen_label_rtx ();
2096
2097 if (mode != DImode)
2098 {
2099 val = simplify_gen_subreg (DImode, val, mode, 0);
2100 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2101 }
2102 else
2103 emit_move_insn (cmp_reg, mem);
2104
2105 emit_label (label);
2106
2107 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2108 emit_move_insn (old_reg, cmp_reg);
2109 emit_move_insn (ar_ccv, cmp_reg);
2110
2111 if (old_dst)
2112 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2113
2114 new_reg = cmp_reg;
2115 if (code == NOT)
2116 {
2117 new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
2118 code = AND;
2119 }
2120 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2121 true, OPTAB_DIRECT);
2122
2123 if (mode != DImode)
2124 new_reg = gen_lowpart (mode, new_reg);
2125 if (new_dst)
2126 emit_move_insn (new_dst, new_reg);
2127
2128 switch (mode)
2129 {
2130 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2131 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2132 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2133 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2134 default:
2135 gcc_unreachable ();
2136 }
2137
2138 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2139
2140 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2141 }
2142 \f
2143 /* Begin the assembly file. */
2144
2145 static void
2146 ia64_file_start (void)
2147 {
2148 /* Variable tracking should be run after all optimizations which change order
2149 of insns. It also needs a valid CFG. This can't be done in
2150 ia64_override_options, because flag_var_tracking is finalized after
2151 that. */
2152 ia64_flag_var_tracking = flag_var_tracking;
2153 flag_var_tracking = 0;
2154
2155 default_file_start ();
2156 emit_safe_across_calls ();
2157 }
2158
2159 void
2160 emit_safe_across_calls (void)
2161 {
2162 unsigned int rs, re;
2163 int out_state;
2164
2165 rs = 1;
2166 out_state = 0;
2167 while (1)
2168 {
2169 while (rs < 64 && call_used_regs[PR_REG (rs)])
2170 rs++;
2171 if (rs >= 64)
2172 break;
2173 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2174 continue;
2175 if (out_state == 0)
2176 {
2177 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2178 out_state = 1;
2179 }
2180 else
2181 fputc (',', asm_out_file);
2182 if (re == rs + 1)
2183 fprintf (asm_out_file, "p%u", rs);
2184 else
2185 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2186 rs = re + 1;
2187 }
2188 if (out_state)
2189 fputc ('\n', asm_out_file);
2190 }
2191
2192 /* Globalize a declaration. */
2193
2194 static void
2195 ia64_globalize_decl_name (FILE * stream, tree decl)
2196 {
2197 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2198 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2199 if (version_attr)
2200 {
2201 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2202 const char *p = TREE_STRING_POINTER (v);
2203 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2204 }
2205 targetm.asm_out.globalize_label (stream, name);
2206 if (TREE_CODE (decl) == FUNCTION_DECL)
2207 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2208 }
2209
2210 /* Helper function for ia64_compute_frame_size: find an appropriate general
2211 register to spill some special register to. SPECIAL_SPILL_MASK contains
2212 bits in GR0 to GR31 that have already been allocated by this routine.
2213 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2214
2215 static int
2216 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2217 {
2218 int regno;
2219
2220 if (emitted_frame_related_regs[r] != 0)
2221 {
2222 regno = emitted_frame_related_regs[r];
2223 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2224 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2225 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2226 else if (current_function_is_leaf
2227 && regno >= GR_REG (1) && regno <= GR_REG (31))
2228 current_frame_info.gr_used_mask |= 1 << regno;
2229
2230 return regno;
2231 }
2232
2233 /* If this is a leaf function, first try an otherwise unused
2234 call-clobbered register. */
2235 if (current_function_is_leaf)
2236 {
2237 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2238 if (! df_regs_ever_live_p (regno)
2239 && call_used_regs[regno]
2240 && ! fixed_regs[regno]
2241 && ! global_regs[regno]
2242 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2243 && ! is_emitted (regno))
2244 {
2245 current_frame_info.gr_used_mask |= 1 << regno;
2246 return regno;
2247 }
2248 }
2249
2250 if (try_locals)
2251 {
2252 regno = current_frame_info.n_local_regs;
2253 /* If there is a frame pointer, then we can't use loc79, because
2254 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2255 reg_name switching code in ia64_expand_prologue. */
2256 while (regno < (80 - frame_pointer_needed))
2257 if (! is_emitted (LOC_REG (regno++)))
2258 {
2259 current_frame_info.n_local_regs = regno;
2260 return LOC_REG (regno - 1);
2261 }
2262 }
2263
2264 /* Failed to find a general register to spill to. Must use stack. */
2265 return 0;
2266 }
2267
2268 /* In order to make for nice schedules, we try to allocate every temporary
2269 to a different register. We must of course stay away from call-saved,
2270 fixed, and global registers. We must also stay away from registers
2271 allocated in current_frame_info.gr_used_mask, since those include regs
2272 used all through the prologue.
2273
2274 Any register allocated here must be used immediately. The idea is to
2275 aid scheduling, not to solve data flow problems. */
2276
2277 static int last_scratch_gr_reg;
2278
2279 static int
2280 next_scratch_gr_reg (void)
2281 {
2282 int i, regno;
2283
2284 for (i = 0; i < 32; ++i)
2285 {
2286 regno = (last_scratch_gr_reg + i + 1) & 31;
2287 if (call_used_regs[regno]
2288 && ! fixed_regs[regno]
2289 && ! global_regs[regno]
2290 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2291 {
2292 last_scratch_gr_reg = regno;
2293 return regno;
2294 }
2295 }
2296
2297 /* There must be _something_ available. */
2298 gcc_unreachable ();
2299 }
2300
2301 /* Helper function for ia64_compute_frame_size, called through
2302 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2303
2304 static void
2305 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2306 {
2307 unsigned int regno = REGNO (reg);
2308 if (regno < 32)
2309 {
2310 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2311 for (i = 0; i < n; ++i)
2312 current_frame_info.gr_used_mask |= 1 << (regno + i);
2313 }
2314 }
2315
2316
2317 /* Returns the number of bytes offset between the frame pointer and the stack
2318 pointer for the current function. SIZE is the number of bytes of space
2319 needed for local variables. */
2320
2321 static void
2322 ia64_compute_frame_size (HOST_WIDE_INT size)
2323 {
2324 HOST_WIDE_INT total_size;
2325 HOST_WIDE_INT spill_size = 0;
2326 HOST_WIDE_INT extra_spill_size = 0;
2327 HOST_WIDE_INT pretend_args_size;
2328 HARD_REG_SET mask;
2329 int n_spilled = 0;
2330 int spilled_gr_p = 0;
2331 int spilled_fr_p = 0;
2332 unsigned int regno;
2333 int min_regno;
2334 int max_regno;
2335 int i;
2336
2337 if (current_frame_info.initialized)
2338 return;
2339
2340 memset (&current_frame_info, 0, sizeof current_frame_info);
2341 CLEAR_HARD_REG_SET (mask);
2342
2343 /* Don't allocate scratches to the return register. */
2344 diddle_return_value (mark_reg_gr_used_mask, NULL);
2345
2346 /* Don't allocate scratches to the EH scratch registers. */
2347 if (cfun->machine->ia64_eh_epilogue_sp)
2348 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2349 if (cfun->machine->ia64_eh_epilogue_bsp)
2350 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2351
2352 /* Find the size of the register stack frame. We have only 80 local
2353 registers, because we reserve 8 for the inputs and 8 for the
2354 outputs. */
2355
2356 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2357 since we'll be adjusting that down later. */
2358 regno = LOC_REG (78) + ! frame_pointer_needed;
2359 for (; regno >= LOC_REG (0); regno--)
2360 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2361 break;
2362 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2363
2364 /* For functions marked with the syscall_linkage attribute, we must mark
2365 all eight input registers as in use, so that locals aren't visible to
2366 the caller. */
2367
2368 if (cfun->machine->n_varargs > 0
2369 || lookup_attribute ("syscall_linkage",
2370 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2371 current_frame_info.n_input_regs = 8;
2372 else
2373 {
2374 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2375 if (df_regs_ever_live_p (regno))
2376 break;
2377 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2378 }
2379
2380 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2381 if (df_regs_ever_live_p (regno))
2382 break;
2383 i = regno - OUT_REG (0) + 1;
2384
2385 #ifndef PROFILE_HOOK
2386 /* When -p profiling, we need one output register for the mcount argument.
2387 Likewise for -a profiling for the bb_init_func argument. For -ax
2388 profiling, we need two output registers for the two bb_init_trace_func
2389 arguments. */
2390 if (crtl->profile)
2391 i = MAX (i, 1);
2392 #endif
2393 current_frame_info.n_output_regs = i;
2394
2395 /* ??? No rotating register support yet. */
2396 current_frame_info.n_rotate_regs = 0;
2397
2398 /* Discover which registers need spilling, and how much room that
2399 will take. Begin with floating point and general registers,
2400 which will always wind up on the stack. */
2401
2402 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2403 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2404 {
2405 SET_HARD_REG_BIT (mask, regno);
2406 spill_size += 16;
2407 n_spilled += 1;
2408 spilled_fr_p = 1;
2409 }
2410
2411 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2412 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2413 {
2414 SET_HARD_REG_BIT (mask, regno);
2415 spill_size += 8;
2416 n_spilled += 1;
2417 spilled_gr_p = 1;
2418 }
2419
2420 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2421 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2422 {
2423 SET_HARD_REG_BIT (mask, regno);
2424 spill_size += 8;
2425 n_spilled += 1;
2426 }
2427
2428 /* Now come all special registers that might get saved in other
2429 general registers. */
2430
2431 if (frame_pointer_needed)
2432 {
2433 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2434 /* If we did not get a register, then we take LOC79. This is guaranteed
2435 to be free, even if regs_ever_live is already set, because this is
2436 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2437 as we don't count loc79 above. */
2438 if (current_frame_info.r[reg_fp] == 0)
2439 {
2440 current_frame_info.r[reg_fp] = LOC_REG (79);
2441 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2442 }
2443 }
2444
2445 if (! current_function_is_leaf)
2446 {
2447 /* Emit a save of BR0 if we call other functions. Do this even
2448 if this function doesn't return, as EH depends on this to be
2449 able to unwind the stack. */
2450 SET_HARD_REG_BIT (mask, BR_REG (0));
2451
2452 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2453 if (current_frame_info.r[reg_save_b0] == 0)
2454 {
2455 extra_spill_size += 8;
2456 n_spilled += 1;
2457 }
2458
2459 /* Similarly for ar.pfs. */
2460 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2461 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2462 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2463 {
2464 extra_spill_size += 8;
2465 n_spilled += 1;
2466 }
2467
2468 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2469 registers are clobbered, so we fall back to the stack. */
2470 current_frame_info.r[reg_save_gp]
2471 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2472 if (current_frame_info.r[reg_save_gp] == 0)
2473 {
2474 SET_HARD_REG_BIT (mask, GR_REG (1));
2475 spill_size += 8;
2476 n_spilled += 1;
2477 }
2478 }
2479 else
2480 {
2481 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2482 {
2483 SET_HARD_REG_BIT (mask, BR_REG (0));
2484 extra_spill_size += 8;
2485 n_spilled += 1;
2486 }
2487
2488 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2489 {
2490 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2491 current_frame_info.r[reg_save_ar_pfs]
2492 = find_gr_spill (reg_save_ar_pfs, 1);
2493 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2494 {
2495 extra_spill_size += 8;
2496 n_spilled += 1;
2497 }
2498 }
2499 }
2500
2501 /* Unwind descriptor hackery: things are most efficient if we allocate
2502 consecutive GR save registers for RP, PFS, FP in that order. However,
2503 it is absolutely critical that FP get the only hard register that's
2504 guaranteed to be free, so we allocated it first. If all three did
2505 happen to be allocated hard regs, and are consecutive, rearrange them
2506 into the preferred order now.
2507
2508 If we have already emitted code for any of those registers,
2509 then it's already too late to change. */
2510 min_regno = MIN (current_frame_info.r[reg_fp],
2511 MIN (current_frame_info.r[reg_save_b0],
2512 current_frame_info.r[reg_save_ar_pfs]));
2513 max_regno = MAX (current_frame_info.r[reg_fp],
2514 MAX (current_frame_info.r[reg_save_b0],
2515 current_frame_info.r[reg_save_ar_pfs]));
2516 if (min_regno > 0
2517 && min_regno + 2 == max_regno
2518 && (current_frame_info.r[reg_fp] == min_regno + 1
2519 || current_frame_info.r[reg_save_b0] == min_regno + 1
2520 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2521 && (emitted_frame_related_regs[reg_save_b0] == 0
2522 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2523 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2524 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2525 && (emitted_frame_related_regs[reg_fp] == 0
2526 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2527 {
2528 current_frame_info.r[reg_save_b0] = min_regno;
2529 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2530 current_frame_info.r[reg_fp] = min_regno + 2;
2531 }
2532
2533 /* See if we need to store the predicate register block. */
2534 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2535 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2536 break;
2537 if (regno <= PR_REG (63))
2538 {
2539 SET_HARD_REG_BIT (mask, PR_REG (0));
2540 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2541 if (current_frame_info.r[reg_save_pr] == 0)
2542 {
2543 extra_spill_size += 8;
2544 n_spilled += 1;
2545 }
2546
2547 /* ??? Mark them all as used so that register renaming and such
2548 are free to use them. */
2549 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2550 df_set_regs_ever_live (regno, true);
2551 }
2552
2553 /* If we're forced to use st8.spill, we're forced to save and restore
2554 ar.unat as well. The check for existing liveness allows inline asm
2555 to touch ar.unat. */
2556 if (spilled_gr_p || cfun->machine->n_varargs
2557 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2558 {
2559 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2560 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2561 current_frame_info.r[reg_save_ar_unat]
2562 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2563 if (current_frame_info.r[reg_save_ar_unat] == 0)
2564 {
2565 extra_spill_size += 8;
2566 n_spilled += 1;
2567 }
2568 }
2569
2570 if (df_regs_ever_live_p (AR_LC_REGNUM))
2571 {
2572 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2573 current_frame_info.r[reg_save_ar_lc]
2574 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2575 if (current_frame_info.r[reg_save_ar_lc] == 0)
2576 {
2577 extra_spill_size += 8;
2578 n_spilled += 1;
2579 }
2580 }
2581
2582 /* If we have an odd number of words of pretend arguments written to
2583 the stack, then the FR save area will be unaligned. We round the
2584 size of this area up to keep things 16 byte aligned. */
2585 if (spilled_fr_p)
2586 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2587 else
2588 pretend_args_size = crtl->args.pretend_args_size;
2589
2590 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2591 + crtl->outgoing_args_size);
2592 total_size = IA64_STACK_ALIGN (total_size);
2593
2594 /* We always use the 16-byte scratch area provided by the caller, but
2595 if we are a leaf function, there's no one to which we need to provide
2596 a scratch area. */
2597 if (current_function_is_leaf)
2598 total_size = MAX (0, total_size - 16);
2599
2600 current_frame_info.total_size = total_size;
2601 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2602 current_frame_info.spill_size = spill_size;
2603 current_frame_info.extra_spill_size = extra_spill_size;
2604 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2605 current_frame_info.n_spilled = n_spilled;
2606 current_frame_info.initialized = reload_completed;
2607 }
2608
2609 /* Compute the initial difference between the specified pair of registers. */
2610
2611 HOST_WIDE_INT
2612 ia64_initial_elimination_offset (int from, int to)
2613 {
2614 HOST_WIDE_INT offset;
2615
2616 ia64_compute_frame_size (get_frame_size ());
2617 switch (from)
2618 {
2619 case FRAME_POINTER_REGNUM:
2620 switch (to)
2621 {
2622 case HARD_FRAME_POINTER_REGNUM:
2623 if (current_function_is_leaf)
2624 offset = -current_frame_info.total_size;
2625 else
2626 offset = -(current_frame_info.total_size
2627 - crtl->outgoing_args_size - 16);
2628 break;
2629
2630 case STACK_POINTER_REGNUM:
2631 if (current_function_is_leaf)
2632 offset = 0;
2633 else
2634 offset = 16 + crtl->outgoing_args_size;
2635 break;
2636
2637 default:
2638 gcc_unreachable ();
2639 }
2640 break;
2641
2642 case ARG_POINTER_REGNUM:
2643 /* Arguments start above the 16 byte save area, unless stdarg
2644 in which case we store through the 16 byte save area. */
2645 switch (to)
2646 {
2647 case HARD_FRAME_POINTER_REGNUM:
2648 offset = 16 - crtl->args.pretend_args_size;
2649 break;
2650
2651 case STACK_POINTER_REGNUM:
2652 offset = (current_frame_info.total_size
2653 + 16 - crtl->args.pretend_args_size);
2654 break;
2655
2656 default:
2657 gcc_unreachable ();
2658 }
2659 break;
2660
2661 default:
2662 gcc_unreachable ();
2663 }
2664
2665 return offset;
2666 }
2667
2668 /* If there are more than a trivial number of register spills, we use
2669 two interleaved iterators so that we can get two memory references
2670 per insn group.
2671
2672 In order to simplify things in the prologue and epilogue expanders,
2673 we use helper functions to fix up the memory references after the
2674 fact with the appropriate offsets to a POST_MODIFY memory mode.
2675 The following data structure tracks the state of the two iterators
2676 while insns are being emitted. */
2677
2678 struct spill_fill_data
2679 {
2680 rtx init_after; /* point at which to emit initializations */
2681 rtx init_reg[2]; /* initial base register */
2682 rtx iter_reg[2]; /* the iterator registers */
2683 rtx *prev_addr[2]; /* address of last memory use */
2684 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2685 HOST_WIDE_INT prev_off[2]; /* last offset */
2686 int n_iter; /* number of iterators in use */
2687 int next_iter; /* next iterator to use */
2688 unsigned int save_gr_used_mask;
2689 };
2690
2691 static struct spill_fill_data spill_fill_data;
2692
2693 static void
2694 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2695 {
2696 int i;
2697
2698 spill_fill_data.init_after = get_last_insn ();
2699 spill_fill_data.init_reg[0] = init_reg;
2700 spill_fill_data.init_reg[1] = init_reg;
2701 spill_fill_data.prev_addr[0] = NULL;
2702 spill_fill_data.prev_addr[1] = NULL;
2703 spill_fill_data.prev_insn[0] = NULL;
2704 spill_fill_data.prev_insn[1] = NULL;
2705 spill_fill_data.prev_off[0] = cfa_off;
2706 spill_fill_data.prev_off[1] = cfa_off;
2707 spill_fill_data.next_iter = 0;
2708 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2709
2710 spill_fill_data.n_iter = 1 + (n_spills > 2);
2711 for (i = 0; i < spill_fill_data.n_iter; ++i)
2712 {
2713 int regno = next_scratch_gr_reg ();
2714 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2715 current_frame_info.gr_used_mask |= 1 << regno;
2716 }
2717 }
2718
2719 static void
2720 finish_spill_pointers (void)
2721 {
2722 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2723 }
2724
2725 static rtx
2726 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2727 {
2728 int iter = spill_fill_data.next_iter;
2729 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2730 rtx disp_rtx = GEN_INT (disp);
2731 rtx mem;
2732
2733 if (spill_fill_data.prev_addr[iter])
2734 {
2735 if (satisfies_constraint_N (disp_rtx))
2736 {
2737 *spill_fill_data.prev_addr[iter]
2738 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2739 gen_rtx_PLUS (DImode,
2740 spill_fill_data.iter_reg[iter],
2741 disp_rtx));
2742 REG_NOTES (spill_fill_data.prev_insn[iter])
2743 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2744 REG_NOTES (spill_fill_data.prev_insn[iter]));
2745 }
2746 else
2747 {
2748 /* ??? Could use register post_modify for loads. */
2749 if (!satisfies_constraint_I (disp_rtx))
2750 {
2751 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2752 emit_move_insn (tmp, disp_rtx);
2753 disp_rtx = tmp;
2754 }
2755 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2756 spill_fill_data.iter_reg[iter], disp_rtx));
2757 }
2758 }
2759 /* Micro-optimization: if we've created a frame pointer, it's at
2760 CFA 0, which may allow the real iterator to be initialized lower,
2761 slightly increasing parallelism. Also, if there are few saves
2762 it may eliminate the iterator entirely. */
2763 else if (disp == 0
2764 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2765 && frame_pointer_needed)
2766 {
2767 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2768 set_mem_alias_set (mem, get_varargs_alias_set ());
2769 return mem;
2770 }
2771 else
2772 {
2773 rtx seq, insn;
2774
2775 if (disp == 0)
2776 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2777 spill_fill_data.init_reg[iter]);
2778 else
2779 {
2780 start_sequence ();
2781
2782 if (!satisfies_constraint_I (disp_rtx))
2783 {
2784 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2785 emit_move_insn (tmp, disp_rtx);
2786 disp_rtx = tmp;
2787 }
2788
2789 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2790 spill_fill_data.init_reg[iter],
2791 disp_rtx));
2792
2793 seq = get_insns ();
2794 end_sequence ();
2795 }
2796
2797 /* Careful for being the first insn in a sequence. */
2798 if (spill_fill_data.init_after)
2799 insn = emit_insn_after (seq, spill_fill_data.init_after);
2800 else
2801 {
2802 rtx first = get_insns ();
2803 if (first)
2804 insn = emit_insn_before (seq, first);
2805 else
2806 insn = emit_insn (seq);
2807 }
2808 spill_fill_data.init_after = insn;
2809 }
2810
2811 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2812
2813 /* ??? Not all of the spills are for varargs, but some of them are.
2814 The rest of the spills belong in an alias set of their own. But
2815 it doesn't actually hurt to include them here. */
2816 set_mem_alias_set (mem, get_varargs_alias_set ());
2817
2818 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2819 spill_fill_data.prev_off[iter] = cfa_off;
2820
2821 if (++iter >= spill_fill_data.n_iter)
2822 iter = 0;
2823 spill_fill_data.next_iter = iter;
2824
2825 return mem;
2826 }
2827
2828 static void
2829 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2830 rtx frame_reg)
2831 {
2832 int iter = spill_fill_data.next_iter;
2833 rtx mem, insn;
2834
2835 mem = spill_restore_mem (reg, cfa_off);
2836 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2837 spill_fill_data.prev_insn[iter] = insn;
2838
2839 if (frame_reg)
2840 {
2841 rtx base;
2842 HOST_WIDE_INT off;
2843
2844 RTX_FRAME_RELATED_P (insn) = 1;
2845
2846 /* Don't even pretend that the unwind code can intuit its way
2847 through a pair of interleaved post_modify iterators. Just
2848 provide the correct answer. */
2849
2850 if (frame_pointer_needed)
2851 {
2852 base = hard_frame_pointer_rtx;
2853 off = - cfa_off;
2854 }
2855 else
2856 {
2857 base = stack_pointer_rtx;
2858 off = current_frame_info.total_size - cfa_off;
2859 }
2860
2861 REG_NOTES (insn)
2862 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2863 gen_rtx_SET (VOIDmode,
2864 gen_rtx_MEM (GET_MODE (reg),
2865 plus_constant (base, off)),
2866 frame_reg),
2867 REG_NOTES (insn));
2868 }
2869 }
2870
2871 static void
2872 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2873 {
2874 int iter = spill_fill_data.next_iter;
2875 rtx insn;
2876
2877 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2878 GEN_INT (cfa_off)));
2879 spill_fill_data.prev_insn[iter] = insn;
2880 }
2881
2882 /* Wrapper functions that discards the CONST_INT spill offset. These
2883 exist so that we can give gr_spill/gr_fill the offset they need and
2884 use a consistent function interface. */
2885
2886 static rtx
2887 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2888 {
2889 return gen_movdi (dest, src);
2890 }
2891
2892 static rtx
2893 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2894 {
2895 return gen_fr_spill (dest, src);
2896 }
2897
2898 static rtx
2899 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2900 {
2901 return gen_fr_restore (dest, src);
2902 }
2903
2904 /* Called after register allocation to add any instructions needed for the
2905 prologue. Using a prologue insn is favored compared to putting all of the
2906 instructions in output_function_prologue(), since it allows the scheduler
2907 to intermix instructions with the saves of the caller saved registers. In
2908 some cases, it might be necessary to emit a barrier instruction as the last
2909 insn to prevent such scheduling.
2910
2911 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2912 so that the debug info generation code can handle them properly.
2913
2914 The register save area is layed out like so:
2915 cfa+16
2916 [ varargs spill area ]
2917 [ fr register spill area ]
2918 [ br register spill area ]
2919 [ ar register spill area ]
2920 [ pr register spill area ]
2921 [ gr register spill area ] */
2922
2923 /* ??? Get inefficient code when the frame size is larger than can fit in an
2924 adds instruction. */
2925
2926 void
2927 ia64_expand_prologue (void)
2928 {
2929 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2930 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2931 rtx reg, alt_reg;
2932
2933 ia64_compute_frame_size (get_frame_size ());
2934 last_scratch_gr_reg = 15;
2935
2936 if (dump_file)
2937 {
2938 fprintf (dump_file, "ia64 frame related registers "
2939 "recorded in current_frame_info.r[]:\n");
2940 #define PRINTREG(a) if (current_frame_info.r[a]) \
2941 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
2942 PRINTREG(reg_fp);
2943 PRINTREG(reg_save_b0);
2944 PRINTREG(reg_save_pr);
2945 PRINTREG(reg_save_ar_pfs);
2946 PRINTREG(reg_save_ar_unat);
2947 PRINTREG(reg_save_ar_lc);
2948 PRINTREG(reg_save_gp);
2949 #undef PRINTREG
2950 }
2951
2952 /* If there is no epilogue, then we don't need some prologue insns.
2953 We need to avoid emitting the dead prologue insns, because flow
2954 will complain about them. */
2955 if (optimize)
2956 {
2957 edge e;
2958 edge_iterator ei;
2959
2960 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2961 if ((e->flags & EDGE_FAKE) == 0
2962 && (e->flags & EDGE_FALLTHRU) != 0)
2963 break;
2964 epilogue_p = (e != NULL);
2965 }
2966 else
2967 epilogue_p = 1;
2968
2969 /* Set the local, input, and output register names. We need to do this
2970 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2971 half. If we use in/loc/out register names, then we get assembler errors
2972 in crtn.S because there is no alloc insn or regstk directive in there. */
2973 if (! TARGET_REG_NAMES)
2974 {
2975 int inputs = current_frame_info.n_input_regs;
2976 int locals = current_frame_info.n_local_regs;
2977 int outputs = current_frame_info.n_output_regs;
2978
2979 for (i = 0; i < inputs; i++)
2980 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2981 for (i = 0; i < locals; i++)
2982 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2983 for (i = 0; i < outputs; i++)
2984 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2985 }
2986
2987 /* Set the frame pointer register name. The regnum is logically loc79,
2988 but of course we'll not have allocated that many locals. Rather than
2989 worrying about renumbering the existing rtxs, we adjust the name. */
2990 /* ??? This code means that we can never use one local register when
2991 there is a frame pointer. loc79 gets wasted in this case, as it is
2992 renamed to a register that will never be used. See also the try_locals
2993 code in find_gr_spill. */
2994 if (current_frame_info.r[reg_fp])
2995 {
2996 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2997 reg_names[HARD_FRAME_POINTER_REGNUM]
2998 = reg_names[current_frame_info.r[reg_fp]];
2999 reg_names[current_frame_info.r[reg_fp]] = tmp;
3000 }
3001
3002 /* We don't need an alloc instruction if we've used no outputs or locals. */
3003 if (current_frame_info.n_local_regs == 0
3004 && current_frame_info.n_output_regs == 0
3005 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3006 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3007 {
3008 /* If there is no alloc, but there are input registers used, then we
3009 need a .regstk directive. */
3010 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3011 ar_pfs_save_reg = NULL_RTX;
3012 }
3013 else
3014 {
3015 current_frame_info.need_regstk = 0;
3016
3017 if (current_frame_info.r[reg_save_ar_pfs])
3018 {
3019 regno = current_frame_info.r[reg_save_ar_pfs];
3020 reg_emitted (reg_save_ar_pfs);
3021 }
3022 else
3023 regno = next_scratch_gr_reg ();
3024 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3025
3026 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3027 GEN_INT (current_frame_info.n_input_regs),
3028 GEN_INT (current_frame_info.n_local_regs),
3029 GEN_INT (current_frame_info.n_output_regs),
3030 GEN_INT (current_frame_info.n_rotate_regs)));
3031 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
3032 }
3033
3034 /* Set up frame pointer, stack pointer, and spill iterators. */
3035
3036 n_varargs = cfun->machine->n_varargs;
3037 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3038 stack_pointer_rtx, 0);
3039
3040 if (frame_pointer_needed)
3041 {
3042 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3043 RTX_FRAME_RELATED_P (insn) = 1;
3044 }
3045
3046 if (current_frame_info.total_size != 0)
3047 {
3048 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3049 rtx offset;
3050
3051 if (satisfies_constraint_I (frame_size_rtx))
3052 offset = frame_size_rtx;
3053 else
3054 {
3055 regno = next_scratch_gr_reg ();
3056 offset = gen_rtx_REG (DImode, regno);
3057 emit_move_insn (offset, frame_size_rtx);
3058 }
3059
3060 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3061 stack_pointer_rtx, offset));
3062
3063 if (! frame_pointer_needed)
3064 {
3065 RTX_FRAME_RELATED_P (insn) = 1;
3066 if (GET_CODE (offset) != CONST_INT)
3067 {
3068 REG_NOTES (insn)
3069 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3070 gen_rtx_SET (VOIDmode,
3071 stack_pointer_rtx,
3072 gen_rtx_PLUS (DImode,
3073 stack_pointer_rtx,
3074 frame_size_rtx)),
3075 REG_NOTES (insn));
3076 }
3077 }
3078
3079 /* ??? At this point we must generate a magic insn that appears to
3080 modify the stack pointer, the frame pointer, and all spill
3081 iterators. This would allow the most scheduling freedom. For
3082 now, just hard stop. */
3083 emit_insn (gen_blockage ());
3084 }
3085
3086 /* Must copy out ar.unat before doing any integer spills. */
3087 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3088 {
3089 if (current_frame_info.r[reg_save_ar_unat])
3090 {
3091 ar_unat_save_reg
3092 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3093 reg_emitted (reg_save_ar_unat);
3094 }
3095 else
3096 {
3097 alt_regno = next_scratch_gr_reg ();
3098 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3099 current_frame_info.gr_used_mask |= 1 << alt_regno;
3100 }
3101
3102 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3103 insn = emit_move_insn (ar_unat_save_reg, reg);
3104 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_unat] != 0);
3105
3106 /* Even if we're not going to generate an epilogue, we still
3107 need to save the register so that EH works. */
3108 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3109 emit_insn (gen_prologue_use (ar_unat_save_reg));
3110 }
3111 else
3112 ar_unat_save_reg = NULL_RTX;
3113
3114 /* Spill all varargs registers. Do this before spilling any GR registers,
3115 since we want the UNAT bits for the GR registers to override the UNAT
3116 bits from varargs, which we don't care about. */
3117
3118 cfa_off = -16;
3119 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3120 {
3121 reg = gen_rtx_REG (DImode, regno);
3122 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3123 }
3124
3125 /* Locate the bottom of the register save area. */
3126 cfa_off = (current_frame_info.spill_cfa_off
3127 + current_frame_info.spill_size
3128 + current_frame_info.extra_spill_size);
3129
3130 /* Save the predicate register block either in a register or in memory. */
3131 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3132 {
3133 reg = gen_rtx_REG (DImode, PR_REG (0));
3134 if (current_frame_info.r[reg_save_pr] != 0)
3135 {
3136 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3137 reg_emitted (reg_save_pr);
3138 insn = emit_move_insn (alt_reg, reg);
3139
3140 /* ??? Denote pr spill/fill by a DImode move that modifies all
3141 64 hard registers. */
3142 RTX_FRAME_RELATED_P (insn) = 1;
3143 REG_NOTES (insn)
3144 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3145 gen_rtx_SET (VOIDmode, alt_reg, reg),
3146 REG_NOTES (insn));
3147
3148 /* Even if we're not going to generate an epilogue, we still
3149 need to save the register so that EH works. */
3150 if (! epilogue_p)
3151 emit_insn (gen_prologue_use (alt_reg));
3152 }
3153 else
3154 {
3155 alt_regno = next_scratch_gr_reg ();
3156 alt_reg = gen_rtx_REG (DImode, alt_regno);
3157 insn = emit_move_insn (alt_reg, reg);
3158 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3159 cfa_off -= 8;
3160 }
3161 }
3162
3163 /* Handle AR regs in numerical order. All of them get special handling. */
3164 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3165 && current_frame_info.r[reg_save_ar_unat] == 0)
3166 {
3167 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3168 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3169 cfa_off -= 8;
3170 }
3171
3172 /* The alloc insn already copied ar.pfs into a general register. The
3173 only thing we have to do now is copy that register to a stack slot
3174 if we'd not allocated a local register for the job. */
3175 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3176 && current_frame_info.r[reg_save_ar_pfs] == 0)
3177 {
3178 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3179 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3180 cfa_off -= 8;
3181 }
3182
3183 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3184 {
3185 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3186 if (current_frame_info.r[reg_save_ar_lc] != 0)
3187 {
3188 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3189 reg_emitted (reg_save_ar_lc);
3190 insn = emit_move_insn (alt_reg, reg);
3191 RTX_FRAME_RELATED_P (insn) = 1;
3192
3193 /* Even if we're not going to generate an epilogue, we still
3194 need to save the register so that EH works. */
3195 if (! epilogue_p)
3196 emit_insn (gen_prologue_use (alt_reg));
3197 }
3198 else
3199 {
3200 alt_regno = next_scratch_gr_reg ();
3201 alt_reg = gen_rtx_REG (DImode, alt_regno);
3202 emit_move_insn (alt_reg, reg);
3203 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3204 cfa_off -= 8;
3205 }
3206 }
3207
3208 /* Save the return pointer. */
3209 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3210 {
3211 reg = gen_rtx_REG (DImode, BR_REG (0));
3212 if (current_frame_info.r[reg_save_b0] != 0)
3213 {
3214 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3215 reg_emitted (reg_save_b0);
3216 insn = emit_move_insn (alt_reg, reg);
3217 RTX_FRAME_RELATED_P (insn) = 1;
3218
3219 /* Even if we're not going to generate an epilogue, we still
3220 need to save the register so that EH works. */
3221 if (! epilogue_p)
3222 emit_insn (gen_prologue_use (alt_reg));
3223 }
3224 else
3225 {
3226 alt_regno = next_scratch_gr_reg ();
3227 alt_reg = gen_rtx_REG (DImode, alt_regno);
3228 emit_move_insn (alt_reg, reg);
3229 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3230 cfa_off -= 8;
3231 }
3232 }
3233
3234 if (current_frame_info.r[reg_save_gp])
3235 {
3236 reg_emitted (reg_save_gp);
3237 insn = emit_move_insn (gen_rtx_REG (DImode,
3238 current_frame_info.r[reg_save_gp]),
3239 pic_offset_table_rtx);
3240 }
3241
3242 /* We should now be at the base of the gr/br/fr spill area. */
3243 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3244 + current_frame_info.spill_size));
3245
3246 /* Spill all general registers. */
3247 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3248 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3249 {
3250 reg = gen_rtx_REG (DImode, regno);
3251 do_spill (gen_gr_spill, reg, cfa_off, reg);
3252 cfa_off -= 8;
3253 }
3254
3255 /* Spill the rest of the BR registers. */
3256 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3257 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3258 {
3259 alt_regno = next_scratch_gr_reg ();
3260 alt_reg = gen_rtx_REG (DImode, alt_regno);
3261 reg = gen_rtx_REG (DImode, regno);
3262 emit_move_insn (alt_reg, reg);
3263 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3264 cfa_off -= 8;
3265 }
3266
3267 /* Align the frame and spill all FR registers. */
3268 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3269 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3270 {
3271 gcc_assert (!(cfa_off & 15));
3272 reg = gen_rtx_REG (XFmode, regno);
3273 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3274 cfa_off -= 16;
3275 }
3276
3277 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3278
3279 finish_spill_pointers ();
3280 }
3281
3282 /* Called after register allocation to add any instructions needed for the
3283 epilogue. Using an epilogue insn is favored compared to putting all of the
3284 instructions in output_function_prologue(), since it allows the scheduler
3285 to intermix instructions with the saves of the caller saved registers. In
3286 some cases, it might be necessary to emit a barrier instruction as the last
3287 insn to prevent such scheduling. */
3288
3289 void
3290 ia64_expand_epilogue (int sibcall_p)
3291 {
3292 rtx insn, reg, alt_reg, ar_unat_save_reg;
3293 int regno, alt_regno, cfa_off;
3294
3295 ia64_compute_frame_size (get_frame_size ());
3296
3297 /* If there is a frame pointer, then we use it instead of the stack
3298 pointer, so that the stack pointer does not need to be valid when
3299 the epilogue starts. See EXIT_IGNORE_STACK. */
3300 if (frame_pointer_needed)
3301 setup_spill_pointers (current_frame_info.n_spilled,
3302 hard_frame_pointer_rtx, 0);
3303 else
3304 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3305 current_frame_info.total_size);
3306
3307 if (current_frame_info.total_size != 0)
3308 {
3309 /* ??? At this point we must generate a magic insn that appears to
3310 modify the spill iterators and the frame pointer. This would
3311 allow the most scheduling freedom. For now, just hard stop. */
3312 emit_insn (gen_blockage ());
3313 }
3314
3315 /* Locate the bottom of the register save area. */
3316 cfa_off = (current_frame_info.spill_cfa_off
3317 + current_frame_info.spill_size
3318 + current_frame_info.extra_spill_size);
3319
3320 /* Restore the predicate registers. */
3321 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3322 {
3323 if (current_frame_info.r[reg_save_pr] != 0)
3324 {
3325 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3326 reg_emitted (reg_save_pr);
3327 }
3328 else
3329 {
3330 alt_regno = next_scratch_gr_reg ();
3331 alt_reg = gen_rtx_REG (DImode, alt_regno);
3332 do_restore (gen_movdi_x, alt_reg, cfa_off);
3333 cfa_off -= 8;
3334 }
3335 reg = gen_rtx_REG (DImode, PR_REG (0));
3336 emit_move_insn (reg, alt_reg);
3337 }
3338
3339 /* Restore the application registers. */
3340
3341 /* Load the saved unat from the stack, but do not restore it until
3342 after the GRs have been restored. */
3343 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3344 {
3345 if (current_frame_info.r[reg_save_ar_unat] != 0)
3346 {
3347 ar_unat_save_reg
3348 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3349 reg_emitted (reg_save_ar_unat);
3350 }
3351 else
3352 {
3353 alt_regno = next_scratch_gr_reg ();
3354 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3355 current_frame_info.gr_used_mask |= 1 << alt_regno;
3356 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3357 cfa_off -= 8;
3358 }
3359 }
3360 else
3361 ar_unat_save_reg = NULL_RTX;
3362
3363 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3364 {
3365 reg_emitted (reg_save_ar_pfs);
3366 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3367 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3368 emit_move_insn (reg, alt_reg);
3369 }
3370 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3371 {
3372 alt_regno = next_scratch_gr_reg ();
3373 alt_reg = gen_rtx_REG (DImode, alt_regno);
3374 do_restore (gen_movdi_x, alt_reg, cfa_off);
3375 cfa_off -= 8;
3376 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3377 emit_move_insn (reg, alt_reg);
3378 }
3379
3380 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3381 {
3382 if (current_frame_info.r[reg_save_ar_lc] != 0)
3383 {
3384 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3385 reg_emitted (reg_save_ar_lc);
3386 }
3387 else
3388 {
3389 alt_regno = next_scratch_gr_reg ();
3390 alt_reg = gen_rtx_REG (DImode, alt_regno);
3391 do_restore (gen_movdi_x, alt_reg, cfa_off);
3392 cfa_off -= 8;
3393 }
3394 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3395 emit_move_insn (reg, alt_reg);
3396 }
3397
3398 /* Restore the return pointer. */
3399 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3400 {
3401 if (current_frame_info.r[reg_save_b0] != 0)
3402 {
3403 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3404 reg_emitted (reg_save_b0);
3405 }
3406 else
3407 {
3408 alt_regno = next_scratch_gr_reg ();
3409 alt_reg = gen_rtx_REG (DImode, alt_regno);
3410 do_restore (gen_movdi_x, alt_reg, cfa_off);
3411 cfa_off -= 8;
3412 }
3413 reg = gen_rtx_REG (DImode, BR_REG (0));
3414 emit_move_insn (reg, alt_reg);
3415 }
3416
3417 /* We should now be at the base of the gr/br/fr spill area. */
3418 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3419 + current_frame_info.spill_size));
3420
3421 /* The GP may be stored on the stack in the prologue, but it's
3422 never restored in the epilogue. Skip the stack slot. */
3423 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3424 cfa_off -= 8;
3425
3426 /* Restore all general registers. */
3427 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3428 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3429 {
3430 reg = gen_rtx_REG (DImode, regno);
3431 do_restore (gen_gr_restore, reg, cfa_off);
3432 cfa_off -= 8;
3433 }
3434
3435 /* Restore the branch registers. */
3436 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3437 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3438 {
3439 alt_regno = next_scratch_gr_reg ();
3440 alt_reg = gen_rtx_REG (DImode, alt_regno);
3441 do_restore (gen_movdi_x, alt_reg, cfa_off);
3442 cfa_off -= 8;
3443 reg = gen_rtx_REG (DImode, regno);
3444 emit_move_insn (reg, alt_reg);
3445 }
3446
3447 /* Restore floating point registers. */
3448 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3449 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3450 {
3451 gcc_assert (!(cfa_off & 15));
3452 reg = gen_rtx_REG (XFmode, regno);
3453 do_restore (gen_fr_restore_x, reg, cfa_off);
3454 cfa_off -= 16;
3455 }
3456
3457 /* Restore ar.unat for real. */
3458 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3459 {
3460 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3461 emit_move_insn (reg, ar_unat_save_reg);
3462 }
3463
3464 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3465
3466 finish_spill_pointers ();
3467
3468 if (current_frame_info.total_size
3469 || cfun->machine->ia64_eh_epilogue_sp
3470 || frame_pointer_needed)
3471 {
3472 /* ??? At this point we must generate a magic insn that appears to
3473 modify the spill iterators, the stack pointer, and the frame
3474 pointer. This would allow the most scheduling freedom. For now,
3475 just hard stop. */
3476 emit_insn (gen_blockage ());
3477 }
3478
3479 if (cfun->machine->ia64_eh_epilogue_sp)
3480 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3481 else if (frame_pointer_needed)
3482 {
3483 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3484 RTX_FRAME_RELATED_P (insn) = 1;
3485 }
3486 else if (current_frame_info.total_size)
3487 {
3488 rtx offset, frame_size_rtx;
3489
3490 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3491 if (satisfies_constraint_I (frame_size_rtx))
3492 offset = frame_size_rtx;
3493 else
3494 {
3495 regno = next_scratch_gr_reg ();
3496 offset = gen_rtx_REG (DImode, regno);
3497 emit_move_insn (offset, frame_size_rtx);
3498 }
3499
3500 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3501 offset));
3502
3503 RTX_FRAME_RELATED_P (insn) = 1;
3504 if (GET_CODE (offset) != CONST_INT)
3505 {
3506 REG_NOTES (insn)
3507 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3508 gen_rtx_SET (VOIDmode,
3509 stack_pointer_rtx,
3510 gen_rtx_PLUS (DImode,
3511 stack_pointer_rtx,
3512 frame_size_rtx)),
3513 REG_NOTES (insn));
3514 }
3515 }
3516
3517 if (cfun->machine->ia64_eh_epilogue_bsp)
3518 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3519
3520 if (! sibcall_p)
3521 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3522 else
3523 {
3524 int fp = GR_REG (2);
3525 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
3526 first available call clobbered register. If there was a frame_pointer
3527 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3528 so we have to make sure we're using the string "r2" when emitting
3529 the register name for the assembler. */
3530 if (current_frame_info.r[reg_fp]
3531 && current_frame_info.r[reg_fp] == GR_REG (2))
3532 fp = HARD_FRAME_POINTER_REGNUM;
3533
3534 /* We must emit an alloc to force the input registers to become output
3535 registers. Otherwise, if the callee tries to pass its parameters
3536 through to another call without an intervening alloc, then these
3537 values get lost. */
3538 /* ??? We don't need to preserve all input registers. We only need to
3539 preserve those input registers used as arguments to the sibling call.
3540 It is unclear how to compute that number here. */
3541 if (current_frame_info.n_input_regs != 0)
3542 {
3543 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3544 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3545 const0_rtx, const0_rtx,
3546 n_inputs, const0_rtx));
3547 RTX_FRAME_RELATED_P (insn) = 1;
3548 }
3549 }
3550 }
3551
3552 /* Return 1 if br.ret can do all the work required to return from a
3553 function. */
3554
3555 int
3556 ia64_direct_return (void)
3557 {
3558 if (reload_completed && ! frame_pointer_needed)
3559 {
3560 ia64_compute_frame_size (get_frame_size ());
3561
3562 return (current_frame_info.total_size == 0
3563 && current_frame_info.n_spilled == 0
3564 && current_frame_info.r[reg_save_b0] == 0
3565 && current_frame_info.r[reg_save_pr] == 0
3566 && current_frame_info.r[reg_save_ar_pfs] == 0
3567 && current_frame_info.r[reg_save_ar_unat] == 0
3568 && current_frame_info.r[reg_save_ar_lc] == 0);
3569 }
3570 return 0;
3571 }
3572
3573 /* Return the magic cookie that we use to hold the return address
3574 during early compilation. */
3575
3576 rtx
3577 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3578 {
3579 if (count != 0)
3580 return NULL;
3581 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3582 }
3583
3584 /* Split this value after reload, now that we know where the return
3585 address is saved. */
3586
3587 void
3588 ia64_split_return_addr_rtx (rtx dest)
3589 {
3590 rtx src;
3591
3592 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3593 {
3594 if (current_frame_info.r[reg_save_b0] != 0)
3595 {
3596 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3597 reg_emitted (reg_save_b0);
3598 }
3599 else
3600 {
3601 HOST_WIDE_INT off;
3602 unsigned int regno;
3603 rtx off_r;
3604
3605 /* Compute offset from CFA for BR0. */
3606 /* ??? Must be kept in sync with ia64_expand_prologue. */
3607 off = (current_frame_info.spill_cfa_off
3608 + current_frame_info.spill_size);
3609 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3610 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3611 off -= 8;
3612
3613 /* Convert CFA offset to a register based offset. */
3614 if (frame_pointer_needed)
3615 src = hard_frame_pointer_rtx;
3616 else
3617 {
3618 src = stack_pointer_rtx;
3619 off += current_frame_info.total_size;
3620 }
3621
3622 /* Load address into scratch register. */
3623 off_r = GEN_INT (off);
3624 if (satisfies_constraint_I (off_r))
3625 emit_insn (gen_adddi3 (dest, src, off_r));
3626 else
3627 {
3628 emit_move_insn (dest, off_r);
3629 emit_insn (gen_adddi3 (dest, src, dest));
3630 }
3631
3632 src = gen_rtx_MEM (Pmode, dest);
3633 }
3634 }
3635 else
3636 src = gen_rtx_REG (DImode, BR_REG (0));
3637
3638 emit_move_insn (dest, src);
3639 }
3640
3641 int
3642 ia64_hard_regno_rename_ok (int from, int to)
3643 {
3644 /* Don't clobber any of the registers we reserved for the prologue. */
3645 enum ia64_frame_regs r;
3646
3647 for (r = reg_fp; r <= reg_save_ar_lc; r++)
3648 if (to == current_frame_info.r[r]
3649 || from == current_frame_info.r[r]
3650 || to == emitted_frame_related_regs[r]
3651 || from == emitted_frame_related_regs[r])
3652 return 0;
3653
3654 /* Don't use output registers outside the register frame. */
3655 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3656 return 0;
3657
3658 /* Retain even/oddness on predicate register pairs. */
3659 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3660 return (from & 1) == (to & 1);
3661
3662 return 1;
3663 }
3664
3665 /* Target hook for assembling integer objects. Handle word-sized
3666 aligned objects and detect the cases when @fptr is needed. */
3667
3668 static bool
3669 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3670 {
3671 if (size == POINTER_SIZE / BITS_PER_UNIT
3672 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3673 && GET_CODE (x) == SYMBOL_REF
3674 && SYMBOL_REF_FUNCTION_P (x))
3675 {
3676 static const char * const directive[2][2] = {
3677 /* 64-bit pointer */ /* 32-bit pointer */
3678 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3679 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3680 };
3681 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3682 output_addr_const (asm_out_file, x);
3683 fputs (")\n", asm_out_file);
3684 return true;
3685 }
3686 return default_assemble_integer (x, size, aligned_p);
3687 }
3688
3689 /* Emit the function prologue. */
3690
3691 static void
3692 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3693 {
3694 int mask, grsave, grsave_prev;
3695
3696 if (current_frame_info.need_regstk)
3697 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3698 current_frame_info.n_input_regs,
3699 current_frame_info.n_local_regs,
3700 current_frame_info.n_output_regs,
3701 current_frame_info.n_rotate_regs);
3702
3703 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3704 return;
3705
3706 /* Emit the .prologue directive. */
3707
3708 mask = 0;
3709 grsave = grsave_prev = 0;
3710 if (current_frame_info.r[reg_save_b0] != 0)
3711 {
3712 mask |= 8;
3713 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
3714 }
3715 if (current_frame_info.r[reg_save_ar_pfs] != 0
3716 && (grsave_prev == 0
3717 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
3718 {
3719 mask |= 4;
3720 if (grsave_prev == 0)
3721 grsave = current_frame_info.r[reg_save_ar_pfs];
3722 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
3723 }
3724 if (current_frame_info.r[reg_fp] != 0
3725 && (grsave_prev == 0
3726 || current_frame_info.r[reg_fp] == grsave_prev + 1))
3727 {
3728 mask |= 2;
3729 if (grsave_prev == 0)
3730 grsave = HARD_FRAME_POINTER_REGNUM;
3731 grsave_prev = current_frame_info.r[reg_fp];
3732 }
3733 if (current_frame_info.r[reg_save_pr] != 0
3734 && (grsave_prev == 0
3735 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
3736 {
3737 mask |= 1;
3738 if (grsave_prev == 0)
3739 grsave = current_frame_info.r[reg_save_pr];
3740 }
3741
3742 if (mask && TARGET_GNU_AS)
3743 fprintf (file, "\t.prologue %d, %d\n", mask,
3744 ia64_dbx_register_number (grsave));
3745 else
3746 fputs ("\t.prologue\n", file);
3747
3748 /* Emit a .spill directive, if necessary, to relocate the base of
3749 the register spill area. */
3750 if (current_frame_info.spill_cfa_off != -16)
3751 fprintf (file, "\t.spill %ld\n",
3752 (long) (current_frame_info.spill_cfa_off
3753 + current_frame_info.spill_size));
3754 }
3755
3756 /* Emit the .body directive at the scheduled end of the prologue. */
3757
3758 static void
3759 ia64_output_function_end_prologue (FILE *file)
3760 {
3761 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3762 return;
3763
3764 fputs ("\t.body\n", file);
3765 }
3766
3767 /* Emit the function epilogue. */
3768
3769 static void
3770 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3771 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3772 {
3773 int i;
3774
3775 if (current_frame_info.r[reg_fp])
3776 {
3777 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3778 reg_names[HARD_FRAME_POINTER_REGNUM]
3779 = reg_names[current_frame_info.r[reg_fp]];
3780 reg_names[current_frame_info.r[reg_fp]] = tmp;
3781 reg_emitted (reg_fp);
3782 }
3783 if (! TARGET_REG_NAMES)
3784 {
3785 for (i = 0; i < current_frame_info.n_input_regs; i++)
3786 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3787 for (i = 0; i < current_frame_info.n_local_regs; i++)
3788 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3789 for (i = 0; i < current_frame_info.n_output_regs; i++)
3790 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3791 }
3792
3793 current_frame_info.initialized = 0;
3794 }
3795
3796 int
3797 ia64_dbx_register_number (int regno)
3798 {
3799 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3800 from its home at loc79 to something inside the register frame. We
3801 must perform the same renumbering here for the debug info. */
3802 if (current_frame_info.r[reg_fp])
3803 {
3804 if (regno == HARD_FRAME_POINTER_REGNUM)
3805 regno = current_frame_info.r[reg_fp];
3806 else if (regno == current_frame_info.r[reg_fp])
3807 regno = HARD_FRAME_POINTER_REGNUM;
3808 }
3809
3810 if (IN_REGNO_P (regno))
3811 return 32 + regno - IN_REG (0);
3812 else if (LOC_REGNO_P (regno))
3813 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3814 else if (OUT_REGNO_P (regno))
3815 return (32 + current_frame_info.n_input_regs
3816 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3817 else
3818 return regno;
3819 }
3820
3821 void
3822 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3823 {
3824 rtx addr_reg, eight = GEN_INT (8);
3825
3826 /* The Intel assembler requires that the global __ia64_trampoline symbol
3827 be declared explicitly */
3828 if (!TARGET_GNU_AS)
3829 {
3830 static bool declared_ia64_trampoline = false;
3831
3832 if (!declared_ia64_trampoline)
3833 {
3834 declared_ia64_trampoline = true;
3835 (*targetm.asm_out.globalize_label) (asm_out_file,
3836 "__ia64_trampoline");
3837 }
3838 }
3839
3840 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3841 addr = convert_memory_address (Pmode, addr);
3842 fnaddr = convert_memory_address (Pmode, fnaddr);
3843 static_chain = convert_memory_address (Pmode, static_chain);
3844
3845 /* Load up our iterator. */
3846 addr_reg = gen_reg_rtx (Pmode);
3847 emit_move_insn (addr_reg, addr);
3848
3849 /* The first two words are the fake descriptor:
3850 __ia64_trampoline, ADDR+16. */
3851 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3852 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3853 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3854
3855 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3856 copy_to_reg (plus_constant (addr, 16)));
3857 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3858
3859 /* The third word is the target descriptor. */
3860 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3861 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3862
3863 /* The fourth word is the static chain. */
3864 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3865 }
3866 \f
3867 /* Do any needed setup for a variadic function. CUM has not been updated
3868 for the last named argument which has type TYPE and mode MODE.
3869
3870 We generate the actual spill instructions during prologue generation. */
3871
3872 static void
3873 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3874 tree type, int * pretend_size,
3875 int second_time ATTRIBUTE_UNUSED)
3876 {
3877 CUMULATIVE_ARGS next_cum = *cum;
3878
3879 /* Skip the current argument. */
3880 ia64_function_arg_advance (&next_cum, mode, type, 1);
3881
3882 if (next_cum.words < MAX_ARGUMENT_SLOTS)
3883 {
3884 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3885 *pretend_size = n * UNITS_PER_WORD;
3886 cfun->machine->n_varargs = n;
3887 }
3888 }
3889
3890 /* Check whether TYPE is a homogeneous floating point aggregate. If
3891 it is, return the mode of the floating point type that appears
3892 in all leafs. If it is not, return VOIDmode.
3893
3894 An aggregate is a homogeneous floating point aggregate is if all
3895 fields/elements in it have the same floating point type (e.g,
3896 SFmode). 128-bit quad-precision floats are excluded.
3897
3898 Variable sized aggregates should never arrive here, since we should
3899 have already decided to pass them by reference. Top-level zero-sized
3900 aggregates are excluded because our parallels crash the middle-end. */
3901
3902 static enum machine_mode
3903 hfa_element_mode (const_tree type, bool nested)
3904 {
3905 enum machine_mode element_mode = VOIDmode;
3906 enum machine_mode mode;
3907 enum tree_code code = TREE_CODE (type);
3908 int know_element_mode = 0;
3909 tree t;
3910
3911 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3912 return VOIDmode;
3913
3914 switch (code)
3915 {
3916 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3917 case BOOLEAN_TYPE: case POINTER_TYPE:
3918 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3919 case LANG_TYPE: case FUNCTION_TYPE:
3920 return VOIDmode;
3921
3922 /* Fortran complex types are supposed to be HFAs, so we need to handle
3923 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3924 types though. */
3925 case COMPLEX_TYPE:
3926 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3927 && TYPE_MODE (type) != TCmode)
3928 return GET_MODE_INNER (TYPE_MODE (type));
3929 else
3930 return VOIDmode;
3931
3932 case REAL_TYPE:
3933 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3934 mode if this is contained within an aggregate. */
3935 if (nested && TYPE_MODE (type) != TFmode)
3936 return TYPE_MODE (type);
3937 else
3938 return VOIDmode;
3939
3940 case ARRAY_TYPE:
3941 return hfa_element_mode (TREE_TYPE (type), 1);
3942
3943 case RECORD_TYPE:
3944 case UNION_TYPE:
3945 case QUAL_UNION_TYPE:
3946 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3947 {
3948 if (TREE_CODE (t) != FIELD_DECL)
3949 continue;
3950
3951 mode = hfa_element_mode (TREE_TYPE (t), 1);
3952 if (know_element_mode)
3953 {
3954 if (mode != element_mode)
3955 return VOIDmode;
3956 }
3957 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3958 return VOIDmode;
3959 else
3960 {
3961 know_element_mode = 1;
3962 element_mode = mode;
3963 }
3964 }
3965 return element_mode;
3966
3967 default:
3968 /* If we reach here, we probably have some front-end specific type
3969 that the backend doesn't know about. This can happen via the
3970 aggregate_value_p call in init_function_start. All we can do is
3971 ignore unknown tree types. */
3972 return VOIDmode;
3973 }
3974
3975 return VOIDmode;
3976 }
3977
3978 /* Return the number of words required to hold a quantity of TYPE and MODE
3979 when passed as an argument. */
3980 static int
3981 ia64_function_arg_words (tree type, enum machine_mode mode)
3982 {
3983 int words;
3984
3985 if (mode == BLKmode)
3986 words = int_size_in_bytes (type);
3987 else
3988 words = GET_MODE_SIZE (mode);
3989
3990 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
3991 }
3992
3993 /* Return the number of registers that should be skipped so the current
3994 argument (described by TYPE and WORDS) will be properly aligned.
3995
3996 Integer and float arguments larger than 8 bytes start at the next
3997 even boundary. Aggregates larger than 8 bytes start at the next
3998 even boundary if the aggregate has 16 byte alignment. Note that
3999 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4000 but are still to be aligned in registers.
4001
4002 ??? The ABI does not specify how to handle aggregates with
4003 alignment from 9 to 15 bytes, or greater than 16. We handle them
4004 all as if they had 16 byte alignment. Such aggregates can occur
4005 only if gcc extensions are used. */
4006 static int
4007 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
4008 {
4009 if ((cum->words & 1) == 0)
4010 return 0;
4011
4012 if (type
4013 && TREE_CODE (type) != INTEGER_TYPE
4014 && TREE_CODE (type) != REAL_TYPE)
4015 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4016 else
4017 return words > 1;
4018 }
4019
4020 /* Return rtx for register where argument is passed, or zero if it is passed
4021 on the stack. */
4022 /* ??? 128-bit quad-precision floats are always passed in general
4023 registers. */
4024
4025 rtx
4026 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
4027 int named, int incoming)
4028 {
4029 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4030 int words = ia64_function_arg_words (type, mode);
4031 int offset = ia64_function_arg_offset (cum, type, words);
4032 enum machine_mode hfa_mode = VOIDmode;
4033
4034 /* If all argument slots are used, then it must go on the stack. */
4035 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4036 return 0;
4037
4038 /* Check for and handle homogeneous FP aggregates. */
4039 if (type)
4040 hfa_mode = hfa_element_mode (type, 0);
4041
4042 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4043 and unprototyped hfas are passed specially. */
4044 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4045 {
4046 rtx loc[16];
4047 int i = 0;
4048 int fp_regs = cum->fp_regs;
4049 int int_regs = cum->words + offset;
4050 int hfa_size = GET_MODE_SIZE (hfa_mode);
4051 int byte_size;
4052 int args_byte_size;
4053
4054 /* If prototyped, pass it in FR regs then GR regs.
4055 If not prototyped, pass it in both FR and GR regs.
4056
4057 If this is an SFmode aggregate, then it is possible to run out of
4058 FR regs while GR regs are still left. In that case, we pass the
4059 remaining part in the GR regs. */
4060
4061 /* Fill the FP regs. We do this always. We stop if we reach the end
4062 of the argument, the last FP register, or the last argument slot. */
4063
4064 byte_size = ((mode == BLKmode)
4065 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4066 args_byte_size = int_regs * UNITS_PER_WORD;
4067 offset = 0;
4068 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4069 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4070 {
4071 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4072 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4073 + fp_regs)),
4074 GEN_INT (offset));
4075 offset += hfa_size;
4076 args_byte_size += hfa_size;
4077 fp_regs++;
4078 }
4079
4080 /* If no prototype, then the whole thing must go in GR regs. */
4081 if (! cum->prototype)
4082 offset = 0;
4083 /* If this is an SFmode aggregate, then we might have some left over
4084 that needs to go in GR regs. */
4085 else if (byte_size != offset)
4086 int_regs += offset / UNITS_PER_WORD;
4087
4088 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4089
4090 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4091 {
4092 enum machine_mode gr_mode = DImode;
4093 unsigned int gr_size;
4094
4095 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4096 then this goes in a GR reg left adjusted/little endian, right
4097 adjusted/big endian. */
4098 /* ??? Currently this is handled wrong, because 4-byte hunks are
4099 always right adjusted/little endian. */
4100 if (offset & 0x4)
4101 gr_mode = SImode;
4102 /* If we have an even 4 byte hunk because the aggregate is a
4103 multiple of 4 bytes in size, then this goes in a GR reg right
4104 adjusted/little endian. */
4105 else if (byte_size - offset == 4)
4106 gr_mode = SImode;
4107
4108 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4109 gen_rtx_REG (gr_mode, (basereg
4110 + int_regs)),
4111 GEN_INT (offset));
4112
4113 gr_size = GET_MODE_SIZE (gr_mode);
4114 offset += gr_size;
4115 if (gr_size == UNITS_PER_WORD
4116 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4117 int_regs++;
4118 else if (gr_size > UNITS_PER_WORD)
4119 int_regs += gr_size / UNITS_PER_WORD;
4120 }
4121 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4122 }
4123
4124 /* Integral and aggregates go in general registers. If we have run out of
4125 FR registers, then FP values must also go in general registers. This can
4126 happen when we have a SFmode HFA. */
4127 else if (mode == TFmode || mode == TCmode
4128 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4129 {
4130 int byte_size = ((mode == BLKmode)
4131 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4132 if (BYTES_BIG_ENDIAN
4133 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4134 && byte_size < UNITS_PER_WORD
4135 && byte_size > 0)
4136 {
4137 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4138 gen_rtx_REG (DImode,
4139 (basereg + cum->words
4140 + offset)),
4141 const0_rtx);
4142 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4143 }
4144 else
4145 return gen_rtx_REG (mode, basereg + cum->words + offset);
4146
4147 }
4148
4149 /* If there is a prototype, then FP values go in a FR register when
4150 named, and in a GR register when unnamed. */
4151 else if (cum->prototype)
4152 {
4153 if (named)
4154 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4155 /* In big-endian mode, an anonymous SFmode value must be represented
4156 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4157 the value into the high half of the general register. */
4158 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4159 return gen_rtx_PARALLEL (mode,
4160 gen_rtvec (1,
4161 gen_rtx_EXPR_LIST (VOIDmode,
4162 gen_rtx_REG (DImode, basereg + cum->words + offset),
4163 const0_rtx)));
4164 else
4165 return gen_rtx_REG (mode, basereg + cum->words + offset);
4166 }
4167 /* If there is no prototype, then FP values go in both FR and GR
4168 registers. */
4169 else
4170 {
4171 /* See comment above. */
4172 enum machine_mode inner_mode =
4173 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4174
4175 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4176 gen_rtx_REG (mode, (FR_ARG_FIRST
4177 + cum->fp_regs)),
4178 const0_rtx);
4179 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4180 gen_rtx_REG (inner_mode,
4181 (basereg + cum->words
4182 + offset)),
4183 const0_rtx);
4184
4185 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4186 }
4187 }
4188
4189 /* Return number of bytes, at the beginning of the argument, that must be
4190 put in registers. 0 is the argument is entirely in registers or entirely
4191 in memory. */
4192
4193 static int
4194 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4195 tree type, bool named ATTRIBUTE_UNUSED)
4196 {
4197 int words = ia64_function_arg_words (type, mode);
4198 int offset = ia64_function_arg_offset (cum, type, words);
4199
4200 /* If all argument slots are used, then it must go on the stack. */
4201 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4202 return 0;
4203
4204 /* It doesn't matter whether the argument goes in FR or GR regs. If
4205 it fits within the 8 argument slots, then it goes entirely in
4206 registers. If it extends past the last argument slot, then the rest
4207 goes on the stack. */
4208
4209 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4210 return 0;
4211
4212 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4213 }
4214
4215 /* Update CUM to point after this argument. This is patterned after
4216 ia64_function_arg. */
4217
4218 void
4219 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4220 tree type, int named)
4221 {
4222 int words = ia64_function_arg_words (type, mode);
4223 int offset = ia64_function_arg_offset (cum, type, words);
4224 enum machine_mode hfa_mode = VOIDmode;
4225
4226 /* If all arg slots are already full, then there is nothing to do. */
4227 if (cum->words >= MAX_ARGUMENT_SLOTS)
4228 return;
4229
4230 cum->words += words + offset;
4231
4232 /* Check for and handle homogeneous FP aggregates. */
4233 if (type)
4234 hfa_mode = hfa_element_mode (type, 0);
4235
4236 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4237 and unprototyped hfas are passed specially. */
4238 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4239 {
4240 int fp_regs = cum->fp_regs;
4241 /* This is the original value of cum->words + offset. */
4242 int int_regs = cum->words - words;
4243 int hfa_size = GET_MODE_SIZE (hfa_mode);
4244 int byte_size;
4245 int args_byte_size;
4246
4247 /* If prototyped, pass it in FR regs then GR regs.
4248 If not prototyped, pass it in both FR and GR regs.
4249
4250 If this is an SFmode aggregate, then it is possible to run out of
4251 FR regs while GR regs are still left. In that case, we pass the
4252 remaining part in the GR regs. */
4253
4254 /* Fill the FP regs. We do this always. We stop if we reach the end
4255 of the argument, the last FP register, or the last argument slot. */
4256
4257 byte_size = ((mode == BLKmode)
4258 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4259 args_byte_size = int_regs * UNITS_PER_WORD;
4260 offset = 0;
4261 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4262 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4263 {
4264 offset += hfa_size;
4265 args_byte_size += hfa_size;
4266 fp_regs++;
4267 }
4268
4269 cum->fp_regs = fp_regs;
4270 }
4271
4272 /* Integral and aggregates go in general registers. So do TFmode FP values.
4273 If we have run out of FR registers, then other FP values must also go in
4274 general registers. This can happen when we have a SFmode HFA. */
4275 else if (mode == TFmode || mode == TCmode
4276 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4277 cum->int_regs = cum->words;
4278
4279 /* If there is a prototype, then FP values go in a FR register when
4280 named, and in a GR register when unnamed. */
4281 else if (cum->prototype)
4282 {
4283 if (! named)
4284 cum->int_regs = cum->words;
4285 else
4286 /* ??? Complex types should not reach here. */
4287 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4288 }
4289 /* If there is no prototype, then FP values go in both FR and GR
4290 registers. */
4291 else
4292 {
4293 /* ??? Complex types should not reach here. */
4294 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4295 cum->int_regs = cum->words;
4296 }
4297 }
4298
4299 /* Arguments with alignment larger than 8 bytes start at the next even
4300 boundary. On ILP32 HPUX, TFmode arguments start on next even boundary
4301 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4302
4303 int
4304 ia64_function_arg_boundary (enum machine_mode mode, tree type)
4305 {
4306
4307 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4308 return PARM_BOUNDARY * 2;
4309
4310 if (type)
4311 {
4312 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4313 return PARM_BOUNDARY * 2;
4314 else
4315 return PARM_BOUNDARY;
4316 }
4317
4318 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4319 return PARM_BOUNDARY * 2;
4320 else
4321 return PARM_BOUNDARY;
4322 }
4323
4324 /* True if it is OK to do sibling call optimization for the specified
4325 call expression EXP. DECL will be the called function, or NULL if
4326 this is an indirect call. */
4327 static bool
4328 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4329 {
4330 /* We can't perform a sibcall if the current function has the syscall_linkage
4331 attribute. */
4332 if (lookup_attribute ("syscall_linkage",
4333 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4334 return false;
4335
4336 /* We must always return with our current GP. This means we can
4337 only sibcall to functions defined in the current module. */
4338 return decl && (*targetm.binds_local_p) (decl);
4339 }
4340 \f
4341
4342 /* Implement va_arg. */
4343
4344 static tree
4345 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4346 gimple_seq *post_p)
4347 {
4348 /* Variable sized types are passed by reference. */
4349 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4350 {
4351 tree ptrtype = build_pointer_type (type);
4352 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4353 return build_va_arg_indirect_ref (addr);
4354 }
4355
4356 /* Aggregate arguments with alignment larger than 8 bytes start at
4357 the next even boundary. Integer and floating point arguments
4358 do so if they are larger than 8 bytes, whether or not they are
4359 also aligned larger than 8 bytes. */
4360 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4361 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4362 {
4363 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4364 size_int (2 * UNITS_PER_WORD - 1));
4365 t = fold_convert (sizetype, t);
4366 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4367 size_int (-2 * UNITS_PER_WORD));
4368 t = fold_convert (TREE_TYPE (valist), t);
4369 gimplify_assign (unshare_expr (valist), t, pre_p);
4370 }
4371
4372 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4373 }
4374 \f
4375 /* Return 1 if function return value returned in memory. Return 0 if it is
4376 in a register. */
4377
4378 static bool
4379 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4380 {
4381 enum machine_mode mode;
4382 enum machine_mode hfa_mode;
4383 HOST_WIDE_INT byte_size;
4384
4385 mode = TYPE_MODE (valtype);
4386 byte_size = GET_MODE_SIZE (mode);
4387 if (mode == BLKmode)
4388 {
4389 byte_size = int_size_in_bytes (valtype);
4390 if (byte_size < 0)
4391 return true;
4392 }
4393
4394 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
4395
4396 hfa_mode = hfa_element_mode (valtype, 0);
4397 if (hfa_mode != VOIDmode)
4398 {
4399 int hfa_size = GET_MODE_SIZE (hfa_mode);
4400
4401 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4402 return true;
4403 else
4404 return false;
4405 }
4406 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4407 return true;
4408 else
4409 return false;
4410 }
4411
4412 /* Return rtx for register that holds the function return value. */
4413
4414 rtx
4415 ia64_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED)
4416 {
4417 enum machine_mode mode;
4418 enum machine_mode hfa_mode;
4419
4420 mode = TYPE_MODE (valtype);
4421 hfa_mode = hfa_element_mode (valtype, 0);
4422
4423 if (hfa_mode != VOIDmode)
4424 {
4425 rtx loc[8];
4426 int i;
4427 int hfa_size;
4428 int byte_size;
4429 int offset;
4430
4431 hfa_size = GET_MODE_SIZE (hfa_mode);
4432 byte_size = ((mode == BLKmode)
4433 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4434 offset = 0;
4435 for (i = 0; offset < byte_size; i++)
4436 {
4437 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4438 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4439 GEN_INT (offset));
4440 offset += hfa_size;
4441 }
4442 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4443 }
4444 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4445 return gen_rtx_REG (mode, FR_ARG_FIRST);
4446 else
4447 {
4448 bool need_parallel = false;
4449
4450 /* In big-endian mode, we need to manage the layout of aggregates
4451 in the registers so that we get the bits properly aligned in
4452 the highpart of the registers. */
4453 if (BYTES_BIG_ENDIAN
4454 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4455 need_parallel = true;
4456
4457 /* Something like struct S { long double x; char a[0] } is not an
4458 HFA structure, and therefore doesn't go in fp registers. But
4459 the middle-end will give it XFmode anyway, and XFmode values
4460 don't normally fit in integer registers. So we need to smuggle
4461 the value inside a parallel. */
4462 else if (mode == XFmode || mode == XCmode || mode == RFmode)
4463 need_parallel = true;
4464
4465 if (need_parallel)
4466 {
4467 rtx loc[8];
4468 int offset;
4469 int bytesize;
4470 int i;
4471
4472 offset = 0;
4473 bytesize = int_size_in_bytes (valtype);
4474 /* An empty PARALLEL is invalid here, but the return value
4475 doesn't matter for empty structs. */
4476 if (bytesize == 0)
4477 return gen_rtx_REG (mode, GR_RET_FIRST);
4478 for (i = 0; offset < bytesize; i++)
4479 {
4480 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4481 gen_rtx_REG (DImode,
4482 GR_RET_FIRST + i),
4483 GEN_INT (offset));
4484 offset += UNITS_PER_WORD;
4485 }
4486 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4487 }
4488
4489 return gen_rtx_REG (mode, GR_RET_FIRST);
4490 }
4491 }
4492
4493 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4494 We need to emit DTP-relative relocations. */
4495
4496 static void
4497 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4498 {
4499 gcc_assert (size == 4 || size == 8);
4500 if (size == 4)
4501 fputs ("\tdata4.ua\t@dtprel(", file);
4502 else
4503 fputs ("\tdata8.ua\t@dtprel(", file);
4504 output_addr_const (file, x);
4505 fputs (")", file);
4506 }
4507
4508 /* Print a memory address as an operand to reference that memory location. */
4509
4510 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4511 also call this from ia64_print_operand for memory addresses. */
4512
4513 void
4514 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4515 rtx address ATTRIBUTE_UNUSED)
4516 {
4517 }
4518
4519 /* Print an operand to an assembler instruction.
4520 C Swap and print a comparison operator.
4521 D Print an FP comparison operator.
4522 E Print 32 - constant, for SImode shifts as extract.
4523 e Print 64 - constant, for DImode rotates.
4524 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4525 a floating point register emitted normally.
4526 I Invert a predicate register by adding 1.
4527 J Select the proper predicate register for a condition.
4528 j Select the inverse predicate register for a condition.
4529 O Append .acq for volatile load.
4530 P Postincrement of a MEM.
4531 Q Append .rel for volatile store.
4532 R Print .s .d or nothing for a single, double or no truncation.
4533 S Shift amount for shladd instruction.
4534 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4535 for Intel assembler.
4536 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4537 for Intel assembler.
4538 X A pair of floating point registers.
4539 r Print register name, or constant 0 as r0. HP compatibility for
4540 Linux kernel.
4541 v Print vector constant value as an 8-byte integer value. */
4542
4543 void
4544 ia64_print_operand (FILE * file, rtx x, int code)
4545 {
4546 const char *str;
4547
4548 switch (code)
4549 {
4550 case 0:
4551 /* Handled below. */
4552 break;
4553
4554 case 'C':
4555 {
4556 enum rtx_code c = swap_condition (GET_CODE (x));
4557 fputs (GET_RTX_NAME (c), file);
4558 return;
4559 }
4560
4561 case 'D':
4562 switch (GET_CODE (x))
4563 {
4564 case NE:
4565 str = "neq";
4566 break;
4567 case UNORDERED:
4568 str = "unord";
4569 break;
4570 case ORDERED:
4571 str = "ord";
4572 break;
4573 case UNLT:
4574 str = "nge";
4575 break;
4576 case UNLE:
4577 str = "ngt";
4578 break;
4579 case UNGT:
4580 str = "nle";
4581 break;
4582 case UNGE:
4583 str = "nlt";
4584 break;
4585 default:
4586 str = GET_RTX_NAME (GET_CODE (x));
4587 break;
4588 }
4589 fputs (str, file);
4590 return;
4591
4592 case 'E':
4593 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4594 return;
4595
4596 case 'e':
4597 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4598 return;
4599
4600 case 'F':
4601 if (x == CONST0_RTX (GET_MODE (x)))
4602 str = reg_names [FR_REG (0)];
4603 else if (x == CONST1_RTX (GET_MODE (x)))
4604 str = reg_names [FR_REG (1)];
4605 else
4606 {
4607 gcc_assert (GET_CODE (x) == REG);
4608 str = reg_names [REGNO (x)];
4609 }
4610 fputs (str, file);
4611 return;
4612
4613 case 'I':
4614 fputs (reg_names [REGNO (x) + 1], file);
4615 return;
4616
4617 case 'J':
4618 case 'j':
4619 {
4620 unsigned int regno = REGNO (XEXP (x, 0));
4621 if (GET_CODE (x) == EQ)
4622 regno += 1;
4623 if (code == 'j')
4624 regno ^= 1;
4625 fputs (reg_names [regno], file);
4626 }
4627 return;
4628
4629 case 'O':
4630 if (MEM_VOLATILE_P (x))
4631 fputs(".acq", file);
4632 return;
4633
4634 case 'P':
4635 {
4636 HOST_WIDE_INT value;
4637
4638 switch (GET_CODE (XEXP (x, 0)))
4639 {
4640 default:
4641 return;
4642
4643 case POST_MODIFY:
4644 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4645 if (GET_CODE (x) == CONST_INT)
4646 value = INTVAL (x);
4647 else
4648 {
4649 gcc_assert (GET_CODE (x) == REG);
4650 fprintf (file, ", %s", reg_names[REGNO (x)]);
4651 return;
4652 }
4653 break;
4654
4655 case POST_INC:
4656 value = GET_MODE_SIZE (GET_MODE (x));
4657 break;
4658
4659 case POST_DEC:
4660 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4661 break;
4662 }
4663
4664 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4665 return;
4666 }
4667
4668 case 'Q':
4669 if (MEM_VOLATILE_P (x))
4670 fputs(".rel", file);
4671 return;
4672
4673 case 'R':
4674 if (x == CONST0_RTX (GET_MODE (x)))
4675 fputs(".s", file);
4676 else if (x == CONST1_RTX (GET_MODE (x)))
4677 fputs(".d", file);
4678 else if (x == CONST2_RTX (GET_MODE (x)))
4679 ;
4680 else
4681 output_operand_lossage ("invalid %%R value");
4682 return;
4683
4684 case 'S':
4685 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4686 return;
4687
4688 case 'T':
4689 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4690 {
4691 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4692 return;
4693 }
4694 break;
4695
4696 case 'U':
4697 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4698 {
4699 const char *prefix = "0x";
4700 if (INTVAL (x) & 0x80000000)
4701 {
4702 fprintf (file, "0xffffffff");
4703 prefix = "";
4704 }
4705 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4706 return;
4707 }
4708 break;
4709
4710 case 'X':
4711 {
4712 unsigned int regno = REGNO (x);
4713 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4714 }
4715 return;
4716
4717 case 'r':
4718 /* If this operand is the constant zero, write it as register zero.
4719 Any register, zero, or CONST_INT value is OK here. */
4720 if (GET_CODE (x) == REG)
4721 fputs (reg_names[REGNO (x)], file);
4722 else if (x == CONST0_RTX (GET_MODE (x)))
4723 fputs ("r0", file);
4724 else if (GET_CODE (x) == CONST_INT)
4725 output_addr_const (file, x);
4726 else
4727 output_operand_lossage ("invalid %%r value");
4728 return;
4729
4730 case 'v':
4731 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4732 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4733 break;
4734
4735 case '+':
4736 {
4737 const char *which;
4738
4739 /* For conditional branches, returns or calls, substitute
4740 sptk, dptk, dpnt, or spnt for %s. */
4741 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4742 if (x)
4743 {
4744 int pred_val = INTVAL (XEXP (x, 0));
4745
4746 /* Guess top and bottom 10% statically predicted. */
4747 if (pred_val < REG_BR_PROB_BASE / 50
4748 && br_prob_note_reliable_p (x))
4749 which = ".spnt";
4750 else if (pred_val < REG_BR_PROB_BASE / 2)
4751 which = ".dpnt";
4752 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
4753 || !br_prob_note_reliable_p (x))
4754 which = ".dptk";
4755 else
4756 which = ".sptk";
4757 }
4758 else if (GET_CODE (current_output_insn) == CALL_INSN)
4759 which = ".sptk";
4760 else
4761 which = ".dptk";
4762
4763 fputs (which, file);
4764 return;
4765 }
4766
4767 case ',':
4768 x = current_insn_predicate;
4769 if (x)
4770 {
4771 unsigned int regno = REGNO (XEXP (x, 0));
4772 if (GET_CODE (x) == EQ)
4773 regno += 1;
4774 fprintf (file, "(%s) ", reg_names [regno]);
4775 }
4776 return;
4777
4778 default:
4779 output_operand_lossage ("ia64_print_operand: unknown code");
4780 return;
4781 }
4782
4783 switch (GET_CODE (x))
4784 {
4785 /* This happens for the spill/restore instructions. */
4786 case POST_INC:
4787 case POST_DEC:
4788 case POST_MODIFY:
4789 x = XEXP (x, 0);
4790 /* ... fall through ... */
4791
4792 case REG:
4793 fputs (reg_names [REGNO (x)], file);
4794 break;
4795
4796 case MEM:
4797 {
4798 rtx addr = XEXP (x, 0);
4799 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4800 addr = XEXP (addr, 0);
4801 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4802 break;
4803 }
4804
4805 default:
4806 output_addr_const (file, x);
4807 break;
4808 }
4809
4810 return;
4811 }
4812 \f
4813 /* Compute a (partial) cost for rtx X. Return true if the complete
4814 cost has been computed, and false if subexpressions should be
4815 scanned. In either case, *TOTAL contains the cost result. */
4816 /* ??? This is incomplete. */
4817
4818 static bool
4819 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4820 {
4821 switch (code)
4822 {
4823 case CONST_INT:
4824 switch (outer_code)
4825 {
4826 case SET:
4827 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
4828 return true;
4829 case PLUS:
4830 if (satisfies_constraint_I (x))
4831 *total = 0;
4832 else if (satisfies_constraint_J (x))
4833 *total = 1;
4834 else
4835 *total = COSTS_N_INSNS (1);
4836 return true;
4837 default:
4838 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
4839 *total = 0;
4840 else
4841 *total = COSTS_N_INSNS (1);
4842 return true;
4843 }
4844
4845 case CONST_DOUBLE:
4846 *total = COSTS_N_INSNS (1);
4847 return true;
4848
4849 case CONST:
4850 case SYMBOL_REF:
4851 case LABEL_REF:
4852 *total = COSTS_N_INSNS (3);
4853 return true;
4854
4855 case MULT:
4856 /* For multiplies wider than HImode, we have to go to the FPU,
4857 which normally involves copies. Plus there's the latency
4858 of the multiply itself, and the latency of the instructions to
4859 transfer integer regs to FP regs. */
4860 /* ??? Check for FP mode. */
4861 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4862 *total = COSTS_N_INSNS (10);
4863 else
4864 *total = COSTS_N_INSNS (2);
4865 return true;
4866
4867 case PLUS:
4868 case MINUS:
4869 case ASHIFT:
4870 case ASHIFTRT:
4871 case LSHIFTRT:
4872 *total = COSTS_N_INSNS (1);
4873 return true;
4874
4875 case DIV:
4876 case UDIV:
4877 case MOD:
4878 case UMOD:
4879 /* We make divide expensive, so that divide-by-constant will be
4880 optimized to a multiply. */
4881 *total = COSTS_N_INSNS (60);
4882 return true;
4883
4884 default:
4885 return false;
4886 }
4887 }
4888
4889 /* Calculate the cost of moving data from a register in class FROM to
4890 one in class TO, using MODE. */
4891
4892 int
4893 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4894 enum reg_class to)
4895 {
4896 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4897 if (to == ADDL_REGS)
4898 to = GR_REGS;
4899 if (from == ADDL_REGS)
4900 from = GR_REGS;
4901
4902 /* All costs are symmetric, so reduce cases by putting the
4903 lower number class as the destination. */
4904 if (from < to)
4905 {
4906 enum reg_class tmp = to;
4907 to = from, from = tmp;
4908 }
4909
4910 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4911 so that we get secondary memory reloads. Between FR_REGS,
4912 we have to make this at least as expensive as MEMORY_MOVE_COST
4913 to avoid spectacularly poor register class preferencing. */
4914 if (mode == XFmode || mode == RFmode)
4915 {
4916 if (to != GR_REGS || from != GR_REGS)
4917 return MEMORY_MOVE_COST (mode, to, 0);
4918 else
4919 return 3;
4920 }
4921
4922 switch (to)
4923 {
4924 case PR_REGS:
4925 /* Moving between PR registers takes two insns. */
4926 if (from == PR_REGS)
4927 return 3;
4928 /* Moving between PR and anything but GR is impossible. */
4929 if (from != GR_REGS)
4930 return MEMORY_MOVE_COST (mode, to, 0);
4931 break;
4932
4933 case BR_REGS:
4934 /* Moving between BR and anything but GR is impossible. */
4935 if (from != GR_REGS && from != GR_AND_BR_REGS)
4936 return MEMORY_MOVE_COST (mode, to, 0);
4937 break;
4938
4939 case AR_I_REGS:
4940 case AR_M_REGS:
4941 /* Moving between AR and anything but GR is impossible. */
4942 if (from != GR_REGS)
4943 return MEMORY_MOVE_COST (mode, to, 0);
4944 break;
4945
4946 case GR_REGS:
4947 case FR_REGS:
4948 case FP_REGS:
4949 case GR_AND_FR_REGS:
4950 case GR_AND_BR_REGS:
4951 case ALL_REGS:
4952 break;
4953
4954 default:
4955 gcc_unreachable ();
4956 }
4957
4958 return 2;
4959 }
4960
4961 /* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on RCLASS
4962 to use when copying X into that class. */
4963
4964 enum reg_class
4965 ia64_preferred_reload_class (rtx x, enum reg_class rclass)
4966 {
4967 switch (rclass)
4968 {
4969 case FR_REGS:
4970 case FP_REGS:
4971 /* Don't allow volatile mem reloads into floating point registers.
4972 This is defined to force reload to choose the r/m case instead
4973 of the f/f case when reloading (set (reg fX) (mem/v)). */
4974 if (MEM_P (x) && MEM_VOLATILE_P (x))
4975 return NO_REGS;
4976
4977 /* Force all unrecognized constants into the constant pool. */
4978 if (CONSTANT_P (x))
4979 return NO_REGS;
4980 break;
4981
4982 case AR_M_REGS:
4983 case AR_I_REGS:
4984 if (!OBJECT_P (x))
4985 return NO_REGS;
4986 break;
4987
4988 default:
4989 break;
4990 }
4991
4992 return rclass;
4993 }
4994
4995 /* This function returns the register class required for a secondary
4996 register when copying between one of the registers in RCLASS, and X,
4997 using MODE. A return value of NO_REGS means that no secondary register
4998 is required. */
4999
5000 enum reg_class
5001 ia64_secondary_reload_class (enum reg_class rclass,
5002 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5003 {
5004 int regno = -1;
5005
5006 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5007 regno = true_regnum (x);
5008
5009 switch (rclass)
5010 {
5011 case BR_REGS:
5012 case AR_M_REGS:
5013 case AR_I_REGS:
5014 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5015 interaction. We end up with two pseudos with overlapping lifetimes
5016 both of which are equiv to the same constant, and both which need
5017 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5018 changes depending on the path length, which means the qty_first_reg
5019 check in make_regs_eqv can give different answers at different times.
5020 At some point I'll probably need a reload_indi pattern to handle
5021 this.
5022
5023 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5024 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5025 non-general registers for good measure. */
5026 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5027 return GR_REGS;
5028
5029 /* This is needed if a pseudo used as a call_operand gets spilled to a
5030 stack slot. */
5031 if (GET_CODE (x) == MEM)
5032 return GR_REGS;
5033 break;
5034
5035 case FR_REGS:
5036 case FP_REGS:
5037 /* Need to go through general registers to get to other class regs. */
5038 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5039 return GR_REGS;
5040
5041 /* This can happen when a paradoxical subreg is an operand to the
5042 muldi3 pattern. */
5043 /* ??? This shouldn't be necessary after instruction scheduling is
5044 enabled, because paradoxical subregs are not accepted by
5045 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5046 stop the paradoxical subreg stupidity in the *_operand functions
5047 in recog.c. */
5048 if (GET_CODE (x) == MEM
5049 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5050 || GET_MODE (x) == QImode))
5051 return GR_REGS;
5052
5053 /* This can happen because of the ior/and/etc patterns that accept FP
5054 registers as operands. If the third operand is a constant, then it
5055 needs to be reloaded into a FP register. */
5056 if (GET_CODE (x) == CONST_INT)
5057 return GR_REGS;
5058
5059 /* This can happen because of register elimination in a muldi3 insn.
5060 E.g. `26107 * (unsigned long)&u'. */
5061 if (GET_CODE (x) == PLUS)
5062 return GR_REGS;
5063 break;
5064
5065 case PR_REGS:
5066 /* ??? This happens if we cse/gcse a BImode value across a call,
5067 and the function has a nonlocal goto. This is because global
5068 does not allocate call crossing pseudos to hard registers when
5069 crtl->has_nonlocal_goto is true. This is relatively
5070 common for C++ programs that use exceptions. To reproduce,
5071 return NO_REGS and compile libstdc++. */
5072 if (GET_CODE (x) == MEM)
5073 return GR_REGS;
5074
5075 /* This can happen when we take a BImode subreg of a DImode value,
5076 and that DImode value winds up in some non-GR register. */
5077 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5078 return GR_REGS;
5079 break;
5080
5081 default:
5082 break;
5083 }
5084
5085 return NO_REGS;
5086 }
5087
5088 \f
5089 /* Implement targetm.unspec_may_trap_p hook. */
5090 static int
5091 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5092 {
5093 if (GET_CODE (x) == UNSPEC)
5094 {
5095 switch (XINT (x, 1))
5096 {
5097 case UNSPEC_LDA:
5098 case UNSPEC_LDS:
5099 case UNSPEC_LDSA:
5100 case UNSPEC_LDCCLR:
5101 case UNSPEC_CHKACLR:
5102 case UNSPEC_CHKS:
5103 /* These unspecs are just wrappers. */
5104 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5105 }
5106 }
5107
5108 return default_unspec_may_trap_p (x, flags);
5109 }
5110
5111 \f
5112 /* Parse the -mfixed-range= option string. */
5113
5114 static void
5115 fix_range (const char *const_str)
5116 {
5117 int i, first, last;
5118 char *str, *dash, *comma;
5119
5120 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
5121 REG2 are either register names or register numbers. The effect
5122 of this option is to mark the registers in the range from REG1 to
5123 REG2 as ``fixed'' so they won't be used by the compiler. This is
5124 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5125
5126 i = strlen (const_str);
5127 str = (char *) alloca (i + 1);
5128 memcpy (str, const_str, i + 1);
5129
5130 while (1)
5131 {
5132 dash = strchr (str, '-');
5133 if (!dash)
5134 {
5135 warning (0, "value of -mfixed-range must have form REG1-REG2");
5136 return;
5137 }
5138 *dash = '\0';
5139
5140 comma = strchr (dash + 1, ',');
5141 if (comma)
5142 *comma = '\0';
5143
5144 first = decode_reg_name (str);
5145 if (first < 0)
5146 {
5147 warning (0, "unknown register name: %s", str);
5148 return;
5149 }
5150
5151 last = decode_reg_name (dash + 1);
5152 if (last < 0)
5153 {
5154 warning (0, "unknown register name: %s", dash + 1);
5155 return;
5156 }
5157
5158 *dash = '-';
5159
5160 if (first > last)
5161 {
5162 warning (0, "%s-%s is an empty range", str, dash + 1);
5163 return;
5164 }
5165
5166 for (i = first; i <= last; ++i)
5167 fixed_regs[i] = call_used_regs[i] = 1;
5168
5169 if (!comma)
5170 break;
5171
5172 *comma = ',';
5173 str = comma + 1;
5174 }
5175 }
5176
5177 /* Implement TARGET_HANDLE_OPTION. */
5178
5179 static bool
5180 ia64_handle_option (size_t code, const char *arg, int value)
5181 {
5182 switch (code)
5183 {
5184 case OPT_mfixed_range_:
5185 fix_range (arg);
5186 return true;
5187
5188 case OPT_mtls_size_:
5189 if (value != 14 && value != 22 && value != 64)
5190 error ("bad value %<%s%> for -mtls-size= switch", arg);
5191 return true;
5192
5193 case OPT_mtune_:
5194 {
5195 static struct pta
5196 {
5197 const char *name; /* processor name or nickname. */
5198 enum processor_type processor;
5199 }
5200 const processor_alias_table[] =
5201 {
5202 {"itanium", PROCESSOR_ITANIUM},
5203 {"itanium1", PROCESSOR_ITANIUM},
5204 {"merced", PROCESSOR_ITANIUM},
5205 {"itanium2", PROCESSOR_ITANIUM2},
5206 {"mckinley", PROCESSOR_ITANIUM2},
5207 };
5208 int const pta_size = ARRAY_SIZE (processor_alias_table);
5209 int i;
5210
5211 for (i = 0; i < pta_size; i++)
5212 if (!strcmp (arg, processor_alias_table[i].name))
5213 {
5214 ia64_tune = processor_alias_table[i].processor;
5215 break;
5216 }
5217 if (i == pta_size)
5218 error ("bad value %<%s%> for -mtune= switch", arg);
5219 return true;
5220 }
5221
5222 default:
5223 return true;
5224 }
5225 }
5226
5227 /* Implement OVERRIDE_OPTIONS. */
5228
5229 void
5230 ia64_override_options (void)
5231 {
5232 if (TARGET_AUTO_PIC)
5233 target_flags |= MASK_CONST_GP;
5234
5235 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
5236 {
5237 warning (0, "not yet implemented: latency-optimized inline square root");
5238 TARGET_INLINE_SQRT = INL_MAX_THR;
5239 }
5240
5241 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5242
5243 init_machine_status = ia64_init_machine_status;
5244 }
5245
5246 /* Initialize the record of emitted frame related registers. */
5247
5248 void ia64_init_expanders (void)
5249 {
5250 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5251 }
5252
5253 static struct machine_function *
5254 ia64_init_machine_status (void)
5255 {
5256 return GGC_CNEW (struct machine_function);
5257 }
5258 \f
5259 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5260 static enum attr_type ia64_safe_type (rtx);
5261
5262 static enum attr_itanium_class
5263 ia64_safe_itanium_class (rtx insn)
5264 {
5265 if (recog_memoized (insn) >= 0)
5266 return get_attr_itanium_class (insn);
5267 else
5268 return ITANIUM_CLASS_UNKNOWN;
5269 }
5270
5271 static enum attr_type
5272 ia64_safe_type (rtx insn)
5273 {
5274 if (recog_memoized (insn) >= 0)
5275 return get_attr_type (insn);
5276 else
5277 return TYPE_UNKNOWN;
5278 }
5279 \f
5280 /* The following collection of routines emit instruction group stop bits as
5281 necessary to avoid dependencies. */
5282
5283 /* Need to track some additional registers as far as serialization is
5284 concerned so we can properly handle br.call and br.ret. We could
5285 make these registers visible to gcc, but since these registers are
5286 never explicitly used in gcc generated code, it seems wasteful to
5287 do so (plus it would make the call and return patterns needlessly
5288 complex). */
5289 #define REG_RP (BR_REG (0))
5290 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
5291 /* This is used for volatile asms which may require a stop bit immediately
5292 before and after them. */
5293 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
5294 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5295 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
5296
5297 /* For each register, we keep track of how it has been written in the
5298 current instruction group.
5299
5300 If a register is written unconditionally (no qualifying predicate),
5301 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5302
5303 If a register is written if its qualifying predicate P is true, we
5304 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5305 may be written again by the complement of P (P^1) and when this happens,
5306 WRITE_COUNT gets set to 2.
5307
5308 The result of this is that whenever an insn attempts to write a register
5309 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5310
5311 If a predicate register is written by a floating-point insn, we set
5312 WRITTEN_BY_FP to true.
5313
5314 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5315 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
5316
5317 #if GCC_VERSION >= 4000
5318 #define RWS_FIELD_TYPE __extension__ unsigned short
5319 #else
5320 #define RWS_FIELD_TYPE unsigned int
5321 #endif
5322 struct reg_write_state
5323 {
5324 RWS_FIELD_TYPE write_count : 2;
5325 RWS_FIELD_TYPE first_pred : 10;
5326 RWS_FIELD_TYPE written_by_fp : 1;
5327 RWS_FIELD_TYPE written_by_and : 1;
5328 RWS_FIELD_TYPE written_by_or : 1;
5329 };
5330
5331 /* Cumulative info for the current instruction group. */
5332 struct reg_write_state rws_sum[NUM_REGS];
5333 #ifdef ENABLE_CHECKING
5334 /* Bitmap whether a register has been written in the current insn. */
5335 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5336 / HOST_BITS_PER_WIDEST_FAST_INT];
5337
5338 static inline void
5339 rws_insn_set (int regno)
5340 {
5341 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5342 SET_HARD_REG_BIT (rws_insn, regno);
5343 }
5344
5345 static inline int
5346 rws_insn_test (int regno)
5347 {
5348 return TEST_HARD_REG_BIT (rws_insn, regno);
5349 }
5350 #else
5351 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5352 unsigned char rws_insn[2];
5353
5354 static inline void
5355 rws_insn_set (int regno)
5356 {
5357 if (regno == REG_AR_CFM)
5358 rws_insn[0] = 1;
5359 else if (regno == REG_VOLATILE)
5360 rws_insn[1] = 1;
5361 }
5362
5363 static inline int
5364 rws_insn_test (int regno)
5365 {
5366 if (regno == REG_AR_CFM)
5367 return rws_insn[0];
5368 if (regno == REG_VOLATILE)
5369 return rws_insn[1];
5370 return 0;
5371 }
5372 #endif
5373
5374 /* Indicates whether this is the first instruction after a stop bit,
5375 in which case we don't need another stop bit. Without this,
5376 ia64_variable_issue will die when scheduling an alloc. */
5377 static int first_instruction;
5378
5379 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5380 RTL for one instruction. */
5381 struct reg_flags
5382 {
5383 unsigned int is_write : 1; /* Is register being written? */
5384 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5385 unsigned int is_branch : 1; /* Is register used as part of a branch? */
5386 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5387 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
5388 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
5389 };
5390
5391 static void rws_update (int, struct reg_flags, int);
5392 static int rws_access_regno (int, struct reg_flags, int);
5393 static int rws_access_reg (rtx, struct reg_flags, int);
5394 static void update_set_flags (rtx, struct reg_flags *);
5395 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5396 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5397 static void init_insn_group_barriers (void);
5398 static int group_barrier_needed (rtx);
5399 static int safe_group_barrier_needed (rtx);
5400 static int in_safe_group_barrier;
5401
5402 /* Update *RWS for REGNO, which is being written by the current instruction,
5403 with predicate PRED, and associated register flags in FLAGS. */
5404
5405 static void
5406 rws_update (int regno, struct reg_flags flags, int pred)
5407 {
5408 if (pred)
5409 rws_sum[regno].write_count++;
5410 else
5411 rws_sum[regno].write_count = 2;
5412 rws_sum[regno].written_by_fp |= flags.is_fp;
5413 /* ??? Not tracking and/or across differing predicates. */
5414 rws_sum[regno].written_by_and = flags.is_and;
5415 rws_sum[regno].written_by_or = flags.is_or;
5416 rws_sum[regno].first_pred = pred;
5417 }
5418
5419 /* Handle an access to register REGNO of type FLAGS using predicate register
5420 PRED. Update rws_sum array. Return 1 if this access creates
5421 a dependency with an earlier instruction in the same group. */
5422
5423 static int
5424 rws_access_regno (int regno, struct reg_flags flags, int pred)
5425 {
5426 int need_barrier = 0;
5427
5428 gcc_assert (regno < NUM_REGS);
5429
5430 if (! PR_REGNO_P (regno))
5431 flags.is_and = flags.is_or = 0;
5432
5433 if (flags.is_write)
5434 {
5435 int write_count;
5436
5437 rws_insn_set (regno);
5438 write_count = rws_sum[regno].write_count;
5439
5440 switch (write_count)
5441 {
5442 case 0:
5443 /* The register has not been written yet. */
5444 if (!in_safe_group_barrier)
5445 rws_update (regno, flags, pred);
5446 break;
5447
5448 case 1:
5449 /* The register has been written via a predicate. If this is
5450 not a complementary predicate, then we need a barrier. */
5451 /* ??? This assumes that P and P+1 are always complementary
5452 predicates for P even. */
5453 if (flags.is_and && rws_sum[regno].written_by_and)
5454 ;
5455 else if (flags.is_or && rws_sum[regno].written_by_or)
5456 ;
5457 else if ((rws_sum[regno].first_pred ^ 1) != pred)
5458 need_barrier = 1;
5459 if (!in_safe_group_barrier)
5460 rws_update (regno, flags, pred);
5461 break;
5462
5463 case 2:
5464 /* The register has been unconditionally written already. We
5465 need a barrier. */
5466 if (flags.is_and && rws_sum[regno].written_by_and)
5467 ;
5468 else if (flags.is_or && rws_sum[regno].written_by_or)
5469 ;
5470 else
5471 need_barrier = 1;
5472 if (!in_safe_group_barrier)
5473 {
5474 rws_sum[regno].written_by_and = flags.is_and;
5475 rws_sum[regno].written_by_or = flags.is_or;
5476 }
5477 break;
5478
5479 default:
5480 gcc_unreachable ();
5481 }
5482 }
5483 else
5484 {
5485 if (flags.is_branch)
5486 {
5487 /* Branches have several RAW exceptions that allow to avoid
5488 barriers. */
5489
5490 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5491 /* RAW dependencies on branch regs are permissible as long
5492 as the writer is a non-branch instruction. Since we
5493 never generate code that uses a branch register written
5494 by a branch instruction, handling this case is
5495 easy. */
5496 return 0;
5497
5498 if (REGNO_REG_CLASS (regno) == PR_REGS
5499 && ! rws_sum[regno].written_by_fp)
5500 /* The predicates of a branch are available within the
5501 same insn group as long as the predicate was written by
5502 something other than a floating-point instruction. */
5503 return 0;
5504 }
5505
5506 if (flags.is_and && rws_sum[regno].written_by_and)
5507 return 0;
5508 if (flags.is_or && rws_sum[regno].written_by_or)
5509 return 0;
5510
5511 switch (rws_sum[regno].write_count)
5512 {
5513 case 0:
5514 /* The register has not been written yet. */
5515 break;
5516
5517 case 1:
5518 /* The register has been written via a predicate. If this is
5519 not a complementary predicate, then we need a barrier. */
5520 /* ??? This assumes that P and P+1 are always complementary
5521 predicates for P even. */
5522 if ((rws_sum[regno].first_pred ^ 1) != pred)
5523 need_barrier = 1;
5524 break;
5525
5526 case 2:
5527 /* The register has been unconditionally written already. We
5528 need a barrier. */
5529 need_barrier = 1;
5530 break;
5531
5532 default:
5533 gcc_unreachable ();
5534 }
5535 }
5536
5537 return need_barrier;
5538 }
5539
5540 static int
5541 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5542 {
5543 int regno = REGNO (reg);
5544 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5545
5546 if (n == 1)
5547 return rws_access_regno (regno, flags, pred);
5548 else
5549 {
5550 int need_barrier = 0;
5551 while (--n >= 0)
5552 need_barrier |= rws_access_regno (regno + n, flags, pred);
5553 return need_barrier;
5554 }
5555 }
5556
5557 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
5558 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
5559
5560 static void
5561 update_set_flags (rtx x, struct reg_flags *pflags)
5562 {
5563 rtx src = SET_SRC (x);
5564
5565 switch (GET_CODE (src))
5566 {
5567 case CALL:
5568 return;
5569
5570 case IF_THEN_ELSE:
5571 /* There are four cases here:
5572 (1) The destination is (pc), in which case this is a branch,
5573 nothing here applies.
5574 (2) The destination is ar.lc, in which case this is a
5575 doloop_end_internal,
5576 (3) The destination is an fp register, in which case this is
5577 an fselect instruction.
5578 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5579 this is a check load.
5580 In all cases, nothing we do in this function applies. */
5581 return;
5582
5583 default:
5584 if (COMPARISON_P (src)
5585 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
5586 /* Set pflags->is_fp to 1 so that we know we're dealing
5587 with a floating point comparison when processing the
5588 destination of the SET. */
5589 pflags->is_fp = 1;
5590
5591 /* Discover if this is a parallel comparison. We only handle
5592 and.orcm and or.andcm at present, since we must retain a
5593 strict inverse on the predicate pair. */
5594 else if (GET_CODE (src) == AND)
5595 pflags->is_and = 1;
5596 else if (GET_CODE (src) == IOR)
5597 pflags->is_or = 1;
5598
5599 break;
5600 }
5601 }
5602
5603 /* Subroutine of rtx_needs_barrier; this function determines whether the
5604 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5605 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5606 for this insn. */
5607
5608 static int
5609 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5610 {
5611 int need_barrier = 0;
5612 rtx dst;
5613 rtx src = SET_SRC (x);
5614
5615 if (GET_CODE (src) == CALL)
5616 /* We don't need to worry about the result registers that
5617 get written by subroutine call. */
5618 return rtx_needs_barrier (src, flags, pred);
5619 else if (SET_DEST (x) == pc_rtx)
5620 {
5621 /* X is a conditional branch. */
5622 /* ??? This seems redundant, as the caller sets this bit for
5623 all JUMP_INSNs. */
5624 if (!ia64_spec_check_src_p (src))
5625 flags.is_branch = 1;
5626 return rtx_needs_barrier (src, flags, pred);
5627 }
5628
5629 if (ia64_spec_check_src_p (src))
5630 /* Avoid checking one register twice (in condition
5631 and in 'then' section) for ldc pattern. */
5632 {
5633 gcc_assert (REG_P (XEXP (src, 2)));
5634 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5635
5636 /* We process MEM below. */
5637 src = XEXP (src, 1);
5638 }
5639
5640 need_barrier |= rtx_needs_barrier (src, flags, pred);
5641
5642 dst = SET_DEST (x);
5643 if (GET_CODE (dst) == ZERO_EXTRACT)
5644 {
5645 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5646 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5647 }
5648 return need_barrier;
5649 }
5650
5651 /* Handle an access to rtx X of type FLAGS using predicate register
5652 PRED. Return 1 if this access creates a dependency with an earlier
5653 instruction in the same group. */
5654
5655 static int
5656 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5657 {
5658 int i, j;
5659 int is_complemented = 0;
5660 int need_barrier = 0;
5661 const char *format_ptr;
5662 struct reg_flags new_flags;
5663 rtx cond;
5664
5665 if (! x)
5666 return 0;
5667
5668 new_flags = flags;
5669
5670 switch (GET_CODE (x))
5671 {
5672 case SET:
5673 update_set_flags (x, &new_flags);
5674 need_barrier = set_src_needs_barrier (x, new_flags, pred);
5675 if (GET_CODE (SET_SRC (x)) != CALL)
5676 {
5677 new_flags.is_write = 1;
5678 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5679 }
5680 break;
5681
5682 case CALL:
5683 new_flags.is_write = 0;
5684 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5685
5686 /* Avoid multiple register writes, in case this is a pattern with
5687 multiple CALL rtx. This avoids a failure in rws_access_reg. */
5688 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
5689 {
5690 new_flags.is_write = 1;
5691 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5692 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5693 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5694 }
5695 break;
5696
5697 case COND_EXEC:
5698 /* X is a predicated instruction. */
5699
5700 cond = COND_EXEC_TEST (x);
5701 gcc_assert (!pred);
5702 need_barrier = rtx_needs_barrier (cond, flags, 0);
5703
5704 if (GET_CODE (cond) == EQ)
5705 is_complemented = 1;
5706 cond = XEXP (cond, 0);
5707 gcc_assert (GET_CODE (cond) == REG
5708 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5709 pred = REGNO (cond);
5710 if (is_complemented)
5711 ++pred;
5712
5713 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5714 return need_barrier;
5715
5716 case CLOBBER:
5717 case USE:
5718 /* Clobber & use are for earlier compiler-phases only. */
5719 break;
5720
5721 case ASM_OPERANDS:
5722 case ASM_INPUT:
5723 /* We always emit stop bits for traditional asms. We emit stop bits
5724 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5725 if (GET_CODE (x) != ASM_OPERANDS
5726 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5727 {
5728 /* Avoid writing the register multiple times if we have multiple
5729 asm outputs. This avoids a failure in rws_access_reg. */
5730 if (! rws_insn_test (REG_VOLATILE))
5731 {
5732 new_flags.is_write = 1;
5733 rws_access_regno (REG_VOLATILE, new_flags, pred);
5734 }
5735 return 1;
5736 }
5737
5738 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5739 We cannot just fall through here since then we would be confused
5740 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
5741 traditional asms unlike their normal usage. */
5742
5743 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5744 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5745 need_barrier = 1;
5746 break;
5747
5748 case PARALLEL:
5749 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5750 {
5751 rtx pat = XVECEXP (x, 0, i);
5752 switch (GET_CODE (pat))
5753 {
5754 case SET:
5755 update_set_flags (pat, &new_flags);
5756 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
5757 break;
5758
5759 case USE:
5760 case CALL:
5761 case ASM_OPERANDS:
5762 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5763 break;
5764
5765 case CLOBBER:
5766 case RETURN:
5767 break;
5768
5769 default:
5770 gcc_unreachable ();
5771 }
5772 }
5773 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5774 {
5775 rtx pat = XVECEXP (x, 0, i);
5776 if (GET_CODE (pat) == SET)
5777 {
5778 if (GET_CODE (SET_SRC (pat)) != CALL)
5779 {
5780 new_flags.is_write = 1;
5781 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5782 pred);
5783 }
5784 }
5785 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5786 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5787 }
5788 break;
5789
5790 case SUBREG:
5791 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5792 break;
5793 case REG:
5794 if (REGNO (x) == AR_UNAT_REGNUM)
5795 {
5796 for (i = 0; i < 64; ++i)
5797 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5798 }
5799 else
5800 need_barrier = rws_access_reg (x, flags, pred);
5801 break;
5802
5803 case MEM:
5804 /* Find the regs used in memory address computation. */
5805 new_flags.is_write = 0;
5806 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5807 break;
5808
5809 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
5810 case SYMBOL_REF: case LABEL_REF: case CONST:
5811 break;
5812
5813 /* Operators with side-effects. */
5814 case POST_INC: case POST_DEC:
5815 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5816
5817 new_flags.is_write = 0;
5818 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5819 new_flags.is_write = 1;
5820 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5821 break;
5822
5823 case POST_MODIFY:
5824 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5825
5826 new_flags.is_write = 0;
5827 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5828 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5829 new_flags.is_write = 1;
5830 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5831 break;
5832
5833 /* Handle common unary and binary ops for efficiency. */
5834 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5835 case MOD: case UDIV: case UMOD: case AND: case IOR:
5836 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5837 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5838 case NE: case EQ: case GE: case GT: case LE:
5839 case LT: case GEU: case GTU: case LEU: case LTU:
5840 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5841 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5842 break;
5843
5844 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5845 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5846 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5847 case SQRT: case FFS: case POPCOUNT:
5848 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5849 break;
5850
5851 case VEC_SELECT:
5852 /* VEC_SELECT's second argument is a PARALLEL with integers that
5853 describe the elements selected. On ia64, those integers are
5854 always constants. Avoid walking the PARALLEL so that we don't
5855 get confused with "normal" parallels and then die. */
5856 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5857 break;
5858
5859 case UNSPEC:
5860 switch (XINT (x, 1))
5861 {
5862 case UNSPEC_LTOFF_DTPMOD:
5863 case UNSPEC_LTOFF_DTPREL:
5864 case UNSPEC_DTPREL:
5865 case UNSPEC_LTOFF_TPREL:
5866 case UNSPEC_TPREL:
5867 case UNSPEC_PRED_REL_MUTEX:
5868 case UNSPEC_PIC_CALL:
5869 case UNSPEC_MF:
5870 case UNSPEC_FETCHADD_ACQ:
5871 case UNSPEC_BSP_VALUE:
5872 case UNSPEC_FLUSHRS:
5873 case UNSPEC_BUNDLE_SELECTOR:
5874 break;
5875
5876 case UNSPEC_GR_SPILL:
5877 case UNSPEC_GR_RESTORE:
5878 {
5879 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5880 HOST_WIDE_INT bit = (offset >> 3) & 63;
5881
5882 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5883 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5884 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5885 new_flags, pred);
5886 break;
5887 }
5888
5889 case UNSPEC_FR_SPILL:
5890 case UNSPEC_FR_RESTORE:
5891 case UNSPEC_GETF_EXP:
5892 case UNSPEC_SETF_EXP:
5893 case UNSPEC_ADDP4:
5894 case UNSPEC_FR_SQRT_RECIP_APPROX:
5895 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
5896 case UNSPEC_LDA:
5897 case UNSPEC_LDS:
5898 case UNSPEC_LDSA:
5899 case UNSPEC_CHKACLR:
5900 case UNSPEC_CHKS:
5901 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5902 break;
5903
5904 case UNSPEC_FR_RECIP_APPROX:
5905 case UNSPEC_SHRP:
5906 case UNSPEC_COPYSIGN:
5907 case UNSPEC_FR_RECIP_APPROX_RES:
5908 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5909 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5910 break;
5911
5912 case UNSPEC_CMPXCHG_ACQ:
5913 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5914 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5915 break;
5916
5917 default:
5918 gcc_unreachable ();
5919 }
5920 break;
5921
5922 case UNSPEC_VOLATILE:
5923 switch (XINT (x, 1))
5924 {
5925 case UNSPECV_ALLOC:
5926 /* Alloc must always be the first instruction of a group.
5927 We force this by always returning true. */
5928 /* ??? We might get better scheduling if we explicitly check for
5929 input/local/output register dependencies, and modify the
5930 scheduler so that alloc is always reordered to the start of
5931 the current group. We could then eliminate all of the
5932 first_instruction code. */
5933 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5934
5935 new_flags.is_write = 1;
5936 rws_access_regno (REG_AR_CFM, new_flags, pred);
5937 return 1;
5938
5939 case UNSPECV_SET_BSP:
5940 need_barrier = 1;
5941 break;
5942
5943 case UNSPECV_BLOCKAGE:
5944 case UNSPECV_INSN_GROUP_BARRIER:
5945 case UNSPECV_BREAK:
5946 case UNSPECV_PSAC_ALL:
5947 case UNSPECV_PSAC_NORMAL:
5948 return 0;
5949
5950 default:
5951 gcc_unreachable ();
5952 }
5953 break;
5954
5955 case RETURN:
5956 new_flags.is_write = 0;
5957 need_barrier = rws_access_regno (REG_RP, flags, pred);
5958 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5959
5960 new_flags.is_write = 1;
5961 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5962 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5963 break;
5964
5965 default:
5966 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5967 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5968 switch (format_ptr[i])
5969 {
5970 case '0': /* unused field */
5971 case 'i': /* integer */
5972 case 'n': /* note */
5973 case 'w': /* wide integer */
5974 case 's': /* pointer to string */
5975 case 'S': /* optional pointer to string */
5976 break;
5977
5978 case 'e':
5979 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5980 need_barrier = 1;
5981 break;
5982
5983 case 'E':
5984 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5985 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5986 need_barrier = 1;
5987 break;
5988
5989 default:
5990 gcc_unreachable ();
5991 }
5992 break;
5993 }
5994 return need_barrier;
5995 }
5996
5997 /* Clear out the state for group_barrier_needed at the start of a
5998 sequence of insns. */
5999
6000 static void
6001 init_insn_group_barriers (void)
6002 {
6003 memset (rws_sum, 0, sizeof (rws_sum));
6004 first_instruction = 1;
6005 }
6006
6007 /* Given the current state, determine whether a group barrier (a stop bit) is
6008 necessary before INSN. Return nonzero if so. This modifies the state to
6009 include the effects of INSN as a side-effect. */
6010
6011 static int
6012 group_barrier_needed (rtx insn)
6013 {
6014 rtx pat;
6015 int need_barrier = 0;
6016 struct reg_flags flags;
6017
6018 memset (&flags, 0, sizeof (flags));
6019 switch (GET_CODE (insn))
6020 {
6021 case NOTE:
6022 break;
6023
6024 case BARRIER:
6025 /* A barrier doesn't imply an instruction group boundary. */
6026 break;
6027
6028 case CODE_LABEL:
6029 memset (rws_insn, 0, sizeof (rws_insn));
6030 return 1;
6031
6032 case CALL_INSN:
6033 flags.is_branch = 1;
6034 flags.is_sibcall = SIBLING_CALL_P (insn);
6035 memset (rws_insn, 0, sizeof (rws_insn));
6036
6037 /* Don't bundle a call following another call. */
6038 if ((pat = prev_active_insn (insn))
6039 && GET_CODE (pat) == CALL_INSN)
6040 {
6041 need_barrier = 1;
6042 break;
6043 }
6044
6045 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6046 break;
6047
6048 case JUMP_INSN:
6049 if (!ia64_spec_check_p (insn))
6050 flags.is_branch = 1;
6051
6052 /* Don't bundle a jump following a call. */
6053 if ((pat = prev_active_insn (insn))
6054 && GET_CODE (pat) == CALL_INSN)
6055 {
6056 need_barrier = 1;
6057 break;
6058 }
6059 /* FALLTHRU */
6060
6061 case INSN:
6062 if (GET_CODE (PATTERN (insn)) == USE
6063 || GET_CODE (PATTERN (insn)) == CLOBBER)
6064 /* Don't care about USE and CLOBBER "insns"---those are used to
6065 indicate to the optimizer that it shouldn't get rid of
6066 certain operations. */
6067 break;
6068
6069 pat = PATTERN (insn);
6070
6071 /* Ug. Hack hacks hacked elsewhere. */
6072 switch (recog_memoized (insn))
6073 {
6074 /* We play dependency tricks with the epilogue in order
6075 to get proper schedules. Undo this for dv analysis. */
6076 case CODE_FOR_epilogue_deallocate_stack:
6077 case CODE_FOR_prologue_allocate_stack:
6078 pat = XVECEXP (pat, 0, 0);
6079 break;
6080
6081 /* The pattern we use for br.cloop confuses the code above.
6082 The second element of the vector is representative. */
6083 case CODE_FOR_doloop_end_internal:
6084 pat = XVECEXP (pat, 0, 1);
6085 break;
6086
6087 /* Doesn't generate code. */
6088 case CODE_FOR_pred_rel_mutex:
6089 case CODE_FOR_prologue_use:
6090 return 0;
6091
6092 default:
6093 break;
6094 }
6095
6096 memset (rws_insn, 0, sizeof (rws_insn));
6097 need_barrier = rtx_needs_barrier (pat, flags, 0);
6098
6099 /* Check to see if the previous instruction was a volatile
6100 asm. */
6101 if (! need_barrier)
6102 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6103 break;
6104
6105 default:
6106 gcc_unreachable ();
6107 }
6108
6109 if (first_instruction && INSN_P (insn)
6110 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6111 && GET_CODE (PATTERN (insn)) != USE
6112 && GET_CODE (PATTERN (insn)) != CLOBBER)
6113 {
6114 need_barrier = 0;
6115 first_instruction = 0;
6116 }
6117
6118 return need_barrier;
6119 }
6120
6121 /* Like group_barrier_needed, but do not clobber the current state. */
6122
6123 static int
6124 safe_group_barrier_needed (rtx insn)
6125 {
6126 int saved_first_instruction;
6127 int t;
6128
6129 saved_first_instruction = first_instruction;
6130 in_safe_group_barrier = 1;
6131
6132 t = group_barrier_needed (insn);
6133
6134 first_instruction = saved_first_instruction;
6135 in_safe_group_barrier = 0;
6136
6137 return t;
6138 }
6139
6140 /* Scan the current function and insert stop bits as necessary to
6141 eliminate dependencies. This function assumes that a final
6142 instruction scheduling pass has been run which has already
6143 inserted most of the necessary stop bits. This function only
6144 inserts new ones at basic block boundaries, since these are
6145 invisible to the scheduler. */
6146
6147 static void
6148 emit_insn_group_barriers (FILE *dump)
6149 {
6150 rtx insn;
6151 rtx last_label = 0;
6152 int insns_since_last_label = 0;
6153
6154 init_insn_group_barriers ();
6155
6156 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6157 {
6158 if (GET_CODE (insn) == CODE_LABEL)
6159 {
6160 if (insns_since_last_label)
6161 last_label = insn;
6162 insns_since_last_label = 0;
6163 }
6164 else if (GET_CODE (insn) == NOTE
6165 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6166 {
6167 if (insns_since_last_label)
6168 last_label = insn;
6169 insns_since_last_label = 0;
6170 }
6171 else if (GET_CODE (insn) == INSN
6172 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6173 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6174 {
6175 init_insn_group_barriers ();
6176 last_label = 0;
6177 }
6178 else if (INSN_P (insn))
6179 {
6180 insns_since_last_label = 1;
6181
6182 if (group_barrier_needed (insn))
6183 {
6184 if (last_label)
6185 {
6186 if (dump)
6187 fprintf (dump, "Emitting stop before label %d\n",
6188 INSN_UID (last_label));
6189 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6190 insn = last_label;
6191
6192 init_insn_group_barriers ();
6193 last_label = 0;
6194 }
6195 }
6196 }
6197 }
6198 }
6199
6200 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6201 This function has to emit all necessary group barriers. */
6202
6203 static void
6204 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6205 {
6206 rtx insn;
6207
6208 init_insn_group_barriers ();
6209
6210 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6211 {
6212 if (GET_CODE (insn) == BARRIER)
6213 {
6214 rtx last = prev_active_insn (insn);
6215
6216 if (! last)
6217 continue;
6218 if (GET_CODE (last) == JUMP_INSN
6219 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6220 last = prev_active_insn (last);
6221 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6222 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6223
6224 init_insn_group_barriers ();
6225 }
6226 else if (INSN_P (insn))
6227 {
6228 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6229 init_insn_group_barriers ();
6230 else if (group_barrier_needed (insn))
6231 {
6232 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6233 init_insn_group_barriers ();
6234 group_barrier_needed (insn);
6235 }
6236 }
6237 }
6238 }
6239
6240 \f
6241
6242 /* Instruction scheduling support. */
6243
6244 #define NR_BUNDLES 10
6245
6246 /* A list of names of all available bundles. */
6247
6248 static const char *bundle_name [NR_BUNDLES] =
6249 {
6250 ".mii",
6251 ".mmi",
6252 ".mfi",
6253 ".mmf",
6254 #if NR_BUNDLES == 10
6255 ".bbb",
6256 ".mbb",
6257 #endif
6258 ".mib",
6259 ".mmb",
6260 ".mfb",
6261 ".mlx"
6262 };
6263
6264 /* Nonzero if we should insert stop bits into the schedule. */
6265
6266 int ia64_final_schedule = 0;
6267
6268 /* Codes of the corresponding queried units: */
6269
6270 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6271 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6272
6273 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6274 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6275
6276 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6277
6278 /* The following variable value is an insn group barrier. */
6279
6280 static rtx dfa_stop_insn;
6281
6282 /* The following variable value is the last issued insn. */
6283
6284 static rtx last_scheduled_insn;
6285
6286 /* The following variable value is size of the DFA state. */
6287
6288 static size_t dfa_state_size;
6289
6290 /* The following variable value is pointer to a DFA state used as
6291 temporary variable. */
6292
6293 static state_t temp_dfa_state = NULL;
6294
6295 /* The following variable value is DFA state after issuing the last
6296 insn. */
6297
6298 static state_t prev_cycle_state = NULL;
6299
6300 /* The following array element values are TRUE if the corresponding
6301 insn requires to add stop bits before it. */
6302
6303 static char *stops_p = NULL;
6304
6305 /* The following array element values are ZERO for non-speculative
6306 instructions and hold corresponding speculation check number for
6307 speculative instructions. */
6308 static int *spec_check_no = NULL;
6309
6310 /* Size of spec_check_no array. */
6311 static int max_uid = 0;
6312
6313 /* The following variable is used to set up the mentioned above array. */
6314
6315 static int stop_before_p = 0;
6316
6317 /* The following variable value is length of the arrays `clocks' and
6318 `add_cycles'. */
6319
6320 static int clocks_length;
6321
6322 /* The following array element values are cycles on which the
6323 corresponding insn will be issued. The array is used only for
6324 Itanium1. */
6325
6326 static int *clocks;
6327
6328 /* The following array element values are numbers of cycles should be
6329 added to improve insn scheduling for MM_insns for Itanium1. */
6330
6331 static int *add_cycles;
6332
6333 /* The following variable value is number of data speculations in progress. */
6334 static int pending_data_specs = 0;
6335
6336 static rtx ia64_single_set (rtx);
6337 static void ia64_emit_insn_before (rtx, rtx);
6338
6339 /* Map a bundle number to its pseudo-op. */
6340
6341 const char *
6342 get_bundle_name (int b)
6343 {
6344 return bundle_name[b];
6345 }
6346
6347
6348 /* Return the maximum number of instructions a cpu can issue. */
6349
6350 static int
6351 ia64_issue_rate (void)
6352 {
6353 return 6;
6354 }
6355
6356 /* Helper function - like single_set, but look inside COND_EXEC. */
6357
6358 static rtx
6359 ia64_single_set (rtx insn)
6360 {
6361 rtx x = PATTERN (insn), ret;
6362 if (GET_CODE (x) == COND_EXEC)
6363 x = COND_EXEC_CODE (x);
6364 if (GET_CODE (x) == SET)
6365 return x;
6366
6367 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6368 Although they are not classical single set, the second set is there just
6369 to protect it from moving past FP-relative stack accesses. */
6370 switch (recog_memoized (insn))
6371 {
6372 case CODE_FOR_prologue_allocate_stack:
6373 case CODE_FOR_epilogue_deallocate_stack:
6374 ret = XVECEXP (x, 0, 0);
6375 break;
6376
6377 default:
6378 ret = single_set_2 (insn, x);
6379 break;
6380 }
6381
6382 return ret;
6383 }
6384
6385 /* Adjust the cost of a scheduling dependency. Return the new cost of
6386 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
6387
6388 static int
6389 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
6390 {
6391 enum attr_itanium_class dep_class;
6392 enum attr_itanium_class insn_class;
6393
6394 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
6395 return cost;
6396
6397 insn_class = ia64_safe_itanium_class (insn);
6398 dep_class = ia64_safe_itanium_class (dep_insn);
6399 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6400 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6401 return 0;
6402
6403 return cost;
6404 }
6405
6406 /* Like emit_insn_before, but skip cycle_display notes.
6407 ??? When cycle display notes are implemented, update this. */
6408
6409 static void
6410 ia64_emit_insn_before (rtx insn, rtx before)
6411 {
6412 emit_insn_before (insn, before);
6413 }
6414
6415 /* The following function marks insns who produce addresses for load
6416 and store insns. Such insns will be placed into M slots because it
6417 decrease latency time for Itanium1 (see function
6418 `ia64_produce_address_p' and the DFA descriptions). */
6419
6420 static void
6421 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6422 {
6423 rtx insn, next, next_tail;
6424
6425 /* Before reload, which_alternative is not set, which means that
6426 ia64_safe_itanium_class will produce wrong results for (at least)
6427 move instructions. */
6428 if (!reload_completed)
6429 return;
6430
6431 next_tail = NEXT_INSN (tail);
6432 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6433 if (INSN_P (insn))
6434 insn->call = 0;
6435 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6436 if (INSN_P (insn)
6437 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6438 {
6439 sd_iterator_def sd_it;
6440 dep_t dep;
6441 bool has_mem_op_consumer_p = false;
6442
6443 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
6444 {
6445 enum attr_itanium_class c;
6446
6447 if (DEP_TYPE (dep) != REG_DEP_TRUE)
6448 continue;
6449
6450 next = DEP_CON (dep);
6451 c = ia64_safe_itanium_class (next);
6452 if ((c == ITANIUM_CLASS_ST
6453 || c == ITANIUM_CLASS_STF)
6454 && ia64_st_address_bypass_p (insn, next))
6455 {
6456 has_mem_op_consumer_p = true;
6457 break;
6458 }
6459 else if ((c == ITANIUM_CLASS_LD
6460 || c == ITANIUM_CLASS_FLD
6461 || c == ITANIUM_CLASS_FLDP)
6462 && ia64_ld_address_bypass_p (insn, next))
6463 {
6464 has_mem_op_consumer_p = true;
6465 break;
6466 }
6467 }
6468
6469 insn->call = has_mem_op_consumer_p;
6470 }
6471 }
6472
6473 /* We're beginning a new block. Initialize data structures as necessary. */
6474
6475 static void
6476 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6477 int sched_verbose ATTRIBUTE_UNUSED,
6478 int max_ready ATTRIBUTE_UNUSED)
6479 {
6480 #ifdef ENABLE_CHECKING
6481 rtx insn;
6482
6483 if (reload_completed)
6484 for (insn = NEXT_INSN (current_sched_info->prev_head);
6485 insn != current_sched_info->next_tail;
6486 insn = NEXT_INSN (insn))
6487 gcc_assert (!SCHED_GROUP_P (insn));
6488 #endif
6489 last_scheduled_insn = NULL_RTX;
6490 init_insn_group_barriers ();
6491 }
6492
6493 /* We're beginning a scheduling pass. Check assertion. */
6494
6495 static void
6496 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6497 int sched_verbose ATTRIBUTE_UNUSED,
6498 int max_ready ATTRIBUTE_UNUSED)
6499 {
6500 gcc_assert (!pending_data_specs);
6501 }
6502
6503 /* Scheduling pass is now finished. Free/reset static variable. */
6504 static void
6505 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6506 int sched_verbose ATTRIBUTE_UNUSED)
6507 {
6508 free (spec_check_no);
6509 spec_check_no = 0;
6510 max_uid = 0;
6511 }
6512
6513 /* We are about to being issuing insns for this clock cycle.
6514 Override the default sort algorithm to better slot instructions. */
6515
6516 static int
6517 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6518 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6519 int reorder_type)
6520 {
6521 int n_asms;
6522 int n_ready = *pn_ready;
6523 rtx *e_ready = ready + n_ready;
6524 rtx *insnp;
6525
6526 if (sched_verbose)
6527 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6528
6529 if (reorder_type == 0)
6530 {
6531 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6532 n_asms = 0;
6533 for (insnp = ready; insnp < e_ready; insnp++)
6534 if (insnp < e_ready)
6535 {
6536 rtx insn = *insnp;
6537 enum attr_type t = ia64_safe_type (insn);
6538 if (t == TYPE_UNKNOWN)
6539 {
6540 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6541 || asm_noperands (PATTERN (insn)) >= 0)
6542 {
6543 rtx lowest = ready[n_asms];
6544 ready[n_asms] = insn;
6545 *insnp = lowest;
6546 n_asms++;
6547 }
6548 else
6549 {
6550 rtx highest = ready[n_ready - 1];
6551 ready[n_ready - 1] = insn;
6552 *insnp = highest;
6553 return 1;
6554 }
6555 }
6556 }
6557
6558 if (n_asms < n_ready)
6559 {
6560 /* Some normal insns to process. Skip the asms. */
6561 ready += n_asms;
6562 n_ready -= n_asms;
6563 }
6564 else if (n_ready > 0)
6565 return 1;
6566 }
6567
6568 if (ia64_final_schedule)
6569 {
6570 int deleted = 0;
6571 int nr_need_stop = 0;
6572
6573 for (insnp = ready; insnp < e_ready; insnp++)
6574 if (safe_group_barrier_needed (*insnp))
6575 nr_need_stop++;
6576
6577 if (reorder_type == 1 && n_ready == nr_need_stop)
6578 return 0;
6579 if (reorder_type == 0)
6580 return 1;
6581 insnp = e_ready;
6582 /* Move down everything that needs a stop bit, preserving
6583 relative order. */
6584 while (insnp-- > ready + deleted)
6585 while (insnp >= ready + deleted)
6586 {
6587 rtx insn = *insnp;
6588 if (! safe_group_barrier_needed (insn))
6589 break;
6590 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6591 *ready = insn;
6592 deleted++;
6593 }
6594 n_ready -= deleted;
6595 ready += deleted;
6596 }
6597
6598 return 1;
6599 }
6600
6601 /* We are about to being issuing insns for this clock cycle. Override
6602 the default sort algorithm to better slot instructions. */
6603
6604 static int
6605 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6606 int clock_var)
6607 {
6608 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6609 pn_ready, clock_var, 0);
6610 }
6611
6612 /* Like ia64_sched_reorder, but called after issuing each insn.
6613 Override the default sort algorithm to better slot instructions. */
6614
6615 static int
6616 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6617 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6618 int *pn_ready, int clock_var)
6619 {
6620 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6621 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6622 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6623 clock_var, 1);
6624 }
6625
6626 /* We are about to issue INSN. Return the number of insns left on the
6627 ready queue that can be issued this cycle. */
6628
6629 static int
6630 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6631 int sched_verbose ATTRIBUTE_UNUSED,
6632 rtx insn ATTRIBUTE_UNUSED,
6633 int can_issue_more ATTRIBUTE_UNUSED)
6634 {
6635 if (current_sched_info->flags & DO_SPECULATION)
6636 /* Modulo scheduling does not extend h_i_d when emitting
6637 new instructions. Deal with it. */
6638 {
6639 if (DONE_SPEC (insn) & BEGIN_DATA)
6640 pending_data_specs++;
6641 if (CHECK_SPEC (insn) & BEGIN_DATA)
6642 pending_data_specs--;
6643 }
6644
6645 last_scheduled_insn = insn;
6646 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6647 if (reload_completed)
6648 {
6649 int needed = group_barrier_needed (insn);
6650
6651 gcc_assert (!needed);
6652 if (GET_CODE (insn) == CALL_INSN)
6653 init_insn_group_barriers ();
6654 stops_p [INSN_UID (insn)] = stop_before_p;
6655 stop_before_p = 0;
6656 }
6657 return 1;
6658 }
6659
6660 /* We are choosing insn from the ready queue. Return nonzero if INSN
6661 can be chosen. */
6662
6663 static int
6664 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6665 {
6666 gcc_assert (insn && INSN_P (insn));
6667 return ((!reload_completed
6668 || !safe_group_barrier_needed (insn))
6669 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn));
6670 }
6671
6672 /* We are choosing insn from the ready queue. Return nonzero if INSN
6673 can be chosen. */
6674
6675 static bool
6676 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
6677 {
6678 gcc_assert (insn && INSN_P (insn));
6679 /* Size of ALAT is 32. As far as we perform conservative data speculation,
6680 we keep ALAT half-empty. */
6681 return (pending_data_specs < 16
6682 || !(TODO_SPEC (insn) & BEGIN_DATA));
6683 }
6684
6685 /* The following variable value is pseudo-insn used by the DFA insn
6686 scheduler to change the DFA state when the simulated clock is
6687 increased. */
6688
6689 static rtx dfa_pre_cycle_insn;
6690
6691 /* We are about to being issuing INSN. Return nonzero if we cannot
6692 issue it on given cycle CLOCK and return zero if we should not sort
6693 the ready queue on the next clock start. */
6694
6695 static int
6696 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6697 int clock, int *sort_p)
6698 {
6699 int setup_clocks_p = FALSE;
6700
6701 gcc_assert (insn && INSN_P (insn));
6702 if ((reload_completed && safe_group_barrier_needed (insn))
6703 || (last_scheduled_insn
6704 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6705 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6706 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6707 {
6708 init_insn_group_barriers ();
6709 if (verbose && dump)
6710 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6711 last_clock == clock ? " + cycle advance" : "");
6712 stop_before_p = 1;
6713 if (last_clock == clock)
6714 {
6715 state_transition (curr_state, dfa_stop_insn);
6716 if (TARGET_EARLY_STOP_BITS)
6717 *sort_p = (last_scheduled_insn == NULL_RTX
6718 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6719 else
6720 *sort_p = 0;
6721 return 1;
6722 }
6723 else if (reload_completed)
6724 setup_clocks_p = TRUE;
6725 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6726 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6727 state_reset (curr_state);
6728 else
6729 {
6730 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6731 state_transition (curr_state, dfa_stop_insn);
6732 state_transition (curr_state, dfa_pre_cycle_insn);
6733 state_transition (curr_state, NULL);
6734 }
6735 }
6736 else if (reload_completed)
6737 setup_clocks_p = TRUE;
6738 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6739 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6740 && asm_noperands (PATTERN (insn)) < 0)
6741 {
6742 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6743
6744 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6745 {
6746 sd_iterator_def sd_it;
6747 dep_t dep;
6748 int d = -1;
6749
6750 FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep)
6751 if (DEP_TYPE (dep) == REG_DEP_TRUE)
6752 {
6753 enum attr_itanium_class dep_class;
6754 rtx dep_insn = DEP_PRO (dep);
6755
6756 dep_class = ia64_safe_itanium_class (dep_insn);
6757 if ((dep_class == ITANIUM_CLASS_MMMUL
6758 || dep_class == ITANIUM_CLASS_MMSHF)
6759 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6760 && (d < 0
6761 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6762 d = last_clock - clocks [INSN_UID (dep_insn)];
6763 }
6764 if (d >= 0)
6765 add_cycles [INSN_UID (insn)] = 3 - d;
6766 }
6767 }
6768 return 0;
6769 }
6770
6771 /* Implement targetm.sched.h_i_d_extended hook.
6772 Extend internal data structures. */
6773 static void
6774 ia64_h_i_d_extended (void)
6775 {
6776 if (current_sched_info->flags & DO_SPECULATION)
6777 {
6778 int new_max_uid = get_max_uid () + 1;
6779
6780 spec_check_no = (int *) xrecalloc (spec_check_no, new_max_uid,
6781 max_uid, sizeof (*spec_check_no));
6782 max_uid = new_max_uid;
6783 }
6784
6785 if (stops_p != NULL)
6786 {
6787 int new_clocks_length = get_max_uid () + 1;
6788
6789 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
6790
6791 if (ia64_tune == PROCESSOR_ITANIUM)
6792 {
6793 clocks = (int *) xrecalloc (clocks, new_clocks_length, clocks_length,
6794 sizeof (int));
6795 add_cycles = (int *) xrecalloc (add_cycles, new_clocks_length,
6796 clocks_length, sizeof (int));
6797 }
6798
6799 clocks_length = new_clocks_length;
6800 }
6801 }
6802
6803 /* Constants that help mapping 'enum machine_mode' to int. */
6804 enum SPEC_MODES
6805 {
6806 SPEC_MODE_INVALID = -1,
6807 SPEC_MODE_FIRST = 0,
6808 SPEC_MODE_FOR_EXTEND_FIRST = 1,
6809 SPEC_MODE_FOR_EXTEND_LAST = 3,
6810 SPEC_MODE_LAST = 8
6811 };
6812
6813 /* Return index of the MODE. */
6814 static int
6815 ia64_mode_to_int (enum machine_mode mode)
6816 {
6817 switch (mode)
6818 {
6819 case BImode: return 0; /* SPEC_MODE_FIRST */
6820 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
6821 case HImode: return 2;
6822 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
6823 case DImode: return 4;
6824 case SFmode: return 5;
6825 case DFmode: return 6;
6826 case XFmode: return 7;
6827 case TImode:
6828 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
6829 mentioned in itanium[12].md. Predicate fp_register_operand also
6830 needs to be defined. Bottom line: better disable for now. */
6831 return SPEC_MODE_INVALID;
6832 default: return SPEC_MODE_INVALID;
6833 }
6834 }
6835
6836 /* Provide information about speculation capabilities. */
6837 static void
6838 ia64_set_sched_flags (spec_info_t spec_info)
6839 {
6840 unsigned int *flags = &(current_sched_info->flags);
6841
6842 if (*flags & SCHED_RGN
6843 || *flags & SCHED_EBB)
6844 {
6845 int mask = 0;
6846
6847 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
6848 || (mflag_sched_ar_data_spec && reload_completed))
6849 {
6850 mask |= BEGIN_DATA;
6851
6852 if ((mflag_sched_br_in_data_spec && !reload_completed)
6853 || (mflag_sched_ar_in_data_spec && reload_completed))
6854 mask |= BE_IN_DATA;
6855 }
6856
6857 if (mflag_sched_control_spec)
6858 {
6859 mask |= BEGIN_CONTROL;
6860
6861 if (mflag_sched_in_control_spec)
6862 mask |= BE_IN_CONTROL;
6863 }
6864
6865 if (mask)
6866 {
6867 *flags |= USE_DEPS_LIST | DO_SPECULATION;
6868
6869 if (mask & BE_IN_SPEC)
6870 *flags |= NEW_BBS;
6871
6872 spec_info->mask = mask;
6873 spec_info->flags = 0;
6874
6875 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
6876 spec_info->flags |= PREFER_NON_DATA_SPEC;
6877
6878 if ((mask & CONTROL_SPEC)
6879 && mflag_sched_prefer_non_control_spec_insns)
6880 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
6881
6882 if (mflag_sched_spec_verbose)
6883 {
6884 if (sched_verbose >= 1)
6885 spec_info->dump = sched_dump;
6886 else
6887 spec_info->dump = stderr;
6888 }
6889 else
6890 spec_info->dump = 0;
6891
6892 if (mflag_sched_count_spec_in_critical_path)
6893 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
6894 }
6895 }
6896 }
6897
6898 /* Implement targetm.sched.speculate_insn hook.
6899 Check if the INSN can be TS speculative.
6900 If 'no' - return -1.
6901 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
6902 If current pattern of the INSN already provides TS speculation, return 0. */
6903 static int
6904 ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
6905 {
6906 rtx pat, reg, mem, mem_reg;
6907 int mode_no, gen_p = 1;
6908 bool extend_p;
6909
6910 gcc_assert (!(ts & ~BEGIN_SPEC) && ts);
6911
6912 pat = PATTERN (insn);
6913
6914 if (GET_CODE (pat) == COND_EXEC)
6915 pat = COND_EXEC_CODE (pat);
6916
6917 /* This should be a SET ... */
6918 if (GET_CODE (pat) != SET)
6919 return -1;
6920
6921 reg = SET_DEST (pat);
6922 /* ... to the general/fp register ... */
6923 if (!REG_P (reg) || !(GR_REGNO_P (REGNO (reg)) || FP_REGNO_P (REGNO (reg))))
6924 return -1;
6925
6926 /* ... from the mem ... */
6927 mem = SET_SRC (pat);
6928
6929 /* ... that can, possibly, be a zero_extend ... */
6930 if (GET_CODE (mem) == ZERO_EXTEND)
6931 {
6932 mem = XEXP (mem, 0);
6933 extend_p = true;
6934 }
6935 else
6936 extend_p = false;
6937
6938 /* ... or a speculative load. */
6939 if (GET_CODE (mem) == UNSPEC)
6940 {
6941 int code;
6942
6943 code = XINT (mem, 1);
6944 if (code != UNSPEC_LDA && code != UNSPEC_LDS && code != UNSPEC_LDSA)
6945 return -1;
6946
6947 if ((code == UNSPEC_LDA && !(ts & BEGIN_CONTROL))
6948 || (code == UNSPEC_LDS && !(ts & BEGIN_DATA))
6949 || code == UNSPEC_LDSA)
6950 gen_p = 0;
6951
6952 mem = XVECEXP (mem, 0, 0);
6953 gcc_assert (MEM_P (mem));
6954 }
6955
6956 /* Source should be a mem ... */
6957 if (!MEM_P (mem))
6958 return -1;
6959
6960 /* ... addressed by a register. */
6961 mem_reg = XEXP (mem, 0);
6962 if (!REG_P (mem_reg))
6963 return -1;
6964
6965 /* We should use MEM's mode since REG's mode in presence of ZERO_EXTEND
6966 will always be DImode. */
6967 mode_no = ia64_mode_to_int (GET_MODE (mem));
6968
6969 if (mode_no == SPEC_MODE_INVALID
6970 || (extend_p
6971 && !(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
6972 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)))
6973 return -1;
6974
6975 extract_insn_cached (insn);
6976 gcc_assert (reg == recog_data.operand[0] && mem == recog_data.operand[1]);
6977
6978 *new_pat = ia64_gen_spec_insn (insn, ts, mode_no, gen_p != 0, extend_p);
6979
6980 return gen_p;
6981 }
6982
6983 enum
6984 {
6985 /* Offset to reach ZERO_EXTEND patterns. */
6986 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1,
6987 /* Number of patterns for each speculation mode. */
6988 SPEC_N = (SPEC_MODE_LAST
6989 + SPEC_MODE_FOR_EXTEND_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 2)
6990 };
6991
6992 enum SPEC_GEN_LD_MAP
6993 {
6994 /* Offset to ld.a patterns. */
6995 SPEC_GEN_A = 0 * SPEC_N,
6996 /* Offset to ld.s patterns. */
6997 SPEC_GEN_S = 1 * SPEC_N,
6998 /* Offset to ld.sa patterns. */
6999 SPEC_GEN_SA = 2 * SPEC_N,
7000 /* Offset to ld.sa patterns. For this patterns corresponding ld.c will
7001 mutate to chk.s. */
7002 SPEC_GEN_SA_FOR_S = 3 * SPEC_N
7003 };
7004
7005 /* These offsets are used to get (4 * SPEC_N). */
7006 enum SPEC_GEN_CHECK_OFFSET
7007 {
7008 SPEC_GEN_CHKA_FOR_A_OFFSET = 4 * SPEC_N - SPEC_GEN_A,
7009 SPEC_GEN_CHKA_FOR_SA_OFFSET = 4 * SPEC_N - SPEC_GEN_SA
7010 };
7011
7012 /* If GEN_P is true, calculate the index of needed speculation check and return
7013 speculative pattern for INSN with speculative mode TS, machine mode
7014 MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
7015 If GEN_P is false, just calculate the index of needed speculation check. */
7016 static rtx
7017 ia64_gen_spec_insn (rtx insn, ds_t ts, int mode_no, bool gen_p, bool extend_p)
7018 {
7019 rtx pat, new_pat;
7020 int load_no;
7021 int shift = 0;
7022
7023 static rtx (* const gen_load[]) (rtx, rtx) = {
7024 gen_movbi_advanced,
7025 gen_movqi_advanced,
7026 gen_movhi_advanced,
7027 gen_movsi_advanced,
7028 gen_movdi_advanced,
7029 gen_movsf_advanced,
7030 gen_movdf_advanced,
7031 gen_movxf_advanced,
7032 gen_movti_advanced,
7033 gen_zero_extendqidi2_advanced,
7034 gen_zero_extendhidi2_advanced,
7035 gen_zero_extendsidi2_advanced,
7036
7037 gen_movbi_speculative,
7038 gen_movqi_speculative,
7039 gen_movhi_speculative,
7040 gen_movsi_speculative,
7041 gen_movdi_speculative,
7042 gen_movsf_speculative,
7043 gen_movdf_speculative,
7044 gen_movxf_speculative,
7045 gen_movti_speculative,
7046 gen_zero_extendqidi2_speculative,
7047 gen_zero_extendhidi2_speculative,
7048 gen_zero_extendsidi2_speculative,
7049
7050 gen_movbi_speculative_advanced,
7051 gen_movqi_speculative_advanced,
7052 gen_movhi_speculative_advanced,
7053 gen_movsi_speculative_advanced,
7054 gen_movdi_speculative_advanced,
7055 gen_movsf_speculative_advanced,
7056 gen_movdf_speculative_advanced,
7057 gen_movxf_speculative_advanced,
7058 gen_movti_speculative_advanced,
7059 gen_zero_extendqidi2_speculative_advanced,
7060 gen_zero_extendhidi2_speculative_advanced,
7061 gen_zero_extendsidi2_speculative_advanced,
7062
7063 gen_movbi_speculative_advanced,
7064 gen_movqi_speculative_advanced,
7065 gen_movhi_speculative_advanced,
7066 gen_movsi_speculative_advanced,
7067 gen_movdi_speculative_advanced,
7068 gen_movsf_speculative_advanced,
7069 gen_movdf_speculative_advanced,
7070 gen_movxf_speculative_advanced,
7071 gen_movti_speculative_advanced,
7072 gen_zero_extendqidi2_speculative_advanced,
7073 gen_zero_extendhidi2_speculative_advanced,
7074 gen_zero_extendsidi2_speculative_advanced
7075 };
7076
7077 load_no = extend_p ? mode_no + SPEC_GEN_EXTEND_OFFSET : mode_no;
7078
7079 if (ts & BEGIN_DATA)
7080 {
7081 /* We don't need recovery because even if this is ld.sa
7082 ALAT entry will be allocated only if NAT bit is set to zero.
7083 So it is enough to use ld.c here. */
7084
7085 if (ts & BEGIN_CONTROL)
7086 {
7087 load_no += SPEC_GEN_SA;
7088
7089 if (!mflag_sched_ldc)
7090 shift = SPEC_GEN_CHKA_FOR_SA_OFFSET;
7091 }
7092 else
7093 {
7094 load_no += SPEC_GEN_A;
7095
7096 if (!mflag_sched_ldc)
7097 shift = SPEC_GEN_CHKA_FOR_A_OFFSET;
7098 }
7099 }
7100 else if (ts & BEGIN_CONTROL)
7101 {
7102 /* ld.sa can be used instead of ld.s to avoid basic block splitting. */
7103 if (!mflag_control_ldc)
7104 load_no += SPEC_GEN_S;
7105 else
7106 {
7107 gcc_assert (mflag_sched_ldc);
7108 load_no += SPEC_GEN_SA_FOR_S;
7109 }
7110 }
7111 else
7112 gcc_unreachable ();
7113
7114 /* Set the desired check index. We add '1', because zero element in this
7115 array means, that instruction with such uid is non-speculative. */
7116 spec_check_no[INSN_UID (insn)] = load_no + shift + 1;
7117
7118 if (!gen_p)
7119 return 0;
7120
7121 new_pat = gen_load[load_no] (copy_rtx (recog_data.operand[0]),
7122 copy_rtx (recog_data.operand[1]));
7123
7124 pat = PATTERN (insn);
7125 if (GET_CODE (pat) == COND_EXEC)
7126 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx
7127 (COND_EXEC_TEST (pat)), new_pat);
7128
7129 return new_pat;
7130 }
7131
7132 /* Offset to branchy checks. */
7133 enum { SPEC_GEN_CHECK_MUTATION_OFFSET = 5 * SPEC_N };
7134
7135 /* Return nonzero, if INSN needs branchy recovery check. */
7136 static bool
7137 ia64_needs_block_p (const_rtx insn)
7138 {
7139 int check_no;
7140
7141 check_no = spec_check_no[INSN_UID(insn)] - 1;
7142 gcc_assert (0 <= check_no && check_no < SPEC_GEN_CHECK_MUTATION_OFFSET);
7143
7144 return ((SPEC_GEN_S <= check_no && check_no < SPEC_GEN_S + SPEC_N)
7145 || (4 * SPEC_N <= check_no && check_no < 4 * SPEC_N + SPEC_N));
7146 }
7147
7148 /* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN.
7149 If (LABEL != 0 || MUTATE_P), generate branchy recovery check.
7150 Otherwise, generate a simple check. */
7151 static rtx
7152 ia64_gen_check (rtx insn, rtx label, bool mutate_p)
7153 {
7154 rtx op1, pat, check_pat;
7155
7156 static rtx (* const gen_check[]) (rtx, rtx) = {
7157 gen_movbi_clr,
7158 gen_movqi_clr,
7159 gen_movhi_clr,
7160 gen_movsi_clr,
7161 gen_movdi_clr,
7162 gen_movsf_clr,
7163 gen_movdf_clr,
7164 gen_movxf_clr,
7165 gen_movti_clr,
7166 gen_zero_extendqidi2_clr,
7167 gen_zero_extendhidi2_clr,
7168 gen_zero_extendsidi2_clr,
7169
7170 gen_speculation_check_bi,
7171 gen_speculation_check_qi,
7172 gen_speculation_check_hi,
7173 gen_speculation_check_si,
7174 gen_speculation_check_di,
7175 gen_speculation_check_sf,
7176 gen_speculation_check_df,
7177 gen_speculation_check_xf,
7178 gen_speculation_check_ti,
7179 gen_speculation_check_di,
7180 gen_speculation_check_di,
7181 gen_speculation_check_di,
7182
7183 gen_movbi_clr,
7184 gen_movqi_clr,
7185 gen_movhi_clr,
7186 gen_movsi_clr,
7187 gen_movdi_clr,
7188 gen_movsf_clr,
7189 gen_movdf_clr,
7190 gen_movxf_clr,
7191 gen_movti_clr,
7192 gen_zero_extendqidi2_clr,
7193 gen_zero_extendhidi2_clr,
7194 gen_zero_extendsidi2_clr,
7195
7196 gen_movbi_clr,
7197 gen_movqi_clr,
7198 gen_movhi_clr,
7199 gen_movsi_clr,
7200 gen_movdi_clr,
7201 gen_movsf_clr,
7202 gen_movdf_clr,
7203 gen_movxf_clr,
7204 gen_movti_clr,
7205 gen_zero_extendqidi2_clr,
7206 gen_zero_extendhidi2_clr,
7207 gen_zero_extendsidi2_clr,
7208
7209 gen_advanced_load_check_clr_bi,
7210 gen_advanced_load_check_clr_qi,
7211 gen_advanced_load_check_clr_hi,
7212 gen_advanced_load_check_clr_si,
7213 gen_advanced_load_check_clr_di,
7214 gen_advanced_load_check_clr_sf,
7215 gen_advanced_load_check_clr_df,
7216 gen_advanced_load_check_clr_xf,
7217 gen_advanced_load_check_clr_ti,
7218 gen_advanced_load_check_clr_di,
7219 gen_advanced_load_check_clr_di,
7220 gen_advanced_load_check_clr_di,
7221
7222 /* Following checks are generated during mutation. */
7223 gen_advanced_load_check_clr_bi,
7224 gen_advanced_load_check_clr_qi,
7225 gen_advanced_load_check_clr_hi,
7226 gen_advanced_load_check_clr_si,
7227 gen_advanced_load_check_clr_di,
7228 gen_advanced_load_check_clr_sf,
7229 gen_advanced_load_check_clr_df,
7230 gen_advanced_load_check_clr_xf,
7231 gen_advanced_load_check_clr_ti,
7232 gen_advanced_load_check_clr_di,
7233 gen_advanced_load_check_clr_di,
7234 gen_advanced_load_check_clr_di,
7235
7236 0,0,0,0,0,0,0,0,0,0,0,0,
7237
7238 gen_advanced_load_check_clr_bi,
7239 gen_advanced_load_check_clr_qi,
7240 gen_advanced_load_check_clr_hi,
7241 gen_advanced_load_check_clr_si,
7242 gen_advanced_load_check_clr_di,
7243 gen_advanced_load_check_clr_sf,
7244 gen_advanced_load_check_clr_df,
7245 gen_advanced_load_check_clr_xf,
7246 gen_advanced_load_check_clr_ti,
7247 gen_advanced_load_check_clr_di,
7248 gen_advanced_load_check_clr_di,
7249 gen_advanced_load_check_clr_di,
7250
7251 gen_speculation_check_bi,
7252 gen_speculation_check_qi,
7253 gen_speculation_check_hi,
7254 gen_speculation_check_si,
7255 gen_speculation_check_di,
7256 gen_speculation_check_sf,
7257 gen_speculation_check_df,
7258 gen_speculation_check_xf,
7259 gen_speculation_check_ti,
7260 gen_speculation_check_di,
7261 gen_speculation_check_di,
7262 gen_speculation_check_di
7263 };
7264
7265 extract_insn_cached (insn);
7266
7267 if (label)
7268 {
7269 gcc_assert (mutate_p || ia64_needs_block_p (insn));
7270 op1 = label;
7271 }
7272 else
7273 {
7274 gcc_assert (!mutate_p && !ia64_needs_block_p (insn));
7275 op1 = copy_rtx (recog_data.operand[1]);
7276 }
7277
7278 if (mutate_p)
7279 /* INSN is ld.c.
7280 Find the speculation check number by searching for original
7281 speculative load in the RESOLVED_DEPS list of INSN.
7282 As long as patterns are unique for each instruction, this can be
7283 accomplished by matching ORIG_PAT fields. */
7284 {
7285 sd_iterator_def sd_it;
7286 dep_t dep;
7287 int check_no = 0;
7288 rtx orig_pat = ORIG_PAT (insn);
7289
7290 FOR_EACH_DEP (insn, SD_LIST_RES_BACK, sd_it, dep)
7291 {
7292 rtx x = DEP_PRO (dep);
7293
7294 if (ORIG_PAT (x) == orig_pat)
7295 check_no = spec_check_no[INSN_UID (x)];
7296 }
7297 gcc_assert (check_no);
7298
7299 spec_check_no[INSN_UID (insn)] = (check_no
7300 + SPEC_GEN_CHECK_MUTATION_OFFSET);
7301 }
7302
7303 check_pat = (gen_check[spec_check_no[INSN_UID (insn)] - 1]
7304 (copy_rtx (recog_data.operand[0]), op1));
7305
7306 pat = PATTERN (insn);
7307 if (GET_CODE (pat) == COND_EXEC)
7308 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7309 check_pat);
7310
7311 return check_pat;
7312 }
7313
7314 /* Return nonzero, if X is branchy recovery check. */
7315 static int
7316 ia64_spec_check_p (rtx x)
7317 {
7318 x = PATTERN (x);
7319 if (GET_CODE (x) == COND_EXEC)
7320 x = COND_EXEC_CODE (x);
7321 if (GET_CODE (x) == SET)
7322 return ia64_spec_check_src_p (SET_SRC (x));
7323 return 0;
7324 }
7325
7326 /* Return nonzero, if SRC belongs to recovery check. */
7327 static int
7328 ia64_spec_check_src_p (rtx src)
7329 {
7330 if (GET_CODE (src) == IF_THEN_ELSE)
7331 {
7332 rtx t;
7333
7334 t = XEXP (src, 0);
7335 if (GET_CODE (t) == NE)
7336 {
7337 t = XEXP (t, 0);
7338
7339 if (GET_CODE (t) == UNSPEC)
7340 {
7341 int code;
7342
7343 code = XINT (t, 1);
7344
7345 if (code == UNSPEC_CHKACLR
7346 || code == UNSPEC_CHKS
7347 || code == UNSPEC_LDCCLR)
7348 {
7349 gcc_assert (code != 0);
7350 return code;
7351 }
7352 }
7353 }
7354 }
7355 return 0;
7356 }
7357 \f
7358
7359 /* The following page contains abstract data `bundle states' which are
7360 used for bundling insns (inserting nops and template generation). */
7361
7362 /* The following describes state of insn bundling. */
7363
7364 struct bundle_state
7365 {
7366 /* Unique bundle state number to identify them in the debugging
7367 output */
7368 int unique_num;
7369 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
7370 /* number nops before and after the insn */
7371 short before_nops_num, after_nops_num;
7372 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
7373 insn */
7374 int cost; /* cost of the state in cycles */
7375 int accumulated_insns_num; /* number of all previous insns including
7376 nops. L is considered as 2 insns */
7377 int branch_deviation; /* deviation of previous branches from 3rd slots */
7378 struct bundle_state *next; /* next state with the same insn_num */
7379 struct bundle_state *originator; /* originator (previous insn state) */
7380 /* All bundle states are in the following chain. */
7381 struct bundle_state *allocated_states_chain;
7382 /* The DFA State after issuing the insn and the nops. */
7383 state_t dfa_state;
7384 };
7385
7386 /* The following is map insn number to the corresponding bundle state. */
7387
7388 static struct bundle_state **index_to_bundle_states;
7389
7390 /* The unique number of next bundle state. */
7391
7392 static int bundle_states_num;
7393
7394 /* All allocated bundle states are in the following chain. */
7395
7396 static struct bundle_state *allocated_bundle_states_chain;
7397
7398 /* All allocated but not used bundle states are in the following
7399 chain. */
7400
7401 static struct bundle_state *free_bundle_state_chain;
7402
7403
7404 /* The following function returns a free bundle state. */
7405
7406 static struct bundle_state *
7407 get_free_bundle_state (void)
7408 {
7409 struct bundle_state *result;
7410
7411 if (free_bundle_state_chain != NULL)
7412 {
7413 result = free_bundle_state_chain;
7414 free_bundle_state_chain = result->next;
7415 }
7416 else
7417 {
7418 result = XNEW (struct bundle_state);
7419 result->dfa_state = xmalloc (dfa_state_size);
7420 result->allocated_states_chain = allocated_bundle_states_chain;
7421 allocated_bundle_states_chain = result;
7422 }
7423 result->unique_num = bundle_states_num++;
7424 return result;
7425
7426 }
7427
7428 /* The following function frees given bundle state. */
7429
7430 static void
7431 free_bundle_state (struct bundle_state *state)
7432 {
7433 state->next = free_bundle_state_chain;
7434 free_bundle_state_chain = state;
7435 }
7436
7437 /* Start work with abstract data `bundle states'. */
7438
7439 static void
7440 initiate_bundle_states (void)
7441 {
7442 bundle_states_num = 0;
7443 free_bundle_state_chain = NULL;
7444 allocated_bundle_states_chain = NULL;
7445 }
7446
7447 /* Finish work with abstract data `bundle states'. */
7448
7449 static void
7450 finish_bundle_states (void)
7451 {
7452 struct bundle_state *curr_state, *next_state;
7453
7454 for (curr_state = allocated_bundle_states_chain;
7455 curr_state != NULL;
7456 curr_state = next_state)
7457 {
7458 next_state = curr_state->allocated_states_chain;
7459 free (curr_state->dfa_state);
7460 free (curr_state);
7461 }
7462 }
7463
7464 /* Hash table of the bundle states. The key is dfa_state and insn_num
7465 of the bundle states. */
7466
7467 static htab_t bundle_state_table;
7468
7469 /* The function returns hash of BUNDLE_STATE. */
7470
7471 static unsigned
7472 bundle_state_hash (const void *bundle_state)
7473 {
7474 const struct bundle_state *const state
7475 = (const struct bundle_state *) bundle_state;
7476 unsigned result, i;
7477
7478 for (result = i = 0; i < dfa_state_size; i++)
7479 result += (((unsigned char *) state->dfa_state) [i]
7480 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
7481 return result + state->insn_num;
7482 }
7483
7484 /* The function returns nonzero if the bundle state keys are equal. */
7485
7486 static int
7487 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
7488 {
7489 const struct bundle_state *const state1
7490 = (const struct bundle_state *) bundle_state_1;
7491 const struct bundle_state *const state2
7492 = (const struct bundle_state *) bundle_state_2;
7493
7494 return (state1->insn_num == state2->insn_num
7495 && memcmp (state1->dfa_state, state2->dfa_state,
7496 dfa_state_size) == 0);
7497 }
7498
7499 /* The function inserts the BUNDLE_STATE into the hash table. The
7500 function returns nonzero if the bundle has been inserted into the
7501 table. The table contains the best bundle state with given key. */
7502
7503 static int
7504 insert_bundle_state (struct bundle_state *bundle_state)
7505 {
7506 void **entry_ptr;
7507
7508 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
7509 if (*entry_ptr == NULL)
7510 {
7511 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
7512 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
7513 *entry_ptr = (void *) bundle_state;
7514 return TRUE;
7515 }
7516 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
7517 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
7518 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
7519 > bundle_state->accumulated_insns_num
7520 || (((struct bundle_state *)
7521 *entry_ptr)->accumulated_insns_num
7522 == bundle_state->accumulated_insns_num
7523 && ((struct bundle_state *)
7524 *entry_ptr)->branch_deviation
7525 > bundle_state->branch_deviation))))
7526
7527 {
7528 struct bundle_state temp;
7529
7530 temp = *(struct bundle_state *) *entry_ptr;
7531 *(struct bundle_state *) *entry_ptr = *bundle_state;
7532 ((struct bundle_state *) *entry_ptr)->next = temp.next;
7533 *bundle_state = temp;
7534 }
7535 return FALSE;
7536 }
7537
7538 /* Start work with the hash table. */
7539
7540 static void
7541 initiate_bundle_state_table (void)
7542 {
7543 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
7544 (htab_del) 0);
7545 }
7546
7547 /* Finish work with the hash table. */
7548
7549 static void
7550 finish_bundle_state_table (void)
7551 {
7552 htab_delete (bundle_state_table);
7553 }
7554
7555 \f
7556
7557 /* The following variable is a insn `nop' used to check bundle states
7558 with different number of inserted nops. */
7559
7560 static rtx ia64_nop;
7561
7562 /* The following function tries to issue NOPS_NUM nops for the current
7563 state without advancing processor cycle. If it failed, the
7564 function returns FALSE and frees the current state. */
7565
7566 static int
7567 try_issue_nops (struct bundle_state *curr_state, int nops_num)
7568 {
7569 int i;
7570
7571 for (i = 0; i < nops_num; i++)
7572 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
7573 {
7574 free_bundle_state (curr_state);
7575 return FALSE;
7576 }
7577 return TRUE;
7578 }
7579
7580 /* The following function tries to issue INSN for the current
7581 state without advancing processor cycle. If it failed, the
7582 function returns FALSE and frees the current state. */
7583
7584 static int
7585 try_issue_insn (struct bundle_state *curr_state, rtx insn)
7586 {
7587 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
7588 {
7589 free_bundle_state (curr_state);
7590 return FALSE;
7591 }
7592 return TRUE;
7593 }
7594
7595 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
7596 starting with ORIGINATOR without advancing processor cycle. If
7597 TRY_BUNDLE_END_P is TRUE, the function also/only (if
7598 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
7599 If it was successful, the function creates new bundle state and
7600 insert into the hash table and into `index_to_bundle_states'. */
7601
7602 static void
7603 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
7604 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
7605 {
7606 struct bundle_state *curr_state;
7607
7608 curr_state = get_free_bundle_state ();
7609 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
7610 curr_state->insn = insn;
7611 curr_state->insn_num = originator->insn_num + 1;
7612 curr_state->cost = originator->cost;
7613 curr_state->originator = originator;
7614 curr_state->before_nops_num = before_nops_num;
7615 curr_state->after_nops_num = 0;
7616 curr_state->accumulated_insns_num
7617 = originator->accumulated_insns_num + before_nops_num;
7618 curr_state->branch_deviation = originator->branch_deviation;
7619 gcc_assert (insn);
7620 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
7621 {
7622 gcc_assert (GET_MODE (insn) != TImode);
7623 if (!try_issue_nops (curr_state, before_nops_num))
7624 return;
7625 if (!try_issue_insn (curr_state, insn))
7626 return;
7627 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
7628 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
7629 && curr_state->accumulated_insns_num % 3 != 0)
7630 {
7631 free_bundle_state (curr_state);
7632 return;
7633 }
7634 }
7635 else if (GET_MODE (insn) != TImode)
7636 {
7637 if (!try_issue_nops (curr_state, before_nops_num))
7638 return;
7639 if (!try_issue_insn (curr_state, insn))
7640 return;
7641 curr_state->accumulated_insns_num++;
7642 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
7643 && asm_noperands (PATTERN (insn)) < 0);
7644
7645 if (ia64_safe_type (insn) == TYPE_L)
7646 curr_state->accumulated_insns_num++;
7647 }
7648 else
7649 {
7650 /* If this is an insn that must be first in a group, then don't allow
7651 nops to be emitted before it. Currently, alloc is the only such
7652 supported instruction. */
7653 /* ??? The bundling automatons should handle this for us, but they do
7654 not yet have support for the first_insn attribute. */
7655 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
7656 {
7657 free_bundle_state (curr_state);
7658 return;
7659 }
7660
7661 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
7662 state_transition (curr_state->dfa_state, NULL);
7663 curr_state->cost++;
7664 if (!try_issue_nops (curr_state, before_nops_num))
7665 return;
7666 if (!try_issue_insn (curr_state, insn))
7667 return;
7668 curr_state->accumulated_insns_num++;
7669 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7670 || asm_noperands (PATTERN (insn)) >= 0)
7671 {
7672 /* Finish bundle containing asm insn. */
7673 curr_state->after_nops_num
7674 = 3 - curr_state->accumulated_insns_num % 3;
7675 curr_state->accumulated_insns_num
7676 += 3 - curr_state->accumulated_insns_num % 3;
7677 }
7678 else if (ia64_safe_type (insn) == TYPE_L)
7679 curr_state->accumulated_insns_num++;
7680 }
7681 if (ia64_safe_type (insn) == TYPE_B)
7682 curr_state->branch_deviation
7683 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
7684 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
7685 {
7686 if (!only_bundle_end_p && insert_bundle_state (curr_state))
7687 {
7688 state_t dfa_state;
7689 struct bundle_state *curr_state1;
7690 struct bundle_state *allocated_states_chain;
7691
7692 curr_state1 = get_free_bundle_state ();
7693 dfa_state = curr_state1->dfa_state;
7694 allocated_states_chain = curr_state1->allocated_states_chain;
7695 *curr_state1 = *curr_state;
7696 curr_state1->dfa_state = dfa_state;
7697 curr_state1->allocated_states_chain = allocated_states_chain;
7698 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
7699 dfa_state_size);
7700 curr_state = curr_state1;
7701 }
7702 if (!try_issue_nops (curr_state,
7703 3 - curr_state->accumulated_insns_num % 3))
7704 return;
7705 curr_state->after_nops_num
7706 = 3 - curr_state->accumulated_insns_num % 3;
7707 curr_state->accumulated_insns_num
7708 += 3 - curr_state->accumulated_insns_num % 3;
7709 }
7710 if (!insert_bundle_state (curr_state))
7711 free_bundle_state (curr_state);
7712 return;
7713 }
7714
7715 /* The following function returns position in the two window bundle
7716 for given STATE. */
7717
7718 static int
7719 get_max_pos (state_t state)
7720 {
7721 if (cpu_unit_reservation_p (state, pos_6))
7722 return 6;
7723 else if (cpu_unit_reservation_p (state, pos_5))
7724 return 5;
7725 else if (cpu_unit_reservation_p (state, pos_4))
7726 return 4;
7727 else if (cpu_unit_reservation_p (state, pos_3))
7728 return 3;
7729 else if (cpu_unit_reservation_p (state, pos_2))
7730 return 2;
7731 else if (cpu_unit_reservation_p (state, pos_1))
7732 return 1;
7733 else
7734 return 0;
7735 }
7736
7737 /* The function returns code of a possible template for given position
7738 and state. The function should be called only with 2 values of
7739 position equal to 3 or 6. We avoid generating F NOPs by putting
7740 templates containing F insns at the end of the template search
7741 because undocumented anomaly in McKinley derived cores which can
7742 cause stalls if an F-unit insn (including a NOP) is issued within a
7743 six-cycle window after reading certain application registers (such
7744 as ar.bsp). Furthermore, power-considerations also argue against
7745 the use of F-unit instructions unless they're really needed. */
7746
7747 static int
7748 get_template (state_t state, int pos)
7749 {
7750 switch (pos)
7751 {
7752 case 3:
7753 if (cpu_unit_reservation_p (state, _0mmi_))
7754 return 1;
7755 else if (cpu_unit_reservation_p (state, _0mii_))
7756 return 0;
7757 else if (cpu_unit_reservation_p (state, _0mmb_))
7758 return 7;
7759 else if (cpu_unit_reservation_p (state, _0mib_))
7760 return 6;
7761 else if (cpu_unit_reservation_p (state, _0mbb_))
7762 return 5;
7763 else if (cpu_unit_reservation_p (state, _0bbb_))
7764 return 4;
7765 else if (cpu_unit_reservation_p (state, _0mmf_))
7766 return 3;
7767 else if (cpu_unit_reservation_p (state, _0mfi_))
7768 return 2;
7769 else if (cpu_unit_reservation_p (state, _0mfb_))
7770 return 8;
7771 else if (cpu_unit_reservation_p (state, _0mlx_))
7772 return 9;
7773 else
7774 gcc_unreachable ();
7775 case 6:
7776 if (cpu_unit_reservation_p (state, _1mmi_))
7777 return 1;
7778 else if (cpu_unit_reservation_p (state, _1mii_))
7779 return 0;
7780 else if (cpu_unit_reservation_p (state, _1mmb_))
7781 return 7;
7782 else if (cpu_unit_reservation_p (state, _1mib_))
7783 return 6;
7784 else if (cpu_unit_reservation_p (state, _1mbb_))
7785 return 5;
7786 else if (cpu_unit_reservation_p (state, _1bbb_))
7787 return 4;
7788 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
7789 return 3;
7790 else if (cpu_unit_reservation_p (state, _1mfi_))
7791 return 2;
7792 else if (cpu_unit_reservation_p (state, _1mfb_))
7793 return 8;
7794 else if (cpu_unit_reservation_p (state, _1mlx_))
7795 return 9;
7796 else
7797 gcc_unreachable ();
7798 default:
7799 gcc_unreachable ();
7800 }
7801 }
7802
7803 /* The following function returns an insn important for insn bundling
7804 followed by INSN and before TAIL. */
7805
7806 static rtx
7807 get_next_important_insn (rtx insn, rtx tail)
7808 {
7809 for (; insn && insn != tail; insn = NEXT_INSN (insn))
7810 if (INSN_P (insn)
7811 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7812 && GET_CODE (PATTERN (insn)) != USE
7813 && GET_CODE (PATTERN (insn)) != CLOBBER)
7814 return insn;
7815 return NULL_RTX;
7816 }
7817
7818 /* Add a bundle selector TEMPLATE0 before INSN. */
7819
7820 static void
7821 ia64_add_bundle_selector_before (int template0, rtx insn)
7822 {
7823 rtx b = gen_bundle_selector (GEN_INT (template0));
7824
7825 ia64_emit_insn_before (b, insn);
7826 #if NR_BUNDLES == 10
7827 if ((template0 == 4 || template0 == 5)
7828 && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
7829 {
7830 int i;
7831 rtx note = NULL_RTX;
7832
7833 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
7834 first or second slot. If it is and has REG_EH_NOTE set, copy it
7835 to following nops, as br.call sets rp to the address of following
7836 bundle and therefore an EH region end must be on a bundle
7837 boundary. */
7838 insn = PREV_INSN (insn);
7839 for (i = 0; i < 3; i++)
7840 {
7841 do
7842 insn = next_active_insn (insn);
7843 while (GET_CODE (insn) == INSN
7844 && get_attr_empty (insn) == EMPTY_YES);
7845 if (GET_CODE (insn) == CALL_INSN)
7846 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
7847 else if (note)
7848 {
7849 int code;
7850
7851 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
7852 || code == CODE_FOR_nop_b);
7853 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
7854 note = NULL_RTX;
7855 else
7856 REG_NOTES (insn)
7857 = gen_rtx_EXPR_LIST (REG_EH_REGION, XEXP (note, 0),
7858 REG_NOTES (insn));
7859 }
7860 }
7861 }
7862 #endif
7863 }
7864
7865 /* The following function does insn bundling. Bundling means
7866 inserting templates and nop insns to fit insn groups into permitted
7867 templates. Instruction scheduling uses NDFA (non-deterministic
7868 finite automata) encoding informations about the templates and the
7869 inserted nops. Nondeterminism of the automata permits follows
7870 all possible insn sequences very fast.
7871
7872 Unfortunately it is not possible to get information about inserting
7873 nop insns and used templates from the automata states. The
7874 automata only says that we can issue an insn possibly inserting
7875 some nops before it and using some template. Therefore insn
7876 bundling in this function is implemented by using DFA
7877 (deterministic finite automata). We follow all possible insn
7878 sequences by inserting 0-2 nops (that is what the NDFA describe for
7879 insn scheduling) before/after each insn being bundled. We know the
7880 start of simulated processor cycle from insn scheduling (insn
7881 starting a new cycle has TImode).
7882
7883 Simple implementation of insn bundling would create enormous
7884 number of possible insn sequences satisfying information about new
7885 cycle ticks taken from the insn scheduling. To make the algorithm
7886 practical we use dynamic programming. Each decision (about
7887 inserting nops and implicitly about previous decisions) is described
7888 by structure bundle_state (see above). If we generate the same
7889 bundle state (key is automaton state after issuing the insns and
7890 nops for it), we reuse already generated one. As consequence we
7891 reject some decisions which cannot improve the solution and
7892 reduce memory for the algorithm.
7893
7894 When we reach the end of EBB (extended basic block), we choose the
7895 best sequence and then, moving back in EBB, insert templates for
7896 the best alternative. The templates are taken from querying
7897 automaton state for each insn in chosen bundle states.
7898
7899 So the algorithm makes two (forward and backward) passes through
7900 EBB. There is an additional forward pass through EBB for Itanium1
7901 processor. This pass inserts more nops to make dependency between
7902 a producer insn and MMMUL/MMSHF at least 4 cycles long. */
7903
7904 static void
7905 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
7906 {
7907 struct bundle_state *curr_state, *next_state, *best_state;
7908 rtx insn, next_insn;
7909 int insn_num;
7910 int i, bundle_end_p, only_bundle_end_p, asm_p;
7911 int pos = 0, max_pos, template0, template1;
7912 rtx b;
7913 rtx nop;
7914 enum attr_type type;
7915
7916 insn_num = 0;
7917 /* Count insns in the EBB. */
7918 for (insn = NEXT_INSN (prev_head_insn);
7919 insn && insn != tail;
7920 insn = NEXT_INSN (insn))
7921 if (INSN_P (insn))
7922 insn_num++;
7923 if (insn_num == 0)
7924 return;
7925 bundling_p = 1;
7926 dfa_clean_insn_cache ();
7927 initiate_bundle_state_table ();
7928 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
7929 /* First (forward) pass -- generation of bundle states. */
7930 curr_state = get_free_bundle_state ();
7931 curr_state->insn = NULL;
7932 curr_state->before_nops_num = 0;
7933 curr_state->after_nops_num = 0;
7934 curr_state->insn_num = 0;
7935 curr_state->cost = 0;
7936 curr_state->accumulated_insns_num = 0;
7937 curr_state->branch_deviation = 0;
7938 curr_state->next = NULL;
7939 curr_state->originator = NULL;
7940 state_reset (curr_state->dfa_state);
7941 index_to_bundle_states [0] = curr_state;
7942 insn_num = 0;
7943 /* Shift cycle mark if it is put on insn which could be ignored. */
7944 for (insn = NEXT_INSN (prev_head_insn);
7945 insn != tail;
7946 insn = NEXT_INSN (insn))
7947 if (INSN_P (insn)
7948 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7949 || GET_CODE (PATTERN (insn)) == USE
7950 || GET_CODE (PATTERN (insn)) == CLOBBER)
7951 && GET_MODE (insn) == TImode)
7952 {
7953 PUT_MODE (insn, VOIDmode);
7954 for (next_insn = NEXT_INSN (insn);
7955 next_insn != tail;
7956 next_insn = NEXT_INSN (next_insn))
7957 if (INSN_P (next_insn)
7958 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
7959 && GET_CODE (PATTERN (next_insn)) != USE
7960 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
7961 {
7962 PUT_MODE (next_insn, TImode);
7963 break;
7964 }
7965 }
7966 /* Forward pass: generation of bundle states. */
7967 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7968 insn != NULL_RTX;
7969 insn = next_insn)
7970 {
7971 gcc_assert (INSN_P (insn)
7972 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7973 && GET_CODE (PATTERN (insn)) != USE
7974 && GET_CODE (PATTERN (insn)) != CLOBBER);
7975 type = ia64_safe_type (insn);
7976 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7977 insn_num++;
7978 index_to_bundle_states [insn_num] = NULL;
7979 for (curr_state = index_to_bundle_states [insn_num - 1];
7980 curr_state != NULL;
7981 curr_state = next_state)
7982 {
7983 pos = curr_state->accumulated_insns_num % 3;
7984 next_state = curr_state->next;
7985 /* We must fill up the current bundle in order to start a
7986 subsequent asm insn in a new bundle. Asm insn is always
7987 placed in a separate bundle. */
7988 only_bundle_end_p
7989 = (next_insn != NULL_RTX
7990 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
7991 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
7992 /* We may fill up the current bundle if it is the cycle end
7993 without a group barrier. */
7994 bundle_end_p
7995 = (only_bundle_end_p || next_insn == NULL_RTX
7996 || (GET_MODE (next_insn) == TImode
7997 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
7998 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
7999 || type == TYPE_S
8000 /* We need to insert 2 nops for cases like M_MII. To
8001 guarantee issuing all insns on the same cycle for
8002 Itanium 1, we need to issue 2 nops after the first M
8003 insn (MnnMII where n is a nop insn). */
8004 || ((type == TYPE_M || type == TYPE_A)
8005 && ia64_tune == PROCESSOR_ITANIUM
8006 && !bundle_end_p && pos == 1))
8007 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8008 only_bundle_end_p);
8009 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8010 only_bundle_end_p);
8011 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8012 only_bundle_end_p);
8013 }
8014 gcc_assert (index_to_bundle_states [insn_num]);
8015 for (curr_state = index_to_bundle_states [insn_num];
8016 curr_state != NULL;
8017 curr_state = curr_state->next)
8018 if (verbose >= 2 && dump)
8019 {
8020 /* This structure is taken from generated code of the
8021 pipeline hazard recognizer (see file insn-attrtab.c).
8022 Please don't forget to change the structure if a new
8023 automaton is added to .md file. */
8024 struct DFA_chip
8025 {
8026 unsigned short one_automaton_state;
8027 unsigned short oneb_automaton_state;
8028 unsigned short two_automaton_state;
8029 unsigned short twob_automaton_state;
8030 };
8031
8032 fprintf
8033 (dump,
8034 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
8035 curr_state->unique_num,
8036 (curr_state->originator == NULL
8037 ? -1 : curr_state->originator->unique_num),
8038 curr_state->cost,
8039 curr_state->before_nops_num, curr_state->after_nops_num,
8040 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8041 (ia64_tune == PROCESSOR_ITANIUM
8042 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
8043 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
8044 INSN_UID (insn));
8045 }
8046 }
8047
8048 /* We should find a solution because the 2nd insn scheduling has
8049 found one. */
8050 gcc_assert (index_to_bundle_states [insn_num]);
8051 /* Find a state corresponding to the best insn sequence. */
8052 best_state = NULL;
8053 for (curr_state = index_to_bundle_states [insn_num];
8054 curr_state != NULL;
8055 curr_state = curr_state->next)
8056 /* We are just looking at the states with fully filled up last
8057 bundle. The first we prefer insn sequences with minimal cost
8058 then with minimal inserted nops and finally with branch insns
8059 placed in the 3rd slots. */
8060 if (curr_state->accumulated_insns_num % 3 == 0
8061 && (best_state == NULL || best_state->cost > curr_state->cost
8062 || (best_state->cost == curr_state->cost
8063 && (curr_state->accumulated_insns_num
8064 < best_state->accumulated_insns_num
8065 || (curr_state->accumulated_insns_num
8066 == best_state->accumulated_insns_num
8067 && curr_state->branch_deviation
8068 < best_state->branch_deviation)))))
8069 best_state = curr_state;
8070 /* Second (backward) pass: adding nops and templates. */
8071 insn_num = best_state->before_nops_num;
8072 template0 = template1 = -1;
8073 for (curr_state = best_state;
8074 curr_state->originator != NULL;
8075 curr_state = curr_state->originator)
8076 {
8077 insn = curr_state->insn;
8078 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8079 || asm_noperands (PATTERN (insn)) >= 0);
8080 insn_num++;
8081 if (verbose >= 2 && dump)
8082 {
8083 struct DFA_chip
8084 {
8085 unsigned short one_automaton_state;
8086 unsigned short oneb_automaton_state;
8087 unsigned short two_automaton_state;
8088 unsigned short twob_automaton_state;
8089 };
8090
8091 fprintf
8092 (dump,
8093 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
8094 curr_state->unique_num,
8095 (curr_state->originator == NULL
8096 ? -1 : curr_state->originator->unique_num),
8097 curr_state->cost,
8098 curr_state->before_nops_num, curr_state->after_nops_num,
8099 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8100 (ia64_tune == PROCESSOR_ITANIUM
8101 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
8102 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
8103 INSN_UID (insn));
8104 }
8105 /* Find the position in the current bundle window. The window can
8106 contain at most two bundles. Two bundle window means that
8107 the processor will make two bundle rotation. */
8108 max_pos = get_max_pos (curr_state->dfa_state);
8109 if (max_pos == 6
8110 /* The following (negative template number) means that the
8111 processor did one bundle rotation. */
8112 || (max_pos == 3 && template0 < 0))
8113 {
8114 /* We are at the end of the window -- find template(s) for
8115 its bundle(s). */
8116 pos = max_pos;
8117 if (max_pos == 3)
8118 template0 = get_template (curr_state->dfa_state, 3);
8119 else
8120 {
8121 template1 = get_template (curr_state->dfa_state, 3);
8122 template0 = get_template (curr_state->dfa_state, 6);
8123 }
8124 }
8125 if (max_pos > 3 && template1 < 0)
8126 /* It may happen when we have the stop inside a bundle. */
8127 {
8128 gcc_assert (pos <= 3);
8129 template1 = get_template (curr_state->dfa_state, 3);
8130 pos += 3;
8131 }
8132 if (!asm_p)
8133 /* Emit nops after the current insn. */
8134 for (i = 0; i < curr_state->after_nops_num; i++)
8135 {
8136 nop = gen_nop ();
8137 emit_insn_after (nop, insn);
8138 pos--;
8139 gcc_assert (pos >= 0);
8140 if (pos % 3 == 0)
8141 {
8142 /* We are at the start of a bundle: emit the template
8143 (it should be defined). */
8144 gcc_assert (template0 >= 0);
8145 ia64_add_bundle_selector_before (template0, nop);
8146 /* If we have two bundle window, we make one bundle
8147 rotation. Otherwise template0 will be undefined
8148 (negative value). */
8149 template0 = template1;
8150 template1 = -1;
8151 }
8152 }
8153 /* Move the position backward in the window. Group barrier has
8154 no slot. Asm insn takes all bundle. */
8155 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8156 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8157 && asm_noperands (PATTERN (insn)) < 0)
8158 pos--;
8159 /* Long insn takes 2 slots. */
8160 if (ia64_safe_type (insn) == TYPE_L)
8161 pos--;
8162 gcc_assert (pos >= 0);
8163 if (pos % 3 == 0
8164 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8165 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8166 && asm_noperands (PATTERN (insn)) < 0)
8167 {
8168 /* The current insn is at the bundle start: emit the
8169 template. */
8170 gcc_assert (template0 >= 0);
8171 ia64_add_bundle_selector_before (template0, insn);
8172 b = PREV_INSN (insn);
8173 insn = b;
8174 /* See comment above in analogous place for emitting nops
8175 after the insn. */
8176 template0 = template1;
8177 template1 = -1;
8178 }
8179 /* Emit nops after the current insn. */
8180 for (i = 0; i < curr_state->before_nops_num; i++)
8181 {
8182 nop = gen_nop ();
8183 ia64_emit_insn_before (nop, insn);
8184 nop = PREV_INSN (insn);
8185 insn = nop;
8186 pos--;
8187 gcc_assert (pos >= 0);
8188 if (pos % 3 == 0)
8189 {
8190 /* See comment above in analogous place for emitting nops
8191 after the insn. */
8192 gcc_assert (template0 >= 0);
8193 ia64_add_bundle_selector_before (template0, insn);
8194 b = PREV_INSN (insn);
8195 insn = b;
8196 template0 = template1;
8197 template1 = -1;
8198 }
8199 }
8200 }
8201 if (ia64_tune == PROCESSOR_ITANIUM)
8202 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
8203 Itanium1 has a strange design, if the distance between an insn
8204 and dependent MM-insn is less 4 then we have a 6 additional
8205 cycles stall. So we make the distance equal to 4 cycles if it
8206 is less. */
8207 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8208 insn != NULL_RTX;
8209 insn = next_insn)
8210 {
8211 gcc_assert (INSN_P (insn)
8212 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8213 && GET_CODE (PATTERN (insn)) != USE
8214 && GET_CODE (PATTERN (insn)) != CLOBBER);
8215 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8216 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
8217 /* We found a MM-insn which needs additional cycles. */
8218 {
8219 rtx last;
8220 int i, j, n;
8221 int pred_stop_p;
8222
8223 /* Now we are searching for a template of the bundle in
8224 which the MM-insn is placed and the position of the
8225 insn in the bundle (0, 1, 2). Also we are searching
8226 for that there is a stop before the insn. */
8227 last = prev_active_insn (insn);
8228 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
8229 if (pred_stop_p)
8230 last = prev_active_insn (last);
8231 n = 0;
8232 for (;; last = prev_active_insn (last))
8233 if (recog_memoized (last) == CODE_FOR_bundle_selector)
8234 {
8235 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
8236 if (template0 == 9)
8237 /* The insn is in MLX bundle. Change the template
8238 onto MFI because we will add nops before the
8239 insn. It simplifies subsequent code a lot. */
8240 PATTERN (last)
8241 = gen_bundle_selector (const2_rtx); /* -> MFI */
8242 break;
8243 }
8244 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
8245 && (ia64_safe_itanium_class (last)
8246 != ITANIUM_CLASS_IGNORE))
8247 n++;
8248 /* Some check of correctness: the stop is not at the
8249 bundle start, there are no more 3 insns in the bundle,
8250 and the MM-insn is not at the start of bundle with
8251 template MLX. */
8252 gcc_assert ((!pred_stop_p || n)
8253 && n <= 2
8254 && (template0 != 9 || !n));
8255 /* Put nops after the insn in the bundle. */
8256 for (j = 3 - n; j > 0; j --)
8257 ia64_emit_insn_before (gen_nop (), insn);
8258 /* It takes into account that we will add more N nops
8259 before the insn lately -- please see code below. */
8260 add_cycles [INSN_UID (insn)]--;
8261 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
8262 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8263 insn);
8264 if (pred_stop_p)
8265 add_cycles [INSN_UID (insn)]--;
8266 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
8267 {
8268 /* Insert "MII;" template. */
8269 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
8270 insn);
8271 ia64_emit_insn_before (gen_nop (), insn);
8272 ia64_emit_insn_before (gen_nop (), insn);
8273 if (i > 1)
8274 {
8275 /* To decrease code size, we use "MI;I;"
8276 template. */
8277 ia64_emit_insn_before
8278 (gen_insn_group_barrier (GEN_INT (3)), insn);
8279 i--;
8280 }
8281 ia64_emit_insn_before (gen_nop (), insn);
8282 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8283 insn);
8284 }
8285 /* Put the MM-insn in the same slot of a bundle with the
8286 same template as the original one. */
8287 ia64_add_bundle_selector_before (template0, insn);
8288 /* To put the insn in the same slot, add necessary number
8289 of nops. */
8290 for (j = n; j > 0; j --)
8291 ia64_emit_insn_before (gen_nop (), insn);
8292 /* Put the stop if the original bundle had it. */
8293 if (pred_stop_p)
8294 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8295 insn);
8296 }
8297 }
8298 free (index_to_bundle_states);
8299 finish_bundle_state_table ();
8300 bundling_p = 0;
8301 dfa_clean_insn_cache ();
8302 }
8303
8304 /* The following function is called at the end of scheduling BB or
8305 EBB. After reload, it inserts stop bits and does insn bundling. */
8306
8307 static void
8308 ia64_sched_finish (FILE *dump, int sched_verbose)
8309 {
8310 if (sched_verbose)
8311 fprintf (dump, "// Finishing schedule.\n");
8312 if (!reload_completed)
8313 return;
8314 if (reload_completed)
8315 {
8316 final_emit_insn_group_barriers (dump);
8317 bundling (dump, sched_verbose, current_sched_info->prev_head,
8318 current_sched_info->next_tail);
8319 if (sched_verbose && dump)
8320 fprintf (dump, "// finishing %d-%d\n",
8321 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
8322 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
8323
8324 return;
8325 }
8326 }
8327
8328 /* The following function inserts stop bits in scheduled BB or EBB. */
8329
8330 static void
8331 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
8332 {
8333 rtx insn;
8334 int need_barrier_p = 0;
8335 rtx prev_insn = NULL_RTX;
8336
8337 init_insn_group_barriers ();
8338
8339 for (insn = NEXT_INSN (current_sched_info->prev_head);
8340 insn != current_sched_info->next_tail;
8341 insn = NEXT_INSN (insn))
8342 {
8343 if (GET_CODE (insn) == BARRIER)
8344 {
8345 rtx last = prev_active_insn (insn);
8346
8347 if (! last)
8348 continue;
8349 if (GET_CODE (last) == JUMP_INSN
8350 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
8351 last = prev_active_insn (last);
8352 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
8353 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
8354
8355 init_insn_group_barriers ();
8356 need_barrier_p = 0;
8357 prev_insn = NULL_RTX;
8358 }
8359 else if (INSN_P (insn))
8360 {
8361 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
8362 {
8363 init_insn_group_barriers ();
8364 need_barrier_p = 0;
8365 prev_insn = NULL_RTX;
8366 }
8367 else if (need_barrier_p || group_barrier_needed (insn))
8368 {
8369 if (TARGET_EARLY_STOP_BITS)
8370 {
8371 rtx last;
8372
8373 for (last = insn;
8374 last != current_sched_info->prev_head;
8375 last = PREV_INSN (last))
8376 if (INSN_P (last) && GET_MODE (last) == TImode
8377 && stops_p [INSN_UID (last)])
8378 break;
8379 if (last == current_sched_info->prev_head)
8380 last = insn;
8381 last = prev_active_insn (last);
8382 if (last
8383 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
8384 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8385 last);
8386 init_insn_group_barriers ();
8387 for (last = NEXT_INSN (last);
8388 last != insn;
8389 last = NEXT_INSN (last))
8390 if (INSN_P (last))
8391 group_barrier_needed (last);
8392 }
8393 else
8394 {
8395 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8396 insn);
8397 init_insn_group_barriers ();
8398 }
8399 group_barrier_needed (insn);
8400 prev_insn = NULL_RTX;
8401 }
8402 else if (recog_memoized (insn) >= 0)
8403 prev_insn = insn;
8404 need_barrier_p = (GET_CODE (insn) == CALL_INSN
8405 || GET_CODE (PATTERN (insn)) == ASM_INPUT
8406 || asm_noperands (PATTERN (insn)) >= 0);
8407 }
8408 }
8409 }
8410
8411 \f
8412
8413 /* If the following function returns TRUE, we will use the DFA
8414 insn scheduler. */
8415
8416 static int
8417 ia64_first_cycle_multipass_dfa_lookahead (void)
8418 {
8419 return (reload_completed ? 6 : 4);
8420 }
8421
8422 /* The following function initiates variable `dfa_pre_cycle_insn'. */
8423
8424 static void
8425 ia64_init_dfa_pre_cycle_insn (void)
8426 {
8427 if (temp_dfa_state == NULL)
8428 {
8429 dfa_state_size = state_size ();
8430 temp_dfa_state = xmalloc (dfa_state_size);
8431 prev_cycle_state = xmalloc (dfa_state_size);
8432 }
8433 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
8434 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
8435 recog_memoized (dfa_pre_cycle_insn);
8436 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
8437 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
8438 recog_memoized (dfa_stop_insn);
8439 }
8440
8441 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
8442 used by the DFA insn scheduler. */
8443
8444 static rtx
8445 ia64_dfa_pre_cycle_insn (void)
8446 {
8447 return dfa_pre_cycle_insn;
8448 }
8449
8450 /* The following function returns TRUE if PRODUCER (of type ilog or
8451 ld) produces address for CONSUMER (of type st or stf). */
8452
8453 int
8454 ia64_st_address_bypass_p (rtx producer, rtx consumer)
8455 {
8456 rtx dest, reg, mem;
8457
8458 gcc_assert (producer && consumer);
8459 dest = ia64_single_set (producer);
8460 gcc_assert (dest);
8461 reg = SET_DEST (dest);
8462 gcc_assert (reg);
8463 if (GET_CODE (reg) == SUBREG)
8464 reg = SUBREG_REG (reg);
8465 gcc_assert (GET_CODE (reg) == REG);
8466
8467 dest = ia64_single_set (consumer);
8468 gcc_assert (dest);
8469 mem = SET_DEST (dest);
8470 gcc_assert (mem && GET_CODE (mem) == MEM);
8471 return reg_mentioned_p (reg, mem);
8472 }
8473
8474 /* The following function returns TRUE if PRODUCER (of type ilog or
8475 ld) produces address for CONSUMER (of type ld or fld). */
8476
8477 int
8478 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
8479 {
8480 rtx dest, src, reg, mem;
8481
8482 gcc_assert (producer && consumer);
8483 dest = ia64_single_set (producer);
8484 gcc_assert (dest);
8485 reg = SET_DEST (dest);
8486 gcc_assert (reg);
8487 if (GET_CODE (reg) == SUBREG)
8488 reg = SUBREG_REG (reg);
8489 gcc_assert (GET_CODE (reg) == REG);
8490
8491 src = ia64_single_set (consumer);
8492 gcc_assert (src);
8493 mem = SET_SRC (src);
8494 gcc_assert (mem);
8495
8496 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
8497 mem = XVECEXP (mem, 0, 0);
8498 else if (GET_CODE (mem) == IF_THEN_ELSE)
8499 /* ??? Is this bypass necessary for ld.c? */
8500 {
8501 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
8502 mem = XEXP (mem, 1);
8503 }
8504
8505 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
8506 mem = XEXP (mem, 0);
8507
8508 if (GET_CODE (mem) == UNSPEC)
8509 {
8510 int c = XINT (mem, 1);
8511
8512 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDSA);
8513 mem = XVECEXP (mem, 0, 0);
8514 }
8515
8516 /* Note that LO_SUM is used for GOT loads. */
8517 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
8518
8519 return reg_mentioned_p (reg, mem);
8520 }
8521
8522 /* The following function returns TRUE if INSN produces address for a
8523 load/store insn. We will place such insns into M slot because it
8524 decreases its latency time. */
8525
8526 int
8527 ia64_produce_address_p (rtx insn)
8528 {
8529 return insn->call;
8530 }
8531
8532 \f
8533 /* Emit pseudo-ops for the assembler to describe predicate relations.
8534 At present this assumes that we only consider predicate pairs to
8535 be mutex, and that the assembler can deduce proper values from
8536 straight-line code. */
8537
8538 static void
8539 emit_predicate_relation_info (void)
8540 {
8541 basic_block bb;
8542
8543 FOR_EACH_BB_REVERSE (bb)
8544 {
8545 int r;
8546 rtx head = BB_HEAD (bb);
8547
8548 /* We only need such notes at code labels. */
8549 if (GET_CODE (head) != CODE_LABEL)
8550 continue;
8551 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
8552 head = NEXT_INSN (head);
8553
8554 /* Skip p0, which may be thought to be live due to (reg:DI p0)
8555 grabbing the entire block of predicate registers. */
8556 for (r = PR_REG (2); r < PR_REG (64); r += 2)
8557 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
8558 {
8559 rtx p = gen_rtx_REG (BImode, r);
8560 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
8561 if (head == BB_END (bb))
8562 BB_END (bb) = n;
8563 head = n;
8564 }
8565 }
8566
8567 /* Look for conditional calls that do not return, and protect predicate
8568 relations around them. Otherwise the assembler will assume the call
8569 returns, and complain about uses of call-clobbered predicates after
8570 the call. */
8571 FOR_EACH_BB_REVERSE (bb)
8572 {
8573 rtx insn = BB_HEAD (bb);
8574
8575 while (1)
8576 {
8577 if (GET_CODE (insn) == CALL_INSN
8578 && GET_CODE (PATTERN (insn)) == COND_EXEC
8579 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
8580 {
8581 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
8582 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
8583 if (BB_HEAD (bb) == insn)
8584 BB_HEAD (bb) = b;
8585 if (BB_END (bb) == insn)
8586 BB_END (bb) = a;
8587 }
8588
8589 if (insn == BB_END (bb))
8590 break;
8591 insn = NEXT_INSN (insn);
8592 }
8593 }
8594 }
8595
8596 /* Perform machine dependent operations on the rtl chain INSNS. */
8597
8598 static void
8599 ia64_reorg (void)
8600 {
8601 /* We are freeing block_for_insn in the toplev to keep compatibility
8602 with old MDEP_REORGS that are not CFG based. Recompute it now. */
8603 compute_bb_for_insn ();
8604
8605 /* If optimizing, we'll have split before scheduling. */
8606 if (optimize == 0)
8607 split_all_insns ();
8608
8609 if (optimize && ia64_flag_schedule_insns2 && dbg_cnt (ia64_sched2))
8610 {
8611 timevar_push (TV_SCHED2);
8612 ia64_final_schedule = 1;
8613
8614 initiate_bundle_states ();
8615 ia64_nop = make_insn_raw (gen_nop ());
8616 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
8617 recog_memoized (ia64_nop);
8618 clocks_length = get_max_uid () + 1;
8619 stops_p = XCNEWVEC (char, clocks_length);
8620 if (ia64_tune == PROCESSOR_ITANIUM)
8621 {
8622 clocks = XCNEWVEC (int, clocks_length);
8623 add_cycles = XCNEWVEC (int, clocks_length);
8624 }
8625 if (ia64_tune == PROCESSOR_ITANIUM2)
8626 {
8627 pos_1 = get_cpu_unit_code ("2_1");
8628 pos_2 = get_cpu_unit_code ("2_2");
8629 pos_3 = get_cpu_unit_code ("2_3");
8630 pos_4 = get_cpu_unit_code ("2_4");
8631 pos_5 = get_cpu_unit_code ("2_5");
8632 pos_6 = get_cpu_unit_code ("2_6");
8633 _0mii_ = get_cpu_unit_code ("2b_0mii.");
8634 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
8635 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
8636 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
8637 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
8638 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
8639 _0mib_ = get_cpu_unit_code ("2b_0mib.");
8640 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
8641 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
8642 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
8643 _1mii_ = get_cpu_unit_code ("2b_1mii.");
8644 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
8645 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
8646 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
8647 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
8648 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
8649 _1mib_ = get_cpu_unit_code ("2b_1mib.");
8650 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
8651 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
8652 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
8653 }
8654 else
8655 {
8656 pos_1 = get_cpu_unit_code ("1_1");
8657 pos_2 = get_cpu_unit_code ("1_2");
8658 pos_3 = get_cpu_unit_code ("1_3");
8659 pos_4 = get_cpu_unit_code ("1_4");
8660 pos_5 = get_cpu_unit_code ("1_5");
8661 pos_6 = get_cpu_unit_code ("1_6");
8662 _0mii_ = get_cpu_unit_code ("1b_0mii.");
8663 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
8664 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
8665 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
8666 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
8667 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
8668 _0mib_ = get_cpu_unit_code ("1b_0mib.");
8669 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
8670 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
8671 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
8672 _1mii_ = get_cpu_unit_code ("1b_1mii.");
8673 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
8674 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
8675 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
8676 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
8677 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
8678 _1mib_ = get_cpu_unit_code ("1b_1mib.");
8679 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
8680 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
8681 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
8682 }
8683 schedule_ebbs ();
8684 /* We cannot reuse this one because it has been corrupted by the
8685 evil glat. */
8686 finish_bundle_states ();
8687 if (ia64_tune == PROCESSOR_ITANIUM)
8688 {
8689 free (add_cycles);
8690 free (clocks);
8691 }
8692 free (stops_p);
8693 stops_p = NULL;
8694 emit_insn_group_barriers (dump_file);
8695
8696 ia64_final_schedule = 0;
8697 timevar_pop (TV_SCHED2);
8698 }
8699 else
8700 emit_all_insn_group_barriers (dump_file);
8701
8702 df_analyze ();
8703
8704 /* A call must not be the last instruction in a function, so that the
8705 return address is still within the function, so that unwinding works
8706 properly. Note that IA-64 differs from dwarf2 on this point. */
8707 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
8708 {
8709 rtx insn;
8710 int saw_stop = 0;
8711
8712 insn = get_last_insn ();
8713 if (! INSN_P (insn))
8714 insn = prev_active_insn (insn);
8715 /* Skip over insns that expand to nothing. */
8716 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
8717 {
8718 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
8719 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
8720 saw_stop = 1;
8721 insn = prev_active_insn (insn);
8722 }
8723 if (GET_CODE (insn) == CALL_INSN)
8724 {
8725 if (! saw_stop)
8726 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8727 emit_insn (gen_break_f ());
8728 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8729 }
8730 }
8731
8732 emit_predicate_relation_info ();
8733
8734 if (ia64_flag_var_tracking)
8735 {
8736 timevar_push (TV_VAR_TRACKING);
8737 variable_tracking_main ();
8738 timevar_pop (TV_VAR_TRACKING);
8739 }
8740 df_finish_pass (false);
8741 }
8742 \f
8743 /* Return true if REGNO is used by the epilogue. */
8744
8745 int
8746 ia64_epilogue_uses (int regno)
8747 {
8748 switch (regno)
8749 {
8750 case R_GR (1):
8751 /* With a call to a function in another module, we will write a new
8752 value to "gp". After returning from such a call, we need to make
8753 sure the function restores the original gp-value, even if the
8754 function itself does not use the gp anymore. */
8755 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
8756
8757 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
8758 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
8759 /* For functions defined with the syscall_linkage attribute, all
8760 input registers are marked as live at all function exits. This
8761 prevents the register allocator from using the input registers,
8762 which in turn makes it possible to restart a system call after
8763 an interrupt without having to save/restore the input registers.
8764 This also prevents kernel data from leaking to application code. */
8765 return lookup_attribute ("syscall_linkage",
8766 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
8767
8768 case R_BR (0):
8769 /* Conditional return patterns can't represent the use of `b0' as
8770 the return address, so we force the value live this way. */
8771 return 1;
8772
8773 case AR_PFS_REGNUM:
8774 /* Likewise for ar.pfs, which is used by br.ret. */
8775 return 1;
8776
8777 default:
8778 return 0;
8779 }
8780 }
8781
8782 /* Return true if REGNO is used by the frame unwinder. */
8783
8784 int
8785 ia64_eh_uses (int regno)
8786 {
8787 enum ia64_frame_regs r;
8788
8789 if (! reload_completed)
8790 return 0;
8791
8792 if (regno == 0)
8793 return 0;
8794
8795 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
8796 if (regno == current_frame_info.r[r]
8797 || regno == emitted_frame_related_regs[r])
8798 return 1;
8799
8800 return 0;
8801 }
8802 \f
8803 /* Return true if this goes in small data/bss. */
8804
8805 /* ??? We could also support own long data here. Generating movl/add/ld8
8806 instead of addl,ld8/ld8. This makes the code bigger, but should make the
8807 code faster because there is one less load. This also includes incomplete
8808 types which can't go in sdata/sbss. */
8809
8810 static bool
8811 ia64_in_small_data_p (const_tree exp)
8812 {
8813 if (TARGET_NO_SDATA)
8814 return false;
8815
8816 /* We want to merge strings, so we never consider them small data. */
8817 if (TREE_CODE (exp) == STRING_CST)
8818 return false;
8819
8820 /* Functions are never small data. */
8821 if (TREE_CODE (exp) == FUNCTION_DECL)
8822 return false;
8823
8824 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
8825 {
8826 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
8827
8828 if (strcmp (section, ".sdata") == 0
8829 || strncmp (section, ".sdata.", 7) == 0
8830 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
8831 || strcmp (section, ".sbss") == 0
8832 || strncmp (section, ".sbss.", 6) == 0
8833 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
8834 return true;
8835 }
8836 else
8837 {
8838 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
8839
8840 /* If this is an incomplete type with size 0, then we can't put it
8841 in sdata because it might be too big when completed. */
8842 if (size > 0 && size <= ia64_section_threshold)
8843 return true;
8844 }
8845
8846 return false;
8847 }
8848 \f
8849 /* Output assembly directives for prologue regions. */
8850
8851 /* The current basic block number. */
8852
8853 static bool last_block;
8854
8855 /* True if we need a copy_state command at the start of the next block. */
8856
8857 static bool need_copy_state;
8858
8859 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
8860 # define MAX_ARTIFICIAL_LABEL_BYTES 30
8861 #endif
8862
8863 /* Emit a debugging label after a call-frame-related insn. We'd
8864 rather output the label right away, but we'd have to output it
8865 after, not before, the instruction, and the instruction has not
8866 been output yet. So we emit the label after the insn, delete it to
8867 avoid introducing basic blocks, and mark it as preserved, such that
8868 it is still output, given that it is referenced in debug info. */
8869
8870 static const char *
8871 ia64_emit_deleted_label_after_insn (rtx insn)
8872 {
8873 char label[MAX_ARTIFICIAL_LABEL_BYTES];
8874 rtx lb = gen_label_rtx ();
8875 rtx label_insn = emit_label_after (lb, insn);
8876
8877 LABEL_PRESERVE_P (lb) = 1;
8878
8879 delete_insn (label_insn);
8880
8881 ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
8882
8883 return xstrdup (label);
8884 }
8885
8886 /* Define the CFA after INSN with the steady-state definition. */
8887
8888 static void
8889 ia64_dwarf2out_def_steady_cfa (rtx insn)
8890 {
8891 rtx fp = frame_pointer_needed
8892 ? hard_frame_pointer_rtx
8893 : stack_pointer_rtx;
8894
8895 dwarf2out_def_cfa
8896 (ia64_emit_deleted_label_after_insn (insn),
8897 REGNO (fp),
8898 ia64_initial_elimination_offset
8899 (REGNO (arg_pointer_rtx), REGNO (fp))
8900 + ARG_POINTER_CFA_OFFSET (current_function_decl));
8901 }
8902
8903 /* The generic dwarf2 frame debug info generator does not define a
8904 separate region for the very end of the epilogue, so refrain from
8905 doing so in the IA64-specific code as well. */
8906
8907 #define IA64_CHANGE_CFA_IN_EPILOGUE 0
8908
8909 /* The function emits unwind directives for the start of an epilogue. */
8910
8911 static void
8912 process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
8913 {
8914 /* If this isn't the last block of the function, then we need to label the
8915 current state, and copy it back in at the start of the next block. */
8916
8917 if (!last_block)
8918 {
8919 if (unwind)
8920 fprintf (asm_out_file, "\t.label_state %d\n",
8921 ++cfun->machine->state_num);
8922 need_copy_state = true;
8923 }
8924
8925 if (unwind)
8926 fprintf (asm_out_file, "\t.restore sp\n");
8927 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
8928 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
8929 STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
8930 }
8931
8932 /* This function processes a SET pattern looking for specific patterns
8933 which result in emitting an assembly directive required for unwinding. */
8934
8935 static int
8936 process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
8937 {
8938 rtx src = SET_SRC (pat);
8939 rtx dest = SET_DEST (pat);
8940 int src_regno, dest_regno;
8941
8942 /* Look for the ALLOC insn. */
8943 if (GET_CODE (src) == UNSPEC_VOLATILE
8944 && XINT (src, 1) == UNSPECV_ALLOC
8945 && GET_CODE (dest) == REG)
8946 {
8947 dest_regno = REGNO (dest);
8948
8949 /* If this is the final destination for ar.pfs, then this must
8950 be the alloc in the prologue. */
8951 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
8952 {
8953 if (unwind)
8954 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
8955 ia64_dbx_register_number (dest_regno));
8956 }
8957 else
8958 {
8959 /* This must be an alloc before a sibcall. We must drop the
8960 old frame info. The easiest way to drop the old frame
8961 info is to ensure we had a ".restore sp" directive
8962 followed by a new prologue. If the procedure doesn't
8963 have a memory-stack frame, we'll issue a dummy ".restore
8964 sp" now. */
8965 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
8966 /* if haven't done process_epilogue() yet, do it now */
8967 process_epilogue (asm_out_file, insn, unwind, frame);
8968 if (unwind)
8969 fprintf (asm_out_file, "\t.prologue\n");
8970 }
8971 return 1;
8972 }
8973
8974 /* Look for SP = .... */
8975 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
8976 {
8977 if (GET_CODE (src) == PLUS)
8978 {
8979 rtx op0 = XEXP (src, 0);
8980 rtx op1 = XEXP (src, 1);
8981
8982 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
8983
8984 if (INTVAL (op1) < 0)
8985 {
8986 gcc_assert (!frame_pointer_needed);
8987 if (unwind)
8988 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
8989 -INTVAL (op1));
8990 if (frame)
8991 ia64_dwarf2out_def_steady_cfa (insn);
8992 }
8993 else
8994 process_epilogue (asm_out_file, insn, unwind, frame);
8995 }
8996 else
8997 {
8998 gcc_assert (GET_CODE (src) == REG
8999 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
9000 process_epilogue (asm_out_file, insn, unwind, frame);
9001 }
9002
9003 return 1;
9004 }
9005
9006 /* Register move we need to look at. */
9007 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
9008 {
9009 src_regno = REGNO (src);
9010 dest_regno = REGNO (dest);
9011
9012 switch (src_regno)
9013 {
9014 case BR_REG (0):
9015 /* Saving return address pointer. */
9016 gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
9017 if (unwind)
9018 fprintf (asm_out_file, "\t.save rp, r%d\n",
9019 ia64_dbx_register_number (dest_regno));
9020 return 1;
9021
9022 case PR_REG (0):
9023 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
9024 if (unwind)
9025 fprintf (asm_out_file, "\t.save pr, r%d\n",
9026 ia64_dbx_register_number (dest_regno));
9027 return 1;
9028
9029 case AR_UNAT_REGNUM:
9030 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
9031 if (unwind)
9032 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9033 ia64_dbx_register_number (dest_regno));
9034 return 1;
9035
9036 case AR_LC_REGNUM:
9037 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
9038 if (unwind)
9039 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9040 ia64_dbx_register_number (dest_regno));
9041 return 1;
9042
9043 case STACK_POINTER_REGNUM:
9044 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
9045 && frame_pointer_needed);
9046 if (unwind)
9047 fprintf (asm_out_file, "\t.vframe r%d\n",
9048 ia64_dbx_register_number (dest_regno));
9049 if (frame)
9050 ia64_dwarf2out_def_steady_cfa (insn);
9051 return 1;
9052
9053 default:
9054 /* Everything else should indicate being stored to memory. */
9055 gcc_unreachable ();
9056 }
9057 }
9058
9059 /* Memory store we need to look at. */
9060 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
9061 {
9062 long off;
9063 rtx base;
9064 const char *saveop;
9065
9066 if (GET_CODE (XEXP (dest, 0)) == REG)
9067 {
9068 base = XEXP (dest, 0);
9069 off = 0;
9070 }
9071 else
9072 {
9073 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9074 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
9075 base = XEXP (XEXP (dest, 0), 0);
9076 off = INTVAL (XEXP (XEXP (dest, 0), 1));
9077 }
9078
9079 if (base == hard_frame_pointer_rtx)
9080 {
9081 saveop = ".savepsp";
9082 off = - off;
9083 }
9084 else
9085 {
9086 gcc_assert (base == stack_pointer_rtx);
9087 saveop = ".savesp";
9088 }
9089
9090 src_regno = REGNO (src);
9091 switch (src_regno)
9092 {
9093 case BR_REG (0):
9094 gcc_assert (!current_frame_info.r[reg_save_b0]);
9095 if (unwind)
9096 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
9097 return 1;
9098
9099 case PR_REG (0):
9100 gcc_assert (!current_frame_info.r[reg_save_pr]);
9101 if (unwind)
9102 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
9103 return 1;
9104
9105 case AR_LC_REGNUM:
9106 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
9107 if (unwind)
9108 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
9109 return 1;
9110
9111 case AR_PFS_REGNUM:
9112 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
9113 if (unwind)
9114 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
9115 return 1;
9116
9117 case AR_UNAT_REGNUM:
9118 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
9119 if (unwind)
9120 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
9121 return 1;
9122
9123 case GR_REG (4):
9124 case GR_REG (5):
9125 case GR_REG (6):
9126 case GR_REG (7):
9127 if (unwind)
9128 fprintf (asm_out_file, "\t.save.g 0x%x\n",
9129 1 << (src_regno - GR_REG (4)));
9130 return 1;
9131
9132 case BR_REG (1):
9133 case BR_REG (2):
9134 case BR_REG (3):
9135 case BR_REG (4):
9136 case BR_REG (5):
9137 if (unwind)
9138 fprintf (asm_out_file, "\t.save.b 0x%x\n",
9139 1 << (src_regno - BR_REG (1)));
9140 return 1;
9141
9142 case FR_REG (2):
9143 case FR_REG (3):
9144 case FR_REG (4):
9145 case FR_REG (5):
9146 if (unwind)
9147 fprintf (asm_out_file, "\t.save.f 0x%x\n",
9148 1 << (src_regno - FR_REG (2)));
9149 return 1;
9150
9151 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9152 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9153 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9154 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9155 if (unwind)
9156 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9157 1 << (src_regno - FR_REG (12)));
9158 return 1;
9159
9160 default:
9161 return 0;
9162 }
9163 }
9164
9165 return 0;
9166 }
9167
9168
9169 /* This function looks at a single insn and emits any directives
9170 required to unwind this insn. */
9171 void
9172 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
9173 {
9174 bool unwind = (flag_unwind_tables
9175 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9176 bool frame = dwarf2out_do_frame ();
9177
9178 if (unwind || frame)
9179 {
9180 rtx pat;
9181
9182 if (NOTE_INSN_BASIC_BLOCK_P (insn))
9183 {
9184 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
9185
9186 /* Restore unwind state from immediately before the epilogue. */
9187 if (need_copy_state)
9188 {
9189 if (unwind)
9190 {
9191 fprintf (asm_out_file, "\t.body\n");
9192 fprintf (asm_out_file, "\t.copy_state %d\n",
9193 cfun->machine->state_num);
9194 }
9195 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9196 ia64_dwarf2out_def_steady_cfa (insn);
9197 need_copy_state = false;
9198 }
9199 }
9200
9201 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
9202 return;
9203
9204 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9205 if (pat)
9206 pat = XEXP (pat, 0);
9207 else
9208 pat = PATTERN (insn);
9209
9210 switch (GET_CODE (pat))
9211 {
9212 case SET:
9213 process_set (asm_out_file, pat, insn, unwind, frame);
9214 break;
9215
9216 case PARALLEL:
9217 {
9218 int par_index;
9219 int limit = XVECLEN (pat, 0);
9220 for (par_index = 0; par_index < limit; par_index++)
9221 {
9222 rtx x = XVECEXP (pat, 0, par_index);
9223 if (GET_CODE (x) == SET)
9224 process_set (asm_out_file, x, insn, unwind, frame);
9225 }
9226 break;
9227 }
9228
9229 default:
9230 gcc_unreachable ();
9231 }
9232 }
9233 }
9234
9235 \f
9236 enum ia64_builtins
9237 {
9238 IA64_BUILTIN_BSP,
9239 IA64_BUILTIN_FLUSHRS
9240 };
9241
9242 void
9243 ia64_init_builtins (void)
9244 {
9245 tree fpreg_type;
9246 tree float80_type;
9247
9248 /* The __fpreg type. */
9249 fpreg_type = make_node (REAL_TYPE);
9250 TYPE_PRECISION (fpreg_type) = 82;
9251 layout_type (fpreg_type);
9252 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
9253
9254 /* The __float80 type. */
9255 float80_type = make_node (REAL_TYPE);
9256 TYPE_PRECISION (float80_type) = 80;
9257 layout_type (float80_type);
9258 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9259
9260 /* The __float128 type. */
9261 if (!TARGET_HPUX)
9262 {
9263 tree float128_type = make_node (REAL_TYPE);
9264 TYPE_PRECISION (float128_type) = 128;
9265 layout_type (float128_type);
9266 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
9267 }
9268 else
9269 /* Under HPUX, this is a synonym for "long double". */
9270 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
9271 "__float128");
9272
9273 #define def_builtin(name, type, code) \
9274 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
9275 NULL, NULL_TREE)
9276
9277 def_builtin ("__builtin_ia64_bsp",
9278 build_function_type (ptr_type_node, void_list_node),
9279 IA64_BUILTIN_BSP);
9280
9281 def_builtin ("__builtin_ia64_flushrs",
9282 build_function_type (void_type_node, void_list_node),
9283 IA64_BUILTIN_FLUSHRS);
9284
9285 #undef def_builtin
9286
9287 if (TARGET_HPUX)
9288 {
9289 if (built_in_decls [BUILT_IN_FINITE])
9290 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
9291 "_Isfinite");
9292 if (built_in_decls [BUILT_IN_FINITEF])
9293 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
9294 "_Isfinitef");
9295 if (built_in_decls [BUILT_IN_FINITEL])
9296 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
9297 "_Isfinitef128");
9298 }
9299 }
9300
9301 rtx
9302 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9303 enum machine_mode mode ATTRIBUTE_UNUSED,
9304 int ignore ATTRIBUTE_UNUSED)
9305 {
9306 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
9307 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9308
9309 switch (fcode)
9310 {
9311 case IA64_BUILTIN_BSP:
9312 if (! target || ! register_operand (target, DImode))
9313 target = gen_reg_rtx (DImode);
9314 emit_insn (gen_bsp_value (target));
9315 #ifdef POINTERS_EXTEND_UNSIGNED
9316 target = convert_memory_address (ptr_mode, target);
9317 #endif
9318 return target;
9319
9320 case IA64_BUILTIN_FLUSHRS:
9321 emit_insn (gen_flushrs ());
9322 return const0_rtx;
9323
9324 default:
9325 break;
9326 }
9327
9328 return NULL_RTX;
9329 }
9330
9331 /* For the HP-UX IA64 aggregate parameters are passed stored in the
9332 most significant bits of the stack slot. */
9333
9334 enum direction
9335 ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
9336 {
9337 /* Exception to normal case for structures/unions/etc. */
9338
9339 if (type && AGGREGATE_TYPE_P (type)
9340 && int_size_in_bytes (type) < UNITS_PER_WORD)
9341 return upward;
9342
9343 /* Fall back to the default. */
9344 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
9345 }
9346
9347 /* Emit text to declare externally defined variables and functions, because
9348 the Intel assembler does not support undefined externals. */
9349
9350 void
9351 ia64_asm_output_external (FILE *file, tree decl, const char *name)
9352 {
9353 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
9354 set in order to avoid putting out names that are never really
9355 used. */
9356 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
9357 {
9358 /* maybe_assemble_visibility will return 1 if the assembler
9359 visibility directive is output. */
9360 int need_visibility = ((*targetm.binds_local_p) (decl)
9361 && maybe_assemble_visibility (decl));
9362
9363 /* GNU as does not need anything here, but the HP linker does
9364 need something for external functions. */
9365 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
9366 && TREE_CODE (decl) == FUNCTION_DECL)
9367 (*targetm.asm_out.globalize_decl_name) (file, decl);
9368 else if (need_visibility && !TARGET_GNU_AS)
9369 (*targetm.asm_out.globalize_label) (file, name);
9370 }
9371 }
9372
9373 /* Set SImode div/mod functions, init_integral_libfuncs only initializes
9374 modes of word_mode and larger. Rename the TFmode libfuncs using the
9375 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
9376 backward compatibility. */
9377
9378 static void
9379 ia64_init_libfuncs (void)
9380 {
9381 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
9382 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
9383 set_optab_libfunc (smod_optab, SImode, "__modsi3");
9384 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
9385
9386 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
9387 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
9388 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
9389 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
9390 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
9391
9392 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
9393 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
9394 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
9395 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
9396 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
9397 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
9398
9399 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
9400 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
9401 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
9402 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
9403 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
9404
9405 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
9406 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
9407 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
9408 /* HP-UX 11.23 libc does not have a function for unsigned
9409 SImode-to-TFmode conversion. */
9410 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
9411 }
9412
9413 /* Rename all the TFmode libfuncs using the HPUX conventions. */
9414
9415 static void
9416 ia64_hpux_init_libfuncs (void)
9417 {
9418 ia64_init_libfuncs ();
9419
9420 /* The HP SI millicode division and mod functions expect DI arguments.
9421 By turning them off completely we avoid using both libgcc and the
9422 non-standard millicode routines and use the HP DI millicode routines
9423 instead. */
9424
9425 set_optab_libfunc (sdiv_optab, SImode, 0);
9426 set_optab_libfunc (udiv_optab, SImode, 0);
9427 set_optab_libfunc (smod_optab, SImode, 0);
9428 set_optab_libfunc (umod_optab, SImode, 0);
9429
9430 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
9431 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
9432 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
9433 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
9434
9435 /* HP-UX libc has TF min/max/abs routines in it. */
9436 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
9437 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
9438 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
9439
9440 /* ia64_expand_compare uses this. */
9441 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
9442
9443 /* These should never be used. */
9444 set_optab_libfunc (eq_optab, TFmode, 0);
9445 set_optab_libfunc (ne_optab, TFmode, 0);
9446 set_optab_libfunc (gt_optab, TFmode, 0);
9447 set_optab_libfunc (ge_optab, TFmode, 0);
9448 set_optab_libfunc (lt_optab, TFmode, 0);
9449 set_optab_libfunc (le_optab, TFmode, 0);
9450 }
9451
9452 /* Rename the division and modulus functions in VMS. */
9453
9454 static void
9455 ia64_vms_init_libfuncs (void)
9456 {
9457 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9458 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9459 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9460 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9461 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9462 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9463 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9464 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9465 }
9466
9467 /* Rename the TFmode libfuncs available from soft-fp in glibc using
9468 the HPUX conventions. */
9469
9470 static void
9471 ia64_sysv4_init_libfuncs (void)
9472 {
9473 ia64_init_libfuncs ();
9474
9475 /* These functions are not part of the HPUX TFmode interface. We
9476 use them instead of _U_Qfcmp, which doesn't work the way we
9477 expect. */
9478 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
9479 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
9480 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
9481 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
9482 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
9483 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
9484
9485 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
9486 glibc doesn't have them. */
9487 }
9488 \f
9489 /* For HPUX, it is illegal to have relocations in shared segments. */
9490
9491 static int
9492 ia64_hpux_reloc_rw_mask (void)
9493 {
9494 return 3;
9495 }
9496
9497 /* For others, relax this so that relocations to local data goes in
9498 read-only segments, but we still cannot allow global relocations
9499 in read-only segments. */
9500
9501 static int
9502 ia64_reloc_rw_mask (void)
9503 {
9504 return flag_pic ? 3 : 2;
9505 }
9506
9507 /* Return the section to use for X. The only special thing we do here
9508 is to honor small data. */
9509
9510 static section *
9511 ia64_select_rtx_section (enum machine_mode mode, rtx x,
9512 unsigned HOST_WIDE_INT align)
9513 {
9514 if (GET_MODE_SIZE (mode) > 0
9515 && GET_MODE_SIZE (mode) <= ia64_section_threshold
9516 && !TARGET_NO_SDATA)
9517 return sdata_section;
9518 else
9519 return default_elf_select_rtx_section (mode, x, align);
9520 }
9521
9522 static unsigned int
9523 ia64_section_type_flags (tree decl, const char *name, int reloc)
9524 {
9525 unsigned int flags = 0;
9526
9527 if (strcmp (name, ".sdata") == 0
9528 || strncmp (name, ".sdata.", 7) == 0
9529 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9530 || strncmp (name, ".sdata2.", 8) == 0
9531 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
9532 || strcmp (name, ".sbss") == 0
9533 || strncmp (name, ".sbss.", 6) == 0
9534 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9535 flags = SECTION_SMALL;
9536
9537 flags |= default_section_type_flags (decl, name, reloc);
9538 return flags;
9539 }
9540
9541 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
9542 structure type and that the address of that type should be passed
9543 in out0, rather than in r8. */
9544
9545 static bool
9546 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
9547 {
9548 tree ret_type = TREE_TYPE (fntype);
9549
9550 /* The Itanium C++ ABI requires that out0, rather than r8, be used
9551 as the structure return address parameter, if the return value
9552 type has a non-trivial copy constructor or destructor. It is not
9553 clear if this same convention should be used for other
9554 programming languages. Until G++ 3.4, we incorrectly used r8 for
9555 these return values. */
9556 return (abi_version_at_least (2)
9557 && ret_type
9558 && TYPE_MODE (ret_type) == BLKmode
9559 && TREE_ADDRESSABLE (ret_type)
9560 && strcmp (lang_hooks.name, "GNU C++") == 0);
9561 }
9562
9563 /* Output the assembler code for a thunk function. THUNK_DECL is the
9564 declaration for the thunk function itself, FUNCTION is the decl for
9565 the target function. DELTA is an immediate constant offset to be
9566 added to THIS. If VCALL_OFFSET is nonzero, the word at
9567 *(*this + vcall_offset) should be added to THIS. */
9568
9569 static void
9570 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
9571 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9572 tree function)
9573 {
9574 rtx this_rtx, insn, funexp;
9575 unsigned int this_parmno;
9576 unsigned int this_regno;
9577 rtx delta_rtx;
9578
9579 reload_completed = 1;
9580 epilogue_completed = 1;
9581
9582 /* Set things up as ia64_expand_prologue might. */
9583 last_scratch_gr_reg = 15;
9584
9585 memset (&current_frame_info, 0, sizeof (current_frame_info));
9586 current_frame_info.spill_cfa_off = -16;
9587 current_frame_info.n_input_regs = 1;
9588 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
9589
9590 /* Mark the end of the (empty) prologue. */
9591 emit_note (NOTE_INSN_PROLOGUE_END);
9592
9593 /* Figure out whether "this" will be the first parameter (the
9594 typical case) or the second parameter (as happens when the
9595 virtual function returns certain class objects). */
9596 this_parmno
9597 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
9598 ? 1 : 0);
9599 this_regno = IN_REG (this_parmno);
9600 if (!TARGET_REG_NAMES)
9601 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
9602
9603 this_rtx = gen_rtx_REG (Pmode, this_regno);
9604
9605 /* Apply the constant offset, if required. */
9606 delta_rtx = GEN_INT (delta);
9607 if (TARGET_ILP32)
9608 {
9609 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
9610 REG_POINTER (tmp) = 1;
9611 if (delta && satisfies_constraint_I (delta_rtx))
9612 {
9613 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
9614 delta = 0;
9615 }
9616 else
9617 emit_insn (gen_ptr_extend (this_rtx, tmp));
9618 }
9619 if (delta)
9620 {
9621 if (!satisfies_constraint_I (delta_rtx))
9622 {
9623 rtx tmp = gen_rtx_REG (Pmode, 2);
9624 emit_move_insn (tmp, delta_rtx);
9625 delta_rtx = tmp;
9626 }
9627 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
9628 }
9629
9630 /* Apply the offset from the vtable, if required. */
9631 if (vcall_offset)
9632 {
9633 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
9634 rtx tmp = gen_rtx_REG (Pmode, 2);
9635
9636 if (TARGET_ILP32)
9637 {
9638 rtx t = gen_rtx_REG (ptr_mode, 2);
9639 REG_POINTER (t) = 1;
9640 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
9641 if (satisfies_constraint_I (vcall_offset_rtx))
9642 {
9643 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
9644 vcall_offset = 0;
9645 }
9646 else
9647 emit_insn (gen_ptr_extend (tmp, t));
9648 }
9649 else
9650 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
9651
9652 if (vcall_offset)
9653 {
9654 if (!satisfies_constraint_J (vcall_offset_rtx))
9655 {
9656 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
9657 emit_move_insn (tmp2, vcall_offset_rtx);
9658 vcall_offset_rtx = tmp2;
9659 }
9660 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
9661 }
9662
9663 if (TARGET_ILP32)
9664 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
9665 else
9666 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
9667
9668 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
9669 }
9670
9671 /* Generate a tail call to the target function. */
9672 if (! TREE_USED (function))
9673 {
9674 assemble_external (function);
9675 TREE_USED (function) = 1;
9676 }
9677 funexp = XEXP (DECL_RTL (function), 0);
9678 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
9679 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
9680 insn = get_last_insn ();
9681 SIBLING_CALL_P (insn) = 1;
9682
9683 /* Code generation for calls relies on splitting. */
9684 reload_completed = 1;
9685 epilogue_completed = 1;
9686 try_split (PATTERN (insn), insn, 0);
9687
9688 emit_barrier ();
9689
9690 /* Run just enough of rest_of_compilation to get the insns emitted.
9691 There's not really enough bulk here to make other passes such as
9692 instruction scheduling worth while. Note that use_thunk calls
9693 assemble_start_function and assemble_end_function. */
9694
9695 insn_locators_alloc ();
9696 emit_all_insn_group_barriers (NULL);
9697 insn = get_insns ();
9698 shorten_branches (insn);
9699 final_start_function (insn, file, 1);
9700 final (insn, file, 1);
9701 final_end_function ();
9702 free_after_compilation (cfun);
9703
9704 reload_completed = 0;
9705 epilogue_completed = 0;
9706 }
9707
9708 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9709
9710 static rtx
9711 ia64_struct_value_rtx (tree fntype,
9712 int incoming ATTRIBUTE_UNUSED)
9713 {
9714 if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
9715 return NULL_RTX;
9716 return gen_rtx_REG (Pmode, GR_REG (8));
9717 }
9718
9719 static bool
9720 ia64_scalar_mode_supported_p (enum machine_mode mode)
9721 {
9722 switch (mode)
9723 {
9724 case QImode:
9725 case HImode:
9726 case SImode:
9727 case DImode:
9728 case TImode:
9729 return true;
9730
9731 case SFmode:
9732 case DFmode:
9733 case XFmode:
9734 case RFmode:
9735 return true;
9736
9737 case TFmode:
9738 return TARGET_HPUX;
9739
9740 default:
9741 return false;
9742 }
9743 }
9744
9745 static bool
9746 ia64_vector_mode_supported_p (enum machine_mode mode)
9747 {
9748 switch (mode)
9749 {
9750 case V8QImode:
9751 case V4HImode:
9752 case V2SImode:
9753 return true;
9754
9755 case V2SFmode:
9756 return true;
9757
9758 default:
9759 return false;
9760 }
9761 }
9762
9763 /* Implement the FUNCTION_PROFILER macro. */
9764
9765 void
9766 ia64_output_function_profiler (FILE *file, int labelno)
9767 {
9768 bool indirect_call;
9769
9770 /* If the function needs a static chain and the static chain
9771 register is r15, we use an indirect call so as to bypass
9772 the PLT stub in case the executable is dynamically linked,
9773 because the stub clobbers r15 as per 5.3.6 of the psABI.
9774 We don't need to do that in non canonical PIC mode. */
9775
9776 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
9777 {
9778 gcc_assert (STATIC_CHAIN_REGNUM == 15);
9779 indirect_call = true;
9780 }
9781 else
9782 indirect_call = false;
9783
9784 if (TARGET_GNU_AS)
9785 fputs ("\t.prologue 4, r40\n", file);
9786 else
9787 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
9788 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
9789
9790 if (NO_PROFILE_COUNTERS)
9791 fputs ("\tmov out3 = r0\n", file);
9792 else
9793 {
9794 char buf[20];
9795 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9796
9797 if (TARGET_AUTO_PIC)
9798 fputs ("\tmovl out3 = @gprel(", file);
9799 else
9800 fputs ("\taddl out3 = @ltoff(", file);
9801 assemble_name (file, buf);
9802 if (TARGET_AUTO_PIC)
9803 fputs (")\n", file);
9804 else
9805 fputs ("), r1\n", file);
9806 }
9807
9808 if (indirect_call)
9809 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
9810 fputs ("\t;;\n", file);
9811
9812 fputs ("\t.save rp, r42\n", file);
9813 fputs ("\tmov out2 = b0\n", file);
9814 if (indirect_call)
9815 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
9816 fputs ("\t.body\n", file);
9817 fputs ("\tmov out1 = r1\n", file);
9818 if (indirect_call)
9819 {
9820 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
9821 fputs ("\tmov b6 = r16\n", file);
9822 fputs ("\tld8 r1 = [r14]\n", file);
9823 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
9824 }
9825 else
9826 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
9827 }
9828
9829 static GTY(()) rtx mcount_func_rtx;
9830 static rtx
9831 gen_mcount_func_rtx (void)
9832 {
9833 if (!mcount_func_rtx)
9834 mcount_func_rtx = init_one_libfunc ("_mcount");
9835 return mcount_func_rtx;
9836 }
9837
9838 void
9839 ia64_profile_hook (int labelno)
9840 {
9841 rtx label, ip;
9842
9843 if (NO_PROFILE_COUNTERS)
9844 label = const0_rtx;
9845 else
9846 {
9847 char buf[30];
9848 const char *label_name;
9849 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9850 label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
9851 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
9852 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
9853 }
9854 ip = gen_reg_rtx (Pmode);
9855 emit_insn (gen_ip_value (ip));
9856 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
9857 VOIDmode, 3,
9858 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
9859 ip, Pmode,
9860 label, Pmode);
9861 }
9862
9863 /* Return the mangling of TYPE if it is an extended fundamental type. */
9864
9865 static const char *
9866 ia64_mangle_type (const_tree type)
9867 {
9868 type = TYPE_MAIN_VARIANT (type);
9869
9870 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
9871 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
9872 return NULL;
9873
9874 /* On HP-UX, "long double" is mangled as "e" so __float128 is
9875 mangled as "e". */
9876 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
9877 return "g";
9878 /* On HP-UX, "e" is not available as a mangling of __float80 so use
9879 an extended mangling. Elsewhere, "e" is available since long
9880 double is 80 bits. */
9881 if (TYPE_MODE (type) == XFmode)
9882 return TARGET_HPUX ? "u9__float80" : "e";
9883 if (TYPE_MODE (type) == RFmode)
9884 return "u7__fpreg";
9885 return NULL;
9886 }
9887
9888 /* Return the diagnostic message string if conversion from FROMTYPE to
9889 TOTYPE is not allowed, NULL otherwise. */
9890 static const char *
9891 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
9892 {
9893 /* Reject nontrivial conversion to or from __fpreg. */
9894 if (TYPE_MODE (fromtype) == RFmode
9895 && TYPE_MODE (totype) != RFmode
9896 && TYPE_MODE (totype) != VOIDmode)
9897 return N_("invalid conversion from %<__fpreg%>");
9898 if (TYPE_MODE (totype) == RFmode
9899 && TYPE_MODE (fromtype) != RFmode)
9900 return N_("invalid conversion to %<__fpreg%>");
9901 return NULL;
9902 }
9903
9904 /* Return the diagnostic message string if the unary operation OP is
9905 not permitted on TYPE, NULL otherwise. */
9906 static const char *
9907 ia64_invalid_unary_op (int op, const_tree type)
9908 {
9909 /* Reject operations on __fpreg other than unary + or &. */
9910 if (TYPE_MODE (type) == RFmode
9911 && op != CONVERT_EXPR
9912 && op != ADDR_EXPR)
9913 return N_("invalid operation on %<__fpreg%>");
9914 return NULL;
9915 }
9916
9917 /* Return the diagnostic message string if the binary operation OP is
9918 not permitted on TYPE1 and TYPE2, NULL otherwise. */
9919 static const char *
9920 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
9921 {
9922 /* Reject operations on __fpreg. */
9923 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
9924 return N_("invalid operation on %<__fpreg%>");
9925 return NULL;
9926 }
9927
9928 /* Implement overriding of the optimization options. */
9929 void
9930 ia64_optimization_options (int level ATTRIBUTE_UNUSED,
9931 int size ATTRIBUTE_UNUSED)
9932 {
9933 /* Disable the second machine independent scheduling pass and use one for the
9934 IA-64. This needs to be here instead of in OVERRIDE_OPTIONS because this
9935 is done whenever the optimization is changed via #pragma GCC optimize or
9936 attribute((optimize(...))). */
9937 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
9938 flag_schedule_insns_after_reload = 0;
9939
9940 /* Let the scheduler form additional regions. */
9941 set_param_value ("max-sched-extend-regions-iters", 2);
9942
9943 /* Set the default values for cache-related parameters. */
9944 set_param_value ("simultaneous-prefetches", 6);
9945 set_param_value ("l1-cache-line-size", 32);
9946
9947 }
9948
9949 /* HP-UX version_id attribute.
9950 For object foo, if the version_id is set to 1234 put out an alias
9951 of '.alias foo "foo{1234}" We can't use "foo{1234}" in anything
9952 other than an alias statement because it is an illegal symbol name. */
9953
9954 static tree
9955 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
9956 tree name ATTRIBUTE_UNUSED,
9957 tree args,
9958 int flags ATTRIBUTE_UNUSED,
9959 bool *no_add_attrs)
9960 {
9961 tree arg = TREE_VALUE (args);
9962
9963 if (TREE_CODE (arg) != STRING_CST)
9964 {
9965 error("version attribute is not a string");
9966 *no_add_attrs = true;
9967 return NULL_TREE;
9968 }
9969 return NULL_TREE;
9970 }
9971
9972 /* Target hook for c_mode_for_suffix. */
9973
9974 static enum machine_mode
9975 ia64_c_mode_for_suffix (char suffix)
9976 {
9977 if (suffix == 'q')
9978 return TFmode;
9979 if (suffix == 'w')
9980 return XFmode;
9981
9982 return VOIDmode;
9983 }
9984
9985 #include "gt-ia64.h"