1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
3 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by James E. Wilson <wilson@cygnus.com> and
6 David Mosberger <davidm@hpl.hp.com>.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "except.h"
41 #include "function.h"
42 #include "ggc.h"
43 #include "basic-block.h"
44 #include "libfuncs.h"
45 #include "diagnostic-core.h"
46 #include "sched-int.h"
47 #include "timevar.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "tm_p.h"
51 #include "hashtab.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "gimple.h"
55 #include "intl.h"
56 #include "df.h"
57 #include "debug.h"
58 #include "params.h"
59 #include "dbgcnt.h"
60 #include "tm-constrs.h"
61 #include "sel-sched.h"
62 #include "reload.h"
63 #include "dwarf2out.h"
64 #include "opts.h"
65
66 /* This is used for communication between ASM_OUTPUT_LABEL and
67 ASM_OUTPUT_LABELREF. */
68 int ia64_asm_output_label = 0;
69
70 /* Register names for ia64_expand_prologue. */
71 static const char * const ia64_reg_numbers[96] =
72 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
73 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
74 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
75 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
76 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
77 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
78 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
79 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
80 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
81 "r104","r105","r106","r107","r108","r109","r110","r111",
82 "r112","r113","r114","r115","r116","r117","r118","r119",
83 "r120","r121","r122","r123","r124","r125","r126","r127"};
84
85 /* ??? These strings could be shared with REGISTER_NAMES. */
86 static const char * const ia64_input_reg_names[8] =
87 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
88
89 /* ??? These strings could be shared with REGISTER_NAMES. */
90 static const char * const ia64_local_reg_names[80] =
91 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
92 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
93 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
94 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
95 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
96 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
97 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
98 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
99 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
100 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
101
102 /* ??? These strings could be shared with REGISTER_NAMES. */
103 static const char * const ia64_output_reg_names[8] =
104 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
105
106 /* Variables which are this size or smaller are put in the sdata/sbss
107 sections. */
108
109 unsigned int ia64_section_threshold;
110
111 /* The following variable is used by the DFA insn scheduler. The value is
112 TRUE if we do insn bundling instead of insn scheduling. */
113 int bundling_p = 0;
114
115 enum ia64_frame_regs
116 {
117 reg_fp,
118 reg_save_b0,
119 reg_save_pr,
120 reg_save_ar_pfs,
121 reg_save_ar_unat,
122 reg_save_ar_lc,
123 reg_save_gp,
124 number_of_ia64_frame_regs
125 };
126
127 /* Structure to be filled in by ia64_compute_frame_size with register
128 save masks and offsets for the current function. */
129
130 struct ia64_frame_info
131 {
132 HOST_WIDE_INT total_size; /* size of the stack frame, not including
133 the caller's scratch area. */
134 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
135 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
136 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
137 HARD_REG_SET mask; /* mask of saved registers. */
138 unsigned int gr_used_mask; /* mask of registers in use as gr spill
139 registers or long-term scratches. */
140 int n_spilled; /* number of spilled registers. */
141 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
142 int n_input_regs; /* number of input registers used. */
143 int n_local_regs; /* number of local registers used. */
144 int n_output_regs; /* number of output registers used. */
145 int n_rotate_regs; /* number of rotating registers used. */
146
147 char need_regstk; /* true if a .regstk directive needed. */
148 char initialized; /* true if the data is finalized. */
149 };
150
151 /* Current frame information calculated by ia64_compute_frame_size. */
152 static struct ia64_frame_info current_frame_info;
153 /* The actual registers that are emitted. */
154 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
155 \f
156 static int ia64_first_cycle_multipass_dfa_lookahead (void);
157 static void ia64_dependencies_evaluation_hook (rtx, rtx);
158 static void ia64_init_dfa_pre_cycle_insn (void);
159 static rtx ia64_dfa_pre_cycle_insn (void);
160 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
161 static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
162 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
163 static void ia64_h_i_d_extended (void);
164 static void * ia64_alloc_sched_context (void);
165 static void ia64_init_sched_context (void *, bool);
166 static void ia64_set_sched_context (void *);
167 static void ia64_clear_sched_context (void *);
168 static void ia64_free_sched_context (void *);
169 static int ia64_mode_to_int (enum machine_mode);
170 static void ia64_set_sched_flags (spec_info_t);
171 static ds_t ia64_get_insn_spec_ds (rtx);
172 static ds_t ia64_get_insn_checked_ds (rtx);
173 static bool ia64_skip_rtx_p (const_rtx);
174 static int ia64_speculate_insn (rtx, ds_t, rtx *);
175 static bool ia64_needs_block_p (int);
176 static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
177 static int ia64_spec_check_p (rtx);
178 static int ia64_spec_check_src_p (rtx);
179 static rtx gen_tls_get_addr (void);
180 static rtx gen_thread_pointer (void);
181 static int find_gr_spill (enum ia64_frame_regs, int);
182 static int next_scratch_gr_reg (void);
183 static void mark_reg_gr_used_mask (rtx, void *);
184 static void ia64_compute_frame_size (HOST_WIDE_INT);
185 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
186 static void finish_spill_pointers (void);
187 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
188 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
189 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
190 static rtx gen_movdi_x (rtx, rtx, rtx);
191 static rtx gen_fr_spill_x (rtx, rtx, rtx);
192 static rtx gen_fr_restore_x (rtx, rtx, rtx);
193
194 static void ia64_option_override (void);
195 static bool ia64_can_eliminate (const int, const int);
196 static enum machine_mode hfa_element_mode (const_tree, bool);
197 static void ia64_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
198 tree, int *, int);
199 static int ia64_arg_partial_bytes (cumulative_args_t, enum machine_mode,
200 tree, bool);
201 static rtx ia64_function_arg_1 (cumulative_args_t, enum machine_mode,
202 const_tree, bool, bool);
203 static rtx ia64_function_arg (cumulative_args_t, enum machine_mode,
204 const_tree, bool);
205 static rtx ia64_function_incoming_arg (cumulative_args_t,
206 enum machine_mode, const_tree, bool);
207 static void ia64_function_arg_advance (cumulative_args_t, enum machine_mode,
208 const_tree, bool);
209 static unsigned int ia64_function_arg_boundary (enum machine_mode,
210 const_tree);
211 static bool ia64_function_ok_for_sibcall (tree, tree);
212 static bool ia64_return_in_memory (const_tree, const_tree);
213 static rtx ia64_function_value (const_tree, const_tree, bool);
214 static rtx ia64_libcall_value (enum machine_mode, const_rtx);
215 static bool ia64_function_value_regno_p (const unsigned int);
216 static int ia64_register_move_cost (enum machine_mode, reg_class_t,
217 reg_class_t);
218 static int ia64_memory_move_cost (enum machine_mode mode, reg_class_t,
219 bool);
220 static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
221 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
222 static void fix_range (const char *);
223 static struct machine_function * ia64_init_machine_status (void);
224 static void emit_insn_group_barriers (FILE *);
225 static void emit_all_insn_group_barriers (FILE *);
226 static void final_emit_insn_group_barriers (FILE *);
227 static void emit_predicate_relation_info (void);
228 static void ia64_reorg (void);
229 static bool ia64_in_small_data_p (const_tree);
230 static void process_epilogue (FILE *, rtx, bool, bool);
231
232 static bool ia64_assemble_integer (rtx, unsigned int, int);
233 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
234 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
235 static void ia64_output_function_end_prologue (FILE *);
236
237 static void ia64_print_operand (FILE *, rtx, int);
238 static void ia64_print_operand_address (FILE *, rtx);
239 static bool ia64_print_operand_punct_valid_p (unsigned char code);
240
241 static int ia64_issue_rate (void);
242 static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
243 static void ia64_sched_init (FILE *, int, int);
244 static void ia64_sched_init_global (FILE *, int, int);
245 static void ia64_sched_finish_global (FILE *, int);
246 static void ia64_sched_finish (FILE *, int);
247 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
248 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
249 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
250 static int ia64_variable_issue (FILE *, int, rtx, int);
251
252 static void ia64_asm_unwind_emit (FILE *, rtx);
253 static void ia64_asm_emit_except_personality (rtx);
254 static void ia64_asm_init_sections (void);
255
256 static enum unwind_info_type ia64_debug_unwind_info (void);
257
258 static struct bundle_state *get_free_bundle_state (void);
259 static void free_bundle_state (struct bundle_state *);
260 static void initiate_bundle_states (void);
261 static void finish_bundle_states (void);
262 static unsigned bundle_state_hash (const void *);
263 static int bundle_state_eq_p (const void *, const void *);
264 static int insert_bundle_state (struct bundle_state *);
265 static void initiate_bundle_state_table (void);
266 static void finish_bundle_state_table (void);
267 static int try_issue_nops (struct bundle_state *, int);
268 static int try_issue_insn (struct bundle_state *, rtx);
269 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
270 static int get_max_pos (state_t);
271 static int get_template (state_t, int);
272
273 static rtx get_next_important_insn (rtx, rtx);
274 static bool important_for_bundling_p (rtx);
275 static void bundling (FILE *, int, rtx, rtx);
276
277 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
278 HOST_WIDE_INT, tree);
279 static void ia64_file_start (void);
280 static void ia64_globalize_decl_name (FILE *, tree);
281
282 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
283 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
284 static section *ia64_select_rtx_section (enum machine_mode, rtx,
285 unsigned HOST_WIDE_INT);
286 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
287 ATTRIBUTE_UNUSED;
288 static unsigned int ia64_section_type_flags (tree, const char *, int);
289 static void ia64_init_libfuncs (void)
290 ATTRIBUTE_UNUSED;
291 static void ia64_hpux_init_libfuncs (void)
292 ATTRIBUTE_UNUSED;
293 static void ia64_sysv4_init_libfuncs (void)
294 ATTRIBUTE_UNUSED;
295 static void ia64_vms_init_libfuncs (void)
296 ATTRIBUTE_UNUSED;
297 static void ia64_soft_fp_init_libfuncs (void)
298 ATTRIBUTE_UNUSED;
299 static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
300 ATTRIBUTE_UNUSED;
301 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
302 ATTRIBUTE_UNUSED;
303
304 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
305 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
306 static void ia64_encode_section_info (tree, rtx, int);
307 static rtx ia64_struct_value_rtx (tree, int);
308 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
309 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
310 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
311 static bool ia64_legitimate_constant_p (enum machine_mode, rtx);
312 static bool ia64_legitimate_address_p (enum machine_mode, rtx, bool);
313 static bool ia64_cannot_force_const_mem (enum machine_mode, rtx);
314 static const char *ia64_mangle_type (const_tree);
315 static const char *ia64_invalid_conversion (const_tree, const_tree);
316 static const char *ia64_invalid_unary_op (int, const_tree);
317 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
318 static enum machine_mode ia64_c_mode_for_suffix (char);
319 static void ia64_trampoline_init (rtx, tree, rtx);
320 static void ia64_override_options_after_change (void);
321
322 static tree ia64_builtin_decl (unsigned, bool);
323
324 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
325 static enum machine_mode ia64_get_reg_raw_mode (int regno);
326 static section * ia64_hpux_function_section (tree, enum node_frequency,
327 bool, bool);
328
329 static bool ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
330 const unsigned char *sel);
331
332 #define MAX_VECT_LEN 8
333
334 struct expand_vec_perm_d
335 {
336 rtx target, op0, op1;
337 unsigned char perm[MAX_VECT_LEN];
338 enum machine_mode vmode;
339 unsigned char nelt;
340 bool one_operand_p;
341 bool testing_p;
342 };
343
344 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
345
346 \f
347 /* Table of valid machine attributes. */
348 static const struct attribute_spec ia64_attribute_table[] =
349 {
350 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
351 affects_type_identity } */
352 { "syscall_linkage", 0, 0, false, true, true, NULL, false },
353 { "model", 1, 1, true, false, false, ia64_handle_model_attribute,
354 false },
355 #if TARGET_ABI_OPEN_VMS
356 { "common_object", 1, 1, true, false, false,
357 ia64_vms_common_object_attribute, false },
358 #endif
359 { "version_id", 1, 1, true, false, false,
360 ia64_handle_version_id_attribute, false },
361 { NULL, 0, 0, false, false, false, NULL, false }
362 };
363
364 /* Initialize the GCC target structure. */
365 #undef TARGET_ATTRIBUTE_TABLE
366 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
367
368 #undef TARGET_INIT_BUILTINS
369 #define TARGET_INIT_BUILTINS ia64_init_builtins
370
371 #undef TARGET_EXPAND_BUILTIN
372 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
373
374 #undef TARGET_BUILTIN_DECL
375 #define TARGET_BUILTIN_DECL ia64_builtin_decl
376
377 #undef TARGET_ASM_BYTE_OP
378 #define TARGET_ASM_BYTE_OP "\tdata1\t"
379 #undef TARGET_ASM_ALIGNED_HI_OP
380 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
381 #undef TARGET_ASM_ALIGNED_SI_OP
382 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
383 #undef TARGET_ASM_ALIGNED_DI_OP
384 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
385 #undef TARGET_ASM_UNALIGNED_HI_OP
386 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
387 #undef TARGET_ASM_UNALIGNED_SI_OP
388 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
389 #undef TARGET_ASM_UNALIGNED_DI_OP
390 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
391 #undef TARGET_ASM_INTEGER
392 #define TARGET_ASM_INTEGER ia64_assemble_integer
393
394 #undef TARGET_OPTION_OVERRIDE
395 #define TARGET_OPTION_OVERRIDE ia64_option_override
396
397 #undef TARGET_ASM_FUNCTION_PROLOGUE
398 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
399 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
400 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
401 #undef TARGET_ASM_FUNCTION_EPILOGUE
402 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
403
404 #undef TARGET_PRINT_OPERAND
405 #define TARGET_PRINT_OPERAND ia64_print_operand
406 #undef TARGET_PRINT_OPERAND_ADDRESS
407 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
408 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
409 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
410
411 #undef TARGET_IN_SMALL_DATA_P
412 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
413
414 #undef TARGET_SCHED_ADJUST_COST_2
415 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
416 #undef TARGET_SCHED_ISSUE_RATE
417 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
418 #undef TARGET_SCHED_VARIABLE_ISSUE
419 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
420 #undef TARGET_SCHED_INIT
421 #define TARGET_SCHED_INIT ia64_sched_init
422 #undef TARGET_SCHED_FINISH
423 #define TARGET_SCHED_FINISH ia64_sched_finish
424 #undef TARGET_SCHED_INIT_GLOBAL
425 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
426 #undef TARGET_SCHED_FINISH_GLOBAL
427 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
428 #undef TARGET_SCHED_REORDER
429 #define TARGET_SCHED_REORDER ia64_sched_reorder
430 #undef TARGET_SCHED_REORDER2
431 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
432
433 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
434 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
435
436 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
437 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
438
439 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
440 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
441 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
442 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
443
444 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
445 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
446 ia64_first_cycle_multipass_dfa_lookahead_guard
447
448 #undef TARGET_SCHED_DFA_NEW_CYCLE
449 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
450
451 #undef TARGET_SCHED_H_I_D_EXTENDED
452 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
453
454 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
455 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
456
457 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
458 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
459
460 #undef TARGET_SCHED_SET_SCHED_CONTEXT
461 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
462
463 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
464 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
465
466 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
467 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
468
469 #undef TARGET_SCHED_SET_SCHED_FLAGS
470 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
471
472 #undef TARGET_SCHED_GET_INSN_SPEC_DS
473 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
474
475 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
476 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
477
478 #undef TARGET_SCHED_SPECULATE_INSN
479 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
480
481 #undef TARGET_SCHED_NEEDS_BLOCK_P
482 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
483
484 #undef TARGET_SCHED_GEN_SPEC_CHECK
485 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
486
487 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
488 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
489 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
490
491 #undef TARGET_SCHED_SKIP_RTX_P
492 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
493
494 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
495 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
496 #undef TARGET_ARG_PARTIAL_BYTES
497 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
498 #undef TARGET_FUNCTION_ARG
499 #define TARGET_FUNCTION_ARG ia64_function_arg
500 #undef TARGET_FUNCTION_INCOMING_ARG
501 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
502 #undef TARGET_FUNCTION_ARG_ADVANCE
503 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
504 #undef TARGET_FUNCTION_ARG_BOUNDARY
505 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
506
507 #undef TARGET_ASM_OUTPUT_MI_THUNK
508 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
509 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
510 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
511
512 #undef TARGET_ASM_FILE_START
513 #define TARGET_ASM_FILE_START ia64_file_start
514
515 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
516 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
517
518 #undef TARGET_REGISTER_MOVE_COST
519 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
520 #undef TARGET_MEMORY_MOVE_COST
521 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
522 #undef TARGET_RTX_COSTS
523 #define TARGET_RTX_COSTS ia64_rtx_costs
524 #undef TARGET_ADDRESS_COST
525 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
526
527 #undef TARGET_UNSPEC_MAY_TRAP_P
528 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
529
530 #undef TARGET_MACHINE_DEPENDENT_REORG
531 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
532
533 #undef TARGET_ENCODE_SECTION_INFO
534 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
535
536 #undef TARGET_SECTION_TYPE_FLAGS
537 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
538
539 #ifdef HAVE_AS_TLS
540 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
541 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
542 #endif
543
544 /* ??? Investigate. */
545 #if 0
546 #undef TARGET_PROMOTE_PROTOTYPES
547 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
548 #endif
549
550 #undef TARGET_FUNCTION_VALUE
551 #define TARGET_FUNCTION_VALUE ia64_function_value
552 #undef TARGET_LIBCALL_VALUE
553 #define TARGET_LIBCALL_VALUE ia64_libcall_value
554 #undef TARGET_FUNCTION_VALUE_REGNO_P
555 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
556
557 #undef TARGET_STRUCT_VALUE_RTX
558 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
559 #undef TARGET_RETURN_IN_MEMORY
560 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
561 #undef TARGET_SETUP_INCOMING_VARARGS
562 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
563 #undef TARGET_STRICT_ARGUMENT_NAMING
564 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
565 #undef TARGET_MUST_PASS_IN_STACK
566 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
567 #undef TARGET_GET_RAW_RESULT_MODE
568 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
569 #undef TARGET_GET_RAW_ARG_MODE
570 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
571
572 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
573 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
574
575 #undef TARGET_ASM_UNWIND_EMIT
576 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
577 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
578 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
579 #undef TARGET_ASM_INIT_SECTIONS
580 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
581
582 #undef TARGET_DEBUG_UNWIND_INFO
583 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
584
585 #undef TARGET_SCALAR_MODE_SUPPORTED_P
586 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
587 #undef TARGET_VECTOR_MODE_SUPPORTED_P
588 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
589
590 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
591 in an order different from the specified program order. */
592 #undef TARGET_RELAXED_ORDERING
593 #define TARGET_RELAXED_ORDERING true
594
595 #undef TARGET_LEGITIMATE_CONSTANT_P
596 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
597 #undef TARGET_LEGITIMATE_ADDRESS_P
598 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
599
600 #undef TARGET_CANNOT_FORCE_CONST_MEM
601 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
602
603 #undef TARGET_MANGLE_TYPE
604 #define TARGET_MANGLE_TYPE ia64_mangle_type
605
606 #undef TARGET_INVALID_CONVERSION
607 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
608 #undef TARGET_INVALID_UNARY_OP
609 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
610 #undef TARGET_INVALID_BINARY_OP
611 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
612
613 #undef TARGET_C_MODE_FOR_SUFFIX
614 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
615
616 #undef TARGET_CAN_ELIMINATE
617 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
618
619 #undef TARGET_TRAMPOLINE_INIT
620 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
621
622 #undef TARGET_INVALID_WITHIN_DOLOOP
623 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
624
625 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
626 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
627
628 #undef TARGET_PREFERRED_RELOAD_CLASS
629 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
630
631 #undef TARGET_DELAY_SCHED2
632 #define TARGET_DELAY_SCHED2 true
633
634 /* Variable tracking should be run after all optimizations which
635 change order of insns. It also needs a valid CFG. */
636 #undef TARGET_DELAY_VARTRACK
637 #define TARGET_DELAY_VARTRACK true
638
639 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
640 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
641
642 struct gcc_target targetm = TARGET_INITIALIZER;
643 \f
644 typedef enum
645 {
646 ADDR_AREA_NORMAL, /* normal address area */
647 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
648 }
649 ia64_addr_area;
650
651 static GTY(()) tree small_ident1;
652 static GTY(()) tree small_ident2;
653
654 static void
655 init_idents (void)
656 {
657 if (small_ident1 == 0)
658 {
659 small_ident1 = get_identifier ("small");
660 small_ident2 = get_identifier ("__small__");
661 }
662 }
663
664 /* Retrieve the address area that has been chosen for the given decl. */
665
666 static ia64_addr_area
667 ia64_get_addr_area (tree decl)
668 {
669 tree model_attr;
670
671 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
672 if (model_attr)
673 {
674 tree id;
675
676 init_idents ();
677 id = TREE_VALUE (TREE_VALUE (model_attr));
678 if (id == small_ident1 || id == small_ident2)
679 return ADDR_AREA_SMALL;
680 }
681 return ADDR_AREA_NORMAL;
682 }
683
684 static tree
685 ia64_handle_model_attribute (tree *node, tree name, tree args,
686 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
687 {
688 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
689 ia64_addr_area area;
690 tree arg, decl = *node;
691
692 init_idents ();
693 arg = TREE_VALUE (args);
694 if (arg == small_ident1 || arg == small_ident2)
695 {
696 addr_area = ADDR_AREA_SMALL;
697 }
698 else
699 {
700 warning (OPT_Wattributes, "invalid argument of %qE attribute",
701 name);
702 *no_add_attrs = true;
703 }
704
705 switch (TREE_CODE (decl))
706 {
707 case VAR_DECL:
708 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
709 == FUNCTION_DECL)
710 && !TREE_STATIC (decl))
711 {
712 error_at (DECL_SOURCE_LOCATION (decl),
713 "an address area attribute cannot be specified for "
714 "local variables");
715 *no_add_attrs = true;
716 }
717 area = ia64_get_addr_area (decl);
718 if (area != ADDR_AREA_NORMAL && addr_area != area)
719 {
720 error ("address area of %q+D conflicts with previous "
721 "declaration", decl);
722 *no_add_attrs = true;
723 }
724 break;
725
726 case FUNCTION_DECL:
727 error_at (DECL_SOURCE_LOCATION (decl),
728 "address area attribute cannot be specified for "
729 "functions");
730 *no_add_attrs = true;
731 break;
732
733 default:
734 warning (OPT_Wattributes, "%qE attribute ignored",
735 name);
736 *no_add_attrs = true;
737 break;
738 }
739
740 return NULL_TREE;
741 }
742
743 /* The section must have global and overlaid attributes. */
744 #define SECTION_VMS_OVERLAY SECTION_MACH_DEP
745
746 /* Part of the low level implementation of DEC Ada pragma Common_Object which
747 enables the shared use of variables stored in overlaid linker areas
748 corresponding to the use of Fortran COMMON. */
749
750 static tree
751 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
752 int flags ATTRIBUTE_UNUSED,
753 bool *no_add_attrs)
754 {
755 tree decl = *node;
756 tree id, val;
757 if (! DECL_P (decl))
758 abort ();
759
760 DECL_COMMON (decl) = 1;
761 id = TREE_VALUE (args);
762 if (TREE_CODE (id) == IDENTIFIER_NODE)
763 val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
764 else if (TREE_CODE (id) == STRING_CST)
765 val = id;
766 else
767 {
768 warning (OPT_Wattributes,
769 "%qE attribute requires a string constant argument", name);
770 *no_add_attrs = true;
771 return NULL_TREE;
772 }
773 DECL_SECTION_NAME (decl) = val;
774 return NULL_TREE;
775 }
776
777 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
778
779 void
780 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
781 unsigned HOST_WIDE_INT size,
782 unsigned int align)
783 {
784 tree attr = DECL_ATTRIBUTES (decl);
785
786 /* As the common_object attribute sets DECL_SECTION_NAME, check it before
787 looking up the attribute. */
788 if (DECL_SECTION_NAME (decl) && attr)
789 attr = lookup_attribute ("common_object", attr);
790 else
791 attr = NULL_TREE;
792
793 if (!attr)
794 {
795 /* Code from elfos.h. */
796 fprintf (file, "%s", COMMON_ASM_OP);
797 assemble_name (file, name);
798 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
799 size, align / BITS_PER_UNIT);
800 }
801 else
802 {
803 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
804 ASM_OUTPUT_LABEL (file, name);
805 ASM_OUTPUT_SKIP (file, size ? size : 1);
806 }
807 }
808
809 /* Definition of TARGET_ASM_NAMED_SECTION for VMS. */
810
811 void
812 ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
813 tree decl)
814 {
815 if (!(flags & SECTION_VMS_OVERLAY))
816 {
817 default_elf_asm_named_section (name, flags, decl);
818 return;
819 }
820 if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
821 abort ();
822
823 if (flags & SECTION_DECLARED)
824 {
825 fprintf (asm_out_file, "\t.section\t%s\n", name);
826 return;
827 }
828
829 fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
830 }
831
832 static void
833 ia64_encode_addr_area (tree decl, rtx symbol)
834 {
835 int flags;
836
837 flags = SYMBOL_REF_FLAGS (symbol);
838 switch (ia64_get_addr_area (decl))
839 {
840 case ADDR_AREA_NORMAL: break;
841 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
842 default: gcc_unreachable ();
843 }
844 SYMBOL_REF_FLAGS (symbol) = flags;
845 }
846
847 static void
848 ia64_encode_section_info (tree decl, rtx rtl, int first)
849 {
850 default_encode_section_info (decl, rtl, first);
851
852 /* Careful not to prod global register variables. */
853 if (TREE_CODE (decl) == VAR_DECL
854 && GET_CODE (DECL_RTL (decl)) == MEM
855 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
856 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
857 ia64_encode_addr_area (decl, XEXP (rtl, 0));
858 }
859 \f
860 /* Return 1 if the operands of a move are ok. */
861
862 int
863 ia64_move_ok (rtx dst, rtx src)
864 {
865 /* If we're under init_recog_no_volatile, we'll not be able to use
866 memory_operand. So check the code directly and don't worry about
867 the validity of the underlying address, which should have been
868 checked elsewhere anyway. */
869 if (GET_CODE (dst) != MEM)
870 return 1;
871 if (GET_CODE (src) == MEM)
872 return 0;
873 if (register_operand (src, VOIDmode))
874 return 1;
875
876 /* Otherwise, this must be a constant, and it must be 0, 0.0, or 1.0. */
877 if (INTEGRAL_MODE_P (GET_MODE (dst)))
878 return src == const0_rtx;
879 else
880 return satisfies_constraint_G (src);
881 }
882
883 /* Return 1 if the operands are ok for a floating point load pair. */
884
885 int
886 ia64_load_pair_ok (rtx dst, rtx src)
887 {
888 if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
889 return 0;
890 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
891 return 0;
892 switch (GET_CODE (XEXP (src, 0)))
893 {
894 case REG:
895 case POST_INC:
896 break;
897 case POST_DEC:
898 return 0;
899 case POST_MODIFY:
900 {
901 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
902
903 if (GET_CODE (adjust) != CONST_INT
904 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
905 return 0;
906 }
907 break;
908 default:
909 abort ();
910 }
911 return 1;
912 }
913
914 int
915 addp4_optimize_ok (rtx op1, rtx op2)
916 {
917 return (basereg_operand (op1, GET_MODE (op1)) !=
918 basereg_operand (op2, GET_MODE (op2)));
919 }
920
921 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
922 Return the length of the field, or <= 0 on failure. */
923
924 int
925 ia64_depz_field_mask (rtx rop, rtx rshift)
926 {
927 unsigned HOST_WIDE_INT op = INTVAL (rop);
928 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
929
930 /* Get rid of the zero bits we're shifting in. */
931 op >>= shift;
932
933 /* We must now have a solid block of 1's at bit 0. */
934 return exact_log2 (op + 1);
935 }
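/* For illustration (hypothetical values, not part of the original
   source): with rop = 0xff00 and rshift = 8, the shift leaves
   op = 0xff, and exact_log2 (0xff + 1) returns 8, i.e. an 8-bit
   deposit field.  With rop = 0xf0f0 and rshift = 4, the shifted
   value 0xf0f is not a solid block of low-order ones, so
   exact_log2 (0xf10) fails and the function returns a value <= 0.  */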
936
937 /* Return the TLS model to use for ADDR. */
938
939 static enum tls_model
940 tls_symbolic_operand_type (rtx addr)
941 {
942 enum tls_model tls_kind = TLS_MODEL_NONE;
943
944 if (GET_CODE (addr) == CONST)
945 {
946 if (GET_CODE (XEXP (addr, 0)) == PLUS
947 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
948 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
949 }
950 else if (GET_CODE (addr) == SYMBOL_REF)
951 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
952
953 return tls_kind;
954 }
955
956 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
957 as a base register. */
958
959 static inline bool
960 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
961 {
962 if (strict
963 && REGNO_OK_FOR_BASE_P (REGNO (reg)))
964 return true;
965 else if (!strict
966 && (GENERAL_REGNO_P (REGNO (reg))
967 || !HARD_REGISTER_P (reg)))
968 return true;
969 else
970 return false;
971 }
972
973 static bool
974 ia64_legitimate_address_reg (const_rtx reg, bool strict)
975 {
976 if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
977 || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
978 && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
979 return true;
980
981 return false;
982 }
983
984 static bool
985 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
986 {
987 if (GET_CODE (disp) == PLUS
988 && rtx_equal_p (reg, XEXP (disp, 0))
989 && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
990 || (CONST_INT_P (XEXP (disp, 1))
991 && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
992 return true;
993
994 return false;
995 }
996
997 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
998
999 static bool
1000 ia64_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
1001 rtx x, bool strict)
1002 {
1003 if (ia64_legitimate_address_reg (x, strict))
1004 return true;
1005 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1006 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1007 && XEXP (x, 0) != arg_pointer_rtx)
1008 return true;
1009 else if (GET_CODE (x) == POST_MODIFY
1010 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1011 && XEXP (x, 0) != arg_pointer_rtx
1012 && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1013 return true;
1014 else
1015 return false;
1016 }
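/* For illustration (hypothetical RTL, not part of the original
   source): the address forms accepted above are a plain base
   register such as (reg r4); a post-increment or post-decrement of
   a base register other than the argument pointer, such as
   (post_inc (reg r4)); and a post-modify whose displacement is
   either another base register or a constant in [-256, 255], e.g.
   (post_modify (reg r4) (plus (reg r4) (const_int 16))).  */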
1017
1018 /* Return true if X is a constant that is valid for some immediate
1019 field in an instruction. */
1020
1021 static bool
1022 ia64_legitimate_constant_p (enum machine_mode mode, rtx x)
1023 {
1024 switch (GET_CODE (x))
1025 {
1026 case CONST_INT:
1027 case LABEL_REF:
1028 return true;
1029
1030 case CONST_DOUBLE:
1031 if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1032 return true;
1033 return satisfies_constraint_G (x);
1034
1035 case CONST:
1036 case SYMBOL_REF:
1037 /* ??? Short term workaround for PR 28490. We must make the code here
1038 match the code in ia64_expand_move and move_operand, even though they
1039 are both technically wrong. */
1040 if (tls_symbolic_operand_type (x) == 0)
1041 {
1042 HOST_WIDE_INT addend = 0;
1043 rtx op = x;
1044
1045 if (GET_CODE (op) == CONST
1046 && GET_CODE (XEXP (op, 0)) == PLUS
1047 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1048 {
1049 addend = INTVAL (XEXP (XEXP (op, 0), 1));
1050 op = XEXP (XEXP (op, 0), 0);
1051 }
1052
1053 if (any_offset_symbol_operand (op, mode)
1054 || function_operand (op, mode))
1055 return true;
1056 if (aligned_offset_symbol_operand (op, mode))
1057 return (addend & 0x3fff) == 0;
1058 return false;
1059 }
1060 return false;
1061
1062 case CONST_VECTOR:
1063 if (mode == V2SFmode)
1064 return satisfies_constraint_Y (x);
1065
1066 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1067 && GET_MODE_SIZE (mode) <= 8);
1068
1069 default:
1070 return false;
1071 }
1072 }
1073
1074 /* Don't allow TLS addresses to get spilled to memory. */
1075
1076 static bool
1077 ia64_cannot_force_const_mem (enum machine_mode mode, rtx x)
1078 {
1079 if (mode == RFmode)
1080 return true;
1081 return tls_symbolic_operand_type (x) != 0;
1082 }
1083
1084 /* Expand a symbolic constant load. */
1085
1086 bool
1087 ia64_expand_load_address (rtx dest, rtx src)
1088 {
1089 gcc_assert (GET_CODE (dest) == REG);
1090
1091 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1092 having to pointer-extend the value afterward. Other forms of address
1093 computation below are also more natural to compute as 64-bit quantities.
1094 If we've been given an SImode destination register, change it. */
1095 if (GET_MODE (dest) != Pmode)
1096 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1097 byte_lowpart_offset (Pmode, GET_MODE (dest)));
1098
1099 if (TARGET_NO_PIC)
1100 return false;
1101 if (small_addr_symbolic_operand (src, VOIDmode))
1102 return false;
1103
1104 if (TARGET_AUTO_PIC)
1105 emit_insn (gen_load_gprel64 (dest, src));
1106 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1107 emit_insn (gen_load_fptr (dest, src));
1108 else if (sdata_symbolic_operand (src, VOIDmode))
1109 emit_insn (gen_load_gprel (dest, src));
1110 else
1111 {
1112 HOST_WIDE_INT addend = 0;
1113 rtx tmp;
1114
1115 /* We did split constant offsets in ia64_expand_move, and we did try
1116 to keep them split in move_operand, but we also allowed reload to
1117 rematerialize arbitrary constants rather than spill the value to
1118 the stack and reload it. So we have to be prepared here to split
1119 them apart again. */
1120 if (GET_CODE (src) == CONST)
1121 {
1122 HOST_WIDE_INT hi, lo;
1123
1124 hi = INTVAL (XEXP (XEXP (src, 0), 1));
1125 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1126 hi = hi - lo;
1127
1128 if (lo != 0)
1129 {
1130 addend = lo;
1131 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
1132 }
1133 }
1134
1135 tmp = gen_rtx_HIGH (Pmode, src);
1136 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1137 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1138
1139 tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1140 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1141
1142 if (addend)
1143 {
1144 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1145 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1146 }
1147 }
1148
1149 return true;
1150 }
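/* For illustration (hypothetical values, not part of the original
   source): the ((x & 0x3fff) ^ 0x2000) - 0x2000 idiom used above
   sign-extends the low 14 bits of the offset.  For an addend of
   0x12345 it yields lo = -0x1cbb and hi = 0x14000, so hi is a
   multiple of 0x4000, lo lies in the signed 14-bit range
   [-8192, 8191], and hi + lo recovers the original 0x12345.  */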
1151
1152 static GTY(()) rtx gen_tls_tga;
1153 static rtx
1154 gen_tls_get_addr (void)
1155 {
1156 if (!gen_tls_tga)
1157 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1158 return gen_tls_tga;
1159 }
1160
1161 static GTY(()) rtx thread_pointer_rtx;
1162 static rtx
1163 gen_thread_pointer (void)
1164 {
1165 if (!thread_pointer_rtx)
1166 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1167 return thread_pointer_rtx;
1168 }
1169
1170 static rtx
1171 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1172 rtx orig_op1, HOST_WIDE_INT addend)
1173 {
1174 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1175 rtx orig_op0 = op0;
1176 HOST_WIDE_INT addend_lo, addend_hi;
1177
1178 switch (tls_kind)
1179 {
1180 case TLS_MODEL_GLOBAL_DYNAMIC:
1181 start_sequence ();
1182
1183 tga_op1 = gen_reg_rtx (Pmode);
1184 emit_insn (gen_load_dtpmod (tga_op1, op1));
1185
1186 tga_op2 = gen_reg_rtx (Pmode);
1187 emit_insn (gen_load_dtprel (tga_op2, op1));
1188
1189 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1190 LCT_CONST, Pmode, 2, tga_op1,
1191 Pmode, tga_op2, Pmode);
1192
1193 insns = get_insns ();
1194 end_sequence ();
1195
1196 if (GET_MODE (op0) != Pmode)
1197 op0 = tga_ret;
1198 emit_libcall_block (insns, op0, tga_ret, op1);
1199 break;
1200
1201 case TLS_MODEL_LOCAL_DYNAMIC:
1202 /* ??? This isn't the completely proper way to do local-dynamic.
1203 If the call to __tls_get_addr is used only by a single symbol,
1204 then we should (somehow) move the dtprel to the second arg
1205 to avoid the extra add. */
1206 start_sequence ();
1207
1208 tga_op1 = gen_reg_rtx (Pmode);
1209 emit_insn (gen_load_dtpmod (tga_op1, op1));
1210
1211 tga_op2 = const0_rtx;
1212
1213 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1214 LCT_CONST, Pmode, 2, tga_op1,
1215 Pmode, tga_op2, Pmode);
1216
1217 insns = get_insns ();
1218 end_sequence ();
1219
1220 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1221 UNSPEC_LD_BASE);
1222 tmp = gen_reg_rtx (Pmode);
1223 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1224
1225 if (!register_operand (op0, Pmode))
1226 op0 = gen_reg_rtx (Pmode);
1227 if (TARGET_TLS64)
1228 {
1229 emit_insn (gen_load_dtprel (op0, op1));
1230 emit_insn (gen_adddi3 (op0, tmp, op0));
1231 }
1232 else
1233 emit_insn (gen_add_dtprel (op0, op1, tmp));
1234 break;
1235
1236 case TLS_MODEL_INITIAL_EXEC:
1237 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1238 addend_hi = addend - addend_lo;
1239
1240 op1 = plus_constant (op1, addend_hi);
1241 addend = addend_lo;
1242
1243 tmp = gen_reg_rtx (Pmode);
1244 emit_insn (gen_load_tprel (tmp, op1));
1245
1246 if (!register_operand (op0, Pmode))
1247 op0 = gen_reg_rtx (Pmode);
1248 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1249 break;
1250
1251 case TLS_MODEL_LOCAL_EXEC:
1252 if (!register_operand (op0, Pmode))
1253 op0 = gen_reg_rtx (Pmode);
1254
1255 op1 = orig_op1;
1256 addend = 0;
1257 if (TARGET_TLS64)
1258 {
1259 emit_insn (gen_load_tprel (op0, op1));
1260 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1261 }
1262 else
1263 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1264 break;
1265
1266 default:
1267 gcc_unreachable ();
1268 }
1269
1270 if (addend)
1271 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1272 orig_op0, 1, OPTAB_DIRECT);
1273 if (orig_op0 == op0)
1274 return NULL_RTX;
1275 if (GET_MODE (orig_op0) == Pmode)
1276 return op0;
1277 return gen_lowpart (GET_MODE (orig_op0), op0);
1278 }
1279
1280 rtx
1281 ia64_expand_move (rtx op0, rtx op1)
1282 {
1283 enum machine_mode mode = GET_MODE (op0);
1284
1285 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1286 op1 = force_reg (mode, op1);
1287
1288 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1289 {
1290 HOST_WIDE_INT addend = 0;
1291 enum tls_model tls_kind;
1292 rtx sym = op1;
1293
1294 if (GET_CODE (op1) == CONST
1295 && GET_CODE (XEXP (op1, 0)) == PLUS
1296 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1297 {
1298 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1299 sym = XEXP (XEXP (op1, 0), 0);
1300 }
1301
1302 tls_kind = tls_symbolic_operand_type (sym);
1303 if (tls_kind)
1304 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1305
1306 if (any_offset_symbol_operand (sym, mode))
1307 addend = 0;
1308 else if (aligned_offset_symbol_operand (sym, mode))
1309 {
1310 HOST_WIDE_INT addend_lo, addend_hi;
1311
1312 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1313 addend_hi = addend - addend_lo;
1314
1315 if (addend_lo != 0)
1316 {
1317 op1 = plus_constant (sym, addend_hi);
1318 addend = addend_lo;
1319 }
1320 else
1321 addend = 0;
1322 }
1323 else
1324 op1 = sym;
1325
1326 if (reload_completed)
1327 {
1328 /* We really should have taken care of this offset earlier. */
1329 gcc_assert (addend == 0);
1330 if (ia64_expand_load_address (op0, op1))
1331 return NULL_RTX;
1332 }
1333
1334 if (addend)
1335 {
1336 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1337
1338 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1339
1340 op1 = expand_simple_binop (mode, PLUS, subtarget,
1341 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1342 if (op0 == op1)
1343 return NULL_RTX;
1344 }
1345 }
1346
1347 return op1;
1348 }
1349
1350 /* Split a move from OP1 to OP0 conditional on COND. */
1351
1352 void
1353 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1354 {
1355 rtx insn, first = get_last_insn ();
1356
1357 emit_move_insn (op0, op1);
1358
1359 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1360 if (INSN_P (insn))
1361 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1362 PATTERN (insn));
1363 }
1364
1365 /* Split a post-reload TImode or TFmode reference into two DImode
1366 components. This is made extra difficult by the fact that we do
1367 not get any scratch registers to work with, because reload cannot
1368 be prevented from giving us a scratch that overlaps the register
1369 pair involved. So instead, when addressing memory, we tweak the
1370 pointer register up and back down with POST_INCs. Or up and not
1371 back down when we can get away with it.
1372
1373 REVERSED is true when the loads must be done in reversed order
1374 (high word first) for correctness. DEAD is true when the pointer
1375 dies with the second insn we generate and therefore the second
1376 address must not carry a postmodify.
1377
1378 May return an insn which is to be emitted after the moves. */
1379
1380 static rtx
1381 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1382 {
1383 rtx fixup = 0;
1384
1385 switch (GET_CODE (in))
1386 {
1387 case REG:
1388 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1389 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1390 break;
1391
1392 case CONST_INT:
1393 case CONST_DOUBLE:
1394 /* Cannot occur reversed. */
1395 gcc_assert (!reversed);
1396
1397 if (GET_MODE (in) != TFmode)
1398 split_double (in, &out[0], &out[1]);
1399 else
1400 /* split_double does not understand how to split a TFmode
1401 quantity into a pair of DImode constants. */
1402 {
1403 REAL_VALUE_TYPE r;
1404 unsigned HOST_WIDE_INT p[2];
1405 long l[4]; /* TFmode is 128 bits */
1406
1407 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1408 real_to_target (l, &r, TFmode);
1409
1410 if (FLOAT_WORDS_BIG_ENDIAN)
1411 {
1412 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1413 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1414 }
1415 else
1416 {
1417 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1418 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1419 }
1420 out[0] = GEN_INT (p[0]);
1421 out[1] = GEN_INT (p[1]);
1422 }
1423 break;
1424
1425 case MEM:
1426 {
1427 rtx base = XEXP (in, 0);
1428 rtx offset;
1429
1430 switch (GET_CODE (base))
1431 {
1432 case REG:
1433 if (!reversed)
1434 {
1435 out[0] = adjust_automodify_address
1436 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1437 out[1] = adjust_automodify_address
1438 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1439 }
1440 else
1441 {
1442 /* Reversal requires a pre-increment, which can only
1443 be done as a separate insn. */
1444 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1445 out[0] = adjust_automodify_address
1446 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1447 out[1] = adjust_address (in, DImode, 0);
1448 }
1449 break;
1450
1451 case POST_INC:
1452 gcc_assert (!reversed && !dead);
1453
1454 /* Just do the increment in two steps. */
1455 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1456 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1457 break;
1458
1459 case POST_DEC:
1460 gcc_assert (!reversed && !dead);
1461
1462 /* Add 8, subtract 24. */
1463 base = XEXP (base, 0);
1464 out[0] = adjust_automodify_address
1465 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1466 out[1] = adjust_automodify_address
1467 (in, DImode,
1468 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1469 8);
1470 break;
1471
1472 case POST_MODIFY:
1473 gcc_assert (!reversed && !dead);
1474
1475 /* Extract and adjust the modification. This case is
1476 trickier than the others, because we might have an
1477 index register, or we might have a combined offset that
1478 doesn't fit a signed 9-bit displacement field. We can
1479 assume the incoming expression is already legitimate. */
1480 offset = XEXP (base, 1);
1481 base = XEXP (base, 0);
1482
1483 out[0] = adjust_automodify_address
1484 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1485
1486 if (GET_CODE (XEXP (offset, 1)) == REG)
1487 {
1488 /* Can't adjust the postmodify to match. Emit the
1489 original, then a separate addition insn. */
1490 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1491 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1492 }
1493 else
1494 {
1495 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1496 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1497 {
1498 /* Again the postmodify cannot be made to match,
1499 but in this case it's more efficient to get rid
1500 of the postmodify entirely and fix up with an
1501 add insn. */
1502 out[1] = adjust_automodify_address (in, DImode, base, 8);
1503 fixup = gen_adddi3
1504 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1505 }
1506 else
1507 {
1508 /* Combined offset still fits in the displacement field.
1509 (We cannot overflow it at the high end.) */
1510 out[1] = adjust_automodify_address
1511 (in, DImode, gen_rtx_POST_MODIFY
1512 (Pmode, base, gen_rtx_PLUS
1513 (Pmode, base,
1514 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1515 8);
1516 }
1517 }
1518 break;
1519
1520 default:
1521 gcc_unreachable ();
1522 }
1523 break;
1524 }
1525
1526 default:
1527 gcc_unreachable ();
1528 }
1529
1530 return fixup;
1531 }
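/* For illustration (a rough sketch of the resulting access pattern,
   not from the original source): for a source of the form
   (mem:TI (reg base)) with !REVERSED, the two DImode accesses built
   above correspond to something like
       ld8 lo = [base], 8       // post-increment by 8
       ld8 hi = [base], -8      // post-decrement restores base
   where the second post-modify is omitted when DEAD, i.e. when the
   base register is not needed after the move.  */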
1532
1533 /* Split a TImode or TFmode move instruction after reload.
1534 This is used by *movtf_internal and *movti_internal. */
1535 void
1536 ia64_split_tmode_move (rtx operands[])
1537 {
1538 rtx in[2], out[2], insn;
1539 rtx fixup[2];
1540 bool dead = false;
1541 bool reversed = false;
1542
1543 /* It is possible for reload to decide to overwrite a pointer with
1544 the value it points to. In that case we have to do the loads in
1545 the appropriate order so that the pointer is not destroyed too
1546 early. Also we must not generate a postmodify for that second
1547 load, or rws_access_regno will die. */
1548 if (GET_CODE (operands[1]) == MEM
1549 && reg_overlap_mentioned_p (operands[0], operands[1]))
1550 {
1551 rtx base = XEXP (operands[1], 0);
1552 while (GET_CODE (base) != REG)
1553 base = XEXP (base, 0);
1554
1555 if (REGNO (base) == REGNO (operands[0]))
1556 reversed = true;
1557 dead = true;
1558 }
1559 /* Another reason to do the moves in reversed order is if the first
1560 element of the target register pair is also the second element of
1561 the source register pair. */
1562 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1563 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1564 reversed = true;
1565
1566 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1567 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1568
1569 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1570 if (GET_CODE (EXP) == MEM \
1571 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1572 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1573 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1574 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1575
1576 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1577 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1578 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1579
1580 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1581 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1582 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1583
1584 if (fixup[0])
1585 emit_insn (fixup[0]);
1586 if (fixup[1])
1587 emit_insn (fixup[1]);
1588
1589 #undef MAYBE_ADD_REG_INC_NOTE
1590 }
1591
1592 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1593 through memory plus an extra GR scratch register. Except that you can
1594 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1595 SECONDARY_RELOAD_CLASS, but not both.
1596
1597 We got into problems in the first place by allowing a construct like
1598 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1599 This solution attempts to prevent this situation from occurring. When
1600 we see something like the above, we spill the inner register to memory. */
1601
1602 static rtx
1603 spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1604 {
1605 if (GET_CODE (in) == SUBREG
1606 && GET_MODE (SUBREG_REG (in)) == TImode
1607 && GET_CODE (SUBREG_REG (in)) == REG)
1608 {
1609 rtx memt = assign_stack_temp (TImode, 16, 0);
1610 emit_move_insn (memt, SUBREG_REG (in));
1611 return adjust_address (memt, mode, 0);
1612 }
1613 else if (force && GET_CODE (in) == REG)
1614 {
1615 rtx memx = assign_stack_temp (mode, 16, 0);
1616 emit_move_insn (memx, in);
1617 return memx;
1618 }
1619 else
1620 return in;
1621 }
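/* For illustration (a hypothetical source-level example, not from
   the original file): the construct described above can arise from
   code along the lines of
       union { long double x; unsigned __int128 i; } u;
   where a read of u.x after a write of u.i may be represented as a
   (subreg:XF (reg:TI ...)); the function above forces such a value
   into a 16-byte stack temporary instead.  */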
1622
1623 /* Expand the movxf or movrf pattern (MODE says which) with the given
1624 OPERANDS, returning true if the pattern should then invoke
1625 DONE. */
1626
1627 bool
1628 ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1629 {
1630 rtx op0 = operands[0];
1631
1632 if (GET_CODE (op0) == SUBREG)
1633 op0 = SUBREG_REG (op0);
1634
1635 /* We must support XFmode loads into general registers for stdarg/vararg,
1636 unprototyped calls, and a rare case where a long double is passed as
1637 an argument after a float HFA fills the FP registers. We split them into
1638 DImode loads for convenience. We also need to support XFmode stores
1639 for the last case. This case does not happen for stdarg/vararg routines,
1640 because we do a block store to memory of unnamed arguments. */
1641
1642 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1643 {
1644 rtx out[2];
1645
1646 /* We're hoping to transform everything that deals with XFmode
1647 quantities and GR registers early in the compiler. */
1648 gcc_assert (can_create_pseudo_p ());
1649
1650 /* Struct to register can just use TImode instead. */
1651 if ((GET_CODE (operands[1]) == SUBREG
1652 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1653 || (GET_CODE (operands[1]) == REG
1654 && GR_REGNO_P (REGNO (operands[1]))))
1655 {
1656 rtx op1 = operands[1];
1657
1658 if (GET_CODE (op1) == SUBREG)
1659 op1 = SUBREG_REG (op1);
1660 else
1661 op1 = gen_rtx_REG (TImode, REGNO (op1));
1662
1663 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1664 return true;
1665 }
1666
1667 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1668 {
1669 /* Don't word-swap when reading in the constant. */
1670 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1671 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1672 0, mode));
1673 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1674 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1675 0, mode));
1676 return true;
1677 }
1678
1679 /* If the quantity is in a register not known to be GR, spill it. */
1680 if (register_operand (operands[1], mode))
1681 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1682
1683 gcc_assert (GET_CODE (operands[1]) == MEM);
1684
1685 /* Don't word-swap when reading in the value. */
1686 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1687 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1688
1689 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1690 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1691 return true;
1692 }
1693
1694 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1695 {
1696 /* We're hoping to transform everything that deals with XFmode
1697 quantities and GR registers early in the compiler. */
1698 gcc_assert (can_create_pseudo_p ());
1699
1700 /* Op0 can't be a GR_REG here, as that case is handled above.
1701 If op0 is a register, then we spill op1, so that we now have a
1702 MEM operand. This requires creating an XFmode subreg of a TImode reg
1703 to force the spill. */
1704 if (register_operand (operands[0], mode))
1705 {
1706 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1707 op1 = gen_rtx_SUBREG (mode, op1, 0);
1708 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1709 }
1710
1711 else
1712 {
1713 rtx in[2];
1714
1715 gcc_assert (GET_CODE (operands[0]) == MEM);
1716
1717 /* Don't word-swap when writing out the value. */
1718 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1719 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1720
1721 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1722 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1723 return true;
1724 }
1725 }
1726
1727 if (!reload_in_progress && !reload_completed)
1728 {
1729 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1730
1731 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1732 {
1733 rtx memt, memx, in = operands[1];
1734 if (CONSTANT_P (in))
1735 in = validize_mem (force_const_mem (mode, in));
1736 if (GET_CODE (in) == MEM)
1737 memt = adjust_address (in, TImode, 0);
1738 else
1739 {
1740 memt = assign_stack_temp (TImode, 16, 0);
1741 memx = adjust_address (memt, mode, 0);
1742 emit_move_insn (memx, in);
1743 }
1744 emit_move_insn (op0, memt);
1745 return true;
1746 }
1747
1748 if (!ia64_move_ok (operands[0], operands[1]))
1749 operands[1] = force_reg (mode, operands[1]);
1750 }
1751
1752 return false;
1753 }
1754
1755 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1756 with the expression that holds the compare result (in VOIDmode). */
1757
1758 static GTY(()) rtx cmptf_libfunc;
1759
1760 void
1761 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1762 {
1763 enum rtx_code code = GET_CODE (*expr);
1764 rtx cmp;
1765
1766 /* If we have a BImode input, then we already have a compare result, and
1767 do not need to emit another comparison. */
1768 if (GET_MODE (*op0) == BImode)
1769 {
1770 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1771 cmp = *op0;
1772 }
1773 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1774 magic number as its third argument that indicates what to do.
1775 The return value is an integer to be compared against zero. */
1776 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1777 {
1778 enum qfcmp_magic {
1779 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1780 QCMP_UNORD = 2,
1781 QCMP_EQ = 4,
1782 QCMP_LT = 8,
1783 QCMP_GT = 16
1784 };
1785 int magic;
1786 enum rtx_code ncode;
1787 rtx ret, insns;
1788
1789 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1790 switch (code)
1791 {
1792 /* 1 = equal, 0 = not equal. Equality operators do
1793 not raise FP_INVALID when given an SNaN operand. */
1794 case EQ: magic = QCMP_EQ; ncode = NE; break;
1795 case NE: magic = QCMP_EQ; ncode = EQ; break;
1796 /* isunordered() from C99. */
1797 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1798 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1799 /* Relational operators raise FP_INVALID when given
1800 an SNaN operand. */
1801 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1802 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1803 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1804 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1805 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1806 Expanders for buneq etc. would have to be added to ia64.md
1807 for this to be useful. */
1808 default: gcc_unreachable ();
1809 }
1810
1811 start_sequence ();
1812
1813 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1814 *op0, TFmode, *op1, TFmode,
1815 GEN_INT (magic), DImode);
1816 cmp = gen_reg_rtx (BImode);
1817 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1818 gen_rtx_fmt_ee (ncode, BImode,
1819 ret, const0_rtx)));
1820
1821 insns = get_insns ();
1822 end_sequence ();
1823
1824 emit_libcall_block (insns, cmp, cmp,
1825 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1826 code = NE;
1827 }
1828 else
1829 {
1830 cmp = gen_reg_rtx (BImode);
1831 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1832 gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1833 code = NE;
1834 }
1835
1836 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1837 *op0 = cmp;
1838 *op1 = const0_rtx;
1839 }
1840
1841 /* Generate an integral vector comparison. Return true if the condition has
1842 been reversed, and so the sense of the comparison should be inverted. */
1843
1844 static bool
1845 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1846 rtx dest, rtx op0, rtx op1)
1847 {
1848 bool negate = false;
1849 rtx x;
1850
1851 /* Canonicalize the comparison to EQ, GT, GTU. */
1852 switch (code)
1853 {
1854 case EQ:
1855 case GT:
1856 case GTU:
1857 break;
1858
1859 case NE:
1860 case LE:
1861 case LEU:
1862 code = reverse_condition (code);
1863 negate = true;
1864 break;
1865
1866 case GE:
1867 case GEU:
1868 code = reverse_condition (code);
1869 negate = true;
1870 /* FALLTHRU */
1871
1872 case LT:
1873 case LTU:
1874 code = swap_condition (code);
1875 x = op0, op0 = op1, op1 = x;
1876 break;
1877
1878 default:
1879 gcc_unreachable ();
1880 }
1881
1882 /* Unsigned parallel compare is not supported by the hardware. Play some
1883 tricks to turn this into a signed comparison. */
1884 if (code == GTU)
1885 {
1886 switch (mode)
1887 {
1888 case V2SImode:
1889 {
1890 rtx t1, t2, mask;
1891
1892 /* Subtract (-(INT MAX) - 1) from both operands; flipping the
1893 sign bit this way turns the unsigned compare into a signed one. */
1894 mask = GEN_INT (0x80000000);
1895 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1896 mask = force_reg (mode, mask);
1897 t1 = gen_reg_rtx (mode);
1898 emit_insn (gen_subv2si3 (t1, op0, mask));
1899 t2 = gen_reg_rtx (mode);
1900 emit_insn (gen_subv2si3 (t2, op1, mask));
1901 op0 = t1;
1902 op1 = t2;
1903 code = GT;
1904 }
1905 break;
1906
1907 case V8QImode:
1908 case V4HImode:
1909 /* Perform a parallel unsigned saturating subtraction. */
1910 x = gen_reg_rtx (mode);
1911 emit_insn (gen_rtx_SET (VOIDmode, x,
1912 gen_rtx_US_MINUS (mode, op0, op1)));
1913
1914 code = EQ;
1915 op0 = x;
1916 op1 = CONST0_RTX (mode);
1917 negate = !negate;
1918 break;
1919
1920 default:
1921 gcc_unreachable ();
1922 }
1923 }
1924
1925 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1926 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1927
1928 return negate;
1929 }
1930
1931 /* Emit an integral vector conditional move. */
1932
1933 void
1934 ia64_expand_vecint_cmov (rtx operands[])
1935 {
1936 enum machine_mode mode = GET_MODE (operands[0]);
1937 enum rtx_code code = GET_CODE (operands[3]);
1938 bool negate;
1939 rtx cmp, x, ot, of;
1940
1941 cmp = gen_reg_rtx (mode);
1942 negate = ia64_expand_vecint_compare (code, mode, cmp,
1943 operands[4], operands[5]);
1944
1945 ot = operands[1+negate];
1946 of = operands[2-negate];
1947
1948 if (ot == CONST0_RTX (mode))
1949 {
1950 if (of == CONST0_RTX (mode))
1951 {
1952 emit_move_insn (operands[0], ot);
1953 return;
1954 }
1955
1956 x = gen_rtx_NOT (mode, cmp);
1957 x = gen_rtx_AND (mode, x, of);
1958 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1959 }
1960 else if (of == CONST0_RTX (mode))
1961 {
1962 x = gen_rtx_AND (mode, cmp, ot);
1963 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1964 }
1965 else
1966 {
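/* Neither operand is the zero vector: compute
   (cmp & true_value) | (~cmp & false_value).  */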
1967 rtx t, f;
1968
1969 t = gen_reg_rtx (mode);
1970 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1971 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1972
1973 f = gen_reg_rtx (mode);
1974 x = gen_rtx_NOT (mode, cmp);
1975 x = gen_rtx_AND (mode, x, operands[2-negate]);
1976 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1977
1978 x = gen_rtx_IOR (mode, t, f);
1979 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1980 }
1981 }
1982
1983 /* Emit an integral vector min or max operation. Return true if all done. */
1984
1985 bool
1986 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1987 rtx operands[])
1988 {
1989 rtx xops[6];
1990
1991 /* These four combinations are supported directly. */
1992 if (mode == V8QImode && (code == UMIN || code == UMAX))
1993 return false;
1994 if (mode == V4HImode && (code == SMIN || code == SMAX))
1995 return false;
1996
1997 /* This combination can be implemented with only saturating subtraction. */
1998 if (mode == V4HImode && code == UMAX)
1999 {
2000 rtx x, tmp = gen_reg_rtx (mode);
2001
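/* umax (a, b) = (a -us b) + b, since the saturating subtraction
   yields max (a - b, 0).  */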
2002 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2003 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
2004
2005 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2006 return true;
2007 }
2008
2009 /* Everything else implemented via vector comparisons. */
2010 xops[0] = operands[0];
2011 xops[4] = xops[1] = operands[1];
2012 xops[5] = xops[2] = operands[2];
2013
2014 switch (code)
2015 {
2016 case UMIN:
2017 code = LTU;
2018 break;
2019 case UMAX:
2020 code = GTU;
2021 break;
2022 case SMIN:
2023 code = LT;
2024 break;
2025 case SMAX:
2026 code = GT;
2027 break;
2028 default:
2029 gcc_unreachable ();
2030 }
2031 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2032
2033 ia64_expand_vecint_cmov (xops);
2034 return true;
2035 }
2036
2037 /* The vectors LO and HI each contain N halves of a double-wide vector.
2038 Reassemble either the first N/2 or the second N/2 elements. */
2039
2040 void
2041 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2042 {
2043 enum machine_mode vmode = GET_MODE (lo);
2044 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2045 struct expand_vec_perm_d d;
2046 bool ok;
2047
2048 d.target = gen_lowpart (vmode, out);
2049 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2050 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2051 d.vmode = vmode;
2052 d.nelt = nelt;
2053 d.one_operand_p = false;
2054 d.testing_p = false;
2055
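/* Build an interleaving selector: even elements of the result come from
   D.OP0 and odd elements from D.OP1, starting at element HIGH of each.  */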
2056 high = (highp ? nelt / 2 : 0);
2057 for (i = 0; i < nelt / 2; ++i)
2058 {
2059 d.perm[i * 2] = i + high;
2060 d.perm[i * 2 + 1] = i + high + nelt;
2061 }
2062
2063 ok = ia64_expand_vec_perm_const_1 (&d);
2064 gcc_assert (ok);
2065 }
2066
2067 /* Return a vector of the sign-extension of VEC. */
2068
2069 static rtx
2070 ia64_unpack_sign (rtx vec, bool unsignedp)
2071 {
2072 enum machine_mode mode = GET_MODE (vec);
2073 rtx zero = CONST0_RTX (mode);
2074
2075 if (unsignedp)
2076 return zero;
2077 else
2078 {
2079 rtx sign = gen_reg_rtx (mode);
2080 bool neg;
2081
2082 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2083 gcc_assert (!neg);
2084
2085 return sign;
2086 }
2087 }
2088
2089 /* Emit an integral vector unpack operation. */
2090
2091 void
2092 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2093 {
2094 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2095 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2096 }
2097
2098 /* Emit an integral vector widening sum operation. */
2099
2100 void
2101 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2102 {
2103 enum machine_mode wmode;
2104 rtx l, h, t, sign;
2105
2106 sign = ia64_unpack_sign (operands[1], unsignedp);
2107
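/* Unpack the low and high halves of OPERANDS[1] into wide vectors, then
   add both halves and the accumulator OPERANDS[2] into OPERANDS[0].  */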
2108 wmode = GET_MODE (operands[0]);
2109 l = gen_reg_rtx (wmode);
2110 h = gen_reg_rtx (wmode);
2111
2112 ia64_unpack_assemble (l, operands[1], sign, false);
2113 ia64_unpack_assemble (h, operands[1], sign, true);
2114
2115 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2116 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2117 if (t != operands[0])
2118 emit_move_insn (operands[0], t);
2119 }
2120
2121 /* Emit a signed or unsigned V8QI dot product operation. */
2122
2123 void
2124 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
2125 {
2126 rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
2127 rtx p1, p2, p3, p4, s1, s2, s3;
2128
2129 op1 = operands[1];
2130 op2 = operands[2];
2131 sn1 = ia64_unpack_sign (op1, unsignedp);
2132 sn2 = ia64_unpack_sign (op2, unsignedp);
2133
2134 l1 = gen_reg_rtx (V4HImode);
2135 l2 = gen_reg_rtx (V4HImode);
2136 h1 = gen_reg_rtx (V4HImode);
2137 h2 = gen_reg_rtx (V4HImode);
2138 ia64_unpack_assemble (l1, op1, sn1, false);
2139 ia64_unpack_assemble (l2, op2, sn2, false);
2140 ia64_unpack_assemble (h1, op1, sn1, true);
2141 ia64_unpack_assemble (h2, op2, sn2, true);
2142
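/* Form the V2SImode partial products; pmpy2 multiplies either the even
   or the odd element pairs of its V4HImode inputs.  */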
2143 p1 = gen_reg_rtx (V2SImode);
2144 p2 = gen_reg_rtx (V2SImode);
2145 p3 = gen_reg_rtx (V2SImode);
2146 p4 = gen_reg_rtx (V2SImode);
2147 emit_insn (gen_pmpy2_even (p1, l1, l2));
2148 emit_insn (gen_pmpy2_even (p2, h1, h2));
2149 emit_insn (gen_pmpy2_odd (p3, l1, l2));
2150 emit_insn (gen_pmpy2_odd (p4, h1, h2));
2151
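/* Sum the four partial products together with the accumulator OPERANDS[3].  */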
2152 s1 = gen_reg_rtx (V2SImode);
2153 s2 = gen_reg_rtx (V2SImode);
2154 s3 = gen_reg_rtx (V2SImode);
2155 emit_insn (gen_addv2si3 (s1, p1, p2));
2156 emit_insn (gen_addv2si3 (s2, p3, p4));
2157 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
2158 emit_insn (gen_addv2si3 (operands[0], s2, s3));
2159 }
2160
2161 /* Emit the appropriate sequence for a call. */
2162
2163 void
2164 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2165 int sibcall_p)
2166 {
2167 rtx insn, b0;
2168
2169 addr = XEXP (addr, 0);
2170 addr = convert_memory_address (DImode, addr);
2171 b0 = gen_rtx_REG (DImode, R_BR (0));
2172
2173 /* ??? Should do this for functions known to bind local too. */
2174 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2175 {
2176 if (sibcall_p)
2177 insn = gen_sibcall_nogp (addr);
2178 else if (! retval)
2179 insn = gen_call_nogp (addr, b0);
2180 else
2181 insn = gen_call_value_nogp (retval, addr, b0);
2182 insn = emit_call_insn (insn);
2183 }
2184 else
2185 {
2186 if (sibcall_p)
2187 insn = gen_sibcall_gp (addr);
2188 else if (! retval)
2189 insn = gen_call_gp (addr, b0);
2190 else
2191 insn = gen_call_value_gp (retval, addr, b0);
2192 insn = emit_call_insn (insn);
2193
2194 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2195 }
2196
2197 if (sibcall_p)
2198 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2199
2200 if (TARGET_ABI_OPEN_VMS)
2201 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2202 gen_rtx_REG (DImode, GR_REG (25)));
2203 }
2204
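/* Record the hard register chosen for frame register R as having been used
   in emitted code; once emitted, the choice must not change.  */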
2205 static void
2206 reg_emitted (enum ia64_frame_regs r)
2207 {
2208 if (emitted_frame_related_regs[r] == 0)
2209 emitted_frame_related_regs[r] = current_frame_info.r[r];
2210 else
2211 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2212 }
2213
2214 static int
2215 get_reg (enum ia64_frame_regs r)
2216 {
2217 reg_emitted (r);
2218 return current_frame_info.r[r];
2219 }
2220
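/* Return true if hard register REGNO has already been used for one of the
   emitted frame related registers.  */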
2221 static bool
2222 is_emitted (int regno)
2223 {
2224 unsigned int r;
2225
2226 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2227 if (emitted_frame_related_regs[r] == regno)
2228 return true;
2229 return false;
2230 }
2231
2232 void
2233 ia64_reload_gp (void)
2234 {
2235 rtx tmp;
2236
2237 if (current_frame_info.r[reg_save_gp])
2238 {
2239 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2240 }
2241 else
2242 {
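/* The GP was spilled to the stack.  Compute the address of its save slot
   relative to the frame or stack pointer and reload it from memory.  */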
2243 HOST_WIDE_INT offset;
2244 rtx offset_r;
2245
2246 offset = (current_frame_info.spill_cfa_off
2247 + current_frame_info.spill_size);
2248 if (frame_pointer_needed)
2249 {
2250 tmp = hard_frame_pointer_rtx;
2251 offset = -offset;
2252 }
2253 else
2254 {
2255 tmp = stack_pointer_rtx;
2256 offset = current_frame_info.total_size - offset;
2257 }
2258
2259 offset_r = GEN_INT (offset);
2260 if (satisfies_constraint_I (offset_r))
2261 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2262 else
2263 {
2264 emit_move_insn (pic_offset_table_rtx, offset_r);
2265 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2266 pic_offset_table_rtx, tmp));
2267 }
2268
2269 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2270 }
2271
2272 emit_move_insn (pic_offset_table_rtx, tmp);
2273 }
2274
2275 void
2276 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2277 rtx scratch_b, int noreturn_p, int sibcall_p)
2278 {
2279 rtx insn;
2280 bool is_desc = false;
2281
2282 /* If we find we're calling through a register, then we're actually
2283 calling through a descriptor, so load up the values. */
2284 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2285 {
2286 rtx tmp;
2287 bool addr_dead_p;
2288
2289 /* ??? We are currently constrained to *not* use peep2, because
2290 we can legitimately change the global lifetime of the GP
2291 (in the form of killing where previously live). This is
2292 because a call through a descriptor doesn't use the previous
2293 value of the GP, while a direct call does, and we do not
2294 commit to either form until the split here.
2295
2296 That said, this means that we lack precise life info for
2297 whether ADDR is dead after this call. This is not terribly
2298 important, since we can fix things up essentially for free
2299 with the POST_DEC below, but it's nice to not use it when we
2300 can immediately tell it's not necessary. */
2301 addr_dead_p = ((noreturn_p || sibcall_p
2302 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2303 REGNO (addr)))
2304 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2305
2306 /* Load the code address into scratch_b. */
2307 tmp = gen_rtx_POST_INC (Pmode, addr);
2308 tmp = gen_rtx_MEM (Pmode, tmp);
2309 emit_move_insn (scratch_r, tmp);
2310 emit_move_insn (scratch_b, scratch_r);
2311
2312 /* Load the GP address. If ADDR is not dead here, then we must
2313 revert the change made above via the POST_INCREMENT. */
2314 if (!addr_dead_p)
2315 tmp = gen_rtx_POST_DEC (Pmode, addr);
2316 else
2317 tmp = addr;
2318 tmp = gen_rtx_MEM (Pmode, tmp);
2319 emit_move_insn (pic_offset_table_rtx, tmp);
2320
2321 is_desc = true;
2322 addr = scratch_b;
2323 }
2324
2325 if (sibcall_p)
2326 insn = gen_sibcall_nogp (addr);
2327 else if (retval)
2328 insn = gen_call_value_nogp (retval, addr, retaddr);
2329 else
2330 insn = gen_call_nogp (addr, retaddr);
2331 emit_call_insn (insn);
2332
2333 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2334 ia64_reload_gp ();
2335 }
2336
2337 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2338
2339 This differs from the generic code in that we know about the zero-extending
2340 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2341 also know that ld.acq+cmpxchg.rel equals a full barrier.
2342
2343 The loop we want to generate looks like
2344
2345 cmp_reg = mem;
2346 label:
2347 old_reg = cmp_reg;
2348 new_reg = cmp_reg op val;
2349 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2350 if (cmp_reg != old_reg)
2351 goto label;
2352
2353 Note that we only do the plain load from memory once. Subsequent
2354 iterations use the value loaded by the compare-and-swap pattern. */
2355
2356 void
2357 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2358 rtx old_dst, rtx new_dst, enum memmodel model)
2359 {
2360 enum machine_mode mode = GET_MODE (mem);
2361 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2362 enum insn_code icode;
2363
2364 /* Special case for using fetchadd. */
2365 if ((mode == SImode || mode == DImode)
2366 && (code == PLUS || code == MINUS)
2367 && fetchadd_operand (val, mode))
2368 {
2369 if (code == MINUS)
2370 val = GEN_INT (-INTVAL (val));
2371
2372 if (!old_dst)
2373 old_dst = gen_reg_rtx (mode);
2374
2375 switch (model)
2376 {
2377 case MEMMODEL_ACQ_REL:
2378 case MEMMODEL_SEQ_CST:
2379 emit_insn (gen_memory_barrier ());
2380 /* FALLTHRU */
2381 case MEMMODEL_RELAXED:
2382 case MEMMODEL_ACQUIRE:
2383 case MEMMODEL_CONSUME:
2384 if (mode == SImode)
2385 icode = CODE_FOR_fetchadd_acq_si;
2386 else
2387 icode = CODE_FOR_fetchadd_acq_di;
2388 break;
2389 case MEMMODEL_RELEASE:
2390 if (mode == SImode)
2391 icode = CODE_FOR_fetchadd_rel_si;
2392 else
2393 icode = CODE_FOR_fetchadd_rel_di;
2394 break;
2395
2396 default:
2397 gcc_unreachable ();
2398 }
2399
2400 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2401
2402 if (new_dst)
2403 {
2404 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2405 true, OPTAB_WIDEN);
2406 if (new_reg != new_dst)
2407 emit_move_insn (new_dst, new_reg);
2408 }
2409 return;
2410 }
2411
2412 /* Because of the volatile mem read, we get an ld.acq, which is the
2413 front half of the full barrier. The end half is the cmpxchg.rel.
2414 For relaxed and release memory models, we don't need this. But we
2415 also don't bother trying to prevent it either. */
2416 gcc_assert (model == MEMMODEL_RELAXED
2417 || model == MEMMODEL_RELEASE
2418 || MEM_VOLATILE_P (mem));
2419
2420 old_reg = gen_reg_rtx (DImode);
2421 cmp_reg = gen_reg_rtx (DImode);
2422 label = gen_label_rtx ();
2423
2424 if (mode != DImode)
2425 {
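/* Widen VAL and zero-extend the initial load to DImode, matching the
   zero-extension performed by cmpxchg and expected by ar.ccv.  */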
2426 val = simplify_gen_subreg (DImode, val, mode, 0);
2427 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2428 }
2429 else
2430 emit_move_insn (cmp_reg, mem);
2431
2432 emit_label (label);
2433
2434 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2435 emit_move_insn (old_reg, cmp_reg);
2436 emit_move_insn (ar_ccv, cmp_reg);
2437
2438 if (old_dst)
2439 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2440
2441 new_reg = cmp_reg;
2442 if (code == NOT)
2443 {
2444 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2445 true, OPTAB_DIRECT);
2446 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2447 }
2448 else
2449 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2450 true, OPTAB_DIRECT);
2451
2452 if (mode != DImode)
2453 new_reg = gen_lowpart (mode, new_reg);
2454 if (new_dst)
2455 emit_move_insn (new_dst, new_reg);
2456
2457 switch (model)
2458 {
2459 case MEMMODEL_RELAXED:
2460 case MEMMODEL_ACQUIRE:
2461 case MEMMODEL_CONSUME:
2462 switch (mode)
2463 {
2464 case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2465 case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2466 case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2467 case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
2468 default:
2469 gcc_unreachable ();
2470 }
2471 break;
2472
2473 case MEMMODEL_RELEASE:
2474 case MEMMODEL_ACQ_REL:
2475 case MEMMODEL_SEQ_CST:
2476 switch (mode)
2477 {
2478 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2479 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2480 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2481 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2482 default:
2483 gcc_unreachable ();
2484 }
2485 break;
2486
2487 default:
2488 gcc_unreachable ();
2489 }
2490
2491 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2492
2493 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2494 }
2495 \f
2496 /* Begin the assembly file. */
2497
2498 static void
2499 ia64_file_start (void)
2500 {
2501 default_file_start ();
2502 emit_safe_across_calls ();
2503 }
2504
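/* Scan the predicate registers and emit a .pred.safe_across_calls directive
   naming the ranges of predicate registers that are not call-used.  */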
2505 void
2506 emit_safe_across_calls (void)
2507 {
2508 unsigned int rs, re;
2509 int out_state;
2510
2511 rs = 1;
2512 out_state = 0;
2513 while (1)
2514 {
2515 while (rs < 64 && call_used_regs[PR_REG (rs)])
2516 rs++;
2517 if (rs >= 64)
2518 break;
2519 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2520 continue;
2521 if (out_state == 0)
2522 {
2523 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2524 out_state = 1;
2525 }
2526 else
2527 fputc (',', asm_out_file);
2528 if (re == rs + 1)
2529 fprintf (asm_out_file, "p%u", rs);
2530 else
2531 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2532 rs = re + 1;
2533 }
2534 if (out_state)
2535 fputc ('\n', asm_out_file);
2536 }
2537
2538 /* Globalize a declaration. */
2539
2540 static void
2541 ia64_globalize_decl_name (FILE * stream, tree decl)
2542 {
2543 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2544 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2545 if (version_attr)
2546 {
2547 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2548 const char *p = TREE_STRING_POINTER (v);
2549 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2550 }
2551 targetm.asm_out.globalize_label (stream, name);
2552 if (TREE_CODE (decl) == FUNCTION_DECL)
2553 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2554 }
2555
2556 /* Helper function for ia64_compute_frame_size: find an appropriate general
2557 register to spill some special register to. GR0 to GR31 registers already
2558 allocated by this routine are marked in current_frame_info.gr_used_mask.
2559 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2560
2561 static int
2562 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2563 {
2564 int regno;
2565
2566 if (emitted_frame_related_regs[r] != 0)
2567 {
2568 regno = emitted_frame_related_regs[r];
2569 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2570 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2571 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2572 else if (current_function_is_leaf
2573 && regno >= GR_REG (1) && regno <= GR_REG (31))
2574 current_frame_info.gr_used_mask |= 1 << regno;
2575
2576 return regno;
2577 }
2578
2579 /* If this is a leaf function, first try an otherwise unused
2580 call-clobbered register. */
2581 if (current_function_is_leaf)
2582 {
2583 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2584 if (! df_regs_ever_live_p (regno)
2585 && call_used_regs[regno]
2586 && ! fixed_regs[regno]
2587 && ! global_regs[regno]
2588 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2589 && ! is_emitted (regno))
2590 {
2591 current_frame_info.gr_used_mask |= 1 << regno;
2592 return regno;
2593 }
2594 }
2595
2596 if (try_locals)
2597 {
2598 regno = current_frame_info.n_local_regs;
2599 /* If there is a frame pointer, then we can't use loc79, because
2600 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2601 reg_name switching code in ia64_expand_prologue. */
2602 while (regno < (80 - frame_pointer_needed))
2603 if (! is_emitted (LOC_REG (regno++)))
2604 {
2605 current_frame_info.n_local_regs = regno;
2606 return LOC_REG (regno - 1);
2607 }
2608 }
2609
2610 /* Failed to find a general register to spill to. Must use stack. */
2611 return 0;
2612 }
2613
2614 /* In order to make for nice schedules, we try to allocate every temporary
2615 to a different register. We must of course stay away from call-saved,
2616 fixed, and global registers. We must also stay away from registers
2617 allocated in current_frame_info.gr_used_mask, since those include regs
2618 used all through the prologue.
2619
2620 Any register allocated here must be used immediately. The idea is to
2621 aid scheduling, not to solve data flow problems. */
2622
2623 static int last_scratch_gr_reg;
2624
2625 static int
2626 next_scratch_gr_reg (void)
2627 {
2628 int i, regno;
2629
2630 for (i = 0; i < 32; ++i)
2631 {
2632 regno = (last_scratch_gr_reg + i + 1) & 31;
2633 if (call_used_regs[regno]
2634 && ! fixed_regs[regno]
2635 && ! global_regs[regno]
2636 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2637 {
2638 last_scratch_gr_reg = regno;
2639 return regno;
2640 }
2641 }
2642
2643 /* There must be _something_ available. */
2644 gcc_unreachable ();
2645 }
2646
2647 /* Helper function for ia64_compute_frame_size, called through
2648 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2649
2650 static void
2651 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2652 {
2653 unsigned int regno = REGNO (reg);
2654 if (regno < 32)
2655 {
2656 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2657 for (i = 0; i < n; ++i)
2658 current_frame_info.gr_used_mask |= 1 << (regno + i);
2659 }
2660 }
2661
2662
2663 /* Compute the frame layout of the current function and record it in
2664 current_frame_info. SIZE is the number of bytes of space
2665 needed for local variables. */
2666
2667 static void
2668 ia64_compute_frame_size (HOST_WIDE_INT size)
2669 {
2670 HOST_WIDE_INT total_size;
2671 HOST_WIDE_INT spill_size = 0;
2672 HOST_WIDE_INT extra_spill_size = 0;
2673 HOST_WIDE_INT pretend_args_size;
2674 HARD_REG_SET mask;
2675 int n_spilled = 0;
2676 int spilled_gr_p = 0;
2677 int spilled_fr_p = 0;
2678 unsigned int regno;
2679 int min_regno;
2680 int max_regno;
2681 int i;
2682
2683 if (current_frame_info.initialized)
2684 return;
2685
2686 memset (&current_frame_info, 0, sizeof current_frame_info);
2687 CLEAR_HARD_REG_SET (mask);
2688
2689 /* Don't allocate scratches to the return register. */
2690 diddle_return_value (mark_reg_gr_used_mask, NULL);
2691
2692 /* Don't allocate scratches to the EH scratch registers. */
2693 if (cfun->machine->ia64_eh_epilogue_sp)
2694 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2695 if (cfun->machine->ia64_eh_epilogue_bsp)
2696 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2697
2698 /* Find the size of the register stack frame. We have only 80 local
2699 registers, because we reserve 8 for the inputs and 8 for the
2700 outputs. */
2701
2702 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2703 since we'll be adjusting that down later. */
2704 regno = LOC_REG (78) + ! frame_pointer_needed;
2705 for (; regno >= LOC_REG (0); regno--)
2706 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2707 break;
2708 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2709
2710 /* For functions marked with the syscall_linkage attribute, we must mark
2711 all eight input registers as in use, so that locals aren't visible to
2712 the caller. */
2713
2714 if (cfun->machine->n_varargs > 0
2715 || lookup_attribute ("syscall_linkage",
2716 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2717 current_frame_info.n_input_regs = 8;
2718 else
2719 {
2720 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2721 if (df_regs_ever_live_p (regno))
2722 break;
2723 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2724 }
2725
2726 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2727 if (df_regs_ever_live_p (regno))
2728 break;
2729 i = regno - OUT_REG (0) + 1;
2730
2731 #ifndef PROFILE_HOOK
2732 /* When -p profiling, we need one output register for the mcount argument.
2733 Likewise for -a profiling for the bb_init_func argument. For -ax
2734 profiling, we need two output registers for the two bb_init_trace_func
2735 arguments. */
2736 if (crtl->profile)
2737 i = MAX (i, 1);
2738 #endif
2739 current_frame_info.n_output_regs = i;
2740
2741 /* ??? No rotating register support yet. */
2742 current_frame_info.n_rotate_regs = 0;
2743
2744 /* Discover which registers need spilling, and how much room that
2745 will take. Begin with floating point and general registers,
2746 which will always wind up on the stack. */
2747
2748 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2749 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2750 {
2751 SET_HARD_REG_BIT (mask, regno);
2752 spill_size += 16;
2753 n_spilled += 1;
2754 spilled_fr_p = 1;
2755 }
2756
2757 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2758 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2759 {
2760 SET_HARD_REG_BIT (mask, regno);
2761 spill_size += 8;
2762 n_spilled += 1;
2763 spilled_gr_p = 1;
2764 }
2765
2766 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2767 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2768 {
2769 SET_HARD_REG_BIT (mask, regno);
2770 spill_size += 8;
2771 n_spilled += 1;
2772 }
2773
2774 /* Now come all special registers that might get saved in other
2775 general registers. */
2776
2777 if (frame_pointer_needed)
2778 {
2779 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2780 /* If we did not get a register, then we take LOC79. This is guaranteed
2781 to be free, even if regs_ever_live is already set, because this is
2782 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2783 as we don't count loc79 above. */
2784 if (current_frame_info.r[reg_fp] == 0)
2785 {
2786 current_frame_info.r[reg_fp] = LOC_REG (79);
2787 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2788 }
2789 }
2790
2791 if (! current_function_is_leaf)
2792 {
2793 /* Emit a save of BR0 if we call other functions. Do this even
2794 if this function doesn't return, as EH depends on this to be
2795 able to unwind the stack. */
2796 SET_HARD_REG_BIT (mask, BR_REG (0));
2797
2798 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2799 if (current_frame_info.r[reg_save_b0] == 0)
2800 {
2801 extra_spill_size += 8;
2802 n_spilled += 1;
2803 }
2804
2805 /* Similarly for ar.pfs. */
2806 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2807 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2808 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2809 {
2810 extra_spill_size += 8;
2811 n_spilled += 1;
2812 }
2813
2814 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2815 registers are clobbered, so we fall back to the stack. */
2816 current_frame_info.r[reg_save_gp]
2817 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2818 if (current_frame_info.r[reg_save_gp] == 0)
2819 {
2820 SET_HARD_REG_BIT (mask, GR_REG (1));
2821 spill_size += 8;
2822 n_spilled += 1;
2823 }
2824 }
2825 else
2826 {
2827 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2828 {
2829 SET_HARD_REG_BIT (mask, BR_REG (0));
2830 extra_spill_size += 8;
2831 n_spilled += 1;
2832 }
2833
2834 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2835 {
2836 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2837 current_frame_info.r[reg_save_ar_pfs]
2838 = find_gr_spill (reg_save_ar_pfs, 1);
2839 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2840 {
2841 extra_spill_size += 8;
2842 n_spilled += 1;
2843 }
2844 }
2845 }
2846
2847 /* Unwind descriptor hackery: things are most efficient if we allocate
2848 consecutive GR save registers for RP, PFS, FP in that order. However,
2849 it is absolutely critical that FP get the only hard register that's
2850 guaranteed to be free, so we allocated it first. If all three did
2851 happen to be allocated hard regs, and are consecutive, rearrange them
2852 into the preferred order now.
2853
2854 If we have already emitted code for any of those registers,
2855 then it's already too late to change. */
2856 min_regno = MIN (current_frame_info.r[reg_fp],
2857 MIN (current_frame_info.r[reg_save_b0],
2858 current_frame_info.r[reg_save_ar_pfs]));
2859 max_regno = MAX (current_frame_info.r[reg_fp],
2860 MAX (current_frame_info.r[reg_save_b0],
2861 current_frame_info.r[reg_save_ar_pfs]));
2862 if (min_regno > 0
2863 && min_regno + 2 == max_regno
2864 && (current_frame_info.r[reg_fp] == min_regno + 1
2865 || current_frame_info.r[reg_save_b0] == min_regno + 1
2866 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2867 && (emitted_frame_related_regs[reg_save_b0] == 0
2868 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2869 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2870 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2871 && (emitted_frame_related_regs[reg_fp] == 0
2872 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2873 {
2874 current_frame_info.r[reg_save_b0] = min_regno;
2875 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2876 current_frame_info.r[reg_fp] = min_regno + 2;
2877 }
2878
2879 /* See if we need to store the predicate register block. */
2880 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2881 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2882 break;
2883 if (regno <= PR_REG (63))
2884 {
2885 SET_HARD_REG_BIT (mask, PR_REG (0));
2886 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2887 if (current_frame_info.r[reg_save_pr] == 0)
2888 {
2889 extra_spill_size += 8;
2890 n_spilled += 1;
2891 }
2892
2893 /* ??? Mark them all as used so that register renaming and such
2894 are free to use them. */
2895 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2896 df_set_regs_ever_live (regno, true);
2897 }
2898
2899 /* If we're forced to use st8.spill, we're forced to save and restore
2900 ar.unat as well. The check for existing liveness allows inline asm
2901 to touch ar.unat. */
2902 if (spilled_gr_p || cfun->machine->n_varargs
2903 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2904 {
2905 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2906 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2907 current_frame_info.r[reg_save_ar_unat]
2908 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2909 if (current_frame_info.r[reg_save_ar_unat] == 0)
2910 {
2911 extra_spill_size += 8;
2912 n_spilled += 1;
2913 }
2914 }
2915
2916 if (df_regs_ever_live_p (AR_LC_REGNUM))
2917 {
2918 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2919 current_frame_info.r[reg_save_ar_lc]
2920 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2921 if (current_frame_info.r[reg_save_ar_lc] == 0)
2922 {
2923 extra_spill_size += 8;
2924 n_spilled += 1;
2925 }
2926 }
2927
2928 /* If we have an odd number of words of pretend arguments written to
2929 the stack, then the FR save area will be unaligned. We round the
2930 size of this area up to keep things 16 byte aligned. */
2931 if (spilled_fr_p)
2932 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2933 else
2934 pretend_args_size = crtl->args.pretend_args_size;
2935
2936 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2937 + crtl->outgoing_args_size);
2938 total_size = IA64_STACK_ALIGN (total_size);
2939
2940 /* We always use the 16-byte scratch area provided by the caller, but
2941 if we are a leaf function, there's no one to which we need to provide
2942 a scratch area. */
2943 if (current_function_is_leaf)
2944 total_size = MAX (0, total_size - 16);
2945
2946 current_frame_info.total_size = total_size;
2947 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2948 current_frame_info.spill_size = spill_size;
2949 current_frame_info.extra_spill_size = extra_spill_size;
2950 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2951 current_frame_info.n_spilled = n_spilled;
2952 current_frame_info.initialized = reload_completed;
2953 }
2954
2955 /* Worker function for TARGET_CAN_ELIMINATE. */
2956
2957 bool
2958 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2959 {
2960 return (to == BR_REG (0) ? current_function_is_leaf : true);
2961 }
2962
2963 /* Compute the initial difference between the specified pair of registers. */
2964
2965 HOST_WIDE_INT
2966 ia64_initial_elimination_offset (int from, int to)
2967 {
2968 HOST_WIDE_INT offset;
2969
2970 ia64_compute_frame_size (get_frame_size ());
2971 switch (from)
2972 {
2973 case FRAME_POINTER_REGNUM:
2974 switch (to)
2975 {
2976 case HARD_FRAME_POINTER_REGNUM:
2977 if (current_function_is_leaf)
2978 offset = -current_frame_info.total_size;
2979 else
2980 offset = -(current_frame_info.total_size
2981 - crtl->outgoing_args_size - 16);
2982 break;
2983
2984 case STACK_POINTER_REGNUM:
2985 if (current_function_is_leaf)
2986 offset = 0;
2987 else
2988 offset = 16 + crtl->outgoing_args_size;
2989 break;
2990
2991 default:
2992 gcc_unreachable ();
2993 }
2994 break;
2995
2996 case ARG_POINTER_REGNUM:
2997 /* Arguments start above the 16 byte save area, unless stdarg
2998 in which case we store through the 16 byte save area. */
2999 switch (to)
3000 {
3001 case HARD_FRAME_POINTER_REGNUM:
3002 offset = 16 - crtl->args.pretend_args_size;
3003 break;
3004
3005 case STACK_POINTER_REGNUM:
3006 offset = (current_frame_info.total_size
3007 + 16 - crtl->args.pretend_args_size);
3008 break;
3009
3010 default:
3011 gcc_unreachable ();
3012 }
3013 break;
3014
3015 default:
3016 gcc_unreachable ();
3017 }
3018
3019 return offset;
3020 }
3021
3022 /* If there are more than a trivial number of register spills, we use
3023 two interleaved iterators so that we can get two memory references
3024 per insn group.
3025
3026 In order to simplify things in the prologue and epilogue expanders,
3027 we use helper functions to fix up the memory references after the
3028 fact with the appropriate offsets to a POST_MODIFY memory mode.
3029 The following data structure tracks the state of the two iterators
3030 while insns are being emitted. */
3031
3032 struct spill_fill_data
3033 {
3034 rtx init_after; /* point at which to emit initializations */
3035 rtx init_reg[2]; /* initial base register */
3036 rtx iter_reg[2]; /* the iterator registers */
3037 rtx *prev_addr[2]; /* address of last memory use */
3038 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
3039 HOST_WIDE_INT prev_off[2]; /* last offset */
3040 int n_iter; /* number of iterators in use */
3041 int next_iter; /* next iterator to use */
3042 unsigned int save_gr_used_mask;
3043 };
3044
3045 static struct spill_fill_data spill_fill_data;
3046
3047 static void
3048 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3049 {
3050 int i;
3051
3052 spill_fill_data.init_after = get_last_insn ();
3053 spill_fill_data.init_reg[0] = init_reg;
3054 spill_fill_data.init_reg[1] = init_reg;
3055 spill_fill_data.prev_addr[0] = NULL;
3056 spill_fill_data.prev_addr[1] = NULL;
3057 spill_fill_data.prev_insn[0] = NULL;
3058 spill_fill_data.prev_insn[1] = NULL;
3059 spill_fill_data.prev_off[0] = cfa_off;
3060 spill_fill_data.prev_off[1] = cfa_off;
3061 spill_fill_data.next_iter = 0;
3062 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3063
3064 spill_fill_data.n_iter = 1 + (n_spills > 2);
3065 for (i = 0; i < spill_fill_data.n_iter; ++i)
3066 {
3067 int regno = next_scratch_gr_reg ();
3068 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3069 current_frame_info.gr_used_mask |= 1 << regno;
3070 }
3071 }
3072
3073 static void
3074 finish_spill_pointers (void)
3075 {
3076 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3077 }
3078
3079 static rtx
3080 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3081 {
3082 int iter = spill_fill_data.next_iter;
3083 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3084 rtx disp_rtx = GEN_INT (disp);
3085 rtx mem;
3086
3087 if (spill_fill_data.prev_addr[iter])
3088 {
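/* There is a previous memory reference through this iterator.  If the
   displacement fits, rewrite that reference as a POST_MODIFY so the
   iterator advances without a separate add; otherwise emit an explicit
   add to the iterator register.  */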
3089 if (satisfies_constraint_N (disp_rtx))
3090 {
3091 *spill_fill_data.prev_addr[iter]
3092 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3093 gen_rtx_PLUS (DImode,
3094 spill_fill_data.iter_reg[iter],
3095 disp_rtx));
3096 add_reg_note (spill_fill_data.prev_insn[iter],
3097 REG_INC, spill_fill_data.iter_reg[iter]);
3098 }
3099 else
3100 {
3101 /* ??? Could use register post_modify for loads. */
3102 if (!satisfies_constraint_I (disp_rtx))
3103 {
3104 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3105 emit_move_insn (tmp, disp_rtx);
3106 disp_rtx = tmp;
3107 }
3108 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3109 spill_fill_data.iter_reg[iter], disp_rtx));
3110 }
3111 }
3112 /* Micro-optimization: if we've created a frame pointer, it's at
3113 CFA 0, which may allow the real iterator to be initialized lower,
3114 slightly increasing parallelism. Also, if there are few saves
3115 it may eliminate the iterator entirely. */
3116 else if (disp == 0
3117 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3118 && frame_pointer_needed)
3119 {
3120 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3121 set_mem_alias_set (mem, get_varargs_alias_set ());
3122 return mem;
3123 }
3124 else
3125 {
3126 rtx seq, insn;
3127
3128 if (disp == 0)
3129 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3130 spill_fill_data.init_reg[iter]);
3131 else
3132 {
3133 start_sequence ();
3134
3135 if (!satisfies_constraint_I (disp_rtx))
3136 {
3137 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3138 emit_move_insn (tmp, disp_rtx);
3139 disp_rtx = tmp;
3140 }
3141
3142 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3143 spill_fill_data.init_reg[iter],
3144 disp_rtx));
3145
3146 seq = get_insns ();
3147 end_sequence ();
3148 }
3149
3150 /* Be careful in case this needs to be the first insn in the sequence. */
3151 if (spill_fill_data.init_after)
3152 insn = emit_insn_after (seq, spill_fill_data.init_after);
3153 else
3154 {
3155 rtx first = get_insns ();
3156 if (first)
3157 insn = emit_insn_before (seq, first);
3158 else
3159 insn = emit_insn (seq);
3160 }
3161 spill_fill_data.init_after = insn;
3162 }
3163
3164 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3165
3166 /* ??? Not all of the spills are for varargs, but some of them are.
3167 The rest of the spills belong in an alias set of their own. But
3168 it doesn't actually hurt to include them here. */
3169 set_mem_alias_set (mem, get_varargs_alias_set ());
3170
3171 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3172 spill_fill_data.prev_off[iter] = cfa_off;
3173
3174 if (++iter >= spill_fill_data.n_iter)
3175 iter = 0;
3176 spill_fill_data.next_iter = iter;
3177
3178 return mem;
3179 }
3180
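/* Store REG into the next spill slot at CFA_OFF using MOVE_FN.  If FRAME_REG
   is nonnull, mark the insn frame related and attach a REG_CFA_OFFSET note
   describing the save for the unwind info.  */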
3181 static void
3182 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3183 rtx frame_reg)
3184 {
3185 int iter = spill_fill_data.next_iter;
3186 rtx mem, insn;
3187
3188 mem = spill_restore_mem (reg, cfa_off);
3189 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3190 spill_fill_data.prev_insn[iter] = insn;
3191
3192 if (frame_reg)
3193 {
3194 rtx base;
3195 HOST_WIDE_INT off;
3196
3197 RTX_FRAME_RELATED_P (insn) = 1;
3198
3199 /* Don't even pretend that the unwind code can intuit its way
3200 through a pair of interleaved post_modify iterators. Just
3201 provide the correct answer. */
3202
3203 if (frame_pointer_needed)
3204 {
3205 base = hard_frame_pointer_rtx;
3206 off = - cfa_off;
3207 }
3208 else
3209 {
3210 base = stack_pointer_rtx;
3211 off = current_frame_info.total_size - cfa_off;
3212 }
3213
3214 add_reg_note (insn, REG_CFA_OFFSET,
3215 gen_rtx_SET (VOIDmode,
3216 gen_rtx_MEM (GET_MODE (reg),
3217 plus_constant (base, off)),
3218 frame_reg));
3219 }
3220 }
3221
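/* Reload REG from the spill slot at CFA_OFF using MOVE_FN and the current
   spill iterator.  */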
3222 static void
3223 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3224 {
3225 int iter = spill_fill_data.next_iter;
3226 rtx insn;
3227
3228 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3229 GEN_INT (cfa_off)));
3230 spill_fill_data.prev_insn[iter] = insn;
3231 }
3232
3233 /* Wrapper functions that discard the CONST_INT spill offset. These
3234 exist so that we can give gr_spill/gr_fill the offset they need and
3235 use a consistent function interface. */
3236
3237 static rtx
3238 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3239 {
3240 return gen_movdi (dest, src);
3241 }
3242
3243 static rtx
3244 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3245 {
3246 return gen_fr_spill (dest, src);
3247 }
3248
3249 static rtx
3250 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3251 {
3252 return gen_fr_restore (dest, src);
3253 }
3254
3255 /* Called after register allocation to add any instructions needed for the
3256 prologue. Using a prologue insn is favored compared to putting all of the
3257 instructions in output_function_prologue(), since it allows the scheduler
3258 to intermix instructions with the saves of the caller saved registers. In
3259 some cases, it might be necessary to emit a barrier instruction as the last
3260 insn to prevent such scheduling.
3261
3262 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3263 so that the debug info generation code can handle them properly.
3264
3265 The register save area is laid out like so:
3266 cfa+16
3267 [ varargs spill area ]
3268 [ fr register spill area ]
3269 [ br register spill area ]
3270 [ ar register spill area ]
3271 [ pr register spill area ]
3272 [ gr register spill area ] */
3273
3274 /* ??? We get inefficient code when the frame size is larger than can fit in an
3275 adds instruction. */
3276
3277 void
3278 ia64_expand_prologue (void)
3279 {
3280 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3281 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3282 rtx reg, alt_reg;
3283
3284 ia64_compute_frame_size (get_frame_size ());
3285 last_scratch_gr_reg = 15;
3286
3287 if (flag_stack_usage_info)
3288 current_function_static_stack_size = current_frame_info.total_size;
3289
3290 if (dump_file)
3291 {
3292 fprintf (dump_file, "ia64 frame related registers "
3293 "recorded in current_frame_info.r[]:\n");
3294 #define PRINTREG(a) if (current_frame_info.r[a]) \
3295 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3296 PRINTREG(reg_fp);
3297 PRINTREG(reg_save_b0);
3298 PRINTREG(reg_save_pr);
3299 PRINTREG(reg_save_ar_pfs);
3300 PRINTREG(reg_save_ar_unat);
3301 PRINTREG(reg_save_ar_lc);
3302 PRINTREG(reg_save_gp);
3303 #undef PRINTREG
3304 }
3305
3306 /* If there is no epilogue, then we don't need some prologue insns.
3307 We need to avoid emitting the dead prologue insns, because flow
3308 will complain about them. */
3309 if (optimize)
3310 {
3311 edge e;
3312 edge_iterator ei;
3313
3314 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3315 if ((e->flags & EDGE_FAKE) == 0
3316 && (e->flags & EDGE_FALLTHRU) != 0)
3317 break;
3318 epilogue_p = (e != NULL);
3319 }
3320 else
3321 epilogue_p = 1;
3322
3323 /* Set the local, input, and output register names. We need to do this
3324 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3325 half. If we use in/loc/out register names, then we get assembler errors
3326 in crtn.S because there is no alloc insn or regstk directive in there. */
3327 if (! TARGET_REG_NAMES)
3328 {
3329 int inputs = current_frame_info.n_input_regs;
3330 int locals = current_frame_info.n_local_regs;
3331 int outputs = current_frame_info.n_output_regs;
3332
3333 for (i = 0; i < inputs; i++)
3334 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3335 for (i = 0; i < locals; i++)
3336 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3337 for (i = 0; i < outputs; i++)
3338 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3339 }
3340
3341 /* Set the frame pointer register name. The regnum is logically loc79,
3342 but of course we'll not have allocated that many locals. Rather than
3343 worrying about renumbering the existing rtxs, we adjust the name. */
3344 /* ??? This code means that we can never use one local register when
3345 there is a frame pointer. loc79 gets wasted in this case, as it is
3346 renamed to a register that will never be used. See also the try_locals
3347 code in find_gr_spill. */
3348 if (current_frame_info.r[reg_fp])
3349 {
3350 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3351 reg_names[HARD_FRAME_POINTER_REGNUM]
3352 = reg_names[current_frame_info.r[reg_fp]];
3353 reg_names[current_frame_info.r[reg_fp]] = tmp;
3354 }
3355
3356 /* We don't need an alloc instruction if we've used no outputs or locals. */
3357 if (current_frame_info.n_local_regs == 0
3358 && current_frame_info.n_output_regs == 0
3359 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3360 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3361 {
3362 /* If there is no alloc, but there are input registers used, then we
3363 need a .regstk directive. */
3364 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3365 ar_pfs_save_reg = NULL_RTX;
3366 }
3367 else
3368 {
3369 current_frame_info.need_regstk = 0;
3370
3371 if (current_frame_info.r[reg_save_ar_pfs])
3372 {
3373 regno = current_frame_info.r[reg_save_ar_pfs];
3374 reg_emitted (reg_save_ar_pfs);
3375 }
3376 else
3377 regno = next_scratch_gr_reg ();
3378 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3379
3380 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3381 GEN_INT (current_frame_info.n_input_regs),
3382 GEN_INT (current_frame_info.n_local_regs),
3383 GEN_INT (current_frame_info.n_output_regs),
3384 GEN_INT (current_frame_info.n_rotate_regs)));
3385 if (current_frame_info.r[reg_save_ar_pfs])
3386 {
3387 RTX_FRAME_RELATED_P (insn) = 1;
3388 add_reg_note (insn, REG_CFA_REGISTER,
3389 gen_rtx_SET (VOIDmode,
3390 ar_pfs_save_reg,
3391 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3392 }
3393 }
3394
3395 /* Set up frame pointer, stack pointer, and spill iterators. */
3396
3397 n_varargs = cfun->machine->n_varargs;
3398 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3399 stack_pointer_rtx, 0);
3400
3401 if (frame_pointer_needed)
3402 {
3403 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3404 RTX_FRAME_RELATED_P (insn) = 1;
3405
3406 /* Force the unwind info to recognize this as defining a new CFA,
3407 rather than some temp register setup. */
3408 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3409 }
3410
3411 if (current_frame_info.total_size != 0)
3412 {
3413 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3414 rtx offset;
3415
3416 if (satisfies_constraint_I (frame_size_rtx))
3417 offset = frame_size_rtx;
3418 else
3419 {
3420 regno = next_scratch_gr_reg ();
3421 offset = gen_rtx_REG (DImode, regno);
3422 emit_move_insn (offset, frame_size_rtx);
3423 }
3424
3425 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3426 stack_pointer_rtx, offset));
3427
3428 if (! frame_pointer_needed)
3429 {
3430 RTX_FRAME_RELATED_P (insn) = 1;
3431 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3432 gen_rtx_SET (VOIDmode,
3433 stack_pointer_rtx,
3434 gen_rtx_PLUS (DImode,
3435 stack_pointer_rtx,
3436 frame_size_rtx)));
3437 }
3438
3439 /* ??? At this point we must generate a magic insn that appears to
3440 modify the stack pointer, the frame pointer, and all spill
3441 iterators. This would allow the most scheduling freedom. For
3442 now, just hard stop. */
3443 emit_insn (gen_blockage ());
3444 }
3445
3446 /* Must copy out ar.unat before doing any integer spills. */
3447 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3448 {
3449 if (current_frame_info.r[reg_save_ar_unat])
3450 {
3451 ar_unat_save_reg
3452 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3453 reg_emitted (reg_save_ar_unat);
3454 }
3455 else
3456 {
3457 alt_regno = next_scratch_gr_reg ();
3458 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3459 current_frame_info.gr_used_mask |= 1 << alt_regno;
3460 }
3461
3462 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3463 insn = emit_move_insn (ar_unat_save_reg, reg);
3464 if (current_frame_info.r[reg_save_ar_unat])
3465 {
3466 RTX_FRAME_RELATED_P (insn) = 1;
3467 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3468 }
3469
3470 /* Even if we're not going to generate an epilogue, we still
3471 need to save the register so that EH works. */
3472 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3473 emit_insn (gen_prologue_use (ar_unat_save_reg));
3474 }
3475 else
3476 ar_unat_save_reg = NULL_RTX;
3477
3478 /* Spill all varargs registers. Do this before spilling any GR registers,
3479 since we want the UNAT bits for the GR registers to override the UNAT
3480 bits from varargs, which we don't care about. */
3481
3482 cfa_off = -16;
3483 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3484 {
3485 reg = gen_rtx_REG (DImode, regno);
3486 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3487 }
3488
3489 /* Locate the bottom of the register save area. */
3490 cfa_off = (current_frame_info.spill_cfa_off
3491 + current_frame_info.spill_size
3492 + current_frame_info.extra_spill_size);
3493
3494 /* Save the predicate register block either in a register or in memory. */
3495 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3496 {
3497 reg = gen_rtx_REG (DImode, PR_REG (0));
3498 if (current_frame_info.r[reg_save_pr] != 0)
3499 {
3500 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3501 reg_emitted (reg_save_pr);
3502 insn = emit_move_insn (alt_reg, reg);
3503
3504 /* ??? Denote pr spill/fill by a DImode move that modifies all
3505 64 hard registers. */
3506 RTX_FRAME_RELATED_P (insn) = 1;
3507 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3508
3509 /* Even if we're not going to generate an epilogue, we still
3510 need to save the register so that EH works. */
3511 if (! epilogue_p)
3512 emit_insn (gen_prologue_use (alt_reg));
3513 }
3514 else
3515 {
3516 alt_regno = next_scratch_gr_reg ();
3517 alt_reg = gen_rtx_REG (DImode, alt_regno);
3518 insn = emit_move_insn (alt_reg, reg);
3519 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3520 cfa_off -= 8;
3521 }
3522 }
3523
3524 /* Handle AR regs in numerical order. All of them get special handling. */
3525 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3526 && current_frame_info.r[reg_save_ar_unat] == 0)
3527 {
3528 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3529 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3530 cfa_off -= 8;
3531 }
3532
3533 /* The alloc insn already copied ar.pfs into a general register. The
3534 only thing we have to do now is copy that register to a stack slot
3535 if we'd not allocated a local register for the job. */
3536 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3537 && current_frame_info.r[reg_save_ar_pfs] == 0)
3538 {
3539 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3540 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3541 cfa_off -= 8;
3542 }
3543
3544 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3545 {
3546 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3547 if (current_frame_info.r[reg_save_ar_lc] != 0)
3548 {
3549 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3550 reg_emitted (reg_save_ar_lc);
3551 insn = emit_move_insn (alt_reg, reg);
3552 RTX_FRAME_RELATED_P (insn) = 1;
3553 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3554
3555 /* Even if we're not going to generate an epilogue, we still
3556 need to save the register so that EH works. */
3557 if (! epilogue_p)
3558 emit_insn (gen_prologue_use (alt_reg));
3559 }
3560 else
3561 {
3562 alt_regno = next_scratch_gr_reg ();
3563 alt_reg = gen_rtx_REG (DImode, alt_regno);
3564 emit_move_insn (alt_reg, reg);
3565 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3566 cfa_off -= 8;
3567 }
3568 }
3569
3570 /* Save the return pointer. */
3571 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3572 {
3573 reg = gen_rtx_REG (DImode, BR_REG (0));
3574 if (current_frame_info.r[reg_save_b0] != 0)
3575 {
3576 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3577 reg_emitted (reg_save_b0);
3578 insn = emit_move_insn (alt_reg, reg);
3579 RTX_FRAME_RELATED_P (insn) = 1;
3580 add_reg_note (insn, REG_CFA_REGISTER,
3581 gen_rtx_SET (VOIDmode, alt_reg, pc_rtx));
3582
3583 /* Even if we're not going to generate an epilogue, we still
3584 need to save the register so that EH works. */
3585 if (! epilogue_p)
3586 emit_insn (gen_prologue_use (alt_reg));
3587 }
3588 else
3589 {
3590 alt_regno = next_scratch_gr_reg ();
3591 alt_reg = gen_rtx_REG (DImode, alt_regno);
3592 emit_move_insn (alt_reg, reg);
3593 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3594 cfa_off -= 8;
3595 }
3596 }
3597
3598 if (current_frame_info.r[reg_save_gp])
3599 {
3600 reg_emitted (reg_save_gp);
3601 insn = emit_move_insn (gen_rtx_REG (DImode,
3602 current_frame_info.r[reg_save_gp]),
3603 pic_offset_table_rtx);
3604 }
3605
3606 /* We should now be at the base of the gr/br/fr spill area. */
3607 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3608 + current_frame_info.spill_size));
3609
3610 /* Spill all general registers. */
3611 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3612 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3613 {
3614 reg = gen_rtx_REG (DImode, regno);
3615 do_spill (gen_gr_spill, reg, cfa_off, reg);
3616 cfa_off -= 8;
3617 }
3618
3619 /* Spill the rest of the BR registers. */
3620 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3621 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3622 {
3623 alt_regno = next_scratch_gr_reg ();
3624 alt_reg = gen_rtx_REG (DImode, alt_regno);
3625 reg = gen_rtx_REG (DImode, regno);
3626 emit_move_insn (alt_reg, reg);
3627 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3628 cfa_off -= 8;
3629 }
3630
3631 /* Align the frame and spill all FR registers. */
3632 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3633 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3634 {
3635 gcc_assert (!(cfa_off & 15));
3636 reg = gen_rtx_REG (XFmode, regno);
3637 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3638 cfa_off -= 16;
3639 }
3640
3641 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3642
3643 finish_spill_pointers ();
3644 }
3645
3646 /* Output the textual info surrounding the prologue. */
3647
3648 void
3649 ia64_start_function (FILE *file, const char *fnname,
3650 tree decl ATTRIBUTE_UNUSED)
3651 {
3652 #if VMS_DEBUGGING_INFO
3653 if (vms_debug_main
3654 && debug_info_level > DINFO_LEVEL_NONE
3655 && strncmp (vms_debug_main, fnname, strlen (vms_debug_main)) == 0)
3656 {
3657 targetm.asm_out.globalize_label (asm_out_file, VMS_DEBUG_MAIN_POINTER);
3658 ASM_OUTPUT_DEF (asm_out_file, VMS_DEBUG_MAIN_POINTER, fnname);
3659 dwarf2out_vms_debug_main_pointer ();
3660 vms_debug_main = 0;
3661 }
3662 #endif
3663
3664 fputs ("\t.proc ", file);
3665 assemble_name (file, fnname);
3666 fputc ('\n', file);
3667 ASM_OUTPUT_LABEL (file, fnname);
3668 }
3669
3670 /* Called after register allocation to add any instructions needed for the
3671    epilogue.  Using an epilogue insn is favored compared to putting all of the
3672    instructions in output_function_epilogue(), since it allows the scheduler
3673    to intermix instructions with the restores of the caller saved registers.
3674    In some cases, it might be necessary to emit a barrier instruction as the
3675    last insn to prevent such scheduling.  */
3676
3677 void
3678 ia64_expand_epilogue (int sibcall_p)
3679 {
3680 rtx insn, reg, alt_reg, ar_unat_save_reg;
3681 int regno, alt_regno, cfa_off;
3682
3683 ia64_compute_frame_size (get_frame_size ());
3684
3685 /* If there is a frame pointer, then we use it instead of the stack
3686 pointer, so that the stack pointer does not need to be valid when
3687 the epilogue starts. See EXIT_IGNORE_STACK. */
3688 if (frame_pointer_needed)
3689 setup_spill_pointers (current_frame_info.n_spilled,
3690 hard_frame_pointer_rtx, 0);
3691 else
3692 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3693 current_frame_info.total_size);
3694
3695 if (current_frame_info.total_size != 0)
3696 {
3697 /* ??? At this point we must generate a magic insn that appears to
3698 modify the spill iterators and the frame pointer. This would
3699 allow the most scheduling freedom. For now, just hard stop. */
3700 emit_insn (gen_blockage ());
3701 }
3702
3703 /* Locate the bottom of the register save area. */
3704 cfa_off = (current_frame_info.spill_cfa_off
3705 + current_frame_info.spill_size
3706 + current_frame_info.extra_spill_size);
3707
3708 /* Restore the predicate registers. */
3709 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3710 {
3711 if (current_frame_info.r[reg_save_pr] != 0)
3712 {
3713 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3714 reg_emitted (reg_save_pr);
3715 }
3716 else
3717 {
3718 alt_regno = next_scratch_gr_reg ();
3719 alt_reg = gen_rtx_REG (DImode, alt_regno);
3720 do_restore (gen_movdi_x, alt_reg, cfa_off);
3721 cfa_off -= 8;
3722 }
3723 reg = gen_rtx_REG (DImode, PR_REG (0));
3724 emit_move_insn (reg, alt_reg);
3725 }
3726
3727 /* Restore the application registers. */
3728
3729 /* Load the saved unat from the stack, but do not restore it until
3730 after the GRs have been restored. */
3731 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3732 {
3733 if (current_frame_info.r[reg_save_ar_unat] != 0)
3734 {
3735 ar_unat_save_reg
3736 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3737 reg_emitted (reg_save_ar_unat);
3738 }
3739 else
3740 {
3741 alt_regno = next_scratch_gr_reg ();
3742 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3743 current_frame_info.gr_used_mask |= 1 << alt_regno;
3744 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3745 cfa_off -= 8;
3746 }
3747 }
3748 else
3749 ar_unat_save_reg = NULL_RTX;
3750
3751 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3752 {
3753 reg_emitted (reg_save_ar_pfs);
3754 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3755 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3756 emit_move_insn (reg, alt_reg);
3757 }
3758 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3759 {
3760 alt_regno = next_scratch_gr_reg ();
3761 alt_reg = gen_rtx_REG (DImode, alt_regno);
3762 do_restore (gen_movdi_x, alt_reg, cfa_off);
3763 cfa_off -= 8;
3764 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3765 emit_move_insn (reg, alt_reg);
3766 }
3767
3768 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3769 {
3770 if (current_frame_info.r[reg_save_ar_lc] != 0)
3771 {
3772 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3773 reg_emitted (reg_save_ar_lc);
3774 }
3775 else
3776 {
3777 alt_regno = next_scratch_gr_reg ();
3778 alt_reg = gen_rtx_REG (DImode, alt_regno);
3779 do_restore (gen_movdi_x, alt_reg, cfa_off);
3780 cfa_off -= 8;
3781 }
3782 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3783 emit_move_insn (reg, alt_reg);
3784 }
3785
3786 /* Restore the return pointer. */
3787 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3788 {
3789 if (current_frame_info.r[reg_save_b0] != 0)
3790 {
3791 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3792 reg_emitted (reg_save_b0);
3793 }
3794 else
3795 {
3796 alt_regno = next_scratch_gr_reg ();
3797 alt_reg = gen_rtx_REG (DImode, alt_regno);
3798 do_restore (gen_movdi_x, alt_reg, cfa_off);
3799 cfa_off -= 8;
3800 }
3801 reg = gen_rtx_REG (DImode, BR_REG (0));
3802 emit_move_insn (reg, alt_reg);
3803 }
3804
3805 /* We should now be at the base of the gr/br/fr spill area. */
3806 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3807 + current_frame_info.spill_size));
3808
3809 /* The GP may be stored on the stack in the prologue, but it's
3810 never restored in the epilogue. Skip the stack slot. */
3811 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3812 cfa_off -= 8;
3813
3814 /* Restore all general registers. */
3815 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3816 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3817 {
3818 reg = gen_rtx_REG (DImode, regno);
3819 do_restore (gen_gr_restore, reg, cfa_off);
3820 cfa_off -= 8;
3821 }
3822
3823 /* Restore the branch registers. */
3824 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3825 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3826 {
3827 alt_regno = next_scratch_gr_reg ();
3828 alt_reg = gen_rtx_REG (DImode, alt_regno);
3829 do_restore (gen_movdi_x, alt_reg, cfa_off);
3830 cfa_off -= 8;
3831 reg = gen_rtx_REG (DImode, regno);
3832 emit_move_insn (reg, alt_reg);
3833 }
3834
3835 /* Restore floating point registers. */
3836 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3837 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3838 {
3839 gcc_assert (!(cfa_off & 15));
3840 reg = gen_rtx_REG (XFmode, regno);
3841 do_restore (gen_fr_restore_x, reg, cfa_off);
3842 cfa_off -= 16;
3843 }
3844
3845 /* Restore ar.unat for real. */
3846 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3847 {
3848 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3849 emit_move_insn (reg, ar_unat_save_reg);
3850 }
3851
3852 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3853
3854 finish_spill_pointers ();
3855
3856 if (current_frame_info.total_size
3857 || cfun->machine->ia64_eh_epilogue_sp
3858 || frame_pointer_needed)
3859 {
3860 /* ??? At this point we must generate a magic insn that appears to
3861 modify the spill iterators, the stack pointer, and the frame
3862 pointer. This would allow the most scheduling freedom. For now,
3863 just hard stop. */
3864 emit_insn (gen_blockage ());
3865 }
3866
3867 if (cfun->machine->ia64_eh_epilogue_sp)
3868 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3869 else if (frame_pointer_needed)
3870 {
3871 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3872 RTX_FRAME_RELATED_P (insn) = 1;
3873 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
3874 }
3875 else if (current_frame_info.total_size)
3876 {
3877 rtx offset, frame_size_rtx;
3878
3879 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3880 if (satisfies_constraint_I (frame_size_rtx))
3881 offset = frame_size_rtx;
3882 else
3883 {
3884 regno = next_scratch_gr_reg ();
3885 offset = gen_rtx_REG (DImode, regno);
3886 emit_move_insn (offset, frame_size_rtx);
3887 }
3888
3889 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3890 offset));
3891
3892 RTX_FRAME_RELATED_P (insn) = 1;
3893 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3894 gen_rtx_SET (VOIDmode,
3895 stack_pointer_rtx,
3896 gen_rtx_PLUS (DImode,
3897 stack_pointer_rtx,
3898 frame_size_rtx)));
3899 }
3900
3901 if (cfun->machine->ia64_eh_epilogue_bsp)
3902 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3903
3904 if (! sibcall_p)
3905 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3906 else
3907 {
3908 int fp = GR_REG (2);
3909       /* We need a throwaway register here; r0 and r1 are reserved,
3910 	 so r2 is the first available call-clobbered register.  If
3911 there was a frame_pointer register, we may have swapped the
3912 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
3913 sure we're using the string "r2" when emitting the register
3914 name for the assembler. */
3915 if (current_frame_info.r[reg_fp]
3916 && current_frame_info.r[reg_fp] == GR_REG (2))
3917 fp = HARD_FRAME_POINTER_REGNUM;
3918
3919 /* We must emit an alloc to force the input registers to become output
3920 registers. Otherwise, if the callee tries to pass its parameters
3921 through to another call without an intervening alloc, then these
3922 values get lost. */
3923 /* ??? We don't need to preserve all input registers. We only need to
3924 preserve those input registers used as arguments to the sibling call.
3925 It is unclear how to compute that number here. */
3926 if (current_frame_info.n_input_regs != 0)
3927 {
3928 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3929
3930 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3931 const0_rtx, const0_rtx,
3932 n_inputs, const0_rtx));
3933 RTX_FRAME_RELATED_P (insn) = 1;
3934
3935 /* ??? We need to mark the alloc as frame-related so that it gets
3936 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
3937 But there's nothing dwarf2 related to be done wrt the register
3938 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
3939 the empty parallel means dwarf2out will not see anything. */
3940 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3941 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
3942 }
3943 }
3944 }
3945
3946 /* Return 1 if br.ret can do all the work required to return from a
3947 function. */
3948
3949 int
3950 ia64_direct_return (void)
3951 {
3952 if (reload_completed && ! frame_pointer_needed)
3953 {
3954 ia64_compute_frame_size (get_frame_size ());
3955
3956 return (current_frame_info.total_size == 0
3957 && current_frame_info.n_spilled == 0
3958 && current_frame_info.r[reg_save_b0] == 0
3959 && current_frame_info.r[reg_save_pr] == 0
3960 && current_frame_info.r[reg_save_ar_pfs] == 0
3961 && current_frame_info.r[reg_save_ar_unat] == 0
3962 && current_frame_info.r[reg_save_ar_lc] == 0);
3963 }
3964 return 0;
3965 }
3966
3967 /* Return the magic cookie that we use to hold the return address
3968 during early compilation. */
3969
3970 rtx
3971 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3972 {
3973 if (count != 0)
3974 return NULL;
3975 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3976 }
3977
3978 /* Split this value after reload, now that we know where the return
3979 address is saved. */
3980
3981 void
3982 ia64_split_return_addr_rtx (rtx dest)
3983 {
3984 rtx src;
3985
3986 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3987 {
3988 if (current_frame_info.r[reg_save_b0] != 0)
3989 {
3990 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3991 reg_emitted (reg_save_b0);
3992 }
3993 else
3994 {
3995 HOST_WIDE_INT off;
3996 unsigned int regno;
3997 rtx off_r;
3998
3999 /* Compute offset from CFA for BR0. */
4000 /* ??? Must be kept in sync with ia64_expand_prologue. */
4001 off = (current_frame_info.spill_cfa_off
4002 + current_frame_info.spill_size);
4003 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4004 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4005 off -= 8;
4006
4007 /* Convert CFA offset to a register based offset. */
4008 if (frame_pointer_needed)
4009 src = hard_frame_pointer_rtx;
4010 else
4011 {
4012 src = stack_pointer_rtx;
4013 off += current_frame_info.total_size;
4014 }
4015
4016 /* Load address into scratch register. */
4017 off_r = GEN_INT (off);
4018 if (satisfies_constraint_I (off_r))
4019 emit_insn (gen_adddi3 (dest, src, off_r));
4020 else
4021 {
4022 emit_move_insn (dest, off_r);
4023 emit_insn (gen_adddi3 (dest, src, dest));
4024 }
4025
4026 src = gen_rtx_MEM (Pmode, dest);
4027 }
4028 }
4029 else
4030 src = gen_rtx_REG (DImode, BR_REG (0));
4031
4032 emit_move_insn (dest, src);
4033 }
4034
4035 int
4036 ia64_hard_regno_rename_ok (int from, int to)
4037 {
4038 /* Don't clobber any of the registers we reserved for the prologue. */
4039 unsigned int r;
4040
4041 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4042 if (to == current_frame_info.r[r]
4043 || from == current_frame_info.r[r]
4044 || to == emitted_frame_related_regs[r]
4045 || from == emitted_frame_related_regs[r])
4046 return 0;
4047
4048 /* Don't use output registers outside the register frame. */
4049 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4050 return 0;
4051
4052 /* Retain even/oddness on predicate register pairs. */
4053 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4054 return (from & 1) == (to & 1);
4055
4056 return 1;
4057 }
4058
4059 /* Target hook for assembling integer objects. Handle word-sized
4060 aligned objects and detect the cases when @fptr is needed. */
4061
4062 static bool
4063 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4064 {
4065 if (size == POINTER_SIZE / BITS_PER_UNIT
4066 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4067 && GET_CODE (x) == SYMBOL_REF
4068 && SYMBOL_REF_FUNCTION_P (x))
4069 {
4070 static const char * const directive[2][2] = {
4071 /* 64-bit pointer */ /* 32-bit pointer */
4072 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4073 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4074 };
4075 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4076 output_addr_const (asm_out_file, x);
4077 fputs (")\n", asm_out_file);
4078 return true;
4079 }
4080 return default_assemble_integer (x, size, aligned_p);
4081 }
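/* For illustration (hypothetical symbol "foo"): a 64-bit aligned pointer
   to a function foo is emitted as "data8 @fptr(foo)", letting the linker
   materialize the function descriptor; all other cases fall back to
   default_assemble_integer.  */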
4082
4083 /* Emit the function prologue. */
4084
4085 static void
4086 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4087 {
4088 int mask, grsave, grsave_prev;
4089
4090 if (current_frame_info.need_regstk)
4091 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4092 current_frame_info.n_input_regs,
4093 current_frame_info.n_local_regs,
4094 current_frame_info.n_output_regs,
4095 current_frame_info.n_rotate_regs);
4096
4097 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4098 return;
4099
4100 /* Emit the .prologue directive. */
4101
4102 mask = 0;
4103 grsave = grsave_prev = 0;
4104 if (current_frame_info.r[reg_save_b0] != 0)
4105 {
4106 mask |= 8;
4107 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4108 }
4109 if (current_frame_info.r[reg_save_ar_pfs] != 0
4110 && (grsave_prev == 0
4111 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4112 {
4113 mask |= 4;
4114 if (grsave_prev == 0)
4115 grsave = current_frame_info.r[reg_save_ar_pfs];
4116 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4117 }
4118 if (current_frame_info.r[reg_fp] != 0
4119 && (grsave_prev == 0
4120 || current_frame_info.r[reg_fp] == grsave_prev + 1))
4121 {
4122 mask |= 2;
4123 if (grsave_prev == 0)
4124 grsave = HARD_FRAME_POINTER_REGNUM;
4125 grsave_prev = current_frame_info.r[reg_fp];
4126 }
4127 if (current_frame_info.r[reg_save_pr] != 0
4128 && (grsave_prev == 0
4129 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4130 {
4131 mask |= 1;
4132 if (grsave_prev == 0)
4133 grsave = current_frame_info.r[reg_save_pr];
4134 }
4135
4136 if (mask && TARGET_GNU_AS)
4137 fprintf (file, "\t.prologue %d, %d\n", mask,
4138 ia64_dbx_register_number (grsave));
4139 else
4140 fputs ("\t.prologue\n", file);
4141
4142 /* Emit a .spill directive, if necessary, to relocate the base of
4143 the register spill area. */
4144 if (current_frame_info.spill_cfa_off != -16)
4145 fprintf (file, "\t.spill %ld\n",
4146 (long) (current_frame_info.spill_cfa_off
4147 + current_frame_info.spill_size));
4148 }
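/* A hypothetical illustration of the mask built above: if b0 and ar.pfs
   are saved in consecutive general registers, bits 8 and 4 are both set,
   so GNU as sees a single ".prologue 12, <grsave>" directive, where
   <grsave> is the debug number of the first register of the group.  */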
4149
4150 /* Emit the .body directive at the scheduled end of the prologue. */
4151
4152 static void
4153 ia64_output_function_end_prologue (FILE *file)
4154 {
4155 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4156 return;
4157
4158 fputs ("\t.body\n", file);
4159 }
4160
4161 /* Emit the function epilogue. */
4162
4163 static void
4164 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4165 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4166 {
4167 int i;
4168
4169 if (current_frame_info.r[reg_fp])
4170 {
4171 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4172 reg_names[HARD_FRAME_POINTER_REGNUM]
4173 = reg_names[current_frame_info.r[reg_fp]];
4174 reg_names[current_frame_info.r[reg_fp]] = tmp;
4175 reg_emitted (reg_fp);
4176 }
4177 if (! TARGET_REG_NAMES)
4178 {
4179 for (i = 0; i < current_frame_info.n_input_regs; i++)
4180 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4181 for (i = 0; i < current_frame_info.n_local_regs; i++)
4182 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4183 for (i = 0; i < current_frame_info.n_output_regs; i++)
4184 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4185 }
4186
4187 current_frame_info.initialized = 0;
4188 }
4189
4190 int
4191 ia64_dbx_register_number (int regno)
4192 {
4193 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4194 from its home at loc79 to something inside the register frame. We
4195 must perform the same renumbering here for the debug info. */
4196 if (current_frame_info.r[reg_fp])
4197 {
4198 if (regno == HARD_FRAME_POINTER_REGNUM)
4199 regno = current_frame_info.r[reg_fp];
4200 else if (regno == current_frame_info.r[reg_fp])
4201 regno = HARD_FRAME_POINTER_REGNUM;
4202 }
4203
4204 if (IN_REGNO_P (regno))
4205 return 32 + regno - IN_REG (0);
4206 else if (LOC_REGNO_P (regno))
4207 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4208 else if (OUT_REGNO_P (regno))
4209 return (32 + current_frame_info.n_input_regs
4210 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4211 else
4212 return regno;
4213 }
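/* For instance, with 2 input and 3 local registers, in0 and in1 map to
   debug register numbers 32 and 33, loc0..loc2 map to 34..36, and out0
   maps to 37; registers outside the stacked frame fall through with
   their hard register numbers unchanged.  */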
4214
4215 /* Implement TARGET_TRAMPOLINE_INIT.
4216
4217 The trampoline should set the static chain pointer to value placed
4218 into the trampoline and should branch to the specified routine.
4219 To make the normal indirect-subroutine calling convention work,
4220 the trampoline must look like a function descriptor; the first
4221 word being the target address and the second being the target's
4222 global pointer.
4223
4224 We abuse the concept of a global pointer by arranging for it
4225 to point to the data we need to load. The complete trampoline
4226 has the following form:
4227
4228 +-------------------+ \
4229 TRAMP: | __ia64_trampoline | |
4230 +-------------------+ > fake function descriptor
4231 | TRAMP+16 | |
4232 +-------------------+ /
4233 | target descriptor |
4234 +-------------------+
4235 | static link |
4236 +-------------------+
4237 */
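/* After the stores emitted below, the four 8-byte words at TRAMP thus
   hold, in order: the address of __ia64_trampoline, the value TRAMP+16,
   the address of the target's function descriptor, and the static chain
   value.  */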
4238
4239 static void
4240 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4241 {
4242 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4243 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4244
4245 /* The Intel assembler requires that the global __ia64_trampoline symbol
4246 be declared explicitly */
4247 if (!TARGET_GNU_AS)
4248 {
4249 static bool declared_ia64_trampoline = false;
4250
4251 if (!declared_ia64_trampoline)
4252 {
4253 declared_ia64_trampoline = true;
4254 (*targetm.asm_out.globalize_label) (asm_out_file,
4255 "__ia64_trampoline");
4256 }
4257 }
4258
4259 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4260 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4261 fnaddr = convert_memory_address (Pmode, fnaddr);
4262 static_chain = convert_memory_address (Pmode, static_chain);
4263
4264 /* Load up our iterator. */
4265 addr_reg = copy_to_reg (addr);
4266 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4267
4268 /* The first two words are the fake descriptor:
4269 __ia64_trampoline, ADDR+16. */
4270 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4271 if (TARGET_ABI_OPEN_VMS)
4272 {
4273 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4274 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4275 relocation against function symbols to make it identical to the
4276 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4277 strict ELF and dereference to get the bare code address. */
4278 rtx reg = gen_reg_rtx (Pmode);
4279 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4280 emit_move_insn (reg, tramp);
4281 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4282 tramp = reg;
4283 }
4284 emit_move_insn (m_tramp, tramp);
4285 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4286 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4287
4288 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
4289 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4290 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4291
4292 /* The third word is the target descriptor. */
4293 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4294 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4295 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4296
4297 /* The fourth word is the static chain. */
4298 emit_move_insn (m_tramp, static_chain);
4299 }
4300 \f
4301 /* Do any needed setup for a variadic function. CUM has not been updated
4302 for the last named argument which has type TYPE and mode MODE.
4303
4304 We generate the actual spill instructions during prologue generation. */
4305
4306 static void
4307 ia64_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
4308 tree type, int * pretend_size,
4309 int second_time ATTRIBUTE_UNUSED)
4310 {
4311 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4312
4313 /* Skip the current argument. */
4314 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4315
4316 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4317 {
4318 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4319 *pretend_size = n * UNITS_PER_WORD;
4320 cfun->machine->n_varargs = n;
4321 }
4322 }
4323
4324 /* Check whether TYPE is a homogeneous floating point aggregate. If
4325 it is, return the mode of the floating point type that appears
4326    in all leaves.  If it is not, return VOIDmode.
4327
4328    An aggregate is a homogeneous floating point aggregate if all
4329    fields/elements in it have the same floating point type (e.g.,
4330    SFmode).  128-bit quad-precision floats are excluded.
4331
4332 Variable sized aggregates should never arrive here, since we should
4333 have already decided to pass them by reference. Top-level zero-sized
4334 aggregates are excluded because our parallels crash the middle-end. */
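/* For illustration under the definition above: struct { double x, y, z; }
   is an HFA and hfa_element_mode returns DFmode for it, whereas
   struct { double x; float y; } and struct { double x; int i; } mix leaf
   types and yield VOIDmode.  */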
4335
4336 static enum machine_mode
4337 hfa_element_mode (const_tree type, bool nested)
4338 {
4339 enum machine_mode element_mode = VOIDmode;
4340 enum machine_mode mode;
4341 enum tree_code code = TREE_CODE (type);
4342 int know_element_mode = 0;
4343 tree t;
4344
4345 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4346 return VOIDmode;
4347
4348 switch (code)
4349 {
4350 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4351 case BOOLEAN_TYPE: case POINTER_TYPE:
4352 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4353 case LANG_TYPE: case FUNCTION_TYPE:
4354 return VOIDmode;
4355
4356 /* Fortran complex types are supposed to be HFAs, so we need to handle
4357 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4358 types though. */
4359 case COMPLEX_TYPE:
4360 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4361 && TYPE_MODE (type) != TCmode)
4362 return GET_MODE_INNER (TYPE_MODE (type));
4363 else
4364 return VOIDmode;
4365
4366 case REAL_TYPE:
4367 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4368 mode if this is contained within an aggregate. */
4369 if (nested && TYPE_MODE (type) != TFmode)
4370 return TYPE_MODE (type);
4371 else
4372 return VOIDmode;
4373
4374 case ARRAY_TYPE:
4375 return hfa_element_mode (TREE_TYPE (type), 1);
4376
4377 case RECORD_TYPE:
4378 case UNION_TYPE:
4379 case QUAL_UNION_TYPE:
4380 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4381 {
4382 if (TREE_CODE (t) != FIELD_DECL)
4383 continue;
4384
4385 mode = hfa_element_mode (TREE_TYPE (t), 1);
4386 if (know_element_mode)
4387 {
4388 if (mode != element_mode)
4389 return VOIDmode;
4390 }
4391 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4392 return VOIDmode;
4393 else
4394 {
4395 know_element_mode = 1;
4396 element_mode = mode;
4397 }
4398 }
4399 return element_mode;
4400
4401 default:
4402 /* If we reach here, we probably have some front-end specific type
4403 that the backend doesn't know about. This can happen via the
4404 aggregate_value_p call in init_function_start. All we can do is
4405 ignore unknown tree types. */
4406 return VOIDmode;
4407 }
4408
4409 return VOIDmode;
4410 }
4411
4412 /* Return the number of words required to hold a quantity of TYPE and MODE
4413 when passed as an argument. */
4414 static int
4415 ia64_function_arg_words (const_tree type, enum machine_mode mode)
4416 {
4417 int words;
4418
4419 if (mode == BLKmode)
4420 words = int_size_in_bytes (type);
4421 else
4422 words = GET_MODE_SIZE (mode);
4423
4424 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4425 }
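/* A worked example of the rounding above, assuming UNITS_PER_WORD is 8 as
   on ia64: a 12-byte BLKmode aggregate needs (12 + 7) / 8 = 2 argument
   words, and an 8-byte DImode value needs exactly 1.  */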
4426
4427 /* Return the number of registers that should be skipped so the current
4428 argument (described by TYPE and WORDS) will be properly aligned.
4429
4430 Integer and float arguments larger than 8 bytes start at the next
4431 even boundary. Aggregates larger than 8 bytes start at the next
4432 even boundary if the aggregate has 16 byte alignment. Note that
4433 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4434 but are still to be aligned in registers.
4435
4436 ??? The ABI does not specify how to handle aggregates with
4437 alignment from 9 to 15 bytes, or greater than 16. We handle them
4438 all as if they had 16 byte alignment. Such aggregates can occur
4439 only if gcc extensions are used. */
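/* A hypothetical illustration of the rule above: if cum->words is even,
   nothing is skipped.  If cum->words is odd, an aggregate with 16-byte
   alignment, or a scalar wider than one 8-byte word, skips one slot so
   that it starts on an even slot boundary; a plain 8-byte scalar does
   not.  */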
4440 static int
4441 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4442 const_tree type, int words)
4443 {
4444 /* No registers are skipped on VMS. */
4445 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4446 return 0;
4447
4448 if (type
4449 && TREE_CODE (type) != INTEGER_TYPE
4450 && TREE_CODE (type) != REAL_TYPE)
4451 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4452 else
4453 return words > 1;
4454 }
4455
4456 /* Return rtx for register where argument is passed, or zero if it is passed
4457 on the stack. */
4458 /* ??? 128-bit quad-precision floats are always passed in general
4459 registers. */
4460
4461 static rtx
4462 ia64_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
4463 const_tree type, bool named, bool incoming)
4464 {
4465 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4466
4467 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4468 int words = ia64_function_arg_words (type, mode);
4469 int offset = ia64_function_arg_offset (cum, type, words);
4470 enum machine_mode hfa_mode = VOIDmode;
4471
4472 /* For OPEN VMS, emit the instruction setting up the argument register here,
4473      when we know it will be emitted together with the other argument setup
4474      insns.  This is not the conceptually best place to do this, but it is
4475 the easiest as we have convenient access to cumulative args info. */
4476
4477 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4478 && named == 1)
4479 {
4480 unsigned HOST_WIDE_INT regval = cum->words;
4481 int i;
4482
4483 for (i = 0; i < 8; i++)
4484 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4485
4486 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4487 GEN_INT (regval));
4488 }
4489
4490 /* If all argument slots are used, then it must go on the stack. */
4491 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4492 return 0;
4493
4494 /* Check for and handle homogeneous FP aggregates. */
4495 if (type)
4496 hfa_mode = hfa_element_mode (type, 0);
4497
4498 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4499 and unprototyped hfas are passed specially. */
4500 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4501 {
4502 rtx loc[16];
4503 int i = 0;
4504 int fp_regs = cum->fp_regs;
4505 int int_regs = cum->words + offset;
4506 int hfa_size = GET_MODE_SIZE (hfa_mode);
4507 int byte_size;
4508 int args_byte_size;
4509
4510 /* If prototyped, pass it in FR regs then GR regs.
4511 If not prototyped, pass it in both FR and GR regs.
4512
4513 If this is an SFmode aggregate, then it is possible to run out of
4514 FR regs while GR regs are still left. In that case, we pass the
4515 remaining part in the GR regs. */
4516
4517 /* Fill the FP regs. We do this always. We stop if we reach the end
4518 of the argument, the last FP register, or the last argument slot. */
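      /* A sketch for a named, prototyped HFA of four floats (16 bytes,
	 hfa_mode SFmode), assuming no FP argument registers are in use
	 yet: the loop below collects the first four FP argument registers
	 at byte offsets 0, 4, 8 and 12, and the result is wrapped in a
	 PARALLEL further down.  */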
4519
4520 byte_size = ((mode == BLKmode)
4521 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4522 args_byte_size = int_regs * UNITS_PER_WORD;
4523 offset = 0;
4524 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4525 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4526 {
4527 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4528 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4529 + fp_regs)),
4530 GEN_INT (offset));
4531 offset += hfa_size;
4532 args_byte_size += hfa_size;
4533 fp_regs++;
4534 }
4535
4536 /* If no prototype, then the whole thing must go in GR regs. */
4537 if (! cum->prototype)
4538 offset = 0;
4539 /* If this is an SFmode aggregate, then we might have some left over
4540 that needs to go in GR regs. */
4541 else if (byte_size != offset)
4542 int_regs += offset / UNITS_PER_WORD;
4543
4544 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4545
4546 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4547 {
4548 enum machine_mode gr_mode = DImode;
4549 unsigned int gr_size;
4550
4551 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4552 then this goes in a GR reg left adjusted/little endian, right
4553 adjusted/big endian. */
4554 /* ??? Currently this is handled wrong, because 4-byte hunks are
4555 always right adjusted/little endian. */
4556 if (offset & 0x4)
4557 gr_mode = SImode;
4558 /* If we have an even 4 byte hunk because the aggregate is a
4559 multiple of 4 bytes in size, then this goes in a GR reg right
4560 adjusted/little endian. */
4561 else if (byte_size - offset == 4)
4562 gr_mode = SImode;
4563
4564 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4565 gen_rtx_REG (gr_mode, (basereg
4566 + int_regs)),
4567 GEN_INT (offset));
4568
4569 gr_size = GET_MODE_SIZE (gr_mode);
4570 offset += gr_size;
4571 if (gr_size == UNITS_PER_WORD
4572 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4573 int_regs++;
4574 else if (gr_size > UNITS_PER_WORD)
4575 int_regs += gr_size / UNITS_PER_WORD;
4576 }
4577 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4578 }
4579
4580 /* On OpenVMS variable argument is either in Rn or Fn. */
4581 else if (TARGET_ABI_OPEN_VMS && named == 0)
4582 {
4583 if (FLOAT_MODE_P (mode))
4584 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4585 else
4586 return gen_rtx_REG (mode, basereg + cum->words);
4587 }
4588
4589   /* Integral values and aggregates go in general registers.  If we have run
4590      out of FR registers, then FP values must also go in general registers.
4591      This can happen when we have an SFmode HFA.  */
4592 else if (mode == TFmode || mode == TCmode
4593 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4594 {
4595 int byte_size = ((mode == BLKmode)
4596 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4597 if (BYTES_BIG_ENDIAN
4598 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4599 && byte_size < UNITS_PER_WORD
4600 && byte_size > 0)
4601 {
4602 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4603 gen_rtx_REG (DImode,
4604 (basereg + cum->words
4605 + offset)),
4606 const0_rtx);
4607 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4608 }
4609 else
4610 return gen_rtx_REG (mode, basereg + cum->words + offset);
4611
4612 }
4613
4614 /* If there is a prototype, then FP values go in a FR register when
4615 named, and in a GR register when unnamed. */
4616 else if (cum->prototype)
4617 {
4618 if (named)
4619 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4620 /* In big-endian mode, an anonymous SFmode value must be represented
4621 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4622 the value into the high half of the general register. */
4623 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4624 return gen_rtx_PARALLEL (mode,
4625 gen_rtvec (1,
4626 gen_rtx_EXPR_LIST (VOIDmode,
4627 gen_rtx_REG (DImode, basereg + cum->words + offset),
4628 const0_rtx)));
4629 else
4630 return gen_rtx_REG (mode, basereg + cum->words + offset);
4631 }
4632 /* If there is no prototype, then FP values go in both FR and GR
4633 registers. */
4634 else
4635 {
4636 /* See comment above. */
4637 enum machine_mode inner_mode =
4638 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4639
4640 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4641 gen_rtx_REG (mode, (FR_ARG_FIRST
4642 + cum->fp_regs)),
4643 const0_rtx);
4644 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4645 gen_rtx_REG (inner_mode,
4646 (basereg + cum->words
4647 + offset)),
4648 const0_rtx);
4649
4650 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4651 }
4652 }
4653
4654 /* Implement TARGET_FUNCTION_ARG target hook.  */
4655
4656 static rtx
4657 ia64_function_arg (cumulative_args_t cum, enum machine_mode mode,
4658 const_tree type, bool named)
4659 {
4660 return ia64_function_arg_1 (cum, mode, type, named, false);
4661 }
4662
4663 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */
4664
4665 static rtx
4666 ia64_function_incoming_arg (cumulative_args_t cum,
4667 enum machine_mode mode,
4668 const_tree type, bool named)
4669 {
4670 return ia64_function_arg_1 (cum, mode, type, named, true);
4671 }
4672
4673 /* Return number of bytes, at the beginning of the argument, that must be
4674    put in registers.  0 if the argument is entirely in registers or entirely
4675 in memory. */
4676
4677 static int
4678 ia64_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
4679 tree type, bool named ATTRIBUTE_UNUSED)
4680 {
4681 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4682
4683 int words = ia64_function_arg_words (type, mode);
4684 int offset = ia64_function_arg_offset (cum, type, words);
4685
4686 /* If all argument slots are used, then it must go on the stack. */
4687 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4688 return 0;
4689
4690 /* It doesn't matter whether the argument goes in FR or GR regs. If
4691 it fits within the 8 argument slots, then it goes entirely in
4692 registers. If it extends past the last argument slot, then the rest
4693 goes on the stack. */
4694
4695 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4696 return 0;
4697
4698 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4699 }
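/* Worked example, using the 8 argument slots mentioned above: a 5-word
   aggregate arriving when cum->words is 6 (no alignment skip) has only
   8 - 6 = 2 words left in registers, so 2 * UNITS_PER_WORD = 16 bytes
   are passed in registers and the remaining 3 words go on the stack.  */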
4700
4701 /* Return ivms_arg_type based on machine_mode. */
4702
4703 static enum ivms_arg_type
4704 ia64_arg_type (enum machine_mode mode)
4705 {
4706 switch (mode)
4707 {
4708 case SFmode:
4709 return FS;
4710 case DFmode:
4711 return FT;
4712 default:
4713 return I64;
4714 }
4715 }
4716
4717 /* Update CUM to point after this argument. This is patterned after
4718 ia64_function_arg. */
4719
4720 static void
4721 ia64_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
4722 const_tree type, bool named)
4723 {
4724 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4725 int words = ia64_function_arg_words (type, mode);
4726 int offset = ia64_function_arg_offset (cum, type, words);
4727 enum machine_mode hfa_mode = VOIDmode;
4728
4729 /* If all arg slots are already full, then there is nothing to do. */
4730 if (cum->words >= MAX_ARGUMENT_SLOTS)
4731 {
4732 cum->words += words + offset;
4733 return;
4734 }
4735
4736 cum->atypes[cum->words] = ia64_arg_type (mode);
4737 cum->words += words + offset;
4738
4739 /* Check for and handle homogeneous FP aggregates. */
4740 if (type)
4741 hfa_mode = hfa_element_mode (type, 0);
4742
4743 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4744 and unprototyped hfas are passed specially. */
4745 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4746 {
4747 int fp_regs = cum->fp_regs;
4748 /* This is the original value of cum->words + offset. */
4749 int int_regs = cum->words - words;
4750 int hfa_size = GET_MODE_SIZE (hfa_mode);
4751 int byte_size;
4752 int args_byte_size;
4753
4754 /* If prototyped, pass it in FR regs then GR regs.
4755 If not prototyped, pass it in both FR and GR regs.
4756
4757 If this is an SFmode aggregate, then it is possible to run out of
4758 FR regs while GR regs are still left. In that case, we pass the
4759 remaining part in the GR regs. */
4760
4761 /* Fill the FP regs. We do this always. We stop if we reach the end
4762 of the argument, the last FP register, or the last argument slot. */
4763
4764 byte_size = ((mode == BLKmode)
4765 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4766 args_byte_size = int_regs * UNITS_PER_WORD;
4767 offset = 0;
4768 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4769 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4770 {
4771 offset += hfa_size;
4772 args_byte_size += hfa_size;
4773 fp_regs++;
4774 }
4775
4776 cum->fp_regs = fp_regs;
4777 }
4778
4779 /* On OpenVMS variable argument is either in Rn or Fn. */
4780 else if (TARGET_ABI_OPEN_VMS && named == 0)
4781 {
4782 cum->int_regs = cum->words;
4783 cum->fp_regs = cum->words;
4784 }
4785
4786   /* Integral values and aggregates go in general registers.  So do TFmode FP
4787      values.  If we have run out of FR registers, then other FP values must
4788      also go in general registers.  This can happen when we have an SFmode HFA.  */
4789 else if (mode == TFmode || mode == TCmode
4790 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4791 cum->int_regs = cum->words;
4792
4793 /* If there is a prototype, then FP values go in a FR register when
4794 named, and in a GR register when unnamed. */
4795 else if (cum->prototype)
4796 {
4797 if (! named)
4798 cum->int_regs = cum->words;
4799 else
4800 /* ??? Complex types should not reach here. */
4801 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4802 }
4803 /* If there is no prototype, then FP values go in both FR and GR
4804 registers. */
4805 else
4806 {
4807 /* ??? Complex types should not reach here. */
4808 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4809 cum->int_regs = cum->words;
4810 }
4811 }
4812
4813 /* Arguments with alignment larger than 8 bytes start at the next even
4814    boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
4815 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4816
4817 static unsigned int
4818 ia64_function_arg_boundary (enum machine_mode mode, const_tree type)
4819 {
4820 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4821 return PARM_BOUNDARY * 2;
4822
4823 if (type)
4824 {
4825 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4826 return PARM_BOUNDARY * 2;
4827 else
4828 return PARM_BOUNDARY;
4829 }
4830
4831 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4832 return PARM_BOUNDARY * 2;
4833 else
4834 return PARM_BOUNDARY;
4835 }
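/* Hypothetical examples of the rule above: an int or a double gets the
   normal PARM_BOUNDARY alignment, while a type declared with 16-byte
   alignment, or TFmode under ILP32 HP-UX, gets PARM_BOUNDARY * 2, i.e.
   an even argument-slot boundary.  */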
4836
4837 /* True if it is OK to do sibling call optimization for the specified
4838 call expression EXP. DECL will be the called function, or NULL if
4839 this is an indirect call. */
4840 static bool
4841 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4842 {
4843 /* We can't perform a sibcall if the current function has the syscall_linkage
4844 attribute. */
4845 if (lookup_attribute ("syscall_linkage",
4846 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4847 return false;
4848
4849 /* We must always return with our current GP. This means we can
4850 only sibcall to functions defined in the current module unless
4851 TARGET_CONST_GP is set to true. */
4852 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
4853 }
4854 \f
4855
4856 /* Implement va_arg. */
4857
4858 static tree
4859 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4860 gimple_seq *post_p)
4861 {
4862 /* Variable sized types are passed by reference. */
4863 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4864 {
4865 tree ptrtype = build_pointer_type (type);
4866 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4867 return build_va_arg_indirect_ref (addr);
4868 }
4869
4870 /* Aggregate arguments with alignment larger than 8 bytes start at
4871 the next even boundary. Integer and floating point arguments
4872 do so if they are larger than 8 bytes, whether or not they are
4873 also aligned larger than 8 bytes. */
4874 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4875 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4876 {
4877 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
4878 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4879 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
4880 gimplify_assign (unshare_expr (valist), t, pre_p);
4881 }
4882
4883 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4884 }
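/* The pointer-plus/AND pair above rounds VALIST up to the next multiple
   of 2 * UNITS_PER_WORD; e.g. (hypothetically) a va pointer ending in
   0x18 becomes 0x20 before the standard expansion runs.  */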
4885 \f
4886 /* Return 1 if the function return value is returned in memory, 0 if it is
4887 in a register. */
4888
4889 static bool
4890 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4891 {
4892 enum machine_mode mode;
4893 enum machine_mode hfa_mode;
4894 HOST_WIDE_INT byte_size;
4895
4896 mode = TYPE_MODE (valtype);
4897 byte_size = GET_MODE_SIZE (mode);
4898 if (mode == BLKmode)
4899 {
4900 byte_size = int_size_in_bytes (valtype);
4901 if (byte_size < 0)
4902 return true;
4903 }
4904
4905 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
4906
4907 hfa_mode = hfa_element_mode (valtype, 0);
4908 if (hfa_mode != VOIDmode)
4909 {
4910 int hfa_size = GET_MODE_SIZE (hfa_mode);
4911
4912 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4913 return true;
4914 else
4915 return false;
4916 }
4917 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4918 return true;
4919 else
4920 return false;
4921 }
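/* Worked example of the HFA rule above: an HFA of 8 doubles (64 bytes,
   hfa_size 8) fits the 8 FP return registers and is returned in
   registers, while 9 doubles exceed them and force a memory return, as
   does any non-HFA value larger than UNITS_PER_WORD * MAX_INT_RETURN_SLOTS
   bytes.  */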
4922
4923 /* Return rtx for register that holds the function return value. */
4924
4925 static rtx
4926 ia64_function_value (const_tree valtype,
4927 const_tree fn_decl_or_type,
4928 bool outgoing ATTRIBUTE_UNUSED)
4929 {
4930 enum machine_mode mode;
4931 enum machine_mode hfa_mode;
4932 int unsignedp;
4933 const_tree func = fn_decl_or_type;
4934
4935 if (fn_decl_or_type
4936 && !DECL_P (fn_decl_or_type))
4937 func = NULL;
4938
4939 mode = TYPE_MODE (valtype);
4940 hfa_mode = hfa_element_mode (valtype, 0);
4941
4942 if (hfa_mode != VOIDmode)
4943 {
4944 rtx loc[8];
4945 int i;
4946 int hfa_size;
4947 int byte_size;
4948 int offset;
4949
4950 hfa_size = GET_MODE_SIZE (hfa_mode);
4951 byte_size = ((mode == BLKmode)
4952 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4953 offset = 0;
4954 for (i = 0; offset < byte_size; i++)
4955 {
4956 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4957 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4958 GEN_INT (offset));
4959 offset += hfa_size;
4960 }
4961 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4962 }
4963 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4964 return gen_rtx_REG (mode, FR_ARG_FIRST);
4965 else
4966 {
4967 bool need_parallel = false;
4968
4969 /* In big-endian mode, we need to manage the layout of aggregates
4970 in the registers so that we get the bits properly aligned in
4971 the highpart of the registers. */
4972 if (BYTES_BIG_ENDIAN
4973 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4974 need_parallel = true;
4975
4976 /* Something like struct S { long double x; char a[0] } is not an
4977 HFA structure, and therefore doesn't go in fp registers. But
4978 the middle-end will give it XFmode anyway, and XFmode values
4979 don't normally fit in integer registers. So we need to smuggle
4980 the value inside a parallel. */
4981 else if (mode == XFmode || mode == XCmode || mode == RFmode)
4982 need_parallel = true;
4983
4984 if (need_parallel)
4985 {
4986 rtx loc[8];
4987 int offset;
4988 int bytesize;
4989 int i;
4990
4991 offset = 0;
4992 bytesize = int_size_in_bytes (valtype);
4993 /* An empty PARALLEL is invalid here, but the return value
4994 doesn't matter for empty structs. */
4995 if (bytesize == 0)
4996 return gen_rtx_REG (mode, GR_RET_FIRST);
4997 for (i = 0; offset < bytesize; i++)
4998 {
4999 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5000 gen_rtx_REG (DImode,
5001 GR_RET_FIRST + i),
5002 GEN_INT (offset));
5003 offset += UNITS_PER_WORD;
5004 }
5005 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5006 }
5007
5008 mode = promote_function_mode (valtype, mode, &unsignedp,
5009 func ? TREE_TYPE (func) : NULL_TREE,
5010 true);
5011
5012 return gen_rtx_REG (mode, GR_RET_FIRST);
5013 }
5014 }
5015
5016 /* Worker function for TARGET_LIBCALL_VALUE. */
5017
5018 static rtx
5019 ia64_libcall_value (enum machine_mode mode,
5020 const_rtx fun ATTRIBUTE_UNUSED)
5021 {
5022 return gen_rtx_REG (mode,
5023 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5024 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5025 && (mode) != TFmode)
5026 ? FR_RET_FIRST : GR_RET_FIRST));
5027 }
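/* E.g., assuming the usual ia64 assignments of f8 for FR_RET_FIRST and
   r8 for GR_RET_FIRST: a DFmode libcall value comes back in f8, while
   DImode and TFmode values come back in r8.  */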
5028
5029 /* Worker function for FUNCTION_VALUE_REGNO_P. */
5030
5031 static bool
5032 ia64_function_value_regno_p (const unsigned int regno)
5033 {
5034 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5035 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5036 }
5037
5038 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5039 We need to emit DTP-relative relocations. */
5040
5041 static void
5042 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5043 {
5044 gcc_assert (size == 4 || size == 8);
5045 if (size == 4)
5046 fputs ("\tdata4.ua\t@dtprel(", file);
5047 else
5048 fputs ("\tdata8.ua\t@dtprel(", file);
5049 output_addr_const (file, x);
5050 fputs (")", file);
5051 }
5052
5053 /* Print a memory address as an operand to reference that memory location. */
5054
5055 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5056 also call this from ia64_print_operand for memory addresses. */
5057
5058 static void
5059 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5060 rtx address ATTRIBUTE_UNUSED)
5061 {
5062 }
5063
5064 /* Print an operand to an assembler instruction.
5065 C Swap and print a comparison operator.
5066 D Print an FP comparison operator.
5067 E Print 32 - constant, for SImode shifts as extract.
5068 e Print 64 - constant, for DImode rotates.
5069 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5070 a floating point register emitted normally.
5071 G A floating point constant.
5072 I Invert a predicate register by adding 1.
5073 J Select the proper predicate register for a condition.
5074 j Select the inverse predicate register for a condition.
5075 O Append .acq for volatile load.
5076 P Postincrement of a MEM.
5077 Q Append .rel for volatile store.
5078 R Print .s .d or nothing for a single, double or no truncation.
5079 S Shift amount for shladd instruction.
5080 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5081 for Intel assembler.
5082 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5083 for Intel assembler.
5084 X A pair of floating point registers.
5085 r Print register name, or constant 0 as r0. HP compatibility for
5086 Linux kernel.
5087 v Print vector constant value as an 8-byte integer value. */
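/* A hypothetical illustration: in an output template, "%O0" appends
   ".acq" when operand 0 is a volatile MEM load, "%Q0" appends ".rel"
   for a volatile store, and "%," prints the current qualifying
   predicate, e.g. "(p6) ", in front of the instruction.  */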
5088
5089 static void
5090 ia64_print_operand (FILE * file, rtx x, int code)
5091 {
5092 const char *str;
5093
5094 switch (code)
5095 {
5096 case 0:
5097 /* Handled below. */
5098 break;
5099
5100 case 'C':
5101 {
5102 enum rtx_code c = swap_condition (GET_CODE (x));
5103 fputs (GET_RTX_NAME (c), file);
5104 return;
5105 }
5106
5107 case 'D':
5108 switch (GET_CODE (x))
5109 {
5110 case NE:
5111 str = "neq";
5112 break;
5113 case UNORDERED:
5114 str = "unord";
5115 break;
5116 case ORDERED:
5117 str = "ord";
5118 break;
5119 case UNLT:
5120 str = "nge";
5121 break;
5122 case UNLE:
5123 str = "ngt";
5124 break;
5125 case UNGT:
5126 str = "nle";
5127 break;
5128 case UNGE:
5129 str = "nlt";
5130 break;
5131 default:
5132 str = GET_RTX_NAME (GET_CODE (x));
5133 break;
5134 }
5135 fputs (str, file);
5136 return;
5137
5138 case 'E':
5139 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5140 return;
5141
5142 case 'e':
5143 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5144 return;
5145
5146 case 'F':
5147 if (x == CONST0_RTX (GET_MODE (x)))
5148 str = reg_names [FR_REG (0)];
5149 else if (x == CONST1_RTX (GET_MODE (x)))
5150 str = reg_names [FR_REG (1)];
5151 else
5152 {
5153 gcc_assert (GET_CODE (x) == REG);
5154 str = reg_names [REGNO (x)];
5155 }
5156 fputs (str, file);
5157 return;
5158
5159 case 'G':
5160 {
5161 long val[4];
5162 REAL_VALUE_TYPE rv;
5163 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
5164 real_to_target (val, &rv, GET_MODE (x));
5165 if (GET_MODE (x) == SFmode)
5166 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5167 else if (GET_MODE (x) == DFmode)
5168 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5169 & 0xffffffff,
5170 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5171 & 0xffffffff);
5172 else
5173 output_operand_lossage ("invalid %%G mode");
5174 }
5175 return;
5176
5177 case 'I':
5178 fputs (reg_names [REGNO (x) + 1], file);
5179 return;
5180
5181 case 'J':
5182 case 'j':
5183 {
5184 unsigned int regno = REGNO (XEXP (x, 0));
5185 if (GET_CODE (x) == EQ)
5186 regno += 1;
5187 if (code == 'j')
5188 regno ^= 1;
5189 fputs (reg_names [regno], file);
5190 }
5191 return;
5192
5193 case 'O':
5194 if (MEM_VOLATILE_P (x))
5195 fputs(".acq", file);
5196 return;
5197
5198 case 'P':
5199 {
5200 HOST_WIDE_INT value;
5201
5202 switch (GET_CODE (XEXP (x, 0)))
5203 {
5204 default:
5205 return;
5206
5207 case POST_MODIFY:
5208 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5209 if (GET_CODE (x) == CONST_INT)
5210 value = INTVAL (x);
5211 else
5212 {
5213 gcc_assert (GET_CODE (x) == REG);
5214 fprintf (file, ", %s", reg_names[REGNO (x)]);
5215 return;
5216 }
5217 break;
5218
5219 case POST_INC:
5220 value = GET_MODE_SIZE (GET_MODE (x));
5221 break;
5222
5223 case POST_DEC:
5224 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5225 break;
5226 }
5227
5228 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5229 return;
5230 }
5231
5232 case 'Q':
5233 if (MEM_VOLATILE_P (x))
5234 fputs(".rel", file);
5235 return;
5236
5237 case 'R':
5238 if (x == CONST0_RTX (GET_MODE (x)))
5239 fputs(".s", file);
5240 else if (x == CONST1_RTX (GET_MODE (x)))
5241 fputs(".d", file);
5242 else if (x == CONST2_RTX (GET_MODE (x)))
5243 ;
5244 else
5245 output_operand_lossage ("invalid %%R value");
5246 return;
5247
5248 case 'S':
5249 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5250 return;
5251
5252 case 'T':
5253 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5254 {
5255 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5256 return;
5257 }
5258 break;
5259
5260 case 'U':
5261 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5262 {
5263 const char *prefix = "0x";
5264 if (INTVAL (x) & 0x80000000)
5265 {
5266 fprintf (file, "0xffffffff");
5267 prefix = "";
5268 }
5269 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5270 return;
5271 }
5272 break;
5273
5274 case 'X':
5275 {
5276 unsigned int regno = REGNO (x);
5277 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5278 }
5279 return;
5280
5281 case 'r':
5282 /* If this operand is the constant zero, write it as register zero.
5283 Any register, zero, or CONST_INT value is OK here. */
5284 if (GET_CODE (x) == REG)
5285 fputs (reg_names[REGNO (x)], file);
5286 else if (x == CONST0_RTX (GET_MODE (x)))
5287 fputs ("r0", file);
5288 else if (GET_CODE (x) == CONST_INT)
5289 output_addr_const (file, x);
5290 else
5291 output_operand_lossage ("invalid %%r value");
5292 return;
5293
5294 case 'v':
5295 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5296 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5297 break;
5298
5299 case '+':
5300 {
5301 const char *which;
5302
5303 /* For conditional branches, returns or calls, substitute
5304 sptk, dptk, dpnt, or spnt for the '+' punctuation. */
5305 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5306 if (x)
5307 {
5308 int pred_val = INTVAL (XEXP (x, 0));
5309
5310 /* Guess that the top and bottom 2% are statically predicted. */
5311 if (pred_val < REG_BR_PROB_BASE / 50
5312 && br_prob_note_reliable_p (x))
5313 which = ".spnt";
5314 else if (pred_val < REG_BR_PROB_BASE / 2)
5315 which = ".dpnt";
5316 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5317 || !br_prob_note_reliable_p (x))
5318 which = ".dptk";
5319 else
5320 which = ".sptk";
5321 }
5322 else if (GET_CODE (current_output_insn) == CALL_INSN)
5323 which = ".sptk";
5324 else
5325 which = ".dptk";
5326
5327 fputs (which, file);
5328 return;
5329 }
5330
5331 case ',':
5332 x = current_insn_predicate;
5333 if (x)
5334 {
5335 unsigned int regno = REGNO (XEXP (x, 0));
5336 if (GET_CODE (x) == EQ)
5337 regno += 1;
5338 fprintf (file, "(%s) ", reg_names [regno]);
5339 }
5340 return;
5341
5342 default:
5343 output_operand_lossage ("ia64_print_operand: unknown code");
5344 return;
5345 }
5346
5347 switch (GET_CODE (x))
5348 {
5349 /* This happens for the spill/restore instructions. */
5350 case POST_INC:
5351 case POST_DEC:
5352 case POST_MODIFY:
5353 x = XEXP (x, 0);
5354 /* ... fall through ... */
5355
5356 case REG:
5357 fputs (reg_names [REGNO (x)], file);
5358 break;
5359
5360 case MEM:
5361 {
5362 rtx addr = XEXP (x, 0);
5363 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5364 addr = XEXP (addr, 0);
5365 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5366 break;
5367 }
5368
5369 default:
5370 output_addr_const (file, x);
5371 break;
5372 }
5373
5374 return;
5375 }
5376
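/* To illustrate the '+' case above: a branch whose reliable REG_BR_PROB
   note is at least 98% of REG_BR_PROB_BASE gets the ".sptk" hint, one
   between 50% and 98% gets ".dptk", one between 2% and 50% gets ".dpnt",
   and a reliable note below 2% gets ".spnt".  */
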
5377 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5378
5379 static bool
5380 ia64_print_operand_punct_valid_p (unsigned char code)
5381 {
5382 return (code == '+' || code == ',');
5383 }
5384 \f
5385 /* Compute a (partial) cost for rtx X. Return true if the complete
5386 cost has been computed, and false if subexpressions should be
5387 scanned. In either case, *TOTAL contains the cost result. */
5388 /* ??? This is incomplete. */
5389
5390 static bool
5391 ia64_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
5392 int *total, bool speed ATTRIBUTE_UNUSED)
5393 {
5394 switch (code)
5395 {
5396 case CONST_INT:
5397 switch (outer_code)
5398 {
5399 case SET:
5400 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5401 return true;
5402 case PLUS:
5403 if (satisfies_constraint_I (x))
5404 *total = 0;
5405 else if (satisfies_constraint_J (x))
5406 *total = 1;
5407 else
5408 *total = COSTS_N_INSNS (1);
5409 return true;
5410 default:
5411 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5412 *total = 0;
5413 else
5414 *total = COSTS_N_INSNS (1);
5415 return true;
5416 }
5417
5418 case CONST_DOUBLE:
5419 *total = COSTS_N_INSNS (1);
5420 return true;
5421
5422 case CONST:
5423 case SYMBOL_REF:
5424 case LABEL_REF:
5425 *total = COSTS_N_INSNS (3);
5426 return true;
5427
5428 case FMA:
5429 *total = COSTS_N_INSNS (4);
5430 return true;
5431
5432 case MULT:
5433 /* For multiplies wider than HImode, we have to go to the FPU,
5434 which normally involves copies. Plus there's the latency
5435 of the multiply itself, and the latency of the instructions to
5436 transfer integer regs to FP regs. */
5437 if (FLOAT_MODE_P (GET_MODE (x)))
5438 *total = COSTS_N_INSNS (4);
5439 else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5440 *total = COSTS_N_INSNS (10);
5441 else
5442 *total = COSTS_N_INSNS (2);
5443 return true;
5444
5445 case PLUS:
5446 case MINUS:
5447 if (FLOAT_MODE_P (GET_MODE (x)))
5448 {
5449 *total = COSTS_N_INSNS (4);
5450 return true;
5451 }
5452 /* FALLTHRU */
5453
5454 case ASHIFT:
5455 case ASHIFTRT:
5456 case LSHIFTRT:
5457 *total = COSTS_N_INSNS (1);
5458 return true;
5459
5460 case DIV:
5461 case UDIV:
5462 case MOD:
5463 case UMOD:
5464 /* We make divide expensive, so that divide-by-constant will be
5465 optimized to a multiply. */
5466 *total = COSTS_N_INSNS (60);
5467 return true;
5468
5469 default:
5470 return false;
5471 }
5472 }
5473
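/* A worked example of the costs above: a DImode multiply is costed at
   COSTS_N_INSNS (10) because it has to go through the FP unit, while
   divides and modulus operations are costed at COSTS_N_INSNS (60) so that
   a division by a constant is profitable to rewrite as a multiply.  */
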
5474 /* Calculate the cost of moving data from a register in class FROM to
5475 one in class TO, using MODE. */
5476
5477 static int
5478 ia64_register_move_cost (enum machine_mode mode, reg_class_t from,
5479 reg_class_t to)
5480 {
5481 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5482 if (to == ADDL_REGS)
5483 to = GR_REGS;
5484 if (from == ADDL_REGS)
5485 from = GR_REGS;
5486
5487 /* All costs are symmetric, so reduce cases by putting the
5488 lower number class as the destination. */
5489 if (from < to)
5490 {
5491 reg_class_t tmp = to;
5492 to = from, from = tmp;
5493 }
5494
5495 /* Moving between FR and GR registers in XFmode must be more expensive
5496 than 2, so that we get secondary memory reloads. Between two FR_REGS,
5497 we have to make this at least as expensive as memory_move_cost
5498 to avoid spectacularly poor register class preferencing. */
5499 if (mode == XFmode || mode == RFmode)
5500 {
5501 if (to != GR_REGS || from != GR_REGS)
5502 return memory_move_cost (mode, to, false);
5503 else
5504 return 3;
5505 }
5506
5507 switch (to)
5508 {
5509 case PR_REGS:
5510 /* Moving between PR registers takes two insns. */
5511 if (from == PR_REGS)
5512 return 3;
5513 /* Moving between PR and anything but GR is impossible. */
5514 if (from != GR_REGS)
5515 return memory_move_cost (mode, to, false);
5516 break;
5517
5518 case BR_REGS:
5519 /* Moving between BR and anything but GR is impossible. */
5520 if (from != GR_REGS && from != GR_AND_BR_REGS)
5521 return memory_move_cost (mode, to, false);
5522 break;
5523
5524 case AR_I_REGS:
5525 case AR_M_REGS:
5526 /* Moving between AR and anything but GR is impossible. */
5527 if (from != GR_REGS)
5528 return memory_move_cost (mode, to, false);
5529 break;
5530
5531 case GR_REGS:
5532 case FR_REGS:
5533 case FP_REGS:
5534 case GR_AND_FR_REGS:
5535 case GR_AND_BR_REGS:
5536 case ALL_REGS:
5537 break;
5538
5539 default:
5540 gcc_unreachable ();
5541 }
5542
5543 return 2;
5544 }
5545
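/* To illustrate the costs above: for ordinary modes a GR<->BR copy costs 2
   and a PR<->PR copy costs 3 (it takes two insns), while an XFmode or
   RFmode copy that is not between two general registers is priced at
   memory_move_cost, so that e.g. an FR<->GR copy in those modes gets a
   secondary memory reload.  */
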
5546 /* Calculate the cost of moving data of MODE from a register to or from
5547 memory. */
5548
5549 static int
5550 ia64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5551 reg_class_t rclass,
5552 bool in ATTRIBUTE_UNUSED)
5553 {
5554 if (rclass == GENERAL_REGS
5555 || rclass == FR_REGS
5556 || rclass == FP_REGS
5557 || rclass == GR_AND_FR_REGS)
5558 return 4;
5559 else
5560 return 10;
5561 }
5562
5563 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5564 on RCLASS to use when copying X into that class. */
5565
5566 static reg_class_t
5567 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5568 {
5569 switch (rclass)
5570 {
5571 case FR_REGS:
5572 case FP_REGS:
5573 /* Don't allow volatile mem reloads into floating point registers.
5574 This is defined to force reload to choose the r/m case instead
5575 of the f/f case when reloading (set (reg fX) (mem/v)). */
5576 if (MEM_P (x) && MEM_VOLATILE_P (x))
5577 return NO_REGS;
5578
5579 /* Force all unrecognized constants into the constant pool. */
5580 if (CONSTANT_P (x))
5581 return NO_REGS;
5582 break;
5583
5584 case AR_M_REGS:
5585 case AR_I_REGS:
5586 if (!OBJECT_P (x))
5587 return NO_REGS;
5588 break;
5589
5590 default:
5591 break;
5592 }
5593
5594 return rclass;
5595 }
5596
5597 /* This function returns the register class required for a secondary
5598 register when copying between one of the registers in RCLASS, and X,
5599 using MODE. A return value of NO_REGS means that no secondary register
5600 is required. */
5601
5602 enum reg_class
5603 ia64_secondary_reload_class (enum reg_class rclass,
5604 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5605 {
5606 int regno = -1;
5607
5608 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5609 regno = true_regnum (x);
5610
5611 switch (rclass)
5612 {
5613 case BR_REGS:
5614 case AR_M_REGS:
5615 case AR_I_REGS:
5616 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5617 interaction. We end up with two pseudos with overlapping lifetimes
5618 both of which are equiv to the same constant, and both of which need
5619 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5620 changes depending on the path length, which means the qty_first_reg
5621 check in make_regs_eqv can give different answers at different times.
5622 At some point I'll probably need a reload_indi pattern to handle
5623 this.
5624
5625 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5626 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5627 non-general registers for good measure. */
5628 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5629 return GR_REGS;
5630
5631 /* This is needed if a pseudo used as a call_operand gets spilled to a
5632 stack slot. */
5633 if (GET_CODE (x) == MEM)
5634 return GR_REGS;
5635 break;
5636
5637 case FR_REGS:
5638 case FP_REGS:
5639 /* Need to go through general registers to get to other class regs. */
5640 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5641 return GR_REGS;
5642
5643 /* This can happen when a paradoxical subreg is an operand to the
5644 muldi3 pattern. */
5645 /* ??? This shouldn't be necessary after instruction scheduling is
5646 enabled, because paradoxical subregs are not accepted by
5647 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5648 stop the paradoxical subreg stupidity in the *_operand functions
5649 in recog.c. */
5650 if (GET_CODE (x) == MEM
5651 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5652 || GET_MODE (x) == QImode))
5653 return GR_REGS;
5654
5655 /* This can happen because of the ior/and/etc patterns that accept FP
5656 registers as operands. If the third operand is a constant, then it
5657 needs to be reloaded into a FP register. */
5658 if (GET_CODE (x) == CONST_INT)
5659 return GR_REGS;
5660
5661 /* This can happen because of register elimination in a muldi3 insn.
5662 E.g. `26107 * (unsigned long)&u'. */
5663 if (GET_CODE (x) == PLUS)
5664 return GR_REGS;
5665 break;
5666
5667 case PR_REGS:
5668 /* ??? This happens if we cse/gcse a BImode value across a call,
5669 and the function has a nonlocal goto. This is because global
5670 does not allocate call crossing pseudos to hard registers when
5671 crtl->has_nonlocal_goto is true. This is relatively
5672 common for C++ programs that use exceptions. To reproduce,
5673 return NO_REGS and compile libstdc++. */
5674 if (GET_CODE (x) == MEM)
5675 return GR_REGS;
5676
5677 /* This can happen when we take a BImode subreg of a DImode value,
5678 and that DImode value winds up in some non-GR register. */
5679 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5680 return GR_REGS;
5681 break;
5682
5683 default:
5684 break;
5685 }
5686
5687 return NO_REGS;
5688 }
5689
5690 \f
5691 /* Implement targetm.unspec_may_trap_p hook. */
5692 static int
5693 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5694 {
5695 if (GET_CODE (x) == UNSPEC)
5696 {
5697 switch (XINT (x, 1))
5698 {
5699 case UNSPEC_LDA:
5700 case UNSPEC_LDS:
5701 case UNSPEC_LDSA:
5702 case UNSPEC_LDCCLR:
5703 case UNSPEC_CHKACLR:
5704 case UNSPEC_CHKS:
5705 /* These unspecs are just wrappers. */
5706 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5707 }
5708 }
5709
5710 return default_unspec_may_trap_p (x, flags);
5711 }
5712
5713 \f
5714 /* Parse the -mfixed-range= option string. */
5715
5716 static void
5717 fix_range (const char *const_str)
5718 {
5719 int i, first, last;
5720 char *str, *dash, *comma;
5721
5722 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5723 REG2 are either register names or register numbers. The effect
5724 of this option is to mark the registers in the range from REG1 to
5725 REG2 as ``fixed'' so they won't be used by the compiler. This is
5726 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5727
5728 i = strlen (const_str);
5729 str = (char *) alloca (i + 1);
5730 memcpy (str, const_str, i + 1);
5731
5732 while (1)
5733 {
5734 dash = strchr (str, '-');
5735 if (!dash)
5736 {
5737 warning (0, "value of -mfixed-range must have form REG1-REG2");
5738 return;
5739 }
5740 *dash = '\0';
5741
5742 comma = strchr (dash + 1, ',');
5743 if (comma)
5744 *comma = '\0';
5745
5746 first = decode_reg_name (str);
5747 if (first < 0)
5748 {
5749 warning (0, "unknown register name: %s", str);
5750 return;
5751 }
5752
5753 last = decode_reg_name (dash + 1);
5754 if (last < 0)
5755 {
5756 warning (0, "unknown register name: %s", dash + 1);
5757 return;
5758 }
5759
5760 *dash = '-';
5761
5762 if (first > last)
5763 {
5764 warning (0, "%s-%s is an empty range", str, dash + 1);
5765 return;
5766 }
5767
5768 for (i = first; i <= last; ++i)
5769 fixed_regs[i] = call_used_regs[i] = 1;
5770
5771 if (!comma)
5772 break;
5773
5774 *comma = ',';
5775 str = comma + 1;
5776 }
5777 }
5778
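/* For example, a kernel build might pass -mfixed-range=f32-f127 to keep
   the compiler away from the upper floating-point registers; several
   ranges can be combined, as in the purely illustrative
   -mfixed-range=f12-f15,f32-f127, which marks both ranges as fixed.  */
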
5779 /* Implement TARGET_OPTION_OVERRIDE. */
5780
5781 static void
5782 ia64_option_override (void)
5783 {
5784 unsigned int i;
5785 cl_deferred_option *opt;
5786 VEC(cl_deferred_option,heap) *vec
5787 = (VEC(cl_deferred_option,heap) *) ia64_deferred_options;
5788
5789 FOR_EACH_VEC_ELT (cl_deferred_option, vec, i, opt)
5790 {
5791 switch (opt->opt_index)
5792 {
5793 case OPT_mfixed_range_:
5794 fix_range (opt->arg);
5795 break;
5796
5797 default:
5798 gcc_unreachable ();
5799 }
5800 }
5801
5802 if (TARGET_AUTO_PIC)
5803 target_flags |= MASK_CONST_GP;
5804
5805 /* Numerous experiments show that IRA-based loop pressure
5806 calculation works better for RTL loop invariant motion on targets
5807 with enough (>= 32) registers. It is an expensive optimization,
5808 so it is enabled only when optimizing for peak performance. */
5809 if (optimize >= 3)
5810 flag_ira_loop_pressure = 1;
5811
5812
5813 ia64_section_threshold = (global_options_set.x_g_switch_value
5814 ? g_switch_value
5815 : IA64_DEFAULT_GVALUE);
5816
5817 init_machine_status = ia64_init_machine_status;
5818
5819 if (align_functions <= 0)
5820 align_functions = 64;
5821 if (align_loops <= 0)
5822 align_loops = 32;
5823 if (TARGET_ABI_OPEN_VMS)
5824 flag_no_common = 1;
5825
5826 ia64_override_options_after_change();
5827 }
5828
5829 /* Implement targetm.override_options_after_change. */
5830
5831 static void
5832 ia64_override_options_after_change (void)
5833 {
5834 if (optimize >= 3
5835 && !global_options_set.x_flag_selective_scheduling
5836 && !global_options_set.x_flag_selective_scheduling2)
5837 {
5838 flag_selective_scheduling2 = 1;
5839 flag_sel_sched_pipelining = 1;
5840 }
5841 if (mflag_sched_control_spec == 2)
5842 {
5843 /* Control speculation is on by default for the selective scheduler,
5844 but not for the Haifa scheduler. */
5845 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5846 }
5847 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5848 {
5849 /* FIXME: remove this once breaking auto-inc insns is implemented
5850 as a transformation. */
5851 flag_auto_inc_dec = 0;
5852 }
5853 }
5854
5855 /* Initialize the record of emitted frame related registers. */
5856
5857 void ia64_init_expanders (void)
5858 {
5859 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5860 }
5861
5862 static struct machine_function *
5863 ia64_init_machine_status (void)
5864 {
5865 return ggc_alloc_cleared_machine_function ();
5866 }
5867 \f
5868 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5869 static enum attr_type ia64_safe_type (rtx);
5870
5871 static enum attr_itanium_class
5872 ia64_safe_itanium_class (rtx insn)
5873 {
5874 if (recog_memoized (insn) >= 0)
5875 return get_attr_itanium_class (insn);
5876 else if (DEBUG_INSN_P (insn))
5877 return ITANIUM_CLASS_IGNORE;
5878 else
5879 return ITANIUM_CLASS_UNKNOWN;
5880 }
5881
5882 static enum attr_type
5883 ia64_safe_type (rtx insn)
5884 {
5885 if (recog_memoized (insn) >= 0)
5886 return get_attr_type (insn);
5887 else
5888 return TYPE_UNKNOWN;
5889 }
5890 \f
5891 /* The following collection of routines emit instruction group stop bits as
5892 necessary to avoid dependencies. */
5893
5894 /* Need to track some additional registers as far as serialization is
5895 concerned so we can properly handle br.call and br.ret. We could
5896 make these registers visible to gcc, but since these registers are
5897 never explicitly used in gcc generated code, it seems wasteful to
5898 do so (plus it would make the call and return patterns needlessly
5899 complex). */
5900 #define REG_RP (BR_REG (0))
5901 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
5902 /* This is used for volatile asms which may require a stop bit immediately
5903 before and after them. */
5904 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
5905 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5906 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
5907
5908 /* For each register, we keep track of how it has been written in the
5909 current instruction group.
5910
5911 If a register is written unconditionally (no qualifying predicate),
5912 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5913
5914 If a register is written under a qualifying predicate P, we
5915 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5916 may be written again by the complement of P (P^1) and when this happens,
5917 WRITE_COUNT gets set to 2.
5918
5919 The result of this is that whenever an insn attempts to write a register
5920 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5921
5922 If a predicate register is written by a floating-point insn, we set
5923 WRITTEN_BY_FP to true.
5924
5925 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5926 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
5927
5928 #if GCC_VERSION >= 4000
5929 #define RWS_FIELD_TYPE __extension__ unsigned short
5930 #else
5931 #define RWS_FIELD_TYPE unsigned int
5932 #endif
5933 struct reg_write_state
5934 {
5935 RWS_FIELD_TYPE write_count : 2;
5936 RWS_FIELD_TYPE first_pred : 10;
5937 RWS_FIELD_TYPE written_by_fp : 1;
5938 RWS_FIELD_TYPE written_by_and : 1;
5939 RWS_FIELD_TYPE written_by_or : 1;
5940 };
5941
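/* A short example of the bookkeeping above, using illustrative IA-64
   assembly: an unpredicated "mov r14 = 1" sets WRITE_COUNT for r14 to 2
   immediately, so any other write to r14 in the same instruction group
   forces a stop bit first; a predicated "(p1) mov r14 = 1" sets
   WRITE_COUNT to 1 and records p1 in FIRST_PRED.  */
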
5942 /* Cumulative info for the current instruction group. */
5943 struct reg_write_state rws_sum[NUM_REGS];
5944 #ifdef ENABLE_CHECKING
5945 /* Bitmap whether a register has been written in the current insn. */
5946 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5947 / HOST_BITS_PER_WIDEST_FAST_INT];
5948
5949 static inline void
5950 rws_insn_set (int regno)
5951 {
5952 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5953 SET_HARD_REG_BIT (rws_insn, regno);
5954 }
5955
5956 static inline int
5957 rws_insn_test (int regno)
5958 {
5959 return TEST_HARD_REG_BIT (rws_insn, regno);
5960 }
5961 #else
5962 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5963 unsigned char rws_insn[2];
5964
5965 static inline void
5966 rws_insn_set (int regno)
5967 {
5968 if (regno == REG_AR_CFM)
5969 rws_insn[0] = 1;
5970 else if (regno == REG_VOLATILE)
5971 rws_insn[1] = 1;
5972 }
5973
5974 static inline int
5975 rws_insn_test (int regno)
5976 {
5977 if (regno == REG_AR_CFM)
5978 return rws_insn[0];
5979 if (regno == REG_VOLATILE)
5980 return rws_insn[1];
5981 return 0;
5982 }
5983 #endif
5984
5985 /* Indicates whether this is the first instruction after a stop bit,
5986 in which case we don't need another stop bit. Without this,
5987 ia64_variable_issue will die when scheduling an alloc. */
5988 static int first_instruction;
5989
5990 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5991 RTL for one instruction. */
5992 struct reg_flags
5993 {
5994 unsigned int is_write : 1; /* Is register being written? */
5995 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5996 unsigned int is_branch : 1; /* Is register used as part of a branch? */
5997 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5998 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
5999 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
6000 };
6001
6002 static void rws_update (int, struct reg_flags, int);
6003 static int rws_access_regno (int, struct reg_flags, int);
6004 static int rws_access_reg (rtx, struct reg_flags, int);
6005 static void update_set_flags (rtx, struct reg_flags *);
6006 static int set_src_needs_barrier (rtx, struct reg_flags, int);
6007 static int rtx_needs_barrier (rtx, struct reg_flags, int);
6008 static void init_insn_group_barriers (void);
6009 static int group_barrier_needed (rtx);
6010 static int safe_group_barrier_needed (rtx);
6011 static int in_safe_group_barrier;
6012
6013 /* Update the register write state rws_sum for REGNO, which is being written
6014 by the current instruction with predicate PRED and register flags FLAGS. */
6015
6016 static void
6017 rws_update (int regno, struct reg_flags flags, int pred)
6018 {
6019 if (pred)
6020 rws_sum[regno].write_count++;
6021 else
6022 rws_sum[regno].write_count = 2;
6023 rws_sum[regno].written_by_fp |= flags.is_fp;
6024 /* ??? Not tracking and/or across differing predicates. */
6025 rws_sum[regno].written_by_and = flags.is_and;
6026 rws_sum[regno].written_by_or = flags.is_or;
6027 rws_sum[regno].first_pred = pred;
6028 }
6029
6030 /* Handle an access to register REGNO of type FLAGS using predicate register
6031 PRED. Update rws_sum array. Return 1 if this access creates
6032 a dependency with an earlier instruction in the same group. */
6033
6034 static int
6035 rws_access_regno (int regno, struct reg_flags flags, int pred)
6036 {
6037 int need_barrier = 0;
6038
6039 gcc_assert (regno < NUM_REGS);
6040
6041 if (! PR_REGNO_P (regno))
6042 flags.is_and = flags.is_or = 0;
6043
6044 if (flags.is_write)
6045 {
6046 int write_count;
6047
6048 rws_insn_set (regno);
6049 write_count = rws_sum[regno].write_count;
6050
6051 switch (write_count)
6052 {
6053 case 0:
6054 /* The register has not been written yet. */
6055 if (!in_safe_group_barrier)
6056 rws_update (regno, flags, pred);
6057 break;
6058
6059 case 1:
6060 /* The register has been written via a predicate. Treat
6061 it like an unconditional write and do not try to check
6062 for a complementary predicate register in the earlier write. */
6063 if (flags.is_and && rws_sum[regno].written_by_and)
6064 ;
6065 else if (flags.is_or && rws_sum[regno].written_by_or)
6066 ;
6067 else
6068 need_barrier = 1;
6069 if (!in_safe_group_barrier)
6070 rws_update (regno, flags, pred);
6071 break;
6072
6073 case 2:
6074 /* The register has been unconditionally written already. We
6075 need a barrier. */
6076 if (flags.is_and && rws_sum[regno].written_by_and)
6077 ;
6078 else if (flags.is_or && rws_sum[regno].written_by_or)
6079 ;
6080 else
6081 need_barrier = 1;
6082 if (!in_safe_group_barrier)
6083 {
6084 rws_sum[regno].written_by_and = flags.is_and;
6085 rws_sum[regno].written_by_or = flags.is_or;
6086 }
6087 break;
6088
6089 default:
6090 gcc_unreachable ();
6091 }
6092 }
6093 else
6094 {
6095 if (flags.is_branch)
6096 {
6097 /* Branches have several RAW exceptions that allow us to avoid
6098 barriers. */
6099
6100 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6101 /* RAW dependencies on branch regs are permissible as long
6102 as the writer is a non-branch instruction. Since we
6103 never generate code that uses a branch register written
6104 by a branch instruction, handling this case is
6105 easy. */
6106 return 0;
6107
6108 if (REGNO_REG_CLASS (regno) == PR_REGS
6109 && ! rws_sum[regno].written_by_fp)
6110 /* The predicates of a branch are available within the
6111 same insn group as long as the predicate was written by
6112 something other than a floating-point instruction. */
6113 return 0;
6114 }
6115
6116 if (flags.is_and && rws_sum[regno].written_by_and)
6117 return 0;
6118 if (flags.is_or && rws_sum[regno].written_by_or)
6119 return 0;
6120
6121 switch (rws_sum[regno].write_count)
6122 {
6123 case 0:
6124 /* The register has not been written yet. */
6125 break;
6126
6127 case 1:
6128 /* The register has been written via a predicate; assume we
6129 need a barrier (don't check for complementary regs). */
6130 need_barrier = 1;
6131 break;
6132
6133 case 2:
6134 /* The register has been unconditionally written already. We
6135 need a barrier. */
6136 need_barrier = 1;
6137 break;
6138
6139 default:
6140 gcc_unreachable ();
6141 }
6142 }
6143
6144 return need_barrier;
6145 }
6146
6147 static int
6148 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6149 {
6150 int regno = REGNO (reg);
6151 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6152
6153 if (n == 1)
6154 return rws_access_regno (regno, flags, pred);
6155 else
6156 {
6157 int need_barrier = 0;
6158 while (--n >= 0)
6159 need_barrier |= rws_access_regno (regno + n, flags, pred);
6160 return need_barrier;
6161 }
6162 }
6163
6164 /* Examine X, which is a SET rtx, and update the register flags
6165 stored in *PFLAGS. */
6166
6167 static void
6168 update_set_flags (rtx x, struct reg_flags *pflags)
6169 {
6170 rtx src = SET_SRC (x);
6171
6172 switch (GET_CODE (src))
6173 {
6174 case CALL:
6175 return;
6176
6177 case IF_THEN_ELSE:
6178 /* There are four cases here:
6179 (1) The destination is (pc), in which case this is a branch,
6180 nothing here applies.
6181 (2) The destination is ar.lc, in which case this is a
6182 doloop_end_internal,
6183 (3) The destination is an fp register, in which case this is
6184 an fselect instruction.
6185 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6186 this is a check load.
6187 In all cases, nothing we do in this function applies. */
6188 return;
6189
6190 default:
6191 if (COMPARISON_P (src)
6192 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6193 /* Set pflags->is_fp to 1 so that we know we're dealing
6194 with a floating point comparison when processing the
6195 destination of the SET. */
6196 pflags->is_fp = 1;
6197
6198 /* Discover if this is a parallel comparison. We only handle
6199 and.orcm and or.andcm at present, since we must retain a
6200 strict inverse on the predicate pair. */
6201 else if (GET_CODE (src) == AND)
6202 pflags->is_and = 1;
6203 else if (GET_CODE (src) == IOR)
6204 pflags->is_or = 1;
6205
6206 break;
6207 }
6208 }
6209
6210 /* Subroutine of rtx_needs_barrier; this function determines whether the
6211 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6212 are as in rtx_needs_barrier. */
6214
6215 static int
6216 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6217 {
6218 int need_barrier = 0;
6219 rtx dst;
6220 rtx src = SET_SRC (x);
6221
6222 if (GET_CODE (src) == CALL)
6223 /* We don't need to worry about the result registers that
6224 get written by subroutine call. */
6225 return rtx_needs_barrier (src, flags, pred);
6226 else if (SET_DEST (x) == pc_rtx)
6227 {
6228 /* X is a conditional branch. */
6229 /* ??? This seems redundant, as the caller sets this bit for
6230 all JUMP_INSNs. */
6231 if (!ia64_spec_check_src_p (src))
6232 flags.is_branch = 1;
6233 return rtx_needs_barrier (src, flags, pred);
6234 }
6235
6236 if (ia64_spec_check_src_p (src))
6237 /* Avoid checking one register twice (in condition
6238 and in 'then' section) for ldc pattern. */
6239 {
6240 gcc_assert (REG_P (XEXP (src, 2)));
6241 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6242
6243 /* We process MEM below. */
6244 src = XEXP (src, 1);
6245 }
6246
6247 need_barrier |= rtx_needs_barrier (src, flags, pred);
6248
6249 dst = SET_DEST (x);
6250 if (GET_CODE (dst) == ZERO_EXTRACT)
6251 {
6252 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6253 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6254 }
6255 return need_barrier;
6256 }
6257
6258 /* Handle an access to rtx X of type FLAGS using predicate register
6259 PRED. Return 1 if this access creates a dependency with an earlier
6260 instruction in the same group. */
6261
6262 static int
6263 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6264 {
6265 int i, j;
6266 int is_complemented = 0;
6267 int need_barrier = 0;
6268 const char *format_ptr;
6269 struct reg_flags new_flags;
6270 rtx cond;
6271
6272 if (! x)
6273 return 0;
6274
6275 new_flags = flags;
6276
6277 switch (GET_CODE (x))
6278 {
6279 case SET:
6280 update_set_flags (x, &new_flags);
6281 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6282 if (GET_CODE (SET_SRC (x)) != CALL)
6283 {
6284 new_flags.is_write = 1;
6285 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6286 }
6287 break;
6288
6289 case CALL:
6290 new_flags.is_write = 0;
6291 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6292
6293 /* Avoid multiple register writes, in case this is a pattern with
6294 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6295 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6296 {
6297 new_flags.is_write = 1;
6298 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6299 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6300 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6301 }
6302 break;
6303
6304 case COND_EXEC:
6305 /* X is a predicated instruction. */
6306
6307 cond = COND_EXEC_TEST (x);
6308 gcc_assert (!pred);
6309 need_barrier = rtx_needs_barrier (cond, flags, 0);
6310
6311 if (GET_CODE (cond) == EQ)
6312 is_complemented = 1;
6313 cond = XEXP (cond, 0);
6314 gcc_assert (GET_CODE (cond) == REG
6315 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6316 pred = REGNO (cond);
6317 if (is_complemented)
6318 ++pred;
6319
6320 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6321 return need_barrier;
6322
6323 case CLOBBER:
6324 case USE:
6325 /* Clobber & use are for earlier compiler-phases only. */
6326 break;
6327
6328 case ASM_OPERANDS:
6329 case ASM_INPUT:
6330 /* We always emit stop bits for traditional asms. We emit stop bits
6331 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6332 if (GET_CODE (x) != ASM_OPERANDS
6333 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6334 {
6335 /* Avoid writing the register multiple times if we have multiple
6336 asm outputs. This avoids a failure in rws_access_reg. */
6337 if (! rws_insn_test (REG_VOLATILE))
6338 {
6339 new_flags.is_write = 1;
6340 rws_access_regno (REG_VOLATILE, new_flags, pred);
6341 }
6342 return 1;
6343 }
6344
6345 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6346 We cannot just fall through here since then we would be confused
6347 by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate
6348 traditional asms, unlike their normal usage. */
6349
6350 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6351 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6352 need_barrier = 1;
6353 break;
6354
6355 case PARALLEL:
6356 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6357 {
6358 rtx pat = XVECEXP (x, 0, i);
6359 switch (GET_CODE (pat))
6360 {
6361 case SET:
6362 update_set_flags (pat, &new_flags);
6363 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6364 break;
6365
6366 case USE:
6367 case CALL:
6368 case ASM_OPERANDS:
6369 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6370 break;
6371
6372 case CLOBBER:
6373 if (REG_P (XEXP (pat, 0))
6374 && extract_asm_operands (x) != NULL_RTX
6375 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6376 {
6377 new_flags.is_write = 1;
6378 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6379 new_flags, pred);
6380 new_flags = flags;
6381 }
6382 break;
6383
6384 case RETURN:
6385 break;
6386
6387 default:
6388 gcc_unreachable ();
6389 }
6390 }
6391 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6392 {
6393 rtx pat = XVECEXP (x, 0, i);
6394 if (GET_CODE (pat) == SET)
6395 {
6396 if (GET_CODE (SET_SRC (pat)) != CALL)
6397 {
6398 new_flags.is_write = 1;
6399 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6400 pred);
6401 }
6402 }
6403 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6404 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6405 }
6406 break;
6407
6408 case SUBREG:
6409 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6410 break;
6411 case REG:
6412 if (REGNO (x) == AR_UNAT_REGNUM)
6413 {
6414 for (i = 0; i < 64; ++i)
6415 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6416 }
6417 else
6418 need_barrier = rws_access_reg (x, flags, pred);
6419 break;
6420
6421 case MEM:
6422 /* Find the regs used in memory address computation. */
6423 new_flags.is_write = 0;
6424 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6425 break;
6426
6427 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6428 case SYMBOL_REF: case LABEL_REF: case CONST:
6429 break;
6430
6431 /* Operators with side-effects. */
6432 case POST_INC: case POST_DEC:
6433 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6434
6435 new_flags.is_write = 0;
6436 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6437 new_flags.is_write = 1;
6438 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6439 break;
6440
6441 case POST_MODIFY:
6442 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6443
6444 new_flags.is_write = 0;
6445 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6446 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6447 new_flags.is_write = 1;
6448 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6449 break;
6450
6451 /* Handle common unary and binary ops for efficiency. */
6452 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6453 case MOD: case UDIV: case UMOD: case AND: case IOR:
6454 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6455 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6456 case NE: case EQ: case GE: case GT: case LE:
6457 case LT: case GEU: case GTU: case LEU: case LTU:
6458 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6459 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6460 break;
6461
6462 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6463 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6464 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6465 case SQRT: case FFS: case POPCOUNT:
6466 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6467 break;
6468
6469 case VEC_SELECT:
6470 /* VEC_SELECT's second argument is a PARALLEL with integers that
6471 describe the elements selected. On ia64, those integers are
6472 always constants. Avoid walking the PARALLEL so that we don't
6473 get confused with "normal" parallels and then die. */
6474 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6475 break;
6476
6477 case UNSPEC:
6478 switch (XINT (x, 1))
6479 {
6480 case UNSPEC_LTOFF_DTPMOD:
6481 case UNSPEC_LTOFF_DTPREL:
6482 case UNSPEC_DTPREL:
6483 case UNSPEC_LTOFF_TPREL:
6484 case UNSPEC_TPREL:
6485 case UNSPEC_PRED_REL_MUTEX:
6486 case UNSPEC_PIC_CALL:
6487 case UNSPEC_MF:
6488 case UNSPEC_FETCHADD_ACQ:
6489 case UNSPEC_FETCHADD_REL:
6490 case UNSPEC_BSP_VALUE:
6491 case UNSPEC_FLUSHRS:
6492 case UNSPEC_BUNDLE_SELECTOR:
6493 break;
6494
6495 case UNSPEC_GR_SPILL:
6496 case UNSPEC_GR_RESTORE:
6497 {
6498 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6499 HOST_WIDE_INT bit = (offset >> 3) & 63;
6500
6501 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6502 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6503 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6504 new_flags, pred);
6505 break;
6506 }
6507
6508 case UNSPEC_FR_SPILL:
6509 case UNSPEC_FR_RESTORE:
6510 case UNSPEC_GETF_EXP:
6511 case UNSPEC_SETF_EXP:
6512 case UNSPEC_ADDP4:
6513 case UNSPEC_FR_SQRT_RECIP_APPROX:
6514 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6515 case UNSPEC_LDA:
6516 case UNSPEC_LDS:
6517 case UNSPEC_LDS_A:
6518 case UNSPEC_LDSA:
6519 case UNSPEC_CHKACLR:
6520 case UNSPEC_CHKS:
6521 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6522 break;
6523
6524 case UNSPEC_FR_RECIP_APPROX:
6525 case UNSPEC_SHRP:
6526 case UNSPEC_COPYSIGN:
6527 case UNSPEC_FR_RECIP_APPROX_RES:
6528 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6529 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6530 break;
6531
6532 case UNSPEC_CMPXCHG_ACQ:
6533 case UNSPEC_CMPXCHG_REL:
6534 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6535 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6536 break;
6537
6538 default:
6539 gcc_unreachable ();
6540 }
6541 break;
6542
6543 case UNSPEC_VOLATILE:
6544 switch (XINT (x, 1))
6545 {
6546 case UNSPECV_ALLOC:
6547 /* Alloc must always be the first instruction of a group.
6548 We force this by always returning true. */
6549 /* ??? We might get better scheduling if we explicitly check for
6550 input/local/output register dependencies, and modify the
6551 scheduler so that alloc is always reordered to the start of
6552 the current group. We could then eliminate all of the
6553 first_instruction code. */
6554 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6555
6556 new_flags.is_write = 1;
6557 rws_access_regno (REG_AR_CFM, new_flags, pred);
6558 return 1;
6559
6560 case UNSPECV_SET_BSP:
6561 need_barrier = 1;
6562 break;
6563
6564 case UNSPECV_BLOCKAGE:
6565 case UNSPECV_INSN_GROUP_BARRIER:
6566 case UNSPECV_BREAK:
6567 case UNSPECV_PSAC_ALL:
6568 case UNSPECV_PSAC_NORMAL:
6569 return 0;
6570
6571 default:
6572 gcc_unreachable ();
6573 }
6574 break;
6575
6576 case RETURN:
6577 new_flags.is_write = 0;
6578 need_barrier = rws_access_regno (REG_RP, flags, pred);
6579 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6580
6581 new_flags.is_write = 1;
6582 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6583 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6584 break;
6585
6586 default:
6587 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6588 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6589 switch (format_ptr[i])
6590 {
6591 case '0': /* unused field */
6592 case 'i': /* integer */
6593 case 'n': /* note */
6594 case 'w': /* wide integer */
6595 case 's': /* pointer to string */
6596 case 'S': /* optional pointer to string */
6597 break;
6598
6599 case 'e':
6600 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6601 need_barrier = 1;
6602 break;
6603
6604 case 'E':
6605 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6606 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6607 need_barrier = 1;
6608 break;
6609
6610 default:
6611 gcc_unreachable ();
6612 }
6613 break;
6614 }
6615 return need_barrier;
6616 }
6617
6618 /* Clear out the state for group_barrier_needed at the start of a
6619 sequence of insns. */
6620
6621 static void
6622 init_insn_group_barriers (void)
6623 {
6624 memset (rws_sum, 0, sizeof (rws_sum));
6625 first_instruction = 1;
6626 }
6627
6628 /* Given the current state, determine whether a group barrier (a stop bit) is
6629 necessary before INSN. Return nonzero if so. This modifies the state to
6630 include the effects of INSN as a side-effect. */
6631
6632 static int
6633 group_barrier_needed (rtx insn)
6634 {
6635 rtx pat;
6636 int need_barrier = 0;
6637 struct reg_flags flags;
6638
6639 memset (&flags, 0, sizeof (flags));
6640 switch (GET_CODE (insn))
6641 {
6642 case NOTE:
6643 case DEBUG_INSN:
6644 break;
6645
6646 case BARRIER:
6647 /* A barrier doesn't imply an instruction group boundary. */
6648 break;
6649
6650 case CODE_LABEL:
6651 memset (rws_insn, 0, sizeof (rws_insn));
6652 return 1;
6653
6654 case CALL_INSN:
6655 flags.is_branch = 1;
6656 flags.is_sibcall = SIBLING_CALL_P (insn);
6657 memset (rws_insn, 0, sizeof (rws_insn));
6658
6659 /* Don't bundle a call following another call. */
6660 if ((pat = prev_active_insn (insn))
6661 && GET_CODE (pat) == CALL_INSN)
6662 {
6663 need_barrier = 1;
6664 break;
6665 }
6666
6667 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6668 break;
6669
6670 case JUMP_INSN:
6671 if (!ia64_spec_check_p (insn))
6672 flags.is_branch = 1;
6673
6674 /* Don't bundle a jump following a call. */
6675 if ((pat = prev_active_insn (insn))
6676 && GET_CODE (pat) == CALL_INSN)
6677 {
6678 need_barrier = 1;
6679 break;
6680 }
6681 /* FALLTHRU */
6682
6683 case INSN:
6684 if (GET_CODE (PATTERN (insn)) == USE
6685 || GET_CODE (PATTERN (insn)) == CLOBBER)
6686 /* Don't care about USE and CLOBBER "insns"---those are used to
6687 indicate to the optimizer that it shouldn't get rid of
6688 certain operations. */
6689 break;
6690
6691 pat = PATTERN (insn);
6692
6693 /* Ug. Hack hacks hacked elsewhere. */
6694 switch (recog_memoized (insn))
6695 {
6696 /* We play dependency tricks with the epilogue in order
6697 to get proper schedules. Undo this for dv analysis. */
6698 case CODE_FOR_epilogue_deallocate_stack:
6699 case CODE_FOR_prologue_allocate_stack:
6700 pat = XVECEXP (pat, 0, 0);
6701 break;
6702
6703 /* The pattern we use for br.cloop confuses the code above.
6704 The second element of the vector is representative. */
6705 case CODE_FOR_doloop_end_internal:
6706 pat = XVECEXP (pat, 0, 1);
6707 break;
6708
6709 /* Doesn't generate code. */
6710 case CODE_FOR_pred_rel_mutex:
6711 case CODE_FOR_prologue_use:
6712 return 0;
6713
6714 default:
6715 break;
6716 }
6717
6718 memset (rws_insn, 0, sizeof (rws_insn));
6719 need_barrier = rtx_needs_barrier (pat, flags, 0);
6720
6721 /* Check to see if the previous instruction was a volatile
6722 asm. */
6723 if (! need_barrier)
6724 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6725
6726 break;
6727
6728 default:
6729 gcc_unreachable ();
6730 }
6731
6732 if (first_instruction && INSN_P (insn)
6733 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6734 && GET_CODE (PATTERN (insn)) != USE
6735 && GET_CODE (PATTERN (insn)) != CLOBBER)
6736 {
6737 need_barrier = 0;
6738 first_instruction = 0;
6739 }
6740
6741 return need_barrier;
6742 }
6743
6744 /* Like group_barrier_needed, but do not clobber the current state. */
6745
6746 static int
6747 safe_group_barrier_needed (rtx insn)
6748 {
6749 int saved_first_instruction;
6750 int t;
6751
6752 saved_first_instruction = first_instruction;
6753 in_safe_group_barrier = 1;
6754
6755 t = group_barrier_needed (insn);
6756
6757 first_instruction = saved_first_instruction;
6758 in_safe_group_barrier = 0;
6759
6760 return t;
6761 }
6762
6763 /* Scan the current function and insert stop bits as necessary to
6764 eliminate dependencies. This function assumes that a final
6765 instruction scheduling pass has been run which has already
6766 inserted most of the necessary stop bits. This function only
6767 inserts new ones at basic block boundaries, since these are
6768 invisible to the scheduler. */
6769
6770 static void
6771 emit_insn_group_barriers (FILE *dump)
6772 {
6773 rtx insn;
6774 rtx last_label = 0;
6775 int insns_since_last_label = 0;
6776
6777 init_insn_group_barriers ();
6778
6779 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6780 {
6781 if (GET_CODE (insn) == CODE_LABEL)
6782 {
6783 if (insns_since_last_label)
6784 last_label = insn;
6785 insns_since_last_label = 0;
6786 }
6787 else if (GET_CODE (insn) == NOTE
6788 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6789 {
6790 if (insns_since_last_label)
6791 last_label = insn;
6792 insns_since_last_label = 0;
6793 }
6794 else if (GET_CODE (insn) == INSN
6795 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6796 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6797 {
6798 init_insn_group_barriers ();
6799 last_label = 0;
6800 }
6801 else if (NONDEBUG_INSN_P (insn))
6802 {
6803 insns_since_last_label = 1;
6804
6805 if (group_barrier_needed (insn))
6806 {
6807 if (last_label)
6808 {
6809 if (dump)
6810 fprintf (dump, "Emitting stop before label %d\n",
6811 INSN_UID (last_label));
6812 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6813 insn = last_label;
6814
6815 init_insn_group_barriers ();
6816 last_label = 0;
6817 }
6818 }
6819 }
6820 }
6821 }
6822
6823 /* Like emit_insn_group_barriers, but used when no final scheduling pass
6824 has been run. This function has to emit all necessary group barriers. */
6825
6826 static void
6827 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6828 {
6829 rtx insn;
6830
6831 init_insn_group_barriers ();
6832
6833 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6834 {
6835 if (GET_CODE (insn) == BARRIER)
6836 {
6837 rtx last = prev_active_insn (insn);
6838
6839 if (! last)
6840 continue;
6841 if (GET_CODE (last) == JUMP_INSN
6842 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6843 last = prev_active_insn (last);
6844 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6845 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6846
6847 init_insn_group_barriers ();
6848 }
6849 else if (NONDEBUG_INSN_P (insn))
6850 {
6851 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6852 init_insn_group_barriers ();
6853 else if (group_barrier_needed (insn))
6854 {
6855 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6856 init_insn_group_barriers ();
6857 group_barrier_needed (insn);
6858 }
6859 }
6860 }
6861 }
6862
6863 \f
6864
6865 /* Instruction scheduling support. */
6866
6867 #define NR_BUNDLES 10
6868
6869 /* A list of names of all available bundles. */
6870
6871 static const char *bundle_name [NR_BUNDLES] =
6872 {
6873 ".mii",
6874 ".mmi",
6875 ".mfi",
6876 ".mmf",
6877 #if NR_BUNDLES == 10
6878 ".bbb",
6879 ".mbb",
6880 #endif
6881 ".mib",
6882 ".mmb",
6883 ".mfb",
6884 ".mlx"
6885 };
6886
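/* In the bundle names above, each letter names a slot type: m is a memory
   slot, i an integer slot, f a floating-point slot, b a branch slot, and
   lx the long (L+X) slot pair used for instructions such as movl.  */
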
6887 /* Nonzero if we should insert stop bits into the schedule. */
6888
6889 int ia64_final_schedule = 0;
6890
6891 /* Codes of the corresponding queried units: */
6892
6893 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6894 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6895
6896 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6897 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6898
6899 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6900
6901 /* The following variable value is an insn group barrier. */
6902
6903 static rtx dfa_stop_insn;
6904
6905 /* The following variable value is the last issued insn. */
6906
6907 static rtx last_scheduled_insn;
6908
6909 /* The following variable is a pointer to a DFA state used as a
6910 temporary variable. */
6911
6912 static state_t temp_dfa_state = NULL;
6913
6914 /* The following variable holds the DFA state after issuing the last
6915 insn. */
6916
6917 static state_t prev_cycle_state = NULL;
6918
6919 /* An element of the following array is TRUE if the corresponding
6920 insn requires a stop bit to be added before it. */
6921
6922 static char *stops_p = NULL;
6923
6924 /* The following variable is used to set up the array mentioned above. */
6925
6926 static int stop_before_p = 0;
6927
6928 /* The following variable is the length of the arrays `clocks' and
6929 `add_cycles'. */
6930
6931 static int clocks_length;
6932
6933 /* The following variable is the number of data speculations in progress. */
6934 static int pending_data_specs = 0;
6935
6936 /* Number of memory references on current and three future processor cycles. */
6937 static char mem_ops_in_group[4];
6938
6939 /* Number of current processor cycle (from scheduler's point of view). */
6940 static int current_cycle;
6941
6942 static rtx ia64_single_set (rtx);
6943 static void ia64_emit_insn_before (rtx, rtx);
6944
6945 /* Map a bundle number to its pseudo-op. */
6946
6947 const char *
6948 get_bundle_name (int b)
6949 {
6950 return bundle_name[b];
6951 }
6952
6953
6954 /* Return the maximum number of instructions a cpu can issue. */
6955
6956 static int
6957 ia64_issue_rate (void)
6958 {
6959 return 6;
6960 }
6961
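/* An issue rate of 6 corresponds to the two three-instruction bundles that
   Itanium-class processors can issue per clock cycle.  */
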
6962 /* Helper function - like single_set, but look inside COND_EXEC. */
6963
6964 static rtx
6965 ia64_single_set (rtx insn)
6966 {
6967 rtx x = PATTERN (insn), ret;
6968 if (GET_CODE (x) == COND_EXEC)
6969 x = COND_EXEC_CODE (x);
6970 if (GET_CODE (x) == SET)
6971 return x;
6972
6973 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6974 Although they are not classical single set, the second set is there just
6975 to protect it from moving past FP-relative stack accesses. */
6976 switch (recog_memoized (insn))
6977 {
6978 case CODE_FOR_prologue_allocate_stack:
6979 case CODE_FOR_epilogue_deallocate_stack:
6980 ret = XVECEXP (x, 0, 0);
6981 break;
6982
6983 default:
6984 ret = single_set_2 (insn, x);
6985 break;
6986 }
6987
6988 return ret;
6989 }
6990
6991 /* Adjust the cost of a scheduling dependency.
6992 Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
6993 COST is the current cost, DW is the dependency weakness. */
6994 static int
6995 ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
6996 {
6997 enum reg_note dep_type = (enum reg_note) dep_type1;
6998 enum attr_itanium_class dep_class;
6999 enum attr_itanium_class insn_class;
7000
7001 insn_class = ia64_safe_itanium_class (insn);
7002 dep_class = ia64_safe_itanium_class (dep_insn);
7003
7004 /* Treat true memory dependencies separately. Ignore apparent true
7005 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
7006 if (dep_type == REG_DEP_TRUE
7007 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7008 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7009 return 0;
7010
7011 if (dw == MIN_DEP_WEAK)
7012 /* Store and load are likely to alias; use a higher cost to avoid a stall. */
7013 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7014 else if (dw > MIN_DEP_WEAK)
7015 {
7016 /* Store and load are less likely to alias. */
7017 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7018 /* Assume there will be no cache conflict for floating-point data.
7019 For integer data, L1 conflict penalty is huge (17 cycles), so we
7020 never assume it will not cause a conflict. */
7021 return 0;
7022 else
7023 return cost;
7024 }
7025
7026 if (dep_type != REG_DEP_OUTPUT)
7027 return cost;
7028
7029 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7030 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7031 return 0;
7032
7033 return cost;
7034 }
7035
7036 /* Like emit_insn_before, but skip cycle_display notes.
7037 ??? When cycle display notes are implemented, update this. */
7038
7039 static void
7040 ia64_emit_insn_before (rtx insn, rtx before)
7041 {
7042 emit_insn_before (insn, before);
7043 }
7044
7045 /* The following function marks insns that produce addresses for load
7046 and store insns. Such insns will be placed into M slots because this
7047 decreases latency for Itanium 1 (see the function
7048 `ia64_produce_address_p' and the DFA descriptions). */
7049
7050 static void
7051 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
7052 {
7053 rtx insn, next, next_tail;
7054
7055 /* Before reload, which_alternative is not set, which means that
7056 ia64_safe_itanium_class will produce wrong results for (at least)
7057 move instructions. */
7058 if (!reload_completed)
7059 return;
7060
7061 next_tail = NEXT_INSN (tail);
7062 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7063 if (INSN_P (insn))
7064 insn->call = 0;
7065 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7066 if (INSN_P (insn)
7067 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7068 {
7069 sd_iterator_def sd_it;
7070 dep_t dep;
7071 bool has_mem_op_consumer_p = false;
7072
7073 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7074 {
7075 enum attr_itanium_class c;
7076
7077 if (DEP_TYPE (dep) != REG_DEP_TRUE)
7078 continue;
7079
7080 next = DEP_CON (dep);
7081 c = ia64_safe_itanium_class (next);
7082 if ((c == ITANIUM_CLASS_ST
7083 || c == ITANIUM_CLASS_STF)
7084 && ia64_st_address_bypass_p (insn, next))
7085 {
7086 has_mem_op_consumer_p = true;
7087 break;
7088 }
7089 else if ((c == ITANIUM_CLASS_LD
7090 || c == ITANIUM_CLASS_FLD
7091 || c == ITANIUM_CLASS_FLDP)
7092 && ia64_ld_address_bypass_p (insn, next))
7093 {
7094 has_mem_op_consumer_p = true;
7095 break;
7096 }
7097 }
7098
7099 insn->call = has_mem_op_consumer_p;
7100 }
7101 }
7102
7103 /* We're beginning a new block. Initialize data structures as necessary. */
7104
7105 static void
7106 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7107 int sched_verbose ATTRIBUTE_UNUSED,
7108 int max_ready ATTRIBUTE_UNUSED)
7109 {
7110 #ifdef ENABLE_CHECKING
7111 rtx insn;
7112
7113 if (!sel_sched_p () && reload_completed)
7114 for (insn = NEXT_INSN (current_sched_info->prev_head);
7115 insn != current_sched_info->next_tail;
7116 insn = NEXT_INSN (insn))
7117 gcc_assert (!SCHED_GROUP_P (insn));
7118 #endif
7119 last_scheduled_insn = NULL_RTX;
7120 init_insn_group_barriers ();
7121
7122 current_cycle = 0;
7123 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7124 }
7125
7126 /* We're beginning a scheduling pass. Check assertion. */
7127
7128 static void
7129 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7130 int sched_verbose ATTRIBUTE_UNUSED,
7131 int max_ready ATTRIBUTE_UNUSED)
7132 {
7133 gcc_assert (pending_data_specs == 0);
7134 }
7135
7136 /* The scheduling pass is now finished. Free/reset the static variable. */
7137 static void
7138 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7139 int sched_verbose ATTRIBUTE_UNUSED)
7140 {
7141 gcc_assert (pending_data_specs == 0);
7142 }
7143
7144 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7145 speculation check), FALSE otherwise. */
7146 static bool
7147 is_load_p (rtx insn)
7148 {
7149 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7150
7151 return
7152 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7153 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7154 }
7155
7156 /* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global
7157 array (taking into account the 3-cycle cache reference postponement for
7158 stores; see the Intel Itanium 2 Reference Manual for Software Development
7159 and Optimization, 6.7.3.1). */
7160 static void
7161 record_memory_reference (rtx insn)
7162 {
7163 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7164
7165 switch (insn_class) {
7166 case ITANIUM_CLASS_FLD:
7167 case ITANIUM_CLASS_LD:
7168 mem_ops_in_group[current_cycle % 4]++;
7169 break;
7170 case ITANIUM_CLASS_STF:
7171 case ITANIUM_CLASS_ST:
7172 mem_ops_in_group[(current_cycle + 3) % 4]++;
7173 break;
7174 default:;
7175 }
7176 }
7177
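/* For example, under the accounting above a load issued on cycle 5 is
   counted in mem_ops_in_group[1] (5 % 4), while a store issued on the same
   cycle is counted in mem_ops_in_group[0] ((5 + 3) % 4), reflecting the
   3-cycle postponement of its cache reference.  */
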
7178 /* We are about to begin issuing insns for this clock cycle.
7179 Override the default sort algorithm to better slot instructions. */
7180
7181 static int
7182 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
7183 int *pn_ready, int clock_var,
7184 int reorder_type)
7185 {
7186 int n_asms;
7187 int n_ready = *pn_ready;
7188 rtx *e_ready = ready + n_ready;
7189 rtx *insnp;
7190
7191 if (sched_verbose)
7192 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7193
7194 if (reorder_type == 0)
7195 {
7196 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7197 n_asms = 0;
7198 for (insnp = ready; insnp < e_ready; insnp++)
7199 if (insnp < e_ready)
7200 {
7201 rtx insn = *insnp;
7202 enum attr_type t = ia64_safe_type (insn);
7203 if (t == TYPE_UNKNOWN)
7204 {
7205 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7206 || asm_noperands (PATTERN (insn)) >= 0)
7207 {
7208 rtx lowest = ready[n_asms];
7209 ready[n_asms] = insn;
7210 *insnp = lowest;
7211 n_asms++;
7212 }
7213 else
7214 {
7215 rtx highest = ready[n_ready - 1];
7216 ready[n_ready - 1] = insn;
7217 *insnp = highest;
7218 return 1;
7219 }
7220 }
7221 }
7222
7223 if (n_asms < n_ready)
7224 {
7225 /* Some normal insns to process. Skip the asms. */
7226 ready += n_asms;
7227 n_ready -= n_asms;
7228 }
7229 else if (n_ready > 0)
7230 return 1;
7231 }
7232
7233 if (ia64_final_schedule)
7234 {
7235 int deleted = 0;
7236 int nr_need_stop = 0;
7237
7238 for (insnp = ready; insnp < e_ready; insnp++)
7239 if (safe_group_barrier_needed (*insnp))
7240 nr_need_stop++;
7241
7242 if (reorder_type == 1 && n_ready == nr_need_stop)
7243 return 0;
7244 if (reorder_type == 0)
7245 return 1;
7246 insnp = e_ready;
7247 /* Move down everything that needs a stop bit, preserving
7248 relative order. */
7249 while (insnp-- > ready + deleted)
7250 while (insnp >= ready + deleted)
7251 {
7252 rtx insn = *insnp;
7253 if (! safe_group_barrier_needed (insn))
7254 break;
7255 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7256 *ready = insn;
7257 deleted++;
7258 }
7259 n_ready -= deleted;
7260 ready += deleted;
7261 }
7262
7263 current_cycle = clock_var;
7264 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7265 {
7266 int moved = 0;
7267
7268 insnp = e_ready;
7269 /* Move down loads/stores, preserving relative order. */
7270 while (insnp-- > ready + moved)
7271 while (insnp >= ready + moved)
7272 {
7273 rtx insn = *insnp;
7274 if (! is_load_p (insn))
7275 break;
7276 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7277 *ready = insn;
7278 moved++;
7279 }
7280 n_ready -= moved;
7281 ready += moved;
7282 }
7283
7284 return 1;
7285 }
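/* Editorial note, not part of the original sources: a sketch of what the
   reordering above does to the READY vector.  For reorder_type 0, asm-like
   insns are collected at the low end of the vector and READY/N_READY are
   stepped past them, so the later reordering only looks at "real" insns; a
   lone USE/CLOBBER-style insn of unknown type is instead bumped to the
   opposite end and the function returns at once.  During the final
   (post-reload) schedule the same move-to-the-low-end trick is applied
   twice more: first to insns that would need a stop bit, then to loads once
   mem_ops_in_group shows that the current cycle already has
   ia64_max_memory_insns memory operations.  */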
7286
7287 /* We are about to begin issuing insns for this clock cycle. Override
7288 the default sort algorithm to better slot instructions. */
7289
7290 static int
7291 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
7292 int clock_var)
7293 {
7294 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7295 pn_ready, clock_var, 0);
7296 }
7297
7298 /* Like ia64_sched_reorder, but called after issuing each insn.
7299 Override the default sort algorithm to better slot instructions. */
7300
7301 static int
7302 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7303 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
7304 int *pn_ready, int clock_var)
7305 {
7306 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7307 clock_var, 1);
7308 }
7309
7310 /* We are about to issue INSN. Return the number of insns left on the
7311 ready queue that can be issued this cycle. */
7312
7313 static int
7314 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7315 int sched_verbose ATTRIBUTE_UNUSED,
7316 rtx insn ATTRIBUTE_UNUSED,
7317 int can_issue_more ATTRIBUTE_UNUSED)
7318 {
7319 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7320 /* Modulo scheduling does not extend h_i_d when emitting
7321 new instructions. Don't use h_i_d if we don't have to. */
7322 {
7323 if (DONE_SPEC (insn) & BEGIN_DATA)
7324 pending_data_specs++;
7325 if (CHECK_SPEC (insn) & BEGIN_DATA)
7326 pending_data_specs--;
7327 }
7328
7329 if (DEBUG_INSN_P (insn))
7330 return 1;
7331
7332 last_scheduled_insn = insn;
7333 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7334 if (reload_completed)
7335 {
7336 int needed = group_barrier_needed (insn);
7337
7338 gcc_assert (!needed);
7339 if (GET_CODE (insn) == CALL_INSN)
7340 init_insn_group_barriers ();
7341 stops_p [INSN_UID (insn)] = stop_before_p;
7342 stop_before_p = 0;
7343
7344 record_memory_reference (insn);
7345 }
7346 return 1;
7347 }
7348
7349 /* We are choosing insn from the ready queue. Return nonzero if INSN
7350 can be chosen. */
7351
7352 static int
7353 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
7354 {
7355 gcc_assert (insn && INSN_P (insn));
7356 return ((!reload_completed
7357 || !safe_group_barrier_needed (insn))
7358 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7359 && (!mflag_sched_mem_insns_hard_limit
7360 || !is_load_p (insn)
7361 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
7362 }
7363
7364 /* We are choosing insn from the ready queue. Return nonzero if INSN
7365 can be chosen. */
7366
7367 static bool
7368 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
7369 {
7370 gcc_assert (insn && INSN_P (insn));
7371 /* The ALAT has 32 entries. Since we perform conservative data speculation,
7372 we keep the ALAT at most half full. */
7373 return (pending_data_specs < 16
7374 || !(TODO_SPEC (insn) & BEGIN_DATA));
7375 }
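/* Editorial note, not part of the original sources: with a 32-entry ALAT,
   capping pending_data_specs at 16 means at most half of the ALAT can be
   occupied by in-flight data-speculative loads at any time.  Once 16 such
   loads are pending, further BEGIN_DATA candidates are refused by the guard
   above until some of them have been checked (pending_data_specs is
   decremented in ia64_variable_issue when a check is issued).  */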
7376
7377 /* The following variable is a pseudo-insn used by the DFA insn
7378 scheduler to change the DFA state when the simulated clock is
7379 increased. */
7380
7381 static rtx dfa_pre_cycle_insn;
7382
7383 /* Returns 1 when a meaningful insn was scheduled between the last group
7384 barrier and LAST. */
7385 static int
7386 scheduled_good_insn (rtx last)
7387 {
7388 if (last && recog_memoized (last) >= 0)
7389 return 1;
7390
7391 for ( ;
7392 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7393 && !stops_p[INSN_UID (last)];
7394 last = PREV_INSN (last))
7395 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7396 the ebb we're scheduling. */
7397 if (INSN_P (last) && recog_memoized (last) >= 0)
7398 return 1;
7399
7400 return 0;
7401 }
7402
7403 /* We are about to begin issuing INSN. Return nonzero if we cannot
7404 issue it on the given cycle CLOCK and return zero if we should not sort
7405 the ready queue on the next clock start. */
7406
7407 static int
7408 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7409 int clock, int *sort_p)
7410 {
7411 gcc_assert (insn && INSN_P (insn));
7412
7413 if (DEBUG_INSN_P (insn))
7414 return 0;
7415
7416 /* When a group barrier is needed for insn, last_scheduled_insn
7417 should be set. */
7418 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7419 || last_scheduled_insn);
7420
7421 if ((reload_completed
7422 && (safe_group_barrier_needed (insn)
7423 || (mflag_sched_stop_bits_after_every_cycle
7424 && last_clock != clock
7425 && last_scheduled_insn
7426 && scheduled_good_insn (last_scheduled_insn))))
7427 || (last_scheduled_insn
7428 && (GET_CODE (last_scheduled_insn) == CALL_INSN
7429 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7430 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
7431 {
7432 init_insn_group_barriers ();
7433
7434 if (verbose && dump)
7435 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7436 last_clock == clock ? " + cycle advance" : "");
7437
7438 stop_before_p = 1;
7439 current_cycle = clock;
7440 mem_ops_in_group[current_cycle % 4] = 0;
7441
7442 if (last_clock == clock)
7443 {
7444 state_transition (curr_state, dfa_stop_insn);
7445 if (TARGET_EARLY_STOP_BITS)
7446 *sort_p = (last_scheduled_insn == NULL_RTX
7447 || GET_CODE (last_scheduled_insn) != CALL_INSN);
7448 else
7449 *sort_p = 0;
7450 return 1;
7451 }
7452
7453 if (last_scheduled_insn)
7454 {
7455 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7456 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7457 state_reset (curr_state);
7458 else
7459 {
7460 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7461 state_transition (curr_state, dfa_stop_insn);
7462 state_transition (curr_state, dfa_pre_cycle_insn);
7463 state_transition (curr_state, NULL);
7464 }
7465 }
7466 }
7467 return 0;
7468 }
7469
7470 /* Implement targetm.sched.h_i_d_extended hook.
7471 Extend internal data structures. */
7472 static void
7473 ia64_h_i_d_extended (void)
7474 {
7475 if (stops_p != NULL)
7476 {
7477 int new_clocks_length = get_max_uid () * 3 / 2;
7478 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7479 clocks_length = new_clocks_length;
7480 }
7481 }
7482 \f
7483
7484 /* This structure describes the data used by the backend to guide scheduling.
7485 When the current scheduling point is switched, this data should be saved
7486 and restored later, if the scheduler returns to this point. */
7487 struct _ia64_sched_context
7488 {
7489 state_t prev_cycle_state;
7490 rtx last_scheduled_insn;
7491 struct reg_write_state rws_sum[NUM_REGS];
7492 struct reg_write_state rws_insn[NUM_REGS];
7493 int first_instruction;
7494 int pending_data_specs;
7495 int current_cycle;
7496 char mem_ops_in_group[4];
7497 };
7498 typedef struct _ia64_sched_context *ia64_sched_context_t;
7499
7500 /* Allocates a scheduling context. */
7501 static void *
7502 ia64_alloc_sched_context (void)
7503 {
7504 return xmalloc (sizeof (struct _ia64_sched_context));
7505 }
7506
7507 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7508 the global context otherwise. */
7509 static void
7510 ia64_init_sched_context (void *_sc, bool clean_p)
7511 {
7512 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7513
7514 sc->prev_cycle_state = xmalloc (dfa_state_size);
7515 if (clean_p)
7516 {
7517 state_reset (sc->prev_cycle_state);
7518 sc->last_scheduled_insn = NULL_RTX;
7519 memset (sc->rws_sum, 0, sizeof (rws_sum));
7520 memset (sc->rws_insn, 0, sizeof (rws_insn));
7521 sc->first_instruction = 1;
7522 sc->pending_data_specs = 0;
7523 sc->current_cycle = 0;
7524 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7525 }
7526 else
7527 {
7528 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7529 sc->last_scheduled_insn = last_scheduled_insn;
7530 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7531 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7532 sc->first_instruction = first_instruction;
7533 sc->pending_data_specs = pending_data_specs;
7534 sc->current_cycle = current_cycle;
7535 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7536 }
7537 }
7538
7539 /* Sets the global scheduling context to the one pointed to by _SC. */
7540 static void
7541 ia64_set_sched_context (void *_sc)
7542 {
7543 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7544
7545 gcc_assert (sc != NULL);
7546
7547 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7548 last_scheduled_insn = sc->last_scheduled_insn;
7549 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7550 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7551 first_instruction = sc->first_instruction;
7552 pending_data_specs = sc->pending_data_specs;
7553 current_cycle = sc->current_cycle;
7554 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7555 }
7556
7557 /* Clears the data in the _SC scheduling context. */
7558 static void
7559 ia64_clear_sched_context (void *_sc)
7560 {
7561 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7562
7563 free (sc->prev_cycle_state);
7564 sc->prev_cycle_state = NULL;
7565 }
7566
7567 /* Frees the _SC scheduling context. */
7568 static void
7569 ia64_free_sched_context (void *_sc)
7570 {
7571 gcc_assert (_sc != NULL);
7572
7573 free (_sc);
7574 }
7575
7576 typedef rtx (* gen_func_t) (rtx, rtx);
7577
7578 /* Return a function that will generate a load of mode MODE_NO
7579 with speculation types TS. */
7580 static gen_func_t
7581 get_spec_load_gen_function (ds_t ts, int mode_no)
7582 {
7583 static gen_func_t gen_ld_[] = {
7584 gen_movbi,
7585 gen_movqi_internal,
7586 gen_movhi_internal,
7587 gen_movsi_internal,
7588 gen_movdi_internal,
7589 gen_movsf_internal,
7590 gen_movdf_internal,
7591 gen_movxf_internal,
7592 gen_movti_internal,
7593 gen_zero_extendqidi2,
7594 gen_zero_extendhidi2,
7595 gen_zero_extendsidi2,
7596 };
7597
7598 static gen_func_t gen_ld_a[] = {
7599 gen_movbi_advanced,
7600 gen_movqi_advanced,
7601 gen_movhi_advanced,
7602 gen_movsi_advanced,
7603 gen_movdi_advanced,
7604 gen_movsf_advanced,
7605 gen_movdf_advanced,
7606 gen_movxf_advanced,
7607 gen_movti_advanced,
7608 gen_zero_extendqidi2_advanced,
7609 gen_zero_extendhidi2_advanced,
7610 gen_zero_extendsidi2_advanced,
7611 };
7612 static gen_func_t gen_ld_s[] = {
7613 gen_movbi_speculative,
7614 gen_movqi_speculative,
7615 gen_movhi_speculative,
7616 gen_movsi_speculative,
7617 gen_movdi_speculative,
7618 gen_movsf_speculative,
7619 gen_movdf_speculative,
7620 gen_movxf_speculative,
7621 gen_movti_speculative,
7622 gen_zero_extendqidi2_speculative,
7623 gen_zero_extendhidi2_speculative,
7624 gen_zero_extendsidi2_speculative,
7625 };
7626 static gen_func_t gen_ld_sa[] = {
7627 gen_movbi_speculative_advanced,
7628 gen_movqi_speculative_advanced,
7629 gen_movhi_speculative_advanced,
7630 gen_movsi_speculative_advanced,
7631 gen_movdi_speculative_advanced,
7632 gen_movsf_speculative_advanced,
7633 gen_movdf_speculative_advanced,
7634 gen_movxf_speculative_advanced,
7635 gen_movti_speculative_advanced,
7636 gen_zero_extendqidi2_speculative_advanced,
7637 gen_zero_extendhidi2_speculative_advanced,
7638 gen_zero_extendsidi2_speculative_advanced,
7639 };
7640 static gen_func_t gen_ld_s_a[] = {
7641 gen_movbi_speculative_a,
7642 gen_movqi_speculative_a,
7643 gen_movhi_speculative_a,
7644 gen_movsi_speculative_a,
7645 gen_movdi_speculative_a,
7646 gen_movsf_speculative_a,
7647 gen_movdf_speculative_a,
7648 gen_movxf_speculative_a,
7649 gen_movti_speculative_a,
7650 gen_zero_extendqidi2_speculative_a,
7651 gen_zero_extendhidi2_speculative_a,
7652 gen_zero_extendsidi2_speculative_a,
7653 };
7654
7655 gen_func_t *gen_ld;
7656
7657 if (ts & BEGIN_DATA)
7658 {
7659 if (ts & BEGIN_CONTROL)
7660 gen_ld = gen_ld_sa;
7661 else
7662 gen_ld = gen_ld_a;
7663 }
7664 else if (ts & BEGIN_CONTROL)
7665 {
7666 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7667 || ia64_needs_block_p (ts))
7668 gen_ld = gen_ld_s;
7669 else
7670 gen_ld = gen_ld_s_a;
7671 }
7672 else if (ts == 0)
7673 gen_ld = gen_ld_;
7674 else
7675 gcc_unreachable ();
7676
7677 return gen_ld[mode_no];
7678 }
7679
7680 /* Constants that help map 'enum machine_mode' to int. */
7681 enum SPEC_MODES
7682 {
7683 SPEC_MODE_INVALID = -1,
7684 SPEC_MODE_FIRST = 0,
7685 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7686 SPEC_MODE_FOR_EXTEND_LAST = 3,
7687 SPEC_MODE_LAST = 8
7688 };
7689
7690 enum
7691 {
7692 /* Offset to reach ZERO_EXTEND patterns. */
7693 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7694 };
7695
7696 /* Return index of the MODE. */
7697 static int
7698 ia64_mode_to_int (enum machine_mode mode)
7699 {
7700 switch (mode)
7701 {
7702 case BImode: return 0; /* SPEC_MODE_FIRST */
7703 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7704 case HImode: return 2;
7705 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7706 case DImode: return 4;
7707 case SFmode: return 5;
7708 case DFmode: return 6;
7709 case XFmode: return 7;
7710 case TImode:
7711 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7712 mentioned in itanium[12].md. Predicate fp_register_operand also
7713 needs to be defined. Bottom line: better disable for now. */
7714 return SPEC_MODE_INVALID;
7715 default: return SPEC_MODE_INVALID;
7716 }
7717 }
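/* Editorial note, not part of the original sources: a worked example of how
   these indices select entries in the gen_ld_* tables above.  DImode maps
   to 4, so a data-speculative DImode load (ts == BEGIN_DATA) uses
   gen_ld_a[4] == gen_movdi_advanced.  A zero-extending SImode load maps to
   3 + SPEC_GEN_EXTEND_OFFSET == 3 + 8 == 11, which selects the
   gen_zero_extendsidi2_* entry of whichever table the speculation types
   pick.  */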
7718
7719 /* Provide information about speculation capabilities. */
7720 static void
7721 ia64_set_sched_flags (spec_info_t spec_info)
7722 {
7723 unsigned int *flags = &(current_sched_info->flags);
7724
7725 if (*flags & SCHED_RGN
7726 || *flags & SCHED_EBB
7727 || *flags & SEL_SCHED)
7728 {
7729 int mask = 0;
7730
7731 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7732 || (mflag_sched_ar_data_spec && reload_completed))
7733 {
7734 mask |= BEGIN_DATA;
7735
7736 if (!sel_sched_p ()
7737 && ((mflag_sched_br_in_data_spec && !reload_completed)
7738 || (mflag_sched_ar_in_data_spec && reload_completed)))
7739 mask |= BE_IN_DATA;
7740 }
7741
7742 if (mflag_sched_control_spec
7743 && (!sel_sched_p ()
7744 || reload_completed))
7745 {
7746 mask |= BEGIN_CONTROL;
7747
7748 if (!sel_sched_p () && mflag_sched_in_control_spec)
7749 mask |= BE_IN_CONTROL;
7750 }
7751
7752 spec_info->mask = mask;
7753
7754 if (mask)
7755 {
7756 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7757
7758 if (mask & BE_IN_SPEC)
7759 *flags |= NEW_BBS;
7760
7761 spec_info->flags = 0;
7762
7763 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7764 spec_info->flags |= PREFER_NON_DATA_SPEC;
7765
7766 if (mask & CONTROL_SPEC)
7767 {
7768 if (mflag_sched_prefer_non_control_spec_insns)
7769 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7770
7771 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7772 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7773 }
7774
7775 if (sched_verbose >= 1)
7776 spec_info->dump = sched_dump;
7777 else
7778 spec_info->dump = 0;
7779
7780 if (mflag_sched_count_spec_in_critical_path)
7781 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7782 }
7783 }
7784 else
7785 spec_info->mask = 0;
7786 }
7787
7788 /* If INSN is an appropriate load, return the index of its mode
7789 (see ia64_mode_to_int). Return -1 otherwise. */
7790 static int
7791 get_mode_no_for_insn (rtx insn)
7792 {
7793 rtx reg, mem, mode_rtx;
7794 int mode_no;
7795 bool extend_p;
7796
7797 extract_insn_cached (insn);
7798
7799 /* We use WHICH_ALTERNATIVE only after reload. This will
7800 guarantee that reload won't touch a speculative insn. */
7801
7802 if (recog_data.n_operands != 2)
7803 return -1;
7804
7805 reg = recog_data.operand[0];
7806 mem = recog_data.operand[1];
7807
7808 /* We should use MEM's mode since REG's mode in the presence of
7809 ZERO_EXTEND will always be DImode. */
7810 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7811 /* Process non-speculative ld. */
7812 {
7813 if (!reload_completed)
7814 {
7815 /* Do not speculate into regs like ar.lc. */
7816 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7817 return -1;
7818
7819 if (!MEM_P (mem))
7820 return -1;
7821
7822 {
7823 rtx mem_reg = XEXP (mem, 0);
7824
7825 if (!REG_P (mem_reg))
7826 return -1;
7827 }
7828
7829 mode_rtx = mem;
7830 }
7831 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7832 {
7833 gcc_assert (REG_P (reg) && MEM_P (mem));
7834 mode_rtx = mem;
7835 }
7836 else
7837 return -1;
7838 }
7839 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7840 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7841 || get_attr_check_load (insn) == CHECK_LOAD_YES)
7842 /* Process speculative ld or ld.c. */
7843 {
7844 gcc_assert (REG_P (reg) && MEM_P (mem));
7845 mode_rtx = mem;
7846 }
7847 else
7848 {
7849 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
7850
7851 if (attr_class == ITANIUM_CLASS_CHK_A
7852 || attr_class == ITANIUM_CLASS_CHK_S_I
7853 || attr_class == ITANIUM_CLASS_CHK_S_F)
7854 /* Process chk. */
7855 mode_rtx = reg;
7856 else
7857 return -1;
7858 }
7859
7860 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
7861
7862 if (mode_no == SPEC_MODE_INVALID)
7863 return -1;
7864
7865 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7866
7867 if (extend_p)
7868 {
7869 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7870 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7871 return -1;
7872
7873 mode_no += SPEC_GEN_EXTEND_OFFSET;
7874 }
7875
7876 return mode_no;
7877 }
7878
7879 /* If X is an unspec part of a speculative load, return its code.
7880 Return -1 otherwise. */
7881 static int
7882 get_spec_unspec_code (const_rtx x)
7883 {
7884 if (GET_CODE (x) != UNSPEC)
7885 return -1;
7886
7887 {
7888 int code;
7889
7890 code = XINT (x, 1);
7891
7892 switch (code)
7893 {
7894 case UNSPEC_LDA:
7895 case UNSPEC_LDS:
7896 case UNSPEC_LDS_A:
7897 case UNSPEC_LDSA:
7898 return code;
7899
7900 default:
7901 return -1;
7902 }
7903 }
7904 }
7905
7906 /* Implement skip_rtx_p hook. */
7907 static bool
7908 ia64_skip_rtx_p (const_rtx x)
7909 {
7910 return get_spec_unspec_code (x) != -1;
7911 }
7912
7913 /* If INSN is a speculative load, return its UNSPEC code.
7914 Return -1 otherwise. */
7915 static int
7916 get_insn_spec_code (const_rtx insn)
7917 {
7918 rtx pat, reg, mem;
7919
7920 pat = PATTERN (insn);
7921
7922 if (GET_CODE (pat) == COND_EXEC)
7923 pat = COND_EXEC_CODE (pat);
7924
7925 if (GET_CODE (pat) != SET)
7926 return -1;
7927
7928 reg = SET_DEST (pat);
7929 if (!REG_P (reg))
7930 return -1;
7931
7932 mem = SET_SRC (pat);
7933 if (GET_CODE (mem) == ZERO_EXTEND)
7934 mem = XEXP (mem, 0);
7935
7936 return get_spec_unspec_code (mem);
7937 }
7938
7939 /* If INSN is a speculative load, return a ds with the speculation types.
7940 Otherwise [if INSN is a normal instruction] return 0. */
7941 static ds_t
7942 ia64_get_insn_spec_ds (rtx insn)
7943 {
7944 int code = get_insn_spec_code (insn);
7945
7946 switch (code)
7947 {
7948 case UNSPEC_LDA:
7949 return BEGIN_DATA;
7950
7951 case UNSPEC_LDS:
7952 case UNSPEC_LDS_A:
7953 return BEGIN_CONTROL;
7954
7955 case UNSPEC_LDSA:
7956 return BEGIN_DATA | BEGIN_CONTROL;
7957
7958 default:
7959 return 0;
7960 }
7961 }
7962
7963 /* If INSN is a speculative load return a ds with the speculation types that
7964 will be checked.
7965 Otherwise [if INSN is a normal instruction] return 0. */
7966 static ds_t
7967 ia64_get_insn_checked_ds (rtx insn)
7968 {
7969 int code = get_insn_spec_code (insn);
7970
7971 switch (code)
7972 {
7973 case UNSPEC_LDA:
7974 return BEGIN_DATA | BEGIN_CONTROL;
7975
7976 case UNSPEC_LDS:
7977 return BEGIN_CONTROL;
7978
7979 case UNSPEC_LDS_A:
7980 case UNSPEC_LDSA:
7981 return BEGIN_DATA | BEGIN_CONTROL;
7982
7983 default:
7984 return 0;
7985 }
7986 }
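/* Editorial note, not part of the original sources: the UNSPEC codes above
   presumably correspond to the IA-64 speculative load forms roughly as
   follows: UNSPEC_LDA to ld.a (data speculation only), UNSPEC_LDS to ld.s
   (control speculation checked with chk.s), UNSPEC_LDS_A to a
   control-speculative load that can also be checked with ld.c, and
   UNSPEC_LDSA to ld.sa (both kinds of speculation).  This is inferred from
   the two mappings above and from the gen_ld_* tables, not stated
   explicitly in the original code.  */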
7987
7988 /* Return a speculative load pattern for INSN with speculation types TS and
7989 mode index MODE_NO. Any required ZERO_EXTEND is encoded in MODE_NO
7990 (see ia64_mode_to_int and SPEC_GEN_EXTEND_OFFSET). The COND_EXEC
7991 condition of INSN, if any, is preserved. */
7992 static rtx
7993 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7994 {
7995 rtx pat, new_pat;
7996 gen_func_t gen_load;
7997
7998 gen_load = get_spec_load_gen_function (ts, mode_no);
7999
8000 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8001 copy_rtx (recog_data.operand[1]));
8002
8003 pat = PATTERN (insn);
8004 if (GET_CODE (pat) == COND_EXEC)
8005 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8006 new_pat);
8007
8008 return new_pat;
8009 }
8010
8011 static bool
8012 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8013 ds_t ds ATTRIBUTE_UNUSED)
8014 {
8015 return false;
8016 }
8017
8018 /* Implement targetm.sched.speculate_insn hook.
8019 Check if the INSN can be TS speculative.
8020 If 'no' - return -1.
8021 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8022 If current pattern of the INSN already provides TS speculation,
8023 return 0. */
8024 static int
8025 ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
8026 {
8027 int mode_no;
8028 int res;
8029
8030 gcc_assert (!(ts & ~SPECULATIVE));
8031
8032 if (ia64_spec_check_p (insn))
8033 return -1;
8034
8035 if ((ts & BE_IN_SPEC)
8036 && !insn_can_be_in_speculative_p (insn, ts))
8037 return -1;
8038
8039 mode_no = get_mode_no_for_insn (insn);
8040
8041 if (mode_no != SPEC_MODE_INVALID)
8042 {
8043 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8044 res = 0;
8045 else
8046 {
8047 res = 1;
8048 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8049 }
8050 }
8051 else
8052 res = -1;
8053
8054 return res;
8055 }
8056
8057 /* Return a function that will generate a check for speculation TS with mode
8058 MODE_NO.
8059 If a simple check is needed, pass true for SIMPLE_CHECK_P.
8060 If a clearing check is needed, pass true for CLEARING_CHECK_P. */
8061 static gen_func_t
8062 get_spec_check_gen_function (ds_t ts, int mode_no,
8063 bool simple_check_p, bool clearing_check_p)
8064 {
8065 static gen_func_t gen_ld_c_clr[] = {
8066 gen_movbi_clr,
8067 gen_movqi_clr,
8068 gen_movhi_clr,
8069 gen_movsi_clr,
8070 gen_movdi_clr,
8071 gen_movsf_clr,
8072 gen_movdf_clr,
8073 gen_movxf_clr,
8074 gen_movti_clr,
8075 gen_zero_extendqidi2_clr,
8076 gen_zero_extendhidi2_clr,
8077 gen_zero_extendsidi2_clr,
8078 };
8079 static gen_func_t gen_ld_c_nc[] = {
8080 gen_movbi_nc,
8081 gen_movqi_nc,
8082 gen_movhi_nc,
8083 gen_movsi_nc,
8084 gen_movdi_nc,
8085 gen_movsf_nc,
8086 gen_movdf_nc,
8087 gen_movxf_nc,
8088 gen_movti_nc,
8089 gen_zero_extendqidi2_nc,
8090 gen_zero_extendhidi2_nc,
8091 gen_zero_extendsidi2_nc,
8092 };
8093 static gen_func_t gen_chk_a_clr[] = {
8094 gen_advanced_load_check_clr_bi,
8095 gen_advanced_load_check_clr_qi,
8096 gen_advanced_load_check_clr_hi,
8097 gen_advanced_load_check_clr_si,
8098 gen_advanced_load_check_clr_di,
8099 gen_advanced_load_check_clr_sf,
8100 gen_advanced_load_check_clr_df,
8101 gen_advanced_load_check_clr_xf,
8102 gen_advanced_load_check_clr_ti,
8103 gen_advanced_load_check_clr_di,
8104 gen_advanced_load_check_clr_di,
8105 gen_advanced_load_check_clr_di,
8106 };
8107 static gen_func_t gen_chk_a_nc[] = {
8108 gen_advanced_load_check_nc_bi,
8109 gen_advanced_load_check_nc_qi,
8110 gen_advanced_load_check_nc_hi,
8111 gen_advanced_load_check_nc_si,
8112 gen_advanced_load_check_nc_di,
8113 gen_advanced_load_check_nc_sf,
8114 gen_advanced_load_check_nc_df,
8115 gen_advanced_load_check_nc_xf,
8116 gen_advanced_load_check_nc_ti,
8117 gen_advanced_load_check_nc_di,
8118 gen_advanced_load_check_nc_di,
8119 gen_advanced_load_check_nc_di,
8120 };
8121 static gen_func_t gen_chk_s[] = {
8122 gen_speculation_check_bi,
8123 gen_speculation_check_qi,
8124 gen_speculation_check_hi,
8125 gen_speculation_check_si,
8126 gen_speculation_check_di,
8127 gen_speculation_check_sf,
8128 gen_speculation_check_df,
8129 gen_speculation_check_xf,
8130 gen_speculation_check_ti,
8131 gen_speculation_check_di,
8132 gen_speculation_check_di,
8133 gen_speculation_check_di,
8134 };
8135
8136 gen_func_t *gen_check;
8137
8138 if (ts & BEGIN_DATA)
8139 {
8140 /* We don't need recovery because, even if this is ld.sa, the
8141 ALAT entry will be allocated only if the NAT bit is set to zero.
8142 So it is enough to use ld.c here. */
8143
8144 if (simple_check_p)
8145 {
8146 gcc_assert (mflag_sched_spec_ldc);
8147
8148 if (clearing_check_p)
8149 gen_check = gen_ld_c_clr;
8150 else
8151 gen_check = gen_ld_c_nc;
8152 }
8153 else
8154 {
8155 if (clearing_check_p)
8156 gen_check = gen_chk_a_clr;
8157 else
8158 gen_check = gen_chk_a_nc;
8159 }
8160 }
8161 else if (ts & BEGIN_CONTROL)
8162 {
8163 if (simple_check_p)
8164 /* We might want to use ld.sa -> ld.c instead of
8165 ld.s -> chk.s. */
8166 {
8167 gcc_assert (!ia64_needs_block_p (ts));
8168
8169 if (clearing_check_p)
8170 gen_check = gen_ld_c_clr;
8171 else
8172 gen_check = gen_ld_c_nc;
8173 }
8174 else
8175 {
8176 gen_check = gen_chk_s;
8177 }
8178 }
8179 else
8180 gcc_unreachable ();
8181
8182 gcc_assert (mode_no >= 0);
8183 return gen_check[mode_no];
8184 }
8185
8186 /* Return true if a speculation check for speculation types TS needs to be branchy (i.e. needs a recovery block). */
8187 static bool
8188 ia64_needs_block_p (ds_t ts)
8189 {
8190 if (ts & BEGIN_DATA)
8191 return !mflag_sched_spec_ldc;
8192
8193 gcc_assert ((ts & BEGIN_CONTROL) != 0);
8194
8195 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8196 }
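/* Editorial note, not part of the original sources: as the function above
   reads, a data-speculative load needs a branchy check (chk.a plus a
   recovery block) only when mflag_sched_spec_ldc is clear, i.e. when ld.c
   may not be used; a control-speculative load needs one unless both
   mflag_sched_spec_ldc and mflag_sched_spec_control_ldc are set.  The
   command-line spellings of these flags (presumably the -msched-spec-ldc
   family) are an assumption, not taken from this file.  */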
8197
8198 /* Generate a recovery check for INSN with speculation types DS.
8199 If LABEL is nonzero, generate a branchy recovery check.
8200 Otherwise, generate a simple check. */
8201 static rtx
8202 ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
8203 {
8204 rtx op1, pat, check_pat;
8205 gen_func_t gen_check;
8206 int mode_no;
8207
8208 mode_no = get_mode_no_for_insn (insn);
8209 gcc_assert (mode_no >= 0);
8210
8211 if (label)
8212 op1 = label;
8213 else
8214 {
8215 gcc_assert (!ia64_needs_block_p (ds));
8216 op1 = copy_rtx (recog_data.operand[1]);
8217 }
8218
8219 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8220 true);
8221
8222 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8223
8224 pat = PATTERN (insn);
8225 if (GET_CODE (pat) == COND_EXEC)
8226 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8227 check_pat);
8228
8229 return check_pat;
8230 }
8231
8232 /* Return nonzero if X is a speculation check. */
8233 static int
8234 ia64_spec_check_p (rtx x)
8235 {
8236 x = PATTERN (x);
8237 if (GET_CODE (x) == COND_EXEC)
8238 x = COND_EXEC_CODE (x);
8239 if (GET_CODE (x) == SET)
8240 return ia64_spec_check_src_p (SET_SRC (x));
8241 return 0;
8242 }
8243
8244 /* Return nonzero if SRC belongs to a recovery check. */
8245 static int
8246 ia64_spec_check_src_p (rtx src)
8247 {
8248 if (GET_CODE (src) == IF_THEN_ELSE)
8249 {
8250 rtx t;
8251
8252 t = XEXP (src, 0);
8253 if (GET_CODE (t) == NE)
8254 {
8255 t = XEXP (t, 0);
8256
8257 if (GET_CODE (t) == UNSPEC)
8258 {
8259 int code;
8260
8261 code = XINT (t, 1);
8262
8263 if (code == UNSPEC_LDCCLR
8264 || code == UNSPEC_LDCNC
8265 || code == UNSPEC_CHKACLR
8266 || code == UNSPEC_CHKANC
8267 || code == UNSPEC_CHKS)
8268 {
8269 gcc_assert (code != 0);
8270 return code;
8271 }
8272 }
8273 }
8274 }
8275 return 0;
8276 }
8277 \f
8278
8279 /* The following page contains abstract data `bundle states' which are
8280 used for bundling insns (inserting nops and template generation). */
8281
8282 /* The following describes state of insn bundling. */
8283
8284 struct bundle_state
8285 {
8286 /* Unique bundle state number to identify them in the debugging
8287 output */
8288 int unique_num;
8289 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
8290 /* number of nops before and after the insn */
8291 short before_nops_num, after_nops_num;
8292 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
8293 insn) */
8294 int cost; /* cost of the state in cycles */
8295 int accumulated_insns_num; /* number of all previous insns including
8296 nops. L is considered as 2 insns */
8297 int branch_deviation; /* deviation of previous branches from 3rd slots */
8298 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8299 struct bundle_state *next; /* next state with the same insn_num */
8300 struct bundle_state *originator; /* originator (previous insn state) */
8301 /* All bundle states are in the following chain. */
8302 struct bundle_state *allocated_states_chain;
8303 /* The DFA State after issuing the insn and the nops. */
8304 state_t dfa_state;
8305 };
8306
8307 /* The following maps an insn number to the corresponding bundle states. */
8308
8309 static struct bundle_state **index_to_bundle_states;
8310
8311 /* The unique number of next bundle state. */
8312
8313 static int bundle_states_num;
8314
8315 /* All allocated bundle states are in the following chain. */
8316
8317 static struct bundle_state *allocated_bundle_states_chain;
8318
8319 /* All allocated but not used bundle states are in the following
8320 chain. */
8321
8322 static struct bundle_state *free_bundle_state_chain;
8323
8324
8325 /* The following function returns a free bundle state. */
8326
8327 static struct bundle_state *
8328 get_free_bundle_state (void)
8329 {
8330 struct bundle_state *result;
8331
8332 if (free_bundle_state_chain != NULL)
8333 {
8334 result = free_bundle_state_chain;
8335 free_bundle_state_chain = result->next;
8336 }
8337 else
8338 {
8339 result = XNEW (struct bundle_state);
8340 result->dfa_state = xmalloc (dfa_state_size);
8341 result->allocated_states_chain = allocated_bundle_states_chain;
8342 allocated_bundle_states_chain = result;
8343 }
8344 result->unique_num = bundle_states_num++;
8345 return result;
8346
8347 }
8348
8349 /* The following function frees given bundle state. */
8350
8351 static void
8352 free_bundle_state (struct bundle_state *state)
8353 {
8354 state->next = free_bundle_state_chain;
8355 free_bundle_state_chain = state;
8356 }
8357
8358 /* Start work with abstract data `bundle states'. */
8359
8360 static void
8361 initiate_bundle_states (void)
8362 {
8363 bundle_states_num = 0;
8364 free_bundle_state_chain = NULL;
8365 allocated_bundle_states_chain = NULL;
8366 }
8367
8368 /* Finish work with abstract data `bundle states'. */
8369
8370 static void
8371 finish_bundle_states (void)
8372 {
8373 struct bundle_state *curr_state, *next_state;
8374
8375 for (curr_state = allocated_bundle_states_chain;
8376 curr_state != NULL;
8377 curr_state = next_state)
8378 {
8379 next_state = curr_state->allocated_states_chain;
8380 free (curr_state->dfa_state);
8381 free (curr_state);
8382 }
8383 }
8384
8385 /* Hash table of the bundle states. The key is dfa_state and insn_num
8386 of the bundle states. */
8387
8388 static htab_t bundle_state_table;
8389
8390 /* The function returns hash of BUNDLE_STATE. */
8391
8392 static unsigned
8393 bundle_state_hash (const void *bundle_state)
8394 {
8395 const struct bundle_state *const state
8396 = (const struct bundle_state *) bundle_state;
8397 unsigned result, i;
8398
8399 for (result = i = 0; i < dfa_state_size; i++)
8400 result += (((unsigned char *) state->dfa_state) [i]
8401 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8402 return result + state->insn_num;
8403 }
8404
8405 /* The function returns nonzero if the bundle state keys are equal. */
8406
8407 static int
8408 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
8409 {
8410 const struct bundle_state *const state1
8411 = (const struct bundle_state *) bundle_state_1;
8412 const struct bundle_state *const state2
8413 = (const struct bundle_state *) bundle_state_2;
8414
8415 return (state1->insn_num == state2->insn_num
8416 && memcmp (state1->dfa_state, state2->dfa_state,
8417 dfa_state_size) == 0);
8418 }
8419
8420 /* The function inserts BUNDLE_STATE into the hash table. It returns
8421 nonzero if the state has been inserted. For a given key the table keeps the best
8422 state: minimal cost, then fewest insns (nops included), then smallest branch deviation, then fewest middle-bundle stops. */
8423
8424 static int
8425 insert_bundle_state (struct bundle_state *bundle_state)
8426 {
8427 void **entry_ptr;
8428
8429 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
8430 if (*entry_ptr == NULL)
8431 {
8432 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8433 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8434 *entry_ptr = (void *) bundle_state;
8435 return TRUE;
8436 }
8437 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8438 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8439 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8440 > bundle_state->accumulated_insns_num
8441 || (((struct bundle_state *)
8442 *entry_ptr)->accumulated_insns_num
8443 == bundle_state->accumulated_insns_num
8444 && (((struct bundle_state *)
8445 *entry_ptr)->branch_deviation
8446 > bundle_state->branch_deviation
8447 || (((struct bundle_state *)
8448 *entry_ptr)->branch_deviation
8449 == bundle_state->branch_deviation
8450 && ((struct bundle_state *)
8451 *entry_ptr)->middle_bundle_stops
8452 > bundle_state->middle_bundle_stops))))))
8453
8454 {
8455 struct bundle_state temp;
8456
8457 temp = *(struct bundle_state *) *entry_ptr;
8458 *(struct bundle_state *) *entry_ptr = *bundle_state;
8459 ((struct bundle_state *) *entry_ptr)->next = temp.next;
8460 *bundle_state = temp;
8461 }
8462 return FALSE;
8463 }
8464
8465 /* Start work with the hash table. */
8466
8467 static void
8468 initiate_bundle_state_table (void)
8469 {
8470 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8471 (htab_del) 0);
8472 }
8473
8474 /* Finish work with the hash table. */
8475
8476 static void
8477 finish_bundle_state_table (void)
8478 {
8479 htab_delete (bundle_state_table);
8480 }
8481
8482 \f
8483
8484 /* The following variable is an insn `nop' used to check bundle states
8485 with different numbers of inserted nops. */
8486
8487 static rtx ia64_nop;
8488
8489 /* The following function tries to issue NOPS_NUM nops for the current
8490 state without advancing the processor cycle. If it fails, the
8491 function returns FALSE and frees the current state. */
8492
8493 static int
8494 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8495 {
8496 int i;
8497
8498 for (i = 0; i < nops_num; i++)
8499 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8500 {
8501 free_bundle_state (curr_state);
8502 return FALSE;
8503 }
8504 return TRUE;
8505 }
8506
8507 /* The following function tries to issue INSN for the current
8508 state without advancing the processor cycle. If it fails, the
8509 function returns FALSE and frees the current state. */
8510
8511 static int
8512 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8513 {
8514 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8515 {
8516 free_bundle_state (curr_state);
8517 return FALSE;
8518 }
8519 return TRUE;
8520 }
8521
8522 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8523 starting from ORIGINATOR without advancing the processor cycle. If
8524 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8525 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8526 If successful, the function creates a new bundle state and inserts it
8527 into the hash table and into `index_to_bundle_states'. */
8528
8529 static void
8530 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8531 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
8532 {
8533 struct bundle_state *curr_state;
8534
8535 curr_state = get_free_bundle_state ();
8536 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8537 curr_state->insn = insn;
8538 curr_state->insn_num = originator->insn_num + 1;
8539 curr_state->cost = originator->cost;
8540 curr_state->originator = originator;
8541 curr_state->before_nops_num = before_nops_num;
8542 curr_state->after_nops_num = 0;
8543 curr_state->accumulated_insns_num
8544 = originator->accumulated_insns_num + before_nops_num;
8545 curr_state->branch_deviation = originator->branch_deviation;
8546 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8547 gcc_assert (insn);
8548 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8549 {
8550 gcc_assert (GET_MODE (insn) != TImode);
8551 if (!try_issue_nops (curr_state, before_nops_num))
8552 return;
8553 if (!try_issue_insn (curr_state, insn))
8554 return;
8555 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8556 if (curr_state->accumulated_insns_num % 3 != 0)
8557 curr_state->middle_bundle_stops++;
8558 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8559 && curr_state->accumulated_insns_num % 3 != 0)
8560 {
8561 free_bundle_state (curr_state);
8562 return;
8563 }
8564 }
8565 else if (GET_MODE (insn) != TImode)
8566 {
8567 if (!try_issue_nops (curr_state, before_nops_num))
8568 return;
8569 if (!try_issue_insn (curr_state, insn))
8570 return;
8571 curr_state->accumulated_insns_num++;
8572 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8573 && asm_noperands (PATTERN (insn)) < 0);
8574
8575 if (ia64_safe_type (insn) == TYPE_L)
8576 curr_state->accumulated_insns_num++;
8577 }
8578 else
8579 {
8580 /* If this is an insn that must be first in a group, then don't allow
8581 nops to be emitted before it. Currently, alloc is the only such
8582 supported instruction. */
8583 /* ??? The bundling automatons should handle this for us, but they do
8584 not yet have support for the first_insn attribute. */
8585 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8586 {
8587 free_bundle_state (curr_state);
8588 return;
8589 }
8590
8591 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8592 state_transition (curr_state->dfa_state, NULL);
8593 curr_state->cost++;
8594 if (!try_issue_nops (curr_state, before_nops_num))
8595 return;
8596 if (!try_issue_insn (curr_state, insn))
8597 return;
8598 curr_state->accumulated_insns_num++;
8599 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8600 || asm_noperands (PATTERN (insn)) >= 0)
8601 {
8602 /* Finish bundle containing asm insn. */
8603 curr_state->after_nops_num
8604 = 3 - curr_state->accumulated_insns_num % 3;
8605 curr_state->accumulated_insns_num
8606 += 3 - curr_state->accumulated_insns_num % 3;
8607 }
8608 else if (ia64_safe_type (insn) == TYPE_L)
8609 curr_state->accumulated_insns_num++;
8610 }
8611 if (ia64_safe_type (insn) == TYPE_B)
8612 curr_state->branch_deviation
8613 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8614 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8615 {
8616 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8617 {
8618 state_t dfa_state;
8619 struct bundle_state *curr_state1;
8620 struct bundle_state *allocated_states_chain;
8621
8622 curr_state1 = get_free_bundle_state ();
8623 dfa_state = curr_state1->dfa_state;
8624 allocated_states_chain = curr_state1->allocated_states_chain;
8625 *curr_state1 = *curr_state;
8626 curr_state1->dfa_state = dfa_state;
8627 curr_state1->allocated_states_chain = allocated_states_chain;
8628 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8629 dfa_state_size);
8630 curr_state = curr_state1;
8631 }
8632 if (!try_issue_nops (curr_state,
8633 3 - curr_state->accumulated_insns_num % 3))
8634 return;
8635 curr_state->after_nops_num
8636 = 3 - curr_state->accumulated_insns_num % 3;
8637 curr_state->accumulated_insns_num
8638 += 3 - curr_state->accumulated_insns_num % 3;
8639 }
8640 if (!insert_bundle_state (curr_state))
8641 free_bundle_state (curr_state);
8642 return;
8643 }
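/* Editorial note, not part of the original sources: a summary of the cases
   above.  An insn_group_barrier consumes no slot and is counted as a
   middle-of-bundle stop when the bundle is not yet full; a non-TImode insn
   is issued within the current simulated cycle; a TImode insn (one that
   starts a new cycle in the schedule) first forces a cycle advance, which
   is what the cost++ accounts for.  An L-type insn occupies two slots, an
   asm always finishes its bundle, and when TRY_BUNDLE_END_P is set the
   variant in which the bundle is padded out with trailing nops is recorded
   as well.  */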
8644
8645 /* The following function returns the position in the two-bundle window
8646 for the given STATE. */
8647
8648 static int
8649 get_max_pos (state_t state)
8650 {
8651 if (cpu_unit_reservation_p (state, pos_6))
8652 return 6;
8653 else if (cpu_unit_reservation_p (state, pos_5))
8654 return 5;
8655 else if (cpu_unit_reservation_p (state, pos_4))
8656 return 4;
8657 else if (cpu_unit_reservation_p (state, pos_3))
8658 return 3;
8659 else if (cpu_unit_reservation_p (state, pos_2))
8660 return 2;
8661 else if (cpu_unit_reservation_p (state, pos_1))
8662 return 1;
8663 else
8664 return 0;
8665 }
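/* Editorial note, not part of the original sources: judging by its use in
   bundling () below, the pos_1 .. pos_6 unit reservations let this function
   recover how many of the six slots of the two-bundle issue window the
   given DFA state has already filled; a result of 6 means the whole window
   is used, while 3 with no template chosen yet means exactly the first
   bundle is full.  */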
8666
8667 /* The function returns the code of a possible template for the given
8668 position and state. It should be called only with a position equal
8669 to 3 or 6. We avoid generating F NOPs by putting templates
8670 containing F insns at the end of the template search, because an
8671 undocumented anomaly in McKinley-derived cores can cause stalls if
8672 an F-unit insn (including a NOP) is issued within a six-cycle window
8673 after reading certain application registers (such as ar.bsp).
8674 Furthermore, power considerations also argue against the use of
8675 F-unit instructions unless they're really needed. */
8676
8677 static int
8678 get_template (state_t state, int pos)
8679 {
8680 switch (pos)
8681 {
8682 case 3:
8683 if (cpu_unit_reservation_p (state, _0mmi_))
8684 return 1;
8685 else if (cpu_unit_reservation_p (state, _0mii_))
8686 return 0;
8687 else if (cpu_unit_reservation_p (state, _0mmb_))
8688 return 7;
8689 else if (cpu_unit_reservation_p (state, _0mib_))
8690 return 6;
8691 else if (cpu_unit_reservation_p (state, _0mbb_))
8692 return 5;
8693 else if (cpu_unit_reservation_p (state, _0bbb_))
8694 return 4;
8695 else if (cpu_unit_reservation_p (state, _0mmf_))
8696 return 3;
8697 else if (cpu_unit_reservation_p (state, _0mfi_))
8698 return 2;
8699 else if (cpu_unit_reservation_p (state, _0mfb_))
8700 return 8;
8701 else if (cpu_unit_reservation_p (state, _0mlx_))
8702 return 9;
8703 else
8704 gcc_unreachable ();
8705 case 6:
8706 if (cpu_unit_reservation_p (state, _1mmi_))
8707 return 1;
8708 else if (cpu_unit_reservation_p (state, _1mii_))
8709 return 0;
8710 else if (cpu_unit_reservation_p (state, _1mmb_))
8711 return 7;
8712 else if (cpu_unit_reservation_p (state, _1mib_))
8713 return 6;
8714 else if (cpu_unit_reservation_p (state, _1mbb_))
8715 return 5;
8716 else if (cpu_unit_reservation_p (state, _1bbb_))
8717 return 4;
8718 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8719 return 3;
8720 else if (cpu_unit_reservation_p (state, _1mfi_))
8721 return 2;
8722 else if (cpu_unit_reservation_p (state, _1mfb_))
8723 return 8;
8724 else if (cpu_unit_reservation_p (state, _1mlx_))
8725 return 9;
8726 else
8727 gcc_unreachable ();
8728 default:
8729 gcc_unreachable ();
8730 }
8731 }
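/* Editorial note, not part of the original sources: the values returned
   above appear to be the operand values accepted by gen_bundle_selector,
   i.e. indices of the IA-64 bundle templates.  Judging by the reservation
   names this would be 0 .mii, 1 .mmi, 2 .mfi, 3 .mmf, 4 .bbb, 5 .mbb,
   6 .mib, 7 .mmb, 8 .mfb and 9 .mlx, with the F-containing templates tried
   last for the reasons given in the comment before the function.  */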
8732
8733 /* True when INSN is important for bundling. */
8734 static bool
8735 important_for_bundling_p (rtx insn)
8736 {
8737 return (INSN_P (insn)
8738 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8739 && GET_CODE (PATTERN (insn)) != USE
8740 && GET_CODE (PATTERN (insn)) != CLOBBER);
8741 }
8742
8743 /* The following function returns the first insn important for insn
8744 bundling starting at INSN and before TAIL, or NULL_RTX if there is none. */
8745
8746 static rtx
8747 get_next_important_insn (rtx insn, rtx tail)
8748 {
8749 for (; insn && insn != tail; insn = NEXT_INSN (insn))
8750 if (important_for_bundling_p (insn))
8751 return insn;
8752 return NULL_RTX;
8753 }
8754
8755 /* Add a bundle selector TEMPLATE0 before INSN. */
8756
8757 static void
8758 ia64_add_bundle_selector_before (int template0, rtx insn)
8759 {
8760 rtx b = gen_bundle_selector (GEN_INT (template0));
8761
8762 ia64_emit_insn_before (b, insn);
8763 #if NR_BUNDLES == 10
8764 if ((template0 == 4 || template0 == 5)
8765 && ia64_except_unwind_info (&global_options) == UI_TARGET)
8766 {
8767 int i;
8768 rtx note = NULL_RTX;
8769
8770 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
8771 first or second slot. If it is and has REG_EH_NOTE set, copy it
8772 to following nops, as br.call sets rp to the address of following
8773 bundle and therefore an EH region end must be on a bundle
8774 boundary. */
8775 insn = PREV_INSN (insn);
8776 for (i = 0; i < 3; i++)
8777 {
8778 do
8779 insn = next_active_insn (insn);
8780 while (GET_CODE (insn) == INSN
8781 && get_attr_empty (insn) == EMPTY_YES);
8782 if (GET_CODE (insn) == CALL_INSN)
8783 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8784 else if (note)
8785 {
8786 int code;
8787
8788 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8789 || code == CODE_FOR_nop_b);
8790 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8791 note = NULL_RTX;
8792 else
8793 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8794 }
8795 }
8796 }
8797 #endif
8798 }
8799
8800 /* The following function does insn bundling. Bundling means
8801 inserting templates and nop insns to fit insn groups into permitted
8802 templates. Instruction scheduling uses NDFA (non-deterministic
8803 finite automata) encoding information about the templates and the
8804 inserted nops. The nondeterminism of the automaton permits following
8805 all possible insn sequences very quickly.
8806
8807 Unfortunately it is not possible to get information about inserting
8808 nop insns and used templates from the automata states. The
8809 automaton only says that we can issue an insn, possibly inserting
8810 some nops before it and using some template. Therefore insn
8811 bundling in this function is implemented by using DFA
8812 (deterministic finite automata). We follow all possible insn
8813 sequences by inserting 0-2 nops (that is what the NDFA describes for
8814 insn scheduling) before/after each insn being bundled. We know the
8815 start of simulated processor cycle from insn scheduling (insn
8816 starting a new cycle has TImode).
8817
8818 A simple implementation of insn bundling would create an enormous
8819 number of possible insn sequences satisfying the information about new
8820 cycle ticks taken from the insn scheduling. To make the algorithm
8821 practical we use dynamic programming. Each decision (about
8822 inserting nops and implicitly about previous decisions) is described
8823 by structure bundle_state (see above). If we generate the same
8824 bundle state (key is automaton state after issuing the insns and
8825 nops for it), we reuse the already generated one. As a consequence, we
8826 reject some decisions which cannot improve the solution and
8827 reduce the memory needed by the algorithm.
8828
8829 When we reach the end of EBB (extended basic block), we choose the
8830 best sequence and then, moving back in EBB, insert templates for
8831 the best alternative. The templates are taken from querying
8832 automaton state for each insn in chosen bundle states.
8833
8834 So the algorithm makes two (forward and backward) passes through
8835 EBB. */
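/* Editorial note, not part of the original sources: an outline of the
   dynamic programming implemented below.  index_to_bundle_states[i] chains
   all surviving bundle states after the first i important insns have been
   placed; for each predecessor state, issue_nops_and_insn is tried with 0,
   1 and 2 leading nops (2 only for F-, B-, L- and S-type insns), optionally
   also closing the bundle when the insn ends a cycle.  insert_bundle_state
   merges states with equal (DFA state, insn count) keys, keeping the better
   one, and the backward pass then walks the originator links of the best
   final state to emit the chosen nops and bundle selectors.  */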
8836
8837 static void
8838 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
8839 {
8840 struct bundle_state *curr_state, *next_state, *best_state;
8841 rtx insn, next_insn;
8842 int insn_num;
8843 int i, bundle_end_p, only_bundle_end_p, asm_p;
8844 int pos = 0, max_pos, template0, template1;
8845 rtx b;
8846 rtx nop;
8847 enum attr_type type;
8848
8849 insn_num = 0;
8850 /* Count insns in the EBB. */
8851 for (insn = NEXT_INSN (prev_head_insn);
8852 insn && insn != tail;
8853 insn = NEXT_INSN (insn))
8854 if (INSN_P (insn))
8855 insn_num++;
8856 if (insn_num == 0)
8857 return;
8858 bundling_p = 1;
8859 dfa_clean_insn_cache ();
8860 initiate_bundle_state_table ();
8861 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
8862 /* First (forward) pass -- generation of bundle states. */
8863 curr_state = get_free_bundle_state ();
8864 curr_state->insn = NULL;
8865 curr_state->before_nops_num = 0;
8866 curr_state->after_nops_num = 0;
8867 curr_state->insn_num = 0;
8868 curr_state->cost = 0;
8869 curr_state->accumulated_insns_num = 0;
8870 curr_state->branch_deviation = 0;
8871 curr_state->middle_bundle_stops = 0;
8872 curr_state->next = NULL;
8873 curr_state->originator = NULL;
8874 state_reset (curr_state->dfa_state);
8875 index_to_bundle_states [0] = curr_state;
8876 insn_num = 0;
8877 /* Shift the cycle mark if it is put on an insn which could be ignored. */
8878 for (insn = NEXT_INSN (prev_head_insn);
8879 insn != tail;
8880 insn = NEXT_INSN (insn))
8881 if (INSN_P (insn)
8882 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8883 || GET_CODE (PATTERN (insn)) == USE
8884 || GET_CODE (PATTERN (insn)) == CLOBBER)
8885 && GET_MODE (insn) == TImode)
8886 {
8887 PUT_MODE (insn, VOIDmode);
8888 for (next_insn = NEXT_INSN (insn);
8889 next_insn != tail;
8890 next_insn = NEXT_INSN (next_insn))
8891 if (INSN_P (next_insn)
8892 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8893 && GET_CODE (PATTERN (next_insn)) != USE
8894 && GET_CODE (PATTERN (next_insn)) != CLOBBER
8895 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
8896 {
8897 PUT_MODE (next_insn, TImode);
8898 break;
8899 }
8900 }
8901 /* Forward pass: generation of bundle states. */
8902 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8903 insn != NULL_RTX;
8904 insn = next_insn)
8905 {
8906 gcc_assert (INSN_P (insn)
8907 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8908 && GET_CODE (PATTERN (insn)) != USE
8909 && GET_CODE (PATTERN (insn)) != CLOBBER);
8910 type = ia64_safe_type (insn);
8911 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8912 insn_num++;
8913 index_to_bundle_states [insn_num] = NULL;
8914 for (curr_state = index_to_bundle_states [insn_num - 1];
8915 curr_state != NULL;
8916 curr_state = next_state)
8917 {
8918 pos = curr_state->accumulated_insns_num % 3;
8919 next_state = curr_state->next;
8920 /* We must fill up the current bundle in order to start a
8921 subsequent asm insn in a new bundle. Asm insn is always
8922 placed in a separate bundle. */
8923 only_bundle_end_p
8924 = (next_insn != NULL_RTX
8925 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8926 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8927 /* We may fill up the current bundle if it is the cycle end
8928 without a group barrier. */
8929 bundle_end_p
8930 = (only_bundle_end_p || next_insn == NULL_RTX
8931 || (GET_MODE (next_insn) == TImode
8932 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8933 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8934 || type == TYPE_S)
8935 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8936 only_bundle_end_p);
8937 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8938 only_bundle_end_p);
8939 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8940 only_bundle_end_p);
8941 }
8942 gcc_assert (index_to_bundle_states [insn_num]);
8943 for (curr_state = index_to_bundle_states [insn_num];
8944 curr_state != NULL;
8945 curr_state = curr_state->next)
8946 if (verbose >= 2 && dump)
8947 {
8948 /* This structure is taken from generated code of the
8949 pipeline hazard recognizer (see file insn-attrtab.c).
8950 Please don't forget to change the structure if a new
8951 automaton is added to .md file. */
8952 struct DFA_chip
8953 {
8954 unsigned short one_automaton_state;
8955 unsigned short oneb_automaton_state;
8956 unsigned short two_automaton_state;
8957 unsigned short twob_automaton_state;
8958 };
8959
8960 fprintf
8961 (dump,
8962 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8963 curr_state->unique_num,
8964 (curr_state->originator == NULL
8965 ? -1 : curr_state->originator->unique_num),
8966 curr_state->cost,
8967 curr_state->before_nops_num, curr_state->after_nops_num,
8968 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8969 curr_state->middle_bundle_stops,
8970 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8971 INSN_UID (insn));
8972 }
8973 }
8974
8975 /* We should find a solution because the 2nd insn scheduling has
8976 found one. */
8977 gcc_assert (index_to_bundle_states [insn_num]);
8978 /* Find a state corresponding to the best insn sequence. */
8979 best_state = NULL;
8980 for (curr_state = index_to_bundle_states [insn_num];
8981 curr_state != NULL;
8982 curr_state = curr_state->next)
8983 /* We only look at states whose last bundle is completely filled.
8984 First we prefer insn sequences with minimal cost, then with minimal
8985 inserted nops, and finally with branch insns placed in the 3rd
8986 slots. */
8987 if (curr_state->accumulated_insns_num % 3 == 0
8988 && (best_state == NULL || best_state->cost > curr_state->cost
8989 || (best_state->cost == curr_state->cost
8990 && (curr_state->accumulated_insns_num
8991 < best_state->accumulated_insns_num
8992 || (curr_state->accumulated_insns_num
8993 == best_state->accumulated_insns_num
8994 && (curr_state->branch_deviation
8995 < best_state->branch_deviation
8996 || (curr_state->branch_deviation
8997 == best_state->branch_deviation
8998 && curr_state->middle_bundle_stops
8999 < best_state->middle_bundle_stops)))))))
9000 best_state = curr_state;
9001 /* Second (backward) pass: adding nops and templates. */
9002 gcc_assert (best_state);
9003 insn_num = best_state->before_nops_num;
9004 template0 = template1 = -1;
9005 for (curr_state = best_state;
9006 curr_state->originator != NULL;
9007 curr_state = curr_state->originator)
9008 {
9009 insn = curr_state->insn;
9010 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
9011 || asm_noperands (PATTERN (insn)) >= 0);
9012 insn_num++;
9013 if (verbose >= 2 && dump)
9014 {
9015 struct DFA_chip
9016 {
9017 unsigned short one_automaton_state;
9018 unsigned short oneb_automaton_state;
9019 unsigned short two_automaton_state;
9020 unsigned short twob_automaton_state;
9021 };
9022
9023 fprintf
9024 (dump,
9025 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9026 curr_state->unique_num,
9027 (curr_state->originator == NULL
9028 ? -1 : curr_state->originator->unique_num),
9029 curr_state->cost,
9030 curr_state->before_nops_num, curr_state->after_nops_num,
9031 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9032 curr_state->middle_bundle_stops,
9033 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9034 INSN_UID (insn));
9035 }
9036 /* Find the position in the current bundle window. The window can
9037 contain at most two bundles. A two-bundle window means that
9038 the processor will make two bundle rotations. */
9039 max_pos = get_max_pos (curr_state->dfa_state);
9040 if (max_pos == 6
9041 /* The following (negative template number) means that the
9042 processor did one bundle rotation. */
9043 || (max_pos == 3 && template0 < 0))
9044 {
9045 /* We are at the end of the window -- find template(s) for
9046 its bundle(s). */
9047 pos = max_pos;
9048 if (max_pos == 3)
9049 template0 = get_template (curr_state->dfa_state, 3);
9050 else
9051 {
9052 template1 = get_template (curr_state->dfa_state, 3);
9053 template0 = get_template (curr_state->dfa_state, 6);
9054 }
9055 }
9056 if (max_pos > 3 && template1 < 0)
9057 /* It may happen when we have the stop inside a bundle. */
9058 {
9059 gcc_assert (pos <= 3);
9060 template1 = get_template (curr_state->dfa_state, 3);
9061 pos += 3;
9062 }
9063 if (!asm_p)
9064 /* Emit nops after the current insn. */
9065 for (i = 0; i < curr_state->after_nops_num; i++)
9066 {
9067 nop = gen_nop ();
9068 emit_insn_after (nop, insn);
9069 pos--;
9070 gcc_assert (pos >= 0);
9071 if (pos % 3 == 0)
9072 {
9073 /* We are at the start of a bundle: emit the template
9074 (it should be defined). */
9075 gcc_assert (template0 >= 0);
9076 ia64_add_bundle_selector_before (template0, nop);
9077 /* If we have a two-bundle window, we make one bundle
9078 rotation. Otherwise template0 will be undefined
9079 (a negative value). */
9080 template0 = template1;
9081 template1 = -1;
9082 }
9083 }
9084 /* Move the position backward in the window. A group barrier has
9085 no slot. An asm insn takes a whole bundle. */
9086 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9087 && GET_CODE (PATTERN (insn)) != ASM_INPUT
9088 && asm_noperands (PATTERN (insn)) < 0)
9089 pos--;
9090 /* Long insn takes 2 slots. */
9091 if (ia64_safe_type (insn) == TYPE_L)
9092 pos--;
9093 gcc_assert (pos >= 0);
9094 if (pos % 3 == 0
9095 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9096 && GET_CODE (PATTERN (insn)) != ASM_INPUT
9097 && asm_noperands (PATTERN (insn)) < 0)
9098 {
9099 /* The current insn is at the bundle start: emit the
9100 template. */
9101 gcc_assert (template0 >= 0);
9102 ia64_add_bundle_selector_before (template0, insn);
9103 b = PREV_INSN (insn);
9104 insn = b;
9105 /* See comment above in analogous place for emitting nops
9106 after the insn. */
9107 template0 = template1;
9108 template1 = -1;
9109 }
9110 /* Emit nops before the current insn. */
9111 for (i = 0; i < curr_state->before_nops_num; i++)
9112 {
9113 nop = gen_nop ();
9114 ia64_emit_insn_before (nop, insn);
9115 nop = PREV_INSN (insn);
9116 insn = nop;
9117 pos--;
9118 gcc_assert (pos >= 0);
9119 if (pos % 3 == 0)
9120 {
9121 /* See comment above in analogous place for emitting nops
9122 after the insn. */
9123 gcc_assert (template0 >= 0);
9124 ia64_add_bundle_selector_before (template0, insn);
9125 b = PREV_INSN (insn);
9126 insn = b;
9127 template0 = template1;
9128 template1 = -1;
9129 }
9130 }
9131 }
9132
9133 #ifdef ENABLE_CHECKING
9134 {
9135 /* Verify that middle_bundle_stops was calculated correctly. */
9136 int num = best_state->middle_bundle_stops;
9137 bool start_bundle = true, end_bundle = false;
9138
9139 for (insn = NEXT_INSN (prev_head_insn);
9140 insn && insn != tail;
9141 insn = NEXT_INSN (insn))
9142 {
9143 if (!INSN_P (insn))
9144 continue;
9145 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9146 start_bundle = true;
9147 else
9148 {
9149 rtx next_insn;
9150
9151 for (next_insn = NEXT_INSN (insn);
9152 next_insn && next_insn != tail;
9153 next_insn = NEXT_INSN (next_insn))
9154 if (INSN_P (next_insn)
9155 && (ia64_safe_itanium_class (next_insn)
9156 != ITANIUM_CLASS_IGNORE
9157 || recog_memoized (next_insn)
9158 == CODE_FOR_bundle_selector)
9159 && GET_CODE (PATTERN (next_insn)) != USE
9160 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9161 break;
9162
9163 end_bundle = next_insn == NULL_RTX
9164 || next_insn == tail
9165 || (INSN_P (next_insn)
9166 && recog_memoized (next_insn)
9167 == CODE_FOR_bundle_selector);
9168 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9169 && !start_bundle && !end_bundle
9170 && next_insn
9171 && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
9172 && asm_noperands (PATTERN (next_insn)) < 0)
9173 num--;
9174
9175 start_bundle = false;
9176 }
9177 }
9178
9179 gcc_assert (num == 0);
9180 }
9181 #endif
9182
9183 free (index_to_bundle_states);
9184 finish_bundle_state_table ();
9185 bundling_p = 0;
9186 dfa_clean_insn_cache ();
9187 }
9188
9189 /* The following function is called at the end of scheduling BB or
9190 EBB. After reload, it inserts stop bits and does insn bundling. */
9191
9192 static void
9193 ia64_sched_finish (FILE *dump, int sched_verbose)
9194 {
9195 if (sched_verbose)
9196 fprintf (dump, "// Finishing schedule.\n");
9197 if (!reload_completed)
9198 return;
9199 if (reload_completed)
9200 {
9201 final_emit_insn_group_barriers (dump);
9202 bundling (dump, sched_verbose, current_sched_info->prev_head,
9203 current_sched_info->next_tail);
9204 if (sched_verbose && dump)
9205 fprintf (dump, "// finishing %d-%d\n",
9206 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9207 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9208
9209 return;
9210 }
9211 }
9212
9213 /* The following function inserts stop bits in scheduled BB or EBB. */
9214
9215 static void
9216 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9217 {
9218 rtx insn;
9219 int need_barrier_p = 0;
9220 int seen_good_insn = 0;
9221
9222 init_insn_group_barriers ();
9223
9224 for (insn = NEXT_INSN (current_sched_info->prev_head);
9225 insn != current_sched_info->next_tail;
9226 insn = NEXT_INSN (insn))
9227 {
9228 if (GET_CODE (insn) == BARRIER)
9229 {
9230 rtx last = prev_active_insn (insn);
9231
9232 if (! last)
9233 continue;
9234 if (GET_CODE (last) == JUMP_INSN
9235 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
9236 last = prev_active_insn (last);
9237 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9238 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9239
9240 init_insn_group_barriers ();
9241 seen_good_insn = 0;
9242 need_barrier_p = 0;
9243 }
9244 else if (NONDEBUG_INSN_P (insn))
9245 {
9246 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9247 {
9248 init_insn_group_barriers ();
9249 seen_good_insn = 0;
9250 need_barrier_p = 0;
9251 }
9252 else if (need_barrier_p || group_barrier_needed (insn)
9253 || (mflag_sched_stop_bits_after_every_cycle
9254 && GET_MODE (insn) == TImode
9255 && seen_good_insn))
9256 {
9257 if (TARGET_EARLY_STOP_BITS)
9258 {
9259 rtx last;
9260
9261 for (last = insn;
9262 last != current_sched_info->prev_head;
9263 last = PREV_INSN (last))
9264 if (INSN_P (last) && GET_MODE (last) == TImode
9265 && stops_p [INSN_UID (last)])
9266 break;
9267 if (last == current_sched_info->prev_head)
9268 last = insn;
9269 last = prev_active_insn (last);
9270 if (last
9271 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9272 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9273 last);
9274 init_insn_group_barriers ();
9275 for (last = NEXT_INSN (last);
9276 last != insn;
9277 last = NEXT_INSN (last))
9278 if (INSN_P (last))
9279 {
9280 group_barrier_needed (last);
9281 if (recog_memoized (last) >= 0
9282 && important_for_bundling_p (last))
9283 seen_good_insn = 1;
9284 }
9285 }
9286 else
9287 {
9288 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9289 insn);
9290 init_insn_group_barriers ();
9291 seen_good_insn = 0;
9292 }
9293 group_barrier_needed (insn);
9294 if (recog_memoized (insn) >= 0
9295 && important_for_bundling_p (insn))
9296 seen_good_insn = 1;
9297 }
9298 else if (recog_memoized (insn) >= 0
9299 && important_for_bundling_p (insn))
9300 seen_good_insn = 1;
9301 need_barrier_p = (GET_CODE (insn) == CALL_INSN
9302 || GET_CODE (PATTERN (insn)) == ASM_INPUT
9303 || asm_noperands (PATTERN (insn)) >= 0);
9304 }
9305 }
9306 }
9307
9308 \f
9309
9310 /* The following function returns the lookahead depth used by the
9311 multipass DFA insn scheduler; a nonzero value enables it. */
9312
9313 static int
9314 ia64_first_cycle_multipass_dfa_lookahead (void)
9315 {
9316 return (reload_completed ? 6 : 4);
9317 }
9318
9319 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
9320
9321 static void
9322 ia64_init_dfa_pre_cycle_insn (void)
9323 {
9324 if (temp_dfa_state == NULL)
9325 {
9326 dfa_state_size = state_size ();
9327 temp_dfa_state = xmalloc (dfa_state_size);
9328 prev_cycle_state = xmalloc (dfa_state_size);
9329 }
9330 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9331 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9332 recog_memoized (dfa_pre_cycle_insn);
9333 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9334 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9335 recog_memoized (dfa_stop_insn);
9336 }
9337
9338 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9339 used by the DFA insn scheduler. */
9340
9341 static rtx
9342 ia64_dfa_pre_cycle_insn (void)
9343 {
9344 return dfa_pre_cycle_insn;
9345 }
9346
9347 /* The following function returns TRUE if PRODUCER (of type ilog or
9348 ld) produces address for CONSUMER (of type st or stf). */
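/* A sketch of the RTL shapes this checks (illustrative only):
PRODUCER: (set (reg:DI rX) (...))
CONSUMER: (set (mem:DI (... rX ...)) (reg:DI rY))
i.e. the register set by PRODUCER is mentioned in CONSUMER's address. */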
9349
9350 int
9351 ia64_st_address_bypass_p (rtx producer, rtx consumer)
9352 {
9353 rtx dest, reg, mem;
9354
9355 gcc_assert (producer && consumer);
9356 dest = ia64_single_set (producer);
9357 gcc_assert (dest);
9358 reg = SET_DEST (dest);
9359 gcc_assert (reg);
9360 if (GET_CODE (reg) == SUBREG)
9361 reg = SUBREG_REG (reg);
9362 gcc_assert (GET_CODE (reg) == REG);
9363
9364 dest = ia64_single_set (consumer);
9365 gcc_assert (dest);
9366 mem = SET_DEST (dest);
9367 gcc_assert (mem && GET_CODE (mem) == MEM);
9368 return reg_mentioned_p (reg, mem);
9369 }
9370
9371 /* The following function returns TRUE if PRODUCER (of type ilog or
9372 ld) produces address for CONSUMER (of type ld or fld). */
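/* Here CONSUMER's SET_SRC may be wrapped: in an UNSPEC (speculative loads
such as ld.a/ld.s -- an informal gloss, not taken from this code), an
IF_THEN_ELSE (ld.c), or a SUBREG/ZERO_EXTEND; the code digs down to the
underlying MEM (or LO_SUM for GOT loads) before checking whether
PRODUCER's destination register is mentioned in the address. */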
9373
9374 int
9375 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
9376 {
9377 rtx dest, src, reg, mem;
9378
9379 gcc_assert (producer && consumer);
9380 dest = ia64_single_set (producer);
9381 gcc_assert (dest);
9382 reg = SET_DEST (dest);
9383 gcc_assert (reg);
9384 if (GET_CODE (reg) == SUBREG)
9385 reg = SUBREG_REG (reg);
9386 gcc_assert (GET_CODE (reg) == REG);
9387
9388 src = ia64_single_set (consumer);
9389 gcc_assert (src);
9390 mem = SET_SRC (src);
9391 gcc_assert (mem);
9392
9393 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9394 mem = XVECEXP (mem, 0, 0);
9395 else if (GET_CODE (mem) == IF_THEN_ELSE)
9396 /* ??? Is this bypass necessary for ld.c? */
9397 {
9398 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9399 mem = XEXP (mem, 1);
9400 }
9401
9402 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9403 mem = XEXP (mem, 0);
9404
9405 if (GET_CODE (mem) == UNSPEC)
9406 {
9407 int c = XINT (mem, 1);
9408
9409 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9410 || c == UNSPEC_LDSA);
9411 mem = XVECEXP (mem, 0, 0);
9412 }
9413
9414 /* Note that LO_SUM is used for GOT loads. */
9415 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9416
9417 return reg_mentioned_p (reg, mem);
9418 }
9419
9420 /* The following function returns TRUE if INSN produces an address for a
9421 load/store insn. We will place such insns into an M slot because it
9422 decreases their latency. */
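/* Note: the function below simply reads the insn's `call' bit, which this
port reuses as a per-insn marker; it is presumably set during dependence
analysis elsewhere in this file when such an address bypass is found. */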
9423
9424 int
9425 ia64_produce_address_p (rtx insn)
9426 {
9427 return insn->call;
9428 }
9429
9430 \f
9431 /* Emit pseudo-ops for the assembler to describe predicate relations.
9432 At present this assumes that we only consider predicate pairs to
9433 be mutex, and that the assembler can deduce proper values from
9434 straight-line code. */
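/* The loop below emits a pred_rel_mutex pseudo insn, right after the
block's code label, for each even-numbered predicate register that is
live on entry to the block; the assembler text it expands to (a
.pred.rel directive) is defined by the pred_rel_mutex pattern in
ia64.md and is only paraphrased here. */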
9435
9436 static void
9437 emit_predicate_relation_info (void)
9438 {
9439 basic_block bb;
9440
9441 FOR_EACH_BB_REVERSE (bb)
9442 {
9443 int r;
9444 rtx head = BB_HEAD (bb);
9445
9446 /* We only need such notes at code labels. */
9447 if (GET_CODE (head) != CODE_LABEL)
9448 continue;
9449 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9450 head = NEXT_INSN (head);
9451
9452 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9453 grabbing the entire block of predicate registers. */
9454 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9455 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9456 {
9457 rtx p = gen_rtx_REG (BImode, r);
9458 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
9459 if (head == BB_END (bb))
9460 BB_END (bb) = n;
9461 head = n;
9462 }
9463 }
9464
9465 /* Look for conditional calls that do not return, and protect predicate
9466 relations around them. Otherwise the assembler will assume the call
9467 returns, and complain about uses of call-clobbered predicates after
9468 the call. */
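/* The safe_across_calls_all and safe_across_calls_normal pseudo insns
emitted below bracket such a call; their assembler output (roughly,
.pred.safe_across_calls directives) comes from the corresponding
patterns in ia64.md and is only sketched here. */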
9469 FOR_EACH_BB_REVERSE (bb)
9470 {
9471 rtx insn = BB_HEAD (bb);
9472
9473 while (1)
9474 {
9475 if (GET_CODE (insn) == CALL_INSN
9476 && GET_CODE (PATTERN (insn)) == COND_EXEC
9477 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9478 {
9479 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9480 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9481 if (BB_HEAD (bb) == insn)
9482 BB_HEAD (bb) = b;
9483 if (BB_END (bb) == insn)
9484 BB_END (bb) = a;
9485 }
9486
9487 if (insn == BB_END (bb))
9488 break;
9489 insn = NEXT_INSN (insn);
9490 }
9491 }
9492 }
9493
9494 /* Perform machine dependent operations on the rtl chain INSNS. */
9495
9496 static void
9497 ia64_reorg (void)
9498 {
9499 /* We are freeing block_for_insn in the toplev to keep compatibility
9500 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9501 compute_bb_for_insn ();
9502
9503 /* If optimizing, we'll have split before scheduling. */
9504 if (optimize == 0)
9505 split_all_insns ();
9506
9507 if (optimize && flag_schedule_insns_after_reload
9508 && dbg_cnt (ia64_sched2))
9509 {
9510 basic_block bb;
9511 timevar_push (TV_SCHED2);
9512 ia64_final_schedule = 1;
9513
9514 /* We can't let modulo-sched prevent us from scheduling any bbs,
9515 since we need the final schedule to produce bundle information. */
9516 FOR_EACH_BB (bb)
9517 bb->flags &= ~BB_DISABLE_SCHEDULE;
9518
9519 initiate_bundle_states ();
9520 ia64_nop = make_insn_raw (gen_nop ());
9521 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9522 recog_memoized (ia64_nop);
9523 clocks_length = get_max_uid () + 1;
9524 stops_p = XCNEWVEC (char, clocks_length);
9525
9526 if (ia64_tune == PROCESSOR_ITANIUM2)
9527 {
9528 pos_1 = get_cpu_unit_code ("2_1");
9529 pos_2 = get_cpu_unit_code ("2_2");
9530 pos_3 = get_cpu_unit_code ("2_3");
9531 pos_4 = get_cpu_unit_code ("2_4");
9532 pos_5 = get_cpu_unit_code ("2_5");
9533 pos_6 = get_cpu_unit_code ("2_6");
9534 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9535 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9536 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9537 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9538 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9539 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9540 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9541 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9542 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9543 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9544 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9545 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9546 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9547 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9548 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9549 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9550 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9551 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9552 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9553 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9554 }
9555 else
9556 {
9557 pos_1 = get_cpu_unit_code ("1_1");
9558 pos_2 = get_cpu_unit_code ("1_2");
9559 pos_3 = get_cpu_unit_code ("1_3");
9560 pos_4 = get_cpu_unit_code ("1_4");
9561 pos_5 = get_cpu_unit_code ("1_5");
9562 pos_6 = get_cpu_unit_code ("1_6");
9563 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9564 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9565 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9566 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9567 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9568 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9569 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9570 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9571 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9572 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9573 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9574 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9575 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9576 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9577 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9578 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9579 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9580 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9581 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9582 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9583 }
9584
9585 if (flag_selective_scheduling2
9586 && !maybe_skip_selective_scheduling ())
9587 run_selective_scheduling ();
9588 else
9589 schedule_ebbs ();
9590
9591 /* Redo the alignment computation, as it might have gone wrong. */
9592 compute_alignments ();
9593
9594 /* We cannot reuse this one because it has been corrupted by the
9595 evil glat. */
9596 finish_bundle_states ();
9597 free (stops_p);
9598 stops_p = NULL;
9599 emit_insn_group_barriers (dump_file);
9600
9601 ia64_final_schedule = 0;
9602 timevar_pop (TV_SCHED2);
9603 }
9604 else
9605 emit_all_insn_group_barriers (dump_file);
9606
9607 df_analyze ();
9608
9609 /* A call must not be the last instruction in a function, so that the
9610 return address remains within the function and unwinding works
9611 properly. Note that IA-64 differs from dwarf2 on this point. */
9612 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9613 {
9614 rtx insn;
9615 int saw_stop = 0;
9616
9617 insn = get_last_insn ();
9618 if (! INSN_P (insn))
9619 insn = prev_active_insn (insn);
9620 if (insn)
9621 {
9622 /* Skip over insns that expand to nothing. */
9623 while (GET_CODE (insn) == INSN
9624 && get_attr_empty (insn) == EMPTY_YES)
9625 {
9626 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9627 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9628 saw_stop = 1;
9629 insn = prev_active_insn (insn);
9630 }
9631 if (GET_CODE (insn) == CALL_INSN)
9632 {
9633 if (! saw_stop)
9634 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9635 emit_insn (gen_break_f ());
9636 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9637 }
9638 }
9639 }
9640
9641 emit_predicate_relation_info ();
9642
9643 if (flag_var_tracking)
9644 {
9645 timevar_push (TV_VAR_TRACKING);
9646 variable_tracking_main ();
9647 timevar_pop (TV_VAR_TRACKING);
9648 }
9649 df_finish_pass (false);
9650 }
9651 \f
9652 /* Return true if REGNO is used by the epilogue. */
9653
9654 int
9655 ia64_epilogue_uses (int regno)
9656 {
9657 switch (regno)
9658 {
9659 case R_GR (1):
9660 /* With a call to a function in another module, we will write a new
9661 value to "gp". After returning from such a call, we need to make
9662 sure the function restores the original gp-value, even if the
9663 function itself does not use the gp anymore. */
9664 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9665
9666 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9667 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9668 /* For functions defined with the syscall_linkage attribute, all
9669 input registers are marked as live at all function exits. This
9670 prevents the register allocator from using the input registers,
9671 which in turn makes it possible to restart a system call after
9672 an interrupt without having to save/restore the input registers.
9673 This also prevents kernel data from leaking to application code. */
9674 return lookup_attribute ("syscall_linkage",
9675 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9676
9677 case R_BR (0):
9678 /* Conditional return patterns can't represent the use of `b0' as
9679 the return address, so we force the value live this way. */
9680 return 1;
9681
9682 case AR_PFS_REGNUM:
9683 /* Likewise for ar.pfs, which is used by br.ret. */
9684 return 1;
9685
9686 default:
9687 return 0;
9688 }
9689 }
9690
9691 /* Return true if REGNO is used by the frame unwinder. */
9692
9693 int
9694 ia64_eh_uses (int regno)
9695 {
9696 unsigned int r;
9697
9698 if (! reload_completed)
9699 return 0;
9700
9701 if (regno == 0)
9702 return 0;
9703
9704 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9705 if (regno == current_frame_info.r[r]
9706 || regno == emitted_frame_related_regs[r])
9707 return 1;
9708
9709 return 0;
9710 }
9711 \f
9712 /* Return true if this goes in small data/bss. */
9713
9714 /* ??? We could also support our own long data here, generating movl/add/ld8
9715 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9716 code faster because there is one less load. This would also cover incomplete
9717 types, which can't go in sdata/sbss. */
9718
9719 static bool
9720 ia64_in_small_data_p (const_tree exp)
9721 {
9722 if (TARGET_NO_SDATA)
9723 return false;
9724
9725 /* We want to merge strings, so we never consider them small data. */
9726 if (TREE_CODE (exp) == STRING_CST)
9727 return false;
9728
9729 /* Functions are never small data. */
9730 if (TREE_CODE (exp) == FUNCTION_DECL)
9731 return false;
9732
9733 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9734 {
9735 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
9736
9737 if (strcmp (section, ".sdata") == 0
9738 || strncmp (section, ".sdata.", 7) == 0
9739 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9740 || strcmp (section, ".sbss") == 0
9741 || strncmp (section, ".sbss.", 6) == 0
9742 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9743 return true;
9744 }
9745 else
9746 {
9747 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9748
9749 /* If this is an incomplete type with size 0, then we can't put it
9750 in sdata because it might be too big when completed. */
9751 if (size > 0 && size <= ia64_section_threshold)
9752 return true;
9753 }
9754
9755 return false;
9756 }
9757 \f
9758 /* Output assembly directives for prologue regions. */
9759
9760 /* True if the current basic block is the last block of the function. */
9761
9762 static bool last_block;
9763
9764 /* True if we need a copy_state command at the start of the next block. */
9765
9766 static bool need_copy_state;
9767
9768 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9769 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9770 #endif
9771
9772 /* The function emits unwind directives for the start of an epilogue. */
9773
9774 static void
9775 process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
9776 bool unwind, bool frame ATTRIBUTE_UNUSED)
9777 {
9778 /* If this isn't the last block of the function, then we need to label the
9779 current state, and copy it back in at the start of the next block. */
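/* Concretely: a "\t.label_state N" directive is printed here, and
ia64_asm_unwind_emit prints the matching "\t.copy_state N" (plus ".body")
at the start of the next block when need_copy_state is seen -- see the
NOTE_INSN_BASIC_BLOCK handling further below. */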
9780
9781 if (!last_block)
9782 {
9783 if (unwind)
9784 fprintf (asm_out_file, "\t.label_state %d\n",
9785 ++cfun->machine->state_num);
9786 need_copy_state = true;
9787 }
9788
9789 if (unwind)
9790 fprintf (asm_out_file, "\t.restore sp\n");
9791 }
9792
9793 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
9794
9795 static void
9796 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
9797 bool unwind, bool frame)
9798 {
9799 rtx dest = SET_DEST (pat);
9800 rtx src = SET_SRC (pat);
9801
9802 if (dest == stack_pointer_rtx)
9803 {
9804 if (GET_CODE (src) == PLUS)
9805 {
9806 rtx op0 = XEXP (src, 0);
9807 rtx op1 = XEXP (src, 1);
9808
9809 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9810
9811 if (INTVAL (op1) < 0)
9812 {
9813 gcc_assert (!frame_pointer_needed);
9814 if (unwind)
9815 fprintf (asm_out_file,
9816 "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9817 -INTVAL (op1));
9818 }
9819 else
9820 process_epilogue (asm_out_file, insn, unwind, frame);
9821 }
9822 else
9823 {
9824 gcc_assert (src == hard_frame_pointer_rtx);
9825 process_epilogue (asm_out_file, insn, unwind, frame);
9826 }
9827 }
9828 else if (dest == hard_frame_pointer_rtx)
9829 {
9830 gcc_assert (src == stack_pointer_rtx);
9831 gcc_assert (frame_pointer_needed);
9832
9833 if (unwind)
9834 fprintf (asm_out_file, "\t.vframe r%d\n",
9835 ia64_dbx_register_number (REGNO (dest)));
9836 }
9837 else
9838 gcc_unreachable ();
9839 }
9840
9841 /* This function processes a SET pattern for REG_CFA_REGISTER. */
9842
9843 static void
9844 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
9845 {
9846 rtx dest = SET_DEST (pat);
9847 rtx src = SET_SRC (pat);
9848 int dest_regno = REGNO (dest);
9849 int src_regno;
9850
9851 if (src == pc_rtx)
9852 {
9853 /* Saving return address pointer. */
9854 if (unwind)
9855 fprintf (asm_out_file, "\t.save rp, r%d\n",
9856 ia64_dbx_register_number (dest_regno));
9857 return;
9858 }
9859
9860 src_regno = REGNO (src);
9861
9862 switch (src_regno)
9863 {
9864 case PR_REG (0):
9865 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
9866 if (unwind)
9867 fprintf (asm_out_file, "\t.save pr, r%d\n",
9868 ia64_dbx_register_number (dest_regno));
9869 break;
9870
9871 case AR_UNAT_REGNUM:
9872 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
9873 if (unwind)
9874 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9875 ia64_dbx_register_number (dest_regno));
9876 break;
9877
9878 case AR_LC_REGNUM:
9879 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
9880 if (unwind)
9881 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9882 ia64_dbx_register_number (dest_regno));
9883 break;
9884
9885 default:
9886 /* Everything else should indicate being stored to memory. */
9887 gcc_unreachable ();
9888 }
9889 }
9890
9891 /* This function processes a SET pattern for REG_CFA_OFFSET. */
9892
9893 static void
9894 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
9895 {
9896 rtx dest = SET_DEST (pat);
9897 rtx src = SET_SRC (pat);
9898 int src_regno = REGNO (src);
9899 const char *saveop;
9900 HOST_WIDE_INT off;
9901 rtx base;
9902
9903 gcc_assert (MEM_P (dest));
9904 if (GET_CODE (XEXP (dest, 0)) == REG)
9905 {
9906 base = XEXP (dest, 0);
9907 off = 0;
9908 }
9909 else
9910 {
9911 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9912 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
9913 base = XEXP (XEXP (dest, 0), 0);
9914 off = INTVAL (XEXP (XEXP (dest, 0), 1));
9915 }
9916
9917 if (base == hard_frame_pointer_rtx)
9918 {
9919 saveop = ".savepsp";
9920 off = - off;
9921 }
9922 else
9923 {
9924 gcc_assert (base == stack_pointer_rtx);
9925 saveop = ".savesp";
9926 }
9927
9928 src_regno = REGNO (src);
9929 switch (src_regno)
9930 {
9931 case BR_REG (0):
9932 gcc_assert (!current_frame_info.r[reg_save_b0]);
9933 if (unwind)
9934 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
9935 saveop, off);
9936 break;
9937
9938 case PR_REG (0):
9939 gcc_assert (!current_frame_info.r[reg_save_pr]);
9940 if (unwind)
9941 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
9942 saveop, off);
9943 break;
9944
9945 case AR_LC_REGNUM:
9946 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
9947 if (unwind)
9948 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
9949 saveop, off);
9950 break;
9951
9952 case AR_PFS_REGNUM:
9953 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
9954 if (unwind)
9955 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
9956 saveop, off);
9957 break;
9958
9959 case AR_UNAT_REGNUM:
9960 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
9961 if (unwind)
9962 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
9963 saveop, off);
9964 break;
9965
9966 case GR_REG (4):
9967 case GR_REG (5):
9968 case GR_REG (6):
9969 case GR_REG (7):
9970 if (unwind)
9971 fprintf (asm_out_file, "\t.save.g 0x%x\n",
9972 1 << (src_regno - GR_REG (4)));
9973 break;
9974
9975 case BR_REG (1):
9976 case BR_REG (2):
9977 case BR_REG (3):
9978 case BR_REG (4):
9979 case BR_REG (5):
9980 if (unwind)
9981 fprintf (asm_out_file, "\t.save.b 0x%x\n",
9982 1 << (src_regno - BR_REG (1)));
9983 break;
9984
9985 case FR_REG (2):
9986 case FR_REG (3):
9987 case FR_REG (4):
9988 case FR_REG (5):
9989 if (unwind)
9990 fprintf (asm_out_file, "\t.save.f 0x%x\n",
9991 1 << (src_regno - FR_REG (2)));
9992 break;
9993
9994 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9995 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9996 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9997 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9998 if (unwind)
9999 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10000 1 << (src_regno - FR_REG (12)));
10001 break;
10002
10003 default:
10004 /* ??? For some reason we mark other general registers, even those
10005 we can't represent in the unwind info. Ignore them. */
10006 break;
10007 }
10008 }
10009
10010 /* This function looks at a single insn and emits any directives
10011 required to unwind this insn. */
10012
10013 static void
10014 ia64_asm_unwind_emit (FILE *asm_out_file, rtx insn)
10015 {
10016 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10017 bool frame = dwarf2out_do_frame ();
10018 rtx note, pat;
10019 bool handled_one;
10020
10021 if (!unwind && !frame)
10022 return;
10023
10024 if (NOTE_INSN_BASIC_BLOCK_P (insn))
10025 {
10026 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
10027
10028 /* Restore unwind state from immediately before the epilogue. */
10029 if (need_copy_state)
10030 {
10031 if (unwind)
10032 {
10033 fprintf (asm_out_file, "\t.body\n");
10034 fprintf (asm_out_file, "\t.copy_state %d\n",
10035 cfun->machine->state_num);
10036 }
10037 need_copy_state = false;
10038 }
10039 }
10040
10041 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
10042 return;
10043
10044 /* Look for the ALLOC insn. */
10045 if (INSN_CODE (insn) == CODE_FOR_alloc)
10046 {
10047 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10048 int dest_regno = REGNO (dest);
10049
10050 /* If this is the final destination for ar.pfs, then this must
10051 be the alloc in the prologue. */
10052 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10053 {
10054 if (unwind)
10055 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10056 ia64_dbx_register_number (dest_regno));
10057 }
10058 else
10059 {
10060 /* This must be an alloc before a sibcall. We must drop the
10061 old frame info. The easiest way to drop the old frame
10062 info is to ensure we had a ".restore sp" directive
10063 followed by a new prologue. If the procedure doesn't
10064 have a memory-stack frame, we'll issue a dummy ".restore
10065 sp" now. */
10066 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10067 /* If we haven't done process_epilogue () yet, do it now. */
10068 process_epilogue (asm_out_file, insn, unwind, frame);
10069 if (unwind)
10070 fprintf (asm_out_file, "\t.prologue\n");
10071 }
10072 return;
10073 }
10074
10075 handled_one = false;
10076 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10077 switch (REG_NOTE_KIND (note))
10078 {
10079 case REG_CFA_ADJUST_CFA:
10080 pat = XEXP (note, 0);
10081 if (pat == NULL)
10082 pat = PATTERN (insn);
10083 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10084 handled_one = true;
10085 break;
10086
10087 case REG_CFA_OFFSET:
10088 pat = XEXP (note, 0);
10089 if (pat == NULL)
10090 pat = PATTERN (insn);
10091 process_cfa_offset (asm_out_file, pat, unwind);
10092 handled_one = true;
10093 break;
10094
10095 case REG_CFA_REGISTER:
10096 pat = XEXP (note, 0);
10097 if (pat == NULL)
10098 pat = PATTERN (insn);
10099 process_cfa_register (asm_out_file, pat, unwind);
10100 handled_one = true;
10101 break;
10102
10103 case REG_FRAME_RELATED_EXPR:
10104 case REG_CFA_DEF_CFA:
10105 case REG_CFA_EXPRESSION:
10106 case REG_CFA_RESTORE:
10107 case REG_CFA_SET_VDRAP:
10108 /* Not used in the ia64 port. */
10109 gcc_unreachable ();
10110
10111 default:
10112 /* Not a frame-related note. */
10113 break;
10114 }
10115
10116 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10117 explicit action to take. No guessing required. */
10118 gcc_assert (handled_one);
10119 }
10120
10121 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10122
10123 static void
10124 ia64_asm_emit_except_personality (rtx personality)
10125 {
10126 fputs ("\t.personality\t", asm_out_file);
10127 output_addr_const (asm_out_file, personality);
10128 fputc ('\n', asm_out_file);
10129 }
10130
10131 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10132
10133 static void
10134 ia64_asm_init_sections (void)
10135 {
10136 exception_section = get_unnamed_section (0, output_section_asm_op,
10137 "\t.handlerdata");
10138 }
10139
10140 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10141
10142 static enum unwind_info_type
10143 ia64_debug_unwind_info (void)
10144 {
10145 return UI_TARGET;
10146 }
10147 \f
10148 enum ia64_builtins
10149 {
10150 IA64_BUILTIN_BSP,
10151 IA64_BUILTIN_COPYSIGNQ,
10152 IA64_BUILTIN_FABSQ,
10153 IA64_BUILTIN_FLUSHRS,
10154 IA64_BUILTIN_INFQ,
10155 IA64_BUILTIN_HUGE_VALQ,
10156 IA64_BUILTIN_max
10157 };
10158
10159 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10160
10161 void
10162 ia64_init_builtins (void)
10163 {
10164 tree fpreg_type;
10165 tree float80_type;
10166 tree decl;
10167
10168 /* The __fpreg type. */
10169 fpreg_type = make_node (REAL_TYPE);
10170 TYPE_PRECISION (fpreg_type) = 82;
10171 layout_type (fpreg_type);
10172 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10173
10174 /* The __float80 type. */
10175 float80_type = make_node (REAL_TYPE);
10176 TYPE_PRECISION (float80_type) = 80;
10177 layout_type (float80_type);
10178 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10179
10180 /* The __float128 type. */
10181 if (!TARGET_HPUX)
10182 {
10183 tree ftype;
10184 tree float128_type = make_node (REAL_TYPE);
10185
10186 TYPE_PRECISION (float128_type) = 128;
10187 layout_type (float128_type);
10188 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
10189
10190 /* TFmode support builtins. */
10191 ftype = build_function_type_list (float128_type, NULL_TREE);
10192 decl = add_builtin_function ("__builtin_infq", ftype,
10193 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10194 NULL, NULL_TREE);
10195 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10196
10197 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10198 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10199 NULL, NULL_TREE);
10200 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10201
10202 ftype = build_function_type_list (float128_type,
10203 float128_type,
10204 NULL_TREE);
10205 decl = add_builtin_function ("__builtin_fabsq", ftype,
10206 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10207 "__fabstf2", NULL_TREE);
10208 TREE_READONLY (decl) = 1;
10209 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10210
10211 ftype = build_function_type_list (float128_type,
10212 float128_type,
10213 float128_type,
10214 NULL_TREE);
10215 decl = add_builtin_function ("__builtin_copysignq", ftype,
10216 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10217 "__copysigntf3", NULL_TREE);
10218 TREE_READONLY (decl) = 1;
10219 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10220 }
10221 else
10222 /* Under HPUX, this is a synonym for "long double". */
10223 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10224 "__float128");
10225
10226 /* Fwrite on VMS is non-standard. */
10227 #if TARGET_ABI_OPEN_VMS
10228 vms_patch_builtins ();
10229 #endif
10230
10231 #define def_builtin(name, type, code) \
10232 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10233 NULL, NULL_TREE)
10234
10235 decl = def_builtin ("__builtin_ia64_bsp",
10236 build_function_type_list (ptr_type_node, NULL_TREE),
10237 IA64_BUILTIN_BSP);
10238 ia64_builtins[IA64_BUILTIN_BSP] = decl;
10239
10240 decl = def_builtin ("__builtin_ia64_flushrs",
10241 build_function_type_list (void_type_node, NULL_TREE),
10242 IA64_BUILTIN_FLUSHRS);
10243 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10244
10245 #undef def_builtin
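/* Example use of the two builtins declared above (user code, not part of
this file; the descriptions are an informal gloss):

void *bsp = __builtin_ia64_bsp ();    the current backing-store pointer
__builtin_ia64_flushrs ();            flush the register stack to memory

The prototypes match the types built above: void *(void) and void (void). */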
10246
10247 if (TARGET_HPUX)
10248 {
10249 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10250 set_user_assembler_name (decl, "_Isfinite");
10251 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10252 set_user_assembler_name (decl, "_Isfinitef");
10253 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10254 set_user_assembler_name (decl, "_Isfinitef128");
10255 }
10256 }
10257
10258 rtx
10259 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10260 enum machine_mode mode ATTRIBUTE_UNUSED,
10261 int ignore ATTRIBUTE_UNUSED)
10262 {
10263 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10264 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10265
10266 switch (fcode)
10267 {
10268 case IA64_BUILTIN_BSP:
10269 if (! target || ! register_operand (target, DImode))
10270 target = gen_reg_rtx (DImode);
10271 emit_insn (gen_bsp_value (target));
10272 #ifdef POINTERS_EXTEND_UNSIGNED
10273 target = convert_memory_address (ptr_mode, target);
10274 #endif
10275 return target;
10276
10277 case IA64_BUILTIN_FLUSHRS:
10278 emit_insn (gen_flushrs ());
10279 return const0_rtx;
10280
10281 case IA64_BUILTIN_INFQ:
10282 case IA64_BUILTIN_HUGE_VALQ:
10283 {
10284 enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10285 REAL_VALUE_TYPE inf;
10286 rtx tmp;
10287
10288 real_inf (&inf);
10289 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
10290
10291 tmp = validize_mem (force_const_mem (target_mode, tmp));
10292
10293 if (target == 0)
10294 target = gen_reg_rtx (target_mode);
10295
10296 emit_move_insn (target, tmp);
10297 return target;
10298 }
10299
10300 case IA64_BUILTIN_FABSQ:
10301 case IA64_BUILTIN_COPYSIGNQ:
10302 return expand_call (exp, target, ignore);
10303
10304 default:
10305 gcc_unreachable ();
10306 }
10307
10308 return NULL_RTX;
10309 }
10310
10311 /* Return the ia64 builtin for CODE. */
10312
10313 static tree
10314 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10315 {
10316 if (code >= IA64_BUILTIN_max)
10317 return error_mark_node;
10318
10319 return ia64_builtins[code];
10320 }
10321
10322 /* For HP-UX IA64, aggregate parameters are passed stored in the
10323 most significant bits of the stack slot. */
10324
10325 enum direction
10326 ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
10327 {
10328 /* Exception to normal case for structures/unions/etc. */
10329
10330 if (type && AGGREGATE_TYPE_P (type)
10331 && int_size_in_bytes (type) < UNITS_PER_WORD)
10332 return upward;
10333
10334 /* Fall back to the default. */
10335 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10336 }
10337
10338 /* Emit text to declare externally defined variables and functions, because
10339 the Intel assembler does not support undefined externals. */
10340
10341 void
10342 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10343 {
10344 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10345 set in order to avoid putting out names that are never really
10346 used. */
10347 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10348 {
10349 /* maybe_assemble_visibility will return 1 if the assembler
10350 visibility directive is output. */
10351 int need_visibility = ((*targetm.binds_local_p) (decl)
10352 && maybe_assemble_visibility (decl));
10353
10354 /* GNU as does not need anything here, but the HP linker does
10355 need something for external functions. */
10356 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10357 && TREE_CODE (decl) == FUNCTION_DECL)
10358 (*targetm.asm_out.globalize_decl_name) (file, decl);
10359 else if (need_visibility && !TARGET_GNU_AS)
10360 (*targetm.asm_out.globalize_label) (file, name);
10361 }
10362 }
10363
10364 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
10365 modes of word_mode and larger. Rename the TFmode libfuncs using the
10366 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10367 backward compatibility. */
10368
10369 static void
10370 ia64_init_libfuncs (void)
10371 {
10372 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10373 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10374 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10375 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10376
10377 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10378 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10379 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10380 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10381 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10382
10383 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10384 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10385 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10386 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10387 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10388 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10389
10390 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10391 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10392 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10393 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10394 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10395
10396 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10397 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10398 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10399 /* HP-UX 11.23 libc does not have a function for unsigned
10400 SImode-to-TFmode conversion. */
10401 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10402 }
10403
10404 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10405
10406 static void
10407 ia64_hpux_init_libfuncs (void)
10408 {
10409 ia64_init_libfuncs ();
10410
10411 /* The HP SI millicode division and mod functions expect DI arguments.
10412 By turning them off completely we avoid using both libgcc and the
10413 non-standard millicode routines and use the HP DI millicode routines
10414 instead. */
10415
10416 set_optab_libfunc (sdiv_optab, SImode, 0);
10417 set_optab_libfunc (udiv_optab, SImode, 0);
10418 set_optab_libfunc (smod_optab, SImode, 0);
10419 set_optab_libfunc (umod_optab, SImode, 0);
10420
10421 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10422 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10423 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10424 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10425
10426 /* HP-UX libc has TF min/max/abs routines in it. */
10427 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10428 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10429 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10430
10431 /* ia64_expand_compare uses this. */
10432 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10433
10434 /* These should never be used. */
10435 set_optab_libfunc (eq_optab, TFmode, 0);
10436 set_optab_libfunc (ne_optab, TFmode, 0);
10437 set_optab_libfunc (gt_optab, TFmode, 0);
10438 set_optab_libfunc (ge_optab, TFmode, 0);
10439 set_optab_libfunc (lt_optab, TFmode, 0);
10440 set_optab_libfunc (le_optab, TFmode, 0);
10441 }
10442
10443 /* Rename the division and modulus functions in VMS. */
10444
10445 static void
10446 ia64_vms_init_libfuncs (void)
10447 {
10448 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10449 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10450 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10451 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10452 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10453 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10454 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10455 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10456 abort_libfunc = init_one_libfunc ("decc$abort");
10457 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10458 #ifdef MEM_LIBFUNCS_INIT
10459 MEM_LIBFUNCS_INIT;
10460 #endif
10461 }
10462
10463 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10464 the HPUX conventions. */
10465
10466 static void
10467 ia64_sysv4_init_libfuncs (void)
10468 {
10469 ia64_init_libfuncs ();
10470
10471 /* These functions are not part of the HPUX TFmode interface. We
10472 use them instead of _U_Qfcmp, which doesn't work the way we
10473 expect. */
10474 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10475 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10476 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10477 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10478 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10479 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10480
10481 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10482 glibc doesn't have them. */
10483 }
10484
10485 /* Use soft-fp. */
10486
10487 static void
10488 ia64_soft_fp_init_libfuncs (void)
10489 {
10490 }
10491
10492 static bool
10493 ia64_vms_valid_pointer_mode (enum machine_mode mode)
10494 {
10495 return (mode == SImode || mode == DImode);
10496 }
10497 \f
10498 /* For HPUX, it is illegal to have relocations in shared segments. */
10499
10500 static int
10501 ia64_hpux_reloc_rw_mask (void)
10502 {
10503 return 3;
10504 }
10505
10506 /* For others, relax this so that relocations to local data go in
10507 read-only segments, but we still cannot allow global relocations
10508 in read-only segments. */
10509
10510 static int
10511 ia64_reloc_rw_mask (void)
10512 {
10513 return flag_pic ? 3 : 2;
10514 }
10515
10516 /* Return the section to use for X. The only special thing we do here
10517 is to honor small data. */
10518
10519 static section *
10520 ia64_select_rtx_section (enum machine_mode mode, rtx x,
10521 unsigned HOST_WIDE_INT align)
10522 {
10523 if (GET_MODE_SIZE (mode) > 0
10524 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10525 && !TARGET_NO_SDATA)
10526 return sdata_section;
10527 else
10528 return default_elf_select_rtx_section (mode, x, align);
10529 }
10530
10531 static unsigned int
10532 ia64_section_type_flags (tree decl, const char *name, int reloc)
10533 {
10534 unsigned int flags = 0;
10535
10536 if (strcmp (name, ".sdata") == 0
10537 || strncmp (name, ".sdata.", 7) == 0
10538 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10539 || strncmp (name, ".sdata2.", 8) == 0
10540 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10541 || strcmp (name, ".sbss") == 0
10542 || strncmp (name, ".sbss.", 6) == 0
10543 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10544 flags = SECTION_SMALL;
10545
10546 #if TARGET_ABI_OPEN_VMS
10547 if (decl && DECL_ATTRIBUTES (decl)
10548 && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
10549 flags |= SECTION_VMS_OVERLAY;
10550 #endif
10551
10552 flags |= default_section_type_flags (decl, name, reloc);
10553 return flags;
10554 }
10555
10556 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10557 structure type and that the address of that type should be passed
10558 in out0, rather than in r8. */
10559
10560 static bool
10561 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10562 {
10563 tree ret_type = TREE_TYPE (fntype);
10564
10565 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10566 as the structure return address parameter, if the return value
10567 type has a non-trivial copy constructor or destructor. It is not
10568 clear if this same convention should be used for other
10569 programming languages. Until G++ 3.4, we incorrectly used r8 for
10570 these return values. */
10571 return (abi_version_at_least (2)
10572 && ret_type
10573 && TYPE_MODE (ret_type) == BLKmode
10574 && TREE_ADDRESSABLE (ret_type)
10575 && strcmp (lang_hooks.name, "GNU C++") == 0);
10576 }
10577
10578 /* Output the assembler code for a thunk function. THUNK_DECL is the
10579 declaration for the thunk function itself, FUNCTION is the decl for
10580 the target function. DELTA is an immediate constant offset to be
10581 added to THIS. If VCALL_OFFSET is nonzero, the word at
10582 *(*this + vcall_offset) should be added to THIS. */
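/* In C-like pseudocode, what the code below computes is roughly:

this += delta;
if (vcall_offset)
this += *(ptrdiff_t *) (*(char **) this + vcall_offset);

a sketch only; the actual code works on Pmode/ptr_mode registers and
handles the ILP32 pointer-extension cases explicitly. */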
10583
10584 static void
10585 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10586 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10587 tree function)
10588 {
10589 rtx this_rtx, insn, funexp;
10590 unsigned int this_parmno;
10591 unsigned int this_regno;
10592 rtx delta_rtx;
10593
10594 reload_completed = 1;
10595 epilogue_completed = 1;
10596
10597 /* Set things up as ia64_expand_prologue might. */
10598 last_scratch_gr_reg = 15;
10599
10600 memset (&current_frame_info, 0, sizeof (current_frame_info));
10601 current_frame_info.spill_cfa_off = -16;
10602 current_frame_info.n_input_regs = 1;
10603 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10604
10605 /* Mark the end of the (empty) prologue. */
10606 emit_note (NOTE_INSN_PROLOGUE_END);
10607
10608 /* Figure out whether "this" will be the first parameter (the
10609 typical case) or the second parameter (as happens when the
10610 virtual function returns certain class objects). */
10611 this_parmno
10612 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10613 ? 1 : 0);
10614 this_regno = IN_REG (this_parmno);
10615 if (!TARGET_REG_NAMES)
10616 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10617
10618 this_rtx = gen_rtx_REG (Pmode, this_regno);
10619
10620 /* Apply the constant offset, if required. */
10621 delta_rtx = GEN_INT (delta);
10622 if (TARGET_ILP32)
10623 {
10624 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10625 REG_POINTER (tmp) = 1;
10626 if (delta && satisfies_constraint_I (delta_rtx))
10627 {
10628 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10629 delta = 0;
10630 }
10631 else
10632 emit_insn (gen_ptr_extend (this_rtx, tmp));
10633 }
10634 if (delta)
10635 {
10636 if (!satisfies_constraint_I (delta_rtx))
10637 {
10638 rtx tmp = gen_rtx_REG (Pmode, 2);
10639 emit_move_insn (tmp, delta_rtx);
10640 delta_rtx = tmp;
10641 }
10642 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10643 }
10644
10645 /* Apply the offset from the vtable, if required. */
10646 if (vcall_offset)
10647 {
10648 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10649 rtx tmp = gen_rtx_REG (Pmode, 2);
10650
10651 if (TARGET_ILP32)
10652 {
10653 rtx t = gen_rtx_REG (ptr_mode, 2);
10654 REG_POINTER (t) = 1;
10655 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10656 if (satisfies_constraint_I (vcall_offset_rtx))
10657 {
10658 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10659 vcall_offset = 0;
10660 }
10661 else
10662 emit_insn (gen_ptr_extend (tmp, t));
10663 }
10664 else
10665 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10666
10667 if (vcall_offset)
10668 {
10669 if (!satisfies_constraint_J (vcall_offset_rtx))
10670 {
10671 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10672 emit_move_insn (tmp2, vcall_offset_rtx);
10673 vcall_offset_rtx = tmp2;
10674 }
10675 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10676 }
10677
10678 if (TARGET_ILP32)
10679 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10680 else
10681 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10682
10683 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10684 }
10685
10686 /* Generate a tail call to the target function. */
10687 if (! TREE_USED (function))
10688 {
10689 assemble_external (function);
10690 TREE_USED (function) = 1;
10691 }
10692 funexp = XEXP (DECL_RTL (function), 0);
10693 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10694 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10695 insn = get_last_insn ();
10696 SIBLING_CALL_P (insn) = 1;
10697
10698 /* Code generation for calls relies on splitting. */
10699 reload_completed = 1;
10700 epilogue_completed = 1;
10701 try_split (PATTERN (insn), insn, 0);
10702
10703 emit_barrier ();
10704
10705 /* Run just enough of rest_of_compilation to get the insns emitted.
10706 There's not really enough bulk here to make other passes such as
10707 instruction scheduling worthwhile. Note that use_thunk calls
10708 assemble_start_function and assemble_end_function. */
10709
10710 insn_locators_alloc ();
10711 emit_all_insn_group_barriers (NULL);
10712 insn = get_insns ();
10713 shorten_branches (insn);
10714 final_start_function (insn, file, 1);
10715 final (insn, file, 1);
10716 final_end_function ();
10717
10718 reload_completed = 0;
10719 epilogue_completed = 0;
10720 }
10721
10722 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10723
10724 static rtx
10725 ia64_struct_value_rtx (tree fntype,
10726 int incoming ATTRIBUTE_UNUSED)
10727 {
10728 if (TARGET_ABI_OPEN_VMS
10729 || (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10730 return NULL_RTX;
10731 return gen_rtx_REG (Pmode, GR_REG (8));
10732 }
10733
10734 static bool
10735 ia64_scalar_mode_supported_p (enum machine_mode mode)
10736 {
10737 switch (mode)
10738 {
10739 case QImode:
10740 case HImode:
10741 case SImode:
10742 case DImode:
10743 case TImode:
10744 return true;
10745
10746 case SFmode:
10747 case DFmode:
10748 case XFmode:
10749 case RFmode:
10750 return true;
10751
10752 case TFmode:
10753 return true;
10754
10755 default:
10756 return false;
10757 }
10758 }
10759
10760 static bool
10761 ia64_vector_mode_supported_p (enum machine_mode mode)
10762 {
10763 switch (mode)
10764 {
10765 case V8QImode:
10766 case V4HImode:
10767 case V2SImode:
10768 return true;
10769
10770 case V2SFmode:
10771 return true;
10772
10773 default:
10774 return false;
10775 }
10776 }
10777
10778 /* Implement the FUNCTION_PROFILER macro. */
10779
10780 void
10781 ia64_output_function_profiler (FILE *file, int labelno)
10782 {
10783 bool indirect_call;
10784
10785 /* If the function needs a static chain and the static chain
10786 register is r15, we use an indirect call so as to bypass
10787 the PLT stub in case the executable is dynamically linked,
10788 because the stub clobbers r15 as per 5.3.6 of the psABI.
10789 We don't need to do that in non-canonical PIC mode. */
10790
10791 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10792 {
10793 gcc_assert (STATIC_CHAIN_REGNUM == 15);
10794 indirect_call = true;
10795 }
10796 else
10797 indirect_call = false;
10798
10799 if (TARGET_GNU_AS)
10800 fputs ("\t.prologue 4, r40\n", file);
10801 else
10802 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10803 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
10804
10805 if (NO_PROFILE_COUNTERS)
10806 fputs ("\tmov out3 = r0\n", file);
10807 else
10808 {
10809 char buf[20];
10810 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10811
10812 if (TARGET_AUTO_PIC)
10813 fputs ("\tmovl out3 = @gprel(", file);
10814 else
10815 fputs ("\taddl out3 = @ltoff(", file);
10816 assemble_name (file, buf);
10817 if (TARGET_AUTO_PIC)
10818 fputs (")\n", file);
10819 else
10820 fputs ("), r1\n", file);
10821 }
10822
10823 if (indirect_call)
10824 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10825 fputs ("\t;;\n", file);
10826
10827 fputs ("\t.save rp, r42\n", file);
10828 fputs ("\tmov out2 = b0\n", file);
10829 if (indirect_call)
10830 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
10831 fputs ("\t.body\n", file);
10832 fputs ("\tmov out1 = r1\n", file);
10833 if (indirect_call)
10834 {
10835 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10836 fputs ("\tmov b6 = r16\n", file);
10837 fputs ("\tld8 r1 = [r14]\n", file);
10838 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10839 }
10840 else
10841 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
10842 }
10843
10844 static GTY(()) rtx mcount_func_rtx;
10845 static rtx
10846 gen_mcount_func_rtx (void)
10847 {
10848 if (!mcount_func_rtx)
10849 mcount_func_rtx = init_one_libfunc ("_mcount");
10850 return mcount_func_rtx;
10851 }
10852
10853 void
10854 ia64_profile_hook (int labelno)
10855 {
10856 rtx label, ip;
10857
10858 if (NO_PROFILE_COUNTERS)
10859 label = const0_rtx;
10860 else
10861 {
10862 char buf[30];
10863 const char *label_name;
10864 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10865 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
10866 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
10867 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
10868 }
10869 ip = gen_reg_rtx (Pmode);
10870 emit_insn (gen_ip_value (ip));
10871 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
10872 VOIDmode, 3,
10873 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
10874 ip, Pmode,
10875 label, Pmode);
10876 }
10877
10878 /* Return the mangling of TYPE if it is an extended fundamental type. */
10879
10880 static const char *
10881 ia64_mangle_type (const_tree type)
10882 {
10883 type = TYPE_MAIN_VARIANT (type);
10884
10885 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
10886 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
10887 return NULL;
10888
10889 /* On HP-UX, "long double" is mangled as "e" so __float128 is
10890 mangled as "e". */
10891 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
10892 return "g";
10893 /* On HP-UX, "e" is not available as a mangling of __float80 so use
10894 an extended mangling. Elsewhere, "e" is available since long
10895 double is 80 bits. */
10896 if (TYPE_MODE (type) == XFmode)
10897 return TARGET_HPUX ? "u9__float80" : "e";
10898 if (TYPE_MODE (type) == RFmode)
10899 return "u7__fpreg";
10900 return NULL;
10901 }
10902
10903 /* Return the diagnostic message string if conversion from FROMTYPE to
10904 TOTYPE is not allowed, NULL otherwise. */
10905 static const char *
10906 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
10907 {
10908 /* Reject nontrivial conversion to or from __fpreg. */
10909 if (TYPE_MODE (fromtype) == RFmode
10910 && TYPE_MODE (totype) != RFmode
10911 && TYPE_MODE (totype) != VOIDmode)
10912 return N_("invalid conversion from %<__fpreg%>");
10913 if (TYPE_MODE (totype) == RFmode
10914 && TYPE_MODE (fromtype) != RFmode)
10915 return N_("invalid conversion to %<__fpreg%>");
10916 return NULL;
10917 }
10918
10919 /* Return the diagnostic message string if the unary operation OP is
10920 not permitted on TYPE, NULL otherwise. */
10921 static const char *
10922 ia64_invalid_unary_op (int op, const_tree type)
10923 {
10924 /* Reject operations on __fpreg other than unary + or &. */
10925 if (TYPE_MODE (type) == RFmode
10926 && op != CONVERT_EXPR
10927 && op != ADDR_EXPR)
10928 return N_("invalid operation on %<__fpreg%>");
10929 return NULL;
10930 }
10931
10932 /* Return the diagnostic message string if the binary operation OP is
10933 not permitted on TYPE1 and TYPE2, NULL otherwise. */
10934 static const char *
10935 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
10936 {
10937 /* Reject operations on __fpreg. */
10938 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
10939 return N_("invalid operation on %<__fpreg%>");
10940 return NULL;
10941 }
10942
10943 /* HP-UX version_id attribute.
10944 For object foo, if the version_id is set to 1234, put out an alias
10945 of '.alias foo "foo{1234}"'. We can't use "foo{1234}" in anything
10946 other than an alias statement because it is an illegal symbol name. */
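/* Illustrative (assumed) usage on HP-UX:
     extern int foo (void) __attribute__ ((version_id ("1234")));
   after which references to foo are made via the versioned alias
   emitted by the .alias directive described above.  */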
10947
10948 static tree
10949 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
10950 tree name ATTRIBUTE_UNUSED,
10951 tree args,
10952 int flags ATTRIBUTE_UNUSED,
10953 bool *no_add_attrs)
10954 {
10955 tree arg = TREE_VALUE (args);
10956
10957 if (TREE_CODE (arg) != STRING_CST)
10958 {
10959 error ("version attribute is not a string");
10960 *no_add_attrs = true;
10961 return NULL_TREE;
10962 }
10963 return NULL_TREE;
10964 }
10965
10966 /* Target hook for c_mode_for_suffix. */
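/* This is what allows floating constants to be written with the 'q'
   suffix for __float128 (TFmode) and the 'w' suffix for __float80
   (XFmode), e.g. 1.5q or 1.5w.  */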
10967
10968 static enum machine_mode
10969 ia64_c_mode_for_suffix (char suffix)
10970 {
10971 if (suffix == 'q')
10972 return TFmode;
10973 if (suffix == 'w')
10974 return XFmode;
10975
10976 return VOIDmode;
10977 }
10978
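/* Lazily created, GC-rooted DFmode constants 0.5 and 0.375, for use
   from the machine description (presumably the Newton-Raphson style
   square root and division expansions in ia64.md).  */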
10979 static GTY(()) rtx ia64_dconst_0_5_rtx;
10980
10981 rtx
10982 ia64_dconst_0_5 (void)
10983 {
10984 if (! ia64_dconst_0_5_rtx)
10985 {
10986 REAL_VALUE_TYPE rv;
10987 real_from_string (&rv, "0.5");
10988 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
10989 }
10990 return ia64_dconst_0_5_rtx;
10991 }
10992
10993 static GTY(()) rtx ia64_dconst_0_375_rtx;
10994
10995 rtx
10996 ia64_dconst_0_375 (void)
10997 {
10998 if (! ia64_dconst_0_375_rtx)
10999 {
11000 REAL_VALUE_TYPE rv;
11001 real_from_string (&rv, "0.375");
11002 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11003 }
11004 return ia64_dconst_0_375_rtx;
11005 }
11006
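/* Return the mode in which register REGNO should be accessed raw:
   floating-point registers always use their full XFmode width, all
   other registers use the default.  */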
11007 static enum machine_mode
11008 ia64_get_reg_raw_mode (int regno)
11009 {
11010 if (FR_REGNO_P (regno))
11011 return XFmode;
11012 return default_get_reg_raw_mode (regno);
11013 }
11014
11015 /* Always default to .text section until HP-UX linker is fixed. */
11016
11017 ATTRIBUTE_UNUSED static section *
11018 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11019 enum node_frequency freq ATTRIBUTE_UNUSED,
11020 bool startup ATTRIBUTE_UNUSED,
11021 bool exit ATTRIBUTE_UNUSED)
11022 {
11023 return NULL;
11024 }
11025 \f
11026 /* Construct (set target (vec_select op0 (parallel perm))) and
11027 return true if that's a valid instruction in the active ISA. */
11028
11029 static bool
11030 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11031 {
11032 rtx rperm[MAX_VECT_LEN], x;
11033 unsigned i;
11034
11035 for (i = 0; i < nelt; ++i)
11036 rperm[i] = GEN_INT (perm[i]);
11037
11038 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11039 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11040 x = gen_rtx_SET (VOIDmode, target, x);
11041
11042 x = emit_insn (x);
11043 if (recog_memoized (x) < 0)
11044 {
11045 remove_insn (x);
11046 return false;
11047 }
11048 return true;
11049 }
11050
11051 /* Similar, but generate a vec_concat from op0 and op1 as well. */
11052
11053 static bool
11054 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11055 const unsigned char *perm, unsigned nelt)
11056 {
11057 enum machine_mode v2mode;
11058 rtx x;
11059
11060 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
11061 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11062 return expand_vselect (target, x, perm, nelt);
11063 }
11064
11065 /* Try to expand a no-op permutation. */
11066
11067 static bool
11068 expand_vec_perm_identity (struct expand_vec_perm_d *d)
11069 {
11070 unsigned i, nelt = d->nelt;
11071
11072 for (i = 0; i < nelt; ++i)
11073 if (d->perm[i] != i)
11074 return false;
11075
11076 if (!d->testing_p)
11077 emit_move_insn (d->target, d->op0);
11078
11079 return true;
11080 }
11081
11082 /* Try to expand D via a shrp instruction. */
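/* For example, the one-operand V4HImode rotate {1,2,3,0} satisfies
   perm[i] == (1 + i) & 3 and is therefore emitted as a single shrp
   on the 64-bit view of the vector.  */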
11083
11084 static bool
11085 expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11086 {
11087 unsigned i, nelt = d->nelt, shift, mask;
11088 rtx tmp, hi, lo;
11089
11090 /* ??? Don't force V2SFmode into the integer registers. */
11091 if (d->vmode == V2SFmode)
11092 return false;
11093
11094 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11095
11096 shift = d->perm[0];
11097 if (BYTES_BIG_ENDIAN && shift > nelt)
11098 return false;
11099
11100 for (i = 1; i < nelt; ++i)
11101 if (d->perm[i] != ((shift + i) & mask))
11102 return false;
11103
11104 if (d->testing_p)
11105 return true;
11106
11107 hi = shift < nelt ? d->op1 : d->op0;
11108 lo = shift < nelt ? d->op0 : d->op1;
11109
11110 shift %= nelt;
11111
11112 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11113
11114 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11115 gcc_assert (IN_RANGE (shift, 1, 63));
11116
11117 /* Recall that big-endian elements are numbered starting at the top of
11118 the register. Ideally we'd have a shift-left-pair. But since we
11119 don't, convert to a shift the other direction. */
11120 if (BYTES_BIG_ENDIAN)
11121 shift = 64 - shift;
11122
11123 tmp = gen_reg_rtx (DImode);
11124 hi = gen_lowpart (DImode, hi);
11125 lo = gen_lowpart (DImode, lo);
11126 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
11127
11128 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11129 return true;
11130 }
11131
11132 /* Try to instantiate D in a single instruction. */
11133
11134 static bool
11135 expand_vec_perm_1 (struct expand_vec_perm_d *d)
11136 {
11137 unsigned i, nelt = d->nelt;
11138 unsigned char perm2[MAX_VECT_LEN];
11139
11140 /* Try single-operand selections. */
11141 if (d->one_operand_p)
11142 {
11143 if (expand_vec_perm_identity (d))
11144 return true;
11145 if (expand_vselect (d->target, d->op0, d->perm, nelt))
11146 return true;
11147 }
11148
11149 /* Try two operand selections. */
11150 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11151 return true;
11152
11153 /* Recognize interleave style patterns with reversed operands. */
11154 if (!d->one_operand_p)
11155 {
11156 for (i = 0; i < nelt; ++i)
11157 {
11158 unsigned e = d->perm[i];
11159 if (e >= nelt)
11160 e -= nelt;
11161 else
11162 e += nelt;
11163 perm2[i] = e;
11164 }
11165
11166 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11167 return true;
11168 }
11169
11170 if (expand_vec_perm_shrp (d))
11171 return true;
11172
11173 /* ??? Look for deposit-like permutations where most of the result
11174 comes from one vector unchanged and the rest comes from a
11175 sequential hunk of the other vector. */
11176
11177 return false;
11178 }
11179
11180 /* Pattern match broadcast permutations. */
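/* E.g. a V8QImode splat of element 2 is done by extracting that byte
   with extzv and broadcasting it with mux1, while V2SI/V2SF splats
   are handled as an interleave of the operand with itself.  */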
11181
11182 static bool
11183 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11184 {
11185 unsigned i, elt, nelt = d->nelt;
11186 unsigned char perm2[2];
11187 rtx temp;
11188 bool ok;
11189
11190 if (!d->one_operand_p)
11191 return false;
11192
11193 elt = d->perm[0];
11194 for (i = 1; i < nelt; ++i)
11195 if (d->perm[i] != elt)
11196 return false;
11197
11198 switch (d->vmode)
11199 {
11200 case V2SImode:
11201 case V2SFmode:
11202 /* Implementable by interleave. */
11203 perm2[0] = elt;
11204 perm2[1] = elt + 2;
11205 ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11206 gcc_assert (ok);
11207 break;
11208
11209 case V8QImode:
11210 /* Implementable by extract + broadcast. */
11211 if (BYTES_BIG_ENDIAN)
11212 elt = 7 - elt;
11213 elt *= BITS_PER_UNIT;
11214 temp = gen_reg_rtx (DImode);
11215 emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
11216 GEN_INT (8), GEN_INT (elt)));
11217 emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11218 break;
11219
11220 case V4HImode:
11221 /* Should have been matched directly by vec_select. */
11222 default:
11223 gcc_unreachable ();
11224 }
11225
11226 return true;
11227 }
11228
11229 /* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
11230 two vector permutation into a single vector permutation by using
11231 an interleave operation to merge the vectors. */
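/* For instance, the V4HImode selection {0,2,4,6} (the even elements
   of both inputs) is rewritten as the two-operand "mix even" remap
   {0,4,2,6} followed by the one-operand shuffle {0,2,1,3}, which
   expand_vec_perm_1 can then match on its own.  */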
11232
11233 static bool
11234 expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11235 {
11236 struct expand_vec_perm_d dremap, dfinal;
11237 unsigned char remap[2 * MAX_VECT_LEN];
11238 unsigned contents, i, nelt, nelt2;
11239 unsigned h0, h1, h2, h3;
11240 rtx seq;
11241 bool ok;
11242
11243 if (d->one_operand_p)
11244 return false;
11245
11246 nelt = d->nelt;
11247 nelt2 = nelt / 2;
11248
11249 /* Examine from whence the elements come. */
11250 contents = 0;
11251 for (i = 0; i < nelt; ++i)
11252 contents |= 1u << d->perm[i];
11253
11254 memset (remap, 0xff, sizeof (remap));
11255 dremap = *d;
11256
11257 h0 = (1u << nelt2) - 1;
11258 h1 = h0 << nelt2;
11259 h2 = h0 << nelt;
11260 h3 = h0 << (nelt + nelt2);
11261
11262 if ((contents & (h0 | h2)) == contents) /* punpck even halves */
11263 {
11264 for (i = 0; i < nelt; ++i)
11265 {
11266 unsigned which = i / 2 + (i & 1 ? nelt : 0);
11267 remap[which] = i;
11268 dremap.perm[i] = which;
11269 }
11270 }
11271 else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */
11272 {
11273 for (i = 0; i < nelt; ++i)
11274 {
11275 unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11276 remap[which] = i;
11277 dremap.perm[i] = which;
11278 }
11279 }
11280 else if ((contents & 0x5555) == contents) /* mix even elements */
11281 {
11282 for (i = 0; i < nelt; ++i)
11283 {
11284 unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11285 remap[which] = i;
11286 dremap.perm[i] = which;
11287 }
11288 }
11289 else if ((contents & 0xaaaa) == contents) /* mix odd elements */
11290 {
11291 for (i = 0; i < nelt; ++i)
11292 {
11293 unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11294 remap[which] = i;
11295 dremap.perm[i] = which;
11296 }
11297 }
11298 else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
11299 {
11300 unsigned shift = ctz_hwi (contents);
11301 for (i = 0; i < nelt; ++i)
11302 {
11303 unsigned which = (i + shift) & (2 * nelt - 1);
11304 remap[which] = i;
11305 dremap.perm[i] = which;
11306 }
11307 }
11308 else
11309 return false;
11310
11311 /* Use the remapping array set up above to move the elements from their
11312 swizzled locations into their final destinations. */
11313 dfinal = *d;
11314 for (i = 0; i < nelt; ++i)
11315 {
11316 unsigned e = remap[d->perm[i]];
11317 gcc_assert (e < nelt);
11318 dfinal.perm[i] = e;
11319 }
11320 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
11321 dfinal.op1 = dfinal.op0;
11322 dfinal.one_operand_p = true;
11323 dremap.target = dfinal.op0;
11324
11325 /* Test if the final remap can be done with a single insn. For V4HImode
11326 this *will* succeed. For V8QImode or V2SImode it may not. */
11327 start_sequence ();
11328 ok = expand_vec_perm_1 (&dfinal);
11329 seq = get_insns ();
11330 end_sequence ();
11331 if (!ok)
11332 return false;
11333 if (d->testing_p)
11334 return true;
11335
11336 ok = expand_vec_perm_1 (&dremap);
11337 gcc_assert (ok);
11338
11339 emit_insn (seq);
11340 return true;
11341 }
11342
11343 /* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode
11344 constant permutation via two mux2 operations and a masked merge. */
11345
11346 static bool
11347 expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11348 {
11349 unsigned char perm2[4];
11350 rtx rmask[4];
11351 unsigned i;
11352 rtx t0, t1, mask, x;
11353 bool ok;
11354
11355 if (d->vmode != V4HImode || d->one_operand_p)
11356 return false;
11357 if (d->testing_p)
11358 return true;
11359
11360 for (i = 0; i < 4; ++i)
11361 {
11362 perm2[i] = d->perm[i] & 3;
11363 rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11364 }
11365 mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11366 mask = force_reg (V4HImode, mask);
11367
11368 t0 = gen_reg_rtx (V4HImode);
11369 t1 = gen_reg_rtx (V4HImode);
11370
11371 ok = expand_vselect (t0, d->op0, perm2, 4);
11372 gcc_assert (ok);
11373 ok = expand_vselect (t1, d->op1, perm2, 4);
11374 gcc_assert (ok);
11375
11376 x = gen_rtx_AND (V4HImode, mask, t0);
11377 emit_insn (gen_rtx_SET (VOIDmode, t0, x));
11378
11379 x = gen_rtx_NOT (V4HImode, mask);
11380 x = gen_rtx_AND (V4HImode, x, t1);
11381 emit_insn (gen_rtx_SET (VOIDmode, t1, x));
11382
11383 x = gen_rtx_IOR (V4HImode, t0, t1);
11384 emit_insn (gen_rtx_SET (VOIDmode, d->target, x));
11385
11386 return true;
11387 }
11388
11389 /* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11390 With all of the interface bits taken care of, perform the expansion
11391 in D and return true on success. */
11392
11393 static bool
11394 ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
11395 {
11396 if (expand_vec_perm_1 (d))
11397 return true;
11398 if (expand_vec_perm_broadcast (d))
11399 return true;
11400 if (expand_vec_perm_interleave_2 (d))
11401 return true;
11402 if (expand_vec_perm_v4hi_5 (d))
11403 return true;
11404 return false;
11405 }
11406
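/* Expand a constant vector permutation.  OPERANDS[0] is the target,
   OPERANDS[1] and OPERANDS[2] the input vectors, and OPERANDS[3] the
   CONST_VECTOR selector.  Return true if the permutation was
   expanded.  */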
11407 bool
11408 ia64_expand_vec_perm_const (rtx operands[4])
11409 {
11410 struct expand_vec_perm_d d;
11411 unsigned char perm[MAX_VECT_LEN];
11412 int i, nelt, which;
11413 rtx sel;
11414
11415 d.target = operands[0];
11416 d.op0 = operands[1];
11417 d.op1 = operands[2];
11418 sel = operands[3];
11419
11420 d.vmode = GET_MODE (d.target);
11421 gcc_assert (VECTOR_MODE_P (d.vmode));
11422 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11423 d.testing_p = false;
11424
11425 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
11426 gcc_assert (XVECLEN (sel, 0) == nelt);
11427 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
11428
11429 for (i = which = 0; i < nelt; ++i)
11430 {
11431 rtx e = XVECEXP (sel, 0, i);
11432 int ei = INTVAL (e) & (2 * nelt - 1);
11433
11434 which |= (ei < nelt ? 1 : 2);
11435 d.perm[i] = ei;
11436 perm[i] = ei;
11437 }
11438
11439 switch (which)
11440 {
11441 default:
11442 gcc_unreachable ();
11443
11444 case 3:
11445 if (!rtx_equal_p (d.op0, d.op1))
11446 {
11447 d.one_operand_p = false;
11448 break;
11449 }
11450
11451 /* The elements of PERM do not suggest that only the first operand
11452 is used, but both operands are identical. Allow easier matching
11453 of the permutation by folding the permutation into the single
11454 input vector. */
11455 for (i = 0; i < nelt; ++i)
11456 if (d.perm[i] >= nelt)
11457 d.perm[i] -= nelt;
11458 /* FALLTHRU */
11459
11460 case 1:
11461 d.op1 = d.op0;
11462 d.one_operand_p = true;
11463 break;
11464
11465 case 2:
11466 for (i = 0; i < nelt; ++i)
11467 d.perm[i] -= nelt;
11468 d.op0 = d.op1;
11469 d.one_operand_p = true;
11470 break;
11471 }
11472
11473 if (ia64_expand_vec_perm_const_1 (&d))
11474 return true;
11475
11476 /* If the mask says both arguments are needed, but they are the same,
11477 the above tried to expand with one_operand_p true. If that didn't
11478 work, retry with one_operand_p false, as that's what we used in _ok. */
11479 if (which == 3 && d.one_operand_p)
11480 {
11481 memcpy (d.perm, perm, sizeof (perm));
11482 d.one_operand_p = false;
11483 return ia64_expand_vec_perm_const_1 (&d);
11484 }
11485
11486 return false;
11487 }
11488
11489 /* Implement targetm.vectorize.vec_perm_const_ok. */
11490
11491 static bool
11492 ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
11493 const unsigned char *sel)
11494 {
11495 struct expand_vec_perm_d d;
11496 unsigned int i, nelt, which;
11497 bool ret;
11498
11499 d.vmode = vmode;
11500 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11501 d.testing_p = true;
11502
11503 /* Copy the selector elements into the permutation array in D and
11504 note which input operands they reference. */
11505 memcpy (d.perm, sel, nelt);
11506 for (i = which = 0; i < nelt; ++i)
11507 {
11508 unsigned char e = d.perm[i];
11509 gcc_assert (e < 2 * nelt);
11510 which |= (e < nelt ? 1 : 2);
11511 }
11512
11513 /* For all elements from second vector, fold the elements to first. */
11514 if (which == 2)
11515 for (i = 0; i < nelt; ++i)
11516 d.perm[i] -= nelt;
11517
11518 /* Record whether the permutation actually uses both input operands. */
11519 d.one_operand_p = (which != 3);
11520
11521 /* Go through the motions and see if we can figure out how to
11522 generate the requested permutation. */
11523 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11524 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11525 if (!d.one_operand_p)
11526 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
11527
11528 start_sequence ();
11529 ret = ia64_expand_vec_perm_const_1 (&d);
11530 end_sequence ();
11531
11532 return ret;
11533 }
11534
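/* Expand a V2SFmode vec_set: insert scalar OPERANDS[1] into vector
   OPERANDS[0] at element index OPERANDS[2].  The scalar is packed
   into a temporary vector with fpack and then merged in with a
   constant permutation.  */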
11535 void
11536 ia64_expand_vec_setv2sf (rtx operands[3])
11537 {
11538 struct expand_vec_perm_d d;
11539 unsigned int which;
11540 bool ok;
11541
11542 d.target = operands[0];
11543 d.op0 = operands[0];
11544 d.op1 = gen_reg_rtx (V2SFmode);
11545 d.vmode = V2SFmode;
11546 d.nelt = 2;
11547 d.one_operand_p = false;
11548 d.testing_p = false;
11549
11550 which = INTVAL (operands[2]);
11551 gcc_assert (which <= 1);
11552 d.perm[0] = 1 - which;
11553 d.perm[1] = which + 2;
11554
11555 emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
11556
11557 ok = ia64_expand_vec_perm_const_1 (&d);
11558 gcc_assert (ok);
11559 }
11560
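/* Expand an even/odd extraction: TARGET receives the even (ODD == 0)
   or odd (ODD == 1) numbered elements of the concatenation of OP0
   and OP1.  */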
11561 void
11562 ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
11563 {
11564 struct expand_vec_perm_d d;
11565 enum machine_mode vmode = GET_MODE (target);
11566 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
11567 bool ok;
11568
11569 d.target = target;
11570 d.op0 = op0;
11571 d.op1 = op1;
11572 d.vmode = vmode;
11573 d.nelt = nelt;
11574 d.one_operand_p = false;
11575 d.testing_p = false;
11576
11577 for (i = 0; i < nelt; ++i)
11578 d.perm[i] = i * 2 + odd;
11579
11580 ok = ia64_expand_vec_perm_const_1 (&d);
11581 gcc_assert (ok);
11582 }
11583
11584 #include "gt-ia64.h"