1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "real.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "except.h"
42 #include "function.h"
43 #include "ggc.h"
44 #include "basic-block.h"
45 #include "toplev.h"
46 #include "sched-int.h"
47 #include "timevar.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "tm_p.h"
51 #include "hashtab.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "tree-gimple.h"
55
56 /* This is used for communication between ASM_OUTPUT_LABEL and
57 ASM_OUTPUT_LABELREF. */
58 int ia64_asm_output_label = 0;
59
60 /* Define the information needed to generate branch and scc insns. This is
61 stored from the compare operation. */
62 struct rtx_def * ia64_compare_op0;
63 struct rtx_def * ia64_compare_op1;
64
65 /* Register names for ia64_expand_prologue. */
66 static const char * const ia64_reg_numbers[96] =
67 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
68 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
69 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
70 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
71 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
72 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
73 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
74 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
75 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
76 "r104","r105","r106","r107","r108","r109","r110","r111",
77 "r112","r113","r114","r115","r116","r117","r118","r119",
78 "r120","r121","r122","r123","r124","r125","r126","r127"};
79
80 /* ??? These strings could be shared with REGISTER_NAMES. */
81 static const char * const ia64_input_reg_names[8] =
82 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
83
84 /* ??? These strings could be shared with REGISTER_NAMES. */
85 static const char * const ia64_local_reg_names[80] =
86 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
87 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
88 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
89 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
90 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
91 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
92 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
93 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
94 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
95 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
96
97 /* ??? These strings could be shared with REGISTER_NAMES. */
98 static const char * const ia64_output_reg_names[8] =
99 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
100
 101 /* Which CPU we are scheduling for.  */
102 enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
103
104 /* Determines whether we run our final scheduling pass or not. We always
105 avoid the normal second scheduling pass. */
106 static int ia64_flag_schedule_insns2;
107
108 /* Determines whether we run variable tracking in machine dependent
109 reorganization. */
110 static int ia64_flag_var_tracking;
111
112 /* Variables which are this size or smaller are put in the sdata/sbss
113 sections. */
114
115 unsigned int ia64_section_threshold;
116
117 /* The following variable is used by the DFA insn scheduler. The value is
118 TRUE if we do insn bundling instead of insn scheduling. */
119 int bundling_p = 0;
120
121 /* Structure to be filled in by ia64_compute_frame_size with register
122 save masks and offsets for the current function. */
123
124 struct ia64_frame_info
125 {
126 HOST_WIDE_INT total_size; /* size of the stack frame, not including
127 the caller's scratch area. */
128 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
129 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
130 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
131 HARD_REG_SET mask; /* mask of saved registers. */
132 unsigned int gr_used_mask; /* mask of registers in use as gr spill
133 registers or long-term scratches. */
134 int n_spilled; /* number of spilled registers. */
135 int reg_fp; /* register for fp. */
136 int reg_save_b0; /* save register for b0. */
137 int reg_save_pr; /* save register for prs. */
138 int reg_save_ar_pfs; /* save register for ar.pfs. */
139 int reg_save_ar_unat; /* save register for ar.unat. */
140 int reg_save_ar_lc; /* save register for ar.lc. */
141 int reg_save_gp; /* save register for gp. */
142 int n_input_regs; /* number of input registers used. */
143 int n_local_regs; /* number of local registers used. */
144 int n_output_regs; /* number of output registers used. */
145 int n_rotate_regs; /* number of rotating registers used. */
146
147 char need_regstk; /* true if a .regstk directive needed. */
148 char initialized; /* true if the data is finalized. */
149 };
150
151 /* Current frame information calculated by ia64_compute_frame_size. */
152 static struct ia64_frame_info current_frame_info;
153 \f
154 static int ia64_first_cycle_multipass_dfa_lookahead (void);
155 static void ia64_dependencies_evaluation_hook (rtx, rtx);
156 static void ia64_init_dfa_pre_cycle_insn (void);
157 static rtx ia64_dfa_pre_cycle_insn (void);
158 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
159 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
160 static rtx gen_tls_get_addr (void);
161 static rtx gen_thread_pointer (void);
162 static int find_gr_spill (int);
163 static int next_scratch_gr_reg (void);
164 static void mark_reg_gr_used_mask (rtx, void *);
165 static void ia64_compute_frame_size (HOST_WIDE_INT);
166 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
167 static void finish_spill_pointers (void);
168 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
169 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
170 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
171 static rtx gen_movdi_x (rtx, rtx, rtx);
172 static rtx gen_fr_spill_x (rtx, rtx, rtx);
173 static rtx gen_fr_restore_x (rtx, rtx, rtx);
174
175 static enum machine_mode hfa_element_mode (tree, bool);
176 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
177 tree, int *, int);
178 static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
179 tree, bool);
180 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
181 tree, bool);
182 static bool ia64_function_ok_for_sibcall (tree, tree);
183 static bool ia64_return_in_memory (tree, tree);
184 static bool ia64_rtx_costs (rtx, int, int, int *);
185 static void fix_range (const char *);
186 static bool ia64_handle_option (size_t, const char *, int);
187 static struct machine_function * ia64_init_machine_status (void);
188 static void emit_insn_group_barriers (FILE *);
189 static void emit_all_insn_group_barriers (FILE *);
190 static void final_emit_insn_group_barriers (FILE *);
191 static void emit_predicate_relation_info (void);
192 static void ia64_reorg (void);
193 static bool ia64_in_small_data_p (tree);
194 static void process_epilogue (void);
195 static int process_set (FILE *, rtx);
196
197 static bool ia64_assemble_integer (rtx, unsigned int, int);
198 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
199 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
200 static void ia64_output_function_end_prologue (FILE *);
201
202 static int ia64_issue_rate (void);
203 static int ia64_adjust_cost (rtx, rtx, rtx, int);
204 static void ia64_sched_init (FILE *, int, int);
205 static void ia64_sched_finish (FILE *, int);
206 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
207 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
208 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
209 static int ia64_variable_issue (FILE *, int, rtx, int);
210
211 static struct bundle_state *get_free_bundle_state (void);
212 static void free_bundle_state (struct bundle_state *);
213 static void initiate_bundle_states (void);
214 static void finish_bundle_states (void);
215 static unsigned bundle_state_hash (const void *);
216 static int bundle_state_eq_p (const void *, const void *);
217 static int insert_bundle_state (struct bundle_state *);
218 static void initiate_bundle_state_table (void);
219 static void finish_bundle_state_table (void);
220 static int try_issue_nops (struct bundle_state *, int);
221 static int try_issue_insn (struct bundle_state *, rtx);
222 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
223 static int get_max_pos (state_t);
224 static int get_template (state_t, int);
225
226 static rtx get_next_important_insn (rtx, rtx);
227 static void bundling (FILE *, int, rtx, rtx);
228
229 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
230 HOST_WIDE_INT, tree);
231 static void ia64_file_start (void);
232
233 static void ia64_select_rtx_section (enum machine_mode, rtx,
234 unsigned HOST_WIDE_INT);
235 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
236 ATTRIBUTE_UNUSED;
237 static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
238 ATTRIBUTE_UNUSED;
239 static void ia64_rwreloc_unique_section (tree, int)
240 ATTRIBUTE_UNUSED;
241 static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
242 unsigned HOST_WIDE_INT)
243 ATTRIBUTE_UNUSED;
244 static unsigned int ia64_section_type_flags (tree, const char *, int);
245 static void ia64_hpux_add_extern_decl (tree decl)
246 ATTRIBUTE_UNUSED;
247 static void ia64_hpux_file_end (void)
248 ATTRIBUTE_UNUSED;
249 static void ia64_init_libfuncs (void)
250 ATTRIBUTE_UNUSED;
251 static void ia64_hpux_init_libfuncs (void)
252 ATTRIBUTE_UNUSED;
253 static void ia64_sysv4_init_libfuncs (void)
254 ATTRIBUTE_UNUSED;
255 static void ia64_vms_init_libfuncs (void)
256 ATTRIBUTE_UNUSED;
257
258 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
259 static void ia64_encode_section_info (tree, rtx, int);
260 static rtx ia64_struct_value_rtx (tree, int);
261 static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
262 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
263 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
264 static bool ia64_cannot_force_const_mem (rtx);
265 \f
266 /* Table of valid machine attributes. */
267 static const struct attribute_spec ia64_attribute_table[] =
268 {
269 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
270 { "syscall_linkage", 0, 0, false, true, true, NULL },
271 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
272 { NULL, 0, 0, false, false, false, NULL }
273 };
274
275 /* Initialize the GCC target structure. */
276 #undef TARGET_ATTRIBUTE_TABLE
277 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
278
279 #undef TARGET_INIT_BUILTINS
280 #define TARGET_INIT_BUILTINS ia64_init_builtins
281
282 #undef TARGET_EXPAND_BUILTIN
283 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
284
285 #undef TARGET_ASM_BYTE_OP
286 #define TARGET_ASM_BYTE_OP "\tdata1\t"
287 #undef TARGET_ASM_ALIGNED_HI_OP
288 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
289 #undef TARGET_ASM_ALIGNED_SI_OP
290 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
291 #undef TARGET_ASM_ALIGNED_DI_OP
292 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
293 #undef TARGET_ASM_UNALIGNED_HI_OP
294 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
295 #undef TARGET_ASM_UNALIGNED_SI_OP
296 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
297 #undef TARGET_ASM_UNALIGNED_DI_OP
298 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
299 #undef TARGET_ASM_INTEGER
300 #define TARGET_ASM_INTEGER ia64_assemble_integer
301
302 #undef TARGET_ASM_FUNCTION_PROLOGUE
303 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
304 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
305 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
306 #undef TARGET_ASM_FUNCTION_EPILOGUE
307 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
308
309 #undef TARGET_IN_SMALL_DATA_P
310 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
311
312 #undef TARGET_SCHED_ADJUST_COST
313 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
314 #undef TARGET_SCHED_ISSUE_RATE
315 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
316 #undef TARGET_SCHED_VARIABLE_ISSUE
317 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
318 #undef TARGET_SCHED_INIT
319 #define TARGET_SCHED_INIT ia64_sched_init
320 #undef TARGET_SCHED_FINISH
321 #define TARGET_SCHED_FINISH ia64_sched_finish
322 #undef TARGET_SCHED_REORDER
323 #define TARGET_SCHED_REORDER ia64_sched_reorder
324 #undef TARGET_SCHED_REORDER2
325 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
326
327 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
328 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
329
330 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
331 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
332
333 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
334 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
335 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
336 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
337
338 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
339 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
340 ia64_first_cycle_multipass_dfa_lookahead_guard
341
342 #undef TARGET_SCHED_DFA_NEW_CYCLE
343 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
344
345 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
346 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
347 #undef TARGET_PASS_BY_REFERENCE
348 #define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
349 #undef TARGET_ARG_PARTIAL_BYTES
350 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
351
352 #undef TARGET_ASM_OUTPUT_MI_THUNK
353 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
354 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
355 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
356
357 #undef TARGET_ASM_FILE_START
358 #define TARGET_ASM_FILE_START ia64_file_start
359
360 #undef TARGET_RTX_COSTS
361 #define TARGET_RTX_COSTS ia64_rtx_costs
362 #undef TARGET_ADDRESS_COST
363 #define TARGET_ADDRESS_COST hook_int_rtx_0
364
365 #undef TARGET_MACHINE_DEPENDENT_REORG
366 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
367
368 #undef TARGET_ENCODE_SECTION_INFO
369 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
370
371 #undef TARGET_SECTION_TYPE_FLAGS
372 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
373
374 #ifdef HAVE_AS_TLS
375 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
376 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
377 #endif
378
379 /* ??? ABI doesn't allow us to define this. */
380 #if 0
381 #undef TARGET_PROMOTE_FUNCTION_ARGS
382 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
383 #endif
384
385 /* ??? ABI doesn't allow us to define this. */
386 #if 0
387 #undef TARGET_PROMOTE_FUNCTION_RETURN
388 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
389 #endif
390
391 /* ??? Investigate. */
392 #if 0
393 #undef TARGET_PROMOTE_PROTOTYPES
394 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
395 #endif
396
397 #undef TARGET_STRUCT_VALUE_RTX
398 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
399 #undef TARGET_RETURN_IN_MEMORY
400 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
401 #undef TARGET_SETUP_INCOMING_VARARGS
402 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
403 #undef TARGET_STRICT_ARGUMENT_NAMING
404 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
405 #undef TARGET_MUST_PASS_IN_STACK
406 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
407
408 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
409 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
410
411 #undef TARGET_UNWIND_EMIT
412 #define TARGET_UNWIND_EMIT process_for_unwind_directive
413
414 #undef TARGET_SCALAR_MODE_SUPPORTED_P
415 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
416 #undef TARGET_VECTOR_MODE_SUPPORTED_P
417 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
418
419 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
420 in an order different from the specified program order. */
421 #undef TARGET_RELAXED_ORDERING
422 #define TARGET_RELAXED_ORDERING true
423
424 #undef TARGET_DEFAULT_TARGET_FLAGS
425 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
426 #undef TARGET_HANDLE_OPTION
427 #define TARGET_HANDLE_OPTION ia64_handle_option
428
429 #undef TARGET_CANNOT_FORCE_CONST_MEM
430 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
431
432 struct gcc_target targetm = TARGET_INITIALIZER;
433 \f
434 typedef enum
435 {
436 ADDR_AREA_NORMAL, /* normal address area */
437 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
438 }
439 ia64_addr_area;
440
441 static GTY(()) tree small_ident1;
442 static GTY(()) tree small_ident2;
443
444 static void
445 init_idents (void)
446 {
447 if (small_ident1 == 0)
448 {
449 small_ident1 = get_identifier ("small");
450 small_ident2 = get_identifier ("__small__");
451 }
452 }
453
454 /* Retrieve the address area that has been chosen for the given decl. */
455
456 static ia64_addr_area
457 ia64_get_addr_area (tree decl)
458 {
459 tree model_attr;
460
461 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
462 if (model_attr)
463 {
464 tree id;
465
466 init_idents ();
467 id = TREE_VALUE (TREE_VALUE (model_attr));
468 if (id == small_ident1 || id == small_ident2)
469 return ADDR_AREA_SMALL;
470 }
471 return ADDR_AREA_NORMAL;
472 }
473
474 static tree
475 ia64_handle_model_attribute (tree *node, tree name, tree args,
476 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
477 {
478 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
479 ia64_addr_area area;
480 tree arg, decl = *node;
481
482 init_idents ();
483 arg = TREE_VALUE (args);
484 if (arg == small_ident1 || arg == small_ident2)
485 {
486 addr_area = ADDR_AREA_SMALL;
487 }
488 else
489 {
490 warning (OPT_Wattributes, "invalid argument of %qs attribute",
491 IDENTIFIER_POINTER (name));
492 *no_add_attrs = true;
493 }
494
495 switch (TREE_CODE (decl))
496 {
497 case VAR_DECL:
498 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
499 == FUNCTION_DECL)
500 && !TREE_STATIC (decl))
501 {
502 error ("%Jan address area attribute cannot be specified for "
503 "local variables", decl, decl);
504 *no_add_attrs = true;
505 }
506 area = ia64_get_addr_area (decl);
507 if (area != ADDR_AREA_NORMAL && addr_area != area)
508 {
509 error ("%Jaddress area of '%s' conflicts with previous "
510 "declaration", decl, decl);
511 *no_add_attrs = true;
512 }
513 break;
514
515 case FUNCTION_DECL:
516 error ("%Jaddress area attribute cannot be specified for functions",
517 decl, decl);
518 *no_add_attrs = true;
519 break;
520
521 default:
522 warning (OPT_Wattributes, "%qs attribute ignored",
523 IDENTIFIER_POINTER (name));
524 *no_add_attrs = true;
525 break;
526 }
527
528 return NULL_TREE;
529 }
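/* A usage sketch (the declaration below is illustrative and not part of
   GCC itself): given the identifier check above, a file-scope object can
   request the 2MB-addressable area with

     static int counter __attribute__ ((model (small)));

   after which ia64_encode_addr_area marks its SYMBOL_REF with
   SYMBOL_FLAG_SMALL_ADDR.  The handler rejects the attribute on
   non-static local variables and on functions.  */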
530
531 static void
532 ia64_encode_addr_area (tree decl, rtx symbol)
533 {
534 int flags;
535
536 flags = SYMBOL_REF_FLAGS (symbol);
537 switch (ia64_get_addr_area (decl))
538 {
539 case ADDR_AREA_NORMAL: break;
540 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
541 default: gcc_unreachable ();
542 }
543 SYMBOL_REF_FLAGS (symbol) = flags;
544 }
545
546 static void
547 ia64_encode_section_info (tree decl, rtx rtl, int first)
548 {
549 default_encode_section_info (decl, rtl, first);
550
551 /* Careful not to prod global register variables. */
552 if (TREE_CODE (decl) == VAR_DECL
553 && GET_CODE (DECL_RTL (decl)) == MEM
554 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
555 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
556 ia64_encode_addr_area (decl, XEXP (rtl, 0));
557 }
558 \f
559 /* Implement CONST_OK_FOR_LETTER_P. */
560
561 bool
562 ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
563 {
564 switch (c)
565 {
566 case 'I':
567 return CONST_OK_FOR_I (value);
568 case 'J':
569 return CONST_OK_FOR_J (value);
570 case 'K':
571 return CONST_OK_FOR_K (value);
572 case 'L':
573 return CONST_OK_FOR_L (value);
574 case 'M':
575 return CONST_OK_FOR_M (value);
576 case 'N':
577 return CONST_OK_FOR_N (value);
578 case 'O':
579 return CONST_OK_FOR_O (value);
580 case 'P':
581 return CONST_OK_FOR_P (value);
582 default:
583 return false;
584 }
585 }
586
587 /* Implement CONST_DOUBLE_OK_FOR_LETTER_P. */
588
589 bool
590 ia64_const_double_ok_for_letter_p (rtx value, char c)
591 {
592 switch (c)
593 {
594 case 'G':
595 return CONST_DOUBLE_OK_FOR_G (value);
596 default:
597 return false;
598 }
599 }
600
601 /* Implement EXTRA_CONSTRAINT. */
602
603 bool
604 ia64_extra_constraint (rtx value, char c)
605 {
606 switch (c)
607 {
608 case 'Q':
609 /* Non-volatile memory for FP_REG loads/stores. */
 610       return memory_operand (value, VOIDmode) && !MEM_VOLATILE_P (value);
611
612 case 'R':
613 /* 1..4 for shladd arguments. */
614 return (GET_CODE (value) == CONST_INT
615 && INTVAL (value) >= 1 && INTVAL (value) <= 4);
616
617 case 'S':
618 /* Non-post-inc memory for asms and other unsavory creatures. */
619 return (GET_CODE (value) == MEM
620 && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
621 && (reload_in_progress || memory_operand (value, VOIDmode)));
622
623 case 'T':
624 /* Symbol ref to small-address-area. */
625 return small_addr_symbolic_operand (value, VOIDmode);
626
627 case 'U':
628 /* Vector zero. */
629 return value == CONST0_RTX (GET_MODE (value));
630
631 case 'W':
632 /* An integer vector, such that conversion to an integer yields a
633 value appropriate for an integer 'J' constraint. */
634 if (GET_CODE (value) == CONST_VECTOR
635 && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
636 {
637 value = simplify_subreg (DImode, value, GET_MODE (value), 0);
638 return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
639 }
640 return false;
641
642 case 'Y':
643 /* A V2SF vector containing elements that satisfy 'G'. */
644 return
645 (GET_CODE (value) == CONST_VECTOR
646 && GET_MODE (value) == V2SFmode
647 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
648 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
649
650 default:
651 return false;
652 }
653 }
654 \f
655 /* Return 1 if the operands of a move are ok. */
656
657 int
658 ia64_move_ok (rtx dst, rtx src)
659 {
660 /* If we're under init_recog_no_volatile, we'll not be able to use
661 memory_operand. So check the code directly and don't worry about
662 the validity of the underlying address, which should have been
663 checked elsewhere anyway. */
664 if (GET_CODE (dst) != MEM)
665 return 1;
666 if (GET_CODE (src) == MEM)
667 return 0;
668 if (register_operand (src, VOIDmode))
669 return 1;
670
 671   /* Otherwise, this must be a constant: either 0, 0.0, or 1.0.  */
672 if (INTEGRAL_MODE_P (GET_MODE (dst)))
673 return src == const0_rtx;
674 else
675 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
676 }
677
678 int
679 addp4_optimize_ok (rtx op1, rtx op2)
680 {
 681   return (basereg_operand (op1, GET_MODE (op1)) !=
 682      basereg_operand (op2, GET_MODE (op2)));
683 }
684
685 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
686 Return the length of the field, or <= 0 on failure. */
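/* A worked example (values chosen for illustration, not taken from the
   sources): with ROP == 0x3f00 and RSHIFT == 8, the shift below leaves
   op == 0x3f, and exact_log2 (0x3f + 1) == 6, so the dep.z field is 6
   bits wide.  A mask like 0x3e00 fails: 0x3e + 1 is not a power of two,
   and exact_log2 returns a negative value.  */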
687
688 int
689 ia64_depz_field_mask (rtx rop, rtx rshift)
690 {
691 unsigned HOST_WIDE_INT op = INTVAL (rop);
692 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
693
694 /* Get rid of the zero bits we're shifting in. */
695 op >>= shift;
696
697 /* We must now have a solid block of 1's at bit 0. */
698 return exact_log2 (op + 1);
699 }
700
701 /* Return the TLS model to use for ADDR. */
702
703 static enum tls_model
704 tls_symbolic_operand_type (rtx addr)
705 {
706 enum tls_model tls_kind = 0;
707
708 if (GET_CODE (addr) == CONST)
709 {
710 if (GET_CODE (XEXP (addr, 0)) == PLUS
711 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
712 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
713 }
714 else if (GET_CODE (addr) == SYMBOL_REF)
715 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
716
717 return tls_kind;
718 }
719
720 /* Return true if X is a constant that is valid for some immediate
721 field in an instruction. */
722
723 bool
724 ia64_legitimate_constant_p (rtx x)
725 {
726 switch (GET_CODE (x))
727 {
728 case CONST_INT:
729 case LABEL_REF:
730 return true;
731
732 case CONST_DOUBLE:
733 if (GET_MODE (x) == VOIDmode)
734 return true;
735 return CONST_DOUBLE_OK_FOR_G (x);
736
737 case CONST:
738 case SYMBOL_REF:
739 return tls_symbolic_operand_type (x) == 0;
740
741 case CONST_VECTOR:
742 {
743 enum machine_mode mode = GET_MODE (x);
744
745 if (mode == V2SFmode)
746 return ia64_extra_constraint (x, 'Y');
747
748 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
749 && GET_MODE_SIZE (mode) <= 8);
750 }
751
752 default:
753 return false;
754 }
755 }
756
757 /* Don't allow TLS addresses to get spilled to memory. */
758
759 static bool
760 ia64_cannot_force_const_mem (rtx x)
761 {
762 return tls_symbolic_operand_type (x) != 0;
763 }
764
765 /* Expand a symbolic constant load. */
766
767 bool
768 ia64_expand_load_address (rtx dest, rtx src)
769 {
770 gcc_assert (GET_CODE (dest) == REG);
771
772 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
773 having to pointer-extend the value afterward. Other forms of address
774 computation below are also more natural to compute as 64-bit quantities.
775 If we've been given an SImode destination register, change it. */
776 if (GET_MODE (dest) != Pmode)
777 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);
778
779 if (TARGET_NO_PIC)
780 return false;
781 if (small_addr_symbolic_operand (src, VOIDmode))
782 return false;
783
784 if (TARGET_AUTO_PIC)
785 emit_insn (gen_load_gprel64 (dest, src));
786 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
787 emit_insn (gen_load_fptr (dest, src));
788 else if (sdata_symbolic_operand (src, VOIDmode))
789 emit_insn (gen_load_gprel (dest, src));
790 else
791 {
792 HOST_WIDE_INT addend = 0;
793 rtx tmp;
794
795 /* We did split constant offsets in ia64_expand_move, and we did try
796 to keep them split in move_operand, but we also allowed reload to
797 rematerialize arbitrary constants rather than spill the value to
798 the stack and reload it. So we have to be prepared here to split
799 them apart again. */
800 if (GET_CODE (src) == CONST)
801 {
802 HOST_WIDE_INT hi, lo;
803
804 hi = INTVAL (XEXP (XEXP (src, 0), 1));
805 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
806 hi = hi - lo;
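	  /* A worked example (illustrative numbers only): an original offset
	     of 0x3000 splits into lo == -0x1000 and hi == 0x4000.  LO is the
	     offset sign-extended from its low 14 bits, so it always lies in
	     [-0x2000, 0x1fff], and HI is left a multiple of 0x4000.  */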
807
808 if (lo != 0)
809 {
810 addend = lo;
811 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
812 }
813 }
814
815 tmp = gen_rtx_HIGH (Pmode, src);
816 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
817 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
818
819 tmp = gen_rtx_LO_SUM (Pmode, dest, src);
820 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
821
822 if (addend)
823 {
824 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
825 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
826 }
827 }
828
829 return true;
830 }
831
832 static GTY(()) rtx gen_tls_tga;
833 static rtx
834 gen_tls_get_addr (void)
835 {
836 if (!gen_tls_tga)
837 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
838 return gen_tls_tga;
839 }
840
841 static GTY(()) rtx thread_pointer_rtx;
842 static rtx
843 gen_thread_pointer (void)
844 {
845 if (!thread_pointer_rtx)
846 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
847 return thread_pointer_rtx;
848 }
849
850 static rtx
851 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
852 HOST_WIDE_INT addend)
853 {
854 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
855 rtx orig_op0 = op0, orig_op1 = op1;
856 HOST_WIDE_INT addend_lo, addend_hi;
857
858 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
859 addend_hi = addend - addend_lo;
860
861 switch (tls_kind)
862 {
863 case TLS_MODEL_GLOBAL_DYNAMIC:
864 start_sequence ();
865
866 tga_op1 = gen_reg_rtx (Pmode);
867 emit_insn (gen_load_dtpmod (tga_op1, op1));
868
869 tga_op2 = gen_reg_rtx (Pmode);
870 emit_insn (gen_load_dtprel (tga_op2, op1));
871
872 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
873 LCT_CONST, Pmode, 2, tga_op1,
874 Pmode, tga_op2, Pmode);
875
876 insns = get_insns ();
877 end_sequence ();
878
879 if (GET_MODE (op0) != Pmode)
880 op0 = tga_ret;
881 emit_libcall_block (insns, op0, tga_ret, op1);
882 break;
883
884 case TLS_MODEL_LOCAL_DYNAMIC:
 885       /* ??? This isn't the completely proper way to do local-dynamic.
886 If the call to __tls_get_addr is used only by a single symbol,
887 then we should (somehow) move the dtprel to the second arg
888 to avoid the extra add. */
889 start_sequence ();
890
891 tga_op1 = gen_reg_rtx (Pmode);
892 emit_insn (gen_load_dtpmod (tga_op1, op1));
893
894 tga_op2 = const0_rtx;
895
896 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
897 LCT_CONST, Pmode, 2, tga_op1,
898 Pmode, tga_op2, Pmode);
899
900 insns = get_insns ();
901 end_sequence ();
902
903 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
904 UNSPEC_LD_BASE);
905 tmp = gen_reg_rtx (Pmode);
906 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
907
908 if (!register_operand (op0, Pmode))
909 op0 = gen_reg_rtx (Pmode);
910 if (TARGET_TLS64)
911 {
912 emit_insn (gen_load_dtprel (op0, op1));
913 emit_insn (gen_adddi3 (op0, tmp, op0));
914 }
915 else
916 emit_insn (gen_add_dtprel (op0, op1, tmp));
917 break;
918
919 case TLS_MODEL_INITIAL_EXEC:
920 op1 = plus_constant (op1, addend_hi);
921 addend = addend_lo;
922
923 tmp = gen_reg_rtx (Pmode);
924 emit_insn (gen_load_tprel (tmp, op1));
925
926 if (!register_operand (op0, Pmode))
927 op0 = gen_reg_rtx (Pmode);
928 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
929 break;
930
931 case TLS_MODEL_LOCAL_EXEC:
932 if (!register_operand (op0, Pmode))
933 op0 = gen_reg_rtx (Pmode);
934
935 op1 = orig_op1;
936 addend = 0;
937 if (TARGET_TLS64)
938 {
939 emit_insn (gen_load_tprel (op0, op1));
940 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
941 }
942 else
943 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
944 break;
945
946 default:
947 gcc_unreachable ();
948 }
949
950 if (addend)
951 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
952 orig_op0, 1, OPTAB_DIRECT);
953 if (orig_op0 == op0)
954 return NULL_RTX;
955 if (GET_MODE (orig_op0) == Pmode)
956 return op0;
957 return gen_lowpart (GET_MODE (orig_op0), op0);
958 }
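/* A usage sketch (source-level details assumed, not taken from this
   file): a reference from -fpic code to an ordinary "__thread int t;"
   typically arrives here as TLS_MODEL_GLOBAL_DYNAMIC and becomes a call
   to __tls_get_addr, whereas -ftls-model=local-exec (or a variable
   defined in the executable itself) takes the TLS_MODEL_LOCAL_EXEC arm
   and adds the tprel offset directly to the thread pointer in r13.  */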
959
960 rtx
961 ia64_expand_move (rtx op0, rtx op1)
962 {
963 enum machine_mode mode = GET_MODE (op0);
964
965 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
966 op1 = force_reg (mode, op1);
967
968 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
969 {
970 HOST_WIDE_INT addend = 0;
971 enum tls_model tls_kind;
972 rtx sym = op1;
973
974 if (GET_CODE (op1) == CONST
975 && GET_CODE (XEXP (op1, 0)) == PLUS
976 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
977 {
978 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
979 sym = XEXP (XEXP (op1, 0), 0);
980 }
981
982 tls_kind = tls_symbolic_operand_type (sym);
983 if (tls_kind)
984 return ia64_expand_tls_address (tls_kind, op0, sym, addend);
985
986 if (any_offset_symbol_operand (sym, mode))
987 addend = 0;
988 else if (aligned_offset_symbol_operand (sym, mode))
989 {
990 HOST_WIDE_INT addend_lo, addend_hi;
991
992 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
993 addend_hi = addend - addend_lo;
994
995 if (addend_lo != 0)
996 {
997 op1 = plus_constant (sym, addend_hi);
998 addend = addend_lo;
999 }
1000 else
1001 addend = 0;
1002 }
1003 else
1004 op1 = sym;
1005
1006 if (reload_completed)
1007 {
1008 /* We really should have taken care of this offset earlier. */
1009 gcc_assert (addend == 0);
1010 if (ia64_expand_load_address (op0, op1))
1011 return NULL_RTX;
1012 }
1013
1014 if (addend)
1015 {
1016 rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
1017
1018 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1019
1020 op1 = expand_simple_binop (mode, PLUS, subtarget,
1021 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1022 if (op0 == op1)
1023 return NULL_RTX;
1024 }
1025 }
1026
1027 return op1;
1028 }
1029
1030 /* Split a move from OP1 to OP0 conditional on COND. */
1031
1032 void
1033 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1034 {
1035 rtx insn, first = get_last_insn ();
1036
1037 emit_move_insn (op0, op1);
1038
1039 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1040 if (INSN_P (insn))
1041 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1042 PATTERN (insn));
1043 }
1044
1045 /* Split a post-reload TImode or TFmode reference into two DImode
1046 components. This is made extra difficult by the fact that we do
1047 not get any scratch registers to work with, because reload cannot
1048 be prevented from giving us a scratch that overlaps the register
1049 pair involved. So instead, when addressing memory, we tweak the
1050 pointer register up and back down with POST_INCs. Or up and not
1051 back down when we can get away with it.
1052
1053 REVERSED is true when the loads must be done in reversed order
1054 (high word first) for correctness. DEAD is true when the pointer
1055 dies with the second insn we generate and therefore the second
1056 address must not carry a postmodify.
1057
1058 May return an insn which is to be emitted after the moves. */
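
   For instance (an illustrative case, not an exhaustive description):
   splitting a TImode load from (mem (reg r14)) yields two DImode loads,
   the first through (post_inc r14) and the second through (post_dec r14),
   so r14 ends up unchanged; when DEAD is true the post_dec is omitted
   and r14 is simply left 8 bytes higher.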
1059
1060 static rtx
1061 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1062 {
1063 rtx fixup = 0;
1064
1065 switch (GET_CODE (in))
1066 {
1067 case REG:
1068 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1069 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1070 break;
1071
1072 case CONST_INT:
1073 case CONST_DOUBLE:
1074 /* Cannot occur reversed. */
1075 gcc_assert (!reversed);
1076
1077 if (GET_MODE (in) != TFmode)
1078 split_double (in, &out[0], &out[1]);
1079 else
1080 /* split_double does not understand how to split a TFmode
1081 quantity into a pair of DImode constants. */
1082 {
1083 REAL_VALUE_TYPE r;
1084 unsigned HOST_WIDE_INT p[2];
1085 long l[4]; /* TFmode is 128 bits */
1086
1087 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1088 real_to_target (l, &r, TFmode);
1089
1090 if (FLOAT_WORDS_BIG_ENDIAN)
1091 {
1092 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1093 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1094 }
1095 else
1096 {
1097 p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1098 p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1099 }
1100 out[0] = GEN_INT (p[0]);
1101 out[1] = GEN_INT (p[1]);
1102 }
1103 break;
1104
1105 case MEM:
1106 {
1107 rtx base = XEXP (in, 0);
1108 rtx offset;
1109
1110 switch (GET_CODE (base))
1111 {
1112 case REG:
1113 if (!reversed)
1114 {
1115 out[0] = adjust_automodify_address
1116 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1117 out[1] = adjust_automodify_address
1118 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1119 }
1120 else
1121 {
1122 /* Reversal requires a pre-increment, which can only
1123 be done as a separate insn. */
1124 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1125 out[0] = adjust_automodify_address
1126 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1127 out[1] = adjust_address (in, DImode, 0);
1128 }
1129 break;
1130
1131 case POST_INC:
1132 gcc_assert (!reversed && !dead);
1133
1134 /* Just do the increment in two steps. */
1135 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1136 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1137 break;
1138
1139 case POST_DEC:
1140 gcc_assert (!reversed && !dead);
1141
1142 /* Add 8, subtract 24. */
1143 base = XEXP (base, 0);
1144 out[0] = adjust_automodify_address
1145 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1146 out[1] = adjust_automodify_address
1147 (in, DImode,
1148 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1149 8);
1150 break;
1151
1152 case POST_MODIFY:
1153 gcc_assert (!reversed && !dead);
1154
1155 /* Extract and adjust the modification. This case is
1156 trickier than the others, because we might have an
1157 index register, or we might have a combined offset that
1158 doesn't fit a signed 9-bit displacement field. We can
1159 assume the incoming expression is already legitimate. */
1160 offset = XEXP (base, 1);
1161 base = XEXP (base, 0);
1162
1163 out[0] = adjust_automodify_address
1164 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1165
1166 if (GET_CODE (XEXP (offset, 1)) == REG)
1167 {
1168 /* Can't adjust the postmodify to match. Emit the
1169 original, then a separate addition insn. */
1170 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1171 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1172 }
1173 else
1174 {
1175 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1176 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1177 {
1178 /* Again the postmodify cannot be made to match,
1179 but in this case it's more efficient to get rid
1180 of the postmodify entirely and fix up with an
1181 add insn. */
1182 out[1] = adjust_automodify_address (in, DImode, base, 8);
1183 fixup = gen_adddi3
1184 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1185 }
1186 else
1187 {
1188 /* Combined offset still fits in the displacement field.
1189 (We cannot overflow it at the high end.) */
1190 out[1] = adjust_automodify_address
1191 (in, DImode, gen_rtx_POST_MODIFY
1192 (Pmode, base, gen_rtx_PLUS
1193 (Pmode, base,
1194 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1195 8);
1196 }
1197 }
1198 break;
1199
1200 default:
1201 gcc_unreachable ();
1202 }
1203 break;
1204 }
1205
1206 default:
1207 gcc_unreachable ();
1208 }
1209
1210 return fixup;
1211 }
1212
1213 /* Split a TImode or TFmode move instruction after reload.
1214 This is used by *movtf_internal and *movti_internal. */
1215 void
1216 ia64_split_tmode_move (rtx operands[])
1217 {
1218 rtx in[2], out[2], insn;
1219 rtx fixup[2];
1220 bool dead = false;
1221 bool reversed = false;
1222
1223 /* It is possible for reload to decide to overwrite a pointer with
1224 the value it points to. In that case we have to do the loads in
1225 the appropriate order so that the pointer is not destroyed too
1226 early. Also we must not generate a postmodify for that second
1227 load, or rws_access_regno will die. */
1228 if (GET_CODE (operands[1]) == MEM
1229 && reg_overlap_mentioned_p (operands[0], operands[1]))
1230 {
1231 rtx base = XEXP (operands[1], 0);
1232 while (GET_CODE (base) != REG)
1233 base = XEXP (base, 0);
1234
1235 if (REGNO (base) == REGNO (operands[0]))
1236 reversed = true;
1237 dead = true;
1238 }
1239 /* Another reason to do the moves in reversed order is if the first
1240 element of the target register pair is also the second element of
1241 the source register pair. */
1242 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1243 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1244 reversed = true;
1245
1246 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1247 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1248
1249 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1250 if (GET_CODE (EXP) == MEM \
1251 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1252 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1253 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1254 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1255 XEXP (XEXP (EXP, 0), 0), \
1256 REG_NOTES (INSN))
1257
1258 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1259 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1260 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1261
1262 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1263 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1264 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1265
1266 if (fixup[0])
1267 emit_insn (fixup[0]);
1268 if (fixup[1])
1269 emit_insn (fixup[1]);
1270
1271 #undef MAYBE_ADD_REG_INC_NOTE
1272 }
1273
1274 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1275 through memory plus an extra GR scratch register. Except that you can
1276 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1277 SECONDARY_RELOAD_CLASS, but not both.
1278
1279 We got into problems in the first place by allowing a construct like
1280 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1281 This solution attempts to prevent this situation from occurring. When
1282 we see something like the above, we spill the inner register to memory. */
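/* An illustrative trigger (assumed, not taken from a test case): a
   union such as

     union { long double ld; __int128_t ti; } u;

   may be carried around in a TImode pseudo, and reading u.ld can then
   leave a (subreg:XF (reg:TI)) in the RTL; spill_xfmode_operand below
   pushes such a value through a 16-byte stack temporary instead.  */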
1283
1284 rtx
1285 spill_xfmode_operand (rtx in, int force)
1286 {
1287 if (GET_CODE (in) == SUBREG
1288 && GET_MODE (SUBREG_REG (in)) == TImode
1289 && GET_CODE (SUBREG_REG (in)) == REG)
1290 {
1291 rtx memt = assign_stack_temp (TImode, 16, 0);
1292 emit_move_insn (memt, SUBREG_REG (in));
1293 return adjust_address (memt, XFmode, 0);
1294 }
1295 else if (force && GET_CODE (in) == REG)
1296 {
1297 rtx memx = assign_stack_temp (XFmode, 16, 0);
1298 emit_move_insn (memx, in);
1299 return memx;
1300 }
1301 else
1302 return in;
1303 }
1304
1305 /* Emit comparison instruction if necessary, returning the expression
1306 that holds the compare result in the proper mode. */
1307
1308 static GTY(()) rtx cmptf_libfunc;
1309
1310 rtx
1311 ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1312 {
1313 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1314 rtx cmp;
1315
1316 /* If we have a BImode input, then we already have a compare result, and
1317 do not need to emit another comparison. */
1318 if (GET_MODE (op0) == BImode)
1319 {
1320 gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
1321 cmp = op0;
1322 }
1323 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
 1324      magic number as its third argument that indicates what to do.
1325 The return value is an integer to be compared against zero. */
1326 else if (GET_MODE (op0) == TFmode)
1327 {
1328 enum qfcmp_magic {
1329 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1330 QCMP_UNORD = 2,
1331 QCMP_EQ = 4,
1332 QCMP_LT = 8,
1333 QCMP_GT = 16
1334 } magic;
1335 enum rtx_code ncode;
1336 rtx ret, insns;
1337
1338 gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
1339 switch (code)
1340 {
1341 /* 1 = equal, 0 = not equal. Equality operators do
1342 not raise FP_INVALID when given an SNaN operand. */
1343 case EQ: magic = QCMP_EQ; ncode = NE; break;
1344 case NE: magic = QCMP_EQ; ncode = EQ; break;
1345 /* isunordered() from C99. */
1346 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1347 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1348 /* Relational operators raise FP_INVALID when given
1349 an SNaN operand. */
1350 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1351 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1352 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1353 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1354 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
 1355	     Expanders for buneq etc. would have to be added to ia64.md
1356 for this to be useful. */
1357 default: gcc_unreachable ();
1358 }
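      /* For example, an LE comparison ends up passing
	 magic == QCMP_LT | QCMP_EQ | QCMP_INV (decimal 13) to _U_Qfcmp and
	 then tests the integer result with NE against zero; the exact
	 encoding is HP-UX's convention, summarized by the enum above.  */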
1359
1360 start_sequence ();
1361
1362 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1363 op0, TFmode, op1, TFmode,
1364 GEN_INT (magic), DImode);
1365 cmp = gen_reg_rtx (BImode);
1366 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1367 gen_rtx_fmt_ee (ncode, BImode,
1368 ret, const0_rtx)));
1369
1370 insns = get_insns ();
1371 end_sequence ();
1372
1373 emit_libcall_block (insns, cmp, cmp,
1374 gen_rtx_fmt_ee (code, BImode, op0, op1));
1375 code = NE;
1376 }
1377 else
1378 {
1379 cmp = gen_reg_rtx (BImode);
1380 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1381 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1382 code = NE;
1383 }
1384
1385 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1386 }
1387
1388 /* Generate an integral vector comparison. */
1389
1390 static bool
1391 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1392 rtx dest, rtx op0, rtx op1)
1393 {
1394 bool negate = false;
1395 rtx x;
1396
1397 switch (code)
1398 {
1399 case EQ:
1400 case GT:
1401 break;
1402
1403 case NE:
1404 code = EQ;
1405 negate = true;
1406 break;
1407
1408 case LE:
1409 code = GT;
1410 negate = true;
1411 break;
1412
1413 case GE:
1414 negate = true;
1415 /* FALLTHRU */
1416
1417 case LT:
1418 x = op0;
1419 op0 = op1;
1420 op1 = x;
1421 code = GT;
1422 break;
1423
1424 case GTU:
1425 case GEU:
1426 case LTU:
1427 case LEU:
1428 {
1429 rtx w0h, w0l, w1h, w1l, ch, cl;
1430 enum machine_mode wmode;
1431 rtx (*unpack_l) (rtx, rtx, rtx);
1432 rtx (*unpack_h) (rtx, rtx, rtx);
1433 rtx (*pack) (rtx, rtx, rtx);
1434
1435 /* We don't have native unsigned comparisons, but we can generate
1436 them better than generic code can. */
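
	/* For example (restating the code below for one case): a V8QImode
	   GTU compare zero-extends both operands into V4HImode halves with
	   gen_unpack1_l/gen_unpack1_h, performs signed GT compares in the
	   wider mode, and merges the two result halves back with
	   gen_pack2_sss.  */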
1437
1438 gcc_assert (mode != V2SImode);
1439 switch (mode)
1440 {
1441 case V8QImode:
1442 wmode = V4HImode;
1443 pack = gen_pack2_sss;
1444 unpack_l = gen_unpack1_l;
1445 unpack_h = gen_unpack1_h;
1446 break;
1447
1448 case V4HImode:
1449 wmode = V2SImode;
1450 pack = gen_pack4_sss;
1451 unpack_l = gen_unpack2_l;
1452 unpack_h = gen_unpack2_h;
1453 break;
1454
1455 default:
1456 gcc_unreachable ();
1457 }
1458
1459 /* Unpack into wider vectors, zero extending the elements. */
1460
1461 w0l = gen_reg_rtx (wmode);
1462 w0h = gen_reg_rtx (wmode);
1463 w1l = gen_reg_rtx (wmode);
1464 w1h = gen_reg_rtx (wmode);
1465 emit_insn (unpack_l (gen_lowpart (mode, w0l), op0, CONST0_RTX (mode)));
1466 emit_insn (unpack_h (gen_lowpart (mode, w0h), op0, CONST0_RTX (mode)));
1467 emit_insn (unpack_l (gen_lowpart (mode, w1l), op1, CONST0_RTX (mode)));
1468 emit_insn (unpack_h (gen_lowpart (mode, w1h), op1, CONST0_RTX (mode)));
1469
1470 /* Compare in the wider mode. */
1471
1472 cl = gen_reg_rtx (wmode);
1473 ch = gen_reg_rtx (wmode);
1474 code = signed_condition (code);
1475 ia64_expand_vecint_compare (code, wmode, cl, w0l, w1l);
1476 negate = ia64_expand_vecint_compare (code, wmode, ch, w0h, w1h);
1477
1478 /* Repack into a single narrower vector. */
1479
1480 emit_insn (pack (dest, cl, ch));
1481 }
1482 return negate;
1483
1484 default:
1485 gcc_unreachable ();
1486 }
1487
1488 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1489 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1490
1491 return negate;
1492 }
1493
1494 static void
1495 ia64_expand_vcondu_v2si (enum rtx_code code, rtx operands[])
1496 {
1497 rtx dl, dh, bl, bh, op1l, op1h, op2l, op2h, op4l, op4h, op5l, op5h, x;
1498
1499 /* In this case, we extract the two SImode quantities and generate
1500 normal comparisons for each of them. */
1501
1502 op1l = gen_lowpart (SImode, operands[1]);
1503 op2l = gen_lowpart (SImode, operands[2]);
1504 op4l = gen_lowpart (SImode, operands[4]);
1505 op5l = gen_lowpart (SImode, operands[5]);
1506
1507 op1h = gen_reg_rtx (SImode);
1508 op2h = gen_reg_rtx (SImode);
1509 op4h = gen_reg_rtx (SImode);
1510 op5h = gen_reg_rtx (SImode);
1511
1512 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op1h),
1513 gen_lowpart (DImode, operands[1]), GEN_INT (32)));
1514 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op2h),
1515 gen_lowpart (DImode, operands[2]), GEN_INT (32)));
1516 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op4h),
1517 gen_lowpart (DImode, operands[4]), GEN_INT (32)));
1518 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op5h),
1519 gen_lowpart (DImode, operands[5]), GEN_INT (32)));
1520
1521 bl = gen_reg_rtx (BImode);
1522 x = gen_rtx_fmt_ee (code, BImode, op4l, op5l);
1523 emit_insn (gen_rtx_SET (VOIDmode, bl, x));
1524
1525 bh = gen_reg_rtx (BImode);
1526 x = gen_rtx_fmt_ee (code, BImode, op4h, op5h);
1527 emit_insn (gen_rtx_SET (VOIDmode, bh, x));
1528
1529 /* With the results of the comparisons, emit conditional moves. */
1530
1531 dl = gen_reg_rtx (SImode);
1532 x = gen_rtx_IF_THEN_ELSE (SImode, bl, op1l, op2l);
1533 emit_insn (gen_rtx_SET (VOIDmode, dl, x));
1534
1535 dh = gen_reg_rtx (SImode);
1536 x = gen_rtx_IF_THEN_ELSE (SImode, bh, op1h, op2h);
1537 emit_insn (gen_rtx_SET (VOIDmode, dh, x));
1538
1539 /* Merge the two partial results back into a vector. */
1540
1541 x = gen_rtx_VEC_CONCAT (V2SImode, dl, dh);
1542 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1543 }
1544
1545 /* Emit an integral vector conditional move. */
1546
1547 void
1548 ia64_expand_vecint_cmov (rtx operands[])
1549 {
1550 enum machine_mode mode = GET_MODE (operands[0]);
1551 enum rtx_code code = GET_CODE (operands[3]);
1552 bool negate;
1553 rtx cmp, x, ot, of;
1554
1555 /* Since we don't have unsigned V2SImode comparisons, it's more efficient
1556 to special-case them entirely. */
1557 if (mode == V2SImode
1558 && (code == GTU || code == GEU || code == LEU || code == LTU))
1559 {
1560 ia64_expand_vcondu_v2si (code, operands);
1561 return;
1562 }
1563
1564 cmp = gen_reg_rtx (mode);
1565 negate = ia64_expand_vecint_compare (code, mode, cmp,
1566 operands[4], operands[5]);
1567
1568 ot = operands[1+negate];
1569 of = operands[2-negate];
1570
1571 if (ot == CONST0_RTX (mode))
1572 {
1573 if (of == CONST0_RTX (mode))
1574 {
1575 emit_move_insn (operands[0], ot);
1576 return;
1577 }
1578
1579 x = gen_rtx_NOT (mode, cmp);
1580 x = gen_rtx_AND (mode, x, of);
1581 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1582 }
1583 else if (of == CONST0_RTX (mode))
1584 {
1585 x = gen_rtx_AND (mode, cmp, ot);
1586 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1587 }
1588 else
1589 {
1590 rtx t, f;
1591
1592 t = gen_reg_rtx (mode);
1593 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1594 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1595
1596 f = gen_reg_rtx (mode);
1597 x = gen_rtx_NOT (mode, cmp);
1598 x = gen_rtx_AND (mode, x, operands[2-negate]);
1599 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1600
1601 x = gen_rtx_IOR (mode, t, f);
1602 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1603 }
1604 }
1605
1606 /* Emit an integral vector min or max operation. Return true if all done. */
1607
1608 bool
1609 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1610 rtx operands[])
1611 {
1612 rtx xops[5];
1613
1614 /* These four combinations are supported directly. */
1615 if (mode == V8QImode && (code == UMIN || code == UMAX))
1616 return false;
1617 if (mode == V4HImode && (code == SMIN || code == SMAX))
1618 return false;
1619
 1620   /* Everything else is implemented via vector comparisons.  */
1621 xops[0] = operands[0];
1622 xops[4] = xops[1] = operands[1];
1623 xops[5] = xops[2] = operands[2];
1624
1625 switch (code)
1626 {
1627 case UMIN:
1628 code = LTU;
1629 break;
1630 case UMAX:
1631 code = GTU;
1632 break;
1633 case SMIN:
1634 code = LT;
1635 break;
1636 case SMAX:
1637 code = GT;
1638 break;
1639 default:
1640 gcc_unreachable ();
1641 }
1642 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1643
1644 ia64_expand_vecint_cmov (xops);
1645 return true;
1646 }
1647
1648 /* Emit the appropriate sequence for a call. */
1649
1650 void
1651 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1652 int sibcall_p)
1653 {
1654 rtx insn, b0;
1655
1656 addr = XEXP (addr, 0);
1657 addr = convert_memory_address (DImode, addr);
1658 b0 = gen_rtx_REG (DImode, R_BR (0));
1659
 1660   /* ??? Should do this for functions known to bind locally, too.  */
1661 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1662 {
1663 if (sibcall_p)
1664 insn = gen_sibcall_nogp (addr);
1665 else if (! retval)
1666 insn = gen_call_nogp (addr, b0);
1667 else
1668 insn = gen_call_value_nogp (retval, addr, b0);
1669 insn = emit_call_insn (insn);
1670 }
1671 else
1672 {
1673 if (sibcall_p)
1674 insn = gen_sibcall_gp (addr);
1675 else if (! retval)
1676 insn = gen_call_gp (addr, b0);
1677 else
1678 insn = gen_call_value_gp (retval, addr, b0);
1679 insn = emit_call_insn (insn);
1680
1681 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1682 }
1683
1684 if (sibcall_p)
1685 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1686 }
1687
1688 void
1689 ia64_reload_gp (void)
1690 {
1691 rtx tmp;
1692
1693 if (current_frame_info.reg_save_gp)
1694 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1695 else
1696 {
1697 HOST_WIDE_INT offset;
1698
1699 offset = (current_frame_info.spill_cfa_off
1700 + current_frame_info.spill_size);
1701 if (frame_pointer_needed)
1702 {
1703 tmp = hard_frame_pointer_rtx;
1704 offset = -offset;
1705 }
1706 else
1707 {
1708 tmp = stack_pointer_rtx;
1709 offset = current_frame_info.total_size - offset;
1710 }
1711
1712 if (CONST_OK_FOR_I (offset))
1713 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1714 tmp, GEN_INT (offset)));
1715 else
1716 {
1717 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1718 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1719 pic_offset_table_rtx, tmp));
1720 }
1721
1722 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1723 }
1724
1725 emit_move_insn (pic_offset_table_rtx, tmp);
1726 }
1727
1728 void
1729 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1730 rtx scratch_b, int noreturn_p, int sibcall_p)
1731 {
1732 rtx insn;
1733 bool is_desc = false;
1734
1735 /* If we find we're calling through a register, then we're actually
1736 calling through a descriptor, so load up the values. */
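  /* (An IA-64 function descriptor is a two-word object: the function's
     entry address followed by its gp value.  The two loads below pick up
     exactly those two words, in that order.)  */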
1737 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1738 {
1739 rtx tmp;
1740 bool addr_dead_p;
1741
1742 /* ??? We are currently constrained to *not* use peep2, because
1743 we can legitimately change the global lifetime of the GP
1744 (in the form of killing where previously live). This is
1745 because a call through a descriptor doesn't use the previous
1746 value of the GP, while a direct call does, and we do not
1747 commit to either form until the split here.
1748
1749 That said, this means that we lack precise life info for
1750 whether ADDR is dead after this call. This is not terribly
1751 important, since we can fix things up essentially for free
1752 with the POST_DEC below, but it's nice to not use it when we
1753 can immediately tell it's not necessary. */
1754 addr_dead_p = ((noreturn_p || sibcall_p
1755 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1756 REGNO (addr)))
1757 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1758
1759 /* Load the code address into scratch_b. */
1760 tmp = gen_rtx_POST_INC (Pmode, addr);
1761 tmp = gen_rtx_MEM (Pmode, tmp);
1762 emit_move_insn (scratch_r, tmp);
1763 emit_move_insn (scratch_b, scratch_r);
1764
1765 /* Load the GP address. If ADDR is not dead here, then we must
1766 revert the change made above via the POST_INCREMENT. */
1767 if (!addr_dead_p)
1768 tmp = gen_rtx_POST_DEC (Pmode, addr);
1769 else
1770 tmp = addr;
1771 tmp = gen_rtx_MEM (Pmode, tmp);
1772 emit_move_insn (pic_offset_table_rtx, tmp);
1773
1774 is_desc = true;
1775 addr = scratch_b;
1776 }
1777
1778 if (sibcall_p)
1779 insn = gen_sibcall_nogp (addr);
1780 else if (retval)
1781 insn = gen_call_value_nogp (retval, addr, retaddr);
1782 else
1783 insn = gen_call_nogp (addr, retaddr);
1784 emit_call_insn (insn);
1785
1786 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1787 ia64_reload_gp ();
1788 }
1789
1790 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
1791
1792 This differs from the generic code in that we know about the zero-extending
1793 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
1794 also know that ld.acq+cmpxchg.rel equals a full barrier.
1795
1796 The loop we want to generate looks like
1797
1798 cmp_reg = mem;
1799 label:
1800 old_reg = cmp_reg;
1801 new_reg = cmp_reg op val;
1802 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
1803 if (cmp_reg != old_reg)
1804 goto label;
1805
1806 Note that we only do the plain load from memory once. Subsequent
1807 iterations use the value loaded by the compare-and-swap pattern. */
1808
1809 void
1810 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
1811 rtx old_dst, rtx new_dst)
1812 {
1813 enum machine_mode mode = GET_MODE (mem);
1814 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
1815 enum insn_code icode;
1816
1817 /* Special case for using fetchadd. */
1818 if ((mode == SImode || mode == DImode) && fetchadd_operand (val, mode))
1819 {
1820 if (!old_dst)
1821 old_dst = gen_reg_rtx (mode);
1822
1823 emit_insn (gen_memory_barrier ());
1824
1825 if (mode == SImode)
1826 icode = CODE_FOR_fetchadd_acq_si;
1827 else
1828 icode = CODE_FOR_fetchadd_acq_di;
1829 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
1830
1831 if (new_dst)
1832 {
1833 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
1834 true, OPTAB_WIDEN);
1835 if (new_reg != new_dst)
1836 emit_move_insn (new_dst, new_reg);
1837 }
1838 return;
1839 }
1840
1841 /* Because of the volatile mem read, we get an ld.acq, which is the
1842 front half of the full barrier. The end half is the cmpxchg.rel. */
1843 gcc_assert (MEM_VOLATILE_P (mem));
1844
1845 old_reg = gen_reg_rtx (DImode);
1846 cmp_reg = gen_reg_rtx (DImode);
1847 label = gen_label_rtx ();
1848
1849 if (mode != DImode)
1850 {
1851 val = simplify_gen_subreg (DImode, val, mode, 0);
1852 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
1853 }
1854 else
1855 emit_move_insn (cmp_reg, mem);
1856
1857 emit_label (label);
1858
1859 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
1860 emit_move_insn (old_reg, cmp_reg);
1861 emit_move_insn (ar_ccv, cmp_reg);
1862
1863 if (old_dst)
1864 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
1865
1866 new_reg = cmp_reg;
1867 if (code == NOT)
1868 {
1869 new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
1870 code = AND;
1871 }
1872 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
1873 true, OPTAB_DIRECT);
1874
1875 if (mode != DImode)
1876 new_reg = gen_lowpart (mode, new_reg);
1877 if (new_dst)
1878 emit_move_insn (new_dst, new_reg);
1879
1880 switch (mode)
1881 {
1882 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
1883 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
1884 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
1885 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
1886 default:
1887 gcc_unreachable ();
1888 }
1889
1890 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
1891
1892 emit_cmp_and_jump_insns (cmp_reg, old_reg, EQ, NULL, DImode, true, label);
1893 }
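/* For illustration, the loop sketched in the comment above
   ia64_expand_atomic_op corresponds roughly to the following C, using
   the PLUS case and a hypothetical helper name (other rtx codes
   substitute the matching operation).  The initial dereference is the
   single plain load; later iterations reuse the value returned by the
   compare-and-swap, and the value returned is the one seen before the
   operation:

       static unsigned long
       atomic_op_sketch (volatile unsigned long *mem, unsigned long val)
       {
         unsigned long cmp = *mem;
         unsigned long old;
         do
           {
             old = cmp;
             cmp = __sync_val_compare_and_swap (mem, old, old + val);
           }
         while (cmp != old);
         return old;
       }
*/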
1894 \f
1895 /* Begin the assembly file. */
1896
1897 static void
1898 ia64_file_start (void)
1899 {
1900 /* Variable tracking should be run after all optimizations which change order
1901 of insns. It also needs a valid CFG. This can't be done in
1902 ia64_override_options, because flag_var_tracking is finalized after
1903 that. */
1904 ia64_flag_var_tracking = flag_var_tracking;
1905 flag_var_tracking = 0;
1906
1907 default_file_start ();
1908 emit_safe_across_calls ();
1909 }
1910
1911 void
1912 emit_safe_across_calls (void)
1913 {
1914 unsigned int rs, re;
1915 int out_state;
1916
1917 rs = 1;
1918 out_state = 0;
1919 while (1)
1920 {
1921 while (rs < 64 && call_used_regs[PR_REG (rs)])
1922 rs++;
1923 if (rs >= 64)
1924 break;
1925 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1926 continue;
1927 if (out_state == 0)
1928 {
1929 fputs ("\t.pred.safe_across_calls ", asm_out_file);
1930 out_state = 1;
1931 }
1932 else
1933 fputc (',', asm_out_file);
1934 if (re == rs + 1)
1935 fprintf (asm_out_file, "p%u", rs);
1936 else
1937 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
1938 rs = re + 1;
1939 }
1940 if (out_state)
1941 fputc ('\n', asm_out_file);
1942 }
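/* For illustration: if, hypothetically, p1-p5 and p16-p63 were the
   predicate registers not in call_used_regs, the loop above would emit

       .pred.safe_across_calls p1-p5,p16-p63

   i.e. one directive naming every maximal run of call-preserved
   predicate registers.  */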
1943
1944 /* Helper function for ia64_compute_frame_size: find an appropriate general
1945 register to spill some special register to. current_frame_info.gr_used_mask
1946 records the bits in GR0 to GR31 that have already been allocated by this
1947 routine. TRY_LOCALS is true if we should attempt to locate a local regnum. */
1948
1949 static int
1950 find_gr_spill (int try_locals)
1951 {
1952 int regno;
1953
1954 /* If this is a leaf function, first try an otherwise unused
1955 call-clobbered register. */
1956 if (current_function_is_leaf)
1957 {
1958 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1959 if (! regs_ever_live[regno]
1960 && call_used_regs[regno]
1961 && ! fixed_regs[regno]
1962 && ! global_regs[regno]
1963 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1964 {
1965 current_frame_info.gr_used_mask |= 1 << regno;
1966 return regno;
1967 }
1968 }
1969
1970 if (try_locals)
1971 {
1972 regno = current_frame_info.n_local_regs;
1973 /* If there is a frame pointer, then we can't use loc79, because
1974 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1975 reg_name switching code in ia64_expand_prologue. */
1976 if (regno < (80 - frame_pointer_needed))
1977 {
1978 current_frame_info.n_local_regs = regno + 1;
1979 return LOC_REG (0) + regno;
1980 }
1981 }
1982
1983 /* Failed to find a general register to spill to. Must use stack. */
1984 return 0;
1985 }
1986
1987 /* In order to make for nice schedules, we try to allocate every temporary
1988 to a different register. We must of course stay away from call-saved,
1989 fixed, and global registers. We must also stay away from registers
1990 allocated in current_frame_info.gr_used_mask, since those include regs
1991 used all through the prologue.
1992
1993 Any register allocated here must be used immediately. The idea is to
1994 aid scheduling, not to solve data flow problems. */
1995
1996 static int last_scratch_gr_reg;
1997
1998 static int
1999 next_scratch_gr_reg (void)
2000 {
2001 int i, regno;
2002
2003 for (i = 0; i < 32; ++i)
2004 {
2005 regno = (last_scratch_gr_reg + i + 1) & 31;
2006 if (call_used_regs[regno]
2007 && ! fixed_regs[regno]
2008 && ! global_regs[regno]
2009 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2010 {
2011 last_scratch_gr_reg = regno;
2012 return regno;
2013 }
2014 }
2015
2016 /* There must be _something_ available. */
2017 gcc_unreachable ();
2018 }
2019
2020 /* Helper function for ia64_compute_frame_size, called through
2021 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2022
2023 static void
2024 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2025 {
2026 unsigned int regno = REGNO (reg);
2027 if (regno < 32)
2028 {
2029 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2030 for (i = 0; i < n; ++i)
2031 current_frame_info.gr_used_mask |= 1 << (regno + i);
2032 }
2033 }
2034
2035 /* Compute the frame layout for the current function and record it in
2036 current_frame_info. SIZE is the number of bytes of space needed
2037 for local variables. */
2038
2039 static void
2040 ia64_compute_frame_size (HOST_WIDE_INT size)
2041 {
2042 HOST_WIDE_INT total_size;
2043 HOST_WIDE_INT spill_size = 0;
2044 HOST_WIDE_INT extra_spill_size = 0;
2045 HOST_WIDE_INT pretend_args_size;
2046 HARD_REG_SET mask;
2047 int n_spilled = 0;
2048 int spilled_gr_p = 0;
2049 int spilled_fr_p = 0;
2050 unsigned int regno;
2051 int i;
2052
2053 if (current_frame_info.initialized)
2054 return;
2055
2056 memset (&current_frame_info, 0, sizeof current_frame_info);
2057 CLEAR_HARD_REG_SET (mask);
2058
2059 /* Don't allocate scratches to the return register. */
2060 diddle_return_value (mark_reg_gr_used_mask, NULL);
2061
2062 /* Don't allocate scratches to the EH scratch registers. */
2063 if (cfun->machine->ia64_eh_epilogue_sp)
2064 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2065 if (cfun->machine->ia64_eh_epilogue_bsp)
2066 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2067
2068 /* Find the size of the register stack frame. We have only 80 local
2069 registers, because we reserve 8 for the inputs and 8 for the
2070 outputs. */
2071
2072 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2073 since we'll be adjusting that down later. */
2074 regno = LOC_REG (78) + ! frame_pointer_needed;
2075 for (; regno >= LOC_REG (0); regno--)
2076 if (regs_ever_live[regno])
2077 break;
2078 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2079
2080 /* For functions marked with the syscall_linkage attribute, we must mark
2081 all eight input registers as in use, so that locals aren't visible to
2082 the caller. */
2083
2084 if (cfun->machine->n_varargs > 0
2085 || lookup_attribute ("syscall_linkage",
2086 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2087 current_frame_info.n_input_regs = 8;
2088 else
2089 {
2090 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2091 if (regs_ever_live[regno])
2092 break;
2093 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2094 }
2095
2096 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2097 if (regs_ever_live[regno])
2098 break;
2099 i = regno - OUT_REG (0) + 1;
2100
2101 #ifndef PROFILE_HOOK
2102 /* When -p profiling, we need one output register for the mcount argument.
2103 Likewise for -a profiling for the bb_init_func argument. For -ax
2104 profiling, we need two output registers for the two bb_init_trace_func
2105 arguments. */
2106 if (current_function_profile)
2107 i = MAX (i, 1);
2108 #endif
2109 current_frame_info.n_output_regs = i;
2110
2111 /* ??? No rotating register support yet. */
2112 current_frame_info.n_rotate_regs = 0;
2113
2114 /* Discover which registers need spilling, and how much room that
2115 will take. Begin with floating point and general registers,
2116 which will always wind up on the stack. */
2117
2118 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2119 if (regs_ever_live[regno] && ! call_used_regs[regno])
2120 {
2121 SET_HARD_REG_BIT (mask, regno);
2122 spill_size += 16;
2123 n_spilled += 1;
2124 spilled_fr_p = 1;
2125 }
2126
2127 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2128 if (regs_ever_live[regno] && ! call_used_regs[regno])
2129 {
2130 SET_HARD_REG_BIT (mask, regno);
2131 spill_size += 8;
2132 n_spilled += 1;
2133 spilled_gr_p = 1;
2134 }
2135
2136 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2137 if (regs_ever_live[regno] && ! call_used_regs[regno])
2138 {
2139 SET_HARD_REG_BIT (mask, regno);
2140 spill_size += 8;
2141 n_spilled += 1;
2142 }
2143
2144 /* Now come all special registers that might get saved in other
2145 general registers. */
2146
2147 if (frame_pointer_needed)
2148 {
2149 current_frame_info.reg_fp = find_gr_spill (1);
2150 /* If we did not get a register, then we take LOC79. This is guaranteed
2151 to be free, even if regs_ever_live is already set, because this is
2152 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2153 as we don't count loc79 above. */
2154 if (current_frame_info.reg_fp == 0)
2155 {
2156 current_frame_info.reg_fp = LOC_REG (79);
2157 current_frame_info.n_local_regs++;
2158 }
2159 }
2160
2161 if (! current_function_is_leaf)
2162 {
2163 /* Emit a save of BR0 if we call other functions. Do this even
2164 if this function doesn't return, as EH depends on this to be
2165 able to unwind the stack. */
2166 SET_HARD_REG_BIT (mask, BR_REG (0));
2167
2168 current_frame_info.reg_save_b0 = find_gr_spill (1);
2169 if (current_frame_info.reg_save_b0 == 0)
2170 {
2171 spill_size += 8;
2172 n_spilled += 1;
2173 }
2174
2175 /* Similarly for ar.pfs. */
2176 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2177 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2178 if (current_frame_info.reg_save_ar_pfs == 0)
2179 {
2180 extra_spill_size += 8;
2181 n_spilled += 1;
2182 }
2183
2184 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2185 registers are clobbered, so we fall back to the stack. */
2186 current_frame_info.reg_save_gp
2187 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2188 if (current_frame_info.reg_save_gp == 0)
2189 {
2190 SET_HARD_REG_BIT (mask, GR_REG (1));
2191 spill_size += 8;
2192 n_spilled += 1;
2193 }
2194 }
2195 else
2196 {
2197 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2198 {
2199 SET_HARD_REG_BIT (mask, BR_REG (0));
2200 spill_size += 8;
2201 n_spilled += 1;
2202 }
2203
2204 if (regs_ever_live[AR_PFS_REGNUM])
2205 {
2206 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2207 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2208 if (current_frame_info.reg_save_ar_pfs == 0)
2209 {
2210 extra_spill_size += 8;
2211 n_spilled += 1;
2212 }
2213 }
2214 }
2215
2216 /* Unwind descriptor hackery: things are most efficient if we allocate
2217 consecutive GR save registers for RP, PFS, FP in that order. However,
2218 it is absolutely critical that FP get the only hard register that's
2219 guaranteed to be free, so we allocated it first. If all three did
2220 happen to be allocated hard regs, and are consecutive, rearrange them
2221 into the preferred order now. */
2222 if (current_frame_info.reg_fp != 0
2223 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2224 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2225 {
2226 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2227 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2228 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2229 }
2230
2231 /* See if we need to store the predicate register block. */
2232 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2233 if (regs_ever_live[regno] && ! call_used_regs[regno])
2234 break;
2235 if (regno <= PR_REG (63))
2236 {
2237 SET_HARD_REG_BIT (mask, PR_REG (0));
2238 current_frame_info.reg_save_pr = find_gr_spill (1);
2239 if (current_frame_info.reg_save_pr == 0)
2240 {
2241 extra_spill_size += 8;
2242 n_spilled += 1;
2243 }
2244
2245 /* ??? Mark them all as used so that register renaming and such
2246 are free to use them. */
2247 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2248 regs_ever_live[regno] = 1;
2249 }
2250
2251 /* If we're forced to use st8.spill, we're forced to save and restore
2252 ar.unat as well. The check for existing liveness allows inline asm
2253 to touch ar.unat. */
2254 if (spilled_gr_p || cfun->machine->n_varargs
2255 || regs_ever_live[AR_UNAT_REGNUM])
2256 {
2257 regs_ever_live[AR_UNAT_REGNUM] = 1;
2258 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2259 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2260 if (current_frame_info.reg_save_ar_unat == 0)
2261 {
2262 extra_spill_size += 8;
2263 n_spilled += 1;
2264 }
2265 }
2266
2267 if (regs_ever_live[AR_LC_REGNUM])
2268 {
2269 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2270 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2271 if (current_frame_info.reg_save_ar_lc == 0)
2272 {
2273 extra_spill_size += 8;
2274 n_spilled += 1;
2275 }
2276 }
2277
2278 /* If we have an odd number of words of pretend arguments written to
2279 the stack, then the FR save area will be unaligned. We round the
2280 size of this area up to keep things 16 byte aligned. */
2281 if (spilled_fr_p)
2282 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2283 else
2284 pretend_args_size = current_function_pretend_args_size;
2285
2286 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2287 + current_function_outgoing_args_size);
2288 total_size = IA64_STACK_ALIGN (total_size);
2289
2290 /* We always use the 16-byte scratch area provided by the caller, but
2291 if we are a leaf function, there's no one to which we need to provide
2292 a scratch area. */
2293 if (current_function_is_leaf)
2294 total_size = MAX (0, total_size - 16);
2295
2296 current_frame_info.total_size = total_size;
2297 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2298 current_frame_info.spill_size = spill_size;
2299 current_frame_info.extra_spill_size = extra_spill_size;
2300 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2301 current_frame_info.n_spilled = n_spilled;
2302 current_frame_info.initialized = reload_completed;
2303 }
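/* A small worked example of the size computation above, with hypothetical
   numbers: a non-leaf function with size = 40 bytes of locals, two FR
   spills and two 8-byte spills (spill_size = 2*16 + 2*8 = 48), one
   extra_spill_size slot of 8 for ar.pfs, no pretend args and 16 bytes of
   outgoing arguments gets

       total_size = IA64_STACK_ALIGN (48 + 8 + 40 + 0 + 16) = 112

   and, since the function is not a leaf, the final 16-byte adjustment
   does not apply.  */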
2304
2305 /* Compute the initial difference between the specified pair of registers. */
2306
2307 HOST_WIDE_INT
2308 ia64_initial_elimination_offset (int from, int to)
2309 {
2310 HOST_WIDE_INT offset;
2311
2312 ia64_compute_frame_size (get_frame_size ());
2313 switch (from)
2314 {
2315 case FRAME_POINTER_REGNUM:
2316 switch (to)
2317 {
2318 case HARD_FRAME_POINTER_REGNUM:
2319 if (current_function_is_leaf)
2320 offset = -current_frame_info.total_size;
2321 else
2322 offset = -(current_frame_info.total_size
2323 - current_function_outgoing_args_size - 16);
2324 break;
2325
2326 case STACK_POINTER_REGNUM:
2327 if (current_function_is_leaf)
2328 offset = 0;
2329 else
2330 offset = 16 + current_function_outgoing_args_size;
2331 break;
2332
2333 default:
2334 gcc_unreachable ();
2335 }
2336 break;
2337
2338 case ARG_POINTER_REGNUM:
2339 /* Arguments start above the 16 byte save area, unless stdarg,
2340 in which case we store through the 16 byte save area. */
2341 switch (to)
2342 {
2343 case HARD_FRAME_POINTER_REGNUM:
2344 offset = 16 - current_function_pretend_args_size;
2345 break;
2346
2347 case STACK_POINTER_REGNUM:
2348 offset = (current_frame_info.total_size
2349 + 16 - current_function_pretend_args_size);
2350 break;
2351
2352 default:
2353 gcc_unreachable ();
2354 }
2355 break;
2356
2357 default:
2358 gcc_unreachable ();
2359 }
2360
2361 return offset;
2362 }
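/* Continuing the hypothetical numbers above (total_size = 112, no pretend
   args, non-leaf, 16 bytes of outgoing arguments), the eliminations above
   work out to

       ARG_POINTER_REGNUM   to STACK_POINTER_REGNUM:  112 + 16 - 0 = 128
       FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM:  16 + 16 = 32.  */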
2363
2364 /* If there are more than a trivial number of register spills, we use
2365 two interleaved iterators so that we can get two memory references
2366 per insn group.
2367
2368 In order to simplify things in the prologue and epilogue expanders,
2369 we use helper functions to fix up the memory references after the
2370 fact with the appropriate offsets to a POST_MODIFY memory mode.
2371 The following data structure tracks the state of the two iterators
2372 while insns are being emitted. */
2373
2374 struct spill_fill_data
2375 {
2376 rtx init_after; /* point at which to emit initializations */
2377 rtx init_reg[2]; /* initial base register */
2378 rtx iter_reg[2]; /* the iterator registers */
2379 rtx *prev_addr[2]; /* address of last memory use */
2380 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2381 HOST_WIDE_INT prev_off[2]; /* last offset */
2382 int n_iter; /* number of iterators in use */
2383 int next_iter; /* next iterator to use */
2384 unsigned int save_gr_used_mask;
2385 };
2386
2387 static struct spill_fill_data spill_fill_data;
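/* For illustration: with two iterators, the helpers below alternate base
   registers between consecutive 8-byte saves, so the generated code can
   look roughly like (hypothetical registers, with the displacements fixed
   up by the POST_MODIFY bookkeeping below):

       st8.spill [iter0] = rA, 16
       st8.spill [iter1] = rB, 16
       st8.spill [iter0] = rC, 16
       ...

   which lets two memory references issue per instruction group.  */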
2388
2389 static void
2390 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2391 {
2392 int i;
2393
2394 spill_fill_data.init_after = get_last_insn ();
2395 spill_fill_data.init_reg[0] = init_reg;
2396 spill_fill_data.init_reg[1] = init_reg;
2397 spill_fill_data.prev_addr[0] = NULL;
2398 spill_fill_data.prev_addr[1] = NULL;
2399 spill_fill_data.prev_insn[0] = NULL;
2400 spill_fill_data.prev_insn[1] = NULL;
2401 spill_fill_data.prev_off[0] = cfa_off;
2402 spill_fill_data.prev_off[1] = cfa_off;
2403 spill_fill_data.next_iter = 0;
2404 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2405
2406 spill_fill_data.n_iter = 1 + (n_spills > 2);
2407 for (i = 0; i < spill_fill_data.n_iter; ++i)
2408 {
2409 int regno = next_scratch_gr_reg ();
2410 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2411 current_frame_info.gr_used_mask |= 1 << regno;
2412 }
2413 }
2414
2415 static void
2416 finish_spill_pointers (void)
2417 {
2418 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2419 }
2420
2421 static rtx
2422 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2423 {
2424 int iter = spill_fill_data.next_iter;
2425 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2426 rtx disp_rtx = GEN_INT (disp);
2427 rtx mem;
2428
2429 if (spill_fill_data.prev_addr[iter])
2430 {
2431 if (CONST_OK_FOR_N (disp))
2432 {
2433 *spill_fill_data.prev_addr[iter]
2434 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2435 gen_rtx_PLUS (DImode,
2436 spill_fill_data.iter_reg[iter],
2437 disp_rtx));
2438 REG_NOTES (spill_fill_data.prev_insn[iter])
2439 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2440 REG_NOTES (spill_fill_data.prev_insn[iter]));
2441 }
2442 else
2443 {
2444 /* ??? Could use register post_modify for loads. */
2445 if (! CONST_OK_FOR_I (disp))
2446 {
2447 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2448 emit_move_insn (tmp, disp_rtx);
2449 disp_rtx = tmp;
2450 }
2451 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2452 spill_fill_data.iter_reg[iter], disp_rtx));
2453 }
2454 }
2455 /* Micro-optimization: if we've created a frame pointer, it's at
2456 CFA 0, which may allow the real iterator to be initialized lower,
2457 slightly increasing parallelism. Also, if there are few saves
2458 it may eliminate the iterator entirely. */
2459 else if (disp == 0
2460 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2461 && frame_pointer_needed)
2462 {
2463 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2464 set_mem_alias_set (mem, get_varargs_alias_set ());
2465 return mem;
2466 }
2467 else
2468 {
2469 rtx seq, insn;
2470
2471 if (disp == 0)
2472 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2473 spill_fill_data.init_reg[iter]);
2474 else
2475 {
2476 start_sequence ();
2477
2478 if (! CONST_OK_FOR_I (disp))
2479 {
2480 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2481 emit_move_insn (tmp, disp_rtx);
2482 disp_rtx = tmp;
2483 }
2484
2485 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2486 spill_fill_data.init_reg[iter],
2487 disp_rtx));
2488
2489 seq = get_insns ();
2490 end_sequence ();
2491 }
2492
2493 /* Be careful about being the first insn in a sequence. */
2494 if (spill_fill_data.init_after)
2495 insn = emit_insn_after (seq, spill_fill_data.init_after);
2496 else
2497 {
2498 rtx first = get_insns ();
2499 if (first)
2500 insn = emit_insn_before (seq, first);
2501 else
2502 insn = emit_insn (seq);
2503 }
2504 spill_fill_data.init_after = insn;
2505
2506 /* If DISP is 0, we may or may not have a further adjustment
2507 afterward. If we do, then the load/store insn may be modified
2508 to be a post-modify. If we don't, then this copy may be
2509 eliminated by copyprop_hardreg_forward, which makes this
2510 insn garbage, which runs afoul of the sanity check in
2511 propagate_one_insn. So mark this insn as legal to delete. */
2512 if (disp == 0)
2513 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2514 REG_NOTES (insn));
2515 }
2516
2517 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2518
2519 /* ??? Not all of the spills are for varargs, but some of them are.
2520 The rest of the spills belong in an alias set of their own. But
2521 it doesn't actually hurt to include them here. */
2522 set_mem_alias_set (mem, get_varargs_alias_set ());
2523
2524 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2525 spill_fill_data.prev_off[iter] = cfa_off;
2526
2527 if (++iter >= spill_fill_data.n_iter)
2528 iter = 0;
2529 spill_fill_data.next_iter = iter;
2530
2531 return mem;
2532 }
2533
2534 static void
2535 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2536 rtx frame_reg)
2537 {
2538 int iter = spill_fill_data.next_iter;
2539 rtx mem, insn;
2540
2541 mem = spill_restore_mem (reg, cfa_off);
2542 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2543 spill_fill_data.prev_insn[iter] = insn;
2544
2545 if (frame_reg)
2546 {
2547 rtx base;
2548 HOST_WIDE_INT off;
2549
2550 RTX_FRAME_RELATED_P (insn) = 1;
2551
2552 /* Don't even pretend that the unwind code can intuit its way
2553 through a pair of interleaved post_modify iterators. Just
2554 provide the correct answer. */
2555
2556 if (frame_pointer_needed)
2557 {
2558 base = hard_frame_pointer_rtx;
2559 off = - cfa_off;
2560 }
2561 else
2562 {
2563 base = stack_pointer_rtx;
2564 off = current_frame_info.total_size - cfa_off;
2565 }
2566
2567 REG_NOTES (insn)
2568 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2569 gen_rtx_SET (VOIDmode,
2570 gen_rtx_MEM (GET_MODE (reg),
2571 plus_constant (base, off)),
2572 frame_reg),
2573 REG_NOTES (insn));
2574 }
2575 }
2576
2577 static void
2578 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2579 {
2580 int iter = spill_fill_data.next_iter;
2581 rtx insn;
2582
2583 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2584 GEN_INT (cfa_off)));
2585 spill_fill_data.prev_insn[iter] = insn;
2586 }
2587
2588 /* Wrapper functions that discard the CONST_INT spill offset. These
2589 exist so that we can give gr_spill/gr_fill the offset they need and
2590 use a consistent function interface. */
2591
2592 static rtx
2593 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2594 {
2595 return gen_movdi (dest, src);
2596 }
2597
2598 static rtx
2599 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2600 {
2601 return gen_fr_spill (dest, src);
2602 }
2603
2604 static rtx
2605 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2606 {
2607 return gen_fr_restore (dest, src);
2608 }
2609
2610 /* Called after register allocation to add any instructions needed for the
2611 prologue. Using a prologue insn is favored compared to putting all of the
2612 instructions in output_function_prologue(), since it allows the scheduler
2613 to intermix instructions with the saves of the caller saved registers. In
2614 some cases, it might be necessary to emit a barrier instruction as the last
2615 insn to prevent such scheduling.
2616
2617 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2618 so that the debug info generation code can handle them properly.
2619
2620 The register save area is laid out like so:
2621 cfa+16
2622 [ varargs spill area ]
2623 [ fr register spill area ]
2624 [ br register spill area ]
2625 [ ar register spill area ]
2626 [ pr register spill area ]
2627 [ gr register spill area ] */
2628
2629 /* ??? Get inefficient code when the frame size is larger than can fit in an
2630 adds instruction. */
2631
2632 void
2633 ia64_expand_prologue (void)
2634 {
2635 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2636 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2637 rtx reg, alt_reg;
2638
2639 ia64_compute_frame_size (get_frame_size ());
2640 last_scratch_gr_reg = 15;
2641
2642 /* If there is no epilogue, then we don't need some prologue insns.
2643 We need to avoid emitting the dead prologue insns, because flow
2644 will complain about them. */
2645 if (optimize)
2646 {
2647 edge e;
2648 edge_iterator ei;
2649
2650 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2651 if ((e->flags & EDGE_FAKE) == 0
2652 && (e->flags & EDGE_FALLTHRU) != 0)
2653 break;
2654 epilogue_p = (e != NULL);
2655 }
2656 else
2657 epilogue_p = 1;
2658
2659 /* Set the local, input, and output register names. We need to do this
2660 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2661 half. If we use in/loc/out register names, then we get assembler errors
2662 in crtn.S because there is no alloc insn or regstk directive in there. */
2663 if (! TARGET_REG_NAMES)
2664 {
2665 int inputs = current_frame_info.n_input_regs;
2666 int locals = current_frame_info.n_local_regs;
2667 int outputs = current_frame_info.n_output_regs;
2668
2669 for (i = 0; i < inputs; i++)
2670 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2671 for (i = 0; i < locals; i++)
2672 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2673 for (i = 0; i < outputs; i++)
2674 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2675 }
2676
2677 /* Set the frame pointer register name. The regnum is logically loc79,
2678 but of course we'll not have allocated that many locals. Rather than
2679 worrying about renumbering the existing rtxs, we adjust the name. */
2680 /* ??? This code means that we can never use one local register when
2681 there is a frame pointer. loc79 gets wasted in this case, as it is
2682 renamed to a register that will never be used. See also the try_locals
2683 code in find_gr_spill. */
2684 if (current_frame_info.reg_fp)
2685 {
2686 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2687 reg_names[HARD_FRAME_POINTER_REGNUM]
2688 = reg_names[current_frame_info.reg_fp];
2689 reg_names[current_frame_info.reg_fp] = tmp;
2690 }
2691
2692 /* We don't need an alloc instruction if we've used no outputs or locals. */
2693 if (current_frame_info.n_local_regs == 0
2694 && current_frame_info.n_output_regs == 0
2695 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2696 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2697 {
2698 /* If there is no alloc, but there are input registers used, then we
2699 need a .regstk directive. */
2700 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2701 ar_pfs_save_reg = NULL_RTX;
2702 }
2703 else
2704 {
2705 current_frame_info.need_regstk = 0;
2706
2707 if (current_frame_info.reg_save_ar_pfs)
2708 regno = current_frame_info.reg_save_ar_pfs;
2709 else
2710 regno = next_scratch_gr_reg ();
2711 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2712
2713 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2714 GEN_INT (current_frame_info.n_input_regs),
2715 GEN_INT (current_frame_info.n_local_regs),
2716 GEN_INT (current_frame_info.n_output_regs),
2717 GEN_INT (current_frame_info.n_rotate_regs)));
2718 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2719 }
2720
2721 /* Set up frame pointer, stack pointer, and spill iterators. */
2722
2723 n_varargs = cfun->machine->n_varargs;
2724 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2725 stack_pointer_rtx, 0);
2726
2727 if (frame_pointer_needed)
2728 {
2729 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2730 RTX_FRAME_RELATED_P (insn) = 1;
2731 }
2732
2733 if (current_frame_info.total_size != 0)
2734 {
2735 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2736 rtx offset;
2737
2738 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2739 offset = frame_size_rtx;
2740 else
2741 {
2742 regno = next_scratch_gr_reg ();
2743 offset = gen_rtx_REG (DImode, regno);
2744 emit_move_insn (offset, frame_size_rtx);
2745 }
2746
2747 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2748 stack_pointer_rtx, offset));
2749
2750 if (! frame_pointer_needed)
2751 {
2752 RTX_FRAME_RELATED_P (insn) = 1;
2753 if (GET_CODE (offset) != CONST_INT)
2754 {
2755 REG_NOTES (insn)
2756 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2757 gen_rtx_SET (VOIDmode,
2758 stack_pointer_rtx,
2759 gen_rtx_PLUS (DImode,
2760 stack_pointer_rtx,
2761 frame_size_rtx)),
2762 REG_NOTES (insn));
2763 }
2764 }
2765
2766 /* ??? At this point we must generate a magic insn that appears to
2767 modify the stack pointer, the frame pointer, and all spill
2768 iterators. This would allow the most scheduling freedom. For
2769 now, just hard stop. */
2770 emit_insn (gen_blockage ());
2771 }
2772
2773 /* Must copy out ar.unat before doing any integer spills. */
2774 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2775 {
2776 if (current_frame_info.reg_save_ar_unat)
2777 ar_unat_save_reg
2778 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2779 else
2780 {
2781 alt_regno = next_scratch_gr_reg ();
2782 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2783 current_frame_info.gr_used_mask |= 1 << alt_regno;
2784 }
2785
2786 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2787 insn = emit_move_insn (ar_unat_save_reg, reg);
2788 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2789
2790 /* Even if we're not going to generate an epilogue, we still
2791 need to save the register so that EH works. */
2792 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2793 emit_insn (gen_prologue_use (ar_unat_save_reg));
2794 }
2795 else
2796 ar_unat_save_reg = NULL_RTX;
2797
2798 /* Spill all varargs registers. Do this before spilling any GR registers,
2799 since we want the UNAT bits for the GR registers to override the UNAT
2800 bits from varargs, which we don't care about. */
2801
2802 cfa_off = -16;
2803 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2804 {
2805 reg = gen_rtx_REG (DImode, regno);
2806 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2807 }
2808
2809 /* Locate the bottom of the register save area. */
2810 cfa_off = (current_frame_info.spill_cfa_off
2811 + current_frame_info.spill_size
2812 + current_frame_info.extra_spill_size);
2813
2814 /* Save the predicate register block either in a register or in memory. */
2815 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2816 {
2817 reg = gen_rtx_REG (DImode, PR_REG (0));
2818 if (current_frame_info.reg_save_pr != 0)
2819 {
2820 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2821 insn = emit_move_insn (alt_reg, reg);
2822
2823 /* ??? Denote pr spill/fill by a DImode move that modifies all
2824 64 hard registers. */
2825 RTX_FRAME_RELATED_P (insn) = 1;
2826 REG_NOTES (insn)
2827 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2828 gen_rtx_SET (VOIDmode, alt_reg, reg),
2829 REG_NOTES (insn));
2830
2831 /* Even if we're not going to generate an epilogue, we still
2832 need to save the register so that EH works. */
2833 if (! epilogue_p)
2834 emit_insn (gen_prologue_use (alt_reg));
2835 }
2836 else
2837 {
2838 alt_regno = next_scratch_gr_reg ();
2839 alt_reg = gen_rtx_REG (DImode, alt_regno);
2840 insn = emit_move_insn (alt_reg, reg);
2841 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2842 cfa_off -= 8;
2843 }
2844 }
2845
2846 /* Handle AR regs in numerical order. All of them get special handling. */
2847 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2848 && current_frame_info.reg_save_ar_unat == 0)
2849 {
2850 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2851 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2852 cfa_off -= 8;
2853 }
2854
2855 /* The alloc insn already copied ar.pfs into a general register. The
2856 only thing we have to do now is copy that register to a stack slot
2857 if we'd not allocated a local register for the job. */
2858 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2859 && current_frame_info.reg_save_ar_pfs == 0)
2860 {
2861 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2862 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2863 cfa_off -= 8;
2864 }
2865
2866 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2867 {
2868 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2869 if (current_frame_info.reg_save_ar_lc != 0)
2870 {
2871 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2872 insn = emit_move_insn (alt_reg, reg);
2873 RTX_FRAME_RELATED_P (insn) = 1;
2874
2875 /* Even if we're not going to generate an epilogue, we still
2876 need to save the register so that EH works. */
2877 if (! epilogue_p)
2878 emit_insn (gen_prologue_use (alt_reg));
2879 }
2880 else
2881 {
2882 alt_regno = next_scratch_gr_reg ();
2883 alt_reg = gen_rtx_REG (DImode, alt_regno);
2884 emit_move_insn (alt_reg, reg);
2885 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2886 cfa_off -= 8;
2887 }
2888 }
2889
2890 if (current_frame_info.reg_save_gp)
2891 {
2892 insn = emit_move_insn (gen_rtx_REG (DImode,
2893 current_frame_info.reg_save_gp),
2894 pic_offset_table_rtx);
2895 /* We don't know for sure yet if this is actually needed, since
2896 we've not split the PIC call patterns. If all of the calls
2897 are indirect, and not followed by any uses of the gp, then
2898 this save is dead. Allow it to go away. */
2899 REG_NOTES (insn)
2900 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2901 }
2902
2903 /* We should now be at the base of the gr/br/fr spill area. */
2904 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
2905 + current_frame_info.spill_size));
2906
2907 /* Spill all general registers. */
2908 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2909 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2910 {
2911 reg = gen_rtx_REG (DImode, regno);
2912 do_spill (gen_gr_spill, reg, cfa_off, reg);
2913 cfa_off -= 8;
2914 }
2915
2916 /* Handle BR0 specially -- it may be getting stored permanently in
2917 some GR register. */
2918 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2919 {
2920 reg = gen_rtx_REG (DImode, BR_REG (0));
2921 if (current_frame_info.reg_save_b0 != 0)
2922 {
2923 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2924 insn = emit_move_insn (alt_reg, reg);
2925 RTX_FRAME_RELATED_P (insn) = 1;
2926
2927 /* Even if we're not going to generate an epilogue, we still
2928 need to save the register so that EH works. */
2929 if (! epilogue_p)
2930 emit_insn (gen_prologue_use (alt_reg));
2931 }
2932 else
2933 {
2934 alt_regno = next_scratch_gr_reg ();
2935 alt_reg = gen_rtx_REG (DImode, alt_regno);
2936 emit_move_insn (alt_reg, reg);
2937 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2938 cfa_off -= 8;
2939 }
2940 }
2941
2942 /* Spill the rest of the BR registers. */
2943 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2944 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2945 {
2946 alt_regno = next_scratch_gr_reg ();
2947 alt_reg = gen_rtx_REG (DImode, alt_regno);
2948 reg = gen_rtx_REG (DImode, regno);
2949 emit_move_insn (alt_reg, reg);
2950 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2951 cfa_off -= 8;
2952 }
2953
2954 /* Align the frame and spill all FR registers. */
2955 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2956 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2957 {
2958 gcc_assert (!(cfa_off & 15));
2959 reg = gen_rtx_REG (XFmode, regno);
2960 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2961 cfa_off -= 16;
2962 }
2963
2964 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
2965
2966 finish_spill_pointers ();
2967 }
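/* For illustration: with hypothetical frame counts of 2 inputs, 3 locals,
   1 output and no rotating registers, the alloc emitted above corresponds
   to assembler of the form

       alloc rN = ar.pfs, 2, 3, 1, 0

   where rN is either the GR chosen by find_gr_spill for ar.pfs or a
   scratch register when the saved value is headed for a stack slot.  */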
2968
2969 /* Called after register allocation to add any instructions needed for the
2970 epilogue. Using an epilogue insn is favored compared to putting all of the
2971 instructions in output_function_epilogue(), since it allows the scheduler
2972 to intermix instructions with the saves of the caller saved registers. In
2973 some cases, it might be necessary to emit a barrier instruction as the last
2974 insn to prevent such scheduling. */
2975
2976 void
2977 ia64_expand_epilogue (int sibcall_p)
2978 {
2979 rtx insn, reg, alt_reg, ar_unat_save_reg;
2980 int regno, alt_regno, cfa_off;
2981
2982 ia64_compute_frame_size (get_frame_size ());
2983
2984 /* If there is a frame pointer, then we use it instead of the stack
2985 pointer, so that the stack pointer does not need to be valid when
2986 the epilogue starts. See EXIT_IGNORE_STACK. */
2987 if (frame_pointer_needed)
2988 setup_spill_pointers (current_frame_info.n_spilled,
2989 hard_frame_pointer_rtx, 0);
2990 else
2991 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2992 current_frame_info.total_size);
2993
2994 if (current_frame_info.total_size != 0)
2995 {
2996 /* ??? At this point we must generate a magic insn that appears to
2997 modify the spill iterators and the frame pointer. This would
2998 allow the most scheduling freedom. For now, just hard stop. */
2999 emit_insn (gen_blockage ());
3000 }
3001
3002 /* Locate the bottom of the register save area. */
3003 cfa_off = (current_frame_info.spill_cfa_off
3004 + current_frame_info.spill_size
3005 + current_frame_info.extra_spill_size);
3006
3007 /* Restore the predicate registers. */
3008 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3009 {
3010 if (current_frame_info.reg_save_pr != 0)
3011 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3012 else
3013 {
3014 alt_regno = next_scratch_gr_reg ();
3015 alt_reg = gen_rtx_REG (DImode, alt_regno);
3016 do_restore (gen_movdi_x, alt_reg, cfa_off);
3017 cfa_off -= 8;
3018 }
3019 reg = gen_rtx_REG (DImode, PR_REG (0));
3020 emit_move_insn (reg, alt_reg);
3021 }
3022
3023 /* Restore the application registers. */
3024
3025 /* Load the saved unat from the stack, but do not restore it until
3026 after the GRs have been restored. */
3027 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3028 {
3029 if (current_frame_info.reg_save_ar_unat != 0)
3030 ar_unat_save_reg
3031 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3032 else
3033 {
3034 alt_regno = next_scratch_gr_reg ();
3035 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3036 current_frame_info.gr_used_mask |= 1 << alt_regno;
3037 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3038 cfa_off -= 8;
3039 }
3040 }
3041 else
3042 ar_unat_save_reg = NULL_RTX;
3043
3044 if (current_frame_info.reg_save_ar_pfs != 0)
3045 {
3046 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3047 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3048 emit_move_insn (reg, alt_reg);
3049 }
3050 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3051 {
3052 alt_regno = next_scratch_gr_reg ();
3053 alt_reg = gen_rtx_REG (DImode, alt_regno);
3054 do_restore (gen_movdi_x, alt_reg, cfa_off);
3055 cfa_off -= 8;
3056 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3057 emit_move_insn (reg, alt_reg);
3058 }
3059
3060 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3061 {
3062 if (current_frame_info.reg_save_ar_lc != 0)
3063 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3064 else
3065 {
3066 alt_regno = next_scratch_gr_reg ();
3067 alt_reg = gen_rtx_REG (DImode, alt_regno);
3068 do_restore (gen_movdi_x, alt_reg, cfa_off);
3069 cfa_off -= 8;
3070 }
3071 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3072 emit_move_insn (reg, alt_reg);
3073 }
3074
3075 /* We should now be at the base of the gr/br/fr spill area. */
3076 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3077 + current_frame_info.spill_size));
3078
3079 /* The GP may be stored on the stack in the prologue, but it's
3080 never restored in the epilogue. Skip the stack slot. */
3081 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3082 cfa_off -= 8;
3083
3084 /* Restore all general registers. */
3085 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3086 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3087 {
3088 reg = gen_rtx_REG (DImode, regno);
3089 do_restore (gen_gr_restore, reg, cfa_off);
3090 cfa_off -= 8;
3091 }
3092
3093 /* Restore the branch registers. Handle B0 specially, as it may
3094 have gotten stored in some GR register. */
3095 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3096 {
3097 if (current_frame_info.reg_save_b0 != 0)
3098 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3099 else
3100 {
3101 alt_regno = next_scratch_gr_reg ();
3102 alt_reg = gen_rtx_REG (DImode, alt_regno);
3103 do_restore (gen_movdi_x, alt_reg, cfa_off);
3104 cfa_off -= 8;
3105 }
3106 reg = gen_rtx_REG (DImode, BR_REG (0));
3107 emit_move_insn (reg, alt_reg);
3108 }
3109
3110 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3111 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3112 {
3113 alt_regno = next_scratch_gr_reg ();
3114 alt_reg = gen_rtx_REG (DImode, alt_regno);
3115 do_restore (gen_movdi_x, alt_reg, cfa_off);
3116 cfa_off -= 8;
3117 reg = gen_rtx_REG (DImode, regno);
3118 emit_move_insn (reg, alt_reg);
3119 }
3120
3121 /* Restore floating point registers. */
3122 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3123 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3124 {
3125 gcc_assert (!(cfa_off & 15));
3126 reg = gen_rtx_REG (XFmode, regno);
3127 do_restore (gen_fr_restore_x, reg, cfa_off);
3128 cfa_off -= 16;
3129 }
3130
3131 /* Restore ar.unat for real. */
3132 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3133 {
3134 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3135 emit_move_insn (reg, ar_unat_save_reg);
3136 }
3137
3138 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3139
3140 finish_spill_pointers ();
3141
3142 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3143 {
3144 /* ??? At this point we must generate a magic insn that appears to
3145 modify the spill iterators, the stack pointer, and the frame
3146 pointer. This would allow the most scheduling freedom. For now,
3147 just hard stop. */
3148 emit_insn (gen_blockage ());
3149 }
3150
3151 if (cfun->machine->ia64_eh_epilogue_sp)
3152 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3153 else if (frame_pointer_needed)
3154 {
3155 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3156 RTX_FRAME_RELATED_P (insn) = 1;
3157 }
3158 else if (current_frame_info.total_size)
3159 {
3160 rtx offset, frame_size_rtx;
3161
3162 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3163 if (CONST_OK_FOR_I (current_frame_info.total_size))
3164 offset = frame_size_rtx;
3165 else
3166 {
3167 regno = next_scratch_gr_reg ();
3168 offset = gen_rtx_REG (DImode, regno);
3169 emit_move_insn (offset, frame_size_rtx);
3170 }
3171
3172 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3173 offset));
3174
3175 RTX_FRAME_RELATED_P (insn) = 1;
3176 if (GET_CODE (offset) != CONST_INT)
3177 {
3178 REG_NOTES (insn)
3179 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3180 gen_rtx_SET (VOIDmode,
3181 stack_pointer_rtx,
3182 gen_rtx_PLUS (DImode,
3183 stack_pointer_rtx,
3184 frame_size_rtx)),
3185 REG_NOTES (insn));
3186 }
3187 }
3188
3189 if (cfun->machine->ia64_eh_epilogue_bsp)
3190 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3191
3192 if (! sibcall_p)
3193 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3194 else
3195 {
3196 int fp = GR_REG (2);
3197 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
3198 first available call clobbered register. If there was a frame_pointer
3199 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3200 so we have to make sure we're using the string "r2" when emitting
3201 the register name for the assembler. */
3202 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3203 fp = HARD_FRAME_POINTER_REGNUM;
3204
3205 /* We must emit an alloc to force the input registers to become output
3206 registers. Otherwise, if the callee tries to pass its parameters
3207 through to another call without an intervening alloc, then these
3208 values get lost. */
3209 /* ??? We don't need to preserve all input registers. We only need to
3210 preserve those input registers used as arguments to the sibling call.
3211 It is unclear how to compute that number here. */
3212 if (current_frame_info.n_input_regs != 0)
3213 {
3214 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3215 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3216 const0_rtx, const0_rtx,
3217 n_inputs, const0_rtx));
3218 RTX_FRAME_RELATED_P (insn) = 1;
3219 }
3220 }
3221 }
3222
3223 /* Return 1 if br.ret can do all the work required to return from a
3224 function. */
3225
3226 int
3227 ia64_direct_return (void)
3228 {
3229 if (reload_completed && ! frame_pointer_needed)
3230 {
3231 ia64_compute_frame_size (get_frame_size ());
3232
3233 return (current_frame_info.total_size == 0
3234 && current_frame_info.n_spilled == 0
3235 && current_frame_info.reg_save_b0 == 0
3236 && current_frame_info.reg_save_pr == 0
3237 && current_frame_info.reg_save_ar_pfs == 0
3238 && current_frame_info.reg_save_ar_unat == 0
3239 && current_frame_info.reg_save_ar_lc == 0);
3240 }
3241 return 0;
3242 }
3243
3244 /* Return the magic cookie that we use to hold the return address
3245 during early compilation. */
3246
3247 rtx
3248 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3249 {
3250 if (count != 0)
3251 return NULL;
3252 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3253 }
3254
3255 /* Split this value after reload, now that we know where the return
3256 address is saved. */
3257
3258 void
3259 ia64_split_return_addr_rtx (rtx dest)
3260 {
3261 rtx src;
3262
3263 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3264 {
3265 if (current_frame_info.reg_save_b0 != 0)
3266 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3267 else
3268 {
3269 HOST_WIDE_INT off;
3270 unsigned int regno;
3271
3272 /* Compute offset from CFA for BR0. */
3273 /* ??? Must be kept in sync with ia64_expand_prologue. */
3274 off = (current_frame_info.spill_cfa_off
3275 + current_frame_info.spill_size);
3276 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3277 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3278 off -= 8;
3279
3280 /* Convert CFA offset to a register based offset. */
3281 if (frame_pointer_needed)
3282 src = hard_frame_pointer_rtx;
3283 else
3284 {
3285 src = stack_pointer_rtx;
3286 off += current_frame_info.total_size;
3287 }
3288
3289 /* Load address into scratch register. */
3290 if (CONST_OK_FOR_I (off))
3291 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3292 else
3293 {
3294 emit_move_insn (dest, GEN_INT (off));
3295 emit_insn (gen_adddi3 (dest, src, dest));
3296 }
3297
3298 src = gen_rtx_MEM (Pmode, dest);
3299 }
3300 }
3301 else
3302 src = gen_rtx_REG (DImode, BR_REG (0));
3303
3304 emit_move_insn (dest, src);
3305 }
3306
3307 int
3308 ia64_hard_regno_rename_ok (int from, int to)
3309 {
3310 /* Don't clobber any of the registers we reserved for the prologue. */
3311 if (to == current_frame_info.reg_fp
3312 || to == current_frame_info.reg_save_b0
3313 || to == current_frame_info.reg_save_pr
3314 || to == current_frame_info.reg_save_ar_pfs
3315 || to == current_frame_info.reg_save_ar_unat
3316 || to == current_frame_info.reg_save_ar_lc)
3317 return 0;
3318
3319 if (from == current_frame_info.reg_fp
3320 || from == current_frame_info.reg_save_b0
3321 || from == current_frame_info.reg_save_pr
3322 || from == current_frame_info.reg_save_ar_pfs
3323 || from == current_frame_info.reg_save_ar_unat
3324 || from == current_frame_info.reg_save_ar_lc)
3325 return 0;
3326
3327 /* Don't use output registers outside the register frame. */
3328 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3329 return 0;
3330
3331 /* Retain even/oddness on predicate register pairs. */
3332 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3333 return (from & 1) == (to & 1);
3334
3335 return 1;
3336 }
3337
3338 /* Target hook for assembling integer objects. Handle word-sized
3339 aligned objects and detect the cases when @fptr is needed. */
3340
3341 static bool
3342 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3343 {
3344 if (size == POINTER_SIZE / BITS_PER_UNIT
3345 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3346 && GET_CODE (x) == SYMBOL_REF
3347 && SYMBOL_REF_FUNCTION_P (x))
3348 {
3349 static const char * const directive[2][2] = {
3350 /* 64-bit pointer */ /* 32-bit pointer */
3351 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3352 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3353 };
3354 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3355 output_addr_const (asm_out_file, x);
3356 fputs (")\n", asm_out_file);
3357 return true;
3358 }
3359 return default_assemble_integer (x, size, aligned_p);
3360 }
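/* For illustration: given an aligned 64-bit pointer to a hypothetical
   function foo, the directive table above produces

       data8 @fptr(foo)

   while an unaligned 32-bit pointer would instead use data4.ua.  */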
3361
3362 /* Emit the function prologue. */
3363
3364 static void
3365 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3366 {
3367 int mask, grsave, grsave_prev;
3368
3369 if (current_frame_info.need_regstk)
3370 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3371 current_frame_info.n_input_regs,
3372 current_frame_info.n_local_regs,
3373 current_frame_info.n_output_regs,
3374 current_frame_info.n_rotate_regs);
3375
3376 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3377 return;
3378
3379 /* Emit the .prologue directive. */
3380
3381 mask = 0;
3382 grsave = grsave_prev = 0;
3383 if (current_frame_info.reg_save_b0 != 0)
3384 {
3385 mask |= 8;
3386 grsave = grsave_prev = current_frame_info.reg_save_b0;
3387 }
3388 if (current_frame_info.reg_save_ar_pfs != 0
3389 && (grsave_prev == 0
3390 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3391 {
3392 mask |= 4;
3393 if (grsave_prev == 0)
3394 grsave = current_frame_info.reg_save_ar_pfs;
3395 grsave_prev = current_frame_info.reg_save_ar_pfs;
3396 }
3397 if (current_frame_info.reg_fp != 0
3398 && (grsave_prev == 0
3399 || current_frame_info.reg_fp == grsave_prev + 1))
3400 {
3401 mask |= 2;
3402 if (grsave_prev == 0)
3403 grsave = HARD_FRAME_POINTER_REGNUM;
3404 grsave_prev = current_frame_info.reg_fp;
3405 }
3406 if (current_frame_info.reg_save_pr != 0
3407 && (grsave_prev == 0
3408 || current_frame_info.reg_save_pr == grsave_prev + 1))
3409 {
3410 mask |= 1;
3411 if (grsave_prev == 0)
3412 grsave = current_frame_info.reg_save_pr;
3413 }
3414
3415 if (mask && TARGET_GNU_AS)
3416 fprintf (file, "\t.prologue %d, %d\n", mask,
3417 ia64_dbx_register_number (grsave));
3418 else
3419 fputs ("\t.prologue\n", file);
3420
3421 /* Emit a .spill directive, if necessary, to relocate the base of
3422 the register spill area. */
3423 if (current_frame_info.spill_cfa_off != -16)
3424 fprintf (file, "\t.spill %ld\n",
3425 (long) (current_frame_info.spill_cfa_off
3426 + current_frame_info.spill_size));
3427 }
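/* For illustration: the .regstk directive above simply echoes the four
   frame counts, e.g. with hypothetical counts of 2 inputs, 3 locals,
   1 output and no rotating registers:

       .regstk 2, 3, 1, 0

   The mask passed to .prologue records which of b0, ar.pfs, fp and pr
   were given consecutive GR save registers (bits 8, 4, 2 and 1
   respectively), with GRSAVE naming the first such register.  */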
3428
3429 /* Emit the .body directive at the scheduled end of the prologue. */
3430
3431 static void
3432 ia64_output_function_end_prologue (FILE *file)
3433 {
3434 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3435 return;
3436
3437 fputs ("\t.body\n", file);
3438 }
3439
3440 /* Emit the function epilogue. */
3441
3442 static void
3443 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3444 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3445 {
3446 int i;
3447
3448 if (current_frame_info.reg_fp)
3449 {
3450 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3451 reg_names[HARD_FRAME_POINTER_REGNUM]
3452 = reg_names[current_frame_info.reg_fp];
3453 reg_names[current_frame_info.reg_fp] = tmp;
3454 }
3455 if (! TARGET_REG_NAMES)
3456 {
3457 for (i = 0; i < current_frame_info.n_input_regs; i++)
3458 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3459 for (i = 0; i < current_frame_info.n_local_regs; i++)
3460 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3461 for (i = 0; i < current_frame_info.n_output_regs; i++)
3462 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3463 }
3464
3465 current_frame_info.initialized = 0;
3466 }
3467
3468 int
3469 ia64_dbx_register_number (int regno)
3470 {
3471 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3472 from its home at loc79 to something inside the register frame. We
3473 must perform the same renumbering here for the debug info. */
3474 if (current_frame_info.reg_fp)
3475 {
3476 if (regno == HARD_FRAME_POINTER_REGNUM)
3477 regno = current_frame_info.reg_fp;
3478 else if (regno == current_frame_info.reg_fp)
3479 regno = HARD_FRAME_POINTER_REGNUM;
3480 }
3481
3482 if (IN_REGNO_P (regno))
3483 return 32 + regno - IN_REG (0);
3484 else if (LOC_REGNO_P (regno))
3485 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3486 else if (OUT_REGNO_P (regno))
3487 return (32 + current_frame_info.n_input_regs
3488 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3489 else
3490 return regno;
3491 }
3492
3493 void
3494 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3495 {
3496 rtx addr_reg, eight = GEN_INT (8);
3497
3498 /* The Intel assembler requires that the global __ia64_trampoline symbol
3499 be declared explicitly. */
3500 if (!TARGET_GNU_AS)
3501 {
3502 static bool declared_ia64_trampoline = false;
3503
3504 if (!declared_ia64_trampoline)
3505 {
3506 declared_ia64_trampoline = true;
3507 (*targetm.asm_out.globalize_label) (asm_out_file,
3508 "__ia64_trampoline");
3509 }
3510 }
3511
3512 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3513 addr = convert_memory_address (Pmode, addr);
3514 fnaddr = convert_memory_address (Pmode, fnaddr);
3515 static_chain = convert_memory_address (Pmode, static_chain);
3516
3517 /* Load up our iterator. */
3518 addr_reg = gen_reg_rtx (Pmode);
3519 emit_move_insn (addr_reg, addr);
3520
3521 /* The first two words are the fake descriptor:
3522 __ia64_trampoline, ADDR+16. */
3523 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3524 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3525 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3526
3527 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3528 copy_to_reg (plus_constant (addr, 16)));
3529 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3530
3531 /* The third word is the target descriptor. */
3532 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3533 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3534
3535 /* The fourth word is the static chain. */
3536 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3537 }
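
/* For illustration, a sketch of the 32-byte block built above, assuming
   ADDR names its start (the behavior of the __ia64_trampoline stub itself
   lives in libgcc and is only paraphrased here):

     [ADDR+ 0]  __ia64_trampoline   \  fake function descriptor handed out
     [ADDR+ 8]  ADDR+16             /  as the address of the trampoline
     [ADDR+16]  FNADDR                 descriptor of the real target
     [ADDR+24]  STATIC_CHAIN           static chain value

   The stub is expected to load the last two words at run time before
   transferring control to the target.  */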
3538 \f
3539 /* Do any needed setup for a variadic function. CUM has not been updated
3540 for the last named argument which has type TYPE and mode MODE.
3541
3542 We generate the actual spill instructions during prologue generation. */
3543
3544 static void
3545 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3546 tree type, int * pretend_size,
3547 int second_time ATTRIBUTE_UNUSED)
3548 {
3549 CUMULATIVE_ARGS next_cum = *cum;
3550
3551 /* Skip the current argument. */
3552 ia64_function_arg_advance (&next_cum, mode, type, 1);
3553
3554 if (next_cum.words < MAX_ARGUMENT_SLOTS)
3555 {
3556 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3557 *pretend_size = n * UNITS_PER_WORD;
3558 cfun->machine->n_varargs = n;
3559 }
3560 }
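
/* A worked example, assuming the usual MAX_ARGUMENT_SLOTS of 8 and
   8-byte words: for  int f (int a, int b, ...)  the advance above leaves
   next_cum.words == 2, so n == 6 and *pretend_size becomes 48 bytes of
   register save area for the unnamed argument slots.  */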
3561
3562 /* Check whether TYPE is a homogeneous floating point aggregate. If
3563 it is, return the mode of the floating point type that appears
3564 in all leaves. If it is not, return VOIDmode.
3565
3566 An aggregate is a homogeneous floating point aggregate if all
3567 fields/elements in it have the same floating point type (e.g.,
3568 SFmode). 128-bit quad-precision floats are excluded.
3569
3570 Variable sized aggregates should never arrive here, since we should
3571 have already decided to pass them by reference. Top-level zero-sized
3572 aggregates are excluded because our parallels crash the middle-end. */
3573
3574 static enum machine_mode
3575 hfa_element_mode (tree type, bool nested)
3576 {
3577 enum machine_mode element_mode = VOIDmode;
3578 enum machine_mode mode;
3579 enum tree_code code = TREE_CODE (type);
3580 int know_element_mode = 0;
3581 tree t;
3582
3583 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3584 return VOIDmode;
3585
3586 switch (code)
3587 {
3588 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3589 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3590 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3591 case LANG_TYPE: case FUNCTION_TYPE:
3592 return VOIDmode;
3593
3594 /* Fortran complex types are supposed to be HFAs, so we need to handle
3595 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3596 types though. */
3597 case COMPLEX_TYPE:
3598 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3599 && TYPE_MODE (type) != TCmode)
3600 return GET_MODE_INNER (TYPE_MODE (type));
3601 else
3602 return VOIDmode;
3603
3604 case REAL_TYPE:
3605 /* We want to return VOIDmode for raw REAL_TYPEs, but return the actual
3606 mode if this type is contained within an aggregate. */
3607 if (nested && TYPE_MODE (type) != TFmode)
3608 return TYPE_MODE (type);
3609 else
3610 return VOIDmode;
3611
3612 case ARRAY_TYPE:
3613 return hfa_element_mode (TREE_TYPE (type), 1);
3614
3615 case RECORD_TYPE:
3616 case UNION_TYPE:
3617 case QUAL_UNION_TYPE:
3618 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3619 {
3620 if (TREE_CODE (t) != FIELD_DECL)
3621 continue;
3622
3623 mode = hfa_element_mode (TREE_TYPE (t), 1);
3624 if (know_element_mode)
3625 {
3626 if (mode != element_mode)
3627 return VOIDmode;
3628 }
3629 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3630 return VOIDmode;
3631 else
3632 {
3633 know_element_mode = 1;
3634 element_mode = mode;
3635 }
3636 }
3637 return element_mode;
3638
3639 default:
3640 /* If we reach here, we probably have some front-end specific type
3641 that the backend doesn't know about. This can happen via the
3642 aggregate_value_p call in init_function_start. All we can do is
3643 ignore unknown tree types. */
3644 return VOIDmode;
3645 }
3646
3647 return VOIDmode;
3648 }
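
/* A few illustrative cases (a sketch only; these types are not used by
   the compiler itself):

     struct s1 { float x, y; };          -> SFmode   (an SFmode HFA)
     struct s2 { double d[3]; };         -> DFmode   (a DFmode HFA)
     struct s3 { float x; double y; };   -> VOIDmode (mixed element types)
     _Complex double                     -> DFmode   (complex treated as HFA)

   A 128-bit quad-precision (TFmode) field disqualifies the aggregate, per
   the REAL_TYPE case above.  */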
3649
3650 /* Return the number of words required to hold a quantity of TYPE and MODE
3651 when passed as an argument. */
3652 static int
3653 ia64_function_arg_words (tree type, enum machine_mode mode)
3654 {
3655 int words;
3656
3657 if (mode == BLKmode)
3658 words = int_size_in_bytes (type);
3659 else
3660 words = GET_MODE_SIZE (mode);
3661
3662 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
3663 }
3664
3665 /* Return the number of registers that should be skipped so the current
3666 argument (described by TYPE and WORDS) will be properly aligned.
3667
3668 Integer and float arguments larger than 8 bytes start at the next
3669 even boundary. Aggregates larger than 8 bytes start at the next
3670 even boundary if the aggregate has 16 byte alignment. Note that
3671 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3672 but are still to be aligned in registers.
3673
3674 ??? The ABI does not specify how to handle aggregates with
3675 alignment from 9 to 15 bytes, or greater than 16. We handle them
3676 all as if they had 16 byte alignment. Such aggregates can occur
3677 only if gcc extensions are used. */
3678 static int
3679 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3680 {
3681 if ((cum->words & 1) == 0)
3682 return 0;
3683
3684 if (type
3685 && TREE_CODE (type) != INTEGER_TYPE
3686 && TREE_CODE (type) != REAL_TYPE)
3687 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3688 else
3689 return words > 1;
3690 }
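
/* For example, with cum->words == 3 (an odd slot), a 16-byte-aligned
   aggregate or a 16-byte integer yields 1 and is pushed to slot 4, while
   an 8-byte value yields 0 and stays where it is.  This merely restates
   the rules above with concrete numbers.  */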
3691
3692 /* Return rtx for register where argument is passed, or zero if it is passed
3693 on the stack. */
3694 /* ??? 128-bit quad-precision floats are always passed in general
3695 registers. */
3696
3697 rtx
3698 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3699 int named, int incoming)
3700 {
3701 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3702 int words = ia64_function_arg_words (type, mode);
3703 int offset = ia64_function_arg_offset (cum, type, words);
3704 enum machine_mode hfa_mode = VOIDmode;
3705
3706 /* If all argument slots are used, then it must go on the stack. */
3707 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3708 return 0;
3709
3710 /* Check for and handle homogeneous FP aggregates. */
3711 if (type)
3712 hfa_mode = hfa_element_mode (type, 0);
3713
3714 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3715 and unprototyped hfas are passed specially. */
3716 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3717 {
3718 rtx loc[16];
3719 int i = 0;
3720 int fp_regs = cum->fp_regs;
3721 int int_regs = cum->words + offset;
3722 int hfa_size = GET_MODE_SIZE (hfa_mode);
3723 int byte_size;
3724 int args_byte_size;
3725
3726 /* If prototyped, pass it in FR regs then GR regs.
3727 If not prototyped, pass it in both FR and GR regs.
3728
3729 If this is an SFmode aggregate, then it is possible to run out of
3730 FR regs while GR regs are still left. In that case, we pass the
3731 remaining part in the GR regs. */
3732
3733 /* Fill the FP regs. We do this always. We stop if we reach the end
3734 of the argument, the last FP register, or the last argument slot. */
3735
3736 byte_size = ((mode == BLKmode)
3737 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3738 args_byte_size = int_regs * UNITS_PER_WORD;
3739 offset = 0;
3740 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3741 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3742 {
3743 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3744 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3745 + fp_regs)),
3746 GEN_INT (offset));
3747 offset += hfa_size;
3748 args_byte_size += hfa_size;
3749 fp_regs++;
3750 }
3751
3752 /* If no prototype, then the whole thing must go in GR regs. */
3753 if (! cum->prototype)
3754 offset = 0;
3755 /* If this is an SFmode aggregate, then we might have some left over
3756 that needs to go in GR regs. */
3757 else if (byte_size != offset)
3758 int_regs += offset / UNITS_PER_WORD;
3759
3760 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3761
3762 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3763 {
3764 enum machine_mode gr_mode = DImode;
3765 unsigned int gr_size;
3766
3767 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3768 then this goes in a GR reg left adjusted/little endian, right
3769 adjusted/big endian. */
3770 /* ??? Currently this is handled wrong, because 4-byte hunks are
3771 always right adjusted/little endian. */
3772 if (offset & 0x4)
3773 gr_mode = SImode;
3774 /* If we have an even 4 byte hunk because the aggregate is a
3775 multiple of 4 bytes in size, then this goes in a GR reg right
3776 adjusted/little endian. */
3777 else if (byte_size - offset == 4)
3778 gr_mode = SImode;
3779
3780 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3781 gen_rtx_REG (gr_mode, (basereg
3782 + int_regs)),
3783 GEN_INT (offset));
3784
3785 gr_size = GET_MODE_SIZE (gr_mode);
3786 offset += gr_size;
3787 if (gr_size == UNITS_PER_WORD
3788 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3789 int_regs++;
3790 else if (gr_size > UNITS_PER_WORD)
3791 int_regs += gr_size / UNITS_PER_WORD;
3792 }
3793 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3794 }
3795
3796 /* Integral values and aggregates go in general registers. If we have run out of
3797 FR registers, then FP values must also go in general registers. This can
3798 happen when we have a SFmode HFA. */
3799 else if (mode == TFmode || mode == TCmode
3800 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3801 {
3802 int byte_size = ((mode == BLKmode)
3803 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3804 if (BYTES_BIG_ENDIAN
3805 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3806 && byte_size < UNITS_PER_WORD
3807 && byte_size > 0)
3808 {
3809 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3810 gen_rtx_REG (DImode,
3811 (basereg + cum->words
3812 + offset)),
3813 const0_rtx);
3814 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3815 }
3816 else
3817 return gen_rtx_REG (mode, basereg + cum->words + offset);
3818
3819 }
3820
3821 /* If there is a prototype, then FP values go in a FR register when
3822 named, and in a GR register when unnamed. */
3823 else if (cum->prototype)
3824 {
3825 if (named)
3826 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3827 /* In big-endian mode, an anonymous SFmode value must be represented
3828 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3829 the value into the high half of the general register. */
3830 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3831 return gen_rtx_PARALLEL (mode,
3832 gen_rtvec (1,
3833 gen_rtx_EXPR_LIST (VOIDmode,
3834 gen_rtx_REG (DImode, basereg + cum->words + offset),
3835 const0_rtx)));
3836 /* Similarly, an anonymous XFmode value must be split into two
3837 registers and padded appropriately. */
3838 else if (BYTES_BIG_ENDIAN && mode == XFmode)
3839 {
3840 rtx loc[2];
3841 loc[0] = gen_rtx_EXPR_LIST (VOIDmode,
3842 gen_rtx_REG (DImode, basereg + cum->words + offset),
3843 const0_rtx);
3844 loc[1] = gen_rtx_EXPR_LIST (VOIDmode,
3845 gen_rtx_REG (DImode, basereg + cum->words + offset + 1),
3846 GEN_INT (UNITS_PER_WORD));
3847 return gen_rtx_PARALLEL (mode, gen_rtvec_v (2, loc));
3848 }
3849 else
3850 return gen_rtx_REG (mode, basereg + cum->words + offset);
3851 }
3852 /* If there is no prototype, then FP values go in both FR and GR
3853 registers. */
3854 else
3855 {
3856 /* See comment above. */
3857 enum machine_mode inner_mode =
3858 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3859
3860 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3861 gen_rtx_REG (mode, (FR_ARG_FIRST
3862 + cum->fp_regs)),
3863 const0_rtx);
3864 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3865 gen_rtx_REG (inner_mode,
3866 (basereg + cum->words
3867 + offset)),
3868 const0_rtx);
3869
3870 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3871 }
3872 }
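
/* A worked example of the HFA path above (a sketch; it assumes the
   argument starts at slot 0 with no FP argument registers in use):

     struct hfa { float f[4]; };    16 bytes of SFmode elements

   Passed prototyped and named, it becomes
     (parallel [f8 @0] [f9 @4] [f10 @8] [f11 @12])
   i.e. it travels entirely in FP argument registers.  Unprototyped, GR
   copies of the same bytes are appended in the first two general argument
   registers as well, so an unprototyped callee can still find the value
   there.  */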
3873
3874 /* Return number of bytes, at the beginning of the argument, that must be
3875 put in registers. 0 if the argument is entirely in registers or entirely
3876 in memory. */
3877
3878 static int
3879 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3880 tree type, bool named ATTRIBUTE_UNUSED)
3881 {
3882 int words = ia64_function_arg_words (type, mode);
3883 int offset = ia64_function_arg_offset (cum, type, words);
3884
3885 /* If all argument slots are used, then it must go on the stack. */
3886 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3887 return 0;
3888
3889 /* It doesn't matter whether the argument goes in FR or GR regs. If
3890 it fits within the 8 argument slots, then it goes entirely in
3891 registers. If it extends past the last argument slot, then the rest
3892 goes on the stack. */
3893
3894 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3895 return 0;
3896
3897 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
3898 }
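
/* Worked arithmetic for the rule above, with MAX_ARGUMENT_SLOTS == 8 and
   8-byte words: for cum->words == 6, offset 0 and a 4-word argument,
   6 + 4 > 8, so (8 - 6) * 8 == 16 bytes travel in the last two argument
   registers and the remaining bytes go on the stack.  */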
3899
3900 /* Update CUM to point after this argument. This is patterned after
3901 ia64_function_arg. */
3902
3903 void
3904 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3905 tree type, int named)
3906 {
3907 int words = ia64_function_arg_words (type, mode);
3908 int offset = ia64_function_arg_offset (cum, type, words);
3909 enum machine_mode hfa_mode = VOIDmode;
3910
3911 /* If all arg slots are already full, then there is nothing to do. */
3912 if (cum->words >= MAX_ARGUMENT_SLOTS)
3913 return;
3914
3915 cum->words += words + offset;
3916
3917 /* Check for and handle homogeneous FP aggregates. */
3918 if (type)
3919 hfa_mode = hfa_element_mode (type, 0);
3920
3921 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3922 and unprototyped hfas are passed specially. */
3923 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3924 {
3925 int fp_regs = cum->fp_regs;
3926 /* This is the original value of cum->words + offset. */
3927 int int_regs = cum->words - words;
3928 int hfa_size = GET_MODE_SIZE (hfa_mode);
3929 int byte_size;
3930 int args_byte_size;
3931
3932 /* If prototyped, pass it in FR regs then GR regs.
3933 If not prototyped, pass it in both FR and GR regs.
3934
3935 If this is an SFmode aggregate, then it is possible to run out of
3936 FR regs while GR regs are still left. In that case, we pass the
3937 remaining part in the GR regs. */
3938
3939 /* Fill the FP regs. We do this always. We stop if we reach the end
3940 of the argument, the last FP register, or the last argument slot. */
3941
3942 byte_size = ((mode == BLKmode)
3943 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3944 args_byte_size = int_regs * UNITS_PER_WORD;
3945 offset = 0;
3946 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3947 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3948 {
3949 offset += hfa_size;
3950 args_byte_size += hfa_size;
3951 fp_regs++;
3952 }
3953
3954 cum->fp_regs = fp_regs;
3955 }
3956
3957 /* Integral values and aggregates go in general registers. So do TFmode FP values.
3958 If we have run out of FR registers, then other FP values must also go in
3959 general registers. This can happen when we have a SFmode HFA. */
3960 else if (mode == TFmode || mode == TCmode
3961 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3962 cum->int_regs = cum->words;
3963
3964 /* If there is a prototype, then FP values go in a FR register when
3965 named, and in a GR register when unnamed. */
3966 else if (cum->prototype)
3967 {
3968 if (! named)
3969 cum->int_regs = cum->words;
3970 else
3971 /* ??? Complex types should not reach here. */
3972 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3973 }
3974 /* If there is no prototype, then FP values go in both FR and GR
3975 registers. */
3976 else
3977 {
3978 /* ??? Complex types should not reach here. */
3979 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3980 cum->int_regs = cum->words;
3981 }
3982 }
3983
3984 /* Arguments with alignment larger than 8 bytes start at the next even
3985 boundary. On ILP32 HP-UX, TFmode arguments start on the next even boundary
3986 even though their normal alignment is 8 bytes. See ia64_function_arg. */
3987
3988 int
3989 ia64_function_arg_boundary (enum machine_mode mode, tree type)
3990 {
3991
3992 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
3993 return PARM_BOUNDARY * 2;
3994
3995 if (type)
3996 {
3997 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
3998 return PARM_BOUNDARY * 2;
3999 else
4000 return PARM_BOUNDARY;
4001 }
4002
4003 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4004 return PARM_BOUNDARY * 2;
4005 else
4006 return PARM_BOUNDARY;
4007 }
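
/* Concretely (PARM_BOUNDARY is 64 bits here): a type aligned to more than
   8 bytes reports 128, a typeless mode wider than 64 bits reports 128,
   the ILP32 HP-UX TFmode case above reports 128 despite TFmode's nominal
   8-byte alignment, and everything else reports 64.  */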
4008
4009 /* Variable sized types are passed by reference. */
4010 /* ??? At present this is a GCC extension to the IA-64 ABI. */
4011
4012 static bool
4013 ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4014 enum machine_mode mode ATTRIBUTE_UNUSED,
4015 tree type, bool named ATTRIBUTE_UNUSED)
4016 {
4017 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4018 }
4019
4020 /* True if it is OK to do sibling call optimization for the specified
4021 call expression EXP. DECL will be the called function, or NULL if
4022 this is an indirect call. */
4023 static bool
4024 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4025 {
4026 /* We can't perform a sibcall if the current function has the syscall_linkage
4027 attribute. */
4028 if (lookup_attribute ("syscall_linkage",
4029 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4030 return false;
4031
4032 /* We must always return with our current GP. This means we can
4033 only sibcall to functions defined in the current module. */
4034 return decl && (*targetm.binds_local_p) (decl);
4035 }
4036 \f
4037
4038 /* Implement va_arg. */
4039
4040 static tree
4041 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4042 {
4043 /* Variable sized types are passed by reference. */
4044 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4045 {
4046 tree ptrtype = build_pointer_type (type);
4047 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4048 return build_va_arg_indirect_ref (addr);
4049 }
4050
4051 /* Aggregate arguments with alignment larger than 8 bytes start at
4052 the next even boundary. Integer and floating point arguments
4053 do so if they are larger than 8 bytes, whether or not they are
4054 also aligned larger than 8 bytes. */
4055 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4056 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4057 {
4058 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
4059 build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
4060 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
4061 build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
4062 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4063 gimplify_and_add (t, pre_p);
4064 }
4065
4066 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4067 }
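
/* The alignment step above computes, with 8-byte words,
     valist = (valist + 15) & -16;
   so, for example, a va_list value of 0x...58 is advanced to 0x...60
   before the argument is fetched.  This is only a sketch of the
   arithmetic; the actual loads are emitted by std_gimplify_va_arg_expr.  */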
4068 \f
4069 /* Return 1 if the function return value is returned in memory. Return 0 if it is
4070 in a register. */
4071
4072 static bool
4073 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
4074 {
4075 enum machine_mode mode;
4076 enum machine_mode hfa_mode;
4077 HOST_WIDE_INT byte_size;
4078
4079 mode = TYPE_MODE (valtype);
4080 byte_size = GET_MODE_SIZE (mode);
4081 if (mode == BLKmode)
4082 {
4083 byte_size = int_size_in_bytes (valtype);
4084 if (byte_size < 0)
4085 return true;
4086 }
4087
4088 /* HFAs with up to 8 elements are returned in the FP argument registers. */
4089
4090 hfa_mode = hfa_element_mode (valtype, 0);
4091 if (hfa_mode != VOIDmode)
4092 {
4093 int hfa_size = GET_MODE_SIZE (hfa_mode);
4094
4095 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4096 return true;
4097 else
4098 return false;
4099 }
4100 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4101 return true;
4102 else
4103 return false;
4104 }
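
/* For instance: struct { double a, b, c, d; } is a DFmode HFA of four
   elements (4 <= 8), so it is returned in FP registers; ten doubles
   overflow the eight slots and go to memory; and a non-HFA aggregate
   larger than UNITS_PER_WORD * MAX_INT_RETURN_SLOTS bytes also goes to
   memory.  A sketch only; the exact cut-offs are the constants used
   above.  */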
4105
4106 /* Return rtx for register that holds the function return value. */
4107
4108 rtx
4109 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4110 {
4111 enum machine_mode mode;
4112 enum machine_mode hfa_mode;
4113
4114 mode = TYPE_MODE (valtype);
4115 hfa_mode = hfa_element_mode (valtype, 0);
4116
4117 if (hfa_mode != VOIDmode)
4118 {
4119 rtx loc[8];
4120 int i;
4121 int hfa_size;
4122 int byte_size;
4123 int offset;
4124
4125 hfa_size = GET_MODE_SIZE (hfa_mode);
4126 byte_size = ((mode == BLKmode)
4127 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4128 offset = 0;
4129 for (i = 0; offset < byte_size; i++)
4130 {
4131 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4132 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4133 GEN_INT (offset));
4134 offset += hfa_size;
4135 }
4136 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4137 }
4138 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4139 return gen_rtx_REG (mode, FR_ARG_FIRST);
4140 else
4141 {
4142 bool need_parallel = false;
4143
4144 /* In big-endian mode, we need to manage the layout of aggregates
4145 in the registers so that we get the bits properly aligned in
4146 the highpart of the registers. */
4147 if (BYTES_BIG_ENDIAN
4148 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4149 need_parallel = true;
4150
4151 /* Something like struct S { long double x; char a[0]; } is not an
4152 HFA structure, and therefore doesn't go in fp registers. But
4153 the middle-end will give it XFmode anyway, and XFmode values
4154 don't normally fit in integer registers. So we need to smuggle
4155 the value inside a parallel. */
4156 else if (mode == XFmode || mode == XCmode)
4157 need_parallel = true;
4158
4159 if (need_parallel)
4160 {
4161 rtx loc[8];
4162 int offset;
4163 int bytesize;
4164 int i;
4165
4166 offset = 0;
4167 bytesize = int_size_in_bytes (valtype);
4168 /* An empty PARALLEL is invalid here, but the return value
4169 doesn't matter for empty structs. */
4170 if (bytesize == 0)
4171 return gen_rtx_REG (mode, GR_RET_FIRST);
4172 for (i = 0; offset < bytesize; i++)
4173 {
4174 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4175 gen_rtx_REG (DImode,
4176 GR_RET_FIRST + i),
4177 GEN_INT (offset));
4178 offset += UNITS_PER_WORD;
4179 }
4180 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4181 }
4182
4183 return gen_rtx_REG (mode, GR_RET_FIRST);
4184 }
4185 }
4186
4187 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4188 We need to emit DTP-relative relocations. */
4189
4190 static void
4191 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4192 {
4193 gcc_assert (size == 8);
4194 fputs ("\tdata8.ua\t@dtprel(", file);
4195 output_addr_const (file, x);
4196 fputs (")", file);
4197 }
4198
4199 /* Print a memory address as an operand to reference that memory location. */
4200
4201 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4202 also call this from ia64_print_operand for memory addresses. */
4203
4204 void
4205 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4206 rtx address ATTRIBUTE_UNUSED)
4207 {
4208 }
4209
4210 /* Print an operand to an assembler instruction.
4211 C Swap and print a comparison operator.
4212 D Print an FP comparison operator.
4213 E Print 32 - constant, for SImode shifts as extract.
4214 e Print 64 - constant, for DImode rotates.
4215 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4216 a floating point register emitted normally.
4217 I Invert a predicate register by adding 1.
4218 J Select the proper predicate register for a condition.
4219 j Select the inverse predicate register for a condition.
4220 O Append .acq for volatile load.
4221 P Postincrement of a MEM.
4222 Q Append .rel for volatile store.
4223 S Shift amount for shladd instruction.
4224 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4225 for Intel assembler.
4226 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4227 for Intel assembler.
4228 r Print register name, or constant 0 as r0. HP compatibility for
4229 Linux kernel.
4230 v Print vector constant value as an 8-byte integer value. */
4231
4232 void
4233 ia64_print_operand (FILE * file, rtx x, int code)
4234 {
4235 const char *str;
4236
4237 switch (code)
4238 {
4239 case 0:
4240 /* Handled below. */
4241 break;
4242
4243 case 'C':
4244 {
4245 enum rtx_code c = swap_condition (GET_CODE (x));
4246 fputs (GET_RTX_NAME (c), file);
4247 return;
4248 }
4249
4250 case 'D':
4251 switch (GET_CODE (x))
4252 {
4253 case NE:
4254 str = "neq";
4255 break;
4256 case UNORDERED:
4257 str = "unord";
4258 break;
4259 case ORDERED:
4260 str = "ord";
4261 break;
4262 default:
4263 str = GET_RTX_NAME (GET_CODE (x));
4264 break;
4265 }
4266 fputs (str, file);
4267 return;
4268
4269 case 'E':
4270 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4271 return;
4272
4273 case 'e':
4274 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4275 return;
4276
4277 case 'F':
4278 if (x == CONST0_RTX (GET_MODE (x)))
4279 str = reg_names [FR_REG (0)];
4280 else if (x == CONST1_RTX (GET_MODE (x)))
4281 str = reg_names [FR_REG (1)];
4282 else
4283 {
4284 gcc_assert (GET_CODE (x) == REG);
4285 str = reg_names [REGNO (x)];
4286 }
4287 fputs (str, file);
4288 return;
4289
4290 case 'I':
4291 fputs (reg_names [REGNO (x) + 1], file);
4292 return;
4293
4294 case 'J':
4295 case 'j':
4296 {
4297 unsigned int regno = REGNO (XEXP (x, 0));
4298 if (GET_CODE (x) == EQ)
4299 regno += 1;
4300 if (code == 'j')
4301 regno ^= 1;
4302 fputs (reg_names [regno], file);
4303 }
4304 return;
4305
4306 case 'O':
4307 if (MEM_VOLATILE_P (x))
4308 fputs(".acq", file);
4309 return;
4310
4311 case 'P':
4312 {
4313 HOST_WIDE_INT value;
4314
4315 switch (GET_CODE (XEXP (x, 0)))
4316 {
4317 default:
4318 return;
4319
4320 case POST_MODIFY:
4321 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4322 if (GET_CODE (x) == CONST_INT)
4323 value = INTVAL (x);
4324 else
4325 {
4326 gcc_assert (GET_CODE (x) == REG);
4327 fprintf (file, ", %s", reg_names[REGNO (x)]);
4328 return;
4329 }
4330 break;
4331
4332 case POST_INC:
4333 value = GET_MODE_SIZE (GET_MODE (x));
4334 break;
4335
4336 case POST_DEC:
4337 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4338 break;
4339 }
4340
4341 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4342 return;
4343 }
4344
4345 case 'Q':
4346 if (MEM_VOLATILE_P (x))
4347 fputs(".rel", file);
4348 return;
4349
4350 case 'S':
4351 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4352 return;
4353
4354 case 'T':
4355 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4356 {
4357 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4358 return;
4359 }
4360 break;
4361
4362 case 'U':
4363 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4364 {
4365 const char *prefix = "0x";
4366 if (INTVAL (x) & 0x80000000)
4367 {
4368 fprintf (file, "0xffffffff");
4369 prefix = "";
4370 }
4371 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4372 return;
4373 }
4374 break;
4375
4376 case 'r':
4377 /* If this operand is the constant zero, write it as register zero.
4378 Any register, zero, or CONST_INT value is OK here. */
4379 if (GET_CODE (x) == REG)
4380 fputs (reg_names[REGNO (x)], file);
4381 else if (x == CONST0_RTX (GET_MODE (x)))
4382 fputs ("r0", file);
4383 else if (GET_CODE (x) == CONST_INT)
4384 output_addr_const (file, x);
4385 else
4386 output_operand_lossage ("invalid %%r value");
4387 return;
4388
4389 case 'v':
4390 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4391 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4392 break;
4393
4394 case '+':
4395 {
4396 const char *which;
4397
4398 /* For conditional branches, returns or calls, substitute
4399 sptk, dptk, dpnt, or spnt for %s. */
4400 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4401 if (x)
4402 {
4403 int pred_val = INTVAL (XEXP (x, 0));
4404
4405 /* Guess top and bottom 10% statically predicted. */
4406 if (pred_val < REG_BR_PROB_BASE / 50)
4407 which = ".spnt";
4408 else if (pred_val < REG_BR_PROB_BASE / 2)
4409 which = ".dpnt";
4410 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4411 which = ".dptk";
4412 else
4413 which = ".sptk";
4414 }
4415 else if (GET_CODE (current_output_insn) == CALL_INSN)
4416 which = ".sptk";
4417 else
4418 which = ".dptk";
4419
4420 fputs (which, file);
4421 return;
4422 }
4423
4424 case ',':
4425 x = current_insn_predicate;
4426 if (x)
4427 {
4428 unsigned int regno = REGNO (XEXP (x, 0));
4429 if (GET_CODE (x) == EQ)
4430 regno += 1;
4431 fprintf (file, "(%s) ", reg_names [regno]);
4432 }
4433 return;
4434
4435 default:
4436 output_operand_lossage ("ia64_print_operand: unknown code");
4437 return;
4438 }
4439
4440 switch (GET_CODE (x))
4441 {
4442 /* This happens for the spill/restore instructions. */
4443 case POST_INC:
4444 case POST_DEC:
4445 case POST_MODIFY:
4446 x = XEXP (x, 0);
4447 /* ... fall through ... */
4448
4449 case REG:
4450 fputs (reg_names [REGNO (x)], file);
4451 break;
4452
4453 case MEM:
4454 {
4455 rtx addr = XEXP (x, 0);
4456 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4457 addr = XEXP (addr, 0);
4458 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4459 break;
4460 }
4461
4462 default:
4463 output_addr_const (file, x);
4464 break;
4465 }
4466
4467 return;
4468 }
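
/* A sample of the text produced by the codes above (illustrative only):

     %,  with predicate (p6)             ->  "(p6) "
     %O  on a volatile load operand      ->  ".acq"
     %Q  on a volatile store operand     ->  ".rel"
     %P  on a POST_INC of an 8-byte MEM  ->  ", 8"
     %r  on (const_int 0)                ->  "r0"

   as implemented by the corresponding cases in ia64_print_operand.  */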
4469 \f
4470 /* Compute a (partial) cost for rtx X. Return true if the complete
4471 cost has been computed, and false if subexpressions should be
4472 scanned. In either case, *TOTAL contains the cost result. */
4473 /* ??? This is incomplete. */
4474
4475 static bool
4476 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4477 {
4478 switch (code)
4479 {
4480 case CONST_INT:
4481 switch (outer_code)
4482 {
4483 case SET:
4484 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4485 return true;
4486 case PLUS:
4487 if (CONST_OK_FOR_I (INTVAL (x)))
4488 *total = 0;
4489 else if (CONST_OK_FOR_J (INTVAL (x)))
4490 *total = 1;
4491 else
4492 *total = COSTS_N_INSNS (1);
4493 return true;
4494 default:
4495 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4496 *total = 0;
4497 else
4498 *total = COSTS_N_INSNS (1);
4499 return true;
4500 }
4501
4502 case CONST_DOUBLE:
4503 *total = COSTS_N_INSNS (1);
4504 return true;
4505
4506 case CONST:
4507 case SYMBOL_REF:
4508 case LABEL_REF:
4509 *total = COSTS_N_INSNS (3);
4510 return true;
4511
4512 case MULT:
4513 /* For multiplies wider than HImode, we have to go to the FPU,
4514 which normally involves copies. Plus there's the latency
4515 of the multiply itself, and the latency of the instructions to
4516 transfer integer regs to FP regs. */
4517 /* ??? Check for FP mode. */
4518 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4519 *total = COSTS_N_INSNS (10);
4520 else
4521 *total = COSTS_N_INSNS (2);
4522 return true;
4523
4524 case PLUS:
4525 case MINUS:
4526 case ASHIFT:
4527 case ASHIFTRT:
4528 case LSHIFTRT:
4529 *total = COSTS_N_INSNS (1);
4530 return true;
4531
4532 case DIV:
4533 case UDIV:
4534 case MOD:
4535 case UMOD:
4536 /* We make divide expensive, so that divide-by-constant will be
4537 optimized to a multiply. */
4538 *total = COSTS_N_INSNS (60);
4539 return true;
4540
4541 default:
4542 return false;
4543 }
4544 }
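
/* Reading the cost table above by example: a CONST_INT within the
   add-immediate range (CONST_OK_FOR_I) is free as a PLUS operand;
   SYMBOL_REF, LABEL_REF and CONST are charged three insns; a DImode MULT
   is charged ten to cover the copies to and from the FP unit; and all
   divides are charged sixty so that divide-by-constant is rewritten as a
   multiply.  */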
4545
4546 /* Calculate the cost of moving data from a register in class FROM to
4547 one in class TO, using MODE. */
4548
4549 int
4550 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4551 enum reg_class to)
4552 {
4553 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4554 if (to == ADDL_REGS)
4555 to = GR_REGS;
4556 if (from == ADDL_REGS)
4557 from = GR_REGS;
4558
4559 /* All costs are symmetric, so reduce cases by putting the
4560 lower number class as the destination. */
4561 if (from < to)
4562 {
4563 enum reg_class tmp = to;
4564 to = from, from = tmp;
4565 }
4566
4567 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4568 so that we get secondary memory reloads. Between FR_REGS,
4569 we have to make this at least as expensive as MEMORY_MOVE_COST
4570 to avoid spectacularly poor register class preferencing. */
4571 if (mode == XFmode)
4572 {
4573 if (to != GR_REGS || from != GR_REGS)
4574 return MEMORY_MOVE_COST (mode, to, 0);
4575 else
4576 return 3;
4577 }
4578
4579 switch (to)
4580 {
4581 case PR_REGS:
4582 /* Moving between PR registers takes two insns. */
4583 if (from == PR_REGS)
4584 return 3;
4585 /* Moving between PR and anything but GR is impossible. */
4586 if (from != GR_REGS)
4587 return MEMORY_MOVE_COST (mode, to, 0);
4588 break;
4589
4590 case BR_REGS:
4591 /* Moving between BR and anything but GR is impossible. */
4592 if (from != GR_REGS && from != GR_AND_BR_REGS)
4593 return MEMORY_MOVE_COST (mode, to, 0);
4594 break;
4595
4596 case AR_I_REGS:
4597 case AR_M_REGS:
4598 /* Moving between AR and anything but GR is impossible. */
4599 if (from != GR_REGS)
4600 return MEMORY_MOVE_COST (mode, to, 0);
4601 break;
4602
4603 case GR_REGS:
4604 case FR_REGS:
4605 case GR_AND_FR_REGS:
4606 case GR_AND_BR_REGS:
4607 case ALL_REGS:
4608 break;
4609
4610 default:
4611 gcc_unreachable ();
4612 }
4613
4614 return 2;
4615 }
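
/* Sample costs produced by the rules above: GR <-> FR in DImode is 2,
   PR <-> PR is 3, GR <-> PR is 2, while FR <-> GR in XFmode and any
   BR or AR move that does not involve a general register fall back to
   MEMORY_MOVE_COST, steering reload through memory or a GR instead.  */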
4616
4617 /* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on CLASS
4618 to use when copying X into that class. */
4619
4620 enum reg_class
4621 ia64_preferred_reload_class (rtx x, enum reg_class class)
4622 {
4623 switch (class)
4624 {
4625 case FR_REGS:
4626 /* Don't allow volatile mem reloads into floating point registers.
4627 This is defined to force reload to choose the r/m case instead
4628 of the f/f case when reloading (set (reg fX) (mem/v)). */
4629 if (MEM_P (x) && MEM_VOLATILE_P (x))
4630 return NO_REGS;
4631
4632 /* Force all unrecognized constants into the constant pool. */
4633 if (CONSTANT_P (x))
4634 return NO_REGS;
4635 break;
4636
4637 case AR_M_REGS:
4638 case AR_I_REGS:
4639 if (!OBJECT_P (x))
4640 return NO_REGS;
4641 break;
4642
4643 default:
4644 break;
4645 }
4646
4647 return class;
4648 }
4649
4650 /* This function returns the register class required for a secondary
4651 register when copying between one of the registers in CLASS, and X,
4652 using MODE. A return value of NO_REGS means that no secondary register
4653 is required. */
4654
4655 enum reg_class
4656 ia64_secondary_reload_class (enum reg_class class,
4657 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4658 {
4659 int regno = -1;
4660
4661 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4662 regno = true_regnum (x);
4663
4664 switch (class)
4665 {
4666 case BR_REGS:
4667 case AR_M_REGS:
4668 case AR_I_REGS:
4669 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4670 interaction. We end up with two pseudos with overlapping lifetimes
4671 both of which are equiv to the same constant, and both which need
4672 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4673 changes depending on the path length, which means the qty_first_reg
4674 check in make_regs_eqv can give different answers at different times.
4675 At some point I'll probably need a reload_indi pattern to handle
4676 this.
4677
4678 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4679 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4680 non-general registers for good measure. */
4681 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4682 return GR_REGS;
4683
4684 /* This is needed if a pseudo used as a call_operand gets spilled to a
4685 stack slot. */
4686 if (GET_CODE (x) == MEM)
4687 return GR_REGS;
4688 break;
4689
4690 case FR_REGS:
4691 /* Need to go through general registers to get to other class regs. */
4692 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4693 return GR_REGS;
4694
4695 /* This can happen when a paradoxical subreg is an operand to the
4696 muldi3 pattern. */
4697 /* ??? This shouldn't be necessary after instruction scheduling is
4698 enabled, because paradoxical subregs are not accepted by
4699 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4700 stop the paradoxical subreg stupidity in the *_operand functions
4701 in recog.c. */
4702 if (GET_CODE (x) == MEM
4703 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4704 || GET_MODE (x) == QImode))
4705 return GR_REGS;
4706
4707 /* This can happen because of the ior/and/etc patterns that accept FP
4708 registers as operands. If the third operand is a constant, then it
4709 needs to be reloaded into a FP register. */
4710 if (GET_CODE (x) == CONST_INT)
4711 return GR_REGS;
4712
4713 /* This can happen because of register elimination in a muldi3 insn.
4714 E.g. `26107 * (unsigned long)&u'. */
4715 if (GET_CODE (x) == PLUS)
4716 return GR_REGS;
4717 break;
4718
4719 case PR_REGS:
4720 /* ??? This happens if we cse/gcse a BImode value across a call,
4721 and the function has a nonlocal goto. This is because global
4722 does not allocate call crossing pseudos to hard registers when
4723 current_function_has_nonlocal_goto is true. This is relatively
4724 common for C++ programs that use exceptions. To reproduce,
4725 return NO_REGS and compile libstdc++. */
4726 if (GET_CODE (x) == MEM)
4727 return GR_REGS;
4728
4729 /* This can happen when we take a BImode subreg of a DImode value,
4730 and that DImode value winds up in some non-GR register. */
4731 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4732 return GR_REGS;
4733 break;
4734
4735 default:
4736 break;
4737 }
4738
4739 return NO_REGS;
4740 }
4741
4742 \f
4743 /* Emit text to declare externally defined variables and functions, because
4744 the Intel assembler does not support undefined externals. */
4745
4746 void
4747 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4748 {
4749 int save_referenced;
4750
4751 /* GNU as does not need anything here, but the HP linker does need
4752 something for external functions. */
4753
4754 if (TARGET_GNU_AS
4755 && (!TARGET_HPUX_LD
4756 || TREE_CODE (decl) != FUNCTION_DECL
4757 || strstr (name, "__builtin_") == name))
4758 return;
4759
4760 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4761 the linker when we do this, so we need to be careful not to do this for
4762 builtin functions which have no library equivalent. Unfortunately, we
4763 can't tell here whether or not a function will actually be called by
4764 expand_expr, so we pull in library functions even if we may not need
4765 them later. */
4766 if (! strcmp (name, "__builtin_next_arg")
4767 || ! strcmp (name, "alloca")
4768 || ! strcmp (name, "__builtin_constant_p")
4769 || ! strcmp (name, "__builtin_args_info"))
4770 return;
4771
4772 if (TARGET_HPUX_LD)
4773 ia64_hpux_add_extern_decl (decl);
4774 else
4775 {
4776 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4777 restore it. */
4778 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4779 if (TREE_CODE (decl) == FUNCTION_DECL)
4780 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4781 (*targetm.asm_out.globalize_label) (file, name);
4782 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4783 }
4784 }
4785 \f
4786 /* Parse the -mfixed-range= option string. */
4787
4788 static void
4789 fix_range (const char *const_str)
4790 {
4791 int i, first, last;
4792 char *str, *dash, *comma;
4793
4794 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4795 REG2 are either register names or register numbers. The effect
4796 of this option is to mark the registers in the range from REG1 to
4797 REG2 as ``fixed'' so they won't be used by the compiler. This is
4798 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4799
4800 i = strlen (const_str);
4801 str = (char *) alloca (i + 1);
4802 memcpy (str, const_str, i + 1);
4803
4804 while (1)
4805 {
4806 dash = strchr (str, '-');
4807 if (!dash)
4808 {
4809 warning (0, "value of -mfixed-range must have form REG1-REG2");
4810 return;
4811 }
4812 *dash = '\0';
4813
4814 comma = strchr (dash + 1, ',');
4815 if (comma)
4816 *comma = '\0';
4817
4818 first = decode_reg_name (str);
4819 if (first < 0)
4820 {
4821 warning (0, "unknown register name: %s", str);
4822 return;
4823 }
4824
4825 last = decode_reg_name (dash + 1);
4826 if (last < 0)
4827 {
4828 warning (0, "unknown register name: %s", dash + 1);
4829 return;
4830 }
4831
4832 *dash = '-';
4833
4834 if (first > last)
4835 {
4836 warning (0, "%s-%s is an empty range", str, dash + 1);
4837 return;
4838 }
4839
4840 for (i = first; i <= last; ++i)
4841 fixed_regs[i] = call_used_regs[i] = 1;
4842
4843 if (!comma)
4844 break;
4845
4846 *comma = ',';
4847 str = comma + 1;
4848 }
4849 }
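
/* Usage sketch: "-mfixed-range=f32-f127" withdraws f32 through f127 from
   the register allocator, and "-mfixed-range=f12-f15,f32-f127" handles
   several ranges in one option; each register named ends up with
   fixed_regs[] and call_used_regs[] set as in the loop above.  */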
4850
4851 /* Implement TARGET_HANDLE_OPTION. */
4852
4853 static bool
4854 ia64_handle_option (size_t code, const char *arg, int value)
4855 {
4856 switch (code)
4857 {
4858 case OPT_mfixed_range_:
4859 fix_range (arg);
4860 return true;
4861
4862 case OPT_mtls_size_:
4863 if (value != 14 && value != 22 && value != 64)
4864 error ("bad value %<%s%> for -mtls-size= switch", arg);
4865 return true;
4866
4867 case OPT_mtune_:
4868 {
4869 static struct pta
4870 {
4871 const char *name; /* processor name or nickname. */
4872 enum processor_type processor;
4873 }
4874 const processor_alias_table[] =
4875 {
4876 {"itanium", PROCESSOR_ITANIUM},
4877 {"itanium1", PROCESSOR_ITANIUM},
4878 {"merced", PROCESSOR_ITANIUM},
4879 {"itanium2", PROCESSOR_ITANIUM2},
4880 {"mckinley", PROCESSOR_ITANIUM2},
4881 };
4882 int const pta_size = ARRAY_SIZE (processor_alias_table);
4883 int i;
4884
4885 for (i = 0; i < pta_size; i++)
4886 if (!strcmp (arg, processor_alias_table[i].name))
4887 {
4888 ia64_tune = processor_alias_table[i].processor;
4889 break;
4890 }
4891 if (i == pta_size)
4892 error ("bad value %<%s%> for -mtune= switch", arg);
4893 return true;
4894 }
4895
4896 default:
4897 return true;
4898 }
4899 }
4900
4901 /* Implement OVERRIDE_OPTIONS. */
4902
4903 void
4904 ia64_override_options (void)
4905 {
4906 if (TARGET_AUTO_PIC)
4907 target_flags |= MASK_CONST_GP;
4908
4909 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
4910 {
4911 warning (0, "not yet implemented: latency-optimized inline square root");
4912 TARGET_INLINE_SQRT = INL_MAX_THR;
4913 }
4914
4915 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4916 flag_schedule_insns_after_reload = 0;
4917
4918 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4919
4920 init_machine_status = ia64_init_machine_status;
4921 }
4922
4923 static struct machine_function *
4924 ia64_init_machine_status (void)
4925 {
4926 return ggc_alloc_cleared (sizeof (struct machine_function));
4927 }
4928 \f
4929 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4930 static enum attr_type ia64_safe_type (rtx);
4931
4932 static enum attr_itanium_class
4933 ia64_safe_itanium_class (rtx insn)
4934 {
4935 if (recog_memoized (insn) >= 0)
4936 return get_attr_itanium_class (insn);
4937 else
4938 return ITANIUM_CLASS_UNKNOWN;
4939 }
4940
4941 static enum attr_type
4942 ia64_safe_type (rtx insn)
4943 {
4944 if (recog_memoized (insn) >= 0)
4945 return get_attr_type (insn);
4946 else
4947 return TYPE_UNKNOWN;
4948 }
4949 \f
4950 /* The following collection of routines emit instruction group stop bits as
4951 necessary to avoid dependencies. */
4952
4953 /* Need to track some additional registers as far as serialization is
4954 concerned so we can properly handle br.call and br.ret. We could
4955 make these registers visible to gcc, but since these registers are
4956 never explicitly used in gcc generated code, it seems wasteful to
4957 do so (plus it would make the call and return patterns needlessly
4958 complex). */
4959 #define REG_RP (BR_REG (0))
4960 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4961 /* This is used for volatile asms which may require a stop bit immediately
4962 before and after them. */
4963 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4964 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4965 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4966
4967 /* For each register, we keep track of how it has been written in the
4968 current instruction group.
4969
4970 If a register is written unconditionally (no qualifying predicate),
4971 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4972
4973 If a register is written if its qualifying predicate P is true, we
4974 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4975 may be written again by the complement of P (P^1) and when this happens,
4976 WRITE_COUNT gets set to 2.
4977
4978 The result of this is that whenever an insn attempts to write a register
4979 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4980
4981 If a predicate register is written by a floating-point insn, we set
4982 WRITTEN_BY_FP to true.
4983
4984 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4985 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
4986
4987 struct reg_write_state
4988 {
4989 unsigned int write_count : 2;
4990 unsigned int first_pred : 16;
4991 unsigned int written_by_fp : 1;
4992 unsigned int written_by_and : 1;
4993 unsigned int written_by_or : 1;
4994 };
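
/* An illustrative scenario (a sketch; p6 and p7 are assumed to have been
   written as a complementary pair by a preceding compare):

     (p6) mov r14 = 1     // WRITE_COUNT (r14) = 1, FIRST_PRED = p6
     (p7) mov r14 = 2     // complement of p6: WRITE_COUNT becomes 2,
                          // no stop bit needed yet
          mov r14 = 3     // unconditional write while WRITE_COUNT == 2:
                          // a stop bit must be emitted first

   The checks implementing this live in rws_access_regno below.  */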
4995
4996 /* Cumulative info for the current instruction group. */
4997 struct reg_write_state rws_sum[NUM_REGS];
4998 /* Info for the current instruction. This gets copied to rws_sum after a
4999 stop bit is emitted. */
5000 struct reg_write_state rws_insn[NUM_REGS];
5001
5002 /* Indicates whether this is the first instruction after a stop bit,
5003 in which case we don't need another stop bit. Without this,
5004 ia64_variable_issue will die when scheduling an alloc. */
5005 static int first_instruction;
5006
5007 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5008 RTL for one instruction. */
5009 struct reg_flags
5010 {
5011 unsigned int is_write : 1; /* Is register being written? */
5012 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5013 unsigned int is_branch : 1; /* Is register used as part of a branch? */
5014 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5015 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
5016 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
5017 };
5018
5019 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
5020 static int rws_access_regno (int, struct reg_flags, int);
5021 static int rws_access_reg (rtx, struct reg_flags, int);
5022 static void update_set_flags (rtx, struct reg_flags *);
5023 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5024 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5025 static void init_insn_group_barriers (void);
5026 static int group_barrier_needed (rtx);
5027 static int safe_group_barrier_needed (rtx);
5028
5029 /* Update *RWS for REGNO, which is being written by the current instruction,
5030 with predicate PRED, and associated register flags in FLAGS. */
5031
5032 static void
5033 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
5034 {
5035 if (pred)
5036 rws[regno].write_count++;
5037 else
5038 rws[regno].write_count = 2;
5039 rws[regno].written_by_fp |= flags.is_fp;
5040 /* ??? Not tracking and/or across differing predicates. */
5041 rws[regno].written_by_and = flags.is_and;
5042 rws[regno].written_by_or = flags.is_or;
5043 rws[regno].first_pred = pred;
5044 }
5045
5046 /* Handle an access to register REGNO of type FLAGS using predicate register
5047 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
5048 a dependency with an earlier instruction in the same group. */
5049
5050 static int
5051 rws_access_regno (int regno, struct reg_flags flags, int pred)
5052 {
5053 int need_barrier = 0;
5054
5055 gcc_assert (regno < NUM_REGS);
5056
5057 if (! PR_REGNO_P (regno))
5058 flags.is_and = flags.is_or = 0;
5059
5060 if (flags.is_write)
5061 {
5062 int write_count;
5063
5064 /* One insn writes same reg multiple times? */
5065 gcc_assert (!rws_insn[regno].write_count);
5066
5067 /* Update info for current instruction. */
5068 rws_update (rws_insn, regno, flags, pred);
5069 write_count = rws_sum[regno].write_count;
5070
5071 switch (write_count)
5072 {
5073 case 0:
5074 /* The register has not been written yet. */
5075 rws_update (rws_sum, regno, flags, pred);
5076 break;
5077
5078 case 1:
5079 /* The register has been written via a predicate. If this is
5080 not a complementary predicate, then we need a barrier. */
5081 /* ??? This assumes that P and P+1 are always complementary
5082 predicates for P even. */
5083 if (flags.is_and && rws_sum[regno].written_by_and)
5084 ;
5085 else if (flags.is_or && rws_sum[regno].written_by_or)
5086 ;
5087 else if ((rws_sum[regno].first_pred ^ 1) != pred)
5088 need_barrier = 1;
5089 rws_update (rws_sum, regno, flags, pred);
5090 break;
5091
5092 case 2:
5093 /* The register has been unconditionally written already. We
5094 need a barrier. */
5095 if (flags.is_and && rws_sum[regno].written_by_and)
5096 ;
5097 else if (flags.is_or && rws_sum[regno].written_by_or)
5098 ;
5099 else
5100 need_barrier = 1;
5101 rws_sum[regno].written_by_and = flags.is_and;
5102 rws_sum[regno].written_by_or = flags.is_or;
5103 break;
5104
5105 default:
5106 gcc_unreachable ();
5107 }
5108 }
5109 else
5110 {
5111 if (flags.is_branch)
5112 {
5113 /* Branches have several RAW exceptions that allow us to avoid
5114 barriers. */
5115
5116 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5117 /* RAW dependencies on branch regs are permissible as long
5118 as the writer is a non-branch instruction. Since we
5119 never generate code that uses a branch register written
5120 by a branch instruction, handling this case is
5121 easy. */
5122 return 0;
5123
5124 if (REGNO_REG_CLASS (regno) == PR_REGS
5125 && ! rws_sum[regno].written_by_fp)
5126 /* The predicates of a branch are available within the
5127 same insn group as long as the predicate was written by
5128 something other than a floating-point instruction. */
5129 return 0;
5130 }
5131
5132 if (flags.is_and && rws_sum[regno].written_by_and)
5133 return 0;
5134 if (flags.is_or && rws_sum[regno].written_by_or)
5135 return 0;
5136
5137 switch (rws_sum[regno].write_count)
5138 {
5139 case 0:
5140 /* The register has not been written yet. */
5141 break;
5142
5143 case 1:
5144 /* The register has been written via a predicate. If this is
5145 not a complementary predicate, then we need a barrier. */
5146 /* ??? This assumes that P and P+1 are always complementary
5147 predicates for P even. */
5148 if ((rws_sum[regno].first_pred ^ 1) != pred)
5149 need_barrier = 1;
5150 break;
5151
5152 case 2:
5153 /* The register has been unconditionally written already. We
5154 need a barrier. */
5155 need_barrier = 1;
5156 break;
5157
5158 default:
5159 gcc_unreachable ();
5160 }
5161 }
5162
5163 return need_barrier;
5164 }
5165
5166 static int
5167 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5168 {
5169 int regno = REGNO (reg);
5170 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5171
5172 if (n == 1)
5173 return rws_access_regno (regno, flags, pred);
5174 else
5175 {
5176 int need_barrier = 0;
5177 while (--n >= 0)
5178 need_barrier |= rws_access_regno (regno + n, flags, pred);
5179 return need_barrier;
5180 }
5181 }
5182
5183 /* Examine X, which is a SET rtx, and update the register write flags
5184 stored in *PFLAGS. */
5185
5186 static void
5187 update_set_flags (rtx x, struct reg_flags *pflags)
5188 {
5189 rtx src = SET_SRC (x);
5190
5191 switch (GET_CODE (src))
5192 {
5193 case CALL:
5194 return;
5195
5196 case IF_THEN_ELSE:
5197 /* There are three cases here:
5198 (1) The destination is (pc), in which case this is a branch,
5199 nothing here applies.
5200 (2) The destination is ar.lc, in which case this is a
5201 doloop_end_internal,
5202 (3) The destination is an fp register, in which case this is
5203 an fselect instruction.
5204 In all cases, nothing we do in this function applies. */
5205 return;
5206
5207 default:
5208 if (COMPARISON_P (src)
5209 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
5210 /* Set pflags->is_fp to 1 so that we know we're dealing
5211 with a floating point comparison when processing the
5212 destination of the SET. */
5213 pflags->is_fp = 1;
5214
5215 /* Discover if this is a parallel comparison. We only handle
5216 and.orcm and or.andcm at present, since we must retain a
5217 strict inverse on the predicate pair. */
5218 else if (GET_CODE (src) == AND)
5219 pflags->is_and = 1;
5220 else if (GET_CODE (src) == IOR)
5221 pflags->is_or = 1;
5222
5223 break;
5224 }
5225 }
5226
5227 /* Subroutine of rtx_needs_barrier; this function determines whether the
5228 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5229 are as in rtx_needs_barrier. */
5231
5232 static int
5233 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5234 {
5235 int need_barrier = 0;
5236 rtx dst;
5237 rtx src = SET_SRC (x);
5238
5239 if (GET_CODE (src) == CALL)
5240 /* We don't need to worry about the result registers that
5241 get written by subroutine call. */
5242 return rtx_needs_barrier (src, flags, pred);
5243 else if (SET_DEST (x) == pc_rtx)
5244 {
5245 /* X is a conditional branch. */
5246 /* ??? This seems redundant, as the caller sets this bit for
5247 all JUMP_INSNs. */
5248 flags.is_branch = 1;
5249 return rtx_needs_barrier (src, flags, pred);
5250 }
5251
5252 need_barrier = rtx_needs_barrier (src, flags, pred);
5253
5254 dst = SET_DEST (x);
5255 if (GET_CODE (dst) == ZERO_EXTRACT)
5256 {
5257 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5258 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5259 dst = XEXP (dst, 0);
5260 }
5261 return need_barrier;
5262 }
5263
5264 /* Handle an access to rtx X of type FLAGS using predicate register
5265 PRED. Return 1 if this access creates a dependency with an earlier
5266 instruction in the same group. */
5267
5268 static int
5269 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5270 {
5271 int i, j;
5272 int is_complemented = 0;
5273 int need_barrier = 0;
5274 const char *format_ptr;
5275 struct reg_flags new_flags;
5276 rtx cond;
5277
5278 if (! x)
5279 return 0;
5280
5281 new_flags = flags;
5282
5283 switch (GET_CODE (x))
5284 {
5285 case SET:
5286 update_set_flags (x, &new_flags);
5287 need_barrier = set_src_needs_barrier (x, new_flags, pred);
5288 if (GET_CODE (SET_SRC (x)) != CALL)
5289 {
5290 new_flags.is_write = 1;
5291 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5292 }
5293 break;
5294
5295 case CALL:
5296 new_flags.is_write = 0;
5297 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5298
5299 /* Avoid multiple register writes, in case this is a pattern with
5300 multiple CALL rtx. This avoids a failure in rws_access_reg. */
5301 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5302 {
5303 new_flags.is_write = 1;
5304 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5305 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5306 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5307 }
5308 break;
5309
5310 case COND_EXEC:
5311 /* X is a predicated instruction. */
5312
5313 cond = COND_EXEC_TEST (x);
5314 gcc_assert (!pred);
5315 need_barrier = rtx_needs_barrier (cond, flags, 0);
5316
5317 if (GET_CODE (cond) == EQ)
5318 is_complemented = 1;
5319 cond = XEXP (cond, 0);
5320 gcc_assert (GET_CODE (cond) == REG
5321 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5322 pred = REGNO (cond);
5323 if (is_complemented)
5324 ++pred;
5325
5326 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5327 return need_barrier;
5328
5329 case CLOBBER:
5330 case USE:
5331 /* Clobber & use are for earlier compiler-phases only. */
5332 break;
5333
5334 case ASM_OPERANDS:
5335 case ASM_INPUT:
5336 /* We always emit stop bits for traditional asms. We emit stop bits
5337 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5338 if (GET_CODE (x) != ASM_OPERANDS
5339 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5340 {
5341 /* Avoid writing the register multiple times if we have multiple
5342 asm outputs. This avoids a failure in rws_access_reg. */
5343 if (! rws_insn[REG_VOLATILE].write_count)
5344 {
5345 new_flags.is_write = 1;
5346 rws_access_regno (REG_VOLATILE, new_flags, pred);
5347 }
5348 return 1;
5349 }
5350
5351 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5352 We cannot just fall through here since then we would be confused
5353 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
5354 a traditional asm here, unlike its normal usage. */
5355
5356 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5357 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5358 need_barrier = 1;
5359 break;
5360
5361 case PARALLEL:
5362 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5363 {
5364 rtx pat = XVECEXP (x, 0, i);
5365 switch (GET_CODE (pat))
5366 {
5367 case SET:
5368 update_set_flags (pat, &new_flags);
5369 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
5370 break;
5371
5372 case USE:
5373 case CALL:
5374 case ASM_OPERANDS:
5375 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5376 break;
5377
5378 case CLOBBER:
5379 case RETURN:
5380 break;
5381
5382 default:
5383 gcc_unreachable ();
5384 }
5385 }
5386 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5387 {
5388 rtx pat = XVECEXP (x, 0, i);
5389 if (GET_CODE (pat) == SET)
5390 {
5391 if (GET_CODE (SET_SRC (pat)) != CALL)
5392 {
5393 new_flags.is_write = 1;
5394 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5395 pred);
5396 }
5397 }
5398 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5399 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5400 }
5401 break;
5402
5403 case SUBREG:
5404 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5405 break;
5406 case REG:
5407 if (REGNO (x) == AR_UNAT_REGNUM)
5408 {
5409 for (i = 0; i < 64; ++i)
5410 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5411 }
5412 else
5413 need_barrier = rws_access_reg (x, flags, pred);
5414 break;
5415
5416 case MEM:
5417 /* Find the regs used in memory address computation. */
5418 new_flags.is_write = 0;
5419 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5420 break;
5421
5422 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
5423 case SYMBOL_REF: case LABEL_REF: case CONST:
5424 break;
5425
5426 /* Operators with side-effects. */
5427 case POST_INC: case POST_DEC:
5428 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5429
5430 new_flags.is_write = 0;
5431 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5432 new_flags.is_write = 1;
5433 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5434 break;
5435
5436 case POST_MODIFY:
5437 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5438
5439 new_flags.is_write = 0;
5440 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5441 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5442 new_flags.is_write = 1;
5443 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5444 break;
5445
5446 /* Handle common unary and binary ops for efficiency. */
5447 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5448 case MOD: case UDIV: case UMOD: case AND: case IOR:
5449 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5450 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5451 case NE: case EQ: case GE: case GT: case LE:
5452 case LT: case GEU: case GTU: case LEU: case LTU:
5453 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5454 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5455 break;
5456
5457 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5458 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5459 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5460 case SQRT: case FFS: case POPCOUNT:
5461 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5462 break;
5463
5464 case VEC_SELECT:
5465 /* VEC_SELECT's second argument is a PARALLEL with integers that
5466 describe the elements selected. On ia64, those integers are
5467 always constants. Avoid walking the PARALLEL so that we don't
5468 get confused with "normal" parallels and then die. */
5469 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5470 break;
5471
5472 case UNSPEC:
5473 switch (XINT (x, 1))
5474 {
5475 case UNSPEC_LTOFF_DTPMOD:
5476 case UNSPEC_LTOFF_DTPREL:
5477 case UNSPEC_DTPREL:
5478 case UNSPEC_LTOFF_TPREL:
5479 case UNSPEC_TPREL:
5480 case UNSPEC_PRED_REL_MUTEX:
5481 case UNSPEC_PIC_CALL:
5482 case UNSPEC_MF:
5483 case UNSPEC_FETCHADD_ACQ:
5484 case UNSPEC_BSP_VALUE:
5485 case UNSPEC_FLUSHRS:
5486 case UNSPEC_BUNDLE_SELECTOR:
5487 break;
5488
5489 case UNSPEC_GR_SPILL:
5490 case UNSPEC_GR_RESTORE:
5491 {
5492 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5493 HOST_WIDE_INT bit = (offset >> 3) & 63;
5494
5495 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5496 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5497 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5498 new_flags, pred);
5499 break;
5500 }
5501
5502 case UNSPEC_FR_SPILL:
5503 case UNSPEC_FR_RESTORE:
5504 case UNSPEC_GETF_EXP:
5505 case UNSPEC_SETF_EXP:
5506 case UNSPEC_ADDP4:
5507 case UNSPEC_FR_SQRT_RECIP_APPROX:
5508 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5509 break;
5510
5511 case UNSPEC_FR_RECIP_APPROX:
5512 case UNSPEC_SHRP:
5513 case UNSPEC_COPYSIGN:
5514 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5515 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5516 break;
5517
5518 case UNSPEC_CMPXCHG_ACQ:
5519 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5520 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5521 break;
5522
5523 default:
5524 gcc_unreachable ();
5525 }
5526 break;
5527
5528 case UNSPEC_VOLATILE:
5529 switch (XINT (x, 1))
5530 {
5531 case UNSPECV_ALLOC:
5532 /* Alloc must always be the first instruction of a group.
5533 We force this by always returning true. */
5534 /* ??? We might get better scheduling if we explicitly check for
5535 input/local/output register dependencies, and modify the
5536 scheduler so that alloc is always reordered to the start of
5537 the current group. We could then eliminate all of the
5538 first_instruction code. */
5539 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5540
5541 new_flags.is_write = 1;
5542 rws_access_regno (REG_AR_CFM, new_flags, pred);
5543 return 1;
5544
5545 case UNSPECV_SET_BSP:
5546 need_barrier = 1;
5547 break;
5548
5549 case UNSPECV_BLOCKAGE:
5550 case UNSPECV_INSN_GROUP_BARRIER:
5551 case UNSPECV_BREAK:
5552 case UNSPECV_PSAC_ALL:
5553 case UNSPECV_PSAC_NORMAL:
5554 return 0;
5555
5556 default:
5557 gcc_unreachable ();
5558 }
5559 break;
5560
5561 case RETURN:
5562 new_flags.is_write = 0;
5563 need_barrier = rws_access_regno (REG_RP, flags, pred);
5564 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5565
5566 new_flags.is_write = 1;
5567 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5568 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5569 break;
5570
5571 default:
5572 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5573 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5574 switch (format_ptr[i])
5575 {
5576 case '0': /* unused field */
5577 case 'i': /* integer */
5578 case 'n': /* note */
5579 case 'w': /* wide integer */
5580 case 's': /* pointer to string */
5581 case 'S': /* optional pointer to string */
5582 break;
5583
5584 case 'e':
5585 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5586 need_barrier = 1;
5587 break;
5588
5589 case 'E':
5590 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5591 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5592 need_barrier = 1;
5593 break;
5594
5595 default:
5596 gcc_unreachable ();
5597 }
5598 break;
5599 }
5600 return need_barrier;
5601 }
5602
5603 /* Clear out the state for group_barrier_needed at the start of a
5604 sequence of insns. */
5605
5606 static void
5607 init_insn_group_barriers (void)
5608 {
5609 memset (rws_sum, 0, sizeof (rws_sum));
5610 first_instruction = 1;
5611 }
5612
5613 /* Given the current state, determine whether a group barrier (a stop bit) is
5614 necessary before INSN. Return nonzero if so. This modifies the state to
5615 include the effects of INSN as a side-effect. */
5616
5617 static int
5618 group_barrier_needed (rtx insn)
5619 {
5620 rtx pat;
5621 int need_barrier = 0;
5622 struct reg_flags flags;
5623
5624 memset (&flags, 0, sizeof (flags));
5625 switch (GET_CODE (insn))
5626 {
5627 case NOTE:
5628 break;
5629
5630 case BARRIER:
5631 /* A barrier doesn't imply an instruction group boundary. */
5632 break;
5633
5634 case CODE_LABEL:
5635 memset (rws_insn, 0, sizeof (rws_insn));
5636 return 1;
5637
5638 case CALL_INSN:
5639 flags.is_branch = 1;
5640 flags.is_sibcall = SIBLING_CALL_P (insn);
5641 memset (rws_insn, 0, sizeof (rws_insn));
5642
5643 /* Don't bundle a call following another call. */
5644 if ((pat = prev_active_insn (insn))
5645 && GET_CODE (pat) == CALL_INSN)
5646 {
5647 need_barrier = 1;
5648 break;
5649 }
5650
5651 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5652 break;
5653
5654 case JUMP_INSN:
5655 flags.is_branch = 1;
5656
5657 /* Don't bundle a jump following a call. */
5658 if ((pat = prev_active_insn (insn))
5659 && GET_CODE (pat) == CALL_INSN)
5660 {
5661 need_barrier = 1;
5662 break;
5663 }
5664 /* FALLTHRU */
5665
5666 case INSN:
5667 if (GET_CODE (PATTERN (insn)) == USE
5668 || GET_CODE (PATTERN (insn)) == CLOBBER)
5669 /* Don't care about USE and CLOBBER "insns"---those are used to
5670 indicate to the optimizer that it shouldn't get rid of
5671 certain operations. */
5672 break;
5673
5674 pat = PATTERN (insn);
5675
5676 /* Ug. Hack hacks hacked elsewhere. */
5677 switch (recog_memoized (insn))
5678 {
5679 /* We play dependency tricks with the epilogue in order
5680 to get proper schedules. Undo this for dv analysis. */
5681 case CODE_FOR_epilogue_deallocate_stack:
5682 case CODE_FOR_prologue_allocate_stack:
5683 pat = XVECEXP (pat, 0, 0);
5684 break;
5685
5686 /* The pattern we use for br.cloop confuses the code above.
5687 The second element of the vector is representative. */
5688 case CODE_FOR_doloop_end_internal:
5689 pat = XVECEXP (pat, 0, 1);
5690 break;
5691
5692 /* Doesn't generate code. */
5693 case CODE_FOR_pred_rel_mutex:
5694 case CODE_FOR_prologue_use:
5695 return 0;
5696
5697 default:
5698 break;
5699 }
5700
5701 memset (rws_insn, 0, sizeof (rws_insn));
5702 need_barrier = rtx_needs_barrier (pat, flags, 0);
5703
5704 /* Check to see if the previous instruction was a volatile
5705 asm. */
5706 if (! need_barrier)
5707 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5708 break;
5709
5710 default:
5711 gcc_unreachable ();
5712 }
5713
5714 if (first_instruction && INSN_P (insn)
5715 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5716 && GET_CODE (PATTERN (insn)) != USE
5717 && GET_CODE (PATTERN (insn)) != CLOBBER)
5718 {
5719 need_barrier = 0;
5720 first_instruction = 0;
5721 }
5722
5723 return need_barrier;
5724 }
5725
5726 /* Like group_barrier_needed, but do not clobber the current state. */
5727
5728 static int
5729 safe_group_barrier_needed (rtx insn)
5730 {
5731 struct reg_write_state rws_saved[NUM_REGS];
5732 int saved_first_instruction;
5733 int t;
5734
5735 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5736 saved_first_instruction = first_instruction;
5737
5738 t = group_barrier_needed (insn);
5739
5740 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5741 first_instruction = saved_first_instruction;
5742
5743 return t;
5744 }
5745
5746 /* Scan the current function and insert stop bits as necessary to
5747 eliminate dependencies. This function assumes that a final
5748 instruction scheduling pass has been run which has already
5749 inserted most of the necessary stop bits. This function only
5750 inserts new ones at basic block boundaries, since these are
5751 invisible to the scheduler. */
5752
5753 static void
5754 emit_insn_group_barriers (FILE *dump)
5755 {
5756 rtx insn;
5757 rtx last_label = 0;
5758 int insns_since_last_label = 0;
5759
5760 init_insn_group_barriers ();
5761
5762 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5763 {
5764 if (GET_CODE (insn) == CODE_LABEL)
5765 {
5766 if (insns_since_last_label)
5767 last_label = insn;
5768 insns_since_last_label = 0;
5769 }
5770 else if (GET_CODE (insn) == NOTE
5771 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5772 {
5773 if (insns_since_last_label)
5774 last_label = insn;
5775 insns_since_last_label = 0;
5776 }
5777 else if (GET_CODE (insn) == INSN
5778 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5779 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5780 {
5781 init_insn_group_barriers ();
5782 last_label = 0;
5783 }
5784 else if (INSN_P (insn))
5785 {
5786 insns_since_last_label = 1;
5787
5788 if (group_barrier_needed (insn))
5789 {
5790 if (last_label)
5791 {
5792 if (dump)
5793 fprintf (dump, "Emitting stop before label %d\n",
5794 INSN_UID (last_label));
5795 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5796 insn = last_label;
5797
5798 init_insn_group_barriers ();
5799 last_label = 0;
5800 }
5801 }
5802 }
5803 }
5804 }
5805
5806 /* Like emit_insn_group_barriers, but used when no final scheduling pass
5807 has been run. This function has to emit all necessary group barriers. */
5808
5809 static void
5810 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5811 {
5812 rtx insn;
5813
5814 init_insn_group_barriers ();
5815
5816 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5817 {
5818 if (GET_CODE (insn) == BARRIER)
5819 {
5820 rtx last = prev_active_insn (insn);
5821
5822 if (! last)
5823 continue;
5824 if (GET_CODE (last) == JUMP_INSN
5825 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5826 last = prev_active_insn (last);
5827 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5828 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5829
5830 init_insn_group_barriers ();
5831 }
5832 else if (INSN_P (insn))
5833 {
5834 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5835 init_insn_group_barriers ();
5836 else if (group_barrier_needed (insn))
5837 {
5838 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5839 init_insn_group_barriers ();
5840 group_barrier_needed (insn);
5841 }
5842 }
5843 }
5844 }
5845
5846 \f
5847
5848 /* Instruction scheduling support. */
5849
5850 #define NR_BUNDLES 10
5851
5852 /* A list of names of all available bundles. */
5853
5854 static const char *bundle_name [NR_BUNDLES] =
5855 {
5856 ".mii",
5857 ".mmi",
5858 ".mfi",
5859 ".mmf",
5860 #if NR_BUNDLES == 10
5861 ".bbb",
5862 ".mbb",
5863 #endif
5864 ".mib",
5865 ".mmb",
5866 ".mfb",
5867 ".mlx"
5868 };
5869
5870 /* Nonzero if we should insert stop bits into the schedule. */
5871
5872 int ia64_final_schedule = 0;
5873
5874 /* Codes of the corresponding queried units: */
5875
5876 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5877 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5878
5879 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5880 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5881
5882 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5883
5884 /* The following variable value is an insn group barrier. */
5885
5886 static rtx dfa_stop_insn;
5887
5888 /* The following variable value is the last issued insn. */
5889
5890 static rtx last_scheduled_insn;
5891
5892 /* The following variable value is the size of the DFA state. */
5893 
5894 static size_t dfa_state_size;
5895 
5896 /* The following variable is a pointer to a DFA state used as a
5897 temporary variable. */
5898 
5899 static state_t temp_dfa_state = NULL;
5900 
5901 /* The following variable value is the DFA state after issuing the last
5902 insn. */
5903 
5904 static state_t prev_cycle_state = NULL;
5905 
5906 /* The following array element values are TRUE if the corresponding
5907 insn requires stop bits to be added before it. */
5908 
5909 static char *stops_p;
5910 
5911 /* The following variable is used to set up the array mentioned above. */
5912 
5913 static int stop_before_p = 0;
5914 
5915 /* The following variable value is the length of the arrays `clocks' and
5916 `add_cycles'. */
5917 
5918 static int clocks_length;
5919 
5920 /* The following array element values are the cycles on which the
5921 corresponding insn will be issued. The array is used only for
5922 Itanium1. */
5923 
5924 static int *clocks;
5925 
5926 /* The following array element values are the numbers of cycles that
5927 should be added to improve insn scheduling of MM-insns for Itanium1. */
5928
5929 static int *add_cycles;
5930
5931 static rtx ia64_single_set (rtx);
5932 static void ia64_emit_insn_before (rtx, rtx);
5933
5934 /* Map a bundle number to its pseudo-op. */
5935
5936 const char *
5937 get_bundle_name (int b)
5938 {
5939 return bundle_name[b];
5940 }
5941
5942
5943 /* Return the maximum number of instructions a cpu can issue. */
5944
5945 static int
5946 ia64_issue_rate (void)
5947 {
5948 return 6;
5949 }
5950
5951 /* Helper function - like single_set, but look inside COND_EXEC. */
5952
5953 static rtx
5954 ia64_single_set (rtx insn)
5955 {
5956 rtx x = PATTERN (insn), ret;
5957 if (GET_CODE (x) == COND_EXEC)
5958 x = COND_EXEC_CODE (x);
5959 if (GET_CODE (x) == SET)
5960 return x;
5961
5962 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5963 Although they are not classical single set, the second set is there just
5964 to protect it from moving past FP-relative stack accesses. */
5965 switch (recog_memoized (insn))
5966 {
5967 case CODE_FOR_prologue_allocate_stack:
5968 case CODE_FOR_epilogue_deallocate_stack:
5969 ret = XVECEXP (x, 0, 0);
5970 break;
5971
5972 default:
5973 ret = single_set_2 (insn, x);
5974 break;
5975 }
5976
5977 return ret;
5978 }
5979
5980 /* Adjust the cost of a scheduling dependency. Return the new cost of
5981 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5982
5983 static int
5984 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
5985 {
5986 enum attr_itanium_class dep_class;
5987 enum attr_itanium_class insn_class;
5988
5989 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5990 return cost;
5991
5992 insn_class = ia64_safe_itanium_class (insn);
5993 dep_class = ia64_safe_itanium_class (dep_insn);
5994 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5995 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5996 return 0;
5997
5998 return cost;
5999 }
6000
6001 /* Like emit_insn_before, but skip cycle_display notes.
6002 ??? When cycle display notes are implemented, update this. */
6003
6004 static void
6005 ia64_emit_insn_before (rtx insn, rtx before)
6006 {
6007 emit_insn_before (insn, before);
6008 }
6009
6010 /* The following function marks insns that produce addresses for load
6011 and store insns. Such insns will be placed into M slots because that
6012 decreases the latency for Itanium1 (see function
6013 `ia64_produce_address_p' and the DFA descriptions). */
6014
6015 static void
6016 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6017 {
6018 rtx insn, link, next, next_tail;
6019
6020 /* Before reload, which_alternative is not set, which means that
6021 ia64_safe_itanium_class will produce wrong results for (at least)
6022 move instructions. */
6023 if (!reload_completed)
6024 return;
6025
6026 next_tail = NEXT_INSN (tail);
6027 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6028 if (INSN_P (insn))
6029 insn->call = 0;
6030 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6031 if (INSN_P (insn)
6032 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6033 {
6034 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6035 {
6036 if (REG_NOTE_KIND (link) != REG_DEP_TRUE)
6037 continue;
6038 next = XEXP (link, 0);
6039 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
6040 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
6041 && ia64_st_address_bypass_p (insn, next))
6042 break;
6043 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
6044 || ia64_safe_itanium_class (next)
6045 == ITANIUM_CLASS_FLD)
6046 && ia64_ld_address_bypass_p (insn, next))
6047 break;
6048 }
6049 insn->call = link != 0;
6050 }
6051 }
6052
6053 /* We're beginning a new block. Initialize data structures as necessary. */
6054
6055 static void
6056 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6057 int sched_verbose ATTRIBUTE_UNUSED,
6058 int max_ready ATTRIBUTE_UNUSED)
6059 {
6060 #ifdef ENABLE_CHECKING
6061 rtx insn;
6062
6063 if (reload_completed)
6064 for (insn = NEXT_INSN (current_sched_info->prev_head);
6065 insn != current_sched_info->next_tail;
6066 insn = NEXT_INSN (insn))
6067 gcc_assert (!SCHED_GROUP_P (insn));
6068 #endif
6069 last_scheduled_insn = NULL_RTX;
6070 init_insn_group_barriers ();
6071 }
6072
6073 /* We are about to begin issuing insns for this clock cycle.
6074 Override the default sort algorithm to better slot instructions. */
6075
6076 static int
6077 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6078 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6079 int reorder_type)
6080 {
6081 int n_asms;
6082 int n_ready = *pn_ready;
6083 rtx *e_ready = ready + n_ready;
6084 rtx *insnp;
6085
6086 if (sched_verbose)
6087 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6088
6089 if (reorder_type == 0)
6090 {
6091 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6092 n_asms = 0;
6093 for (insnp = ready; insnp < e_ready; insnp++)
6094 if (insnp < e_ready)
6095 {
6096 rtx insn = *insnp;
6097 enum attr_type t = ia64_safe_type (insn);
6098 if (t == TYPE_UNKNOWN)
6099 {
6100 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6101 || asm_noperands (PATTERN (insn)) >= 0)
6102 {
6103 rtx lowest = ready[n_asms];
6104 ready[n_asms] = insn;
6105 *insnp = lowest;
6106 n_asms++;
6107 }
6108 else
6109 {
6110 rtx highest = ready[n_ready - 1];
6111 ready[n_ready - 1] = insn;
6112 *insnp = highest;
6113 return 1;
6114 }
6115 }
6116 }
6117
6118 if (n_asms < n_ready)
6119 {
6120 /* Some normal insns to process. Skip the asms. */
6121 ready += n_asms;
6122 n_ready -= n_asms;
6123 }
6124 else if (n_ready > 0)
6125 return 1;
6126 }
6127
6128 if (ia64_final_schedule)
6129 {
6130 int deleted = 0;
6131 int nr_need_stop = 0;
6132
6133 for (insnp = ready; insnp < e_ready; insnp++)
6134 if (safe_group_barrier_needed (*insnp))
6135 nr_need_stop++;
6136
6137 if (reorder_type == 1 && n_ready == nr_need_stop)
6138 return 0;
6139 if (reorder_type == 0)
6140 return 1;
6141 insnp = e_ready;
6142 /* Move down everything that needs a stop bit, preserving
6143 relative order. */
6144 while (insnp-- > ready + deleted)
6145 while (insnp >= ready + deleted)
6146 {
6147 rtx insn = *insnp;
6148 if (! safe_group_barrier_needed (insn))
6149 break;
6150 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6151 *ready = insn;
6152 deleted++;
6153 }
6154 n_ready -= deleted;
6155 ready += deleted;
6156 }
6157
6158 return 1;
6159 }
6160
6161 /* We are about to begin issuing insns for this clock cycle. Override
6162 the default sort algorithm to better slot instructions. */
6163
6164 static int
6165 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6166 int clock_var)
6167 {
6168 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6169 pn_ready, clock_var, 0);
6170 }
6171
6172 /* Like ia64_sched_reorder, but called after issuing each insn.
6173 Override the default sort algorithm to better slot instructions. */
6174
6175 static int
6176 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6177 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6178 int *pn_ready, int clock_var)
6179 {
6180 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6181 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6182 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6183 clock_var, 1);
6184 }
6185
6186 /* We are about to issue INSN. Return the number of insns left on the
6187 ready queue that can be issued this cycle. */
6188
6189 static int
6190 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6191 int sched_verbose ATTRIBUTE_UNUSED,
6192 rtx insn ATTRIBUTE_UNUSED,
6193 int can_issue_more ATTRIBUTE_UNUSED)
6194 {
6195 last_scheduled_insn = insn;
6196 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6197 if (reload_completed)
6198 {
6199 int needed = group_barrier_needed (insn);
6200
6201 gcc_assert (!needed);
6202 if (GET_CODE (insn) == CALL_INSN)
6203 init_insn_group_barriers ();
6204 stops_p [INSN_UID (insn)] = stop_before_p;
6205 stop_before_p = 0;
6206 }
6207 return 1;
6208 }
6209
6210 /* We are choosing insn from the ready queue. Return nonzero if INSN
6211 can be chosen. */
6212
6213 static int
6214 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6215 {
6216 gcc_assert (insn && INSN_P (insn));
6217 return (!reload_completed
6218 || !safe_group_barrier_needed (insn));
6219 }
6220
6221 /* The following variable value is pseudo-insn used by the DFA insn
6222 scheduler to change the DFA state when the simulated clock is
6223 increased. */
6224
6225 static rtx dfa_pre_cycle_insn;
6226
6227 /* We are about to begin issuing INSN. Return nonzero if we cannot
6228 issue it on the given cycle CLOCK, and return zero if we should not sort
6229 the ready queue on the next clock start. */
6230
6231 static int
6232 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6233 int clock, int *sort_p)
6234 {
6235 int setup_clocks_p = FALSE;
6236
6237 gcc_assert (insn && INSN_P (insn));
6238 if ((reload_completed && safe_group_barrier_needed (insn))
6239 || (last_scheduled_insn
6240 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6241 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6242 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6243 {
6244 init_insn_group_barriers ();
6245 if (verbose && dump)
6246 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6247 last_clock == clock ? " + cycle advance" : "");
6248 stop_before_p = 1;
6249 if (last_clock == clock)
6250 {
6251 state_transition (curr_state, dfa_stop_insn);
6252 if (TARGET_EARLY_STOP_BITS)
6253 *sort_p = (last_scheduled_insn == NULL_RTX
6254 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6255 else
6256 *sort_p = 0;
6257 return 1;
6258 }
6259 else if (reload_completed)
6260 setup_clocks_p = TRUE;
6261 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6262 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6263 state_reset (curr_state);
6264 else
6265 {
6266 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6267 state_transition (curr_state, dfa_stop_insn);
6268 state_transition (curr_state, dfa_pre_cycle_insn);
6269 state_transition (curr_state, NULL);
6270 }
6271 }
6272 else if (reload_completed)
6273 setup_clocks_p = TRUE;
6274 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6275 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6276 && asm_noperands (PATTERN (insn)) < 0)
6277 {
6278 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6279
6280 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6281 {
6282 rtx link;
6283 int d = -1;
6284
6285 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6286 if (REG_NOTE_KIND (link) == 0)
6287 {
6288 enum attr_itanium_class dep_class;
6289 rtx dep_insn = XEXP (link, 0);
6290
6291 dep_class = ia64_safe_itanium_class (dep_insn);
6292 if ((dep_class == ITANIUM_CLASS_MMMUL
6293 || dep_class == ITANIUM_CLASS_MMSHF)
6294 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6295 && (d < 0
6296 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6297 d = last_clock - clocks [INSN_UID (dep_insn)];
6298 }
6299 if (d >= 0)
6300 add_cycles [INSN_UID (insn)] = 3 - d;
6301 }
6302 }
6303 return 0;
6304 }
6305
6306 \f
6307
6308 /* The following page contains abstract data `bundle states' which are
6309 used for bundling insns (inserting nops and template generation). */
6310
6311 /* The following describes state of insn bundling. */
6312
6313 struct bundle_state
6314 {
6315 /* Unique bundle state number to identify them in the debugging
6316 output */
6317 int unique_num;
6318 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6319 /* number of nops before and after the insn */
6320 short before_nops_num, after_nops_num;
6321 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
6322 insn) */
6323 int cost; /* cost of the state in cycles */
6324 int accumulated_insns_num; /* number of all previous insns including
6325 nops; an L insn is counted as 2 insns */
6326 int branch_deviation; /* deviation of previous branches from 3rd slots */
6327 struct bundle_state *next; /* next state with the same insn_num */
6328 struct bundle_state *originator; /* originator (previous insn state) */
6329 /* All bundle states are in the following chain. */
6330 struct bundle_state *allocated_states_chain;
6331 /* The DFA State after issuing the insn and the nops. */
6332 state_t dfa_state;
6333 };
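/* Note that accumulated_insns_num counts occupied slots rather than
   insns proper -- an L-type insn takes two slots -- so
   accumulated_insns_num % 3 is the position of the next insn within the
   current bundle; bundling () below relies on this to detect bundle
   boundaries.  */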
6334
6335 /* The following maps an insn number to the corresponding bundle state. */
6336
6337 static struct bundle_state **index_to_bundle_states;
6338
6339 /* The unique number of next bundle state. */
6340
6341 static int bundle_states_num;
6342
6343 /* All allocated bundle states are in the following chain. */
6344
6345 static struct bundle_state *allocated_bundle_states_chain;
6346
6347 /* All allocated but not used bundle states are in the following
6348 chain. */
6349
6350 static struct bundle_state *free_bundle_state_chain;
6351
6352
6353 /* The following function returns a free bundle state. */
6354
6355 static struct bundle_state *
6356 get_free_bundle_state (void)
6357 {
6358 struct bundle_state *result;
6359
6360 if (free_bundle_state_chain != NULL)
6361 {
6362 result = free_bundle_state_chain;
6363 free_bundle_state_chain = result->next;
6364 }
6365 else
6366 {
6367 result = xmalloc (sizeof (struct bundle_state));
6368 result->dfa_state = xmalloc (dfa_state_size);
6369 result->allocated_states_chain = allocated_bundle_states_chain;
6370 allocated_bundle_states_chain = result;
6371 }
6372 result->unique_num = bundle_states_num++;
6373 return result;
6374
6375 }
6376
6377 /* The following function frees given bundle state. */
6378
6379 static void
6380 free_bundle_state (struct bundle_state *state)
6381 {
6382 state->next = free_bundle_state_chain;
6383 free_bundle_state_chain = state;
6384 }
6385
6386 /* Start work with abstract data `bundle states'. */
6387
6388 static void
6389 initiate_bundle_states (void)
6390 {
6391 bundle_states_num = 0;
6392 free_bundle_state_chain = NULL;
6393 allocated_bundle_states_chain = NULL;
6394 }
6395
6396 /* Finish work with abstract data `bundle states'. */
6397
6398 static void
6399 finish_bundle_states (void)
6400 {
6401 struct bundle_state *curr_state, *next_state;
6402
6403 for (curr_state = allocated_bundle_states_chain;
6404 curr_state != NULL;
6405 curr_state = next_state)
6406 {
6407 next_state = curr_state->allocated_states_chain;
6408 free (curr_state->dfa_state);
6409 free (curr_state);
6410 }
6411 }
6412
6413 /* Hash table of the bundle states. The key is dfa_state and insn_num
6414 of the bundle states. */
6415
6416 static htab_t bundle_state_table;
6417
6418 /* The function returns hash of BUNDLE_STATE. */
6419
6420 static unsigned
6421 bundle_state_hash (const void *bundle_state)
6422 {
6423 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6424 unsigned result, i;
6425
6426 for (result = i = 0; i < dfa_state_size; i++)
6427 result += (((unsigned char *) state->dfa_state) [i]
6428 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6429 return result + state->insn_num;
6430 }
6431
6432 /* The function returns nonzero if the bundle state keys are equal. */
6433
6434 static int
6435 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6436 {
6437 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6438 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6439
6440 return (state1->insn_num == state2->insn_num
6441 && memcmp (state1->dfa_state, state2->dfa_state,
6442 dfa_state_size) == 0);
6443 }
6444
6445 /* The function inserts BUNDLE_STATE into the hash table. The
6446 function returns nonzero if the bundle state has been inserted into
6447 the table. The table contains the best bundle state for each key. */
6448
6449 static int
6450 insert_bundle_state (struct bundle_state *bundle_state)
6451 {
6452 void **entry_ptr;
6453
6454 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6455 if (*entry_ptr == NULL)
6456 {
6457 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6458 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6459 *entry_ptr = (void *) bundle_state;
6460 return TRUE;
6461 }
6462 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6463 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6464 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6465 > bundle_state->accumulated_insns_num
6466 || (((struct bundle_state *)
6467 *entry_ptr)->accumulated_insns_num
6468 == bundle_state->accumulated_insns_num
6469 && ((struct bundle_state *)
6470 *entry_ptr)->branch_deviation
6471 > bundle_state->branch_deviation))))
6472
6473 {
6474 struct bundle_state temp;
6475
6476 temp = *(struct bundle_state *) *entry_ptr;
6477 *(struct bundle_state *) *entry_ptr = *bundle_state;
6478 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6479 *bundle_state = temp;
6480 }
6481 return FALSE;
6482 }
6483
6484 /* Start work with the hash table. */
6485
6486 static void
6487 initiate_bundle_state_table (void)
6488 {
6489 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6490 (htab_del) 0);
6491 }
6492
6493 /* Finish work with the hash table. */
6494
6495 static void
6496 finish_bundle_state_table (void)
6497 {
6498 htab_delete (bundle_state_table);
6499 }
6500
6501 \f
6502
6503 /* The following variable is an insn `nop' used to check bundle states
6504 with different numbers of inserted nops. */
6505
6506 static rtx ia64_nop;
6507
6508 /* The following function tries to issue NOPS_NUM nops for the current
6509 state without advancing the processor cycle. If it fails, the
6510 function returns FALSE and frees the current state. */
6511
6512 static int
6513 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6514 {
6515 int i;
6516
6517 for (i = 0; i < nops_num; i++)
6518 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6519 {
6520 free_bundle_state (curr_state);
6521 return FALSE;
6522 }
6523 return TRUE;
6524 }
6525
6526 /* The following function tries to issue INSN for the current
6527 state without advancing the processor cycle. If it fails, the
6528 function returns FALSE and frees the current state. */
6529
6530 static int
6531 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6532 {
6533 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6534 {
6535 free_bundle_state (curr_state);
6536 return FALSE;
6537 }
6538 return TRUE;
6539 }
6540
6541 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6542 starting from ORIGINATOR without advancing the processor cycle. If
6543 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6544 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6545 If successful, the function creates a new bundle state and inserts
6546 it into the hash table and into `index_to_bundle_states'. */
6547
6548 static void
6549 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6550 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6551 {
6552 struct bundle_state *curr_state;
6553
6554 curr_state = get_free_bundle_state ();
6555 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6556 curr_state->insn = insn;
6557 curr_state->insn_num = originator->insn_num + 1;
6558 curr_state->cost = originator->cost;
6559 curr_state->originator = originator;
6560 curr_state->before_nops_num = before_nops_num;
6561 curr_state->after_nops_num = 0;
6562 curr_state->accumulated_insns_num
6563 = originator->accumulated_insns_num + before_nops_num;
6564 curr_state->branch_deviation = originator->branch_deviation;
6565 gcc_assert (insn);
6566 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6567 {
6568 gcc_assert (GET_MODE (insn) != TImode);
6569 if (!try_issue_nops (curr_state, before_nops_num))
6570 return;
6571 if (!try_issue_insn (curr_state, insn))
6572 return;
6573 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6574 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6575 && curr_state->accumulated_insns_num % 3 != 0)
6576 {
6577 free_bundle_state (curr_state);
6578 return;
6579 }
6580 }
6581 else if (GET_MODE (insn) != TImode)
6582 {
6583 if (!try_issue_nops (curr_state, before_nops_num))
6584 return;
6585 if (!try_issue_insn (curr_state, insn))
6586 return;
6587 curr_state->accumulated_insns_num++;
6588 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
6589 && asm_noperands (PATTERN (insn)) < 0);
6590
6591 if (ia64_safe_type (insn) == TYPE_L)
6592 curr_state->accumulated_insns_num++;
6593 }
6594 else
6595 {
6596 /* If this is an insn that must be first in a group, then don't allow
6597 nops to be emitted before it. Currently, alloc is the only such
6598 supported instruction. */
6599 /* ??? The bundling automatons should handle this for us, but they do
6600 not yet have support for the first_insn attribute. */
6601 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
6602 {
6603 free_bundle_state (curr_state);
6604 return;
6605 }
6606
6607 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6608 state_transition (curr_state->dfa_state, NULL);
6609 curr_state->cost++;
6610 if (!try_issue_nops (curr_state, before_nops_num))
6611 return;
6612 if (!try_issue_insn (curr_state, insn))
6613 return;
6614 curr_state->accumulated_insns_num++;
6615 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6616 || asm_noperands (PATTERN (insn)) >= 0)
6617 {
6618 /* Finish bundle containing asm insn. */
6619 curr_state->after_nops_num
6620 = 3 - curr_state->accumulated_insns_num % 3;
6621 curr_state->accumulated_insns_num
6622 += 3 - curr_state->accumulated_insns_num % 3;
6623 }
6624 else if (ia64_safe_type (insn) == TYPE_L)
6625 curr_state->accumulated_insns_num++;
6626 }
6627 if (ia64_safe_type (insn) == TYPE_B)
6628 curr_state->branch_deviation
6629 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6630 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6631 {
6632 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6633 {
6634 state_t dfa_state;
6635 struct bundle_state *curr_state1;
6636 struct bundle_state *allocated_states_chain;
6637
6638 curr_state1 = get_free_bundle_state ();
6639 dfa_state = curr_state1->dfa_state;
6640 allocated_states_chain = curr_state1->allocated_states_chain;
6641 *curr_state1 = *curr_state;
6642 curr_state1->dfa_state = dfa_state;
6643 curr_state1->allocated_states_chain = allocated_states_chain;
6644 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6645 dfa_state_size);
6646 curr_state = curr_state1;
6647 }
6648 if (!try_issue_nops (curr_state,
6649 3 - curr_state->accumulated_insns_num % 3))
6650 return;
6651 curr_state->after_nops_num
6652 = 3 - curr_state->accumulated_insns_num % 3;
6653 curr_state->accumulated_insns_num
6654 += 3 - curr_state->accumulated_insns_num % 3;
6655 }
6656 if (!insert_bundle_state (curr_state))
6657 free_bundle_state (curr_state);
6658 return;
6659 }
6660
6661 /* The following function returns the position in the two-bundle
6662 window for the given STATE. */
6663
6664 static int
6665 get_max_pos (state_t state)
6666 {
6667 if (cpu_unit_reservation_p (state, pos_6))
6668 return 6;
6669 else if (cpu_unit_reservation_p (state, pos_5))
6670 return 5;
6671 else if (cpu_unit_reservation_p (state, pos_4))
6672 return 4;
6673 else if (cpu_unit_reservation_p (state, pos_3))
6674 return 3;
6675 else if (cpu_unit_reservation_p (state, pos_2))
6676 return 2;
6677 else if (cpu_unit_reservation_p (state, pos_1))
6678 return 1;
6679 else
6680 return 0;
6681 }
6682
6683 /* The function returns the code of a possible template for the given
6684 position and state. The function should be called only with the two
6685 position values 3 or 6. We avoid generating F NOPs by putting
6686 templates containing F insns at the end of the template search,
6687 because of an undocumented anomaly in McKinley-derived cores which can
6688 cause stalls if an F-unit insn (including a NOP) is issued within a
6689 six-cycle window after reading certain application registers (such
6690 as ar.bsp). Furthermore, power considerations also argue against
6691 the use of F-unit instructions unless they're really needed. */
6692
6693 static int
6694 get_template (state_t state, int pos)
6695 {
6696 switch (pos)
6697 {
6698 case 3:
6699 if (cpu_unit_reservation_p (state, _0mmi_))
6700 return 1;
6701 else if (cpu_unit_reservation_p (state, _0mii_))
6702 return 0;
6703 else if (cpu_unit_reservation_p (state, _0mmb_))
6704 return 7;
6705 else if (cpu_unit_reservation_p (state, _0mib_))
6706 return 6;
6707 else if (cpu_unit_reservation_p (state, _0mbb_))
6708 return 5;
6709 else if (cpu_unit_reservation_p (state, _0bbb_))
6710 return 4;
6711 else if (cpu_unit_reservation_p (state, _0mmf_))
6712 return 3;
6713 else if (cpu_unit_reservation_p (state, _0mfi_))
6714 return 2;
6715 else if (cpu_unit_reservation_p (state, _0mfb_))
6716 return 8;
6717 else if (cpu_unit_reservation_p (state, _0mlx_))
6718 return 9;
6719 else
6720 gcc_unreachable ();
6721 case 6:
6722 if (cpu_unit_reservation_p (state, _1mmi_))
6723 return 1;
6724 else if (cpu_unit_reservation_p (state, _1mii_))
6725 return 0;
6726 else if (cpu_unit_reservation_p (state, _1mmb_))
6727 return 7;
6728 else if (cpu_unit_reservation_p (state, _1mib_))
6729 return 6;
6730 else if (cpu_unit_reservation_p (state, _1mbb_))
6731 return 5;
6732 else if (cpu_unit_reservation_p (state, _1bbb_))
6733 return 4;
6734 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6735 return 3;
6736 else if (cpu_unit_reservation_p (state, _1mfi_))
6737 return 2;
6738 else if (cpu_unit_reservation_p (state, _1mfb_))
6739 return 8;
6740 else if (cpu_unit_reservation_p (state, _1mlx_))
6741 return 9;
6742 else
6743 gcc_unreachable ();
6744 default:
6745 gcc_unreachable ();
6746 }
6747 }
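/* For illustration: the value returned by get_template indexes
   bundle_name[] above, so a caller wanting the pseudo-op of the first
   bundle in the window could do something like

       int t = get_template (curr_state->dfa_state, 3);
       const char *name = get_bundle_name (t);

   where, e.g., t == 2 corresponds to ".mfi" and t == 9 to ".mlx",
   matching the gen_bundle_selector (GEN_INT (template0)) calls in
   bundling () below.  */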
6748
6749 /* The following function returns the first insn important for insn
6750 bundling at or after INSN and before TAIL. */
6751
6752 static rtx
6753 get_next_important_insn (rtx insn, rtx tail)
6754 {
6755 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6756 if (INSN_P (insn)
6757 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6758 && GET_CODE (PATTERN (insn)) != USE
6759 && GET_CODE (PATTERN (insn)) != CLOBBER)
6760 return insn;
6761 return NULL_RTX;
6762 }
6763
6764 /* The following function does insn bundling. Bundling means
6765 inserting templates and nop insns to fit insn groups into permitted
6766 templates. Instruction scheduling uses an NDFA (non-deterministic
6767 finite automaton) encoding information about the templates and the
6768 inserted nops. The nondeterminism of the automaton makes it
6769 possible to follow all possible insn sequences very quickly.
6770 
6771 Unfortunately it is not possible to get information about inserted
6772 nop insns and used templates from the automaton states. The
6773 automaton only says that we can issue an insn, possibly inserting
6774 some nops before it and using some template. Therefore insn
6775 bundling in this function is implemented by using a DFA
6776 (deterministic finite automaton). We follow all possible insn
6777 sequences by inserting 0-2 nops (that is what the NDFA describes for
6778 insn scheduling) before/after each insn being bundled. We know the
6779 start of a simulated processor cycle from insn scheduling (an insn
6780 starting a new cycle has TImode).
6781 
6782 A simple implementation of insn bundling would create an enormous
6783 number of possible insn sequences satisfying the information about
6784 new cycle ticks taken from the insn scheduling. To make the
6785 algorithm practical we use dynamic programming. Each decision
6786 (about inserting nops and implicitly about previous decisions) is
6787 described by the structure bundle_state (see above). If we generate
6788 the same bundle state (the key is the automaton state after issuing
6789 the insns and nops for it), we reuse the already generated one. As
6790 a consequence we reject some decisions which cannot improve the
6791 solution and reduce the memory used by the algorithm.
6792 
6793 When we reach the end of the EBB (extended basic block), we choose
6794 the best sequence and then, moving back through the EBB, insert
6795 templates for the best alternative. The templates are obtained by
6796 querying the automaton state for each insn in the chosen bundle states.
6797 
6798 So the algorithm makes two (forward and backward) passes through the
6799 EBB. There is an additional forward pass through the EBB for the
6800 Itanium1 processor. This pass inserts more nops to make the dependency
6801 between a producer insn and an MMMUL/MMSHF insn at least 4 cycles long. */
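/* A rough sketch of the forward pass implemented below: every bundle
   state reached after the previous important insn is extended by trying
   0, 1 or (for some insn types) 2 nops in front of the current insn,
   and the results are merged through insert_bundle_state, which keeps
   only the best state per (DFA state, insn number) key:

       for (curr_state = index_to_bundle_states[insn_num - 1];
            curr_state != NULL;
            curr_state = next_state)
         {
           next_state = curr_state->next;
           issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
                                only_bundle_end_p);
           issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
                                only_bundle_end_p);
           issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
                                only_bundle_end_p);
         }

   The 2-nop variant is only tried for F/B/L/S insns and, on Itanium1,
   for M/A insns in slot 1 when the bundle is not ending, exactly as in
   the loop below.  */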
6802
6803 static void
6804 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6805 {
6806 struct bundle_state *curr_state, *next_state, *best_state;
6807 rtx insn, next_insn;
6808 int insn_num;
6809 int i, bundle_end_p, only_bundle_end_p, asm_p;
6810 int pos = 0, max_pos, template0, template1;
6811 rtx b;
6812 rtx nop;
6813 enum attr_type type;
6814
6815 insn_num = 0;
6816 /* Count insns in the EBB. */
6817 for (insn = NEXT_INSN (prev_head_insn);
6818 insn && insn != tail;
6819 insn = NEXT_INSN (insn))
6820 if (INSN_P (insn))
6821 insn_num++;
6822 if (insn_num == 0)
6823 return;
6824 bundling_p = 1;
6825 dfa_clean_insn_cache ();
6826 initiate_bundle_state_table ();
6827 index_to_bundle_states = xmalloc ((insn_num + 2)
6828 * sizeof (struct bundle_state *));
6829 /* First (forward) pass -- generation of bundle states. */
6830 curr_state = get_free_bundle_state ();
6831 curr_state->insn = NULL;
6832 curr_state->before_nops_num = 0;
6833 curr_state->after_nops_num = 0;
6834 curr_state->insn_num = 0;
6835 curr_state->cost = 0;
6836 curr_state->accumulated_insns_num = 0;
6837 curr_state->branch_deviation = 0;
6838 curr_state->next = NULL;
6839 curr_state->originator = NULL;
6840 state_reset (curr_state->dfa_state);
6841 index_to_bundle_states [0] = curr_state;
6842 insn_num = 0;
6843 /* Shift cycle mark if it is put on insn which could be ignored. */
6844 for (insn = NEXT_INSN (prev_head_insn);
6845 insn != tail;
6846 insn = NEXT_INSN (insn))
6847 if (INSN_P (insn)
6848 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6849 || GET_CODE (PATTERN (insn)) == USE
6850 || GET_CODE (PATTERN (insn)) == CLOBBER)
6851 && GET_MODE (insn) == TImode)
6852 {
6853 PUT_MODE (insn, VOIDmode);
6854 for (next_insn = NEXT_INSN (insn);
6855 next_insn != tail;
6856 next_insn = NEXT_INSN (next_insn))
6857 if (INSN_P (next_insn)
6858 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6859 && GET_CODE (PATTERN (next_insn)) != USE
6860 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6861 {
6862 PUT_MODE (next_insn, TImode);
6863 break;
6864 }
6865 }
6866 /* Forward pass: generation of bundle states. */
6867 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6868 insn != NULL_RTX;
6869 insn = next_insn)
6870 {
6871 gcc_assert (INSN_P (insn)
6872 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6873 && GET_CODE (PATTERN (insn)) != USE
6874 && GET_CODE (PATTERN (insn)) != CLOBBER);
6875 type = ia64_safe_type (insn);
6876 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6877 insn_num++;
6878 index_to_bundle_states [insn_num] = NULL;
6879 for (curr_state = index_to_bundle_states [insn_num - 1];
6880 curr_state != NULL;
6881 curr_state = next_state)
6882 {
6883 pos = curr_state->accumulated_insns_num % 3;
6884 next_state = curr_state->next;
6885 /* We must fill up the current bundle in order to start a
6886 subsequent asm insn in a new bundle. Asm insn is always
6887 placed in a separate bundle. */
6888 only_bundle_end_p
6889 = (next_insn != NULL_RTX
6890 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6891 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6892 /* We may fill up the current bundle if it is the cycle end
6893 without a group barrier. */
6894 bundle_end_p
6895 = (only_bundle_end_p || next_insn == NULL_RTX
6896 || (GET_MODE (next_insn) == TImode
6897 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6898 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6899 || type == TYPE_S
6900 /* We need to insert 2 nops for cases like M_MII. To
6901 guarantee issuing all insns on the same cycle for
6902 Itanium 1, we need to issue 2 nops after the first M
6903 insn (MnnMII where n is a nop insn). */
6904 || ((type == TYPE_M || type == TYPE_A)
6905 && ia64_tune == PROCESSOR_ITANIUM
6906 && !bundle_end_p && pos == 1))
6907 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6908 only_bundle_end_p);
6909 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6910 only_bundle_end_p);
6911 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6912 only_bundle_end_p);
6913 }
6914 gcc_assert (index_to_bundle_states [insn_num]);
6915 for (curr_state = index_to_bundle_states [insn_num];
6916 curr_state != NULL;
6917 curr_state = curr_state->next)
6918 if (verbose >= 2 && dump)
6919 {
6920 /* This structure is taken from generated code of the
6921 pipeline hazard recognizer (see file insn-attrtab.c).
6922 Please don't forget to change the structure if a new
6923 automaton is added to .md file. */
6924 struct DFA_chip
6925 {
6926 unsigned short one_automaton_state;
6927 unsigned short oneb_automaton_state;
6928 unsigned short two_automaton_state;
6929 unsigned short twob_automaton_state;
6930 };
6931
6932 fprintf
6933 (dump,
6934 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6935 curr_state->unique_num,
6936 (curr_state->originator == NULL
6937 ? -1 : curr_state->originator->unique_num),
6938 curr_state->cost,
6939 curr_state->before_nops_num, curr_state->after_nops_num,
6940 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6941 (ia64_tune == PROCESSOR_ITANIUM
6942 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6943 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6944 INSN_UID (insn));
6945 }
6946 }
6947
6948 /* We should find a solution because the 2nd insn scheduling has
6949 found one. */
6950 gcc_assert (index_to_bundle_states [insn_num]);
6951 /* Find a state corresponding to the best insn sequence. */
6952 best_state = NULL;
6953 for (curr_state = index_to_bundle_states [insn_num];
6954 curr_state != NULL;
6955 curr_state = curr_state->next)
6956 /* We are only looking at the states with a fully filled-up last
6957 bundle. First we prefer insn sequences with minimal cost,
6958 then those with the fewest inserted nops, and finally those with
6959 branch insns placed in 3rd slots. */
6960 if (curr_state->accumulated_insns_num % 3 == 0
6961 && (best_state == NULL || best_state->cost > curr_state->cost
6962 || (best_state->cost == curr_state->cost
6963 && (curr_state->accumulated_insns_num
6964 < best_state->accumulated_insns_num
6965 || (curr_state->accumulated_insns_num
6966 == best_state->accumulated_insns_num
6967 && curr_state->branch_deviation
6968 < best_state->branch_deviation)))))
6969 best_state = curr_state;
6970 /* Second (backward) pass: adding nops and templates. */
6971 insn_num = best_state->before_nops_num;
6972 template0 = template1 = -1;
6973 for (curr_state = best_state;
6974 curr_state->originator != NULL;
6975 curr_state = curr_state->originator)
6976 {
6977 insn = curr_state->insn;
6978 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6979 || asm_noperands (PATTERN (insn)) >= 0);
6980 insn_num++;
6981 if (verbose >= 2 && dump)
6982 {
6983 struct DFA_chip
6984 {
6985 unsigned short one_automaton_state;
6986 unsigned short oneb_automaton_state;
6987 unsigned short two_automaton_state;
6988 unsigned short twob_automaton_state;
6989 };
6990
6991 fprintf
6992 (dump,
6993 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6994 curr_state->unique_num,
6995 (curr_state->originator == NULL
6996 ? -1 : curr_state->originator->unique_num),
6997 curr_state->cost,
6998 curr_state->before_nops_num, curr_state->after_nops_num,
6999 curr_state->accumulated_insns_num, curr_state->branch_deviation,
7000 (ia64_tune == PROCESSOR_ITANIUM
7001 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7002 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7003 INSN_UID (insn));
7004 }
7005 /* Find the position in the current bundle window. The window can
7006 contain at most two bundles. A two-bundle window means that
7007 the processor will make two bundle rotations. */
7008 max_pos = get_max_pos (curr_state->dfa_state);
7009 if (max_pos == 6
7010 /* The following (negative template number) means that the
7011 processor did one bundle rotation. */
7012 || (max_pos == 3 && template0 < 0))
7013 {
7014 /* We are at the end of the window -- find template(s) for
7015 its bundle(s). */
7016 pos = max_pos;
7017 if (max_pos == 3)
7018 template0 = get_template (curr_state->dfa_state, 3);
7019 else
7020 {
7021 template1 = get_template (curr_state->dfa_state, 3);
7022 template0 = get_template (curr_state->dfa_state, 6);
7023 }
7024 }
7025 if (max_pos > 3 && template1 < 0)
7026 /* It may happen when we have the stop inside a bundle. */
7027 {
7028 gcc_assert (pos <= 3);
7029 template1 = get_template (curr_state->dfa_state, 3);
7030 pos += 3;
7031 }
7032 if (!asm_p)
7033 /* Emit nops after the current insn. */
7034 for (i = 0; i < curr_state->after_nops_num; i++)
7035 {
7036 nop = gen_nop ();
7037 emit_insn_after (nop, insn);
7038 pos--;
7039 gcc_assert (pos >= 0);
7040 if (pos % 3 == 0)
7041 {
7042 /* We are at the start of a bundle: emit the template
7043 (it should be defined). */
7044 gcc_assert (template0 >= 0);
7045 b = gen_bundle_selector (GEN_INT (template0));
7046 ia64_emit_insn_before (b, nop);
7047 /* If we have a two-bundle window, we make one bundle
7048 rotation. Otherwise template0 will be undefined
7049 (a negative value). */
7050 template0 = template1;
7051 template1 = -1;
7052 }
7053 }
7054 /* Move the position backward in the window. A group barrier has
7055 no slot. An asm insn takes a whole bundle. */
7056 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7057 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7058 && asm_noperands (PATTERN (insn)) < 0)
7059 pos--;
7060 /* Long insn takes 2 slots. */
7061 if (ia64_safe_type (insn) == TYPE_L)
7062 pos--;
7063 gcc_assert (pos >= 0);
7064 if (pos % 3 == 0
7065 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7066 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7067 && asm_noperands (PATTERN (insn)) < 0)
7068 {
7069 /* The current insn is at the bundle start: emit the
7070 template. */
7071 gcc_assert (template0 >= 0);
7072 b = gen_bundle_selector (GEN_INT (template0));
7073 ia64_emit_insn_before (b, insn);
7074 b = PREV_INSN (insn);
7075 insn = b;
7076 /* See comment above in analogous place for emitting nops
7077 after the insn. */
7078 template0 = template1;
7079 template1 = -1;
7080 }
7081 /* Emit nops before the current insn. */
7082 for (i = 0; i < curr_state->before_nops_num; i++)
7083 {
7084 nop = gen_nop ();
7085 ia64_emit_insn_before (nop, insn);
7086 nop = PREV_INSN (insn);
7087 insn = nop;
7088 pos--;
7089 gcc_assert (pos >= 0);
7090 if (pos % 3 == 0)
7091 {
7092 /* See comment above in analogous place for emitting nops
7093 after the insn. */
7094 gcc_assert (template0 >= 0);
7095 b = gen_bundle_selector (GEN_INT (template0));
7096 ia64_emit_insn_before (b, insn);
7097 b = PREV_INSN (insn);
7098 insn = b;
7099 template0 = template1;
7100 template1 = -1;
7101 }
7102 }
7103 }
7104 if (ia64_tune == PROCESSOR_ITANIUM)
7105 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
7106 Itanium1 has a strange design: if the distance between an insn
7107 and a dependent MM-insn is less than 4 cycles, we get an
7108 additional stall of 6 cycles. So we make the distance equal to
7109 4 cycles if it is less. */
7110 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7111 insn != NULL_RTX;
7112 insn = next_insn)
7113 {
7114 gcc_assert (INSN_P (insn)
7115 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7116 && GET_CODE (PATTERN (insn)) != USE
7117 && GET_CODE (PATTERN (insn)) != CLOBBER);
7118 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7119 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
7120 /* We found a MM-insn which needs additional cycles. */
7121 {
7122 rtx last;
7123 int i, j, n;
7124 int pred_stop_p;
7125
7126 /* Now we search for the template of the bundle in
7127 which the MM-insn is placed and for the position of the
7128 insn in the bundle (0, 1, 2). We also check whether
7129 there is a stop before the insn. */
7130 last = prev_active_insn (insn);
7131 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7132 if (pred_stop_p)
7133 last = prev_active_insn (last);
7134 n = 0;
7135 for (;; last = prev_active_insn (last))
7136 if (recog_memoized (last) == CODE_FOR_bundle_selector)
7137 {
7138 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
7139 if (template0 == 9)
7140 /* The insn is in an MLX bundle. Change the template
7141 to MFI because we will add nops before the insn.
7142 This simplifies the subsequent code a lot. */
7143 PATTERN (last)
7144 = gen_bundle_selector (const2_rtx); /* -> MFI */
7145 break;
7146 }
7147 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
7148 && (ia64_safe_itanium_class (last)
7149 != ITANIUM_CLASS_IGNORE))
7150 n++;
7151 /* Consistency checks: the stop is not at the bundle
7152 start, there are no more than 3 insns in the bundle,
7153 and the MM-insn is not at the start of a bundle with
7154 template MLX. */
7155 gcc_assert ((!pred_stop_p || n)
7156 && n <= 2
7157 && (template0 != 9 || !n));
7158 /* Put nops after the insn in the bundle. */
7159 for (j = 3 - n; j > 0; j --)
7160 ia64_emit_insn_before (gen_nop (), insn);
7161 /* This takes into account that we will add N more nops
7162 before the insn later -- see the code below. */
7163 add_cycles [INSN_UID (insn)]--;
7164 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7165 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7166 insn);
7167 if (pred_stop_p)
7168 add_cycles [INSN_UID (insn)]--;
7169 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7170 {
7171 /* Insert "MII;" template. */
7172 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
7173 insn);
7174 ia64_emit_insn_before (gen_nop (), insn);
7175 ia64_emit_insn_before (gen_nop (), insn);
7176 if (i > 1)
7177 {
7178 /* To decrease code size, we use the "MI;I;"
7179 template. */
7180 ia64_emit_insn_before
7181 (gen_insn_group_barrier (GEN_INT (3)), insn);
7182 i--;
7183 }
7184 ia64_emit_insn_before (gen_nop (), insn);
7185 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7186 insn);
7187 }
7188 /* Put the MM-insn in the same slot of a bundle with the
7189 same template as the original one. */
7190 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
7191 insn);
7192 /* To put the insn in the same slot, add the necessary
7193 number of nops. */
7194 for (j = n; j > 0; j --)
7195 ia64_emit_insn_before (gen_nop (), insn);
7196 /* Put the stop if the original bundle had it. */
7197 if (pred_stop_p)
7198 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7199 insn);
7200 }
7201 }
7202 free (index_to_bundle_states);
7203 finish_bundle_state_table ();
7204 bundling_p = 0;
7205 dfa_clean_insn_cache ();
7206 }
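/* For illustration (a hand-written sketch, not compiler output;
   register numbers are arbitrary): each 128-bit IA-64 bundle holds
   three instruction slots plus a template that fixes the slot types,
   and ";;" marks a stop at the end of an instruction group:

	{ .mii				// template: M, I, I slots
	  ld8	r14 = [r32]		// M slot
	  add	r15 = r33, r34		// I slot
	  nop.i	0 ;;			// I slot, stop ends the group
	}

   The bundle_selector and nop insns emitted above become the chosen
   template and the filler slots of such bundles.  */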
7207
7208 /* The following function is called at the end of scheduling BB or
7209 EBB. After reload, it inserts stop bits and does insn bundling. */
7210
7211 static void
7212 ia64_sched_finish (FILE *dump, int sched_verbose)
7213 {
7214 if (sched_verbose)
7215 fprintf (dump, "// Finishing schedule.\n");
7216 if (!reload_completed)
7217 return;
7218 if (reload_completed)
7219 {
7220 final_emit_insn_group_barriers (dump);
7221 bundling (dump, sched_verbose, current_sched_info->prev_head,
7222 current_sched_info->next_tail);
7223 if (sched_verbose && dump)
7224 fprintf (dump, "// finishing %d-%d\n",
7225 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7226 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7227
7228 return;
7229 }
7230 }
7231
7232 /* The following function inserts stop bits in scheduled BB or EBB. */
7233
7234 static void
7235 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7236 {
7237 rtx insn;
7238 int need_barrier_p = 0;
7239 rtx prev_insn = NULL_RTX;
7240
7241 init_insn_group_barriers ();
7242
7243 for (insn = NEXT_INSN (current_sched_info->prev_head);
7244 insn != current_sched_info->next_tail;
7245 insn = NEXT_INSN (insn))
7246 {
7247 if (GET_CODE (insn) == BARRIER)
7248 {
7249 rtx last = prev_active_insn (insn);
7250
7251 if (! last)
7252 continue;
7253 if (GET_CODE (last) == JUMP_INSN
7254 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7255 last = prev_active_insn (last);
7256 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7257 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7258
7259 init_insn_group_barriers ();
7260 need_barrier_p = 0;
7261 prev_insn = NULL_RTX;
7262 }
7263 else if (INSN_P (insn))
7264 {
7265 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7266 {
7267 init_insn_group_barriers ();
7268 need_barrier_p = 0;
7269 prev_insn = NULL_RTX;
7270 }
7271 else if (need_barrier_p || group_barrier_needed (insn))
7272 {
7273 if (TARGET_EARLY_STOP_BITS)
7274 {
7275 rtx last;
7276
7277 for (last = insn;
7278 last != current_sched_info->prev_head;
7279 last = PREV_INSN (last))
7280 if (INSN_P (last) && GET_MODE (last) == TImode
7281 && stops_p [INSN_UID (last)])
7282 break;
7283 if (last == current_sched_info->prev_head)
7284 last = insn;
7285 last = prev_active_insn (last);
7286 if (last
7287 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7288 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7289 last);
7290 init_insn_group_barriers ();
7291 for (last = NEXT_INSN (last);
7292 last != insn;
7293 last = NEXT_INSN (last))
7294 if (INSN_P (last))
7295 group_barrier_needed (last);
7296 }
7297 else
7298 {
7299 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7300 insn);
7301 init_insn_group_barriers ();
7302 }
7303 group_barrier_needed (insn);
7304 prev_insn = NULL_RTX;
7305 }
7306 else if (recog_memoized (insn) >= 0)
7307 prev_insn = insn;
7308 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7309 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7310 || asm_noperands (PATTERN (insn)) >= 0);
7311 }
7312 }
7313 }
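/* For illustration (a hand-written sketch, not compiler output): the
   insn_group_barrier emitted above appears as a ";;" stop bit in the
   assembly, splitting dependent instructions into separate groups:

	add	r14 = r32, r33 ;;	// group ends at the stop
	ld8	r15 = [r14]		// reads r14 in the next group

   Without the stop, the write and the read of r14 would fall into one
   instruction group, which the architecture forbids.  */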
7314
7315 \f
7316
7317 /* If the following function returns TRUE, we will use the DFA
7318 insn scheduler. */
7319
7320 static int
7321 ia64_first_cycle_multipass_dfa_lookahead (void)
7322 {
7323 return (reload_completed ? 6 : 4);
7324 }
7325
7326 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
7327
7328 static void
7329 ia64_init_dfa_pre_cycle_insn (void)
7330 {
7331 if (temp_dfa_state == NULL)
7332 {
7333 dfa_state_size = state_size ();
7334 temp_dfa_state = xmalloc (dfa_state_size);
7335 prev_cycle_state = xmalloc (dfa_state_size);
7336 }
7337 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7338 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7339 recog_memoized (dfa_pre_cycle_insn);
7340 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7341 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7342 recog_memoized (dfa_stop_insn);
7343 }
7344
7345 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7346 used by the DFA insn scheduler. */
7347
7348 static rtx
7349 ia64_dfa_pre_cycle_insn (void)
7350 {
7351 return dfa_pre_cycle_insn;
7352 }
7353
7354 /* The following function returns TRUE if PRODUCER (of type ilog or
7355 ld) produces the address for CONSUMER (of type st or stf). */
7356
7357 int
7358 ia64_st_address_bypass_p (rtx producer, rtx consumer)
7359 {
7360 rtx dest, reg, mem;
7361
7362 gcc_assert (producer && consumer);
7363 dest = ia64_single_set (producer);
7364 gcc_assert (dest);
7365 reg = SET_DEST (dest);
7366 gcc_assert (reg);
7367 if (GET_CODE (reg) == SUBREG)
7368 reg = SUBREG_REG (reg);
7369 gcc_assert (GET_CODE (reg) == REG);
7370
7371 dest = ia64_single_set (consumer);
7372 gcc_assert (dest);
7373 mem = SET_DEST (dest);
7374 gcc_assert (mem && GET_CODE (mem) == MEM);
7375 return reg_mentioned_p (reg, mem);
7376 }
7377
7378 /* The following function returns TRUE if PRODUCER (of type ilog or
7379 ld) produces the address for CONSUMER (of type ld or fld). */
7380
7381 int
7382 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7383 {
7384 rtx dest, src, reg, mem;
7385
7386 gcc_assert (producer && consumer);
7387 dest = ia64_single_set (producer);
7388 gcc_assert (dest);
7389 reg = SET_DEST (dest);
7390 gcc_assert (reg);
7391 if (GET_CODE (reg) == SUBREG)
7392 reg = SUBREG_REG (reg);
7393 gcc_assert (GET_CODE (reg) == REG);
7394
7395 src = ia64_single_set (consumer);
7396 gcc_assert (src);
7397 mem = SET_SRC (src);
7398 gcc_assert (mem);
7399 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7400 mem = XVECEXP (mem, 0, 0);
7401 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7402 mem = XEXP (mem, 0);
7403
7404 /* Note that LO_SUM is used for GOT loads. */
7405 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
7406
7407 return reg_mentioned_p (reg, mem);
7408 }
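/* For illustration (a hand-written sketch; register numbers are
   arbitrary), both bypass predicates above describe pairs such as

	add	r14 = r32, r33 ;;	// PRODUCER computes the address
	st8	[r14] = r35		// CONSUMER uses r14 as the address

   where the register produced by an ALU or load insn feeds the address
   of a following memory access; the pipeline descriptions give such
   pairs a special latency.  */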
7409
7410 /* The following function returns TRUE if INSN produces an address
7411 for a load/store insn. We will place such insns into an M slot
7412 because that decreases their latency. */
7413
7414 int
7415 ia64_produce_address_p (rtx insn)
7416 {
7417 return insn->call;
7418 }
7419
7420 \f
7421 /* Emit pseudo-ops for the assembler to describe predicate relations.
7422 At present this assumes that we only consider predicate pairs to
7423 be mutex, and that the assembler can deduce proper values from
7424 straight-line code. */
7425
7426 static void
7427 emit_predicate_relation_info (void)
7428 {
7429 basic_block bb;
7430
7431 FOR_EACH_BB_REVERSE (bb)
7432 {
7433 int r;
7434 rtx head = BB_HEAD (bb);
7435
7436 /* We only need such notes at code labels. */
7437 if (GET_CODE (head) != CODE_LABEL)
7438 continue;
7439 if (GET_CODE (NEXT_INSN (head)) == NOTE
7440 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7441 head = NEXT_INSN (head);
7442
7443 /* Skip p0, which may be thought to be live due to (reg:DI p0)
7444 grabbing the entire block of predicate registers. */
7445 for (r = PR_REG (2); r < PR_REG (64); r += 2)
7446 if (REGNO_REG_SET_P (bb->il.rtl->global_live_at_start, r))
7447 {
7448 rtx p = gen_rtx_REG (BImode, r);
7449 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7450 if (head == BB_END (bb))
7451 BB_END (bb) = n;
7452 head = n;
7453 }
7454 }
7455
7456 /* Look for conditional calls that do not return, and protect predicate
7457 relations around them. Otherwise the assembler will assume the call
7458 returns, and complain about uses of call-clobbered predicates after
7459 the call. */
7460 FOR_EACH_BB_REVERSE (bb)
7461 {
7462 rtx insn = BB_HEAD (bb);
7463
7464 while (1)
7465 {
7466 if (GET_CODE (insn) == CALL_INSN
7467 && GET_CODE (PATTERN (insn)) == COND_EXEC
7468 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7469 {
7470 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7471 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7472 if (BB_HEAD (bb) == insn)
7473 BB_HEAD (bb) = b;
7474 if (BB_END (bb) == insn)
7475 BB_END (bb) = a;
7476 }
7477
7478 if (insn == BB_END (bb))
7479 break;
7480 insn = NEXT_INSN (insn);
7481 }
7482 }
7483 }
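/* For illustration (a sketch of the emitted pseudo-ops; the exact
   operand spelling comes from the corresponding ia64.md patterns),
   the directives produced here look like

	.pred.rel.mutex p6, p7
	.pred.safe_across_calls p1-p5,p16-p63

   telling the assembler that a predicate pair is mutually exclusive
   and which predicates it may assume safe across the bracketed call,
   so it does not warn about apparently clobbered predicates.  */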
7484
7485 /* Perform machine dependent operations on the rtl chain INSNS. */
7486
7487 static void
7488 ia64_reorg (void)
7489 {
7490 /* We are freeing block_for_insn in the toplev to keep compatibility
7491 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7492 compute_bb_for_insn ();
7493
7494 /* If optimizing, we'll have split before scheduling. */
7495 if (optimize == 0)
7496 split_all_insns (0);
7497
7498 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7499 non-optimizing bootstrap. */
7500 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7501
7502 if (ia64_flag_schedule_insns2)
7503 {
7504 timevar_push (TV_SCHED2);
7505 ia64_final_schedule = 1;
7506
7507 initiate_bundle_states ();
7508 ia64_nop = make_insn_raw (gen_nop ());
7509 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7510 recog_memoized (ia64_nop);
7511 clocks_length = get_max_uid () + 1;
7512 stops_p = xcalloc (1, clocks_length);
7513 if (ia64_tune == PROCESSOR_ITANIUM)
7514 {
7515 clocks = xcalloc (clocks_length, sizeof (int));
7516 add_cycles = xcalloc (clocks_length, sizeof (int));
7517 }
7518 if (ia64_tune == PROCESSOR_ITANIUM2)
7519 {
7520 pos_1 = get_cpu_unit_code ("2_1");
7521 pos_2 = get_cpu_unit_code ("2_2");
7522 pos_3 = get_cpu_unit_code ("2_3");
7523 pos_4 = get_cpu_unit_code ("2_4");
7524 pos_5 = get_cpu_unit_code ("2_5");
7525 pos_6 = get_cpu_unit_code ("2_6");
7526 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7527 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7528 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7529 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7530 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7531 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7532 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7533 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7534 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7535 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7536 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7537 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7538 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7539 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7540 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7541 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7542 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7543 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7544 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7545 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7546 }
7547 else
7548 {
7549 pos_1 = get_cpu_unit_code ("1_1");
7550 pos_2 = get_cpu_unit_code ("1_2");
7551 pos_3 = get_cpu_unit_code ("1_3");
7552 pos_4 = get_cpu_unit_code ("1_4");
7553 pos_5 = get_cpu_unit_code ("1_5");
7554 pos_6 = get_cpu_unit_code ("1_6");
7555 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7556 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7557 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7558 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7559 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7560 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7561 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7562 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7563 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7564 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7565 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7566 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7567 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7568 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7569 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7570 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7571 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7572 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7573 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7574 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7575 }
7576 schedule_ebbs (dump_file);
7577 finish_bundle_states ();
7578 if (ia64_tune == PROCESSOR_ITANIUM)
7579 {
7580 free (add_cycles);
7581 free (clocks);
7582 }
7583 free (stops_p);
7584 emit_insn_group_barriers (dump_file);
7585
7586 ia64_final_schedule = 0;
7587 timevar_pop (TV_SCHED2);
7588 }
7589 else
7590 emit_all_insn_group_barriers (dump_file);
7591
7592 /* A call must not be the last instruction in a function, so that the
7593 return address is still within the function and unwinding works
7594 properly. Note that IA-64 differs from dwarf2 on this point. */
7595 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7596 {
7597 rtx insn;
7598 int saw_stop = 0;
7599
7600 insn = get_last_insn ();
7601 if (! INSN_P (insn))
7602 insn = prev_active_insn (insn);
7603 /* Skip over insns that expand to nothing. */
7604 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7605 {
7606 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7607 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7608 saw_stop = 1;
7609 insn = prev_active_insn (insn);
7610 }
7611 if (GET_CODE (insn) == CALL_INSN)
7612 {
7613 if (! saw_stop)
7614 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7615 emit_insn (gen_break_f ());
7616 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7617 }
7618 }
7619
7620 emit_predicate_relation_info ();
7621
7622 if (ia64_flag_var_tracking)
7623 {
7624 timevar_push (TV_VAR_TRACKING);
7625 variable_tracking_main ();
7626 timevar_pop (TV_VAR_TRACKING);
7627 }
7628 }
7629 \f
7630 /* Return true if REGNO is used by the epilogue. */
7631
7632 int
7633 ia64_epilogue_uses (int regno)
7634 {
7635 switch (regno)
7636 {
7637 case R_GR (1):
7638 /* With a call to a function in another module, we will write a new
7639 value to "gp". After returning from such a call, we need to make
7640 sure the function restores the original gp-value, even if the
7641 function itself does not use the gp anymore. */
7642 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7643
7644 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7645 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7646 /* For functions defined with the syscall_linkage attribute, all
7647 input registers are marked as live at all function exits. This
7648 prevents the register allocator from using the input registers,
7649 which in turn makes it possible to restart a system call after
7650 an interrupt without having to save/restore the input registers.
7651 This also prevents kernel data from leaking to application code. */
7652 return lookup_attribute ("syscall_linkage",
7653 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7654
7655 case R_BR (0):
7656 /* Conditional return patterns can't represent the use of `b0' as
7657 the return address, so we force the value live this way. */
7658 return 1;
7659
7660 case AR_PFS_REGNUM:
7661 /* Likewise for ar.pfs, which is used by br.ret. */
7662 return 1;
7663
7664 default:
7665 return 0;
7666 }
7667 }
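/* For illustration (a hypothetical declaration; "syscall_linkage" is
   the real ia64 attribute checked above), a kernel entry point could
   be declared as

	long sys_foo (long a0, long a1)
	     __attribute__ ((syscall_linkage));

   which keeps all eight input registers live at every exit of the
   function, exactly the case tested for IN_REG (0) .. IN_REG (7).  */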
7668
7669 /* Return true if REGNO is used by the frame unwinder. */
7670
7671 int
7672 ia64_eh_uses (int regno)
7673 {
7674 if (! reload_completed)
7675 return 0;
7676
7677 if (current_frame_info.reg_save_b0
7678 && regno == current_frame_info.reg_save_b0)
7679 return 1;
7680 if (current_frame_info.reg_save_pr
7681 && regno == current_frame_info.reg_save_pr)
7682 return 1;
7683 if (current_frame_info.reg_save_ar_pfs
7684 && regno == current_frame_info.reg_save_ar_pfs)
7685 return 1;
7686 if (current_frame_info.reg_save_ar_unat
7687 && regno == current_frame_info.reg_save_ar_unat)
7688 return 1;
7689 if (current_frame_info.reg_save_ar_lc
7690 && regno == current_frame_info.reg_save_ar_lc)
7691 return 1;
7692
7693 return 0;
7694 }
7695 \f
7696 /* Return true if this goes in small data/bss. */
7697
7698 /* ??? We could also support our own long data here, generating
7699 movl/add/ld8 instead of addl,ld8/ld8. This makes the code bigger,
7700 but should make it faster because there is one less load. This
7701 would also cover incomplete types which can't go in sdata/sbss. */
7702
7703 static bool
7704 ia64_in_small_data_p (tree exp)
7705 {
7706 if (TARGET_NO_SDATA)
7707 return false;
7708
7709 /* We want to merge strings, so we never consider them small data. */
7710 if (TREE_CODE (exp) == STRING_CST)
7711 return false;
7712
7713 /* Functions are never small data. */
7714 if (TREE_CODE (exp) == FUNCTION_DECL)
7715 return false;
7716
7717 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7718 {
7719 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7720
7721 if (strcmp (section, ".sdata") == 0
7722 || strncmp (section, ".sdata.", 7) == 0
7723 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
7724 || strcmp (section, ".sbss") == 0
7725 || strncmp (section, ".sbss.", 6) == 0
7726 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
7727 return true;
7728 }
7729 else
7730 {
7731 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7732
7733 /* If this is an incomplete type with size 0, then we can't put it
7734 in sdata because it might be too big when completed. */
7735 if (size > 0 && size <= ia64_section_threshold)
7736 return true;
7737 }
7738
7739 return false;
7740 }
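/* For illustration (hypothetical declarations), both of the following
   are accepted by the test above, the first because of its explicit
   section and the second because its size is below
   ia64_section_threshold:

	int counter __attribute__ ((section (".sdata")));
	static short flag;		// 2 bytes, under the threshold

   whereas string constants, functions and incomplete types are always
   rejected.  */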
7741 \f
7742 /* Output assembly directives for prologue regions. */
7743
7744 /* The current basic block number. */
7745
7746 static bool last_block;
7747
7748 /* True if we need a copy_state command at the start of the next block. */
7749
7750 static bool need_copy_state;
7751
7752 /* The function emits unwind directives for the start of an epilogue. */
7753
7754 static void
7755 process_epilogue (void)
7756 {
7757 /* If this isn't the last block of the function, then we need to label the
7758 current state, and copy it back in at the start of the next block. */
7759
7760 if (!last_block)
7761 {
7762 fprintf (asm_out_file, "\t.label_state %d\n",
7763 ++cfun->machine->state_num);
7764 need_copy_state = true;
7765 }
7766
7767 fprintf (asm_out_file, "\t.restore sp\n");
7768 }
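/* For illustration (a sketch; the state number is arbitrary), an
   epilogue in the middle of a function makes this emit

	.label_state 1
	.restore sp

   and the next basic block then gets ".body" and ".copy_state 1"
   from process_for_unwind_directive below, restoring the unwinder's
   view of the frame after the early-return path.  */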
7769
7770 /* This function processes a SET pattern, looking for the specific
7771 forms that require emitting an assembly directive for unwinding. */
7772
7773 static int
7774 process_set (FILE *asm_out_file, rtx pat)
7775 {
7776 rtx src = SET_SRC (pat);
7777 rtx dest = SET_DEST (pat);
7778 int src_regno, dest_regno;
7779
7780 /* Look for the ALLOC insn. */
7781 if (GET_CODE (src) == UNSPEC_VOLATILE
7782 && XINT (src, 1) == UNSPECV_ALLOC
7783 && GET_CODE (dest) == REG)
7784 {
7785 dest_regno = REGNO (dest);
7786
7787 /* If this is the final destination for ar.pfs, then this must
7788 be the alloc in the prologue. */
7789 if (dest_regno == current_frame_info.reg_save_ar_pfs)
7790 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7791 ia64_dbx_register_number (dest_regno));
7792 else
7793 {
7794 /* This must be an alloc before a sibcall. We must drop
7795 the old frame info. The easiest way to do that is to
7796 ensure we had a ".restore sp" directive followed by a
7797 new prologue. If the procedure doesn't have a
7798 memory-stack frame, we'll issue a dummy ".restore sp"
7799 now. */
7800 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
7801 /* If we haven't done process_epilogue () yet, do it now. */
7802 process_epilogue ();
7803 fprintf (asm_out_file, "\t.prologue\n");
7804 }
7805 return 1;
7806 }
7807
7808 /* Look for SP = .... */
7809 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7810 {
7811 if (GET_CODE (src) == PLUS)
7812 {
7813 rtx op0 = XEXP (src, 0);
7814 rtx op1 = XEXP (src, 1);
7815
7816 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
7817
7818 if (INTVAL (op1) < 0)
7819 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7820 -INTVAL (op1));
7821 else
7822 process_epilogue ();
7823 }
7824 else
7825 {
7826 gcc_assert (GET_CODE (src) == REG
7827 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
7828 process_epilogue ();
7829 }
7830
7831 return 1;
7832 }
7833
7834 /* Register move we need to look at. */
7835 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7836 {
7837 src_regno = REGNO (src);
7838 dest_regno = REGNO (dest);
7839
7840 switch (src_regno)
7841 {
7842 case BR_REG (0):
7843 /* Saving return address pointer. */
7844 gcc_assert (dest_regno == current_frame_info.reg_save_b0);
7845 fprintf (asm_out_file, "\t.save rp, r%d\n",
7846 ia64_dbx_register_number (dest_regno));
7847 return 1;
7848
7849 case PR_REG (0):
7850 gcc_assert (dest_regno == current_frame_info.reg_save_pr);
7851 fprintf (asm_out_file, "\t.save pr, r%d\n",
7852 ia64_dbx_register_number (dest_regno));
7853 return 1;
7854
7855 case AR_UNAT_REGNUM:
7856 gcc_assert (dest_regno == current_frame_info.reg_save_ar_unat);
7857 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7858 ia64_dbx_register_number (dest_regno));
7859 return 1;
7860
7861 case AR_LC_REGNUM:
7862 gcc_assert (dest_regno == current_frame_info.reg_save_ar_lc);
7863 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7864 ia64_dbx_register_number (dest_regno));
7865 return 1;
7866
7867 case STACK_POINTER_REGNUM:
7868 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
7869 && frame_pointer_needed);
7870 fprintf (asm_out_file, "\t.vframe r%d\n",
7871 ia64_dbx_register_number (dest_regno));
7872 return 1;
7873
7874 default:
7875 /* Everything else should indicate being stored to memory. */
7876 gcc_unreachable ();
7877 }
7878 }
7879
7880 /* Memory store we need to look at. */
7881 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7882 {
7883 long off;
7884 rtx base;
7885 const char *saveop;
7886
7887 if (GET_CODE (XEXP (dest, 0)) == REG)
7888 {
7889 base = XEXP (dest, 0);
7890 off = 0;
7891 }
7892 else
7893 {
7894 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
7895 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
7896 base = XEXP (XEXP (dest, 0), 0);
7897 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7898 }
7899
7900 if (base == hard_frame_pointer_rtx)
7901 {
7902 saveop = ".savepsp";
7903 off = - off;
7904 }
7905 else
7906 {
7907 gcc_assert (base == stack_pointer_rtx);
7908 saveop = ".savesp";
7909 }
7910
7911 src_regno = REGNO (src);
7912 switch (src_regno)
7913 {
7914 case BR_REG (0):
7915 gcc_assert (!current_frame_info.reg_save_b0);
7916 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7917 return 1;
7918
7919 case PR_REG (0):
7920 gcc_assert (!current_frame_info.reg_save_pr);
7921 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7922 return 1;
7923
7924 case AR_LC_REGNUM:
7925 gcc_assert (!current_frame_info.reg_save_ar_lc);
7926 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7927 return 1;
7928
7929 case AR_PFS_REGNUM:
7930 gcc_assert (!current_frame_info.reg_save_ar_pfs);
7931 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7932 return 1;
7933
7934 case AR_UNAT_REGNUM:
7935 gcc_assert (!current_frame_info.reg_save_ar_unat);
7936 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7937 return 1;
7938
7939 case GR_REG (4):
7940 case GR_REG (5):
7941 case GR_REG (6):
7942 case GR_REG (7):
7943 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7944 1 << (src_regno - GR_REG (4)));
7945 return 1;
7946
7947 case BR_REG (1):
7948 case BR_REG (2):
7949 case BR_REG (3):
7950 case BR_REG (4):
7951 case BR_REG (5):
7952 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7953 1 << (src_regno - BR_REG (1)));
7954 return 1;
7955
7956 case FR_REG (2):
7957 case FR_REG (3):
7958 case FR_REG (4):
7959 case FR_REG (5):
7960 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7961 1 << (src_regno - FR_REG (2)));
7962 return 1;
7963
7964 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7965 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7966 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7967 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7968 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7969 1 << (src_regno - FR_REG (12)));
7970 return 1;
7971
7972 default:
7973 return 0;
7974 }
7975 }
7976
7977 return 0;
7978 }
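/* For illustration (a hand-written sketch; register numbers and the
   frame size are arbitrary), the cases above annotate a typical
   prologue like this:

	.save ar.pfs, r35		// from the ALLOC insn
	alloc r35 = ar.pfs, 0, 3, 2, 0
	.fframe 16			// from "sp = sp - 16"
	adds r12 = -16, r12
	.save rp, r34			// from "r34 = b0"
	mov r34 = b0

   and a later restoring SET of the stack pointer produces
   ".restore sp" through process_epilogue above.  */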
7979
7980
7981 /* This function looks at a single insn and emits any directives
7982 required to unwind this insn. */
7983 void
7984 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7985 {
7986 if (flag_unwind_tables
7987 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7988 {
7989 rtx pat;
7990
7991 if (GET_CODE (insn) == NOTE
7992 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7993 {
7994 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7995
7996 /* Restore unwind state from immediately before the epilogue. */
7997 if (need_copy_state)
7998 {
7999 fprintf (asm_out_file, "\t.body\n");
8000 fprintf (asm_out_file, "\t.copy_state %d\n",
8001 cfun->machine->state_num);
8002 need_copy_state = false;
8003 }
8004 }
8005
8006 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
8007 return;
8008
8009 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
8010 if (pat)
8011 pat = XEXP (pat, 0);
8012 else
8013 pat = PATTERN (insn);
8014
8015 switch (GET_CODE (pat))
8016 {
8017 case SET:
8018 process_set (asm_out_file, pat);
8019 break;
8020
8021 case PARALLEL:
8022 {
8023 int par_index;
8024 int limit = XVECLEN (pat, 0);
8025 for (par_index = 0; par_index < limit; par_index++)
8026 {
8027 rtx x = XVECEXP (pat, 0, par_index);
8028 if (GET_CODE (x) == SET)
8029 process_set (asm_out_file, x);
8030 }
8031 break;
8032 }
8033
8034 default:
8035 gcc_unreachable ();
8036 }
8037 }
8038 }
8039
8040 \f
8041 enum ia64_builtins
8042 {
8043 IA64_BUILTIN_BSP,
8044 IA64_BUILTIN_FLUSHRS
8045 };
8046
8047 void
8048 ia64_init_builtins (void)
8049 {
8050 tree fpreg_type;
8051 tree float80_type;
8052
8053 /* The __fpreg type. */
8054 fpreg_type = make_node (REAL_TYPE);
8055 /* ??? The back end should know to load/save __fpreg variables using
8056 the ldf.fill and stf.spill instructions. */
8057 TYPE_PRECISION (fpreg_type) = 80;
8058 layout_type (fpreg_type);
8059 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
8060
8061 /* The __float80 type. */
8062 float80_type = make_node (REAL_TYPE);
8063 TYPE_PRECISION (float80_type) = 80;
8064 layout_type (float80_type);
8065 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
8066
8067 /* The __float128 type. */
8068 if (!TARGET_HPUX)
8069 {
8070 tree float128_type = make_node (REAL_TYPE);
8071 TYPE_PRECISION (float128_type) = 128;
8072 layout_type (float128_type);
8073 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
8074 }
8075 else
8076 /* Under HPUX, this is a synonym for "long double". */
8077 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
8078 "__float128");
8079
8080 #define def_builtin(name, type, code) \
8081 lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
8082 NULL, NULL_TREE)
8083
8084 def_builtin ("__builtin_ia64_bsp",
8085 build_function_type (ptr_type_node, void_list_node),
8086 IA64_BUILTIN_BSP);
8087
8088 def_builtin ("__builtin_ia64_flushrs",
8089 build_function_type (void_type_node, void_list_node),
8090 IA64_BUILTIN_FLUSHRS);
8091
8092 #undef def_builtin
8093 }
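/* For illustration (hypothetical user code), the types registered
   above can be used directly in C:

	__float80  e = 1.0w;		// 80-bit extended, XFmode
	__fpreg    r;			// 82-bit register format
	__float128 q = 1.0q;		// quad; a synonym for long
					// double on HP-UX

   The 'w' and 'q' constant suffixes are the ones GCC documents for
   __float80 and __float128 on ia64.  */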
8094
8095 rtx
8096 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
8097 enum machine_mode mode ATTRIBUTE_UNUSED,
8098 int ignore ATTRIBUTE_UNUSED)
8099 {
8100 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8101 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8102
8103 switch (fcode)
8104 {
8105 case IA64_BUILTIN_BSP:
8106 if (! target || ! register_operand (target, DImode))
8107 target = gen_reg_rtx (DImode);
8108 emit_insn (gen_bsp_value (target));
8109 #ifdef POINTERS_EXTEND_UNSIGNED
8110 target = convert_memory_address (ptr_mode, target);
8111 #endif
8112 return target;
8113
8114 case IA64_BUILTIN_FLUSHRS:
8115 emit_insn (gen_flushrs ());
8116 return const0_rtx;
8117
8118 default:
8119 break;
8120 }
8121
8122 return NULL_RTX;
8123 }
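/* For illustration (hypothetical user code), the two builtins expanded
   above expose the register stack engine:

	void *current_bsp (void)
	{
	  __builtin_ia64_flushrs ();	// flush dirty stacked registers
	  return __builtin_ia64_bsp (); // read the backing store pointer
	}

   __builtin_ia64_bsp returns a void * and __builtin_ia64_flushrs
   returns void, matching the types built in ia64_init_builtins.  */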
8124
8125 /* For HP-UX IA64, aggregate parameters are passed stored in the
8126 most significant bits of the stack slot. */
8127
8128 enum direction
8129 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8130 {
8131 /* Exception to normal case for structures/unions/etc. */
8132
8133 if (type && AGGREGATE_TYPE_P (type)
8134 && int_size_in_bytes (type) < UNITS_PER_WORD)
8135 return upward;
8136
8137 /* Fall back to the default. */
8138 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
8139 }
8140
8141 /* Linked list of all external functions that are to be emitted by GCC.
8142 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8143 order to avoid putting out names that are never really used. */
8144
8145 struct extern_func_list GTY(())
8146 {
8147 struct extern_func_list *next;
8148 tree decl;
8149 };
8150
8151 static GTY(()) struct extern_func_list *extern_func_head;
8152
8153 static void
8154 ia64_hpux_add_extern_decl (tree decl)
8155 {
8156 struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8157
8158 p->decl = decl;
8159 p->next = extern_func_head;
8160 extern_func_head = p;
8161 }
8162
8163 /* Print out the list of used global functions. */
8164
8165 static void
8166 ia64_hpux_file_end (void)
8167 {
8168 struct extern_func_list *p;
8169
8170 for (p = extern_func_head; p; p = p->next)
8171 {
8172 tree decl = p->decl;
8173 tree id = DECL_ASSEMBLER_NAME (decl);
8174
8175 gcc_assert (id);
8176
8177 if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8178 {
8179 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8180
8181 TREE_ASM_WRITTEN (decl) = 1;
8182 (*targetm.asm_out.globalize_label) (asm_out_file, name);
8183 fputs (TYPE_ASM_OP, asm_out_file);
8184 assemble_name (asm_out_file, name);
8185 fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8186 }
8187 }
8188
8189 extern_func_head = 0;
8190 }
8191
8192 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
8193 modes of word_mode and larger. Rename the TFmode libfuncs using the
8194 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
8195 backward compatibility. */
8196
8197 static void
8198 ia64_init_libfuncs (void)
8199 {
8200 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
8201 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
8202 set_optab_libfunc (smod_optab, SImode, "__modsi3");
8203 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
8204
8205 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8206 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8207 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8208 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8209 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8210
8211 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8212 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8213 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8214 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8215 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8216 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8217
8218 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8219 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8220 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8221 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8222
8223 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8224 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
8225 }
8226
8227 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8228
8229 static void
8230 ia64_hpux_init_libfuncs (void)
8231 {
8232 ia64_init_libfuncs ();
8233
8234 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8235 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8236 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8237
8238 /* ia64_expand_compare uses this. */
8239 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8240
8241 /* These should never be used. */
8242 set_optab_libfunc (eq_optab, TFmode, 0);
8243 set_optab_libfunc (ne_optab, TFmode, 0);
8244 set_optab_libfunc (gt_optab, TFmode, 0);
8245 set_optab_libfunc (ge_optab, TFmode, 0);
8246 set_optab_libfunc (lt_optab, TFmode, 0);
8247 set_optab_libfunc (le_optab, TFmode, 0);
8248 }
8249
8250 /* Rename the division and modulus functions in VMS. */
8251
8252 static void
8253 ia64_vms_init_libfuncs (void)
8254 {
8255 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8256 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8257 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8258 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8259 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8260 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8261 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8262 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8263 }
8264
8265 /* Rename the TFmode libfuncs available from soft-fp in glibc using
8266 the HPUX conventions. */
8267
8268 static void
8269 ia64_sysv4_init_libfuncs (void)
8270 {
8271 ia64_init_libfuncs ();
8272
8273 /* These functions are not part of the HPUX TFmode interface. We
8274 use them instead of _U_Qfcmp, which doesn't work the way we
8275 expect. */
8276 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
8277 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
8278 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
8279 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
8280 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
8281 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
8282
8283 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8284 glibc doesn't have them. */
8285 }
8286 \f
8287 /* Switch to the section to which we should output X. The only thing
8288 special we do here is to honor small data. */
8289
8290 static void
8291 ia64_select_rtx_section (enum machine_mode mode, rtx x,
8292 unsigned HOST_WIDE_INT align)
8293 {
8294 if (GET_MODE_SIZE (mode) > 0
8295 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8296 sdata_section ();
8297 else
8298 default_elf_select_rtx_section (mode, x, align);
8299 }
8300
8301 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8302 Pretend flag_pic is always set. */
8303
8304 static void
8305 ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8306 {
8307 default_elf_select_section_1 (exp, reloc, align, true);
8308 }
8309
8310 static void
8311 ia64_rwreloc_unique_section (tree decl, int reloc)
8312 {
8313 default_unique_section_1 (decl, reloc, true);
8314 }
8315
8316 static void
8317 ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8318 unsigned HOST_WIDE_INT align)
8319 {
8320 int save_pic = flag_pic;
8321 flag_pic = 1;
8322 ia64_select_rtx_section (mode, x, align);
8323 flag_pic = save_pic;
8324 }
8325
8326 #ifndef TARGET_RWRELOC
8327 #define TARGET_RWRELOC flag_pic
8328 #endif
8329
8330 static unsigned int
8331 ia64_section_type_flags (tree decl, const char *name, int reloc)
8332 {
8333 unsigned int flags = 0;
8334
8335 if (strcmp (name, ".sdata") == 0
8336 || strncmp (name, ".sdata.", 7) == 0
8337 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
8338 || strncmp (name, ".sdata2.", 8) == 0
8339 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
8340 || strcmp (name, ".sbss") == 0
8341 || strncmp (name, ".sbss.", 6) == 0
8342 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
8343 flags = SECTION_SMALL;
8344
8345 flags |= default_section_type_flags_1 (decl, name, reloc, TARGET_RWRELOC);
8346 return flags;
8347 }
8348
8349 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8350 structure type and the address of that type should be passed
8351 in out0, rather than in r8. */
8352
8353 static bool
8354 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8355 {
8356 tree ret_type = TREE_TYPE (fntype);
8357
8358 /* The Itanium C++ ABI requires that out0, rather than r8, be used
8359 as the structure return address parameter, if the return value
8360 type has a non-trivial copy constructor or destructor. It is not
8361 clear if this same convention should be used for other
8362 programming languages. Until G++ 3.4, we incorrectly used r8 for
8363 these return values. */
8364 return (abi_version_at_least (2)
8365 && ret_type
8366 && TYPE_MODE (ret_type) == BLKmode
8367 && TREE_ADDRESSABLE (ret_type)
8368 && strcmp (lang_hooks.name, "GNU C++") == 0);
8369 }
8370
8371 /* Output the assembler code for a thunk function. THUNK_DECL is the
8372 declaration for the thunk function itself, FUNCTION is the decl for
8373 the target function. DELTA is an immediate constant offset to be
8374 added to THIS. If VCALL_OFFSET is nonzero, the word at
8375 *(*this + vcall_offset) should be added to THIS. */
8376
8377 static void
8378 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8379 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8380 tree function)
8381 {
8382 rtx this, insn, funexp;
8383 unsigned int this_parmno;
8384 unsigned int this_regno;
8385
8386 reload_completed = 1;
8387 epilogue_completed = 1;
8388 no_new_pseudos = 1;
8389 reset_block_changes ();
8390
8391 /* Set things up as ia64_expand_prologue might. */
8392 last_scratch_gr_reg = 15;
8393
8394 memset (&current_frame_info, 0, sizeof (current_frame_info));
8395 current_frame_info.spill_cfa_off = -16;
8396 current_frame_info.n_input_regs = 1;
8397 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8398
8399 /* Mark the end of the (empty) prologue. */
8400 emit_note (NOTE_INSN_PROLOGUE_END);
8401
8402 /* Figure out whether "this" will be the first parameter (the
8403 typical case) or the second parameter (as happens when the
8404 virtual function returns certain class objects). */
8405 this_parmno
8406 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
8407 ? 1 : 0);
8408 this_regno = IN_REG (this_parmno);
8409 if (!TARGET_REG_NAMES)
8410 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
8411
8412 this = gen_rtx_REG (Pmode, this_regno);
8413 if (TARGET_ILP32)
8414 {
8415 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
8416 REG_POINTER (tmp) = 1;
8417 if (delta && CONST_OK_FOR_I (delta))
8418 {
8419 emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8420 delta = 0;
8421 }
8422 else
8423 emit_insn (gen_ptr_extend (this, tmp));
8424 }
8425
8426 /* Apply the constant offset, if required. */
8427 if (delta)
8428 {
8429 rtx delta_rtx = GEN_INT (delta);
8430
8431 if (!CONST_OK_FOR_I (delta))
8432 {
8433 rtx tmp = gen_rtx_REG (Pmode, 2);
8434 emit_move_insn (tmp, delta_rtx);
8435 delta_rtx = tmp;
8436 }
8437 emit_insn (gen_adddi3 (this, this, delta_rtx));
8438 }
8439
8440 /* Apply the offset from the vtable, if required. */
8441 if (vcall_offset)
8442 {
8443 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8444 rtx tmp = gen_rtx_REG (Pmode, 2);
8445
8446 if (TARGET_ILP32)
8447 {
8448 rtx t = gen_rtx_REG (ptr_mode, 2);
8449 REG_POINTER (t) = 1;
8450 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8451 if (CONST_OK_FOR_I (vcall_offset))
8452 {
8453 emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8454 vcall_offset_rtx));
8455 vcall_offset = 0;
8456 }
8457 else
8458 emit_insn (gen_ptr_extend (tmp, t));
8459 }
8460 else
8461 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8462
8463 if (vcall_offset)
8464 {
8465 if (!CONST_OK_FOR_J (vcall_offset))
8466 {
8467 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8468 emit_move_insn (tmp2, vcall_offset_rtx);
8469 vcall_offset_rtx = tmp2;
8470 }
8471 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8472 }
8473
8474 if (TARGET_ILP32)
8475 emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8476 gen_rtx_MEM (ptr_mode, tmp));
8477 else
8478 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8479
8480 emit_insn (gen_adddi3 (this, this, tmp));
8481 }
8482
8483 /* Generate a tail call to the target function. */
8484 if (! TREE_USED (function))
8485 {
8486 assemble_external (function);
8487 TREE_USED (function) = 1;
8488 }
8489 funexp = XEXP (DECL_RTL (function), 0);
8490 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8491 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8492 insn = get_last_insn ();
8493 SIBLING_CALL_P (insn) = 1;
8494
8495 /* Code generation for calls relies on splitting. */
8496 reload_completed = 1;
8497 epilogue_completed = 1;
8498 try_split (PATTERN (insn), insn, 0);
8499
8500 emit_barrier ();
8501
8502 /* Run just enough of rest_of_compilation to get the insns emitted.
8503 There's not really enough bulk here to make other passes such as
8504 instruction scheduling worthwhile. Note that use_thunk calls
8505 assemble_start_function and assemble_end_function. */
8506
8507 insn_locators_initialize ();
8508 emit_all_insn_group_barriers (NULL);
8509 insn = get_insns ();
8510 shorten_branches (insn);
8511 final_start_function (insn, file, 1);
8512 final (insn, file, 1);
8513 final_end_function ();
8514
8515 reload_completed = 0;
8516 epilogue_completed = 0;
8517 no_new_pseudos = 0;
8518 }
8519
8520 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
8521
8522 static rtx
8523 ia64_struct_value_rtx (tree fntype,
8524 int incoming ATTRIBUTE_UNUSED)
8525 {
8526 if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
8527 return NULL_RTX;
8528 return gen_rtx_REG (Pmode, GR_REG (8));
8529 }
8530
8531 static bool
8532 ia64_scalar_mode_supported_p (enum machine_mode mode)
8533 {
8534 switch (mode)
8535 {
8536 case QImode:
8537 case HImode:
8538 case SImode:
8539 case DImode:
8540 case TImode:
8541 return true;
8542
8543 case SFmode:
8544 case DFmode:
8545 case XFmode:
8546 return true;
8547
8548 case TFmode:
8549 return TARGET_HPUX;
8550
8551 default:
8552 return false;
8553 }
8554 }
8555
8556 static bool
8557 ia64_vector_mode_supported_p (enum machine_mode mode)
8558 {
8559 switch (mode)
8560 {
8561 case V8QImode:
8562 case V4HImode:
8563 case V2SImode:
8564 return true;
8565
8566 case V2SFmode:
8567 return true;
8568
8569 default:
8570 return false;
8571 }
8572 }
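/* For illustration (hypothetical user code), the modes accepted above
   correspond to GNU C vector types such as

	typedef char  v8qi __attribute__ ((vector_size (8)));	// V8QImode
	typedef short v4hi __attribute__ ((vector_size (8)));	// V4HImode
	typedef float v2sf __attribute__ ((vector_size (8)));	// V2SFmode

	v4hi add4 (v4hi a, v4hi b) { return a + b; }

   which map onto the ia64 multimedia instructions.  */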
8573
8574 void
8575 ia64_output_function_profiler (FILE *file, int labelno)
8576 {
8577 if (TARGET_GNU_AS)
8578 fputs ("\t.prologue 4, r40\n", file);
8579 else
8580 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
8581 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
8582
8583 if (NO_PROFILE_COUNTERS)
8584 fputs ("\tmov out3 = r0\n\t;;\n", file);
8585 else
8586 {
8587 char buf[20];
8588 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
8589
8590 if (TARGET_AUTO_PIC)
8591 fputs ("\tmovl out3 = @gprel(", file);
8592 else
8593 fputs ("\taddl out3 = @ltoff(", file);
8594 assemble_name (file, buf);
8595 if (TARGET_AUTO_PIC)
8596 fputs (")\n\t;;\n", file);
8597 else
8598 fputs ("), r1\n\t;;\n", file);
8599 }
8600
8601 fputs ("\t.save rp, r42\n", file);
8602 fputs ("\tmov out2 = b0\n", file);
8603 fputs ("\t.body\n", file);
8604 fputs ("\tmov out1 = r1\n", file);
8605 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
8606 }
8607
8608 static GTY(()) rtx mcount_func_rtx;
8609 static rtx
8610 gen_mcount_func_rtx (void)
8611 {
8612 if (!mcount_func_rtx)
8613 mcount_func_rtx = init_one_libfunc ("_mcount");
8614 return mcount_func_rtx;
8615 }
8616
8617 void
8618 ia64_profile_hook (int labelno)
8619 {
8620 rtx label, ip;
8621
8622 if (NO_PROFILE_COUNTERS)
8623 label = const0_rtx;
8624 else
8625 {
8626 char buf[30];
8627 const char *label_name;
8628 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
8629 label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
8630 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
8631 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
8632 }
8633 ip = gen_reg_rtx (Pmode);
8634 emit_insn (gen_ip_value (ip));
8635 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
8636 VOIDmode, 3,
8637 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
8638 ip, Pmode,
8639 label, Pmode);
8640 }
8641
8642 #include "gt-ia64.h"