re PR target/21551 (ia64 bootstrap failed)
[gcc.git] / gcc / config / ia64 / ia64.c
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "real.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "except.h"
42 #include "function.h"
43 #include "ggc.h"
44 #include "basic-block.h"
45 #include "toplev.h"
46 #include "sched-int.h"
47 #include "timevar.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "tm_p.h"
51 #include "hashtab.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "tree-gimple.h"
55
56 /* This is used for communication between ASM_OUTPUT_LABEL and
57 ASM_OUTPUT_LABELREF. */
58 int ia64_asm_output_label = 0;
59
60 /* Define the information needed to generate branch and scc insns. This is
61 stored from the compare operation. */
62 struct rtx_def * ia64_compare_op0;
63 struct rtx_def * ia64_compare_op1;
64
65 /* Register names for ia64_expand_prologue. */
66 static const char * const ia64_reg_numbers[96] =
67 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
68 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
69 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
70 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
71 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
72 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
73 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
74 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
75 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
76 "r104","r105","r106","r107","r108","r109","r110","r111",
77 "r112","r113","r114","r115","r116","r117","r118","r119",
78 "r120","r121","r122","r123","r124","r125","r126","r127"};
79
80 /* ??? These strings could be shared with REGISTER_NAMES. */
81 static const char * const ia64_input_reg_names[8] =
82 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
83
84 /* ??? These strings could be shared with REGISTER_NAMES. */
85 static const char * const ia64_local_reg_names[80] =
86 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
87 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
88 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
89 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
90 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
91 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
92 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
93 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
94 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
95 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
96
97 /* ??? These strings could be shared with REGISTER_NAMES. */
98 static const char * const ia64_output_reg_names[8] =
99 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
100
101 /* Determines whether we use adds, addl, or movl to generate our
102 TLS immediate offsets. */
103 int ia64_tls_size = 22;
104
 105 /* Which CPU are we scheduling for?  */
106 enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
107
108 /* Determines whether we run our final scheduling pass or not. We always
109 avoid the normal second scheduling pass. */
110 static int ia64_flag_schedule_insns2;
111
112 /* Determines whether we run variable tracking in machine dependent
113 reorganization. */
114 static int ia64_flag_var_tracking;
115
116 /* Variables which are this size or smaller are put in the sdata/sbss
117 sections. */
118
119 unsigned int ia64_section_threshold;
120
121 /* The following variable is used by the DFA insn scheduler. The value is
122 TRUE if we do insn bundling instead of insn scheduling. */
123 int bundling_p = 0;
124
125 /* Structure to be filled in by ia64_compute_frame_size with register
126 save masks and offsets for the current function. */
127
128 struct ia64_frame_info
129 {
130 HOST_WIDE_INT total_size; /* size of the stack frame, not including
131 the caller's scratch area. */
132 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
133 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
134 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
135 HARD_REG_SET mask; /* mask of saved registers. */
136 unsigned int gr_used_mask; /* mask of registers in use as gr spill
137 registers or long-term scratches. */
138 int n_spilled; /* number of spilled registers. */
139 int reg_fp; /* register for fp. */
140 int reg_save_b0; /* save register for b0. */
141 int reg_save_pr; /* save register for prs. */
142 int reg_save_ar_pfs; /* save register for ar.pfs. */
143 int reg_save_ar_unat; /* save register for ar.unat. */
144 int reg_save_ar_lc; /* save register for ar.lc. */
145 int reg_save_gp; /* save register for gp. */
146 int n_input_regs; /* number of input registers used. */
147 int n_local_regs; /* number of local registers used. */
148 int n_output_regs; /* number of output registers used. */
149 int n_rotate_regs; /* number of rotating registers used. */
150
151 char need_regstk; /* true if a .regstk directive needed. */
152 char initialized; /* true if the data is finalized. */
153 };
154
155 /* Current frame information calculated by ia64_compute_frame_size. */
156 static struct ia64_frame_info current_frame_info;
157 \f
158 static int ia64_first_cycle_multipass_dfa_lookahead (void);
159 static void ia64_dependencies_evaluation_hook (rtx, rtx);
160 static void ia64_init_dfa_pre_cycle_insn (void);
161 static rtx ia64_dfa_pre_cycle_insn (void);
162 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
163 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
164 static rtx gen_tls_get_addr (void);
165 static rtx gen_thread_pointer (void);
166 static int find_gr_spill (int);
167 static int next_scratch_gr_reg (void);
168 static void mark_reg_gr_used_mask (rtx, void *);
169 static void ia64_compute_frame_size (HOST_WIDE_INT);
170 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
171 static void finish_spill_pointers (void);
172 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
173 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
174 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
175 static rtx gen_movdi_x (rtx, rtx, rtx);
176 static rtx gen_fr_spill_x (rtx, rtx, rtx);
177 static rtx gen_fr_restore_x (rtx, rtx, rtx);
178
179 static enum machine_mode hfa_element_mode (tree, bool);
180 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
181 tree, int *, int);
182 static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
183 tree, bool);
184 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
185 tree, bool);
186 static bool ia64_function_ok_for_sibcall (tree, tree);
187 static bool ia64_return_in_memory (tree, tree);
188 static bool ia64_rtx_costs (rtx, int, int, int *);
189 static void fix_range (const char *);
190 static bool ia64_handle_option (size_t, const char *, int);
191 static struct machine_function * ia64_init_machine_status (void);
192 static void emit_insn_group_barriers (FILE *);
193 static void emit_all_insn_group_barriers (FILE *);
194 static void final_emit_insn_group_barriers (FILE *);
195 static void emit_predicate_relation_info (void);
196 static void ia64_reorg (void);
197 static bool ia64_in_small_data_p (tree);
198 static void process_epilogue (void);
199 static int process_set (FILE *, rtx);
200
201 static bool ia64_assemble_integer (rtx, unsigned int, int);
202 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
203 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void ia64_output_function_end_prologue (FILE *);
205
206 static int ia64_issue_rate (void);
207 static int ia64_adjust_cost (rtx, rtx, rtx, int);
208 static void ia64_sched_init (FILE *, int, int);
209 static void ia64_sched_finish (FILE *, int);
210 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
211 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
212 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
213 static int ia64_variable_issue (FILE *, int, rtx, int);
214
215 static struct bundle_state *get_free_bundle_state (void);
216 static void free_bundle_state (struct bundle_state *);
217 static void initiate_bundle_states (void);
218 static void finish_bundle_states (void);
219 static unsigned bundle_state_hash (const void *);
220 static int bundle_state_eq_p (const void *, const void *);
221 static int insert_bundle_state (struct bundle_state *);
222 static void initiate_bundle_state_table (void);
223 static void finish_bundle_state_table (void);
224 static int try_issue_nops (struct bundle_state *, int);
225 static int try_issue_insn (struct bundle_state *, rtx);
226 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
227 static int get_max_pos (state_t);
228 static int get_template (state_t, int);
229
230 static rtx get_next_important_insn (rtx, rtx);
231 static void bundling (FILE *, int, rtx, rtx);
232
233 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
234 HOST_WIDE_INT, tree);
235 static void ia64_file_start (void);
236
237 static void ia64_select_rtx_section (enum machine_mode, rtx,
238 unsigned HOST_WIDE_INT);
239 static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
240 ATTRIBUTE_UNUSED;
241 static void ia64_rwreloc_unique_section (tree, int)
242 ATTRIBUTE_UNUSED;
243 static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
244 unsigned HOST_WIDE_INT)
245 ATTRIBUTE_UNUSED;
246 static unsigned int ia64_section_type_flags (tree, const char *, int);
247 static void ia64_hpux_add_extern_decl (tree decl)
248 ATTRIBUTE_UNUSED;
249 static void ia64_hpux_file_end (void)
250 ATTRIBUTE_UNUSED;
251 static void ia64_init_libfuncs (void)
252 ATTRIBUTE_UNUSED;
253 static void ia64_hpux_init_libfuncs (void)
254 ATTRIBUTE_UNUSED;
255 static void ia64_sysv4_init_libfuncs (void)
256 ATTRIBUTE_UNUSED;
257 static void ia64_vms_init_libfuncs (void)
258 ATTRIBUTE_UNUSED;
259
260 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
261 static void ia64_encode_section_info (tree, rtx, int);
262 static rtx ia64_struct_value_rtx (tree, int);
263 static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
264 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
265 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
266 static bool ia64_cannot_force_const_mem (rtx);
267 \f
268 /* Table of valid machine attributes. */
269 static const struct attribute_spec ia64_attribute_table[] =
270 {
271 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
272 { "syscall_linkage", 0, 0, false, true, true, NULL },
273 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
274 { NULL, 0, 0, false, false, false, NULL }
275 };
276
277 /* Initialize the GCC target structure. */
278 #undef TARGET_ATTRIBUTE_TABLE
279 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
280
281 #undef TARGET_INIT_BUILTINS
282 #define TARGET_INIT_BUILTINS ia64_init_builtins
283
284 #undef TARGET_EXPAND_BUILTIN
285 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
286
287 #undef TARGET_ASM_BYTE_OP
288 #define TARGET_ASM_BYTE_OP "\tdata1\t"
289 #undef TARGET_ASM_ALIGNED_HI_OP
290 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
291 #undef TARGET_ASM_ALIGNED_SI_OP
292 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
293 #undef TARGET_ASM_ALIGNED_DI_OP
294 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
295 #undef TARGET_ASM_UNALIGNED_HI_OP
296 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
297 #undef TARGET_ASM_UNALIGNED_SI_OP
298 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
299 #undef TARGET_ASM_UNALIGNED_DI_OP
300 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
301 #undef TARGET_ASM_INTEGER
302 #define TARGET_ASM_INTEGER ia64_assemble_integer
303
304 #undef TARGET_ASM_FUNCTION_PROLOGUE
305 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
306 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
307 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
308 #undef TARGET_ASM_FUNCTION_EPILOGUE
309 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
310
311 #undef TARGET_IN_SMALL_DATA_P
312 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
313
314 #undef TARGET_SCHED_ADJUST_COST
315 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
316 #undef TARGET_SCHED_ISSUE_RATE
317 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
318 #undef TARGET_SCHED_VARIABLE_ISSUE
319 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
320 #undef TARGET_SCHED_INIT
321 #define TARGET_SCHED_INIT ia64_sched_init
322 #undef TARGET_SCHED_FINISH
323 #define TARGET_SCHED_FINISH ia64_sched_finish
324 #undef TARGET_SCHED_REORDER
325 #define TARGET_SCHED_REORDER ia64_sched_reorder
326 #undef TARGET_SCHED_REORDER2
327 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
328
329 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
330 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
331
332 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
333 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
334
335 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
336 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
337 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
338 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
339
340 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
341 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
342 ia64_first_cycle_multipass_dfa_lookahead_guard
343
344 #undef TARGET_SCHED_DFA_NEW_CYCLE
345 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
346
347 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
348 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
349 #undef TARGET_PASS_BY_REFERENCE
350 #define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
351 #undef TARGET_ARG_PARTIAL_BYTES
352 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
353
354 #undef TARGET_ASM_OUTPUT_MI_THUNK
355 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
356 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
357 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
358
359 #undef TARGET_ASM_FILE_START
360 #define TARGET_ASM_FILE_START ia64_file_start
361
362 #undef TARGET_RTX_COSTS
363 #define TARGET_RTX_COSTS ia64_rtx_costs
364 #undef TARGET_ADDRESS_COST
365 #define TARGET_ADDRESS_COST hook_int_rtx_0
366
367 #undef TARGET_MACHINE_DEPENDENT_REORG
368 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
369
370 #undef TARGET_ENCODE_SECTION_INFO
371 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
372
373 #undef TARGET_SECTION_TYPE_FLAGS
374 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
375
376 /* ??? ABI doesn't allow us to define this. */
377 #if 0
378 #undef TARGET_PROMOTE_FUNCTION_ARGS
379 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
380 #endif
381
382 /* ??? ABI doesn't allow us to define this. */
383 #if 0
384 #undef TARGET_PROMOTE_FUNCTION_RETURN
385 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
386 #endif
387
388 /* ??? Investigate. */
389 #if 0
390 #undef TARGET_PROMOTE_PROTOTYPES
391 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
392 #endif
393
394 #undef TARGET_STRUCT_VALUE_RTX
395 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
396 #undef TARGET_RETURN_IN_MEMORY
397 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
398 #undef TARGET_SETUP_INCOMING_VARARGS
399 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
400 #undef TARGET_STRICT_ARGUMENT_NAMING
401 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
402 #undef TARGET_MUST_PASS_IN_STACK
403 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
404
405 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
406 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
407
408 #undef TARGET_UNWIND_EMIT
409 #define TARGET_UNWIND_EMIT process_for_unwind_directive
410
411 #undef TARGET_SCALAR_MODE_SUPPORTED_P
412 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
413 #undef TARGET_VECTOR_MODE_SUPPORTED_P
414 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
415
416 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
417 in an order different from the specified program order. */
418 #undef TARGET_RELAXED_ORDERING
419 #define TARGET_RELAXED_ORDERING true
420
421 #undef TARGET_DEFAULT_TARGET_FLAGS
422 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
423 #undef TARGET_HANDLE_OPTION
424 #define TARGET_HANDLE_OPTION ia64_handle_option
425
426 #undef TARGET_CANNOT_FORCE_CONST_MEM
427 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
428
429 struct gcc_target targetm = TARGET_INITIALIZER;
430 \f
431 typedef enum
432 {
433 ADDR_AREA_NORMAL, /* normal address area */
434 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
435 }
436 ia64_addr_area;
437
438 static GTY(()) tree small_ident1;
439 static GTY(()) tree small_ident2;
440
441 static void
442 init_idents (void)
443 {
444 if (small_ident1 == 0)
445 {
446 small_ident1 = get_identifier ("small");
447 small_ident2 = get_identifier ("__small__");
448 }
449 }
450
451 /* Retrieve the address area that has been chosen for the given decl. */
452
453 static ia64_addr_area
454 ia64_get_addr_area (tree decl)
455 {
456 tree model_attr;
457
458 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
459 if (model_attr)
460 {
461 tree id;
462
463 init_idents ();
464 id = TREE_VALUE (TREE_VALUE (model_attr));
465 if (id == small_ident1 || id == small_ident2)
466 return ADDR_AREA_SMALL;
467 }
468 return ADDR_AREA_NORMAL;
469 }
470
471 static tree
472 ia64_handle_model_attribute (tree *node, tree name, tree args,
473 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
474 {
475 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
476 ia64_addr_area area;
477 tree arg, decl = *node;
478
479 init_idents ();
480 arg = TREE_VALUE (args);
481 if (arg == small_ident1 || arg == small_ident2)
482 {
483 addr_area = ADDR_AREA_SMALL;
484 }
485 else
486 {
487 warning (0, "invalid argument of %qs attribute",
488 IDENTIFIER_POINTER (name));
489 *no_add_attrs = true;
490 }
491
492 switch (TREE_CODE (decl))
493 {
494 case VAR_DECL:
495 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
496 == FUNCTION_DECL)
497 && !TREE_STATIC (decl))
498 {
499 error ("%Jan address area attribute cannot be specified for "
500 "local variables", decl, decl);
501 *no_add_attrs = true;
502 }
503 area = ia64_get_addr_area (decl);
504 if (area != ADDR_AREA_NORMAL && addr_area != area)
505 {
506 error ("%Jaddress area of '%s' conflicts with previous "
507 "declaration", decl, decl);
508 *no_add_attrs = true;
509 }
510 break;
511
512 case FUNCTION_DECL:
513 error ("%Jaddress area attribute cannot be specified for functions",
514 decl, decl);
515 *no_add_attrs = true;
516 break;
517
518 default:
519 warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
520 *no_add_attrs = true;
521 break;
522 }
523
524 return NULL_TREE;
525 }
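/* For illustration (hypothetical declaration): given

     extern int foo __attribute__ ((model (small)));

   the handler above accepts the identifier "small" (or "__small__"), the
   attribute stays attached to FOO, and ia64_get_addr_area will then return
   ADDR_AREA_SMALL for it, i.e. the addl-addressable area described next to
   ADDR_AREA_SMALL.  */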
526
527 static void
528 ia64_encode_addr_area (tree decl, rtx symbol)
529 {
530 int flags;
531
532 flags = SYMBOL_REF_FLAGS (symbol);
533 switch (ia64_get_addr_area (decl))
534 {
535 case ADDR_AREA_NORMAL: break;
536 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
537 default: gcc_unreachable ();
538 }
539 SYMBOL_REF_FLAGS (symbol) = flags;
540 }
541
542 static void
543 ia64_encode_section_info (tree decl, rtx rtl, int first)
544 {
545 default_encode_section_info (decl, rtl, first);
546
547 /* Careful not to prod global register variables. */
548 if (TREE_CODE (decl) == VAR_DECL
549 && GET_CODE (DECL_RTL (decl)) == MEM
550 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
551 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
552 ia64_encode_addr_area (decl, XEXP (rtl, 0));
553 }
554 \f
555 /* Implement CONST_OK_FOR_LETTER_P. */
556
557 bool
558 ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
559 {
560 switch (c)
561 {
562 case 'I':
563 return CONST_OK_FOR_I (value);
564 case 'J':
565 return CONST_OK_FOR_J (value);
566 case 'K':
567 return CONST_OK_FOR_K (value);
568 case 'L':
569 return CONST_OK_FOR_L (value);
570 case 'M':
571 return CONST_OK_FOR_M (value);
572 case 'N':
573 return CONST_OK_FOR_N (value);
574 case 'O':
575 return CONST_OK_FOR_O (value);
576 case 'P':
577 return CONST_OK_FOR_P (value);
578 default:
579 return false;
580 }
581 }
582
583 /* Implement CONST_DOUBLE_OK_FOR_LETTER_P. */
584
585 bool
586 ia64_const_double_ok_for_letter_p (rtx value, char c)
587 {
588 switch (c)
589 {
590 case 'G':
591 return CONST_DOUBLE_OK_FOR_G (value);
592 default:
593 return false;
594 }
595 }
596
597 /* Implement EXTRA_CONSTRAINT. */
598
599 bool
600 ia64_extra_constraint (rtx value, char c)
601 {
602 switch (c)
603 {
604 case 'Q':
605 /* Non-volatile memory for FP_REG loads/stores. */
 608       return memory_operand (value, VOIDmode) && !MEM_VOLATILE_P (value);
607
608 case 'R':
609 /* 1..4 for shladd arguments. */
610 return (GET_CODE (value) == CONST_INT
611 && INTVAL (value) >= 1 && INTVAL (value) <= 4);
612
613 case 'S':
614 /* Non-post-inc memory for asms and other unsavory creatures. */
615 return (GET_CODE (value) == MEM
616 && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
617 && (reload_in_progress || memory_operand (value, VOIDmode)));
618
619 case 'T':
620 /* Symbol ref to small-address-area. */
621 return (GET_CODE (value) == SYMBOL_REF
622 && SYMBOL_REF_SMALL_ADDR_P (value));
623
624 case 'U':
625 /* Vector zero. */
626 return value == CONST0_RTX (GET_MODE (value));
627
628 case 'W':
629 /* An integer vector, such that conversion to an integer yields a
630 value appropriate for an integer 'J' constraint. */
631 if (GET_CODE (value) == CONST_VECTOR
632 && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
633 {
634 value = simplify_subreg (DImode, value, GET_MODE (value), 0);
635 return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
636 }
637 return false;
638
639 case 'Y':
640 /* A V2SF vector containing elements that satisfy 'G'. */
641 return
642 (GET_CODE (value) == CONST_VECTOR
643 && GET_MODE (value) == V2SFmode
644 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
645 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
646
647 default:
648 return false;
649 }
650 }
651 \f
652 /* Return 1 if the operands of a move are ok. */
653
654 int
655 ia64_move_ok (rtx dst, rtx src)
656 {
657 /* If we're under init_recog_no_volatile, we'll not be able to use
658 memory_operand. So check the code directly and don't worry about
659 the validity of the underlying address, which should have been
660 checked elsewhere anyway. */
661 if (GET_CODE (dst) != MEM)
662 return 1;
663 if (GET_CODE (src) == MEM)
664 return 0;
665 if (register_operand (src, VOIDmode))
666 return 1;
667
 668   /* Otherwise, this must be a constant, and it must be 0, 0.0 or 1.0.  */
669 if (INTEGRAL_MODE_P (GET_MODE (dst)))
670 return src == const0_rtx;
671 else
672 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
673 }
674
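/* Return nonzero when exactly one of OP1 and OP2 is a base register
   operand; presumably this is the condition under which the addp4
   patterns in ia64.md can usefully rewrite the addition.  */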
675 int
676 addp4_optimize_ok (rtx op1, rtx op2)
677 {
 678   return (basereg_operand (op1, GET_MODE (op1)) !=
 679           basereg_operand (op2, GET_MODE (op2)));
680 }
681
682 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
683 Return the length of the field, or <= 0 on failure. */
684
685 int
686 ia64_depz_field_mask (rtx rop, rtx rshift)
687 {
688 unsigned HOST_WIDE_INT op = INTVAL (rop);
689 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
690
691 /* Get rid of the zero bits we're shifting in. */
692 op >>= shift;
693
694 /* We must now have a solid block of 1's at bit 0. */
695 return exact_log2 (op + 1);
696 }
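/* Worked example (illustrative values only): for ROP == 0xff0 and
   RSHIFT == 4, OP becomes 0xff0 >> 4 == 0xff, a solid block of ones at
   bit 0, and exact_log2 (0xff + 1) == 8, so the dep.z field is 8 bits
   wide.  For ROP == 0xf0f the shift leaves 0xf0, which is not one less
   than a power of two, so exact_log2 returns a negative value.  */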
697
698 /* Return the TLS model to use for ADDR. */
699
700 static enum tls_model
701 tls_symbolic_operand_type (rtx addr)
702 {
703 enum tls_model tls_kind = 0;
704
705 if (GET_CODE (addr) == CONST)
706 {
707 if (GET_CODE (XEXP (addr, 0)) == PLUS
708 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
709 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
710 }
711 else if (GET_CODE (addr) == SYMBOL_REF)
712 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
713
714 return tls_kind;
715 }
716
717 /* Return true if X is a constant that is valid for some immediate
718 field in an instruction. */
719
720 bool
721 ia64_legitimate_constant_p (rtx x)
722 {
723 switch (GET_CODE (x))
724 {
725 case CONST_INT:
726 case LABEL_REF:
727 return true;
728
729 case CONST_DOUBLE:
730 if (GET_MODE (x) == VOIDmode)
731 return true;
732 return CONST_DOUBLE_OK_FOR_G (x);
733
734 case CONST:
735 case SYMBOL_REF:
736 return tls_symbolic_operand_type (x) == 0;
737
738 default:
739 return false;
740 }
741 }
742
743 /* Don't allow TLS addresses to get spilled to memory. */
744
745 static bool
746 ia64_cannot_force_const_mem (rtx x)
747 {
748 return tls_symbolic_operand_type (x) != 0;
749 }
750
751 /* Expand a symbolic constant load. */
752
753 bool
754 ia64_expand_load_address (rtx dest, rtx src)
755 {
756 gcc_assert (GET_CODE (dest) == REG);
757
758 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
759 having to pointer-extend the value afterward. Other forms of address
760 computation below are also more natural to compute as 64-bit quantities.
761 If we've been given an SImode destination register, change it. */
762 if (GET_MODE (dest) != Pmode)
763 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);
764
765 if (TARGET_NO_PIC)
766 return false;
767 if (small_addr_symbolic_operand (src, VOIDmode))
768 return false;
769
770 if (TARGET_AUTO_PIC)
771 emit_insn (gen_load_gprel64 (dest, src));
772 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
773 emit_insn (gen_load_fptr (dest, src));
774 else if (sdata_symbolic_operand (src, VOIDmode))
775 emit_insn (gen_load_gprel (dest, src));
776 else
777 {
778 HOST_WIDE_INT addend = 0;
779 rtx tmp;
780
781 /* We did split constant offsets in ia64_expand_move, and we did try
782 to keep them split in move_operand, but we also allowed reload to
783 rematerialize arbitrary constants rather than spill the value to
784 the stack and reload it. So we have to be prepared here to split
785 them apart again. */
786 if (GET_CODE (src) == CONST)
787 {
788 HOST_WIDE_INT hi, lo;
789
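          /* The expression below sign-extends the low 14 bits of HI: LO
             ends up in [-8192, 8191], and HI - LO has its low 14 bits
             clear, i.e. it is a multiple of 16384.  */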
790 hi = INTVAL (XEXP (XEXP (src, 0), 1));
791 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
792 hi = hi - lo;
793
794 if (lo != 0)
795 {
796 addend = lo;
797 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
798 }
799 }
800
801 tmp = gen_rtx_HIGH (Pmode, src);
802 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
803 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
804
805 tmp = gen_rtx_LO_SUM (Pmode, dest, src);
806 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
807
808 if (addend)
809 {
810 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
811 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
812 }
813 }
814
815 return true;
816 }
817
818 static GTY(()) rtx gen_tls_tga;
819 static rtx
820 gen_tls_get_addr (void)
821 {
822 if (!gen_tls_tga)
823 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
824 return gen_tls_tga;
825 }
826
827 static GTY(()) rtx thread_pointer_rtx;
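/* Return the thread pointer as an rtx; in the IA-64 software conventions
   the thread pointer lives in general register r13, hence the hard-coded
   13 below.  */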
828 static rtx
829 gen_thread_pointer (void)
830 {
831 if (!thread_pointer_rtx)
832 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
833 return thread_pointer_rtx;
834 }
835
836 static rtx
837 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
838 HOST_WIDE_INT addend)
839 {
840 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
841 rtx orig_op0 = op0, orig_op1 = op1;
842 HOST_WIDE_INT addend_lo, addend_hi;
843
844 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
845 addend_hi = addend - addend_lo;
846
847 switch (tls_kind)
848 {
849 case TLS_MODEL_GLOBAL_DYNAMIC:
850 start_sequence ();
851
852 tga_op1 = gen_reg_rtx (Pmode);
853 emit_insn (gen_load_dtpmod (tga_op1, op1));
854
855 tga_op2 = gen_reg_rtx (Pmode);
856 emit_insn (gen_load_dtprel (tga_op2, op1));
857
858 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
859 LCT_CONST, Pmode, 2, tga_op1,
860 Pmode, tga_op2, Pmode);
861
862 insns = get_insns ();
863 end_sequence ();
864
865 if (GET_MODE (op0) != Pmode)
866 op0 = tga_ret;
867 emit_libcall_block (insns, op0, tga_ret, op1);
868 break;
869
870 case TLS_MODEL_LOCAL_DYNAMIC:
 871       /* ??? This isn't the completely proper way to do local-dynamic.
872 If the call to __tls_get_addr is used only by a single symbol,
873 then we should (somehow) move the dtprel to the second arg
874 to avoid the extra add. */
875 start_sequence ();
876
877 tga_op1 = gen_reg_rtx (Pmode);
878 emit_insn (gen_load_dtpmod (tga_op1, op1));
879 tga_op1 = gen_const_mem (Pmode, tga_op1);
880
881 tga_op2 = const0_rtx;
882
883 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
884 LCT_CONST, Pmode, 2, tga_op1,
885 Pmode, tga_op2, Pmode);
886
887 insns = get_insns ();
888 end_sequence ();
889
890 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
891 UNSPEC_LD_BASE);
892 tmp = gen_reg_rtx (Pmode);
893 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
894
895 if (!register_operand (op0, Pmode))
896 op0 = gen_reg_rtx (Pmode);
897 if (TARGET_TLS64)
898 {
899 emit_insn (gen_load_dtprel (op0, op1));
900 emit_insn (gen_adddi3 (op0, tmp, op0));
901 }
902 else
903 emit_insn (gen_add_dtprel (op0, op1, tmp));
904 break;
905
906 case TLS_MODEL_INITIAL_EXEC:
907 op1 = plus_constant (op1, addend_hi);
908 addend = addend_lo;
909
910 tmp = gen_reg_rtx (Pmode);
911 emit_insn (gen_load_tprel (tmp, op1));
912
913 if (!register_operand (op0, Pmode))
914 op0 = gen_reg_rtx (Pmode);
915 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
916 break;
917
918 case TLS_MODEL_LOCAL_EXEC:
919 if (!register_operand (op0, Pmode))
920 op0 = gen_reg_rtx (Pmode);
921
922 op1 = orig_op1;
923 addend = 0;
924 if (TARGET_TLS64)
925 {
926 emit_insn (gen_load_tprel (op0, op1));
927 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
928 }
929 else
930 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
931 break;
932
933 default:
934 gcc_unreachable ();
935 }
936
937 if (addend)
938 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
939 orig_op0, 1, OPTAB_DIRECT);
940 if (orig_op0 == op0)
941 return NULL_RTX;
942 if (GET_MODE (orig_op0) == Pmode)
943 return op0;
944 return gen_lowpart (GET_MODE (orig_op0), op0);
945 }
946
947 rtx
948 ia64_expand_move (rtx op0, rtx op1)
949 {
950 enum machine_mode mode = GET_MODE (op0);
951
952 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
953 op1 = force_reg (mode, op1);
954
955 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
956 {
957 HOST_WIDE_INT addend = 0;
958 enum tls_model tls_kind;
959 rtx sym = op1;
960
961 if (GET_CODE (op1) == CONST
962 && GET_CODE (XEXP (op1, 0)) == PLUS
963 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
964 {
965 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
966 sym = XEXP (XEXP (op1, 0), 0);
967 }
968
969 tls_kind = tls_symbolic_operand_type (sym);
970 if (tls_kind)
971 return ia64_expand_tls_address (tls_kind, op0, sym, addend);
972
973 if (any_offset_symbol_operand (sym, mode))
974 addend = 0;
975 else if (aligned_offset_symbol_operand (sym, mode))
976 {
977 HOST_WIDE_INT addend_lo, addend_hi;
978
979 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
980 addend_hi = addend - addend_lo;
981
982 if (addend_lo != 0)
983 {
984 op1 = plus_constant (sym, addend_hi);
985 addend = addend_lo;
986 }
987 else
988 addend = 0;
989 }
990 else
991 op1 = sym;
992
993 if (reload_completed)
994 {
995 /* We really should have taken care of this offset earlier. */
996 gcc_assert (addend == 0);
997 if (ia64_expand_load_address (op0, op1))
998 return NULL_RTX;
999 }
1000
1001 if (addend)
1002 {
1003 rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
1004
1005 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1006
1007 op1 = expand_simple_binop (mode, PLUS, subtarget,
1008 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1009 if (op0 == op1)
1010 return NULL_RTX;
1011 }
1012 }
1013
1014 return op1;
1015 }
1016
1017 /* Split a move from OP1 to OP0 conditional on COND. */
1018
1019 void
1020 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1021 {
1022 rtx insn, first = get_last_insn ();
1023
1024 emit_move_insn (op0, op1);
1025
1026 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1027 if (INSN_P (insn))
1028 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1029 PATTERN (insn));
1030 }
1031
1032 /* Split a post-reload TImode or TFmode reference into two DImode
1033 components. This is made extra difficult by the fact that we do
1034 not get any scratch registers to work with, because reload cannot
1035 be prevented from giving us a scratch that overlaps the register
1036 pair involved. So instead, when addressing memory, we tweak the
1037 pointer register up and back down with POST_INCs. Or up and not
1038 back down when we can get away with it.
1039
1040 REVERSED is true when the loads must be done in reversed order
1041 (high word first) for correctness. DEAD is true when the pointer
1042 dies with the second insn we generate and therefore the second
1043 address must not carry a postmodify.
1044
1045 May return an insn which is to be emitted after the moves. */
1046
1047 static rtx
1048 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1049 {
1050 rtx fixup = 0;
1051
1052 switch (GET_CODE (in))
1053 {
1054 case REG:
1055 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1056 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1057 break;
1058
1059 case CONST_INT:
1060 case CONST_DOUBLE:
1061 /* Cannot occur reversed. */
1062 gcc_assert (!reversed);
1063
1064 if (GET_MODE (in) != TFmode)
1065 split_double (in, &out[0], &out[1]);
1066 else
1067 /* split_double does not understand how to split a TFmode
1068 quantity into a pair of DImode constants. */
1069 {
1070 REAL_VALUE_TYPE r;
1071 unsigned HOST_WIDE_INT p[2];
1072 long l[4]; /* TFmode is 128 bits */
1073
1074 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1075 real_to_target (l, &r, TFmode);
1076
1077 if (FLOAT_WORDS_BIG_ENDIAN)
1078 {
1079 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1080 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1081 }
1082 else
1083 {
1084 p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1085 p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1086 }
1087 out[0] = GEN_INT (p[0]);
1088 out[1] = GEN_INT (p[1]);
1089 }
1090 break;
1091
1092 case MEM:
1093 {
1094 rtx base = XEXP (in, 0);
1095 rtx offset;
1096
1097 switch (GET_CODE (base))
1098 {
1099 case REG:
1100 if (!reversed)
1101 {
1102 out[0] = adjust_automodify_address
1103 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1104 out[1] = adjust_automodify_address
1105 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1106 }
1107 else
1108 {
1109 /* Reversal requires a pre-increment, which can only
1110 be done as a separate insn. */
1111 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1112 out[0] = adjust_automodify_address
1113 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1114 out[1] = adjust_address (in, DImode, 0);
1115 }
1116 break;
1117
1118 case POST_INC:
1119 gcc_assert (!reversed && !dead);
1120
1121 /* Just do the increment in two steps. */
1122 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1123 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1124 break;
1125
1126 case POST_DEC:
1127 gcc_assert (!reversed && !dead);
1128
1129 /* Add 8, subtract 24. */
1130 base = XEXP (base, 0);
1131 out[0] = adjust_automodify_address
1132 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1133 out[1] = adjust_automodify_address
1134 (in, DImode,
1135 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1136 8);
1137 break;
1138
1139 case POST_MODIFY:
1140 gcc_assert (!reversed && !dead);
1141
1142 /* Extract and adjust the modification. This case is
1143 trickier than the others, because we might have an
1144 index register, or we might have a combined offset that
1145 doesn't fit a signed 9-bit displacement field. We can
1146 assume the incoming expression is already legitimate. */
1147 offset = XEXP (base, 1);
1148 base = XEXP (base, 0);
1149
1150 out[0] = adjust_automodify_address
1151 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1152
1153 if (GET_CODE (XEXP (offset, 1)) == REG)
1154 {
1155 /* Can't adjust the postmodify to match. Emit the
1156 original, then a separate addition insn. */
1157 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1158 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1159 }
1160 else
1161 {
1162 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1163 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1164 {
1165 /* Again the postmodify cannot be made to match,
1166 but in this case it's more efficient to get rid
1167 of the postmodify entirely and fix up with an
1168 add insn. */
1169 out[1] = adjust_automodify_address (in, DImode, base, 8);
1170 fixup = gen_adddi3
1171 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1172 }
1173 else
1174 {
1175 /* Combined offset still fits in the displacement field.
1176 (We cannot overflow it at the high end.) */
1177 out[1] = adjust_automodify_address
1178 (in, DImode, gen_rtx_POST_MODIFY
1179 (Pmode, base, gen_rtx_PLUS
1180 (Pmode, base,
1181 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1182 8);
1183 }
1184 }
1185 break;
1186
1187 default:
1188 gcc_unreachable ();
1189 }
1190 break;
1191 }
1192
1193 default:
1194 gcc_unreachable ();
1195 }
1196
1197 return fixup;
1198 }
1199
1200 /* Split a TImode or TFmode move instruction after reload.
1201 This is used by *movtf_internal and *movti_internal. */
1202 void
1203 ia64_split_tmode_move (rtx operands[])
1204 {
1205 rtx in[2], out[2], insn;
1206 rtx fixup[2];
1207 bool dead = false;
1208 bool reversed = false;
1209
1210 /* It is possible for reload to decide to overwrite a pointer with
1211 the value it points to. In that case we have to do the loads in
1212 the appropriate order so that the pointer is not destroyed too
1213 early. Also we must not generate a postmodify for that second
1214 load, or rws_access_regno will die. */
1215 if (GET_CODE (operands[1]) == MEM
1216 && reg_overlap_mentioned_p (operands[0], operands[1]))
1217 {
1218 rtx base = XEXP (operands[1], 0);
1219 while (GET_CODE (base) != REG)
1220 base = XEXP (base, 0);
1221
1222 if (REGNO (base) == REGNO (operands[0]))
1223 reversed = true;
1224 dead = true;
1225 }
1226 /* Another reason to do the moves in reversed order is if the first
1227 element of the target register pair is also the second element of
1228 the source register pair. */
1229 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1230 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1231 reversed = true;
1232
1233 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1234 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1235
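/* If EXP is a memory reference whose address is a POST_MODIFY, POST_INC or
   POST_DEC, attach a REG_INC note for the modified base register to INSN.  */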
1236 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1237 if (GET_CODE (EXP) == MEM \
1238 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1239 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1240 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1241 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1242 XEXP (XEXP (EXP, 0), 0), \
1243 REG_NOTES (INSN))
1244
1245 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1246 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1247 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1248
1249 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1250 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1251 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1252
1253 if (fixup[0])
1254 emit_insn (fixup[0]);
1255 if (fixup[1])
1256 emit_insn (fixup[1]);
1257
1258 #undef MAYBE_ADD_REG_INC_NOTE
1259 }
1260
1261 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1262 through memory plus an extra GR scratch register. Except that you can
1263 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1264 SECONDARY_RELOAD_CLASS, but not both.
1265
1266 We got into problems in the first place by allowing a construct like
1267 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1268 This solution attempts to prevent this situation from occurring. When
1269 we see something like the above, we spill the inner register to memory. */
1270
1271 rtx
1272 spill_xfmode_operand (rtx in, int force)
1273 {
1274 if (GET_CODE (in) == SUBREG
1275 && GET_MODE (SUBREG_REG (in)) == TImode
1276 && GET_CODE (SUBREG_REG (in)) == REG)
1277 {
1278 rtx memt = assign_stack_temp (TImode, 16, 0);
1279 emit_move_insn (memt, SUBREG_REG (in));
1280 return adjust_address (memt, XFmode, 0);
1281 }
1282 else if (force && GET_CODE (in) == REG)
1283 {
1284 rtx memx = assign_stack_temp (XFmode, 16, 0);
1285 emit_move_insn (memx, in);
1286 return memx;
1287 }
1288 else
1289 return in;
1290 }
1291
1292 /* Emit comparison instruction if necessary, returning the expression
1293 that holds the compare result in the proper mode. */
1294
1295 static GTY(()) rtx cmptf_libfunc;
1296
1297 rtx
1298 ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1299 {
1300 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1301 rtx cmp;
1302
1303 /* If we have a BImode input, then we already have a compare result, and
1304 do not need to emit another comparison. */
1305 if (GET_MODE (op0) == BImode)
1306 {
1307 gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
1308 cmp = op0;
1309 }
1310 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
 1311      magic number as its third argument that indicates what to do.
1312 The return value is an integer to be compared against zero. */
1313 else if (GET_MODE (op0) == TFmode)
1314 {
1315 enum qfcmp_magic {
1316 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1317 QCMP_UNORD = 2,
1318 QCMP_EQ = 4,
1319 QCMP_LT = 8,
1320 QCMP_GT = 16
1321 } magic;
1322 enum rtx_code ncode;
1323 rtx ret, insns;
1324
1325 gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
1326 switch (code)
1327 {
1328 /* 1 = equal, 0 = not equal. Equality operators do
1329 not raise FP_INVALID when given an SNaN operand. */
1330 case EQ: magic = QCMP_EQ; ncode = NE; break;
1331 case NE: magic = QCMP_EQ; ncode = EQ; break;
1332 /* isunordered() from C99. */
1333 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1334 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1335 /* Relational operators raise FP_INVALID when given
1336 an SNaN operand. */
1337 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1338 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1339 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1340 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1341 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
 1342            Expanders for buneq etc. would have to be added to ia64.md
1343 for this to be useful. */
1344 default: gcc_unreachable ();
1345 }
1346
1347 start_sequence ();
1348
1349 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1350 op0, TFmode, op1, TFmode,
1351 GEN_INT (magic), DImode);
1352 cmp = gen_reg_rtx (BImode);
1353 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1354 gen_rtx_fmt_ee (ncode, BImode,
1355 ret, const0_rtx)));
1356
1357 insns = get_insns ();
1358 end_sequence ();
1359
1360 emit_libcall_block (insns, cmp, cmp,
1361 gen_rtx_fmt_ee (code, BImode, op0, op1));
1362 code = NE;
1363 }
1364 else
1365 {
1366 cmp = gen_reg_rtx (BImode);
1367 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1368 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1369 code = NE;
1370 }
1371
1372 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1373 }
1374
1375 /* Generate an integral vector comparison. */
1376
1377 static bool
1378 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1379 rtx dest, rtx op0, rtx op1)
1380 {
1381 bool negate = false;
1382 rtx x;
1383
1384 switch (code)
1385 {
1386 case EQ:
1387 case GT:
1388 break;
1389
1390 case NE:
1391 code = EQ;
1392 negate = true;
1393 break;
1394
1395 case LE:
1396 code = GT;
1397 negate = true;
1398 break;
1399
1400 case GE:
1401 negate = true;
1402 /* FALLTHRU */
1403
1404 case LT:
1405 x = op0;
1406 op0 = op1;
1407 op1 = x;
1408 code = GT;
1409 break;
1410
1411 case GTU:
1412 case GEU:
1413 case LTU:
1414 case LEU:
1415 {
1416 rtx w0h, w0l, w1h, w1l, ch, cl;
1417 enum machine_mode wmode;
1418 rtx (*unpack_l) (rtx, rtx, rtx);
1419 rtx (*unpack_h) (rtx, rtx, rtx);
1420 rtx (*pack) (rtx, rtx, rtx);
1421
1422 /* We don't have native unsigned comparisons, but we can generate
1423 them better than generic code can. */
1424
1425 gcc_assert (mode != V2SImode);
1426 switch (mode)
1427 {
1428 case V8QImode:
1429 wmode = V4HImode;
1430 pack = gen_pack2_sss;
1431 unpack_l = gen_unpack1_l;
1432 unpack_h = gen_unpack1_h;
1433 break;
1434
1435 case V4HImode:
1436 wmode = V2SImode;
1437 pack = gen_pack4_sss;
1438 unpack_l = gen_unpack2_l;
1439 unpack_h = gen_unpack2_h;
1440 break;
1441
1442 default:
1443 gcc_unreachable ();
1444 }
1445
1446 /* Unpack into wider vectors, zero extending the elements. */
1447
1448 w0l = gen_reg_rtx (wmode);
1449 w0h = gen_reg_rtx (wmode);
1450 w1l = gen_reg_rtx (wmode);
1451 w1h = gen_reg_rtx (wmode);
1452 emit_insn (unpack_l (gen_lowpart (mode, w0l), op0, CONST0_RTX (mode)));
1453 emit_insn (unpack_h (gen_lowpart (mode, w0h), op0, CONST0_RTX (mode)));
1454 emit_insn (unpack_l (gen_lowpart (mode, w1l), op1, CONST0_RTX (mode)));
1455 emit_insn (unpack_h (gen_lowpart (mode, w1h), op1, CONST0_RTX (mode)));
1456
1457 /* Compare in the wider mode. */
1458
1459 cl = gen_reg_rtx (wmode);
1460 ch = gen_reg_rtx (wmode);
1461 code = signed_condition (code);
1462 ia64_expand_vecint_compare (code, wmode, cl, w0l, w1l);
1463 negate = ia64_expand_vecint_compare (code, wmode, ch, w0h, w1h);
1464
1465 /* Repack into a single narrower vector. */
1466
1467 emit_insn (pack (dest, cl, ch));
1468 }
1469 return negate;
1470
1471 default:
1472 gcc_unreachable ();
1473 }
1474
1475 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1476 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1477
1478 return negate;
1479 }
1480
1481 static void
1482 ia64_expand_vcondu_v2si (enum rtx_code code, rtx operands[])
1483 {
1484 rtx dl, dh, bl, bh, op1l, op1h, op2l, op2h, op4l, op4h, op5l, op5h, x;
1485
1486 /* In this case, we extract the two SImode quantities and generate
1487 normal comparisons for each of them. */
1488
1489 op1l = gen_lowpart (SImode, operands[1]);
1490 op2l = gen_lowpart (SImode, operands[2]);
1491 op4l = gen_lowpart (SImode, operands[4]);
1492 op5l = gen_lowpart (SImode, operands[5]);
1493
1494 op1h = gen_reg_rtx (SImode);
1495 op2h = gen_reg_rtx (SImode);
1496 op4h = gen_reg_rtx (SImode);
1497 op5h = gen_reg_rtx (SImode);
1498
1499 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op1h),
1500 gen_lowpart (DImode, operands[1]), GEN_INT (32)));
1501 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op2h),
1502 gen_lowpart (DImode, operands[2]), GEN_INT (32)));
1503 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op4h),
1504 gen_lowpart (DImode, operands[4]), GEN_INT (32)));
1505 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op5h),
1506 gen_lowpart (DImode, operands[5]), GEN_INT (32)));
1507
1508 bl = gen_reg_rtx (BImode);
1509 x = gen_rtx_fmt_ee (code, BImode, op4l, op5l);
1510 emit_insn (gen_rtx_SET (VOIDmode, bl, x));
1511
1512 bh = gen_reg_rtx (BImode);
1513 x = gen_rtx_fmt_ee (code, BImode, op4h, op5h);
1514 emit_insn (gen_rtx_SET (VOIDmode, bh, x));
1515
1516 /* With the results of the comparisons, emit conditional moves. */
1517
1518 dl = gen_reg_rtx (SImode);
1519 x = gen_rtx_IF_THEN_ELSE (SImode, bl, op1l, op2l);
1520 emit_insn (gen_rtx_SET (VOIDmode, dl, x));
1521
1522 dh = gen_reg_rtx (SImode);
1523 x = gen_rtx_IF_THEN_ELSE (SImode, bh, op1h, op2h);
1524 emit_insn (gen_rtx_SET (VOIDmode, dh, x));
1525
1526 /* Merge the two partial results back into a vector. */
1527
1528 x = gen_rtx_VEC_CONCAT (V2SImode, dl, dh);
1529 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1530 }
1531
1532 /* Emit an integral vector conditional move. */
1533
1534 void
1535 ia64_expand_vecint_cmov (rtx operands[])
1536 {
1537 enum machine_mode mode = GET_MODE (operands[0]);
1538 enum rtx_code code = GET_CODE (operands[3]);
1539 bool negate;
1540 rtx cmp, x, ot, of;
1541
1542 /* Since we don't have unsigned V2SImode comparisons, it's more efficient
1543 to special-case them entirely. */
1544 if (mode == V2SImode
1545 && (code == GTU || code == GEU || code == LEU || code == LTU))
1546 {
1547 ia64_expand_vcondu_v2si (code, operands);
1548 return;
1549 }
1550
1551 cmp = gen_reg_rtx (mode);
1552 negate = ia64_expand_vecint_compare (code, mode, cmp,
1553 operands[4], operands[5]);
1554
1555 ot = operands[1+negate];
1556 of = operands[2-negate];
1557
1558 if (ot == CONST0_RTX (mode))
1559 {
1560 if (of == CONST0_RTX (mode))
1561 {
1562 emit_move_insn (operands[0], ot);
1563 return;
1564 }
1565
1566 x = gen_rtx_NOT (mode, cmp);
1567 x = gen_rtx_AND (mode, x, of);
1568 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1569 }
1570 else if (of == CONST0_RTX (mode))
1571 {
1572 x = gen_rtx_AND (mode, cmp, ot);
1573 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1574 }
1575 else
1576 {
1577 rtx t, f;
1578
1579 t = gen_reg_rtx (mode);
1580 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1581 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1582
1583 f = gen_reg_rtx (mode);
1584 x = gen_rtx_NOT (mode, cmp);
1585 x = gen_rtx_AND (mode, x, operands[2-negate]);
1586 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1587
1588 x = gen_rtx_IOR (mode, t, f);
1589 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1590 }
1591 }
1592
1593 /* Emit an integral vector min or max operation. Return true if all done. */
1594
1595 bool
1596 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1597 rtx operands[])
1598 {
1599 rtx xops[5];
1600
1601 /* These four combinations are supported directly. */
1602 if (mode == V8QImode && (code == UMIN || code == UMAX))
1603 return false;
1604 if (mode == V4HImode && (code == SMIN || code == SMAX))
1605 return false;
1606
1607 /* Everything else implemented via vector comparisons. */
1608 xops[0] = operands[0];
1609 xops[4] = xops[1] = operands[1];
1610 xops[5] = xops[2] = operands[2];
1611
1612 switch (code)
1613 {
1614 case UMIN:
1615 code = LTU;
1616 break;
1617 case UMAX:
1618 code = GTU;
1619 break;
1620 case SMIN:
1621 code = LT;
1622 break;
1623 case SMAX:
1624 code = GT;
1625 break;
1626 default:
1627 gcc_unreachable ();
1628 }
1629 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1630
1631 ia64_expand_vecint_cmov (xops);
1632 return true;
1633 }
1634
1635 /* Emit the appropriate sequence for a call. */
1636
1637 void
1638 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1639 int sibcall_p)
1640 {
1641 rtx insn, b0;
1642
1643 addr = XEXP (addr, 0);
1644 addr = convert_memory_address (DImode, addr);
1645 b0 = gen_rtx_REG (DImode, R_BR (0));
1646
1647 /* ??? Should do this for functions known to bind local too. */
1648 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1649 {
1650 if (sibcall_p)
1651 insn = gen_sibcall_nogp (addr);
1652 else if (! retval)
1653 insn = gen_call_nogp (addr, b0);
1654 else
1655 insn = gen_call_value_nogp (retval, addr, b0);
1656 insn = emit_call_insn (insn);
1657 }
1658 else
1659 {
1660 if (sibcall_p)
1661 insn = gen_sibcall_gp (addr);
1662 else if (! retval)
1663 insn = gen_call_gp (addr, b0);
1664 else
1665 insn = gen_call_value_gp (retval, addr, b0);
1666 insn = emit_call_insn (insn);
1667
1668 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1669 }
1670
1671 if (sibcall_p)
1672 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1673 }
1674
1675 void
1676 ia64_reload_gp (void)
1677 {
1678 rtx tmp;
1679
1680 if (current_frame_info.reg_save_gp)
1681 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1682 else
1683 {
1684 HOST_WIDE_INT offset;
1685
1686 offset = (current_frame_info.spill_cfa_off
1687 + current_frame_info.spill_size);
1688 if (frame_pointer_needed)
1689 {
1690 tmp = hard_frame_pointer_rtx;
1691 offset = -offset;
1692 }
1693 else
1694 {
1695 tmp = stack_pointer_rtx;
1696 offset = current_frame_info.total_size - offset;
1697 }
1698
1699 if (CONST_OK_FOR_I (offset))
1700 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1701 tmp, GEN_INT (offset)));
1702 else
1703 {
1704 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1705 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1706 pic_offset_table_rtx, tmp));
1707 }
1708
1709 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1710 }
1711
1712 emit_move_insn (pic_offset_table_rtx, tmp);
1713 }
1714
1715 void
1716 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1717 rtx scratch_b, int noreturn_p, int sibcall_p)
1718 {
1719 rtx insn;
1720 bool is_desc = false;
1721
1722 /* If we find we're calling through a register, then we're actually
1723 calling through a descriptor, so load up the values. */
1724 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1725 {
1726 rtx tmp;
1727 bool addr_dead_p;
1728
1729 /* ??? We are currently constrained to *not* use peep2, because
1730 we can legitimately change the global lifetime of the GP
1731 (in the form of killing where previously live). This is
1732 because a call through a descriptor doesn't use the previous
1733 value of the GP, while a direct call does, and we do not
1734 commit to either form until the split here.
1735
1736 That said, this means that we lack precise life info for
1737 whether ADDR is dead after this call. This is not terribly
1738 important, since we can fix things up essentially for free
1739 with the POST_DEC below, but it's nice to not use it when we
1740 can immediately tell it's not necessary. */
1741 addr_dead_p = ((noreturn_p || sibcall_p
1742 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1743 REGNO (addr)))
1744 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1745
1746 /* Load the code address into scratch_b. */
1747 tmp = gen_rtx_POST_INC (Pmode, addr);
1748 tmp = gen_rtx_MEM (Pmode, tmp);
1749 emit_move_insn (scratch_r, tmp);
1750 emit_move_insn (scratch_b, scratch_r);
1751
1752 /* Load the GP address. If ADDR is not dead here, then we must
1753 revert the change made above via the POST_INCREMENT. */
1754 if (!addr_dead_p)
1755 tmp = gen_rtx_POST_DEC (Pmode, addr);
1756 else
1757 tmp = addr;
1758 tmp = gen_rtx_MEM (Pmode, tmp);
1759 emit_move_insn (pic_offset_table_rtx, tmp);
1760
1761 is_desc = true;
1762 addr = scratch_b;
1763 }
1764
1765 if (sibcall_p)
1766 insn = gen_sibcall_nogp (addr);
1767 else if (retval)
1768 insn = gen_call_value_nogp (retval, addr, retaddr);
1769 else
1770 insn = gen_call_nogp (addr, retaddr);
1771 emit_call_insn (insn);
1772
1773 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1774 ia64_reload_gp ();
1775 }
1776
1777 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
1778
1779 This differs from the generic code in that we know about the zero-extending
1780 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
1781 also know that ld.acq+cmpxchg.rel equals a full barrier.
1782
1783 The loop we want to generate looks like
1784
1785 cmp_reg = mem;
1786 label:
1787 old_reg = cmp_reg;
1788 new_reg = cmp_reg op val;
1789 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
1790 if (cmp_reg != old_reg)
1791 goto label;
1792
1793 Note that we only do the plain load from memory once. Subsequent
1794 iterations use the value loaded by the compare-and-swap pattern. */
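 /* A rough, illustrative sketch of what this expands to for a DImode
 add (register names here are arbitrary; the real output depends on
 register allocation and scheduling):

	ld8.acq      r_cmp = [r_mem]
 .Lretry:
	mov          r_old = r_cmp
	mov          ar.ccv = r_cmp
	add          r_new = r_cmp, r_val
	cmpxchg8.rel r_cmp = [r_mem], r_new, ar.ccv
	cmp.ne       p6, p7 = r_cmp, r_old
 (p6)	br.cond.spnt .Lretry  */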
1795
1796 void
1797 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
1798 rtx old_dst, rtx new_dst)
1799 {
1800 enum machine_mode mode = GET_MODE (mem);
1801 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
1802 enum insn_code icode;
1803
1804 /* Special case for using fetchadd. */
1805 if ((mode == SImode || mode == DImode) && fetchadd_operand (val, mode))
1806 {
1807 if (!old_dst)
1808 old_dst = gen_reg_rtx (mode);
1809
1810 emit_insn (gen_memory_barrier ());
1811
1812 if (mode == SImode)
1813 icode = CODE_FOR_fetchadd_acq_si;
1814 else
1815 icode = CODE_FOR_fetchadd_acq_di;
1816 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
1817
1818 if (new_dst)
1819 {
1820 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
1821 true, OPTAB_WIDEN);
1822 if (new_reg != new_dst)
1823 emit_move_insn (new_dst, new_reg);
1824 }
1825 return;
1826 }
1827
1828 /* Because of the volatile mem read, we get an ld.acq, which is the
1829 front half of the full barrier. The end half is the cmpxchg.rel. */
1830 gcc_assert (MEM_VOLATILE_P (mem));
1831
1832 old_reg = gen_reg_rtx (DImode);
1833 cmp_reg = gen_reg_rtx (DImode);
1834 label = gen_label_rtx ();
1835
1836 if (mode != DImode)
1837 {
1838 val = simplify_gen_subreg (DImode, val, mode, 0);
1839 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
1840 }
1841 else
1842 emit_move_insn (cmp_reg, mem);
1843
1844 emit_label (label);
1845
1846 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
1847 emit_move_insn (old_reg, cmp_reg);
1848 emit_move_insn (ar_ccv, cmp_reg);
1849
1850 if (old_dst)
1851 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
1852
1853 new_reg = cmp_reg;
1854 if (code == NOT)
1855 {
1856 new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
1857 code = AND;
1858 }
1859 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
1860 true, OPTAB_DIRECT);
1861
1862 if (mode != DImode)
1863 new_reg = gen_lowpart (mode, new_reg);
1864 if (new_dst)
1865 emit_move_insn (new_dst, new_reg);
1866
1867 switch (mode)
1868 {
1869 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
1870 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
1871 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
1872 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
1873 default:
1874 gcc_unreachable ();
1875 }
1876
1877 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
1878
1879 emit_cmp_and_jump_insns (cmp_reg, old_reg, EQ, NULL, DImode, true, label);
1880 }
1881 \f
1882 /* Begin the assembly file. */
1883
1884 static void
1885 ia64_file_start (void)
1886 {
1887 /* Variable tracking should be run after all optimizations which change order
1888 of insns. It also needs a valid CFG. This can't be done in
1889 ia64_override_options, because flag_var_tracking is finalized after
1890 that. */
1891 ia64_flag_var_tracking = flag_var_tracking;
1892 flag_var_tracking = 0;
1893
1894 default_file_start ();
1895 emit_safe_across_calls ();
1896 }
1897
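/* Emit a .pred.safe_across_calls directive describing which predicate
 registers are preserved across calls.  With the usual ia64 call-used
 partitioning this typically comes out as something like

	.pred.safe_across_calls p1-p5,p16-p63

 but the exact ranges are derived from call_used_regs below.  */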
1898 void
1899 emit_safe_across_calls (void)
1900 {
1901 unsigned int rs, re;
1902 int out_state;
1903
1904 rs = 1;
1905 out_state = 0;
1906 while (1)
1907 {
1908 while (rs < 64 && call_used_regs[PR_REG (rs)])
1909 rs++;
1910 if (rs >= 64)
1911 break;
1912 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1913 continue;
1914 if (out_state == 0)
1915 {
1916 fputs ("\t.pred.safe_across_calls ", asm_out_file);
1917 out_state = 1;
1918 }
1919 else
1920 fputc (',', asm_out_file);
1921 if (re == rs + 1)
1922 fprintf (asm_out_file, "p%u", rs);
1923 else
1924 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
1925 rs = re + 1;
1926 }
1927 if (out_state)
1928 fputc ('\n', asm_out_file);
1929 }
1930
1931 /* Helper function for ia64_compute_frame_size: find an appropriate general
1932 register to spill some special register to.  Bits already set in
1933 current_frame_info.gr_used_mask mark GR0 to GR31 allocated by this routine.
1934 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1935
1936 static int
1937 find_gr_spill (int try_locals)
1938 {
1939 int regno;
1940
1941 /* If this is a leaf function, first try an otherwise unused
1942 call-clobbered register. */
1943 if (current_function_is_leaf)
1944 {
1945 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1946 if (! regs_ever_live[regno]
1947 && call_used_regs[regno]
1948 && ! fixed_regs[regno]
1949 && ! global_regs[regno]
1950 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1951 {
1952 current_frame_info.gr_used_mask |= 1 << regno;
1953 return regno;
1954 }
1955 }
1956
1957 if (try_locals)
1958 {
1959 regno = current_frame_info.n_local_regs;
1960 /* If there is a frame pointer, then we can't use loc79, because
1961 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1962 reg_name switching code in ia64_expand_prologue. */
1963 if (regno < (80 - frame_pointer_needed))
1964 {
1965 current_frame_info.n_local_regs = regno + 1;
1966 return LOC_REG (0) + regno;
1967 }
1968 }
1969
1970 /* Failed to find a general register to spill to. Must use stack. */
1971 return 0;
1972 }
1973
1974 /* In order to make for nice schedules, we try to allocate every temporary
1975 to a different register. We must of course stay away from call-saved,
1976 fixed, and global registers. We must also stay away from registers
1977 allocated in current_frame_info.gr_used_mask, since those include regs
1978 used all through the prologue.
1979
1980 Any register allocated here must be used immediately. The idea is to
1981 aid scheduling, not to solve data flow problems. */
1982
1983 static int last_scratch_gr_reg;
1984
1985 static int
1986 next_scratch_gr_reg (void)
1987 {
1988 int i, regno;
1989
1990 for (i = 0; i < 32; ++i)
1991 {
1992 regno = (last_scratch_gr_reg + i + 1) & 31;
1993 if (call_used_regs[regno]
1994 && ! fixed_regs[regno]
1995 && ! global_regs[regno]
1996 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1997 {
1998 last_scratch_gr_reg = regno;
1999 return regno;
2000 }
2001 }
2002
2003 /* There must be _something_ available. */
2004 gcc_unreachable ();
2005 }
2006
2007 /* Helper function for ia64_compute_frame_size, called through
2008 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2009
2010 static void
2011 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2012 {
2013 unsigned int regno = REGNO (reg);
2014 if (regno < 32)
2015 {
2016 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2017 for (i = 0; i < n; ++i)
2018 current_frame_info.gr_used_mask |= 1 << (regno + i);
2019 }
2020 }
2021
2022 /* Compute the frame layout for the current function and record it in
2023 current_frame_info.  SIZE is the number of bytes of space needed for
2024 local variables. */
2025
2026 static void
2027 ia64_compute_frame_size (HOST_WIDE_INT size)
2028 {
2029 HOST_WIDE_INT total_size;
2030 HOST_WIDE_INT spill_size = 0;
2031 HOST_WIDE_INT extra_spill_size = 0;
2032 HOST_WIDE_INT pretend_args_size;
2033 HARD_REG_SET mask;
2034 int n_spilled = 0;
2035 int spilled_gr_p = 0;
2036 int spilled_fr_p = 0;
2037 unsigned int regno;
2038 int i;
2039
2040 if (current_frame_info.initialized)
2041 return;
2042
2043 memset (&current_frame_info, 0, sizeof current_frame_info);
2044 CLEAR_HARD_REG_SET (mask);
2045
2046 /* Don't allocate scratches to the return register. */
2047 diddle_return_value (mark_reg_gr_used_mask, NULL);
2048
2049 /* Don't allocate scratches to the EH scratch registers. */
2050 if (cfun->machine->ia64_eh_epilogue_sp)
2051 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2052 if (cfun->machine->ia64_eh_epilogue_bsp)
2053 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2054
2055 /* Find the size of the register stack frame. We have only 80 local
2056 registers, because we reserve 8 for the inputs and 8 for the
2057 outputs. */
2058
2059 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2060 since we'll be adjusting that down later. */
2061 regno = LOC_REG (78) + ! frame_pointer_needed;
2062 for (; regno >= LOC_REG (0); regno--)
2063 if (regs_ever_live[regno])
2064 break;
2065 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2066
2067 /* For functions marked with the syscall_linkage attribute, we must mark
2068 all eight input registers as in use, so that locals aren't visible to
2069 the caller. */
2070
2071 if (cfun->machine->n_varargs > 0
2072 || lookup_attribute ("syscall_linkage",
2073 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2074 current_frame_info.n_input_regs = 8;
2075 else
2076 {
2077 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2078 if (regs_ever_live[regno])
2079 break;
2080 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2081 }
2082
2083 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2084 if (regs_ever_live[regno])
2085 break;
2086 i = regno - OUT_REG (0) + 1;
2087
2088 /* When -p profiling, we need one output register for the mcount argument.
2089 Likewise for -a profiling for the bb_init_func argument. For -ax
2090 profiling, we need two output registers for the two bb_init_trace_func
2091 arguments. */
2092 if (current_function_profile)
2093 i = MAX (i, 1);
2094 current_frame_info.n_output_regs = i;
2095
2096 /* ??? No rotating register support yet. */
2097 current_frame_info.n_rotate_regs = 0;
2098
2099 /* Discover which registers need spilling, and how much room that
2100 will take. Begin with floating point and general registers,
2101 which will always wind up on the stack. */
2102
2103 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2104 if (regs_ever_live[regno] && ! call_used_regs[regno])
2105 {
2106 SET_HARD_REG_BIT (mask, regno);
2107 spill_size += 16;
2108 n_spilled += 1;
2109 spilled_fr_p = 1;
2110 }
2111
2112 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2113 if (regs_ever_live[regno] && ! call_used_regs[regno])
2114 {
2115 SET_HARD_REG_BIT (mask, regno);
2116 spill_size += 8;
2117 n_spilled += 1;
2118 spilled_gr_p = 1;
2119 }
2120
2121 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2122 if (regs_ever_live[regno] && ! call_used_regs[regno])
2123 {
2124 SET_HARD_REG_BIT (mask, regno);
2125 spill_size += 8;
2126 n_spilled += 1;
2127 }
2128
2129 /* Now come all special registers that might get saved in other
2130 general registers. */
2131
2132 if (frame_pointer_needed)
2133 {
2134 current_frame_info.reg_fp = find_gr_spill (1);
2135 /* If we did not get a register, then we take LOC79. This is guaranteed
2136 to be free, even if regs_ever_live is already set, because this is
2137 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2138 as we don't count loc79 above. */
2139 if (current_frame_info.reg_fp == 0)
2140 {
2141 current_frame_info.reg_fp = LOC_REG (79);
2142 current_frame_info.n_local_regs++;
2143 }
2144 }
2145
2146 if (! current_function_is_leaf)
2147 {
2148 /* Emit a save of BR0 if we call other functions. Do this even
2149 if this function doesn't return, as EH depends on this to be
2150 able to unwind the stack. */
2151 SET_HARD_REG_BIT (mask, BR_REG (0));
2152
2153 current_frame_info.reg_save_b0 = find_gr_spill (1);
2154 if (current_frame_info.reg_save_b0 == 0)
2155 {
2156 spill_size += 8;
2157 n_spilled += 1;
2158 }
2159
2160 /* Similarly for ar.pfs. */
2161 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2162 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2163 if (current_frame_info.reg_save_ar_pfs == 0)
2164 {
2165 extra_spill_size += 8;
2166 n_spilled += 1;
2167 }
2168
2169 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2170 registers are clobbered, so we fall back to the stack. */
2171 current_frame_info.reg_save_gp
2172 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2173 if (current_frame_info.reg_save_gp == 0)
2174 {
2175 SET_HARD_REG_BIT (mask, GR_REG (1));
2176 spill_size += 8;
2177 n_spilled += 1;
2178 }
2179 }
2180 else
2181 {
2182 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2183 {
2184 SET_HARD_REG_BIT (mask, BR_REG (0));
2185 spill_size += 8;
2186 n_spilled += 1;
2187 }
2188
2189 if (regs_ever_live[AR_PFS_REGNUM])
2190 {
2191 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2192 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2193 if (current_frame_info.reg_save_ar_pfs == 0)
2194 {
2195 extra_spill_size += 8;
2196 n_spilled += 1;
2197 }
2198 }
2199 }
2200
2201 /* Unwind descriptor hackery: things are most efficient if we allocate
2202 consecutive GR save registers for RP, PFS, FP in that order. However,
2203 it is absolutely critical that FP get the only hard register that's
2204 guaranteed to be free, so we allocated it first. If all three did
2205 happen to be allocated hard regs, and are consecutive, rearrange them
2206 into the preferred order now. */
2207 if (current_frame_info.reg_fp != 0
2208 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2209 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2210 {
2211 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2212 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2213 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2214 }
2215
2216 /* See if we need to store the predicate register block. */
2217 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2218 if (regs_ever_live[regno] && ! call_used_regs[regno])
2219 break;
2220 if (regno <= PR_REG (63))
2221 {
2222 SET_HARD_REG_BIT (mask, PR_REG (0));
2223 current_frame_info.reg_save_pr = find_gr_spill (1);
2224 if (current_frame_info.reg_save_pr == 0)
2225 {
2226 extra_spill_size += 8;
2227 n_spilled += 1;
2228 }
2229
2230 /* ??? Mark them all as used so that register renaming and such
2231 are free to use them. */
2232 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2233 regs_ever_live[regno] = 1;
2234 }
2235
2236 /* If we're forced to use st8.spill, we're forced to save and restore
2237 ar.unat as well. The check for existing liveness allows inline asm
2238 to touch ar.unat. */
2239 if (spilled_gr_p || cfun->machine->n_varargs
2240 || regs_ever_live[AR_UNAT_REGNUM])
2241 {
2242 regs_ever_live[AR_UNAT_REGNUM] = 1;
2243 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2244 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2245 if (current_frame_info.reg_save_ar_unat == 0)
2246 {
2247 extra_spill_size += 8;
2248 n_spilled += 1;
2249 }
2250 }
2251
2252 if (regs_ever_live[AR_LC_REGNUM])
2253 {
2254 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2255 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2256 if (current_frame_info.reg_save_ar_lc == 0)
2257 {
2258 extra_spill_size += 8;
2259 n_spilled += 1;
2260 }
2261 }
2262
2263 /* If we have an odd number of words of pretend arguments written to
2264 the stack, then the FR save area will be unaligned. We round the
2265 size of this area up to keep things 16 byte aligned. */
2266 if (spilled_fr_p)
2267 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2268 else
2269 pretend_args_size = current_function_pretend_args_size;
2270
2271 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2272 + current_function_outgoing_args_size);
2273 total_size = IA64_STACK_ALIGN (total_size);
2274
2275 /* We always use the 16-byte scratch area provided by the caller, but
2276 if we are a leaf function, there's no one to which we need to provide
2277 a scratch area. */
2278 if (current_function_is_leaf)
2279 total_size = MAX (0, total_size - 16);
2280
2281 current_frame_info.total_size = total_size;
2282 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2283 current_frame_info.spill_size = spill_size;
2284 current_frame_info.extra_spill_size = extra_spill_size;
2285 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2286 current_frame_info.n_spilled = n_spilled;
2287 current_frame_info.initialized = reload_completed;
2288 }
2289
2290 /* Compute the initial difference between the specified pair of registers. */
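/* For example (values chosen only for illustration): in a non-leaf
 function with total_size == 64 and 16 bytes of outgoing arguments,
 eliminating the soft frame pointer yields -(64 - 16 - 16) == -32
 relative to the hard frame pointer and 16 + 16 == 32 relative to the
 stack pointer, matching the two cases below.  */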
2291
2292 HOST_WIDE_INT
2293 ia64_initial_elimination_offset (int from, int to)
2294 {
2295 HOST_WIDE_INT offset;
2296
2297 ia64_compute_frame_size (get_frame_size ());
2298 switch (from)
2299 {
2300 case FRAME_POINTER_REGNUM:
2301 switch (to)
2302 {
2303 case HARD_FRAME_POINTER_REGNUM:
2304 if (current_function_is_leaf)
2305 offset = -current_frame_info.total_size;
2306 else
2307 offset = -(current_frame_info.total_size
2308 - current_function_outgoing_args_size - 16);
2309 break;
2310
2311 case STACK_POINTER_REGNUM:
2312 if (current_function_is_leaf)
2313 offset = 0;
2314 else
2315 offset = 16 + current_function_outgoing_args_size;
2316 break;
2317
2318 default:
2319 gcc_unreachable ();
2320 }
2321 break;
2322
2323 case ARG_POINTER_REGNUM:
2324 /* Arguments start above the 16 byte save area, unless stdarg
2325 in which case we store through the 16 byte save area. */
2326 switch (to)
2327 {
2328 case HARD_FRAME_POINTER_REGNUM:
2329 offset = 16 - current_function_pretend_args_size;
2330 break;
2331
2332 case STACK_POINTER_REGNUM:
2333 offset = (current_frame_info.total_size
2334 + 16 - current_function_pretend_args_size);
2335 break;
2336
2337 default:
2338 gcc_unreachable ();
2339 }
2340 break;
2341
2342 default:
2343 gcc_unreachable ();
2344 }
2345
2346 return offset;
2347 }
2348
2349 /* If there are more than a trivial number of register spills, we use
2350 two interleaved iterators so that we can get two memory references
2351 per insn group.
2352
2353 In order to simplify things in the prologue and epilogue expanders,
2354 we use helper functions to fix up the memory references after the
2355 fact with the appropriate offsets to a POST_MODIFY memory mode.
2356 The following data structure tracks the state of the two iterators
2357 while insns are being emitted. */
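/* Illustrative only: with two iterators the spill code can issue, e.g.,

	st8.spill [r2] = r4, 16
	st8.spill [r3] = r5, 16

 in the same instruction group, since the two stores use different base
 registers; the register numbers here are arbitrary.  */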
2358
2359 struct spill_fill_data
2360 {
2361 rtx init_after; /* point at which to emit initializations */
2362 rtx init_reg[2]; /* initial base register */
2363 rtx iter_reg[2]; /* the iterator registers */
2364 rtx *prev_addr[2]; /* address of last memory use */
2365 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2366 HOST_WIDE_INT prev_off[2]; /* last offset */
2367 int n_iter; /* number of iterators in use */
2368 int next_iter; /* next iterator to use */
2369 unsigned int save_gr_used_mask;
2370 };
2371
2372 static struct spill_fill_data spill_fill_data;
2373
2374 static void
2375 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2376 {
2377 int i;
2378
2379 spill_fill_data.init_after = get_last_insn ();
2380 spill_fill_data.init_reg[0] = init_reg;
2381 spill_fill_data.init_reg[1] = init_reg;
2382 spill_fill_data.prev_addr[0] = NULL;
2383 spill_fill_data.prev_addr[1] = NULL;
2384 spill_fill_data.prev_insn[0] = NULL;
2385 spill_fill_data.prev_insn[1] = NULL;
2386 spill_fill_data.prev_off[0] = cfa_off;
2387 spill_fill_data.prev_off[1] = cfa_off;
2388 spill_fill_data.next_iter = 0;
2389 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2390
2391 spill_fill_data.n_iter = 1 + (n_spills > 2);
2392 for (i = 0; i < spill_fill_data.n_iter; ++i)
2393 {
2394 int regno = next_scratch_gr_reg ();
2395 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2396 current_frame_info.gr_used_mask |= 1 << regno;
2397 }
2398 }
2399
2400 static void
2401 finish_spill_pointers (void)
2402 {
2403 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2404 }
2405
2406 static rtx
2407 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2408 {
2409 int iter = spill_fill_data.next_iter;
2410 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2411 rtx disp_rtx = GEN_INT (disp);
2412 rtx mem;
2413
2414 if (spill_fill_data.prev_addr[iter])
2415 {
2416 if (CONST_OK_FOR_N (disp))
2417 {
2418 *spill_fill_data.prev_addr[iter]
2419 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2420 gen_rtx_PLUS (DImode,
2421 spill_fill_data.iter_reg[iter],
2422 disp_rtx));
2423 REG_NOTES (spill_fill_data.prev_insn[iter])
2424 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2425 REG_NOTES (spill_fill_data.prev_insn[iter]));
2426 }
2427 else
2428 {
2429 /* ??? Could use register post_modify for loads. */
2430 if (! CONST_OK_FOR_I (disp))
2431 {
2432 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2433 emit_move_insn (tmp, disp_rtx);
2434 disp_rtx = tmp;
2435 }
2436 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2437 spill_fill_data.iter_reg[iter], disp_rtx));
2438 }
2439 }
2440 /* Micro-optimization: if we've created a frame pointer, it's at
2441 CFA 0, which may allow the real iterator to be initialized lower,
2442 slightly increasing parallelism. Also, if there are few saves
2443 it may eliminate the iterator entirely. */
2444 else if (disp == 0
2445 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2446 && frame_pointer_needed)
2447 {
2448 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2449 set_mem_alias_set (mem, get_varargs_alias_set ());
2450 return mem;
2451 }
2452 else
2453 {
2454 rtx seq, insn;
2455
2456 if (disp == 0)
2457 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2458 spill_fill_data.init_reg[iter]);
2459 else
2460 {
2461 start_sequence ();
2462
2463 if (! CONST_OK_FOR_I (disp))
2464 {
2465 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2466 emit_move_insn (tmp, disp_rtx);
2467 disp_rtx = tmp;
2468 }
2469
2470 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2471 spill_fill_data.init_reg[iter],
2472 disp_rtx));
2473
2474 seq = get_insns ();
2475 end_sequence ();
2476 }
2477
2478 /* Be careful in case this becomes the first insn in the insn chain. */
2479 if (spill_fill_data.init_after)
2480 insn = emit_insn_after (seq, spill_fill_data.init_after);
2481 else
2482 {
2483 rtx first = get_insns ();
2484 if (first)
2485 insn = emit_insn_before (seq, first);
2486 else
2487 insn = emit_insn (seq);
2488 }
2489 spill_fill_data.init_after = insn;
2490
2491 /* If DISP is 0, we may or may not have a further adjustment
2492 afterward. If we do, then the load/store insn may be modified
2493 to be a post-modify. If we don't, then this copy may be
2494 eliminated by copyprop_hardreg_forward, which makes this
2495 insn garbage, which runs afoul of the sanity check in
2496 propagate_one_insn. So mark this insn as legal to delete. */
2497 if (disp == 0)
2498 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2499 REG_NOTES (insn));
2500 }
2501
2502 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2503
2504 /* ??? Not all of the spills are for varargs, but some of them are.
2505 The rest of the spills belong in an alias set of their own. But
2506 it doesn't actually hurt to include them here. */
2507 set_mem_alias_set (mem, get_varargs_alias_set ());
2508
2509 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2510 spill_fill_data.prev_off[iter] = cfa_off;
2511
2512 if (++iter >= spill_fill_data.n_iter)
2513 iter = 0;
2514 spill_fill_data.next_iter = iter;
2515
2516 return mem;
2517 }
2518
2519 static void
2520 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2521 rtx frame_reg)
2522 {
2523 int iter = spill_fill_data.next_iter;
2524 rtx mem, insn;
2525
2526 mem = spill_restore_mem (reg, cfa_off);
2527 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2528 spill_fill_data.prev_insn[iter] = insn;
2529
2530 if (frame_reg)
2531 {
2532 rtx base;
2533 HOST_WIDE_INT off;
2534
2535 RTX_FRAME_RELATED_P (insn) = 1;
2536
2537 /* Don't even pretend that the unwind code can intuit its way
2538 through a pair of interleaved post_modify iterators. Just
2539 provide the correct answer. */
2540
2541 if (frame_pointer_needed)
2542 {
2543 base = hard_frame_pointer_rtx;
2544 off = - cfa_off;
2545 }
2546 else
2547 {
2548 base = stack_pointer_rtx;
2549 off = current_frame_info.total_size - cfa_off;
2550 }
2551
2552 REG_NOTES (insn)
2553 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2554 gen_rtx_SET (VOIDmode,
2555 gen_rtx_MEM (GET_MODE (reg),
2556 plus_constant (base, off)),
2557 frame_reg),
2558 REG_NOTES (insn));
2559 }
2560 }
2561
2562 static void
2563 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2564 {
2565 int iter = spill_fill_data.next_iter;
2566 rtx insn;
2567
2568 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2569 GEN_INT (cfa_off)));
2570 spill_fill_data.prev_insn[iter] = insn;
2571 }
2572
2573 /* Wrapper functions that discard the CONST_INT spill offset. These
2574 exist so that we can give gr_spill/gr_fill the offset they need and
2575 use a consistent function interface. */
2576
2577 static rtx
2578 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2579 {
2580 return gen_movdi (dest, src);
2581 }
2582
2583 static rtx
2584 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2585 {
2586 return gen_fr_spill (dest, src);
2587 }
2588
2589 static rtx
2590 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2591 {
2592 return gen_fr_restore (dest, src);
2593 }
2594
2595 /* Called after register allocation to add any instructions needed for the
2596 prologue. Using a prologue insn is favored compared to putting all of the
2597 instructions in output_function_prologue(), since it allows the scheduler
2598 to intermix instructions with the saves of the caller saved registers. In
2599 some cases, it might be necessary to emit a barrier instruction as the last
2600 insn to prevent such scheduling.
2601
2602 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2603 so that the debug info generation code can handle them properly.
2604
2605 The register save area is laid out like so:
2606 cfa+16
2607 [ varargs spill area ]
2608 [ fr register spill area ]
2609 [ br register spill area ]
2610 [ ar register spill area ]
2611 [ pr register spill area ]
2612 [ gr register spill area ] */
2613
2614 /* ??? We get inefficient code when the frame size is larger than can fit
2615 in an adds instruction. */
2616
2617 void
2618 ia64_expand_prologue (void)
2619 {
2620 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2621 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2622 rtx reg, alt_reg;
2623
2624 ia64_compute_frame_size (get_frame_size ());
2625 last_scratch_gr_reg = 15;
2626
2627 /* If there is no epilogue, then we don't need some prologue insns.
2628 We need to avoid emitting the dead prologue insns, because flow
2629 will complain about them. */
2630 if (optimize)
2631 {
2632 edge e;
2633 edge_iterator ei;
2634
2635 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2636 if ((e->flags & EDGE_FAKE) == 0
2637 && (e->flags & EDGE_FALLTHRU) != 0)
2638 break;
2639 epilogue_p = (e != NULL);
2640 }
2641 else
2642 epilogue_p = 1;
2643
2644 /* Set the local, input, and output register names. We need to do this
2645 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2646 half. If we use in/loc/out register names, then we get assembler errors
2647 in crtn.S because there is no alloc insn or regstk directive in there. */
2648 if (! TARGET_REG_NAMES)
2649 {
2650 int inputs = current_frame_info.n_input_regs;
2651 int locals = current_frame_info.n_local_regs;
2652 int outputs = current_frame_info.n_output_regs;
2653
2654 for (i = 0; i < inputs; i++)
2655 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2656 for (i = 0; i < locals; i++)
2657 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2658 for (i = 0; i < outputs; i++)
2659 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2660 }
2661
2662 /* Set the frame pointer register name. The regnum is logically loc79,
2663 but of course we'll not have allocated that many locals. Rather than
2664 worrying about renumbering the existing rtxs, we adjust the name. */
2665 /* ??? This code means that we can never use one local register when
2666 there is a frame pointer. loc79 gets wasted in this case, as it is
2667 renamed to a register that will never be used. See also the try_locals
2668 code in find_gr_spill. */
2669 if (current_frame_info.reg_fp)
2670 {
2671 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2672 reg_names[HARD_FRAME_POINTER_REGNUM]
2673 = reg_names[current_frame_info.reg_fp];
2674 reg_names[current_frame_info.reg_fp] = tmp;
2675 }
2676
2677 /* We don't need an alloc instruction if we've used no outputs or locals. */
2678 if (current_frame_info.n_local_regs == 0
2679 && current_frame_info.n_output_regs == 0
2680 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2681 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2682 {
2683 /* If there is no alloc, but there are input registers used, then we
2684 need a .regstk directive. */
2685 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2686 ar_pfs_save_reg = NULL_RTX;
2687 }
2688 else
2689 {
2690 current_frame_info.need_regstk = 0;
2691
2692 if (current_frame_info.reg_save_ar_pfs)
2693 regno = current_frame_info.reg_save_ar_pfs;
2694 else
2695 regno = next_scratch_gr_reg ();
2696 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2697
2698 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2699 GEN_INT (current_frame_info.n_input_regs),
2700 GEN_INT (current_frame_info.n_local_regs),
2701 GEN_INT (current_frame_info.n_output_regs),
2702 GEN_INT (current_frame_info.n_rotate_regs)));
2703 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2704 }
2705
2706 /* Set up frame pointer, stack pointer, and spill iterators. */
2707
2708 n_varargs = cfun->machine->n_varargs;
2709 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2710 stack_pointer_rtx, 0);
2711
2712 if (frame_pointer_needed)
2713 {
2714 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2715 RTX_FRAME_RELATED_P (insn) = 1;
2716 }
2717
2718 if (current_frame_info.total_size != 0)
2719 {
2720 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2721 rtx offset;
2722
2723 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2724 offset = frame_size_rtx;
2725 else
2726 {
2727 regno = next_scratch_gr_reg ();
2728 offset = gen_rtx_REG (DImode, regno);
2729 emit_move_insn (offset, frame_size_rtx);
2730 }
2731
2732 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2733 stack_pointer_rtx, offset));
2734
2735 if (! frame_pointer_needed)
2736 {
2737 RTX_FRAME_RELATED_P (insn) = 1;
2738 if (GET_CODE (offset) != CONST_INT)
2739 {
2740 REG_NOTES (insn)
2741 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2742 gen_rtx_SET (VOIDmode,
2743 stack_pointer_rtx,
2744 gen_rtx_PLUS (DImode,
2745 stack_pointer_rtx,
2746 frame_size_rtx)),
2747 REG_NOTES (insn));
2748 }
2749 }
2750
2751 /* ??? At this point we must generate a magic insn that appears to
2752 modify the stack pointer, the frame pointer, and all spill
2753 iterators. This would allow the most scheduling freedom. For
2754 now, just hard stop. */
2755 emit_insn (gen_blockage ());
2756 }
2757
2758 /* Must copy out ar.unat before doing any integer spills. */
2759 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2760 {
2761 if (current_frame_info.reg_save_ar_unat)
2762 ar_unat_save_reg
2763 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2764 else
2765 {
2766 alt_regno = next_scratch_gr_reg ();
2767 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2768 current_frame_info.gr_used_mask |= 1 << alt_regno;
2769 }
2770
2771 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2772 insn = emit_move_insn (ar_unat_save_reg, reg);
2773 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2774
2775 /* Even if we're not going to generate an epilogue, we still
2776 need to save the register so that EH works. */
2777 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2778 emit_insn (gen_prologue_use (ar_unat_save_reg));
2779 }
2780 else
2781 ar_unat_save_reg = NULL_RTX;
2782
2783 /* Spill all varargs registers. Do this before spilling any GR registers,
2784 since we want the UNAT bits for the GR registers to override the UNAT
2785 bits from varargs, which we don't care about. */
2786
2787 cfa_off = -16;
2788 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2789 {
2790 reg = gen_rtx_REG (DImode, regno);
2791 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2792 }
2793
2794 /* Locate the bottom of the register save area. */
2795 cfa_off = (current_frame_info.spill_cfa_off
2796 + current_frame_info.spill_size
2797 + current_frame_info.extra_spill_size);
2798
2799 /* Save the predicate register block either in a register or in memory. */
2800 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2801 {
2802 reg = gen_rtx_REG (DImode, PR_REG (0));
2803 if (current_frame_info.reg_save_pr != 0)
2804 {
2805 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2806 insn = emit_move_insn (alt_reg, reg);
2807
2808 /* ??? Denote pr spill/fill by a DImode move that modifies all
2809 64 hard registers. */
2810 RTX_FRAME_RELATED_P (insn) = 1;
2811 REG_NOTES (insn)
2812 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2813 gen_rtx_SET (VOIDmode, alt_reg, reg),
2814 REG_NOTES (insn));
2815
2816 /* Even if we're not going to generate an epilogue, we still
2817 need to save the register so that EH works. */
2818 if (! epilogue_p)
2819 emit_insn (gen_prologue_use (alt_reg));
2820 }
2821 else
2822 {
2823 alt_regno = next_scratch_gr_reg ();
2824 alt_reg = gen_rtx_REG (DImode, alt_regno);
2825 insn = emit_move_insn (alt_reg, reg);
2826 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2827 cfa_off -= 8;
2828 }
2829 }
2830
2831 /* Handle AR regs in numerical order. All of them get special handling. */
2832 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2833 && current_frame_info.reg_save_ar_unat == 0)
2834 {
2835 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2836 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2837 cfa_off -= 8;
2838 }
2839
2840 /* The alloc insn already copied ar.pfs into a general register. The
2841 only thing we have to do now is copy that register to a stack slot
2842 if we'd not allocated a local register for the job. */
2843 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2844 && current_frame_info.reg_save_ar_pfs == 0)
2845 {
2846 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2847 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2848 cfa_off -= 8;
2849 }
2850
2851 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2852 {
2853 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2854 if (current_frame_info.reg_save_ar_lc != 0)
2855 {
2856 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2857 insn = emit_move_insn (alt_reg, reg);
2858 RTX_FRAME_RELATED_P (insn) = 1;
2859
2860 /* Even if we're not going to generate an epilogue, we still
2861 need to save the register so that EH works. */
2862 if (! epilogue_p)
2863 emit_insn (gen_prologue_use (alt_reg));
2864 }
2865 else
2866 {
2867 alt_regno = next_scratch_gr_reg ();
2868 alt_reg = gen_rtx_REG (DImode, alt_regno);
2869 emit_move_insn (alt_reg, reg);
2870 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2871 cfa_off -= 8;
2872 }
2873 }
2874
2875 if (current_frame_info.reg_save_gp)
2876 {
2877 insn = emit_move_insn (gen_rtx_REG (DImode,
2878 current_frame_info.reg_save_gp),
2879 pic_offset_table_rtx);
2880 /* We don't know for sure yet if this is actually needed, since
2881 we've not split the PIC call patterns. If all of the calls
2882 are indirect, and not followed by any uses of the gp, then
2883 this save is dead. Allow it to go away. */
2884 REG_NOTES (insn)
2885 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2886 }
2887
2888 /* We should now be at the base of the gr/br/fr spill area. */
2889 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
2890 + current_frame_info.spill_size));
2891
2892 /* Spill all general registers. */
2893 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2894 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2895 {
2896 reg = gen_rtx_REG (DImode, regno);
2897 do_spill (gen_gr_spill, reg, cfa_off, reg);
2898 cfa_off -= 8;
2899 }
2900
2901 /* Handle BR0 specially -- it may be getting stored permanently in
2902 some GR register. */
2903 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2904 {
2905 reg = gen_rtx_REG (DImode, BR_REG (0));
2906 if (current_frame_info.reg_save_b0 != 0)
2907 {
2908 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2909 insn = emit_move_insn (alt_reg, reg);
2910 RTX_FRAME_RELATED_P (insn) = 1;
2911
2912 /* Even if we're not going to generate an epilogue, we still
2913 need to save the register so that EH works. */
2914 if (! epilogue_p)
2915 emit_insn (gen_prologue_use (alt_reg));
2916 }
2917 else
2918 {
2919 alt_regno = next_scratch_gr_reg ();
2920 alt_reg = gen_rtx_REG (DImode, alt_regno);
2921 emit_move_insn (alt_reg, reg);
2922 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2923 cfa_off -= 8;
2924 }
2925 }
2926
2927 /* Spill the rest of the BR registers. */
2928 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2929 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2930 {
2931 alt_regno = next_scratch_gr_reg ();
2932 alt_reg = gen_rtx_REG (DImode, alt_regno);
2933 reg = gen_rtx_REG (DImode, regno);
2934 emit_move_insn (alt_reg, reg);
2935 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2936 cfa_off -= 8;
2937 }
2938
2939 /* Align the frame and spill all FR registers. */
2940 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2941 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2942 {
2943 gcc_assert (!(cfa_off & 15));
2944 reg = gen_rtx_REG (XFmode, regno);
2945 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2946 cfa_off -= 16;
2947 }
2948
2949 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
2950
2951 finish_spill_pointers ();
2952 }
2953
2954 /* Called after register allocation to add any instructions needed for the
2955 epilogue. Using an epilogue insn is favored compared to putting all of the
2956 instructions in output_function_epilogue(), since it allows the scheduler
2957 to intermix instructions with the saves of the caller saved registers. In
2958 some cases, it might be necessary to emit a barrier instruction as the last
2959 insn to prevent such scheduling. */
2960
2961 void
2962 ia64_expand_epilogue (int sibcall_p)
2963 {
2964 rtx insn, reg, alt_reg, ar_unat_save_reg;
2965 int regno, alt_regno, cfa_off;
2966
2967 ia64_compute_frame_size (get_frame_size ());
2968
2969 /* If there is a frame pointer, then we use it instead of the stack
2970 pointer, so that the stack pointer does not need to be valid when
2971 the epilogue starts. See EXIT_IGNORE_STACK. */
2972 if (frame_pointer_needed)
2973 setup_spill_pointers (current_frame_info.n_spilled,
2974 hard_frame_pointer_rtx, 0);
2975 else
2976 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2977 current_frame_info.total_size);
2978
2979 if (current_frame_info.total_size != 0)
2980 {
2981 /* ??? At this point we must generate a magic insn that appears to
2982 modify the spill iterators and the frame pointer. This would
2983 allow the most scheduling freedom. For now, just hard stop. */
2984 emit_insn (gen_blockage ());
2985 }
2986
2987 /* Locate the bottom of the register save area. */
2988 cfa_off = (current_frame_info.spill_cfa_off
2989 + current_frame_info.spill_size
2990 + current_frame_info.extra_spill_size);
2991
2992 /* Restore the predicate registers. */
2993 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2994 {
2995 if (current_frame_info.reg_save_pr != 0)
2996 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2997 else
2998 {
2999 alt_regno = next_scratch_gr_reg ();
3000 alt_reg = gen_rtx_REG (DImode, alt_regno);
3001 do_restore (gen_movdi_x, alt_reg, cfa_off);
3002 cfa_off -= 8;
3003 }
3004 reg = gen_rtx_REG (DImode, PR_REG (0));
3005 emit_move_insn (reg, alt_reg);
3006 }
3007
3008 /* Restore the application registers. */
3009
3010 /* Load the saved unat from the stack, but do not restore it until
3011 after the GRs have been restored. */
3012 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3013 {
3014 if (current_frame_info.reg_save_ar_unat != 0)
3015 ar_unat_save_reg
3016 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3017 else
3018 {
3019 alt_regno = next_scratch_gr_reg ();
3020 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3021 current_frame_info.gr_used_mask |= 1 << alt_regno;
3022 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3023 cfa_off -= 8;
3024 }
3025 }
3026 else
3027 ar_unat_save_reg = NULL_RTX;
3028
3029 if (current_frame_info.reg_save_ar_pfs != 0)
3030 {
3031 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3032 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3033 emit_move_insn (reg, alt_reg);
3034 }
3035 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3036 {
3037 alt_regno = next_scratch_gr_reg ();
3038 alt_reg = gen_rtx_REG (DImode, alt_regno);
3039 do_restore (gen_movdi_x, alt_reg, cfa_off);
3040 cfa_off -= 8;
3041 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3042 emit_move_insn (reg, alt_reg);
3043 }
3044
3045 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3046 {
3047 if (current_frame_info.reg_save_ar_lc != 0)
3048 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3049 else
3050 {
3051 alt_regno = next_scratch_gr_reg ();
3052 alt_reg = gen_rtx_REG (DImode, alt_regno);
3053 do_restore (gen_movdi_x, alt_reg, cfa_off);
3054 cfa_off -= 8;
3055 }
3056 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3057 emit_move_insn (reg, alt_reg);
3058 }
3059
3060 /* We should now be at the base of the gr/br/fr spill area. */
3061 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3062 + current_frame_info.spill_size));
3063
3064 /* The GP may be stored on the stack in the prologue, but it's
3065 never restored in the epilogue. Skip the stack slot. */
3066 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3067 cfa_off -= 8;
3068
3069 /* Restore all general registers. */
3070 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3071 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3072 {
3073 reg = gen_rtx_REG (DImode, regno);
3074 do_restore (gen_gr_restore, reg, cfa_off);
3075 cfa_off -= 8;
3076 }
3077
3078 /* Restore the branch registers. Handle B0 specially, as it may
3079 have gotten stored in some GR register. */
3080 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3081 {
3082 if (current_frame_info.reg_save_b0 != 0)
3083 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3084 else
3085 {
3086 alt_regno = next_scratch_gr_reg ();
3087 alt_reg = gen_rtx_REG (DImode, alt_regno);
3088 do_restore (gen_movdi_x, alt_reg, cfa_off);
3089 cfa_off -= 8;
3090 }
3091 reg = gen_rtx_REG (DImode, BR_REG (0));
3092 emit_move_insn (reg, alt_reg);
3093 }
3094
3095 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3096 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3097 {
3098 alt_regno = next_scratch_gr_reg ();
3099 alt_reg = gen_rtx_REG (DImode, alt_regno);
3100 do_restore (gen_movdi_x, alt_reg, cfa_off);
3101 cfa_off -= 8;
3102 reg = gen_rtx_REG (DImode, regno);
3103 emit_move_insn (reg, alt_reg);
3104 }
3105
3106 /* Restore floating point registers. */
3107 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3108 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3109 {
3110 gcc_assert (!(cfa_off & 15));
3111 reg = gen_rtx_REG (XFmode, regno);
3112 do_restore (gen_fr_restore_x, reg, cfa_off);
3113 cfa_off -= 16;
3114 }
3115
3116 /* Restore ar.unat for real. */
3117 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3118 {
3119 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3120 emit_move_insn (reg, ar_unat_save_reg);
3121 }
3122
3123 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3124
3125 finish_spill_pointers ();
3126
3127 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3128 {
3129 /* ??? At this point we must generate a magic insn that appears to
3130 modify the spill iterators, the stack pointer, and the frame
3131 pointer. This would allow the most scheduling freedom. For now,
3132 just hard stop. */
3133 emit_insn (gen_blockage ());
3134 }
3135
3136 if (cfun->machine->ia64_eh_epilogue_sp)
3137 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3138 else if (frame_pointer_needed)
3139 {
3140 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3141 RTX_FRAME_RELATED_P (insn) = 1;
3142 }
3143 else if (current_frame_info.total_size)
3144 {
3145 rtx offset, frame_size_rtx;
3146
3147 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3148 if (CONST_OK_FOR_I (current_frame_info.total_size))
3149 offset = frame_size_rtx;
3150 else
3151 {
3152 regno = next_scratch_gr_reg ();
3153 offset = gen_rtx_REG (DImode, regno);
3154 emit_move_insn (offset, frame_size_rtx);
3155 }
3156
3157 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3158 offset));
3159
3160 RTX_FRAME_RELATED_P (insn) = 1;
3161 if (GET_CODE (offset) != CONST_INT)
3162 {
3163 REG_NOTES (insn)
3164 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3165 gen_rtx_SET (VOIDmode,
3166 stack_pointer_rtx,
3167 gen_rtx_PLUS (DImode,
3168 stack_pointer_rtx,
3169 frame_size_rtx)),
3170 REG_NOTES (insn));
3171 }
3172 }
3173
3174 if (cfun->machine->ia64_eh_epilogue_bsp)
3175 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3176
3177 if (! sibcall_p)
3178 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3179 else
3180 {
3181 int fp = GR_REG (2);
3182 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is
3183 the first available call-clobbered register.  If there is a frame
3184 pointer register, we may have swapped the names of r2 and
3185 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using the
3186 string "r2" when emitting the register name for the assembler. */
3187 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3188 fp = HARD_FRAME_POINTER_REGNUM;
3189
3190 /* We must emit an alloc to force the input registers to become output
3191 registers. Otherwise, if the callee tries to pass its parameters
3192 through to another call without an intervening alloc, then these
3193 values get lost. */
3194 /* ??? We don't need to preserve all input registers. We only need to
3195 preserve those input registers used as arguments to the sibling call.
3196 It is unclear how to compute that number here. */
3197 if (current_frame_info.n_input_regs != 0)
3198 {
3199 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3200 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3201 const0_rtx, const0_rtx,
3202 n_inputs, const0_rtx));
3203 RTX_FRAME_RELATED_P (insn) = 1;
3204 }
3205 }
3206 }
3207
3208 /* Return 1 if br.ret can do all the work required to return from a
3209 function. */
3210
3211 int
3212 ia64_direct_return (void)
3213 {
3214 if (reload_completed && ! frame_pointer_needed)
3215 {
3216 ia64_compute_frame_size (get_frame_size ());
3217
3218 return (current_frame_info.total_size == 0
3219 && current_frame_info.n_spilled == 0
3220 && current_frame_info.reg_save_b0 == 0
3221 && current_frame_info.reg_save_pr == 0
3222 && current_frame_info.reg_save_ar_pfs == 0
3223 && current_frame_info.reg_save_ar_unat == 0
3224 && current_frame_info.reg_save_ar_lc == 0);
3225 }
3226 return 0;
3227 }
3228
3229 /* Return the magic cookie that we use to hold the return address
3230 during early compilation. */
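/* This UNSPEC is what __builtin_return_address (0) expands to; after
 reload, ia64_split_return_addr_rtx below replaces it with the register
 or stack slot in which B0 was saved, or with B0 itself.  */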
3231
3232 rtx
3233 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3234 {
3235 if (count != 0)
3236 return NULL;
3237 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3238 }
3239
3240 /* Split this value after reload, now that we know where the return
3241 address is saved. */
3242
3243 void
3244 ia64_split_return_addr_rtx (rtx dest)
3245 {
3246 rtx src;
3247
3248 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3249 {
3250 if (current_frame_info.reg_save_b0 != 0)
3251 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3252 else
3253 {
3254 HOST_WIDE_INT off;
3255 unsigned int regno;
3256
3257 /* Compute offset from CFA for BR0. */
3258 /* ??? Must be kept in sync with ia64_expand_prologue. */
3259 off = (current_frame_info.spill_cfa_off
3260 + current_frame_info.spill_size);
3261 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3262 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3263 off -= 8;
3264
3265 /* Convert CFA offset to a register based offset. */
3266 if (frame_pointer_needed)
3267 src = hard_frame_pointer_rtx;
3268 else
3269 {
3270 src = stack_pointer_rtx;
3271 off += current_frame_info.total_size;
3272 }
3273
3274 /* Load address into scratch register. */
3275 if (CONST_OK_FOR_I (off))
3276 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3277 else
3278 {
3279 emit_move_insn (dest, GEN_INT (off));
3280 emit_insn (gen_adddi3 (dest, src, dest));
3281 }
3282
3283 src = gen_rtx_MEM (Pmode, dest);
3284 }
3285 }
3286 else
3287 src = gen_rtx_REG (DImode, BR_REG (0));
3288
3289 emit_move_insn (dest, src);
3290 }
3291
3292 int
3293 ia64_hard_regno_rename_ok (int from, int to)
3294 {
3295 /* Don't clobber any of the registers we reserved for the prologue. */
3296 if (to == current_frame_info.reg_fp
3297 || to == current_frame_info.reg_save_b0
3298 || to == current_frame_info.reg_save_pr
3299 || to == current_frame_info.reg_save_ar_pfs
3300 || to == current_frame_info.reg_save_ar_unat
3301 || to == current_frame_info.reg_save_ar_lc)
3302 return 0;
3303
3304 if (from == current_frame_info.reg_fp
3305 || from == current_frame_info.reg_save_b0
3306 || from == current_frame_info.reg_save_pr
3307 || from == current_frame_info.reg_save_ar_pfs
3308 || from == current_frame_info.reg_save_ar_unat
3309 || from == current_frame_info.reg_save_ar_lc)
3310 return 0;
3311
3312 /* Don't use output registers outside the register frame. */
3313 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3314 return 0;
3315
3316 /* Retain even/oddness on predicate register pairs. */
3317 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3318 return (from & 1) == (to & 1);
3319
3320 return 1;
3321 }
3322
3323 /* Target hook for assembling integer objects. Handle word-sized
3324 aligned objects and detect the cases when @fptr is needed. */
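/* For instance, an aligned word-sized initializer that refers to a
 function, such as "void (*fp) (void) = foo;", comes out roughly as

	data8	@fptr(foo)

 so the linker substitutes the address of foo's function descriptor
 rather than its code address.  */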
3325
3326 static bool
3327 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3328 {
3329 if (size == POINTER_SIZE / BITS_PER_UNIT
3330 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3331 && GET_CODE (x) == SYMBOL_REF
3332 && SYMBOL_REF_FUNCTION_P (x))
3333 {
3334 static const char * const directive[2][2] = {
3335 /* 64-bit pointer */ /* 32-bit pointer */
3336 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3337 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3338 };
3339 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3340 output_addr_const (asm_out_file, x);
3341 fputs (")\n", asm_out_file);
3342 return true;
3343 }
3344 return default_assemble_integer (x, size, aligned_p);
3345 }
3346
3347 /* Emit the function prologue. */
3348
3349 static void
3350 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3351 {
3352 int mask, grsave, grsave_prev;
3353
3354 if (current_frame_info.need_regstk)
3355 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3356 current_frame_info.n_input_regs,
3357 current_frame_info.n_local_regs,
3358 current_frame_info.n_output_regs,
3359 current_frame_info.n_rotate_regs);
3360
3361 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3362 return;
3363
3364 /* Emit the .prologue directive. */
3365
3366 mask = 0;
3367 grsave = grsave_prev = 0;
3368 if (current_frame_info.reg_save_b0 != 0)
3369 {
3370 mask |= 8;
3371 grsave = grsave_prev = current_frame_info.reg_save_b0;
3372 }
3373 if (current_frame_info.reg_save_ar_pfs != 0
3374 && (grsave_prev == 0
3375 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3376 {
3377 mask |= 4;
3378 if (grsave_prev == 0)
3379 grsave = current_frame_info.reg_save_ar_pfs;
3380 grsave_prev = current_frame_info.reg_save_ar_pfs;
3381 }
3382 if (current_frame_info.reg_fp != 0
3383 && (grsave_prev == 0
3384 || current_frame_info.reg_fp == grsave_prev + 1))
3385 {
3386 mask |= 2;
3387 if (grsave_prev == 0)
3388 grsave = HARD_FRAME_POINTER_REGNUM;
3389 grsave_prev = current_frame_info.reg_fp;
3390 }
3391 if (current_frame_info.reg_save_pr != 0
3392 && (grsave_prev == 0
3393 || current_frame_info.reg_save_pr == grsave_prev + 1))
3394 {
3395 mask |= 1;
3396 if (grsave_prev == 0)
3397 grsave = current_frame_info.reg_save_pr;
3398 }
3399
3400 if (mask && TARGET_GNU_AS)
3401 fprintf (file, "\t.prologue %d, %d\n", mask,
3402 ia64_dbx_register_number (grsave));
3403 else
3404 fputs ("\t.prologue\n", file);
3405
3406 /* Emit a .spill directive, if necessary, to relocate the base of
3407 the register spill area. */
3408 if (current_frame_info.spill_cfa_off != -16)
3409 fprintf (file, "\t.spill %ld\n",
3410 (long) (current_frame_info.spill_cfa_off
3411 + current_frame_info.spill_size));
3412 }
3413
3414 /* Emit the .body directive at the scheduled end of the prologue. */
3415
3416 static void
3417 ia64_output_function_end_prologue (FILE *file)
3418 {
3419 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3420 return;
3421
3422 fputs ("\t.body\n", file);
3423 }
3424
3425 /* Emit the function epilogue. */
3426
3427 static void
3428 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3429 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3430 {
3431 int i;
3432
3433 if (current_frame_info.reg_fp)
3434 {
3435 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3436 reg_names[HARD_FRAME_POINTER_REGNUM]
3437 = reg_names[current_frame_info.reg_fp];
3438 reg_names[current_frame_info.reg_fp] = tmp;
3439 }
3440 if (! TARGET_REG_NAMES)
3441 {
3442 for (i = 0; i < current_frame_info.n_input_regs; i++)
3443 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3444 for (i = 0; i < current_frame_info.n_local_regs; i++)
3445 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3446 for (i = 0; i < current_frame_info.n_output_regs; i++)
3447 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3448 }
3449
3450 current_frame_info.initialized = 0;
3451 }
3452
3453 int
3454 ia64_dbx_register_number (int regno)
3455 {
3456 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3457 from its home at loc79 to something inside the register frame. We
3458 must perform the same renumbering here for the debug info. */
3459 if (current_frame_info.reg_fp)
3460 {
3461 if (regno == HARD_FRAME_POINTER_REGNUM)
3462 regno = current_frame_info.reg_fp;
3463 else if (regno == current_frame_info.reg_fp)
3464 regno = HARD_FRAME_POINTER_REGNUM;
3465 }
3466
3467 if (IN_REGNO_P (regno))
3468 return 32 + regno - IN_REG (0);
3469 else if (LOC_REGNO_P (regno))
3470 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3471 else if (OUT_REGNO_P (regno))
3472 return (32 + current_frame_info.n_input_regs
3473 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3474 else
3475 return regno;
3476 }
3477
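/* Emit the instructions that initialize the 32-byte trampoline at ADDR.
 Schematically it ends up laid out as follows (offsets in bytes; the
 exact behavior of libgcc's __ia64_trampoline stub is not repeated here):

	[ADDR+ 0]  __ia64_trampoline	fake descriptor: entry point
	[ADDR+ 8]  ADDR+16		fake descriptor: "gp"
	[ADDR+16]  FNADDR		the real target descriptor
	[ADDR+24]  STATIC_CHAIN

 The stub reaches the last two words through its "gp" and transfers
 control to the real target with the static chain loaded.  */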
3478 void
3479 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3480 {
3481 rtx addr_reg, eight = GEN_INT (8);
3482
3483 /* The Intel assembler requires that the global __ia64_trampoline symbol
3484 be declared explicitly. */
3485 if (!TARGET_GNU_AS)
3486 {
3487 static bool declared_ia64_trampoline = false;
3488
3489 if (!declared_ia64_trampoline)
3490 {
3491 declared_ia64_trampoline = true;
3492 (*targetm.asm_out.globalize_label) (asm_out_file,
3493 "__ia64_trampoline");
3494 }
3495 }
3496
3497 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3498 addr = convert_memory_address (Pmode, addr);
3499 fnaddr = convert_memory_address (Pmode, fnaddr);
3500 static_chain = convert_memory_address (Pmode, static_chain);
3501
3502 /* Load up our iterator. */
3503 addr_reg = gen_reg_rtx (Pmode);
3504 emit_move_insn (addr_reg, addr);
3505
3506 /* The first two words are the fake descriptor:
3507 __ia64_trampoline, ADDR+16. */
3508 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3509 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3510 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3511
3512 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3513 copy_to_reg (plus_constant (addr, 16)));
3514 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3515
3516 /* The third word is the target descriptor. */
3517 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3518 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3519
3520 /* The fourth word is the static chain. */
3521 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3522 }
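/* A sketch of the trampoline block built by the stores above, as four
   8-byte words starting at ADDR:

       ADDR +  0:  __ia64_trampoline     fake descriptor: entry point
       ADDR +  8:  ADDR + 16             fake descriptor: gp slot
       ADDR + 16:  FNADDR                the real target descriptor
       ADDR + 24:  STATIC_CHAIN

   The __ia64_trampoline helper is expected to pick up the target
   descriptor and static chain from this block at run time.  */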
3523 \f
3524 /* Do any needed setup for a variadic function. CUM has not been updated
3525 for the last named argument which has type TYPE and mode MODE.
3526
3527 We generate the actual spill instructions during prologue generation. */
3528
3529 static void
3530 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3531 tree type, int * pretend_size,
3532 int second_time ATTRIBUTE_UNUSED)
3533 {
3534 CUMULATIVE_ARGS next_cum = *cum;
3535
3536 /* Skip the current argument. */
3537 ia64_function_arg_advance (&next_cum, mode, type, 1);
3538
3539 if (next_cum.words < MAX_ARGUMENT_SLOTS)
3540 {
3541 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3542 *pretend_size = n * UNITS_PER_WORD;
3543 cfun->machine->n_varargs = n;
3544 }
3545 }
3546
3547 /* Check whether TYPE is a homogeneous floating point aggregate. If
3548 it is, return the mode of the floating point type that appears
3549 in all leaves. If it is not, return VOIDmode.
3550
3551 An aggregate is a homogeneous floating point aggregate if all
3552 fields/elements in it have the same floating point type (e.g.,
3553 SFmode). 128-bit quad-precision floats are excluded.
3554
3555 Variable sized aggregates should never arrive here, since we should
3556 have already decided to pass them by reference. Top-level zero-sized
3557 aggregates are excluded because our parallels crash the middle-end. */
3558
3559 static enum machine_mode
3560 hfa_element_mode (tree type, bool nested)
3561 {
3562 enum machine_mode element_mode = VOIDmode;
3563 enum machine_mode mode;
3564 enum tree_code code = TREE_CODE (type);
3565 int know_element_mode = 0;
3566 tree t;
3567
3568 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3569 return VOIDmode;
3570
3571 switch (code)
3572 {
3573 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3574 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3575 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3576 case LANG_TYPE: case FUNCTION_TYPE:
3577 return VOIDmode;
3578
3579 /* Fortran complex types are supposed to be HFAs, so we need to handle
3580 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3581 types though. */
3582 case COMPLEX_TYPE:
3583 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3584 && TYPE_MODE (type) != TCmode)
3585 return GET_MODE_INNER (TYPE_MODE (type));
3586 else
3587 return VOIDmode;
3588
3589 case REAL_TYPE:
3590 /* We want to return VOIDmode for raw REAL_TYPEs, but return the
3591 actual mode if this is contained within an aggregate. */
3592 if (nested && TYPE_MODE (type) != TFmode)
3593 return TYPE_MODE (type);
3594 else
3595 return VOIDmode;
3596
3597 case ARRAY_TYPE:
3598 return hfa_element_mode (TREE_TYPE (type), 1);
3599
3600 case RECORD_TYPE:
3601 case UNION_TYPE:
3602 case QUAL_UNION_TYPE:
3603 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3604 {
3605 if (TREE_CODE (t) != FIELD_DECL)
3606 continue;
3607
3608 mode = hfa_element_mode (TREE_TYPE (t), 1);
3609 if (know_element_mode)
3610 {
3611 if (mode != element_mode)
3612 return VOIDmode;
3613 }
3614 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3615 return VOIDmode;
3616 else
3617 {
3618 know_element_mode = 1;
3619 element_mode = mode;
3620 }
3621 }
3622 return element_mode;
3623
3624 default:
3625 /* If we reach here, we probably have some front-end specific type
3626 that the backend doesn't know about. This can happen via the
3627 aggregate_value_p call in init_function_start. All we can do is
3628 ignore unknown tree types. */
3629 return VOIDmode;
3630 }
3631
3632 return VOIDmode;
3633 }
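/* A few illustrative inputs, assuming the usual C type mappings on
   IA-64 (float is SFmode, double is DFmode):

       struct { float x, y, z; }     ->  SFmode   (an HFA of three floats)
       struct { double d[4]; }       ->  DFmode   (arrays recurse via ARRAY_TYPE)
       struct { double d; int i; }   ->  VOIDmode (mixed element types)
       _Complex float                ->  SFmode   (treated as two floats)

   Quad-precision (TFmode) elements are rejected as noted above.  */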
3634
3635 /* Return the number of words required to hold a quantity of TYPE and MODE
3636 when passed as an argument. */
3637 static int
3638 ia64_function_arg_words (tree type, enum machine_mode mode)
3639 {
3640 int words;
3641
3642 if (mode == BLKmode)
3643 words = int_size_in_bytes (type);
3644 else
3645 words = GET_MODE_SIZE (mode);
3646
3647 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
3648 }
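/* For example, assuming UNITS_PER_WORD is 8 on IA-64: a 20-byte
   BLKmode aggregate occupies (20 + 7) / 8 = 3 argument words, while a
   4-byte SImode value still occupies one full word.  */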
3649
3650 /* Return the number of registers that should be skipped so the current
3651 argument (described by TYPE and WORDS) will be properly aligned.
3652
3653 Integer and float arguments larger than 8 bytes start at the next
3654 even boundary. Aggregates larger than 8 bytes start at the next
3655 even boundary if the aggregate has 16 byte alignment. Note that
3656 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3657 but are still to be aligned in registers.
3658
3659 ??? The ABI does not specify how to handle aggregates with
3660 alignment from 9 to 15 bytes, or greater than 16. We handle them
3661 all as if they had 16 byte alignment. Such aggregates can occur
3662 only if gcc extensions are used. */
3663 static int
3664 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3665 {
3666 if ((cum->words & 1) == 0)
3667 return 0;
3668
3669 if (type
3670 && TREE_CODE (type) != INTEGER_TYPE
3671 && TREE_CODE (type) != REAL_TYPE)
3672 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3673 else
3674 return words > 1;
3675 }
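/* For instance, if the preceding arguments ended in an odd slot
   (cum->words is odd), a 16-byte-aligned aggregate or a two-word
   integer gets an offset of 1 so that it starts at the next even
   slot, whereas a one-word argument gets an offset of 0 and simply
   fills the odd slot.  */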
3676
3677 /* Return rtx for register where argument is passed, or zero if it is passed
3678 on the stack. */
3679 /* ??? 128-bit quad-precision floats are always passed in general
3680 registers. */
3681
3682 rtx
3683 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3684 int named, int incoming)
3685 {
3686 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3687 int words = ia64_function_arg_words (type, mode);
3688 int offset = ia64_function_arg_offset (cum, type, words);
3689 enum machine_mode hfa_mode = VOIDmode;
3690
3691 /* If all argument slots are used, then it must go on the stack. */
3692 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3693 return 0;
3694
3695 /* Check for and handle homogeneous FP aggregates. */
3696 if (type)
3697 hfa_mode = hfa_element_mode (type, 0);
3698
3699 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3700 and unprototyped hfas are passed specially. */
3701 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3702 {
3703 rtx loc[16];
3704 int i = 0;
3705 int fp_regs = cum->fp_regs;
3706 int int_regs = cum->words + offset;
3707 int hfa_size = GET_MODE_SIZE (hfa_mode);
3708 int byte_size;
3709 int args_byte_size;
3710
3711 /* If prototyped, pass it in FR regs then GR regs.
3712 If not prototyped, pass it in both FR and GR regs.
3713
3714 If this is an SFmode aggregate, then it is possible to run out of
3715 FR regs while GR regs are still left. In that case, we pass the
3716 remaining part in the GR regs. */
3717
3718 /* Fill the FP regs. We do this always. We stop if we reach the end
3719 of the argument, the last FP register, or the last argument slot. */
3720
3721 byte_size = ((mode == BLKmode)
3722 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3723 args_byte_size = int_regs * UNITS_PER_WORD;
3724 offset = 0;
3725 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3726 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3727 {
3728 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3729 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3730 + fp_regs)),
3731 GEN_INT (offset));
3732 offset += hfa_size;
3733 args_byte_size += hfa_size;
3734 fp_regs++;
3735 }
3736
3737 /* If no prototype, then the whole thing must go in GR regs. */
3738 if (! cum->prototype)
3739 offset = 0;
3740 /* If this is an SFmode aggregate, then we might have some left over
3741 that needs to go in GR regs. */
3742 else if (byte_size != offset)
3743 int_regs += offset / UNITS_PER_WORD;
3744
3745 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3746
3747 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3748 {
3749 enum machine_mode gr_mode = DImode;
3750 unsigned int gr_size;
3751
3752 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3753 then this goes in a GR reg left adjusted/little endian, right
3754 adjusted/big endian. */
3755 /* ??? Currently this is handled wrong, because 4-byte hunks are
3756 always right adjusted/little endian. */
3757 if (offset & 0x4)
3758 gr_mode = SImode;
3759 /* If we have an even 4 byte hunk because the aggregate is a
3760 multiple of 4 bytes in size, then this goes in a GR reg right
3761 adjusted/little endian. */
3762 else if (byte_size - offset == 4)
3763 gr_mode = SImode;
3764
3765 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3766 gen_rtx_REG (gr_mode, (basereg
3767 + int_regs)),
3768 GEN_INT (offset));
3769
3770 gr_size = GET_MODE_SIZE (gr_mode);
3771 offset += gr_size;
3772 if (gr_size == UNITS_PER_WORD
3773 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3774 int_regs++;
3775 else if (gr_size > UNITS_PER_WORD)
3776 int_regs += gr_size / UNITS_PER_WORD;
3777 }
3778 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3779 }
3780
3781 /* Integral and aggregates go in general registers. If we have run out of
3782 FR registers, then FP values must also go in general registers. This can
3783 happen when we have an SFmode HFA. */
3784 else if (mode == TFmode || mode == TCmode
3785 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3786 {
3787 int byte_size = ((mode == BLKmode)
3788 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3789 if (BYTES_BIG_ENDIAN
3790 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3791 && byte_size < UNITS_PER_WORD
3792 && byte_size > 0)
3793 {
3794 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3795 gen_rtx_REG (DImode,
3796 (basereg + cum->words
3797 + offset)),
3798 const0_rtx);
3799 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3800 }
3801 else
3802 return gen_rtx_REG (mode, basereg + cum->words + offset);
3803
3804 }
3805
3806 /* If there is a prototype, then FP values go in a FR register when
3807 named, and in a GR register when unnamed. */
3808 else if (cum->prototype)
3809 {
3810 if (named)
3811 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3812 /* In big-endian mode, an anonymous SFmode value must be represented
3813 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3814 the value into the high half of the general register. */
3815 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3816 return gen_rtx_PARALLEL (mode,
3817 gen_rtvec (1,
3818 gen_rtx_EXPR_LIST (VOIDmode,
3819 gen_rtx_REG (DImode, basereg + cum->words + offset),
3820 const0_rtx)));
3821 else
3822 return gen_rtx_REG (mode, basereg + cum->words + offset);
3823 }
3824 /* If there is no prototype, then FP values go in both FR and GR
3825 registers. */
3826 else
3827 {
3828 /* See comment above. */
3829 enum machine_mode inner_mode =
3830 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3831
3832 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3833 gen_rtx_REG (mode, (FR_ARG_FIRST
3834 + cum->fp_regs)),
3835 const0_rtx);
3836 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3837 gen_rtx_REG (inner_mode,
3838 (basereg + cum->words
3839 + offset)),
3840 const0_rtx);
3841
3842 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3843 }
3844 }
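/* A concrete sketch of the HFA path above: a named, prototyped
   argument of type struct { double x, y, z; }, arriving with
   cum->words == 0 and cum->fp_regs == 0, comes back as

       (parallel [(expr_list (reg:DF f8)  (const_int 0))
                  (expr_list (reg:DF f9)  (const_int 8))
                  (expr_list (reg:DF f10) (const_int 16))])

   assuming FR_ARG_FIRST is f8, the first floating-point argument
   register in the IA-64 calling convention.  */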
3845
3846 /* Return number of bytes, at the beginning of the argument, that must be
3847 put in registers. 0 if the argument is entirely in registers or entirely
3848 in memory. */
3849
3850 static int
3851 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3852 tree type, bool named ATTRIBUTE_UNUSED)
3853 {
3854 int words = ia64_function_arg_words (type, mode);
3855 int offset = ia64_function_arg_offset (cum, type, words);
3856
3857 /* If all argument slots are used, then it must go on the stack. */
3858 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3859 return 0;
3860
3861 /* It doesn't matter whether the argument goes in FR or GR regs. If
3862 it fits within the 8 argument slots, then it goes entirely in
3863 registers. If it extends past the last argument slot, then the rest
3864 goes on the stack. */
3865
3866 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3867 return 0;
3868
3869 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
3870 }
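/* Example: with MAX_ARGUMENT_SLOTS == 8, an argument needing 4 words
   that begins at slot 6 has its first 2 words passed in registers and
   the remaining 2 on the stack, so this returns 16 bytes.  */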
3871
3872 /* Update CUM to point after this argument. This is patterned after
3873 ia64_function_arg. */
3874
3875 void
3876 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3877 tree type, int named)
3878 {
3879 int words = ia64_function_arg_words (type, mode);
3880 int offset = ia64_function_arg_offset (cum, type, words);
3881 enum machine_mode hfa_mode = VOIDmode;
3882
3883 /* If all arg slots are already full, then there is nothing to do. */
3884 if (cum->words >= MAX_ARGUMENT_SLOTS)
3885 return;
3886
3887 cum->words += words + offset;
3888
3889 /* Check for and handle homogeneous FP aggregates. */
3890 if (type)
3891 hfa_mode = hfa_element_mode (type, 0);
3892
3893 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3894 and unprototyped hfas are passed specially. */
3895 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3896 {
3897 int fp_regs = cum->fp_regs;
3898 /* This is the original value of cum->words + offset. */
3899 int int_regs = cum->words - words;
3900 int hfa_size = GET_MODE_SIZE (hfa_mode);
3901 int byte_size;
3902 int args_byte_size;
3903
3904 /* If prototyped, pass it in FR regs then GR regs.
3905 If not prototyped, pass it in both FR and GR regs.
3906
3907 If this is an SFmode aggregate, then it is possible to run out of
3908 FR regs while GR regs are still left. In that case, we pass the
3909 remaining part in the GR regs. */
3910
3911 /* Fill the FP regs. We do this always. We stop if we reach the end
3912 of the argument, the last FP register, or the last argument slot. */
3913
3914 byte_size = ((mode == BLKmode)
3915 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3916 args_byte_size = int_regs * UNITS_PER_WORD;
3917 offset = 0;
3918 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3919 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3920 {
3921 offset += hfa_size;
3922 args_byte_size += hfa_size;
3923 fp_regs++;
3924 }
3925
3926 cum->fp_regs = fp_regs;
3927 }
3928
3929 /* Integral and aggregates go in general registers. So do TFmode FP values.
3930 If we have run out of FR registers, then other FP values must also go in
3931 general registers. This can happen when we have an SFmode HFA. */
3932 else if (mode == TFmode || mode == TCmode
3933 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3934 cum->int_regs = cum->words;
3935
3936 /* If there is a prototype, then FP values go in a FR register when
3937 named, and in a GR register when unnamed. */
3938 else if (cum->prototype)
3939 {
3940 if (! named)
3941 cum->int_regs = cum->words;
3942 else
3943 /* ??? Complex types should not reach here. */
3944 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3945 }
3946 /* If there is no prototype, then FP values go in both FR and GR
3947 registers. */
3948 else
3949 {
3950 /* ??? Complex types should not reach here. */
3951 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3952 cum->int_regs = cum->words;
3953 }
3954 }
3955
3956 /* Arguments with alignment larger than 8 bytes start at the next even
3957 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
3958 even though their normal alignment is 8 bytes. See ia64_function_arg. */
3959
3960 int
3961 ia64_function_arg_boundary (enum machine_mode mode, tree type)
3962 {
3963
3964 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
3965 return PARM_BOUNDARY * 2;
3966
3967 if (type)
3968 {
3969 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
3970 return PARM_BOUNDARY * 2;
3971 else
3972 return PARM_BOUNDARY;
3973 }
3974
3975 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
3976 return PARM_BOUNDARY * 2;
3977 else
3978 return PARM_BOUNDARY;
3979 }
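/* E.g. with PARM_BOUNDARY == 64, a plain long is aligned to 64 bits,
   while a 16-byte-aligned aggregate, a TImode value, or (on ILP32
   HP-UX) a TFmode value is aligned to 128 bits.  */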
3980
3981 /* Variable sized types are passed by reference. */
3982 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3983
3984 static bool
3985 ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3986 enum machine_mode mode ATTRIBUTE_UNUSED,
3987 tree type, bool named ATTRIBUTE_UNUSED)
3988 {
3989 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3990 }
3991
3992 /* True if it is OK to do sibling call optimization for the specified
3993 call expression EXP. DECL will be the called function, or NULL if
3994 this is an indirect call. */
3995 static bool
3996 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3997 {
3998 /* We can't perform a sibcall if the current function has the syscall_linkage
3999 attribute. */
4000 if (lookup_attribute ("syscall_linkage",
4001 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4002 return false;
4003
4004 /* We must always return with our current GP. This means we can
4005 only sibcall to functions defined in the current module. */
4006 return decl && (*targetm.binds_local_p) (decl);
4007 }
4008 \f
4009
4010 /* Implement va_arg. */
4011
4012 static tree
4013 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4014 {
4015 /* Variable sized types are passed by reference. */
4016 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4017 {
4018 tree ptrtype = build_pointer_type (type);
4019 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4020 return build_va_arg_indirect_ref (addr);
4021 }
4022
4023 /* Aggregate arguments with alignment larger than 8 bytes start at
4024 the next even boundary. Integer and floating point arguments
4025 do so if they are larger than 8 bytes, whether or not they are
4026 also aligned larger than 8 bytes. */
4027 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4028 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4029 {
4030 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
4031 build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
4032 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
4033 build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
4034 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4035 gimplify_and_add (t, pre_p);
4036 }
4037
4038 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4039 }
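/* The alignment step above is the usual round-up idiom.  Assuming
   8-byte words, the generated GIMPLE computes roughly

       valist = (valist + 15) & -16;

   after which the standard va_arg expansion takes over.  */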
4040 \f
4041 /* Return true if the function return value is returned in memory. Return
4042 false if it is in a register. */
4043
4044 static bool
4045 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
4046 {
4047 enum machine_mode mode;
4048 enum machine_mode hfa_mode;
4049 HOST_WIDE_INT byte_size;
4050
4051 mode = TYPE_MODE (valtype);
4052 byte_size = GET_MODE_SIZE (mode);
4053 if (mode == BLKmode)
4054 {
4055 byte_size = int_size_in_bytes (valtype);
4056 if (byte_size < 0)
4057 return true;
4058 }
4059
4060 /* HFAs with up to 8 elements are returned in the FP argument registers. */
4061
4062 hfa_mode = hfa_element_mode (valtype, 0);
4063 if (hfa_mode != VOIDmode)
4064 {
4065 int hfa_size = GET_MODE_SIZE (hfa_mode);
4066
4067 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4068 return true;
4069 else
4070 return false;
4071 }
4072 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4073 return true;
4074 else
4075 return false;
4076 }
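/* Examples of the classification above: an HFA of eight doubles
   (64 bytes, 8 elements) is still returned in the FP argument
   registers (f8 upward), while an HFA of ten floats (10 elements)
   exceeds MAX_ARGUMENT_SLOTS and is returned in memory.  Non-HFA
   aggregates go to memory once they outgrow the integer return
   slots.  */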
4077
4078 /* Return rtx for register that holds the function return value. */
4079
4080 rtx
4081 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4082 {
4083 enum machine_mode mode;
4084 enum machine_mode hfa_mode;
4085
4086 mode = TYPE_MODE (valtype);
4087 hfa_mode = hfa_element_mode (valtype, 0);
4088
4089 if (hfa_mode != VOIDmode)
4090 {
4091 rtx loc[8];
4092 int i;
4093 int hfa_size;
4094 int byte_size;
4095 int offset;
4096
4097 hfa_size = GET_MODE_SIZE (hfa_mode);
4098 byte_size = ((mode == BLKmode)
4099 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4100 offset = 0;
4101 for (i = 0; offset < byte_size; i++)
4102 {
4103 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4104 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4105 GEN_INT (offset));
4106 offset += hfa_size;
4107 }
4108 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4109 }
4110 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4111 return gen_rtx_REG (mode, FR_ARG_FIRST);
4112 else
4113 {
4114 bool need_parallel = false;
4115
4116 /* In big-endian mode, we need to manage the layout of aggregates
4117 in the registers so that we get the bits properly aligned in
4118 the highpart of the registers. */
4119 if (BYTES_BIG_ENDIAN
4120 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4121 need_parallel = true;
4122
4123 /* Something like struct S { long double x; char a[0] } is not an
4124 HFA structure, and therefore doesn't go in fp registers. But
4125 the middle-end will give it XFmode anyway, and XFmode values
4126 don't normally fit in integer registers. So we need to smuggle
4127 the value inside a parallel. */
4128 else if (mode == XFmode || mode == XCmode)
4129 need_parallel = true;
4130
4131 if (need_parallel)
4132 {
4133 rtx loc[8];
4134 int offset;
4135 int bytesize;
4136 int i;
4137
4138 offset = 0;
4139 bytesize = int_size_in_bytes (valtype);
4140 /* An empty PARALLEL is invalid here, but the return value
4141 doesn't matter for empty structs. */
4142 if (bytesize == 0)
4143 return gen_rtx_REG (mode, GR_RET_FIRST);
4144 for (i = 0; offset < bytesize; i++)
4145 {
4146 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4147 gen_rtx_REG (DImode,
4148 GR_RET_FIRST + i),
4149 GEN_INT (offset));
4150 offset += UNITS_PER_WORD;
4151 }
4152 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4153 }
4154
4155 return gen_rtx_REG (mode, GR_RET_FIRST);
4156 }
4157 }
4158
4159 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
4160 We need to emit DTP-relative relocations. */
4161
4162 void
4163 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4164 {
4165 gcc_assert (size == 8);
4166 fputs ("\tdata8.ua\t@dtprel(", file);
4167 output_addr_const (file, x);
4168 fputs (")", file);
4169 }
4170
4171 /* Print a memory address as an operand to reference that memory location. */
4172
4173 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4174 also call this from ia64_print_operand for memory addresses. */
4175
4176 void
4177 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4178 rtx address ATTRIBUTE_UNUSED)
4179 {
4180 }
4181
4182 /* Print an operand to an assembler instruction.
4183 C Swap and print a comparison operator.
4184 D Print an FP comparison operator.
4185 E Print 32 - constant, for SImode shifts as extract.
4186 e Print 64 - constant, for DImode rotates.
4187 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4188 a floating point register emitted normally.
4189 I Invert a predicate register by adding 1.
4190 J Select the proper predicate register for a condition.
4191 j Select the inverse predicate register for a condition.
4192 O Append .acq for volatile load.
4193 P Postincrement of a MEM.
4194 Q Append .rel for volatile store.
4195 S Shift amount for shladd instruction.
4196 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4197 for Intel assembler.
4198 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4199 for Intel assembler.
4200 r Print register name, or constant 0 as r0. HP compatibility for
4201 Linux kernel.
4202 v Print vector constant value as an 8-byte integer value. */
4203
4204 void
4205 ia64_print_operand (FILE * file, rtx x, int code)
4206 {
4207 const char *str;
4208
4209 switch (code)
4210 {
4211 case 0:
4212 /* Handled below. */
4213 break;
4214
4215 case 'C':
4216 {
4217 enum rtx_code c = swap_condition (GET_CODE (x));
4218 fputs (GET_RTX_NAME (c), file);
4219 return;
4220 }
4221
4222 case 'D':
4223 switch (GET_CODE (x))
4224 {
4225 case NE:
4226 str = "neq";
4227 break;
4228 case UNORDERED:
4229 str = "unord";
4230 break;
4231 case ORDERED:
4232 str = "ord";
4233 break;
4234 default:
4235 str = GET_RTX_NAME (GET_CODE (x));
4236 break;
4237 }
4238 fputs (str, file);
4239 return;
4240
4241 case 'E':
4242 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4243 return;
4244
4245 case 'e':
4246 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4247 return;
4248
4249 case 'F':
4250 if (x == CONST0_RTX (GET_MODE (x)))
4251 str = reg_names [FR_REG (0)];
4252 else if (x == CONST1_RTX (GET_MODE (x)))
4253 str = reg_names [FR_REG (1)];
4254 else
4255 {
4256 gcc_assert (GET_CODE (x) == REG);
4257 str = reg_names [REGNO (x)];
4258 }
4259 fputs (str, file);
4260 return;
4261
4262 case 'I':
4263 fputs (reg_names [REGNO (x) + 1], file);
4264 return;
4265
4266 case 'J':
4267 case 'j':
4268 {
4269 unsigned int regno = REGNO (XEXP (x, 0));
4270 if (GET_CODE (x) == EQ)
4271 regno += 1;
4272 if (code == 'j')
4273 regno ^= 1;
4274 fputs (reg_names [regno], file);
4275 }
4276 return;
4277
4278 case 'O':
4279 if (MEM_VOLATILE_P (x))
4280 fputs(".acq", file);
4281 return;
4282
4283 case 'P':
4284 {
4285 HOST_WIDE_INT value;
4286
4287 switch (GET_CODE (XEXP (x, 0)))
4288 {
4289 default:
4290 return;
4291
4292 case POST_MODIFY:
4293 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4294 if (GET_CODE (x) == CONST_INT)
4295 value = INTVAL (x);
4296 else
4297 {
4298 gcc_assert (GET_CODE (x) == REG);
4299 fprintf (file, ", %s", reg_names[REGNO (x)]);
4300 return;
4301 }
4302 break;
4303
4304 case POST_INC:
4305 value = GET_MODE_SIZE (GET_MODE (x));
4306 break;
4307
4308 case POST_DEC:
4309 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4310 break;
4311 }
4312
4313 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4314 return;
4315 }
4316
4317 case 'Q':
4318 if (MEM_VOLATILE_P (x))
4319 fputs(".rel", file);
4320 return;
4321
4322 case 'S':
4323 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4324 return;
4325
4326 case 'T':
4327 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4328 {
4329 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4330 return;
4331 }
4332 break;
4333
4334 case 'U':
4335 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4336 {
4337 const char *prefix = "0x";
4338 if (INTVAL (x) & 0x80000000)
4339 {
4340 fprintf (file, "0xffffffff");
4341 prefix = "";
4342 }
4343 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4344 return;
4345 }
4346 break;
4347
4348 case 'r':
4349 /* If this operand is the constant zero, write it as register zero.
4350 Any register, zero, or CONST_INT value is OK here. */
4351 if (GET_CODE (x) == REG)
4352 fputs (reg_names[REGNO (x)], file);
4353 else if (x == CONST0_RTX (GET_MODE (x)))
4354 fputs ("r0", file);
4355 else if (GET_CODE (x) == CONST_INT)
4356 output_addr_const (file, x);
4357 else
4358 output_operand_lossage ("invalid %%r value");
4359 return;
4360
4361 case 'v':
4362 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4363 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4364 break;
4365
4366 case '+':
4367 {
4368 const char *which;
4369
4370 /* For conditional branches, returns or calls, substitute
4371 sptk, dptk, dpnt, or spnt for %s. */
4372 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4373 if (x)
4374 {
4375 int pred_val = INTVAL (XEXP (x, 0));
4376
4377 /* Guess the top and bottom 2% statically predicted. */
4378 if (pred_val < REG_BR_PROB_BASE / 50)
4379 which = ".spnt";
4380 else if (pred_val < REG_BR_PROB_BASE / 2)
4381 which = ".dpnt";
4382 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4383 which = ".dptk";
4384 else
4385 which = ".sptk";
4386 }
4387 else if (GET_CODE (current_output_insn) == CALL_INSN)
4388 which = ".sptk";
4389 else
4390 which = ".dptk";
4391
4392 fputs (which, file);
4393 return;
4394 }
4395
4396 case ',':
4397 x = current_insn_predicate;
4398 if (x)
4399 {
4400 unsigned int regno = REGNO (XEXP (x, 0));
4401 if (GET_CODE (x) == EQ)
4402 regno += 1;
4403 fprintf (file, "(%s) ", reg_names [regno]);
4404 }
4405 return;
4406
4407 default:
4408 output_operand_lossage ("ia64_print_operand: unknown code");
4409 return;
4410 }
4411
4412 switch (GET_CODE (x))
4413 {
4414 /* This happens for the spill/restore instructions. */
4415 case POST_INC:
4416 case POST_DEC:
4417 case POST_MODIFY:
4418 x = XEXP (x, 0);
4419 /* ... fall through ... */
4420
4421 case REG:
4422 fputs (reg_names [REGNO (x)], file);
4423 break;
4424
4425 case MEM:
4426 {
4427 rtx addr = XEXP (x, 0);
4428 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4429 addr = XEXP (addr, 0);
4430 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4431 break;
4432 }
4433
4434 default:
4435 output_addr_const (file, x);
4436 break;
4437 }
4438
4439 return;
4440 }
4441 \f
4442 /* Compute a (partial) cost for rtx X. Return true if the complete
4443 cost has been computed, and false if subexpressions should be
4444 scanned. In either case, *TOTAL contains the cost result. */
4445 /* ??? This is incomplete. */
4446
4447 static bool
4448 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4449 {
4450 switch (code)
4451 {
4452 case CONST_INT:
4453 switch (outer_code)
4454 {
4455 case SET:
4456 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4457 return true;
4458 case PLUS:
4459 if (CONST_OK_FOR_I (INTVAL (x)))
4460 *total = 0;
4461 else if (CONST_OK_FOR_J (INTVAL (x)))
4462 *total = 1;
4463 else
4464 *total = COSTS_N_INSNS (1);
4465 return true;
4466 default:
4467 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4468 *total = 0;
4469 else
4470 *total = COSTS_N_INSNS (1);
4471 return true;
4472 }
4473
4474 case CONST_DOUBLE:
4475 *total = COSTS_N_INSNS (1);
4476 return true;
4477
4478 case CONST:
4479 case SYMBOL_REF:
4480 case LABEL_REF:
4481 *total = COSTS_N_INSNS (3);
4482 return true;
4483
4484 case MULT:
4485 /* For multiplies wider than HImode, we have to go to the FPU,
4486 which normally involves copies. Plus there's the latency
4487 of the multiply itself, and the latency of the instructions to
4488 transfer integer regs to FP regs. */
4489 /* ??? Check for FP mode. */
4490 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4491 *total = COSTS_N_INSNS (10);
4492 else
4493 *total = COSTS_N_INSNS (2);
4494 return true;
4495
4496 case PLUS:
4497 case MINUS:
4498 case ASHIFT:
4499 case ASHIFTRT:
4500 case LSHIFTRT:
4501 *total = COSTS_N_INSNS (1);
4502 return true;
4503
4504 case DIV:
4505 case UDIV:
4506 case MOD:
4507 case UMOD:
4508 /* We make divide expensive, so that divide-by-constant will be
4509 optimized to a multiply. */
4510 *total = COSTS_N_INSNS (60);
4511 return true;
4512
4513 default:
4514 return false;
4515 }
4516 }
4517
4518 /* Calculate the cost of moving data from a register in class FROM to
4519 one in class TO, using MODE. */
4520
4521 int
4522 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4523 enum reg_class to)
4524 {
4525 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4526 if (to == ADDL_REGS)
4527 to = GR_REGS;
4528 if (from == ADDL_REGS)
4529 from = GR_REGS;
4530
4531 /* All costs are symmetric, so reduce cases by putting the
4532 lower number class as the destination. */
4533 if (from < to)
4534 {
4535 enum reg_class tmp = to;
4536 to = from, from = tmp;
4537 }
4538
4539 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4540 so that we get secondary memory reloads. Between FR_REGS,
4541 we have to make this at least as expensive as MEMORY_MOVE_COST
4542 to avoid spectacularly poor register class preferencing. */
4543 if (mode == XFmode)
4544 {
4545 if (to != GR_REGS || from != GR_REGS)
4546 return MEMORY_MOVE_COST (mode, to, 0);
4547 else
4548 return 3;
4549 }
4550
4551 switch (to)
4552 {
4553 case PR_REGS:
4554 /* Moving between PR registers takes two insns. */
4555 if (from == PR_REGS)
4556 return 3;
4557 /* Moving between PR and anything but GR is impossible. */
4558 if (from != GR_REGS)
4559 return MEMORY_MOVE_COST (mode, to, 0);
4560 break;
4561
4562 case BR_REGS:
4563 /* Moving between BR and anything but GR is impossible. */
4564 if (from != GR_REGS && from != GR_AND_BR_REGS)
4565 return MEMORY_MOVE_COST (mode, to, 0);
4566 break;
4567
4568 case AR_I_REGS:
4569 case AR_M_REGS:
4570 /* Moving between AR and anything but GR is impossible. */
4571 if (from != GR_REGS)
4572 return MEMORY_MOVE_COST (mode, to, 0);
4573 break;
4574
4575 case GR_REGS:
4576 case FR_REGS:
4577 case GR_AND_FR_REGS:
4578 case GR_AND_BR_REGS:
4579 case ALL_REGS:
4580 break;
4581
4582 default:
4583 gcc_unreachable ();
4584 }
4585
4586 return 2;
4587 }
4588
4589 /* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on CLASS
4590 to use when copying X into that class. */
4591
4592 enum reg_class
4593 ia64_preferred_reload_class (rtx x, enum reg_class class)
4594 {
4595 switch (class)
4596 {
4597 case FR_REGS:
4598 /* Don't allow volatile mem reloads into floating point registers.
4599 This is defined to force reload to choose the r/m case instead
4600 of the f/f case when reloading (set (reg fX) (mem/v)). */
4601 if (MEM_P (x) && MEM_VOLATILE_P (x))
4602 return NO_REGS;
4603
4604 /* Force all unrecognized constants into the constant pool. */
4605 if (CONSTANT_P (x))
4606 return NO_REGS;
4607 break;
4608
4609 case AR_M_REGS:
4610 case AR_I_REGS:
4611 if (!OBJECT_P (x))
4612 return NO_REGS;
4613 break;
4614
4615 default:
4616 break;
4617 }
4618
4619 return class;
4620 }
4621
4622 /* This function returns the register class required for a secondary
4623 register when copying between one of the registers in CLASS, and X,
4624 using MODE. A return value of NO_REGS means that no secondary register
4625 is required. */
4626
4627 enum reg_class
4628 ia64_secondary_reload_class (enum reg_class class,
4629 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4630 {
4631 int regno = -1;
4632
4633 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4634 regno = true_regnum (x);
4635
4636 switch (class)
4637 {
4638 case BR_REGS:
4639 case AR_M_REGS:
4640 case AR_I_REGS:
4641 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4642 interaction. We end up with two pseudos with overlapping lifetimes
4643 both of which are equiv to the same constant, and both which need
4644 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4645 changes depending on the path length, which means the qty_first_reg
4646 check in make_regs_eqv can give different answers at different times.
4647 At some point I'll probably need a reload_indi pattern to handle
4648 this.
4649
4650 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4651 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4652 non-general registers for good measure. */
4653 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4654 return GR_REGS;
4655
4656 /* This is needed if a pseudo used as a call_operand gets spilled to a
4657 stack slot. */
4658 if (GET_CODE (x) == MEM)
4659 return GR_REGS;
4660 break;
4661
4662 case FR_REGS:
4663 /* Need to go through general registers to get to other class regs. */
4664 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4665 return GR_REGS;
4666
4667 /* This can happen when a paradoxical subreg is an operand to the
4668 muldi3 pattern. */
4669 /* ??? This shouldn't be necessary after instruction scheduling is
4670 enabled, because paradoxical subregs are not accepted by
4671 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4672 stop the paradoxical subreg stupidity in the *_operand functions
4673 in recog.c. */
4674 if (GET_CODE (x) == MEM
4675 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4676 || GET_MODE (x) == QImode))
4677 return GR_REGS;
4678
4679 /* This can happen because of the ior/and/etc patterns that accept FP
4680 registers as operands. If the third operand is a constant, then it
4681 needs to be reloaded into a FP register. */
4682 if (GET_CODE (x) == CONST_INT)
4683 return GR_REGS;
4684
4685 /* This can happen because of register elimination in a muldi3 insn.
4686 E.g. `26107 * (unsigned long)&u'. */
4687 if (GET_CODE (x) == PLUS)
4688 return GR_REGS;
4689 break;
4690
4691 case PR_REGS:
4692 /* ??? This happens if we cse/gcse a BImode value across a call,
4693 and the function has a nonlocal goto. This is because global
4694 does not allocate call crossing pseudos to hard registers when
4695 current_function_has_nonlocal_goto is true. This is relatively
4696 common for C++ programs that use exceptions. To reproduce,
4697 return NO_REGS and compile libstdc++. */
4698 if (GET_CODE (x) == MEM)
4699 return GR_REGS;
4700
4701 /* This can happen when we take a BImode subreg of a DImode value,
4702 and that DImode value winds up in some non-GR register. */
4703 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4704 return GR_REGS;
4705 break;
4706
4707 default:
4708 break;
4709 }
4710
4711 return NO_REGS;
4712 }
4713
4714 \f
4715 /* Emit text to declare externally defined variables and functions, because
4716 the Intel assembler does not support undefined externals. */
4717
4718 void
4719 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4720 {
4721 int save_referenced;
4722
4723 /* GNU as does not need anything here, but the HP linker does need
4724 something for external functions. */
4725
4726 if (TARGET_GNU_AS
4727 && (!TARGET_HPUX_LD
4728 || TREE_CODE (decl) != FUNCTION_DECL
4729 || strstr (name, "__builtin_") == name))
4730 return;
4731
4732 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4733 the linker when we do this, so we need to be careful not to do this for
4734 builtin functions which have no library equivalent. Unfortunately, we
4735 can't tell here whether or not a function will actually be called by
4736 expand_expr, so we pull in library functions even if we may not need
4737 them later. */
4738 if (! strcmp (name, "__builtin_next_arg")
4739 || ! strcmp (name, "alloca")
4740 || ! strcmp (name, "__builtin_constant_p")
4741 || ! strcmp (name, "__builtin_args_info"))
4742 return;
4743
4744 if (TARGET_HPUX_LD)
4745 ia64_hpux_add_extern_decl (decl);
4746 else
4747 {
4748 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4749 restore it. */
4750 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4751 if (TREE_CODE (decl) == FUNCTION_DECL)
4752 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4753 (*targetm.asm_out.globalize_label) (file, name);
4754 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4755 }
4756 }
4757 \f
4758 /* Parse the -mfixed-range= option string. */
4759
4760 static void
4761 fix_range (const char *const_str)
4762 {
4763 int i, first, last;
4764 char *str, *dash, *comma;
4765
4766 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4767 REG2 are either register names or register numbers. The effect
4768 of this option is to mark the registers in the range from REG1 to
4769 REG2 as ``fixed'' so they won't be used by the compiler. This is
4770 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4771
4772 i = strlen (const_str);
4773 str = (char *) alloca (i + 1);
4774 memcpy (str, const_str, i + 1);
4775
4776 while (1)
4777 {
4778 dash = strchr (str, '-');
4779 if (!dash)
4780 {
4781 warning (0, "value of -mfixed-range must have form REG1-REG2");
4782 return;
4783 }
4784 *dash = '\0';
4785
4786 comma = strchr (dash + 1, ',');
4787 if (comma)
4788 *comma = '\0';
4789
4790 first = decode_reg_name (str);
4791 if (first < 0)
4792 {
4793 warning (0, "unknown register name: %s", str);
4794 return;
4795 }
4796
4797 last = decode_reg_name (dash + 1);
4798 if (last < 0)
4799 {
4800 warning (0, "unknown register name: %s", dash + 1);
4801 return;
4802 }
4803
4804 *dash = '-';
4805
4806 if (first > last)
4807 {
4808 warning (0, "%s-%s is an empty range", str, dash + 1);
4809 return;
4810 }
4811
4812 for (i = first; i <= last; ++i)
4813 fixed_regs[i] = call_used_regs[i] = 1;
4814
4815 if (!comma)
4816 break;
4817
4818 *comma = ',';
4819 str = comma + 1;
4820 }
4821 }
4822
4823 /* Implement TARGET_HANDLE_OPTION. */
4824
4825 static bool
4826 ia64_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
4827 {
4828 switch (code)
4829 {
4830 case OPT_mfixed_range_:
4831 fix_range (arg);
4832 return true;
4833
4834 case OPT_mtls_size_:
4835 {
4836 char *end;
4837 unsigned long tmp = strtoul (arg, &end, 10);
4838 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4839 error ("bad value %<%s%> for -mtls-size= switch", arg);
4840 else
4841 ia64_tls_size = tmp;
4842 return true;
4843 }
4844
4845 case OPT_mtune_:
4846 {
4847 static struct pta
4848 {
4849 const char *name; /* processor name or nickname. */
4850 enum processor_type processor;
4851 }
4852 const processor_alias_table[] =
4853 {
4854 {"itanium", PROCESSOR_ITANIUM},
4855 {"itanium1", PROCESSOR_ITANIUM},
4856 {"merced", PROCESSOR_ITANIUM},
4857 {"itanium2", PROCESSOR_ITANIUM2},
4858 {"mckinley", PROCESSOR_ITANIUM2},
4859 };
4860 int const pta_size = ARRAY_SIZE (processor_alias_table);
4861 int i;
4862
4863 for (i = 0; i < pta_size; i++)
4864 if (!strcmp (arg, processor_alias_table[i].name))
4865 {
4866 ia64_tune = processor_alias_table[i].processor;
4867 break;
4868 }
4869 if (i == pta_size)
4870 error ("bad value %<%s%> for -mtune= switch", arg);
4871 return true;
4872 }
4873
4874 default:
4875 return true;
4876 }
4877 }
4878
4879 /* Handle TARGET_OPTIONS switches. */
4880
4881 void
4882 ia64_override_options (void)
4883 {
4884 if (TARGET_AUTO_PIC)
4885 target_flags |= MASK_CONST_GP;
4886
4887 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
4888 {
4889 warning (0, "not yet implemented: latency-optimized inline square root");
4890 TARGET_INLINE_SQRT = INL_MAX_THR;
4891 }
4892
4893 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4894 flag_schedule_insns_after_reload = 0;
4895
4896 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4897
4898 init_machine_status = ia64_init_machine_status;
4899 }
4900
4901 static struct machine_function *
4902 ia64_init_machine_status (void)
4903 {
4904 return ggc_alloc_cleared (sizeof (struct machine_function));
4905 }
4906 \f
4907 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4908 static enum attr_type ia64_safe_type (rtx);
4909
4910 static enum attr_itanium_class
4911 ia64_safe_itanium_class (rtx insn)
4912 {
4913 if (recog_memoized (insn) >= 0)
4914 return get_attr_itanium_class (insn);
4915 else
4916 return ITANIUM_CLASS_UNKNOWN;
4917 }
4918
4919 static enum attr_type
4920 ia64_safe_type (rtx insn)
4921 {
4922 if (recog_memoized (insn) >= 0)
4923 return get_attr_type (insn);
4924 else
4925 return TYPE_UNKNOWN;
4926 }
4927 \f
4928 /* The following collection of routines emit instruction group stop bits as
4929 necessary to avoid dependencies. */
4930
4931 /* Need to track some additional registers as far as serialization is
4932 concerned so we can properly handle br.call and br.ret. We could
4933 make these registers visible to gcc, but since these registers are
4934 never explicitly used in gcc generated code, it seems wasteful to
4935 do so (plus it would make the call and return patterns needlessly
4936 complex). */
4937 #define REG_RP (BR_REG (0))
4938 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4939 /* This is used for volatile asms which may require a stop bit immediately
4940 before and after them. */
4941 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4942 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4943 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4944
4945 /* For each register, we keep track of how it has been written in the
4946 current instruction group.
4947
4948 If a register is written unconditionally (no qualifying predicate),
4949 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4950
4951 If a register is written if its qualifying predicate P is true, we
4952 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4953 may be written again by the complement of P (P^1) and when this happens,
4954 WRITE_COUNT gets set to 2.
4955
4956 The result of this is that whenever an insn attempts to write a register
4957 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4958
4959 If a predicate register is written by a floating-point insn, we set
4960 WRITTEN_BY_FP to true.
4961
4962 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4963 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
4964
4965 struct reg_write_state
4966 {
4967 unsigned int write_count : 2;
4968 unsigned int first_pred : 16;
4969 unsigned int written_by_fp : 1;
4970 unsigned int written_by_and : 1;
4971 unsigned int written_by_or : 1;
4972 };
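/* A small scenario to illustrate the scheme described above.  Within
   one instruction group:

       (p6) mov r14 = r15     first write: write_count = 1, first_pred = p6
       (p7) mov r14 = r16     complement of p6: write_count becomes 2, no barrier
            mov r14 = r17     unconditional write of an already fully
                              written register: a stop bit is needed first

   This assumes, as the code below does, that the even/odd predicate
   pair p6/p7 is complementary.  */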
4973
4974 /* Cumulative info for the current instruction group. */
4975 struct reg_write_state rws_sum[NUM_REGS];
4976 /* Info for the current instruction. This gets copied to rws_sum after a
4977 stop bit is emitted. */
4978 struct reg_write_state rws_insn[NUM_REGS];
4979
4980 /* Indicates whether this is the first instruction after a stop bit,
4981 in which case we don't need another stop bit. Without this,
4982 ia64_variable_issue will die when scheduling an alloc. */
4983 static int first_instruction;
4984
4985 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4986 RTL for one instruction. */
4987 struct reg_flags
4988 {
4989 unsigned int is_write : 1; /* Is register being written? */
4990 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4991 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4992 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4993 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4994 unsigned int is_sibcall : 1; /* Is this a sibling call, as opposed to a normal call? */
4995 };
4996
4997 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4998 static int rws_access_regno (int, struct reg_flags, int);
4999 static int rws_access_reg (rtx, struct reg_flags, int);
5000 static void update_set_flags (rtx, struct reg_flags *);
5001 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5002 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5003 static void init_insn_group_barriers (void);
5004 static int group_barrier_needed (rtx);
5005 static int safe_group_barrier_needed (rtx);
5006
5007 /* Update *RWS for REGNO, which is being written by the current instruction,
5008 with predicate PRED, and associated register flags in FLAGS. */
5009
5010 static void
5011 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
5012 {
5013 if (pred)
5014 rws[regno].write_count++;
5015 else
5016 rws[regno].write_count = 2;
5017 rws[regno].written_by_fp |= flags.is_fp;
5018 /* ??? Not tracking and/or across differing predicates. */
5019 rws[regno].written_by_and = flags.is_and;
5020 rws[regno].written_by_or = flags.is_or;
5021 rws[regno].first_pred = pred;
5022 }
5023
5024 /* Handle an access to register REGNO of type FLAGS using predicate register
5025 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
5026 a dependency with an earlier instruction in the same group. */
5027
5028 static int
5029 rws_access_regno (int regno, struct reg_flags flags, int pred)
5030 {
5031 int need_barrier = 0;
5032
5033 gcc_assert (regno < NUM_REGS);
5034
5035 if (! PR_REGNO_P (regno))
5036 flags.is_and = flags.is_or = 0;
5037
5038 if (flags.is_write)
5039 {
5040 int write_count;
5041
5042 /* One insn writes same reg multiple times? */
5043 gcc_assert (!rws_insn[regno].write_count);
5044
5045 /* Update info for current instruction. */
5046 rws_update (rws_insn, regno, flags, pred);
5047 write_count = rws_sum[regno].write_count;
5048
5049 switch (write_count)
5050 {
5051 case 0:
5052 /* The register has not been written yet. */
5053 rws_update (rws_sum, regno, flags, pred);
5054 break;
5055
5056 case 1:
5057 /* The register has been written via a predicate. If this is
5058 not a complementary predicate, then we need a barrier. */
5059 /* ??? This assumes that P and P+1 are always complementary
5060 predicates for P even. */
5061 if (flags.is_and && rws_sum[regno].written_by_and)
5062 ;
5063 else if (flags.is_or && rws_sum[regno].written_by_or)
5064 ;
5065 else if ((rws_sum[regno].first_pred ^ 1) != pred)
5066 need_barrier = 1;
5067 rws_update (rws_sum, regno, flags, pred);
5068 break;
5069
5070 case 2:
5071 /* The register has been unconditionally written already. We
5072 need a barrier. */
5073 if (flags.is_and && rws_sum[regno].written_by_and)
5074 ;
5075 else if (flags.is_or && rws_sum[regno].written_by_or)
5076 ;
5077 else
5078 need_barrier = 1;
5079 rws_sum[regno].written_by_and = flags.is_and;
5080 rws_sum[regno].written_by_or = flags.is_or;
5081 break;
5082
5083 default:
5084 gcc_unreachable ();
5085 }
5086 }
5087 else
5088 {
5089 if (flags.is_branch)
5090 {
5091 /* Branches have several RAW exceptions that allow us to avoid
5092 barriers. */
5093
5094 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5095 /* RAW dependencies on branch regs are permissible as long
5096 as the writer is a non-branch instruction. Since we
5097 never generate code that uses a branch register written
5098 by a branch instruction, handling this case is
5099 easy. */
5100 return 0;
5101
5102 if (REGNO_REG_CLASS (regno) == PR_REGS
5103 && ! rws_sum[regno].written_by_fp)
5104 /* The predicates of a branch are available within the
5105 same insn group as long as the predicate was written by
5106 something other than a floating-point instruction. */
5107 return 0;
5108 }
5109
5110 if (flags.is_and && rws_sum[regno].written_by_and)
5111 return 0;
5112 if (flags.is_or && rws_sum[regno].written_by_or)
5113 return 0;
5114
5115 switch (rws_sum[regno].write_count)
5116 {
5117 case 0:
5118 /* The register has not been written yet. */
5119 break;
5120
5121 case 1:
5122 /* The register has been written via a predicate. If this is
5123 not a complementary predicate, then we need a barrier. */
5124 /* ??? This assumes that P and P+1 are always complementary
5125 predicates for P even. */
5126 if ((rws_sum[regno].first_pred ^ 1) != pred)
5127 need_barrier = 1;
5128 break;
5129
5130 case 2:
5131 /* The register has been unconditionally written already. We
5132 need a barrier. */
5133 need_barrier = 1;
5134 break;
5135
5136 default:
5137 gcc_unreachable ();
5138 }
5139 }
5140
5141 return need_barrier;
5142 }
5143
5144 static int
5145 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5146 {
5147 int regno = REGNO (reg);
5148 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5149
5150 if (n == 1)
5151 return rws_access_regno (regno, flags, pred);
5152 else
5153 {
5154 int need_barrier = 0;
5155 while (--n >= 0)
5156 need_barrier |= rws_access_regno (regno + n, flags, pred);
5157 return need_barrier;
5158 }
5159 }
5160
5161 /* Examine X, which is a SET rtx, and update the flags stored in *PFLAGS
5162 accordingly. */
5163
5164 static void
5165 update_set_flags (rtx x, struct reg_flags *pflags)
5166 {
5167 rtx src = SET_SRC (x);
5168
5169 switch (GET_CODE (src))
5170 {
5171 case CALL:
5172 return;
5173
5174 case IF_THEN_ELSE:
5175 if (SET_DEST (x) == pc_rtx)
5176 /* X is a conditional branch. */
5177 return;
5178 else
5179 {
5180 /* X is a conditional move. */
5181 rtx cond = XEXP (src, 0);
5182 cond = XEXP (cond, 0);
5183
5184 /* We always split conditional moves into COND_EXEC patterns, so the
5185 only pattern that can reach here is doloop_end_internal. We don't
5186 need to do anything special for this pattern. */
5187 gcc_assert (GET_CODE (cond) == REG && REGNO (cond) == AR_LC_REGNUM);
5188 return;
5189 }
5190
5191 default:
5192 if (COMPARISON_P (src)
5193 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
5194 /* Set pflags->is_fp to 1 so that we know we're dealing
5195 with a floating point comparison when processing the
5196 destination of the SET. */
5197 pflags->is_fp = 1;
5198
5199 /* Discover if this is a parallel comparison. We only handle
5200 and.orcm and or.andcm at present, since we must retain a
5201 strict inverse on the predicate pair. */
5202 else if (GET_CODE (src) == AND)
5203 pflags->is_and = 1;
5204 else if (GET_CODE (src) == IOR)
5205 pflags->is_or = 1;
5206
5207 break;
5208 }
5209 }
5210
5211 /* Subroutine of rtx_needs_barrier; this function determines whether the
5212 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5213 are as in rtx_needs_barrier. */
5215
5216 static int
5217 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5218 {
5219 int need_barrier = 0;
5220 rtx dst;
5221 rtx src = SET_SRC (x);
5222
5223 if (GET_CODE (src) == CALL)
5224 /* We don't need to worry about the result registers that
5225 get written by the subroutine call. */
5226 return rtx_needs_barrier (src, flags, pred);
5227 else if (SET_DEST (x) == pc_rtx)
5228 {
5229 /* X is a conditional branch. */
5230 /* ??? This seems redundant, as the caller sets this bit for
5231 all JUMP_INSNs. */
5232 flags.is_branch = 1;
5233 return rtx_needs_barrier (src, flags, pred);
5234 }
5235
5236 need_barrier = rtx_needs_barrier (src, flags, pred);
5237
5238 dst = SET_DEST (x);
5239 if (GET_CODE (dst) == ZERO_EXTRACT)
5240 {
5241 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5242 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5243 dst = XEXP (dst, 0);
5244 }
5245 return need_barrier;
5246 }
5247
5248 /* Handle an access to rtx X of type FLAGS using predicate register
5249 PRED. Return 1 if this access creates a dependency with an earlier
5250 instruction in the same group. */
5251
5252 static int
5253 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5254 {
5255 int i, j;
5256 int is_complemented = 0;
5257 int need_barrier = 0;
5258 const char *format_ptr;
5259 struct reg_flags new_flags;
5260 rtx cond;
5261
5262 if (! x)
5263 return 0;
5264
5265 new_flags = flags;
5266
5267 switch (GET_CODE (x))
5268 {
5269 case SET:
5270 update_set_flags (x, &new_flags);
5271 need_barrier = set_src_needs_barrier (x, new_flags, pred);
5272 if (GET_CODE (SET_SRC (x)) != CALL)
5273 {
5274 new_flags.is_write = 1;
5275 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5276 }
5277 break;
5278
5279 case CALL:
5280 new_flags.is_write = 0;
5281 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5282
5283 /* Avoid multiple register writes, in case this is a pattern with
5284 multiple CALL rtx. This avoids a failure in rws_access_reg. */
5285 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5286 {
5287 new_flags.is_write = 1;
5288 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5289 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5290 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5291 }
5292 break;
5293
5294 case COND_EXEC:
5295 /* X is a predicated instruction. */
5296
5297 cond = COND_EXEC_TEST (x);
5298 gcc_assert (!pred);
5299 need_barrier = rtx_needs_barrier (cond, flags, 0);
5300
5301 if (GET_CODE (cond) == EQ)
5302 is_complemented = 1;
5303 cond = XEXP (cond, 0);
5304 gcc_assert (GET_CODE (cond) == REG
5305 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5306 pred = REGNO (cond);
5307 if (is_complemented)
5308 ++pred;
5309
5310 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5311 return need_barrier;
5312
5313 case CLOBBER:
5314 case USE:
5315 /* Clobber & use are for earlier compiler-phases only. */
5316 break;
5317
5318 case ASM_OPERANDS:
5319 case ASM_INPUT:
5320 /* We always emit stop bits for traditional asms. We emit stop bits
5321 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5322 if (GET_CODE (x) != ASM_OPERANDS
5323 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5324 {
5325 /* Avoid writing the register multiple times if we have multiple
5326 asm outputs. This avoids a failure in rws_access_reg. */
5327 if (! rws_insn[REG_VOLATILE].write_count)
5328 {
5329 new_flags.is_write = 1;
5330 rws_access_regno (REG_VOLATILE, new_flags, pred);
5331 }
5332 return 1;
5333 }
5334
5335 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5336 We cannot just fall through here since then we would be confused
5337 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
5338 a traditional asm, unlike its normal usage. */
5339
5340 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5341 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5342 need_barrier = 1;
5343 break;
5344
5345 case PARALLEL:
5346 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5347 {
5348 rtx pat = XVECEXP (x, 0, i);
5349 switch (GET_CODE (pat))
5350 {
5351 case SET:
5352 update_set_flags (pat, &new_flags);
5353 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
5354 break;
5355
5356 case USE:
5357 case CALL:
5358 case ASM_OPERANDS:
5359 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5360 break;
5361
5362 case CLOBBER:
5363 case RETURN:
5364 break;
5365
5366 default:
5367 gcc_unreachable ();
5368 }
5369 }
5370 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5371 {
5372 rtx pat = XVECEXP (x, 0, i);
5373 if (GET_CODE (pat) == SET)
5374 {
5375 if (GET_CODE (SET_SRC (pat)) != CALL)
5376 {
5377 new_flags.is_write = 1;
5378 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5379 pred);
5380 }
5381 }
5382 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5383 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5384 }
5385 break;
5386
5387 case SUBREG:
5388 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5389 break;
5390 case REG:
5391 if (REGNO (x) == AR_UNAT_REGNUM)
5392 {
5393 for (i = 0; i < 64; ++i)
5394 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5395 }
5396 else
5397 need_barrier = rws_access_reg (x, flags, pred);
5398 break;
5399
5400 case MEM:
5401 /* Find the regs used in memory address computation. */
5402 new_flags.is_write = 0;
5403 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5404 break;
5405
5406 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
5407 case SYMBOL_REF: case LABEL_REF: case CONST:
5408 break;
5409
5410 /* Operators with side-effects. */
5411 case POST_INC: case POST_DEC:
5412 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5413
5414 new_flags.is_write = 0;
5415 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5416 new_flags.is_write = 1;
5417 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5418 break;
5419
5420 case POST_MODIFY:
5421 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5422
5423 new_flags.is_write = 0;
5424 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5425 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5426 new_flags.is_write = 1;
5427 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5428 break;
5429
5430 /* Handle common unary and binary ops for efficiency. */
5431 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5432 case MOD: case UDIV: case UMOD: case AND: case IOR:
5433 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5434 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5435 case NE: case EQ: case GE: case GT: case LE:
5436 case LT: case GEU: case GTU: case LEU: case LTU:
5437 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5438 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5439 break;
5440
5441 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5442 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5443 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5444 case SQRT: case FFS: case POPCOUNT:
5445 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5446 break;
5447
5448 case VEC_SELECT:
5449 /* VEC_SELECT's second argument is a PARALLEL with integers that
5450 describe the elements selected. On ia64, those integers are
5451 always constants. Avoid walking the PARALLEL so that we don't
5452 get confused with "normal" parallels and then die. */
5453 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5454 break;
5455
5456 case UNSPEC:
5457 switch (XINT (x, 1))
5458 {
5459 case UNSPEC_LTOFF_DTPMOD:
5460 case UNSPEC_LTOFF_DTPREL:
5461 case UNSPEC_DTPREL:
5462 case UNSPEC_LTOFF_TPREL:
5463 case UNSPEC_TPREL:
5464 case UNSPEC_PRED_REL_MUTEX:
5465 case UNSPEC_PIC_CALL:
5466 case UNSPEC_MF:
5467 case UNSPEC_FETCHADD_ACQ:
5468 case UNSPEC_BSP_VALUE:
5469 case UNSPEC_FLUSHRS:
5470 case UNSPEC_BUNDLE_SELECTOR:
5471 break;
5472
5473 case UNSPEC_GR_SPILL:
5474 case UNSPEC_GR_RESTORE:
5475 {
5476 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5477 HOST_WIDE_INT bit = (offset >> 3) & 63;
5478
5479 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5480 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5481 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5482 new_flags, pred);
5483 break;
5484 }
5485
5486 case UNSPEC_FR_SPILL:
5487 case UNSPEC_FR_RESTORE:
5488 case UNSPEC_GETF_EXP:
5489 case UNSPEC_SETF_EXP:
5490 case UNSPEC_ADDP4:
5491 case UNSPEC_FR_SQRT_RECIP_APPROX:
5492 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5493 break;
5494
5495 case UNSPEC_FR_RECIP_APPROX:
5496 case UNSPEC_SHRP:
5497 case UNSPEC_COPYSIGN:
5498 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5499 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5500 break;
5501
5502 case UNSPEC_CMPXCHG_ACQ:
5503 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5504 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5505 break;
5506
5507 default:
5508 gcc_unreachable ();
5509 }
5510 break;
5511
5512 case UNSPEC_VOLATILE:
5513 switch (XINT (x, 1))
5514 {
5515 case UNSPECV_ALLOC:
5516 /* Alloc must always be the first instruction of a group.
5517 We force this by always returning true. */
5518 /* ??? We might get better scheduling if we explicitly check for
5519 input/local/output register dependencies, and modify the
5520 scheduler so that alloc is always reordered to the start of
5521 the current group. We could then eliminate all of the
5522 first_instruction code. */
5523 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5524
5525 new_flags.is_write = 1;
5526 rws_access_regno (REG_AR_CFM, new_flags, pred);
5527 return 1;
5528
5529 case UNSPECV_SET_BSP:
5530 need_barrier = 1;
5531 break;
5532
5533 case UNSPECV_BLOCKAGE:
5534 case UNSPECV_INSN_GROUP_BARRIER:
5535 case UNSPECV_BREAK:
5536 case UNSPECV_PSAC_ALL:
5537 case UNSPECV_PSAC_NORMAL:
5538 return 0;
5539
5540 default:
5541 gcc_unreachable ();
5542 }
5543 break;
5544
5545 case RETURN:
5546 new_flags.is_write = 0;
5547 need_barrier = rws_access_regno (REG_RP, flags, pred);
5548 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5549
5550 new_flags.is_write = 1;
5551 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5552 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5553 break;
5554
5555 default:
5556 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5557 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5558 switch (format_ptr[i])
5559 {
5560 case '0': /* unused field */
5561 case 'i': /* integer */
5562 case 'n': /* note */
5563 case 'w': /* wide integer */
5564 case 's': /* pointer to string */
5565 case 'S': /* optional pointer to string */
5566 break;
5567
5568 case 'e':
5569 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5570 need_barrier = 1;
5571 break;
5572
5573 case 'E':
5574 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5575 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5576 need_barrier = 1;
5577 break;
5578
5579 default:
5580 gcc_unreachable ();
5581 }
5582 break;
5583 }
5584 return need_barrier;
5585 }
5586
5587 /* Clear out the state for group_barrier_needed at the start of a
5588 sequence of insns. */
5589
5590 static void
5591 init_insn_group_barriers (void)
5592 {
5593 memset (rws_sum, 0, sizeof (rws_sum));
5594 first_instruction = 1;
5595 }
5596
5597 /* Given the current state, determine whether a group barrier (a stop bit) is
5598 necessary before INSN. Return nonzero if so. This modifies the state to
5599 include the effects of INSN as a side-effect. */
5600
5601 static int
5602 group_barrier_needed (rtx insn)
5603 {
5604 rtx pat;
5605 int need_barrier = 0;
5606 struct reg_flags flags;
5607
5608 memset (&flags, 0, sizeof (flags));
5609 switch (GET_CODE (insn))
5610 {
5611 case NOTE:
5612 break;
5613
5614 case BARRIER:
5615 /* A barrier doesn't imply an instruction group boundary. */
5616 break;
5617
5618 case CODE_LABEL:
5619 memset (rws_insn, 0, sizeof (rws_insn));
5620 return 1;
5621
5622 case CALL_INSN:
5623 flags.is_branch = 1;
5624 flags.is_sibcall = SIBLING_CALL_P (insn);
5625 memset (rws_insn, 0, sizeof (rws_insn));
5626
5627 /* Don't bundle a call following another call. */
5628 if ((pat = prev_active_insn (insn))
5629 && GET_CODE (pat) == CALL_INSN)
5630 {
5631 need_barrier = 1;
5632 break;
5633 }
5634
5635 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5636 break;
5637
5638 case JUMP_INSN:
5639 flags.is_branch = 1;
5640
5641 /* Don't bundle a jump following a call. */
5642 if ((pat = prev_active_insn (insn))
5643 && GET_CODE (pat) == CALL_INSN)
5644 {
5645 need_barrier = 1;
5646 break;
5647 }
5648 /* FALLTHRU */
5649
5650 case INSN:
5651 if (GET_CODE (PATTERN (insn)) == USE
5652 || GET_CODE (PATTERN (insn)) == CLOBBER)
5653 /* Don't care about USE and CLOBBER "insns"---those are used to
5654 indicate to the optimizer that it shouldn't get rid of
5655 certain operations. */
5656 break;
5657
5658 pat = PATTERN (insn);
5659
5660 /* Ug. Hack hacks hacked elsewhere. */
5661 switch (recog_memoized (insn))
5662 {
5663 /* We play dependency tricks with the epilogue in order
5664 to get proper schedules. Undo this for dv analysis. */
5665 case CODE_FOR_epilogue_deallocate_stack:
5666 case CODE_FOR_prologue_allocate_stack:
5667 pat = XVECEXP (pat, 0, 0);
5668 break;
5669
5670 /* The pattern we use for br.cloop confuses the code above.
5671 The second element of the vector is representative. */
5672 case CODE_FOR_doloop_end_internal:
5673 pat = XVECEXP (pat, 0, 1);
5674 break;
5675
5676 /* Doesn't generate code. */
5677 case CODE_FOR_pred_rel_mutex:
5678 case CODE_FOR_prologue_use:
5679 return 0;
5680
5681 default:
5682 break;
5683 }
5684
5685 memset (rws_insn, 0, sizeof (rws_insn));
5686 need_barrier = rtx_needs_barrier (pat, flags, 0);
5687
5688 /* Check to see if the previous instruction was a volatile
5689 asm. */
5690 if (! need_barrier)
5691 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5692 break;
5693
5694 default:
5695 gcc_unreachable ();
5696 }
5697
5698 if (first_instruction && INSN_P (insn)
5699 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5700 && GET_CODE (PATTERN (insn)) != USE
5701 && GET_CODE (PATTERN (insn)) != CLOBBER)
5702 {
5703 need_barrier = 0;
5704 first_instruction = 0;
5705 }
5706
5707 return need_barrier;
5708 }
5709
5710 /* Like group_barrier_needed, but do not clobber the current state. */
5711
5712 static int
5713 safe_group_barrier_needed (rtx insn)
5714 {
5715 struct reg_write_state rws_saved[NUM_REGS];
5716 int saved_first_instruction;
5717 int t;
5718
5719 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5720 saved_first_instruction = first_instruction;
5721
5722 t = group_barrier_needed (insn);
5723
5724 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5725 first_instruction = saved_first_instruction;
5726
5727 return t;
5728 }
5729
5730 /* Scan the current function and insert stop bits as necessary to
5731 eliminate dependencies. This function assumes that a final
5732 instruction scheduling pass has been run which has already
5733 inserted most of the necessary stop bits. This function only
5734 inserts new ones at basic block boundaries, since these are
5735 invisible to the scheduler. */
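/* When a barrier is found to be needed below, the stop bit is emitted
   before the most recently seen label or basic-block note rather than
   immediately before the offending insn, and scanning then restarts from
   that point with a freshly cleared state.  */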
5736
5737 static void
5738 emit_insn_group_barriers (FILE *dump)
5739 {
5740 rtx insn;
5741 rtx last_label = 0;
5742 int insns_since_last_label = 0;
5743
5744 init_insn_group_barriers ();
5745
5746 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5747 {
5748 if (GET_CODE (insn) == CODE_LABEL)
5749 {
5750 if (insns_since_last_label)
5751 last_label = insn;
5752 insns_since_last_label = 0;
5753 }
5754 else if (GET_CODE (insn) == NOTE
5755 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5756 {
5757 if (insns_since_last_label)
5758 last_label = insn;
5759 insns_since_last_label = 0;
5760 }
5761 else if (GET_CODE (insn) == INSN
5762 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5763 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5764 {
5765 init_insn_group_barriers ();
5766 last_label = 0;
5767 }
5768 else if (INSN_P (insn))
5769 {
5770 insns_since_last_label = 1;
5771
5772 if (group_barrier_needed (insn))
5773 {
5774 if (last_label)
5775 {
5776 if (dump)
5777 fprintf (dump, "Emitting stop before label %d\n",
5778 INSN_UID (last_label));
5779 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5780 insn = last_label;
5781
5782 init_insn_group_barriers ();
5783 last_label = 0;
5784 }
5785 }
5786 }
5787 }
5788 }
5789
5790 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5791 This function has to emit all necessary group barriers. */
5792
5793 static void
5794 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5795 {
5796 rtx insn;
5797
5798 init_insn_group_barriers ();
5799
5800 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5801 {
5802 if (GET_CODE (insn) == BARRIER)
5803 {
5804 rtx last = prev_active_insn (insn);
5805
5806 if (! last)
5807 continue;
5808 if (GET_CODE (last) == JUMP_INSN
5809 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5810 last = prev_active_insn (last);
5811 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5812 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5813
5814 init_insn_group_barriers ();
5815 }
5816 else if (INSN_P (insn))
5817 {
5818 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5819 init_insn_group_barriers ();
5820 else if (group_barrier_needed (insn))
5821 {
5822 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5823 init_insn_group_barriers ();
5824 group_barrier_needed (insn);
5825 }
5826 }
5827 }
5828 }
5829
5830 \f
5831
5832 /* Instruction scheduling support. */
5833
5834 #define NR_BUNDLES 10
5835
5836 /* A list of names of all available bundles. */
5837
5838 static const char *bundle_name [NR_BUNDLES] =
5839 {
5840 ".mii",
5841 ".mmi",
5842 ".mfi",
5843 ".mmf",
5844 #if NR_BUNDLES == 10
5845 ".bbb",
5846 ".mbb",
5847 #endif
5848 ".mib",
5849 ".mmb",
5850 ".mfb",
5851 ".mlx"
5852 };
5853
5854 /* Nonzero if we should insert stop bits into the schedule. */
5855
5856 int ia64_final_schedule = 0;
5857
5858 /* Codes of the corresponding queried units: */
5859
5860 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5861 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5862
5863 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5864 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5865
5866 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5867
5868 /* The following variable value is an insn group barrier. */
5869
5870 static rtx dfa_stop_insn;
5871
5872 /* The following variable value is the last issued insn. */
5873
5874 static rtx last_scheduled_insn;
5875
5876 /* The following variable value is the size of the DFA state. */
5877
5878 static size_t dfa_state_size;
5879
5880 /* The following variable value is a pointer to a DFA state used as
5881 a temporary variable. */
5882
5883 static state_t temp_dfa_state = NULL;
5884
5885 /* The following variable value is DFA state after issuing the last
5886 insn. */
5887
5888 static state_t prev_cycle_state = NULL;
5889
5890 /* The following array element values are TRUE if the corresponding
5891 insn requires stop bits to be added before it. */
5892
5893 static char *stops_p;
5894
5895 /* The following variable is used to set up the array mentioned above. */
5896
5897 static int stop_before_p = 0;
5898
5899 /* The following variable value is the length of the arrays `clocks' and
5900 `add_cycles'. */
5901
5902 static int clocks_length;
5903
5904 /* The following array element values are cycles on which the
5905 corresponding insn will be issued. The array is used only for
5906 Itanium1. */
5907
5908 static int *clocks;
5909
5910 /* The following array element values are the numbers of cycles that should
5911 be added to improve insn scheduling of MM_insns for Itanium1. */
5912
5913 static int *add_cycles;
5914
5915 static rtx ia64_single_set (rtx);
5916 static void ia64_emit_insn_before (rtx, rtx);
5917
5918 /* Map a bundle number to its pseudo-op. */
5919
5920 const char *
5921 get_bundle_name (int b)
5922 {
5923 return bundle_name[b];
5924 }
5925
5926
5927 /* Return the maximum number of instructions a cpu can issue. */
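/* Itanium and Itanium2 can each issue up to two bundles, i.e. six
   instructions, per clock cycle, hence the constant returned below.  */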
5928
5929 static int
5930 ia64_issue_rate (void)
5931 {
5932 return 6;
5933 }
5934
5935 /* Helper function - like single_set, but look inside COND_EXEC. */
5936
5937 static rtx
5938 ia64_single_set (rtx insn)
5939 {
5940 rtx x = PATTERN (insn), ret;
5941 if (GET_CODE (x) == COND_EXEC)
5942 x = COND_EXEC_CODE (x);
5943 if (GET_CODE (x) == SET)
5944 return x;
5945
5946 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
5947 Although they are not classical single sets, the second set is there just
5948 to protect the insn from moving past FP-relative stack accesses. */
5949 switch (recog_memoized (insn))
5950 {
5951 case CODE_FOR_prologue_allocate_stack:
5952 case CODE_FOR_epilogue_deallocate_stack:
5953 ret = XVECEXP (x, 0, 0);
5954 break;
5955
5956 default:
5957 ret = single_set_2 (insn, x);
5958 break;
5959 }
5960
5961 return ret;
5962 }
5963
5964 /* Adjust the cost of a scheduling dependency. Return the new cost of
5965 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5966
5967 static int
5968 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
5969 {
5970 enum attr_itanium_class dep_class;
5971 enum attr_itanium_class insn_class;
5972
5973 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5974 return cost;
5975
5976 insn_class = ia64_safe_itanium_class (insn);
5977 dep_class = ia64_safe_itanium_class (dep_insn);
5978 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5979 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5980 return 0;
5981
5982 return cost;
5983 }
5984
5985 /* Like emit_insn_before, but skip cycle_display notes.
5986 ??? When cycle display notes are implemented, update this. */
5987
5988 static void
5989 ia64_emit_insn_before (rtx insn, rtx before)
5990 {
5991 emit_insn_before (insn, before);
5992 }
5993
5994 /* The following function marks insns that produce addresses for load
5995 and store insns. Such insns will be placed into M slots because this
5996 decreases the latency for Itanium1 (see function
5997 `ia64_produce_address_p' and the DFA descriptions). */
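/* The mark is made by reusing the insn's `call' field as a scratch flag:
   it is first cleared for every insn in the region and then set exactly
   when the insn feeds the address of a nearby load or store through one
   of the address bypasses checked below.  */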
5998
5999 static void
6000 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6001 {
6002 rtx insn, link, next, next_tail;
6003
6004 /* Before reload, which_alternative is not set, which means that
6005 ia64_safe_itanium_class will produce wrong results for (at least)
6006 move instructions. */
6007 if (!reload_completed)
6008 return;
6009
6010 next_tail = NEXT_INSN (tail);
6011 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6012 if (INSN_P (insn))
6013 insn->call = 0;
6014 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6015 if (INSN_P (insn)
6016 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6017 {
6018 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6019 {
6020 if (REG_NOTE_KIND (link) != REG_DEP_TRUE)
6021 continue;
6022 next = XEXP (link, 0);
6023 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
6024 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
6025 && ia64_st_address_bypass_p (insn, next))
6026 break;
6027 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
6028 || ia64_safe_itanium_class (next)
6029 == ITANIUM_CLASS_FLD)
6030 && ia64_ld_address_bypass_p (insn, next))
6031 break;
6032 }
6033 insn->call = link != 0;
6034 }
6035 }
6036
6037 /* We're beginning a new block. Initialize data structures as necessary. */
6038
6039 static void
6040 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6041 int sched_verbose ATTRIBUTE_UNUSED,
6042 int max_ready ATTRIBUTE_UNUSED)
6043 {
6044 #ifdef ENABLE_CHECKING
6045 rtx insn;
6046
6047 if (reload_completed)
6048 for (insn = NEXT_INSN (current_sched_info->prev_head);
6049 insn != current_sched_info->next_tail;
6050 insn = NEXT_INSN (insn))
6051 gcc_assert (!SCHED_GROUP_P (insn));
6052 #endif
6053 last_scheduled_insn = NULL_RTX;
6054 init_insn_group_barriers ();
6055 }
6056
6057 /* We are about to begin issuing insns for this clock cycle.
6058 Override the default sort algorithm to better slot instructions. */
6059
6060 static int
6061 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6062 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6063 int reorder_type)
6064 {
6065 int n_asms;
6066 int n_ready = *pn_ready;
6067 rtx *e_ready = ready + n_ready;
6068 rtx *insnp;
6069
6070 if (sched_verbose)
6071 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6072
6073 if (reorder_type == 0)
6074 {
6075 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6076 n_asms = 0;
6077 for (insnp = ready; insnp < e_ready; insnp++)
6078 if (insnp < e_ready)
6079 {
6080 rtx insn = *insnp;
6081 enum attr_type t = ia64_safe_type (insn);
6082 if (t == TYPE_UNKNOWN)
6083 {
6084 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6085 || asm_noperands (PATTERN (insn)) >= 0)
6086 {
6087 rtx lowest = ready[n_asms];
6088 ready[n_asms] = insn;
6089 *insnp = lowest;
6090 n_asms++;
6091 }
6092 else
6093 {
6094 rtx highest = ready[n_ready - 1];
6095 ready[n_ready - 1] = insn;
6096 *insnp = highest;
6097 return 1;
6098 }
6099 }
6100 }
6101
6102 if (n_asms < n_ready)
6103 {
6104 /* Some normal insns to process. Skip the asms. */
6105 ready += n_asms;
6106 n_ready -= n_asms;
6107 }
6108 else if (n_ready > 0)
6109 return 1;
6110 }
6111
6112 if (ia64_final_schedule)
6113 {
6114 int deleted = 0;
6115 int nr_need_stop = 0;
6116
6117 for (insnp = ready; insnp < e_ready; insnp++)
6118 if (safe_group_barrier_needed (*insnp))
6119 nr_need_stop++;
6120
6121 if (reorder_type == 1 && n_ready == nr_need_stop)
6122 return 0;
6123 if (reorder_type == 0)
6124 return 1;
6125 insnp = e_ready;
6126 /* Move down everything that needs a stop bit, preserving
6127 relative order. */
6128 while (insnp-- > ready + deleted)
6129 while (insnp >= ready + deleted)
6130 {
6131 rtx insn = *insnp;
6132 if (! safe_group_barrier_needed (insn))
6133 break;
6134 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6135 *ready = insn;
6136 deleted++;
6137 }
6138 n_ready -= deleted;
6139 ready += deleted;
6140 }
6141
6142 return 1;
6143 }
6144
6145 /* We are about to begin issuing insns for this clock cycle. Override
6146 the default sort algorithm to better slot instructions. */
6147
6148 static int
6149 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6150 int clock_var)
6151 {
6152 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6153 pn_ready, clock_var, 0);
6154 }
6155
6156 /* Like ia64_sched_reorder, but called after issuing each insn.
6157 Override the default sort algorithm to better slot instructions. */
6158
6159 static int
6160 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6161 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6162 int *pn_ready, int clock_var)
6163 {
6164 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6165 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6166 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6167 clock_var, 1);
6168 }
6169
6170 /* We are about to issue INSN. Return the number of insns left on the
6171 ready queue that can be issued this cycle. */
6172
6173 static int
6174 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6175 int sched_verbose ATTRIBUTE_UNUSED,
6176 rtx insn ATTRIBUTE_UNUSED,
6177 int can_issue_more ATTRIBUTE_UNUSED)
6178 {
6179 last_scheduled_insn = insn;
6180 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6181 if (reload_completed)
6182 {
6183 int needed = group_barrier_needed (insn);
6184
6185 gcc_assert (!needed);
6186 if (GET_CODE (insn) == CALL_INSN)
6187 init_insn_group_barriers ();
6188 stops_p [INSN_UID (insn)] = stop_before_p;
6189 stop_before_p = 0;
6190 }
6191 return 1;
6192 }
6193
6194 /* We are choosing insn from the ready queue. Return nonzero if INSN
6195 can be chosen. */
6196
6197 static int
6198 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6199 {
6200 gcc_assert (insn && INSN_P (insn));
6201 return (!reload_completed
6202 || !safe_group_barrier_needed (insn));
6203 }
6204
6205 /* The following variable value is a pseudo-insn used by the DFA insn
6206 scheduler to change the DFA state when the simulated clock is
6207 increased. */
6208
6209 static rtx dfa_pre_cycle_insn;
6210
6211 /* We are about to begin issuing INSN. Return nonzero if we cannot
6212 issue it on the given cycle CLOCK, and return zero if we should not sort
6213 the ready queue on the next clock start. */
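/* The decision to insert a stop is remembered in stop_before_p here;
   ia64_variable_issue above then transfers it into the stops_p array
   for the insn that is actually issued.  */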
6214
6215 static int
6216 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6217 int clock, int *sort_p)
6218 {
6219 int setup_clocks_p = FALSE;
6220
6221 gcc_assert (insn && INSN_P (insn));
6222 if ((reload_completed && safe_group_barrier_needed (insn))
6223 || (last_scheduled_insn
6224 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6225 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6226 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6227 {
6228 init_insn_group_barriers ();
6229 if (verbose && dump)
6230 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6231 last_clock == clock ? " + cycle advance" : "");
6232 stop_before_p = 1;
6233 if (last_clock == clock)
6234 {
6235 state_transition (curr_state, dfa_stop_insn);
6236 if (TARGET_EARLY_STOP_BITS)
6237 *sort_p = (last_scheduled_insn == NULL_RTX
6238 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6239 else
6240 *sort_p = 0;
6241 return 1;
6242 }
6243 else if (reload_completed)
6244 setup_clocks_p = TRUE;
6245 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6246 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6247 state_reset (curr_state);
6248 else
6249 {
6250 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6251 state_transition (curr_state, dfa_stop_insn);
6252 state_transition (curr_state, dfa_pre_cycle_insn);
6253 state_transition (curr_state, NULL);
6254 }
6255 }
6256 else if (reload_completed)
6257 setup_clocks_p = TRUE;
6258 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6259 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6260 && asm_noperands (PATTERN (insn)) < 0)
6261 {
6262 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6263
6264 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6265 {
6266 rtx link;
6267 int d = -1;
6268
6269 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6270 if (REG_NOTE_KIND (link) == 0)
6271 {
6272 enum attr_itanium_class dep_class;
6273 rtx dep_insn = XEXP (link, 0);
6274
6275 dep_class = ia64_safe_itanium_class (dep_insn);
6276 if ((dep_class == ITANIUM_CLASS_MMMUL
6277 || dep_class == ITANIUM_CLASS_MMSHF)
6278 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6279 && (d < 0
6280 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6281 d = last_clock - clocks [INSN_UID (dep_insn)];
6282 }
6283 if (d >= 0)
6284 add_cycles [INSN_UID (insn)] = 3 - d;
6285 }
6286 }
6287 return 0;
6288 }
6289
6290 \f
6291
6292 /* The following page contains abstract data `bundle states' which are
6293 used for bundling insns (inserting nops and template generation). */
6294
6295 /* The following describes state of insn bundling. */
6296
6297 struct bundle_state
6298 {
6299 /* Unique bundle state number to identify them in the debugging
6300 output */
6301 int unique_num;
6302 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6303 /* number of nops before and after the insn */
6304 short before_nops_num, after_nops_num;
6305 int insn_num; /* insn number (0 - for the initial state, 1 - for the 1st
6306 insn) */
6307 int cost; /* cost of the state in cycles */
6308 int accumulated_insns_num; /* number of all previous insns including
6309 nops. L is considered as 2 insns */
6310 int branch_deviation; /* deviation of previous branches from 3rd slots */
6311 struct bundle_state *next; /* next state with the same insn_num */
6312 struct bundle_state *originator; /* originator (previous insn state) */
6313 /* All bundle states are in the following chain. */
6314 struct bundle_state *allocated_states_chain;
6315 /* The DFA State after issuing the insn and the nops. */
6316 state_t dfa_state;
6317 };
6318
6319 /* The following maps an insn number to the corresponding bundle state. */
6320
6321 static struct bundle_state **index_to_bundle_states;
6322
6323 /* The unique number of next bundle state. */
6324
6325 static int bundle_states_num;
6326
6327 /* All allocated bundle states are in the following chain. */
6328
6329 static struct bundle_state *allocated_bundle_states_chain;
6330
6331 /* All allocated but not used bundle states are in the following
6332 chain. */
6333
6334 static struct bundle_state *free_bundle_state_chain;
6335
6336
6337 /* The following function returns a free bundle state. */
6338
6339 static struct bundle_state *
6340 get_free_bundle_state (void)
6341 {
6342 struct bundle_state *result;
6343
6344 if (free_bundle_state_chain != NULL)
6345 {
6346 result = free_bundle_state_chain;
6347 free_bundle_state_chain = result->next;
6348 }
6349 else
6350 {
6351 result = xmalloc (sizeof (struct bundle_state));
6352 result->dfa_state = xmalloc (dfa_state_size);
6353 result->allocated_states_chain = allocated_bundle_states_chain;
6354 allocated_bundle_states_chain = result;
6355 }
6356 result->unique_num = bundle_states_num++;
6357 return result;
6358
6359 }
6360
6361 /* The following function frees the given bundle state. */
6362
6363 static void
6364 free_bundle_state (struct bundle_state *state)
6365 {
6366 state->next = free_bundle_state_chain;
6367 free_bundle_state_chain = state;
6368 }
6369
6370 /* Start work with abstract data `bundle states'. */
6371
6372 static void
6373 initiate_bundle_states (void)
6374 {
6375 bundle_states_num = 0;
6376 free_bundle_state_chain = NULL;
6377 allocated_bundle_states_chain = NULL;
6378 }
6379
6380 /* Finish work with abstract data `bundle states'. */
6381
6382 static void
6383 finish_bundle_states (void)
6384 {
6385 struct bundle_state *curr_state, *next_state;
6386
6387 for (curr_state = allocated_bundle_states_chain;
6388 curr_state != NULL;
6389 curr_state = next_state)
6390 {
6391 next_state = curr_state->allocated_states_chain;
6392 free (curr_state->dfa_state);
6393 free (curr_state);
6394 }
6395 }
6396
6397 /* Hash table of the bundle states. The key is dfa_state and insn_num
6398 of the bundle states. */
6399
6400 static htab_t bundle_state_table;
6401
6402 /* The function returns the hash of BUNDLE_STATE. */
6403
6404 static unsigned
6405 bundle_state_hash (const void *bundle_state)
6406 {
6407 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6408 unsigned result, i;
6409
6410 for (result = i = 0; i < dfa_state_size; i++)
6411 result += (((unsigned char *) state->dfa_state) [i]
6412 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6413 return result + state->insn_num;
6414 }
6415
6416 /* The function returns nonzero if the bundle state keys are equal. */
6417
6418 static int
6419 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6420 {
6421 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6422 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6423
6424 return (state1->insn_num == state2->insn_num
6425 && memcmp (state1->dfa_state, state2->dfa_state,
6426 dfa_state_size) == 0);
6427 }
6428
6429 /* The function inserts the BUNDLE_STATE into the hash table. The
6430 function returns nonzero if the bundle has been inserted into the
6431 table. The table contains the best bundle state with given key. */
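/* A state is considered better if it has a smaller cost, then fewer
   accumulated insns (nops), and then a smaller branch deviation.  Note
   that a better state is copied over the existing table entry in place,
   so the chains rooted in `index_to_bundle_states' remain valid.  */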
6432
6433 static int
6434 insert_bundle_state (struct bundle_state *bundle_state)
6435 {
6436 void **entry_ptr;
6437
6438 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6439 if (*entry_ptr == NULL)
6440 {
6441 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6442 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6443 *entry_ptr = (void *) bundle_state;
6444 return TRUE;
6445 }
6446 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6447 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6448 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6449 > bundle_state->accumulated_insns_num
6450 || (((struct bundle_state *)
6451 *entry_ptr)->accumulated_insns_num
6452 == bundle_state->accumulated_insns_num
6453 && ((struct bundle_state *)
6454 *entry_ptr)->branch_deviation
6455 > bundle_state->branch_deviation))))
6456
6457 {
6458 struct bundle_state temp;
6459
6460 temp = *(struct bundle_state *) *entry_ptr;
6461 *(struct bundle_state *) *entry_ptr = *bundle_state;
6462 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6463 *bundle_state = temp;
6464 }
6465 return FALSE;
6466 }
6467
6468 /* Start work with the hash table. */
6469
6470 static void
6471 initiate_bundle_state_table (void)
6472 {
6473 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6474 (htab_del) 0);
6475 }
6476
6477 /* Finish work with the hash table. */
6478
6479 static void
6480 finish_bundle_state_table (void)
6481 {
6482 htab_delete (bundle_state_table);
6483 }
6484
6485 \f
6486
6487 /* The following variable is an insn `nop' used to check bundle states
6488 with different numbers of inserted nops. */
6489
6490 static rtx ia64_nop;
6491
6492 /* The following function tries to issue NOPS_NUM nops for the current
6493 state without advancing the processor cycle. If it fails, the
6494 function returns FALSE and frees the current state. */
6495
6496 static int
6497 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6498 {
6499 int i;
6500
6501 for (i = 0; i < nops_num; i++)
6502 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6503 {
6504 free_bundle_state (curr_state);
6505 return FALSE;
6506 }
6507 return TRUE;
6508 }
6509
6510 /* The following function tries to issue INSN for the current
6511 state without advancing the processor cycle. If it fails, the
6512 function returns FALSE and frees the current state. */
6513
6514 static int
6515 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6516 {
6517 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6518 {
6519 free_bundle_state (curr_state);
6520 return FALSE;
6521 }
6522 return TRUE;
6523 }
6524
6525 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6526 starting from ORIGINATOR without advancing the processor cycle. If
6527 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6528 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6529 On success, the function creates a new bundle state and inserts it
6530 into the hash table and into `index_to_bundle_states'. */
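/* Note that an L-type (long) insn is counted as two slots when
   `accumulated_insns_num' is updated below, matching the note in
   struct bundle_state above.  */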
6531
6532 static void
6533 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6534 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6535 {
6536 struct bundle_state *curr_state;
6537
6538 curr_state = get_free_bundle_state ();
6539 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6540 curr_state->insn = insn;
6541 curr_state->insn_num = originator->insn_num + 1;
6542 curr_state->cost = originator->cost;
6543 curr_state->originator = originator;
6544 curr_state->before_nops_num = before_nops_num;
6545 curr_state->after_nops_num = 0;
6546 curr_state->accumulated_insns_num
6547 = originator->accumulated_insns_num + before_nops_num;
6548 curr_state->branch_deviation = originator->branch_deviation;
6549 gcc_assert (insn);
6550 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6551 {
6552 gcc_assert (GET_MODE (insn) != TImode);
6553 if (!try_issue_nops (curr_state, before_nops_num))
6554 return;
6555 if (!try_issue_insn (curr_state, insn))
6556 return;
6557 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6558 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6559 && curr_state->accumulated_insns_num % 3 != 0)
6560 {
6561 free_bundle_state (curr_state);
6562 return;
6563 }
6564 }
6565 else if (GET_MODE (insn) != TImode)
6566 {
6567 if (!try_issue_nops (curr_state, before_nops_num))
6568 return;
6569 if (!try_issue_insn (curr_state, insn))
6570 return;
6571 curr_state->accumulated_insns_num++;
6572 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
6573 && asm_noperands (PATTERN (insn)) < 0);
6574
6575 if (ia64_safe_type (insn) == TYPE_L)
6576 curr_state->accumulated_insns_num++;
6577 }
6578 else
6579 {
6580 /* If this is an insn that must be first in a group, then don't allow
6581 nops to be emitted before it. Currently, alloc is the only such
6582 supported instruction. */
6583 /* ??? The bundling automatons should handle this for us, but they do
6584 not yet have support for the first_insn attribute. */
6585 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
6586 {
6587 free_bundle_state (curr_state);
6588 return;
6589 }
6590
6591 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6592 state_transition (curr_state->dfa_state, NULL);
6593 curr_state->cost++;
6594 if (!try_issue_nops (curr_state, before_nops_num))
6595 return;
6596 if (!try_issue_insn (curr_state, insn))
6597 return;
6598 curr_state->accumulated_insns_num++;
6599 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6600 || asm_noperands (PATTERN (insn)) >= 0)
6601 {
6602 /* Finish bundle containing asm insn. */
6603 curr_state->after_nops_num
6604 = 3 - curr_state->accumulated_insns_num % 3;
6605 curr_state->accumulated_insns_num
6606 += 3 - curr_state->accumulated_insns_num % 3;
6607 }
6608 else if (ia64_safe_type (insn) == TYPE_L)
6609 curr_state->accumulated_insns_num++;
6610 }
6611 if (ia64_safe_type (insn) == TYPE_B)
6612 curr_state->branch_deviation
6613 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6614 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6615 {
6616 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6617 {
6618 state_t dfa_state;
6619 struct bundle_state *curr_state1;
6620 struct bundle_state *allocated_states_chain;
6621
6622 curr_state1 = get_free_bundle_state ();
6623 dfa_state = curr_state1->dfa_state;
6624 allocated_states_chain = curr_state1->allocated_states_chain;
6625 *curr_state1 = *curr_state;
6626 curr_state1->dfa_state = dfa_state;
6627 curr_state1->allocated_states_chain = allocated_states_chain;
6628 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6629 dfa_state_size);
6630 curr_state = curr_state1;
6631 }
6632 if (!try_issue_nops (curr_state,
6633 3 - curr_state->accumulated_insns_num % 3))
6634 return;
6635 curr_state->after_nops_num
6636 = 3 - curr_state->accumulated_insns_num % 3;
6637 curr_state->accumulated_insns_num
6638 += 3 - curr_state->accumulated_insns_num % 3;
6639 }
6640 if (!insert_bundle_state (curr_state))
6641 free_bundle_state (curr_state);
6642 return;
6643 }
6644
6645 /* The following function returns the position in the two-bundle window
6646 for the given STATE. */
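/* The position counts the instruction slots already reserved in the
   two-bundle window: 1-3 while only the first bundle is in use, 4-6 once
   the second bundle is in use, and 0 for an empty window.  */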
6647
6648 static int
6649 get_max_pos (state_t state)
6650 {
6651 if (cpu_unit_reservation_p (state, pos_6))
6652 return 6;
6653 else if (cpu_unit_reservation_p (state, pos_5))
6654 return 5;
6655 else if (cpu_unit_reservation_p (state, pos_4))
6656 return 4;
6657 else if (cpu_unit_reservation_p (state, pos_3))
6658 return 3;
6659 else if (cpu_unit_reservation_p (state, pos_2))
6660 return 2;
6661 else if (cpu_unit_reservation_p (state, pos_1))
6662 return 1;
6663 else
6664 return 0;
6665 }
6666
6667 /* The function returns the code of a possible template for the given
6668 position and state. The function should be called only with the two
6669 position values 3 or 6. We avoid generating F NOPs by putting
6670 templates containing F insns at the end of the template search,
6671 because of an undocumented anomaly in McKinley-derived cores which can
6672 cause stalls if an F-unit insn (including a NOP) is issued within a
6673 six-cycle window after reading certain application registers (such
6674 as ar.bsp). Furthermore, power considerations also argue against
6675 the use of F-unit instructions unless they are really needed. */
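/* The returned code is an index into `bundle_name' above:
   0 .mii, 1 .mmi, 2 .mfi, 3 .mmf, 4 .bbb, 5 .mbb, 6 .mib, 7 .mmb,
   8 .mfb, 9 .mlx.  */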
6676
6677 static int
6678 get_template (state_t state, int pos)
6679 {
6680 switch (pos)
6681 {
6682 case 3:
6683 if (cpu_unit_reservation_p (state, _0mmi_))
6684 return 1;
6685 else if (cpu_unit_reservation_p (state, _0mii_))
6686 return 0;
6687 else if (cpu_unit_reservation_p (state, _0mmb_))
6688 return 7;
6689 else if (cpu_unit_reservation_p (state, _0mib_))
6690 return 6;
6691 else if (cpu_unit_reservation_p (state, _0mbb_))
6692 return 5;
6693 else if (cpu_unit_reservation_p (state, _0bbb_))
6694 return 4;
6695 else if (cpu_unit_reservation_p (state, _0mmf_))
6696 return 3;
6697 else if (cpu_unit_reservation_p (state, _0mfi_))
6698 return 2;
6699 else if (cpu_unit_reservation_p (state, _0mfb_))
6700 return 8;
6701 else if (cpu_unit_reservation_p (state, _0mlx_))
6702 return 9;
6703 else
6704 gcc_unreachable ();
6705 case 6:
6706 if (cpu_unit_reservation_p (state, _1mmi_))
6707 return 1;
6708 else if (cpu_unit_reservation_p (state, _1mii_))
6709 return 0;
6710 else if (cpu_unit_reservation_p (state, _1mmb_))
6711 return 7;
6712 else if (cpu_unit_reservation_p (state, _1mib_))
6713 return 6;
6714 else if (cpu_unit_reservation_p (state, _1mbb_))
6715 return 5;
6716 else if (cpu_unit_reservation_p (state, _1bbb_))
6717 return 4;
6718 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6719 return 3;
6720 else if (cpu_unit_reservation_p (state, _1mfi_))
6721 return 2;
6722 else if (cpu_unit_reservation_p (state, _1mfb_))
6723 return 8;
6724 else if (cpu_unit_reservation_p (state, _1mlx_))
6725 return 9;
6726 else
6727 gcc_unreachable ();
6728 default:
6729 gcc_unreachable ();
6730 }
6731 }
6732
6733 /* The following function returns the first insn important for insn
6734 bundling at or after INSN and before TAIL. */
6735
6736 static rtx
6737 get_next_important_insn (rtx insn, rtx tail)
6738 {
6739 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6740 if (INSN_P (insn)
6741 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6742 && GET_CODE (PATTERN (insn)) != USE
6743 && GET_CODE (PATTERN (insn)) != CLOBBER)
6744 return insn;
6745 return NULL_RTX;
6746 }
6747
6748 /* The following function does insn bundling. Bundling means
6749 inserting templates and nop insns to fit insn groups into permitted
6750 templates. Instruction scheduling uses an NDFA (non-deterministic
6751 finite automaton) encoding information about the templates and the
6752 inserted nops. Nondeterminism of the automaton permits following
6753 all possible insn sequences very quickly.
6754
6755 Unfortunately it is not possible to get information about inserting
6756 nop insns and used templates from the automaton states. The
6757 automaton only says that we can issue an insn, possibly inserting
6758 some nops before it and using some template. Therefore insn
6759 bundling in this function is implemented by using a DFA
6760 (deterministic finite automaton). We follow all possible insn
6761 sequences by inserting 0-2 nops (that is what the NDFA describes for
6762 insn scheduling) before/after each insn being bundled. We know the
6763 start of a simulated processor cycle from insn scheduling (an insn
6764 starting a new cycle has TImode).
6765
6766 A simple implementation of insn bundling would create an enormous
6767 number of possible insn sequences satisfying information about new
6768 cycle ticks taken from the insn scheduling. To make the algorithm
6769 practical we use dynamic programming. Each decision (about
6770 inserting nops and implicitly about previous decisions) is described
6771 by the structure bundle_state (see above). If we generate the same
6772 bundle state (the key is the automaton state after issuing the insns
6773 and nops for it), we reuse the already generated one. As a consequence
6774 we reject some decisions which cannot improve the solution and
6775 reduce the memory needed by the algorithm.
6776
6777 When we reach the end of EBB (extended basic block), we choose the
6778 best sequence and then, moving back in the EBB, insert templates for
6779 the best alternative. The templates are taken by querying the
6780 automaton state for each insn in the chosen bundle states.
6781
6782 So the algorithm makes two (forward and backward) passes through the
6783 EBB. There is an additional forward pass through the EBB for the
6784 Itanium1 processor. This pass inserts more nops to make the dependency
6785 between a producer insn and an MMMUL/MMSHF insn at least 4 cycles long. */
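/* In short, the forward pass below extends every bundle state of the
   previous important insn by trying up to two leading nops for the
   current insn (see issue_nops_and_insn), keeping only the best state
   for each (DFA state, insn number) key via insert_bundle_state; the
   backward pass then walks the `originator' links of the best final
   state and materializes the chosen nops and bundle selectors.  */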
6786
6787 static void
6788 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6789 {
6790 struct bundle_state *curr_state, *next_state, *best_state;
6791 rtx insn, next_insn;
6792 int insn_num;
6793 int i, bundle_end_p, only_bundle_end_p, asm_p;
6794 int pos = 0, max_pos, template0, template1;
6795 rtx b;
6796 rtx nop;
6797 enum attr_type type;
6798
6799 insn_num = 0;
6800 /* Count insns in the EBB. */
6801 for (insn = NEXT_INSN (prev_head_insn);
6802 insn && insn != tail;
6803 insn = NEXT_INSN (insn))
6804 if (INSN_P (insn))
6805 insn_num++;
6806 if (insn_num == 0)
6807 return;
6808 bundling_p = 1;
6809 dfa_clean_insn_cache ();
6810 initiate_bundle_state_table ();
6811 index_to_bundle_states = xmalloc ((insn_num + 2)
6812 * sizeof (struct bundle_state *));
6813 /* First (forward) pass -- generation of bundle states. */
6814 curr_state = get_free_bundle_state ();
6815 curr_state->insn = NULL;
6816 curr_state->before_nops_num = 0;
6817 curr_state->after_nops_num = 0;
6818 curr_state->insn_num = 0;
6819 curr_state->cost = 0;
6820 curr_state->accumulated_insns_num = 0;
6821 curr_state->branch_deviation = 0;
6822 curr_state->next = NULL;
6823 curr_state->originator = NULL;
6824 state_reset (curr_state->dfa_state);
6825 index_to_bundle_states [0] = curr_state;
6826 insn_num = 0;
6827 /* Shift the cycle mark if it is put on an insn which could be ignored. */
6828 for (insn = NEXT_INSN (prev_head_insn);
6829 insn != tail;
6830 insn = NEXT_INSN (insn))
6831 if (INSN_P (insn)
6832 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6833 || GET_CODE (PATTERN (insn)) == USE
6834 || GET_CODE (PATTERN (insn)) == CLOBBER)
6835 && GET_MODE (insn) == TImode)
6836 {
6837 PUT_MODE (insn, VOIDmode);
6838 for (next_insn = NEXT_INSN (insn);
6839 next_insn != tail;
6840 next_insn = NEXT_INSN (next_insn))
6841 if (INSN_P (next_insn)
6842 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6843 && GET_CODE (PATTERN (next_insn)) != USE
6844 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6845 {
6846 PUT_MODE (next_insn, TImode);
6847 break;
6848 }
6849 }
6850 /* Forward pass: generation of bundle states. */
6851 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6852 insn != NULL_RTX;
6853 insn = next_insn)
6854 {
6855 gcc_assert (INSN_P (insn)
6856 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6857 && GET_CODE (PATTERN (insn)) != USE
6858 && GET_CODE (PATTERN (insn)) != CLOBBER);
6859 type = ia64_safe_type (insn);
6860 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6861 insn_num++;
6862 index_to_bundle_states [insn_num] = NULL;
6863 for (curr_state = index_to_bundle_states [insn_num - 1];
6864 curr_state != NULL;
6865 curr_state = next_state)
6866 {
6867 pos = curr_state->accumulated_insns_num % 3;
6868 next_state = curr_state->next;
6869 /* We must fill up the current bundle in order to start a
6870 subsequent asm insn in a new bundle. An asm insn is always
6871 placed in a separate bundle. */
6872 only_bundle_end_p
6873 = (next_insn != NULL_RTX
6874 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6875 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6876 /* We may fill up the current bundle if it is the cycle end
6877 without a group barrier. */
6878 bundle_end_p
6879 = (only_bundle_end_p || next_insn == NULL_RTX
6880 || (GET_MODE (next_insn) == TImode
6881 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6882 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6883 || type == TYPE_S
6884 /* We need to insert 2 nops for cases like M_MII. To
6885 guarantee issuing all insns on the same cycle for
6886 Itanium 1, we need to issue 2 nops after the first M
6887 insn (MnnMII where n is a nop insn). */
6888 || ((type == TYPE_M || type == TYPE_A)
6889 && ia64_tune == PROCESSOR_ITANIUM
6890 && !bundle_end_p && pos == 1))
6891 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6892 only_bundle_end_p);
6893 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6894 only_bundle_end_p);
6895 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6896 only_bundle_end_p);
6897 }
6898 gcc_assert (index_to_bundle_states [insn_num]);
6899 for (curr_state = index_to_bundle_states [insn_num];
6900 curr_state != NULL;
6901 curr_state = curr_state->next)
6902 if (verbose >= 2 && dump)
6903 {
6904 /* This structure is taken from the generated code of the
6905 pipeline hazard recognizer (see file insn-attrtab.c).
6906 Please don't forget to change the structure if a new
6907 automaton is added to the .md file. */
6908 struct DFA_chip
6909 {
6910 unsigned short one_automaton_state;
6911 unsigned short oneb_automaton_state;
6912 unsigned short two_automaton_state;
6913 unsigned short twob_automaton_state;
6914 };
6915
6916 fprintf
6917 (dump,
6918 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6919 curr_state->unique_num,
6920 (curr_state->originator == NULL
6921 ? -1 : curr_state->originator->unique_num),
6922 curr_state->cost,
6923 curr_state->before_nops_num, curr_state->after_nops_num,
6924 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6925 (ia64_tune == PROCESSOR_ITANIUM
6926 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6927 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6928 INSN_UID (insn));
6929 }
6930 }
6931
6932 /* We should find a solution because the 2nd insn scheduling pass has
6933 found one. */
6934 gcc_assert (index_to_bundle_states [insn_num]);
6935 /* Find a state corresponding to the best insn sequence. */
6936 best_state = NULL;
6937 for (curr_state = index_to_bundle_states [insn_num];
6938 curr_state != NULL;
6939 curr_state = curr_state->next)
6940 /* We are just looking at the states with a fully filled up last
6941 bundle. First we prefer insn sequences with minimal cost,
6942 then with minimal inserted nops, and finally with branch insns
6943 placed in the 3rd slots. */
6944 if (curr_state->accumulated_insns_num % 3 == 0
6945 && (best_state == NULL || best_state->cost > curr_state->cost
6946 || (best_state->cost == curr_state->cost
6947 && (curr_state->accumulated_insns_num
6948 < best_state->accumulated_insns_num
6949 || (curr_state->accumulated_insns_num
6950 == best_state->accumulated_insns_num
6951 && curr_state->branch_deviation
6952 < best_state->branch_deviation)))))
6953 best_state = curr_state;
6954 /* Second (backward) pass: adding nops and templates. */
6955 insn_num = best_state->before_nops_num;
6956 template0 = template1 = -1;
6957 for (curr_state = best_state;
6958 curr_state->originator != NULL;
6959 curr_state = curr_state->originator)
6960 {
6961 insn = curr_state->insn;
6962 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6963 || asm_noperands (PATTERN (insn)) >= 0);
6964 insn_num++;
6965 if (verbose >= 2 && dump)
6966 {
6967 struct DFA_chip
6968 {
6969 unsigned short one_automaton_state;
6970 unsigned short oneb_automaton_state;
6971 unsigned short two_automaton_state;
6972 unsigned short twob_automaton_state;
6973 };
6974
6975 fprintf
6976 (dump,
6977 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6978 curr_state->unique_num,
6979 (curr_state->originator == NULL
6980 ? -1 : curr_state->originator->unique_num),
6981 curr_state->cost,
6982 curr_state->before_nops_num, curr_state->after_nops_num,
6983 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6984 (ia64_tune == PROCESSOR_ITANIUM
6985 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6986 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6987 INSN_UID (insn));
6988 }
6989 /* Find the position in the current bundle window. The window can
6990 contain at most two bundles. A two-bundle window means that
6991 the processor will make two bundle rotations. */
6992 max_pos = get_max_pos (curr_state->dfa_state);
6993 if (max_pos == 6
6994 /* The following (negative template number) means that the
6995 processor did one bundle rotation. */
6996 || (max_pos == 3 && template0 < 0))
6997 {
6998 /* We are at the end of the window -- find template(s) for
6999 its bundle(s). */
7000 pos = max_pos;
7001 if (max_pos == 3)
7002 template0 = get_template (curr_state->dfa_state, 3);
7003 else
7004 {
7005 template1 = get_template (curr_state->dfa_state, 3);
7006 template0 = get_template (curr_state->dfa_state, 6);
7007 }
7008 }
7009 if (max_pos > 3 && template1 < 0)
7010 /* This may happen when we have a stop inside a bundle. */
7011 {
7012 gcc_assert (pos <= 3);
7013 template1 = get_template (curr_state->dfa_state, 3);
7014 pos += 3;
7015 }
7016 if (!asm_p)
7017 /* Emit nops after the current insn. */
7018 for (i = 0; i < curr_state->after_nops_num; i++)
7019 {
7020 nop = gen_nop ();
7021 emit_insn_after (nop, insn);
7022 pos--;
7023 gcc_assert (pos >= 0);
7024 if (pos % 3 == 0)
7025 {
7026 /* We are at the start of a bundle: emit the template
7027 (it should be defined). */
7028 gcc_assert (template0 >= 0);
7029 b = gen_bundle_selector (GEN_INT (template0));
7030 ia64_emit_insn_before (b, nop);
7031 /* If we have a two-bundle window, we make one bundle
7032 rotation. Otherwise template0 will be undefined
7033 (a negative value). */
7034 template0 = template1;
7035 template1 = -1;
7036 }
7037 }
7038 /* Move the position backward in the window. A group barrier has
7039 no slot. An asm insn takes the whole bundle. */
7040 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7041 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7042 && asm_noperands (PATTERN (insn)) < 0)
7043 pos--;
7044 /* A long insn takes 2 slots. */
7045 if (ia64_safe_type (insn) == TYPE_L)
7046 pos--;
7047 gcc_assert (pos >= 0);
7048 if (pos % 3 == 0
7049 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7050 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7051 && asm_noperands (PATTERN (insn)) < 0)
7052 {
7053 /* The current insn is at the bundle start: emit the
7054 template. */
7055 gcc_assert (template0 >= 0);
7056 b = gen_bundle_selector (GEN_INT (template0));
7057 ia64_emit_insn_before (b, insn);
7058 b = PREV_INSN (insn);
7059 insn = b;
7060 /* See comment above in analogous place for emitting nops
7061 after the insn. */
7062 template0 = template1;
7063 template1 = -1;
7064 }
7065 /* Emit nops before the current insn. */
7066 for (i = 0; i < curr_state->before_nops_num; i++)
7067 {
7068 nop = gen_nop ();
7069 ia64_emit_insn_before (nop, insn);
7070 nop = PREV_INSN (insn);
7071 insn = nop;
7072 pos--;
7073 gcc_assert (pos >= 0);
7074 if (pos % 3 == 0)
7075 {
7076 /* See comment above in analogous place for emitting nops
7077 after the insn. */
7078 gcc_assert (template0 >= 0);
7079 b = gen_bundle_selector (GEN_INT (template0));
7080 ia64_emit_insn_before (b, insn);
7081 b = PREV_INSN (insn);
7082 insn = b;
7083 template0 = template1;
7084 template1 = -1;
7085 }
7086 }
7087 }
7088 if (ia64_tune == PROCESSOR_ITANIUM)
7089 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
7090 Itanium1 has a strange design: if the distance between an insn
7091 and a dependent MM-insn is less than 4 cycles then we have an
7092 additional 6-cycle stall. So we make the distance equal to 4
7093 cycles if it is less. */
7094 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7095 insn != NULL_RTX;
7096 insn = next_insn)
7097 {
7098 gcc_assert (INSN_P (insn)
7099 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7100 && GET_CODE (PATTERN (insn)) != USE
7101 && GET_CODE (PATTERN (insn)) != CLOBBER);
7102 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7103 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
7104 /* We found a MM-insn which needs additional cycles. */
7105 {
7106 rtx last;
7107 int i, j, n;
7108 int pred_stop_p;
7109
7110 /* Now we are searching for the template of the bundle in
7111 which the MM-insn is placed and the position of the
7112 insn in the bundle (0, 1, 2). We also check whether
7113 there is a stop before the insn. */
7114 last = prev_active_insn (insn);
7115 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7116 if (pred_stop_p)
7117 last = prev_active_insn (last);
7118 n = 0;
7119 for (;; last = prev_active_insn (last))
7120 if (recog_memoized (last) == CODE_FOR_bundle_selector)
7121 {
7122 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
7123 if (template0 == 9)
7124 /* The insn is in an MLX bundle. Change the template
7125 to MFI because we will add nops before the
7126 insn. This simplifies the subsequent code a lot. */
7127 PATTERN (last)
7128 = gen_bundle_selector (const2_rtx); /* -> MFI */
7129 break;
7130 }
7131 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
7132 && (ia64_safe_itanium_class (last)
7133 != ITANIUM_CLASS_IGNORE))
7134 n++;
7135 /* Sanity checks: the stop is not at the bundle
7136 start, there are no more than 3 insns in the bundle,
7137 and the MM-insn is not at the start of a bundle with
7138 template MLX. */
7139 gcc_assert ((!pred_stop_p || n)
7140 && n <= 2
7141 && (template0 != 9 || !n));
7142 /* Fill the rest of the original bundle with nops (emitted before the insn). */
7143 for (j = 3 - n; j > 0; j --)
7144 ia64_emit_insn_before (gen_nop (), insn);
7145 /* This takes into account that we will add N more nops
7146 before the insn later -- please see the code below. */
7147 add_cycles [INSN_UID (insn)]--;
7148 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7149 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7150 insn);
7151 if (pred_stop_p)
7152 add_cycles [INSN_UID (insn)]--;
7153 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7154 {
7155 /* Insert "MII;" template. */
7156 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
7157 insn);
7158 ia64_emit_insn_before (gen_nop (), insn);
7159 ia64_emit_insn_before (gen_nop (), insn);
7160 if (i > 1)
7161 {
7162 /* To decrease code size, we use the "MI;I;"
7163 template. */
7164 ia64_emit_insn_before
7165 (gen_insn_group_barrier (GEN_INT (3)), insn);
7166 i--;
7167 }
7168 ia64_emit_insn_before (gen_nop (), insn);
7169 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7170 insn);
7171 }
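/* A hedged sketch of what the loop above emits when two padding cycles
   fit in one bundle (the surrounding code treats template 0 as "MII",
   2 as "MFI" and 9 as "MLX"): roughly
       .mii
       nop.m 0
       nop.i 0 ;;
       nop.i 0 ;;
   i.e. the "MI;I;" form, instead of two full "MII;;" bundles of nops.  */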
7172 /* Put the MM-insn in the same slot of a bundle with the
7173 same template as the original one. */
7174 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
7175 insn);
7176 /* To put the insn in the same slot, add the necessary
7177 number of nops. */
7178 for (j = n; j > 0; j --)
7179 ia64_emit_insn_before (gen_nop (), insn);
7180 /* Put the stop back if the original bundle had one. */
7181 if (pred_stop_p)
7182 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7183 insn);
7184 }
7185 }
7186 free (index_to_bundle_states);
7187 finish_bundle_state_table ();
7188 bundling_p = 0;
7189 dfa_clean_insn_cache ();
7190 }
7191
7192 /* The following function is called at the end of scheduling a BB or
7193 EBB. After reload, it inserts stop bits and does insn bundling. */
7194
7195 static void
7196 ia64_sched_finish (FILE *dump, int sched_verbose)
7197 {
7198 if (sched_verbose)
7199 fprintf (dump, "// Finishing schedule.\n");
7200 if (!reload_completed)
7201 return;
7202 if (reload_completed)
7203 {
7204 final_emit_insn_group_barriers (dump);
7205 bundling (dump, sched_verbose, current_sched_info->prev_head,
7206 current_sched_info->next_tail);
7207 if (sched_verbose && dump)
7208 fprintf (dump, "// finishing %d-%d\n",
7209 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7210 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7211
7212 return;
7213 }
7214 }
7215
7216 /* The following function inserts stop bits in a scheduled BB or EBB. */
7217
7218 static void
7219 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7220 {
7221 rtx insn;
7222 int need_barrier_p = 0;
7223 rtx prev_insn = NULL_RTX;
7224
7225 init_insn_group_barriers ();
7226
7227 for (insn = NEXT_INSN (current_sched_info->prev_head);
7228 insn != current_sched_info->next_tail;
7229 insn = NEXT_INSN (insn))
7230 {
7231 if (GET_CODE (insn) == BARRIER)
7232 {
7233 rtx last = prev_active_insn (insn);
7234
7235 if (! last)
7236 continue;
7237 if (GET_CODE (last) == JUMP_INSN
7238 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7239 last = prev_active_insn (last);
7240 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7241 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7242
7243 init_insn_group_barriers ();
7244 need_barrier_p = 0;
7245 prev_insn = NULL_RTX;
7246 }
7247 else if (INSN_P (insn))
7248 {
7249 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7250 {
7251 init_insn_group_barriers ();
7252 need_barrier_p = 0;
7253 prev_insn = NULL_RTX;
7254 }
7255 else if (need_barrier_p || group_barrier_needed (insn))
7256 {
7257 if (TARGET_EARLY_STOP_BITS)
7258 {
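/* Here the stop bit is hoisted backwards: it is placed just after
   the last insn that already ended a group (mode TImode with
   stops_p set) instead of directly before INSN, and the
   group-barrier state is then replayed forward over the
   intervening insns via group_barrier_needed.  */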
7259 rtx last;
7260
7261 for (last = insn;
7262 last != current_sched_info->prev_head;
7263 last = PREV_INSN (last))
7264 if (INSN_P (last) && GET_MODE (last) == TImode
7265 && stops_p [INSN_UID (last)])
7266 break;
7267 if (last == current_sched_info->prev_head)
7268 last = insn;
7269 last = prev_active_insn (last);
7270 if (last
7271 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7272 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7273 last);
7274 init_insn_group_barriers ();
7275 for (last = NEXT_INSN (last);
7276 last != insn;
7277 last = NEXT_INSN (last))
7278 if (INSN_P (last))
7279 group_barrier_needed (last);
7280 }
7281 else
7282 {
7283 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7284 insn);
7285 init_insn_group_barriers ();
7286 }
7287 group_barrier_needed (insn);
7288 prev_insn = NULL_RTX;
7289 }
7290 else if (recog_memoized (insn) >= 0)
7291 prev_insn = insn;
7292 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7293 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7294 || asm_noperands (PATTERN (insn)) >= 0);
7295 }
7296 }
7297 }
7298
7299 \f
7300
7301 /* The following function returns the lookahead depth used by the
7302 first-cycle multipass DFA insn scheduler. */
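/* A hedged note: the value 6 presumably matches the two-bundle,
   six-slot issue window used during post-reload bundling, while 4 is
   used before reload.  */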
7303
7304 static int
7305 ia64_first_cycle_multipass_dfa_lookahead (void)
7306 {
7307 return (reload_completed ? 6 : 4);
7308 }
7309
7310 /* The following function initializes the variables `dfa_pre_cycle_insn' and `dfa_stop_insn'. */
7311
7312 static void
7313 ia64_init_dfa_pre_cycle_insn (void)
7314 {
7315 if (temp_dfa_state == NULL)
7316 {
7317 dfa_state_size = state_size ();
7318 temp_dfa_state = xmalloc (dfa_state_size);
7319 prev_cycle_state = xmalloc (dfa_state_size);
7320 }
7321 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7322 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7323 recog_memoized (dfa_pre_cycle_insn);
7324 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7325 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7326 recog_memoized (dfa_stop_insn);
7327 }
7328
7329 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7330 used by the DFA insn scheduler. */
7331
7332 static rtx
7333 ia64_dfa_pre_cycle_insn (void)
7334 {
7335 return dfa_pre_cycle_insn;
7336 }
7337
7338 /* The following function returns TRUE if PRODUCER (of type ilog or
7339 ld) produces the address for CONSUMER (of type st or stf). */
7340
7341 int
7342 ia64_st_address_bypass_p (rtx producer, rtx consumer)
7343 {
7344 rtx dest, reg, mem;
7345
7346 gcc_assert (producer && consumer);
7347 dest = ia64_single_set (producer);
7348 gcc_assert (dest);
7349 reg = SET_DEST (dest);
7350 gcc_assert (reg);
7351 if (GET_CODE (reg) == SUBREG)
7352 reg = SUBREG_REG (reg);
7353 gcc_assert (GET_CODE (reg) == REG);
7354
7355 dest = ia64_single_set (consumer);
7356 gcc_assert (dest);
7357 mem = SET_DEST (dest);
7358 gcc_assert (mem && GET_CODE (mem) == MEM);
7359 return reg_mentioned_p (reg, mem);
7360 }
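/* A hedged example (illustrative operands only): for
       add r14 = r32, r33        // PRODUCER, an ilog insn
       st8 [r14] = r35           // CONSUMER, r14 forms the store address
   the function above returns nonzero; the scheduler's bypass
   descriptions presumably use this to apply the special
   address-dependence latency.  */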
7361
7362 /* The following function returns TRUE if PRODUCER (of type ilog or
7363 ld) produces the address for CONSUMER (of type ld or fld). */
7364
7365 int
7366 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7367 {
7368 rtx dest, src, reg, mem;
7369
7370 gcc_assert (producer && consumer);
7371 dest = ia64_single_set (producer);
7372 gcc_assert (dest);
7373 reg = SET_DEST (dest);
7374 gcc_assert (reg);
7375 if (GET_CODE (reg) == SUBREG)
7376 reg = SUBREG_REG (reg);
7377 gcc_assert (GET_CODE (reg) == REG);
7378
7379 src = ia64_single_set (consumer);
7380 gcc_assert (src);
7381 mem = SET_SRC (src);
7382 gcc_assert (mem);
7383 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7384 mem = XVECEXP (mem, 0, 0);
7385 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7386 mem = XEXP (mem, 0);
7387
7388 /* Note that LO_SUM is used for GOT loads. */
7389 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
7390
7391 return reg_mentioned_p (reg, mem);
7392 }
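/* A hedged example for the load case (illustrative operands only):
       add r14 = r32, r33        // PRODUCER
       ld8 r15 = [r14]           // CONSUMER, r14 is the load address
   GOT loads appear as LO_SUM rather than MEM, which is why the
   function above accepts both forms.  */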
7393
7394 /* The following function returns TRUE if INSN produces an address
7395 for a load/store insn. We will place such insns into an M slot
7396 because that decreases their latency. */
7397
7398 int
7399 ia64_produce_address_p (rtx insn)
7400 {
7401 return insn->call;
7402 }
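/* Note (an assumption, not stated here): the `call' bit of INSN is
   being reused as a scratch flag; it is presumably set elsewhere in
   this file when scheduling finds that the insn feeds a load/store
   address, rather than indicating a real call.  */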
7403
7404 \f
7405 /* Emit pseudo-ops for the assembler to describe predicate relations.
7406 At present this assumes that we only consider predicate pairs to
7407 be mutex, and that the assembler can deduce proper values from
7408 straight-line code. */
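/* A hedged sketch of the effect: for each live even/odd predicate pair
   at a code label, a pred_rel_mutex note insn is emitted right after
   the label; it presumably expands to an assembler directive of the
   ".pred.rel" family declaring the two predicates mutually
   exclusive.  */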
7409
7410 static void
7411 emit_predicate_relation_info (void)
7412 {
7413 basic_block bb;
7414
7415 FOR_EACH_BB_REVERSE (bb)
7416 {
7417 int r;
7418 rtx head = BB_HEAD (bb);
7419
7420 /* We only need such notes at code labels. */
7421 if (GET_CODE (head) != CODE_LABEL)
7422 continue;
7423 if (GET_CODE (NEXT_INSN (head)) == NOTE
7424 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7425 head = NEXT_INSN (head);
7426
7427 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7428 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7429 {
7430 rtx p = gen_rtx_REG (BImode, r);
7431 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7432 if (head == BB_END (bb))
7433 BB_END (bb) = n;
7434 head = n;
7435 }
7436 }
7437
7438 /* Look for conditional calls that do not return, and protect predicate
7439 relations around them. Otherwise the assembler will assume the call
7440 returns, and complain about uses of call-clobbered predicates after
7441 the call. */
7442 FOR_EACH_BB_REVERSE (bb)
7443 {
7444 rtx insn = BB_HEAD (bb);
7445
7446 while (1)
7447 {
7448 if (GET_CODE (insn) == CALL_INSN
7449 && GET_CODE (PATTERN (insn)) == COND_EXEC
7450 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7451 {
7452 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7453 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7454 if (BB_HEAD (bb) == insn)
7455 BB_HEAD (bb) = b;
7456 if (BB_END (bb) == insn)
7457 BB_END (bb) = a;
7458 }
7459
7460 if (insn == BB_END (bb))
7461 break;
7462 insn = NEXT_INSN (insn);
7463 }
7464 }
7465 }
7466
7467 /* Perform machine dependent operations on the rtl chain INSNS. */
7468
7469 static void
7470 ia64_reorg (void)
7471 {
7472 /* We are freeing block_for_insn in the toplev to keep compatibility
7473 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7474 compute_bb_for_insn ();
7475
7476 /* If optimizing, we'll have split before scheduling. */
7477 if (optimize == 0)
7478 split_all_insns (0);
7479
7480 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7481 non-optimizing bootstrap. */
7482 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7483
7484 if (ia64_flag_schedule_insns2)
7485 {
7486 timevar_push (TV_SCHED2);
7487 ia64_final_schedule = 1;
7488
7489 initiate_bundle_states ();
7490 ia64_nop = make_insn_raw (gen_nop ());
7491 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7492 recog_memoized (ia64_nop);
7493 clocks_length = get_max_uid () + 1;
7494 stops_p = xcalloc (1, clocks_length);
7495 if (ia64_tune == PROCESSOR_ITANIUM)
7496 {
7497 clocks = xcalloc (clocks_length, sizeof (int));
7498 add_cycles = xcalloc (clocks_length, sizeof (int));
7499 }
7500 if (ia64_tune == PROCESSOR_ITANIUM2)
7501 {
7502 pos_1 = get_cpu_unit_code ("2_1");
7503 pos_2 = get_cpu_unit_code ("2_2");
7504 pos_3 = get_cpu_unit_code ("2_3");
7505 pos_4 = get_cpu_unit_code ("2_4");
7506 pos_5 = get_cpu_unit_code ("2_5");
7507 pos_6 = get_cpu_unit_code ("2_6");
7508 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7509 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7510 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7511 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7512 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7513 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7514 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7515 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7516 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7517 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7518 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7519 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7520 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7521 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7522 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7523 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7524 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7525 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7526 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7527 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7528 }
7529 else
7530 {
7531 pos_1 = get_cpu_unit_code ("1_1");
7532 pos_2 = get_cpu_unit_code ("1_2");
7533 pos_3 = get_cpu_unit_code ("1_3");
7534 pos_4 = get_cpu_unit_code ("1_4");
7535 pos_5 = get_cpu_unit_code ("1_5");
7536 pos_6 = get_cpu_unit_code ("1_6");
7537 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7538 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7539 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7540 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7541 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7542 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7543 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7544 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7545 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7546 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7547 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7548 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7549 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7550 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7551 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7552 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7553 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7554 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7555 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7556 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7557 }
7558 schedule_ebbs (dump_file);
7559 finish_bundle_states ();
7560 if (ia64_tune == PROCESSOR_ITANIUM)
7561 {
7562 free (add_cycles);
7563 free (clocks);
7564 }
7565 free (stops_p);
7566 emit_insn_group_barriers (dump_file);
7567
7568 ia64_final_schedule = 0;
7569 timevar_pop (TV_SCHED2);
7570 }
7571 else
7572 emit_all_insn_group_barriers (dump_file);
7573
7574 /* A call must not be the last instruction in a function, so that
7575 the return address is still within the function and unwinding
7576 works properly. Note that IA-64 differs from dwarf2 on this point. */
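/* A hedged sketch of the fix below: if the last active insn is a
   call, roughly
       ;;            // a stop, unless one was already present
       break.f 0
       ;;
   is appended so the return address no longer points past the end of
   the function.  */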
7577 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7578 {
7579 rtx insn;
7580 int saw_stop = 0;
7581
7582 insn = get_last_insn ();
7583 if (! INSN_P (insn))
7584 insn = prev_active_insn (insn);
7585 /* Skip over insns that expand to nothing. */
7586 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7587 {
7588 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7589 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7590 saw_stop = 1;
7591 insn = prev_active_insn (insn);
7592 }
7593 if (GET_CODE (insn) == CALL_INSN)
7594 {
7595 if (! saw_stop)
7596 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7597 emit_insn (gen_break_f ());
7598 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7599 }
7600 }
7601
7602 emit_predicate_relation_info ();
7603
7604 if (ia64_flag_var_tracking)
7605 {
7606 timevar_push (TV_VAR_TRACKING);
7607 variable_tracking_main ();
7608 timevar_pop (TV_VAR_TRACKING);
7609 }
7610 }
7611 \f
7612 /* Return true if REGNO is used by the epilogue. */
7613
7614 int
7615 ia64_epilogue_uses (int regno)
7616 {
7617 switch (regno)
7618 {
7619 case R_GR (1):
7620 /* With a call to a function in another module, we will write a new
7621 value to "gp". After returning from such a call, we need to make
7622 sure the function restores the original gp-value, even if the
7623 function itself does not use the gp anymore. */
7624 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7625
7626 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7627 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7628 /* For functions defined with the syscall_linkage attribute, all
7629 input registers are marked as live at all function exits. This
7630 prevents the register allocator from using the input registers,
7631 which in turn makes it possible to restart a system call after
7632 an interrupt without having to save/restore the input registers.
7633 This also prevents kernel data from leaking to application code. */
7634 return lookup_attribute ("syscall_linkage",
7635 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7636
7637 case R_BR (0):
7638 /* Conditional return patterns can't represent the use of `b0' as
7639 the return address, so we force the value live this way. */
7640 return 1;
7641
7642 case AR_PFS_REGNUM:
7643 /* Likewise for ar.pfs, which is used by br.ret. */
7644 return 1;
7645
7646 default:
7647 return 0;
7648 }
7649 }
7650
7651 /* Return true if REGNO is used by the frame unwinder. */
7652
7653 int
7654 ia64_eh_uses (int regno)
7655 {
7656 if (! reload_completed)
7657 return 0;
7658
7659 if (current_frame_info.reg_save_b0
7660 && regno == current_frame_info.reg_save_b0)
7661 return 1;
7662 if (current_frame_info.reg_save_pr
7663 && regno == current_frame_info.reg_save_pr)
7664 return 1;
7665 if (current_frame_info.reg_save_ar_pfs
7666 && regno == current_frame_info.reg_save_ar_pfs)
7667 return 1;
7668 if (current_frame_info.reg_save_ar_unat
7669 && regno == current_frame_info.reg_save_ar_unat)
7670 return 1;
7671 if (current_frame_info.reg_save_ar_lc
7672 && regno == current_frame_info.reg_save_ar_lc)
7673 return 1;
7674
7675 return 0;
7676 }
7677 \f
7678 /* Return true if this goes in small data/bss. */
7679
7680 /* ??? We could also support our own long data here, generating
7681 movl/add/ld8 instead of addl,ld8/ld8. This makes the code bigger, but
7682 should make the code faster because there is one less load. This also
7683 includes incomplete types which can't go in sdata/sbss. */
7684
7685 static bool
7686 ia64_in_small_data_p (tree exp)
7687 {
7688 if (TARGET_NO_SDATA)
7689 return false;
7690
7691 /* We want to merge strings, so we never consider them small data. */
7692 if (TREE_CODE (exp) == STRING_CST)
7693 return false;
7694
7695 /* Functions are never small data. */
7696 if (TREE_CODE (exp) == FUNCTION_DECL)
7697 return false;
7698
7699 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7700 {
7701 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7702
7703 if (strcmp (section, ".sdata") == 0
7704 || strncmp (section, ".sdata.", 7) == 0
7705 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
7706 || strcmp (section, ".sbss") == 0
7707 || strncmp (section, ".sbss.", 6) == 0
7708 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
7709 return true;
7710 }
7711 else
7712 {
7713 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7714
7715 /* If this is an incomplete type with size 0, then we can't put it
7716 in sdata because it might be too big when completed. */
7717 if (size > 0 && size <= ia64_section_threshold)
7718 return true;
7719 }
7720
7721 return false;
7722 }
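/* A hedged example: with the default ia64_section_threshold, an object
   like "static int counter;" ends up in .sbss/.sdata and can be
   addressed gp-relative, while a large array or an object with an
   explicit non-small section name is left out.  */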
7723 \f
7724 /* Output assembly directives for prologue regions. */
7725
7726 /* True if the current basic block is the last block of the function. */
7727
7728 static bool last_block;
7729
7730 /* True if we need a copy_state command at the start of the next block. */
7731
7732 static bool need_copy_state;
7733
7734 /* The function emits unwind directives for the start of an epilogue. */
7735
7736 static void
7737 process_epilogue (void)
7738 {
7739 /* If this isn't the last block of the function, then we need to label the
7740 current state, and copy it back in at the start of the next block. */
7741
7742 if (!last_block)
7743 {
7744 fprintf (asm_out_file, "\t.label_state %d\n",
7745 ++cfun->machine->state_num);
7746 need_copy_state = true;
7747 }
7748
7749 fprintf (asm_out_file, "\t.restore sp\n");
7750 }
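/* A hedged sketch of the output: for an epilogue that is not in the
   last block this emits
       .label_state N
       .restore sp
   and process_for_unwind_directive later emits ".body" and
   ".copy_state N" at the start of the following block.  */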
7751
7752 /* This function processes a SET pattern looking for specific patterns
7753 which result in emitting an assembly directive required for unwinding. */
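/* A hedged example of the mapping (register numbers illustrative only):
   a prologue like
       alloc r35 = ar.pfs, ...
       adds sp = -32, sp
       mov r34 = b0
   results in roughly
       .save ar.pfs, r35
       .fframe 32
       .save rp, r34
   via the cases below.  */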
7754
7755 static int
7756 process_set (FILE *asm_out_file, rtx pat)
7757 {
7758 rtx src = SET_SRC (pat);
7759 rtx dest = SET_DEST (pat);
7760 int src_regno, dest_regno;
7761
7762 /* Look for the ALLOC insn. */
7763 if (GET_CODE (src) == UNSPEC_VOLATILE
7764 && XINT (src, 1) == UNSPECV_ALLOC
7765 && GET_CODE (dest) == REG)
7766 {
7767 dest_regno = REGNO (dest);
7768
7769 /* If this is the final destination for ar.pfs, then this must
7770 be the alloc in the prologue. */
7771 if (dest_regno == current_frame_info.reg_save_ar_pfs)
7772 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7773 ia64_dbx_register_number (dest_regno));
7774 else
7775 {
7776 /* This must be an alloc before a sibcall. We must drop the
7777 old frame info. The easiest way to drop the old frame
7778 info is to ensure we had a ".restore sp" directive
7779 followed by a new prologue. If the procedure doesn't
7780 have a memory-stack frame, we'll issue a dummy ".restore
7781 sp" now. */
7782 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
7783 /* If we haven't done process_epilogue () yet, do it now. */
7784 process_epilogue ();
7785 fprintf (asm_out_file, "\t.prologue\n");
7786 }
7787 return 1;
7788 }
7789
7790 /* Look for SP = .... */
7791 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7792 {
7793 if (GET_CODE (src) == PLUS)
7794 {
7795 rtx op0 = XEXP (src, 0);
7796 rtx op1 = XEXP (src, 1);
7797
7798 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
7799
7800 if (INTVAL (op1) < 0)
7801 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7802 -INTVAL (op1));
7803 else
7804 process_epilogue ();
7805 }
7806 else
7807 {
7808 gcc_assert (GET_CODE (src) == REG
7809 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
7810 process_epilogue ();
7811 }
7812
7813 return 1;
7814 }
7815
7816 /* Register move we need to look at. */
7817 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7818 {
7819 src_regno = REGNO (src);
7820 dest_regno = REGNO (dest);
7821
7822 switch (src_regno)
7823 {
7824 case BR_REG (0):
7825 /* Saving return address pointer. */
7826 gcc_assert (dest_regno == current_frame_info.reg_save_b0);
7827 fprintf (asm_out_file, "\t.save rp, r%d\n",
7828 ia64_dbx_register_number (dest_regno));
7829 return 1;
7830
7831 case PR_REG (0):
7832 gcc_assert (dest_regno == current_frame_info.reg_save_pr);
7833 fprintf (asm_out_file, "\t.save pr, r%d\n",
7834 ia64_dbx_register_number (dest_regno));
7835 return 1;
7836
7837 case AR_UNAT_REGNUM:
7838 gcc_assert (dest_regno == current_frame_info.reg_save_ar_unat);
7839 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7840 ia64_dbx_register_number (dest_regno));
7841 return 1;
7842
7843 case AR_LC_REGNUM:
7844 gcc_assert (dest_regno == current_frame_info.reg_save_ar_lc);
7845 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7846 ia64_dbx_register_number (dest_regno));
7847 return 1;
7848
7849 case STACK_POINTER_REGNUM:
7850 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
7851 && frame_pointer_needed);
7852 fprintf (asm_out_file, "\t.vframe r%d\n",
7853 ia64_dbx_register_number (dest_regno));
7854 return 1;
7855
7856 default:
7857 /* Everything else should indicate being stored to memory. */
7858 gcc_unreachable ();
7859 }
7860 }
7861
7862 /* Memory store we need to look at. */
7863 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7864 {
7865 long off;
7866 rtx base;
7867 const char *saveop;
7868
7869 if (GET_CODE (XEXP (dest, 0)) == REG)
7870 {
7871 base = XEXP (dest, 0);
7872 off = 0;
7873 }
7874 else
7875 {
7876 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
7877 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
7878 base = XEXP (XEXP (dest, 0), 0);
7879 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7880 }
7881
7882 if (base == hard_frame_pointer_rtx)
7883 {
7884 saveop = ".savepsp";
7885 off = - off;
7886 }
7887 else
7888 {
7889 gcc_assert (base == stack_pointer_rtx);
7890 saveop = ".savesp";
7891 }
7892
7893 src_regno = REGNO (src);
7894 switch (src_regno)
7895 {
7896 case BR_REG (0):
7897 gcc_assert (!current_frame_info.reg_save_b0);
7898 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7899 return 1;
7900
7901 case PR_REG (0):
7902 gcc_assert (!current_frame_info.reg_save_pr);
7903 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7904 return 1;
7905
7906 case AR_LC_REGNUM:
7907 gcc_assert (!current_frame_info.reg_save_ar_lc);
7908 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7909 return 1;
7910
7911 case AR_PFS_REGNUM:
7912 gcc_assert (!current_frame_info.reg_save_ar_pfs);
7913 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7914 return 1;
7915
7916 case AR_UNAT_REGNUM:
7917 gcc_assert (!current_frame_info.reg_save_ar_unat);
7918 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7919 return 1;
7920
7921 case GR_REG (4):
7922 case GR_REG (5):
7923 case GR_REG (6):
7924 case GR_REG (7):
7925 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7926 1 << (src_regno - GR_REG (4)));
7927 return 1;
7928
7929 case BR_REG (1):
7930 case BR_REG (2):
7931 case BR_REG (3):
7932 case BR_REG (4):
7933 case BR_REG (5):
7934 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7935 1 << (src_regno - BR_REG (1)));
7936 return 1;
7937
7938 case FR_REG (2):
7939 case FR_REG (3):
7940 case FR_REG (4):
7941 case FR_REG (5):
7942 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7943 1 << (src_regno - FR_REG (2)));
7944 return 1;
7945
7946 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7947 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7948 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7949 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7950 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7951 1 << (src_regno - FR_REG (12)));
7952 return 1;
7953
7954 default:
7955 return 0;
7956 }
7957 }
7958
7959 return 0;
7960 }
7961
7962
7963 /* This function looks at a single insn and emits any directives
7964 required to unwind this insn. */
7965 void
7966 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7967 {
7968 if (flag_unwind_tables
7969 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7970 {
7971 rtx pat;
7972
7973 if (GET_CODE (insn) == NOTE
7974 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7975 {
7976 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7977
7978 /* Restore unwind state from immediately before the epilogue. */
7979 if (need_copy_state)
7980 {
7981 fprintf (asm_out_file, "\t.body\n");
7982 fprintf (asm_out_file, "\t.copy_state %d\n",
7983 cfun->machine->state_num);
7984 need_copy_state = false;
7985 }
7986 }
7987
7988 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7989 return;
7990
7991 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7992 if (pat)
7993 pat = XEXP (pat, 0);
7994 else
7995 pat = PATTERN (insn);
7996
7997 switch (GET_CODE (pat))
7998 {
7999 case SET:
8000 process_set (asm_out_file, pat);
8001 break;
8002
8003 case PARALLEL:
8004 {
8005 int par_index;
8006 int limit = XVECLEN (pat, 0);
8007 for (par_index = 0; par_index < limit; par_index++)
8008 {
8009 rtx x = XVECEXP (pat, 0, par_index);
8010 if (GET_CODE (x) == SET)
8011 process_set (asm_out_file, x);
8012 }
8013 break;
8014 }
8015
8016 default:
8017 gcc_unreachable ();
8018 }
8019 }
8020 }
8021
8022 \f
8023 enum ia64_builtins
8024 {
8025 IA64_BUILTIN_BSP,
8026 IA64_BUILTIN_FLUSHRS
8027 };
8028
8029 void
8030 ia64_init_builtins (void)
8031 {
8032 tree fpreg_type;
8033 tree float80_type;
8034
8035 /* The __fpreg type. */
8036 fpreg_type = make_node (REAL_TYPE);
8037 /* ??? The back end should know to load/save __fpreg variables using
8038 the ldf.fill and stf.spill instructions. */
8039 TYPE_PRECISION (fpreg_type) = 80;
8040 layout_type (fpreg_type);
8041 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
8042
8043 /* The __float80 type. */
8044 float80_type = make_node (REAL_TYPE);
8045 TYPE_PRECISION (float80_type) = 80;
8046 layout_type (float80_type);
8047 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
8048
8049 /* The __float128 type. */
8050 if (!TARGET_HPUX)
8051 {
8052 tree float128_type = make_node (REAL_TYPE);
8053 TYPE_PRECISION (float128_type) = 128;
8054 layout_type (float128_type);
8055 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
8056 }
8057 else
8058 /* Under HPUX, this is a synonym for "long double". */
8059 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
8060 "__float128");
8061
8062 #define def_builtin(name, type, code) \
8063 lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
8064 NULL, NULL_TREE)
8065
8066 def_builtin ("__builtin_ia64_bsp",
8067 build_function_type (ptr_type_node, void_list_node),
8068 IA64_BUILTIN_BSP);
8069
8070 def_builtin ("__builtin_ia64_flushrs",
8071 build_function_type (void_type_node, void_list_node),
8072 IA64_BUILTIN_FLUSHRS);
8073
8074 #undef def_builtin
8075 }
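/* A hedged usage sketch (user code, not part of this file):
       void *bsp = __builtin_ia64_bsp ();   // read the RSE backing-store pointer
       __builtin_ia64_flushrs ();           // flush dirty stacked registers to memory
   The prototypes registered above are void -> void * and
   void -> void respectively.  */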
8076
8077 rtx
8078 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
8079 enum machine_mode mode ATTRIBUTE_UNUSED,
8080 int ignore ATTRIBUTE_UNUSED)
8081 {
8082 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8083 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8084
8085 switch (fcode)
8086 {
8087 case IA64_BUILTIN_BSP:
8088 if (! target || ! register_operand (target, DImode))
8089 target = gen_reg_rtx (DImode);
8090 emit_insn (gen_bsp_value (target));
8091 #ifdef POINTERS_EXTEND_UNSIGNED
8092 target = convert_memory_address (ptr_mode, target);
8093 #endif
8094 return target;
8095
8096 case IA64_BUILTIN_FLUSHRS:
8097 emit_insn (gen_flushrs ());
8098 return const0_rtx;
8099
8100 default:
8101 break;
8102 }
8103
8104 return NULL_RTX;
8105 }
8106
8107 /* On HP-UX IA64, aggregate parameters are passed in the most
8108 significant bits of the stack slot. */
8109
8110 enum direction
8111 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8112 {
8113 /* Exception to normal case for structures/unions/etc. */
8114
8115 if (type && AGGREGATE_TYPE_P (type)
8116 && int_size_in_bytes (type) < UNITS_PER_WORD)
8117 return upward;
8118
8119 /* Fall back to the default. */
8120 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
8121 }
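/* A hedged example: a 4-byte struct passed by value on HP-UX occupies
   the most significant 4 bytes of its 8-byte slot (padded "upward"),
   whereas scalar arguments fall through to the default padding.  */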
8122
8123 /* Linked list of all external functions that are to be emitted by GCC.
8124 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8125 order to avoid putting out names that are never really used. */
8126
8127 struct extern_func_list GTY(())
8128 {
8129 struct extern_func_list *next;
8130 tree decl;
8131 };
8132
8133 static GTY(()) struct extern_func_list *extern_func_head;
8134
8135 static void
8136 ia64_hpux_add_extern_decl (tree decl)
8137 {
8138 struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8139
8140 p->decl = decl;
8141 p->next = extern_func_head;
8142 extern_func_head = p;
8143 }
8144
8145 /* Print out the list of used global functions. */
8146
8147 static void
8148 ia64_hpux_file_end (void)
8149 {
8150 struct extern_func_list *p;
8151
8152 for (p = extern_func_head; p; p = p->next)
8153 {
8154 tree decl = p->decl;
8155 tree id = DECL_ASSEMBLER_NAME (decl);
8156
8157 gcc_assert (id);
8158
8159 if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8160 {
8161 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8162
8163 TREE_ASM_WRITTEN (decl) = 1;
8164 (*targetm.asm_out.globalize_label) (asm_out_file, name);
8165 fputs (TYPE_ASM_OP, asm_out_file);
8166 assemble_name (asm_out_file, name);
8167 fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8168 }
8169 }
8170
8171 extern_func_head = 0;
8172 }
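/* A hedged sketch of the per-function output above (exact spelling
   depends on TYPE_ASM_OP and TYPE_OPERAND_FMT):
       .global foo
       .type foo, @function
   emitted only for declarations that were referenced but never
   defined in this translation unit.  */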
8173
8174 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
8175 modes of word_mode and larger. Rename the TFmode libfuncs using the
8176 HPUX conventions. __divtf3 is used for XFmode; we need to keep it for
8177 backward compatibility. */
8178
8179 static void
8180 ia64_init_libfuncs (void)
8181 {
8182 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
8183 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
8184 set_optab_libfunc (smod_optab, SImode, "__modsi3");
8185 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
8186
8187 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8188 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8189 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8190 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8191 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8192
8193 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8194 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8195 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8196 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8197 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8198 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8199
8200 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8201 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8202 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8203 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8204
8205 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8206 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
8207 }
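/* A hedged example of the effect: a TFmode addition such as "a + b" is
   emitted as a call to _U_Qfadd rather than to the default __addtf3,
   matching the HP-UX quad-precision library interface.  */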
8208
8209 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8210
8211 static void
8212 ia64_hpux_init_libfuncs (void)
8213 {
8214 ia64_init_libfuncs ();
8215
8216 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8217 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8218 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8219
8220 /* ia64_expand_compare uses this. */
8221 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8222
8223 /* These should never be used. */
8224 set_optab_libfunc (eq_optab, TFmode, 0);
8225 set_optab_libfunc (ne_optab, TFmode, 0);
8226 set_optab_libfunc (gt_optab, TFmode, 0);
8227 set_optab_libfunc (ge_optab, TFmode, 0);
8228 set_optab_libfunc (lt_optab, TFmode, 0);
8229 set_optab_libfunc (le_optab, TFmode, 0);
8230 }
8231
8232 /* Rename the division and modulus functions in VMS. */
8233
8234 static void
8235 ia64_vms_init_libfuncs (void)
8236 {
8237 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8238 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8239 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8240 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8241 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8242 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8243 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8244 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8245 }
8246
8247 /* Rename the TFmode libfuncs available from soft-fp in glibc using
8248 the HPUX conventions. */
8249
8250 static void
8251 ia64_sysv4_init_libfuncs (void)
8252 {
8253 ia64_init_libfuncs ();
8254
8255 /* These functions are not part of the HPUX TFmode interface. We
8256 use them instead of _U_Qfcmp, which doesn't work the way we
8257 expect. */
8258 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
8259 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
8260 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
8261 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
8262 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
8263 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
8264
8265 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8266 glibc doesn't have them. */
8267 }
8268 \f
8269 /* Switch to the section to which we should output X. The only thing
8270 special we do here is to honor small data. */
8271
8272 static void
8273 ia64_select_rtx_section (enum machine_mode mode, rtx x,
8274 unsigned HOST_WIDE_INT align)
8275 {
8276 if (GET_MODE_SIZE (mode) > 0
8277 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8278 sdata_section ();
8279 else
8280 default_elf_select_rtx_section (mode, x, align);
8281 }
8282
8283 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8284 Pretend flag_pic is always set. */
8285
8286 static void
8287 ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8288 {
8289 default_elf_select_section_1 (exp, reloc, align, true);
8290 }
8291
8292 static void
8293 ia64_rwreloc_unique_section (tree decl, int reloc)
8294 {
8295 default_unique_section_1 (decl, reloc, true);
8296 }
8297
8298 static void
8299 ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8300 unsigned HOST_WIDE_INT align)
8301 {
8302 int save_pic = flag_pic;
8303 flag_pic = 1;
8304 ia64_select_rtx_section (mode, x, align);
8305 flag_pic = save_pic;
8306 }
8307
8308 #ifndef TARGET_RWRELOC
8309 #define TARGET_RWRELOC flag_pic
8310 #endif
8311
8312 static unsigned int
8313 ia64_section_type_flags (tree decl, const char *name, int reloc)
8314 {
8315 unsigned int flags = 0;
8316
8317 if (strcmp (name, ".sdata") == 0
8318 || strncmp (name, ".sdata.", 7) == 0
8319 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
8320 || strncmp (name, ".sdata2.", 8) == 0
8321 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
8322 || strcmp (name, ".sbss") == 0
8323 || strncmp (name, ".sbss.", 6) == 0
8324 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
8325 flags = SECTION_SMALL;
8326
8327 flags |= default_section_type_flags_1 (decl, name, reloc, TARGET_RWRELOC);
8328 return flags;
8329 }
8330
8331 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8332 structure type and the address of the return value should be passed
8333 in out0, rather than in r8. */
8334
8335 static bool
8336 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8337 {
8338 tree ret_type = TREE_TYPE (fntype);
8339
8340 /* The Itanium C++ ABI requires that out0, rather than r8, be used
8341 as the structure return address parameter, if the return value
8342 type has a non-trivial copy constructor or destructor. It is not
8343 clear if this same convention should be used for other
8344 programming languages. Until G++ 3.4, we incorrectly used r8 for
8345 these return values. */
8346 return (abi_version_at_least (2)
8347 && ret_type
8348 && TYPE_MODE (ret_type) == BLKmode
8349 && TREE_ADDRESSABLE (ret_type)
8350 && strcmp (lang_hooks.name, "GNU C++") == 0);
8351 }
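/* A hedged example: for a C++ function returning a class with a
   non-trivial destructor by value, the hidden return-slot address goes
   in out0 and any "this" pointer is shifted to the next slot -- see
   the thunk code below, which accounts for exactly this case.  */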
8352
8353 /* Output the assembler code for a thunk function. THUNK_DECL is the
8354 declaration for the thunk function itself, FUNCTION is the decl for
8355 the target function. DELTA is an immediate constant offset to be
8356 added to THIS. If VCALL_OFFSET is nonzero, the word at
8357 *(*this + vcall_offset) should be added to THIS. */
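/* A hedged illustration: such thunks typically come from C++ multiple
   inheritance or covariant returns, where calling a virtual function
   through a secondary base requires adjusting `this' by DELTA (and,
   for virtual bases, by a vtable-supplied VCALL_OFFSET) before
   tail-calling FUNCTION.  */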
8358
8359 static void
8360 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8361 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8362 tree function)
8363 {
8364 rtx this, insn, funexp;
8365 unsigned int this_parmno;
8366 unsigned int this_regno;
8367
8368 reload_completed = 1;
8369 epilogue_completed = 1;
8370 no_new_pseudos = 1;
8371 reset_block_changes ();
8372
8373 /* Set things up as ia64_expand_prologue might. */
8374 last_scratch_gr_reg = 15;
8375
8376 memset (&current_frame_info, 0, sizeof (current_frame_info));
8377 current_frame_info.spill_cfa_off = -16;
8378 current_frame_info.n_input_regs = 1;
8379 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8380
8381 /* Mark the end of the (empty) prologue. */
8382 emit_note (NOTE_INSN_PROLOGUE_END);
8383
8384 /* Figure out whether "this" will be the first parameter (the
8385 typical case) or the second parameter (as happens when the
8386 virtual function returns certain class objects). */
8387 this_parmno
8388 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
8389 ? 1 : 0);
8390 this_regno = IN_REG (this_parmno);
8391 if (!TARGET_REG_NAMES)
8392 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
8393
8394 this = gen_rtx_REG (Pmode, this_regno);
8395 if (TARGET_ILP32)
8396 {
8397 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
8398 REG_POINTER (tmp) = 1;
8399 if (delta && CONST_OK_FOR_I (delta))
8400 {
8401 emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8402 delta = 0;
8403 }
8404 else
8405 emit_insn (gen_ptr_extend (this, tmp));
8406 }
8407
8408 /* Apply the constant offset, if required. */
8409 if (delta)
8410 {
8411 rtx delta_rtx = GEN_INT (delta);
8412
8413 if (!CONST_OK_FOR_I (delta))
8414 {
8415 rtx tmp = gen_rtx_REG (Pmode, 2);
8416 emit_move_insn (tmp, delta_rtx);
8417 delta_rtx = tmp;
8418 }
8419 emit_insn (gen_adddi3 (this, this, delta_rtx));
8420 }
8421
8422 /* Apply the offset from the vtable, if required. */
8423 if (vcall_offset)
8424 {
8425 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8426 rtx tmp = gen_rtx_REG (Pmode, 2);
8427
8428 if (TARGET_ILP32)
8429 {
8430 rtx t = gen_rtx_REG (ptr_mode, 2);
8431 REG_POINTER (t) = 1;
8432 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8433 if (CONST_OK_FOR_I (vcall_offset))
8434 {
8435 emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8436 vcall_offset_rtx));
8437 vcall_offset = 0;
8438 }
8439 else
8440 emit_insn (gen_ptr_extend (tmp, t));
8441 }
8442 else
8443 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8444
8445 if (vcall_offset)
8446 {
8447 if (!CONST_OK_FOR_J (vcall_offset))
8448 {
8449 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8450 emit_move_insn (tmp2, vcall_offset_rtx);
8451 vcall_offset_rtx = tmp2;
8452 }
8453 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8454 }
8455
8456 if (TARGET_ILP32)
8457 emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8458 gen_rtx_MEM (ptr_mode, tmp));
8459 else
8460 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8461
8462 emit_insn (gen_adddi3 (this, this, tmp));
8463 }
8464
8465 /* Generate a tail call to the target function. */
8466 if (! TREE_USED (function))
8467 {
8468 assemble_external (function);
8469 TREE_USED (function) = 1;
8470 }
8471 funexp = XEXP (DECL_RTL (function), 0);
8472 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8473 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8474 insn = get_last_insn ();
8475 SIBLING_CALL_P (insn) = 1;
8476
8477 /* Code generation for calls relies on splitting. */
8478 reload_completed = 1;
8479 epilogue_completed = 1;
8480 try_split (PATTERN (insn), insn, 0);
8481
8482 emit_barrier ();
8483
8484 /* Run just enough of rest_of_compilation to get the insns emitted.
8485 There's not really enough bulk here to make other passes such as
8486 instruction scheduling worthwhile. Note that use_thunk calls
8487 assemble_start_function and assemble_end_function. */
8488
8489 insn_locators_initialize ();
8490 emit_all_insn_group_barriers (NULL);
8491 insn = get_insns ();
8492 shorten_branches (insn);
8493 final_start_function (insn, file, 1);
8494 final (insn, file, 1);
8495 final_end_function ();
8496
8497 reload_completed = 0;
8498 epilogue_completed = 0;
8499 no_new_pseudos = 0;
8500 }
8501
8502 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
8503
8504 static rtx
8505 ia64_struct_value_rtx (tree fntype,
8506 int incoming ATTRIBUTE_UNUSED)
8507 {
8508 if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
8509 return NULL_RTX;
8510 return gen_rtx_REG (Pmode, GR_REG (8));
8511 }
8512
8513 static bool
8514 ia64_scalar_mode_supported_p (enum machine_mode mode)
8515 {
8516 switch (mode)
8517 {
8518 case QImode:
8519 case HImode:
8520 case SImode:
8521 case DImode:
8522 case TImode:
8523 return true;
8524
8525 case SFmode:
8526 case DFmode:
8527 case XFmode:
8528 return true;
8529
8530 case TFmode:
8531 return TARGET_HPUX;
8532
8533 default:
8534 return false;
8535 }
8536 }
8537
8538 static bool
8539 ia64_vector_mode_supported_p (enum machine_mode mode)
8540 {
8541 switch (mode)
8542 {
8543 case V8QImode:
8544 case V4HImode:
8545 case V2SImode:
8546 return true;
8547
8548 case V2SFmode:
8549 return true;
8550
8551 default:
8552 return false;
8553 }
8554 }
8555
8556 #include "gt-ia64.h"