re PR target/11535 (__builtin_return_address may not work on ia64)
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "except.h"
41 #include "function.h"
42 #include "ggc.h"
43 #include "basic-block.h"
44 #include "toplev.h"
45 #include "sched-int.h"
46 #include "timevar.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "tm_p.h"
50 #include "hashtab.h"
51 #include "langhooks.h"
52 #include "cfglayout.h"
53
54 /* This is used for communication between ASM_OUTPUT_LABEL and
55 ASM_OUTPUT_LABELREF. */
56 int ia64_asm_output_label = 0;
57
58 /* Define the information needed to generate branch and scc insns. This is
59 stored from the compare operation. */
60 struct rtx_def * ia64_compare_op0;
61 struct rtx_def * ia64_compare_op1;
62
63 /* Register names for ia64_expand_prologue. */
64 static const char * const ia64_reg_numbers[96] =
65 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
66 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
67 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
68 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
69 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
70 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
71 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
72 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
73 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
74 "r104","r105","r106","r107","r108","r109","r110","r111",
75 "r112","r113","r114","r115","r116","r117","r118","r119",
76 "r120","r121","r122","r123","r124","r125","r126","r127"};
77
78 /* ??? These strings could be shared with REGISTER_NAMES. */
79 static const char * const ia64_input_reg_names[8] =
80 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
81
82 /* ??? These strings could be shared with REGISTER_NAMES. */
83 static const char * const ia64_local_reg_names[80] =
84 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
85 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
86 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
87 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
88 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
89 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
90 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
91 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
92 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
93 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
94
95 /* ??? These strings could be shared with REGISTER_NAMES. */
96 static const char * const ia64_output_reg_names[8] =
97 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
98
99 /* String used with the -mfixed-range= option. */
100 const char *ia64_fixed_range_string;
101
102 /* Determines whether we use adds, addl, or movl to generate our
103 TLS immediate offsets. */
104 int ia64_tls_size = 22;
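/* For reference: an IA-64 "adds" takes a 14-bit immediate, "addl" a 22-bit
   immediate, and "movl" can build a full 64-bit constant, so the natural
   size choices here are 14 (adds), 22 (addl, the default above) and 64
   (movl).  */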
105
106 /* String used with the -mtls-size= option. */
107 const char *ia64_tls_size_string;
108
109 /* Which cpu are we scheduling for. */
110 enum processor_type ia64_tune;
111
112 /* String used with the -tune= option. */
113 const char *ia64_tune_string;
114
115 /* Determines whether we run our final scheduling pass or not. We always
116 avoid the normal second scheduling pass. */
117 static int ia64_flag_schedule_insns2;
118
119 /* Variables which are this size or smaller are put in the sdata/sbss
120 sections. */
121
122 unsigned int ia64_section_threshold;
123
124 /* The following variable is used by the DFA insn scheduler. The value is
125 TRUE if we do insn bundling instead of insn scheduling. */
126 int bundling_p = 0;
127
128 /* Structure to be filled in by ia64_compute_frame_size with register
129 save masks and offsets for the current function. */
130
131 struct ia64_frame_info
132 {
133 HOST_WIDE_INT total_size; /* size of the stack frame, not including
134 the caller's scratch area. */
135 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
136 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
137 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
138 HARD_REG_SET mask; /* mask of saved registers. */
139 unsigned int gr_used_mask; /* mask of registers in use as gr spill
140 registers or long-term scratches. */
141 int n_spilled; /* number of spilled registers. */
142 int reg_fp; /* register for fp. */
143 int reg_save_b0; /* save register for b0. */
144 int reg_save_pr; /* save register for prs. */
145 int reg_save_ar_pfs; /* save register for ar.pfs. */
146 int reg_save_ar_unat; /* save register for ar.unat. */
147 int reg_save_ar_lc; /* save register for ar.lc. */
148 int reg_save_gp; /* save register for gp. */
149 int n_input_regs; /* number of input registers used. */
150 int n_local_regs; /* number of local registers used. */
151 int n_output_regs; /* number of output registers used. */
152 int n_rotate_regs; /* number of rotating registers used. */
153
154 char need_regstk; /* true if a .regstk directive needed. */
155 char initialized; /* true if the data is finalized. */
156 };
157
158 /* Current frame information calculated by ia64_compute_frame_size. */
159 static struct ia64_frame_info current_frame_info;
160 \f
161 static int ia64_use_dfa_pipeline_interface PARAMS ((void));
162 static int ia64_first_cycle_multipass_dfa_lookahead PARAMS ((void));
163 static void ia64_dependencies_evaluation_hook PARAMS ((rtx, rtx));
164 static void ia64_init_dfa_pre_cycle_insn PARAMS ((void));
165 static rtx ia64_dfa_pre_cycle_insn PARAMS ((void));
166 static int ia64_first_cycle_multipass_dfa_lookahead_guard PARAMS ((rtx));
167 static int ia64_dfa_new_cycle PARAMS ((FILE *, int, rtx, int, int, int *));
168 static rtx gen_tls_get_addr PARAMS ((void));
169 static rtx gen_thread_pointer PARAMS ((void));
170 static rtx ia64_expand_tls_address PARAMS ((enum tls_model, rtx, rtx));
171 static int find_gr_spill PARAMS ((int));
172 static int next_scratch_gr_reg PARAMS ((void));
173 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
174 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
175 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
176 static void finish_spill_pointers PARAMS ((void));
177 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
178 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
179 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
180 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
181 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
182 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
183
184 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
185 static bool ia64_function_ok_for_sibcall PARAMS ((tree, tree));
186 static bool ia64_rtx_costs PARAMS ((rtx, int, int, int *));
187 static void fix_range PARAMS ((const char *));
188 static struct machine_function * ia64_init_machine_status PARAMS ((void));
189 static void emit_insn_group_barriers PARAMS ((FILE *));
190 static void emit_all_insn_group_barriers PARAMS ((FILE *));
191 static void final_emit_insn_group_barriers PARAMS ((FILE *));
192 static void emit_predicate_relation_info PARAMS ((void));
193 static void ia64_reorg PARAMS ((void));
194 static bool ia64_in_small_data_p PARAMS ((tree));
195 static void process_epilogue PARAMS ((void));
196 static int process_set PARAMS ((FILE *, rtx));
197
198 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
199 tree, rtx));
200 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
201 tree, rtx));
202 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode,
203 enum machine_mode,
204 int, tree, rtx));
205 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
206 tree, rtx));
207 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
208 static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
209 static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
210 static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
211 static void ia64_output_function_end_prologue PARAMS ((FILE *));
212
213 static int ia64_issue_rate PARAMS ((void));
214 static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
215 static void ia64_sched_init PARAMS ((FILE *, int, int));
216 static void ia64_sched_finish PARAMS ((FILE *, int));
217 static int ia64_dfa_sched_reorder PARAMS ((FILE *, int, rtx *, int *,
218 int, int));
219 static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
220 static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
221 static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
222
223 static struct bundle_state *get_free_bundle_state PARAMS ((void));
224 static void free_bundle_state PARAMS ((struct bundle_state *));
225 static void initiate_bundle_states PARAMS ((void));
226 static void finish_bundle_states PARAMS ((void));
227 static unsigned bundle_state_hash PARAMS ((const void *));
228 static int bundle_state_eq_p PARAMS ((const void *, const void *));
229 static int insert_bundle_state PARAMS ((struct bundle_state *));
230 static void initiate_bundle_state_table PARAMS ((void));
231 static void finish_bundle_state_table PARAMS ((void));
232 static int try_issue_nops PARAMS ((struct bundle_state *, int));
233 static int try_issue_insn PARAMS ((struct bundle_state *, rtx));
234 static void issue_nops_and_insn PARAMS ((struct bundle_state *, int,
235 rtx, int, int));
236 static int get_max_pos PARAMS ((state_t));
237 static int get_template PARAMS ((state_t, int));
238
239 static rtx get_next_important_insn PARAMS ((rtx, rtx));
240 static void bundling PARAMS ((FILE *, int, rtx, rtx));
241
242 static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
243 HOST_WIDE_INT, tree));
244 static void ia64_file_start PARAMS ((void));
245
246 static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
247 unsigned HOST_WIDE_INT));
248 static void ia64_rwreloc_select_section PARAMS ((tree, int,
249 unsigned HOST_WIDE_INT))
250 ATTRIBUTE_UNUSED;
251 static void ia64_rwreloc_unique_section PARAMS ((tree, int))
252 ATTRIBUTE_UNUSED;
253 static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
254 unsigned HOST_WIDE_INT))
255 ATTRIBUTE_UNUSED;
256 static unsigned int ia64_rwreloc_section_type_flags
257 PARAMS ((tree, const char *, int))
258 ATTRIBUTE_UNUSED;
259
260 static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
261 ATTRIBUTE_UNUSED;
262 static void ia64_hpux_file_end PARAMS ((void))
263 ATTRIBUTE_UNUSED;
264
265 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
266 static void ia64_encode_section_info (tree, rtx, int);
267
268 \f
269 /* Table of valid machine attributes. */
270 static const struct attribute_spec ia64_attribute_table[] =
271 {
272 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
273 { "syscall_linkage", 0, 0, false, true, true, NULL },
274 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
275 { NULL, 0, 0, false, false, false, NULL }
276 };
277
278 /* Initialize the GCC target structure. */
279 #undef TARGET_ATTRIBUTE_TABLE
280 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
281
282 #undef TARGET_INIT_BUILTINS
283 #define TARGET_INIT_BUILTINS ia64_init_builtins
284
285 #undef TARGET_EXPAND_BUILTIN
286 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
287
288 #undef TARGET_ASM_BYTE_OP
289 #define TARGET_ASM_BYTE_OP "\tdata1\t"
290 #undef TARGET_ASM_ALIGNED_HI_OP
291 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
292 #undef TARGET_ASM_ALIGNED_SI_OP
293 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
294 #undef TARGET_ASM_ALIGNED_DI_OP
295 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
296 #undef TARGET_ASM_UNALIGNED_HI_OP
297 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
298 #undef TARGET_ASM_UNALIGNED_SI_OP
299 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
300 #undef TARGET_ASM_UNALIGNED_DI_OP
301 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
302 #undef TARGET_ASM_INTEGER
303 #define TARGET_ASM_INTEGER ia64_assemble_integer
304
305 #undef TARGET_ASM_FUNCTION_PROLOGUE
306 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
307 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
308 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
309 #undef TARGET_ASM_FUNCTION_EPILOGUE
310 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
311
312 #undef TARGET_IN_SMALL_DATA_P
313 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
314
315 #undef TARGET_SCHED_ADJUST_COST
316 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
317 #undef TARGET_SCHED_ISSUE_RATE
318 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
319 #undef TARGET_SCHED_VARIABLE_ISSUE
320 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
321 #undef TARGET_SCHED_INIT
322 #define TARGET_SCHED_INIT ia64_sched_init
323 #undef TARGET_SCHED_FINISH
324 #define TARGET_SCHED_FINISH ia64_sched_finish
325 #undef TARGET_SCHED_REORDER
326 #define TARGET_SCHED_REORDER ia64_sched_reorder
327 #undef TARGET_SCHED_REORDER2
328 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
329
330 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
331 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
332
333 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
334 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface
335
336 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
337 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
338
339 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
340 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
341 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
342 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
343
344 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
345 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
346 ia64_first_cycle_multipass_dfa_lookahead_guard
347
348 #undef TARGET_SCHED_DFA_NEW_CYCLE
349 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
350
351 #ifdef HAVE_AS_TLS
352 #undef TARGET_HAVE_TLS
353 #define TARGET_HAVE_TLS true
354 #endif
355
356 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
357 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
358
359 #undef TARGET_ASM_OUTPUT_MI_THUNK
360 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
361 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
362 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
363
364 #undef TARGET_ASM_FILE_START
365 #define TARGET_ASM_FILE_START ia64_file_start
366
367 #undef TARGET_RTX_COSTS
368 #define TARGET_RTX_COSTS ia64_rtx_costs
369 #undef TARGET_ADDRESS_COST
370 #define TARGET_ADDRESS_COST hook_int_rtx_0
371
372 #undef TARGET_MACHINE_DEPENDENT_REORG
373 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
374
375 #undef TARGET_ENCODE_SECTION_INFO
376 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
377
378 struct gcc_target targetm = TARGET_INITIALIZER;
379 \f
380 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
381
382 int
383 call_operand (op, mode)
384 rtx op;
385 enum machine_mode mode;
386 {
387 if (mode != GET_MODE (op) && mode != VOIDmode)
388 return 0;
389
390 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
391 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
392 }
393
394 /* Return 1 if OP refers to a symbol in the sdata section. */
395
396 int
397 sdata_symbolic_operand (op, mode)
398 rtx op;
399 enum machine_mode mode ATTRIBUTE_UNUSED;
400 {
401 switch (GET_CODE (op))
402 {
403 case CONST:
404 if (GET_CODE (XEXP (op, 0)) != PLUS
405 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
406 break;
407 op = XEXP (XEXP (op, 0), 0);
408 /* FALLTHRU */
409
410 case SYMBOL_REF:
411 if (CONSTANT_POOL_ADDRESS_P (op))
412 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
413 else
414 return SYMBOL_REF_LOCAL_P (op) && SYMBOL_REF_SMALL_P (op);
415
416 default:
417 break;
418 }
419
420 return 0;
421 }
422
423 int
424 small_addr_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
425 {
426 return SYMBOL_REF_SMALL_ADDR_P (op);
427 }
428
429 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
430
431 int
432 got_symbolic_operand (op, mode)
433 rtx op;
434 enum machine_mode mode ATTRIBUTE_UNUSED;
435 {
436 switch (GET_CODE (op))
437 {
438 case CONST:
439 op = XEXP (op, 0);
440 if (GET_CODE (op) != PLUS)
441 return 0;
442 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
443 return 0;
444 op = XEXP (op, 1);
445 if (GET_CODE (op) != CONST_INT)
446 return 0;
447
448 return 1;
449
450 /* Ok if we're not using GOT entries at all. */
451 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
452 return 1;
453
454 /* "Ok" while emitting rtl, since otherwise we won't be provided
455 with the entire offset during emission, which makes it very
456 hard to split the offset into high and low parts. */
457 if (rtx_equal_function_value_matters)
458 return 1;
459
460 /* Force the low 14 bits of the constant to zero so that we do not
461 use up so many GOT entries. */
462 return (INTVAL (op) & 0x3fff) == 0;
463
464 case SYMBOL_REF:
465 if (SYMBOL_REF_SMALL_ADDR_P (op))
466 return 0;
467 case LABEL_REF:
468 return 1;
469
470 default:
471 break;
472 }
473 return 0;
474 }
475
476 /* Return 1 if OP refers to a symbol. */
477
478 int
479 symbolic_operand (op, mode)
480 rtx op;
481 enum machine_mode mode ATTRIBUTE_UNUSED;
482 {
483 switch (GET_CODE (op))
484 {
485 case CONST:
486 case SYMBOL_REF:
487 case LABEL_REF:
488 return 1;
489
490 default:
491 break;
492 }
493 return 0;
494 }
495
496 /* Return tls_model if OP refers to a TLS symbol. */
497
498 int
499 tls_symbolic_operand (op, mode)
500 rtx op;
501 enum machine_mode mode ATTRIBUTE_UNUSED;
502 {
503 if (GET_CODE (op) != SYMBOL_REF)
504 return 0;
505 return SYMBOL_REF_TLS_MODEL (op);
506 }
507
508
509 /* Return 1 if OP refers to a function. */
510
511 int
512 function_operand (op, mode)
513 rtx op;
514 enum machine_mode mode ATTRIBUTE_UNUSED;
515 {
516 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))
517 return 1;
518 else
519 return 0;
520 }
521
522 /* Return 1 if OP is setjmp or a similar function. */
523
524 /* ??? This is an unsatisfying solution. Should rethink. */
525
526 int
527 setjmp_operand (op, mode)
528 rtx op;
529 enum machine_mode mode ATTRIBUTE_UNUSED;
530 {
531 const char *name;
532 int retval = 0;
533
534 if (GET_CODE (op) != SYMBOL_REF)
535 return 0;
536
537 name = XSTR (op, 0);
538
539 /* The following code is borrowed from special_function_p in calls.c. */
540
541 /* Disregard prefix _, __ or __x. */
542 if (name[0] == '_')
543 {
544 if (name[1] == '_' && name[2] == 'x')
545 name += 3;
546 else if (name[1] == '_')
547 name += 2;
548 else
549 name += 1;
550 }
551
552 if (name[0] == 's')
553 {
554 retval
555 = ((name[1] == 'e'
556 && (! strcmp (name, "setjmp")
557 || ! strcmp (name, "setjmp_syscall")))
558 || (name[1] == 'i'
559 && ! strcmp (name, "sigsetjmp"))
560 || (name[1] == 'a'
561 && ! strcmp (name, "savectx")));
562 }
563 else if ((name[0] == 'q' && name[1] == 's'
564 && ! strcmp (name, "qsetjmp"))
565 || (name[0] == 'v' && name[1] == 'f'
566 && ! strcmp (name, "vfork")))
567 retval = 1;
568
569 return retval;
570 }
571
572 /* Return 1 if OP is a general operand, excluding tls symbolic operands. */
573
574 int
575 move_operand (op, mode)
576 rtx op;
577 enum machine_mode mode;
578 {
579 return general_operand (op, mode) && !tls_symbolic_operand (op, mode);
580 }
581
582 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
583
584 int
585 gr_register_operand (op, mode)
586 rtx op;
587 enum machine_mode mode;
588 {
589 if (! register_operand (op, mode))
590 return 0;
591 if (GET_CODE (op) == SUBREG)
592 op = SUBREG_REG (op);
593 if (GET_CODE (op) == REG)
594 {
595 unsigned int regno = REGNO (op);
596 if (regno < FIRST_PSEUDO_REGISTER)
597 return GENERAL_REGNO_P (regno);
598 }
599 return 1;
600 }
601
602 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
603
604 int
605 fr_register_operand (op, mode)
606 rtx op;
607 enum machine_mode mode;
608 {
609 if (! register_operand (op, mode))
610 return 0;
611 if (GET_CODE (op) == SUBREG)
612 op = SUBREG_REG (op);
613 if (GET_CODE (op) == REG)
614 {
615 unsigned int regno = REGNO (op);
616 if (regno < FIRST_PSEUDO_REGISTER)
617 return FR_REGNO_P (regno);
618 }
619 return 1;
620 }
621
622 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
623
624 int
625 grfr_register_operand (op, mode)
626 rtx op;
627 enum machine_mode mode;
628 {
629 if (! register_operand (op, mode))
630 return 0;
631 if (GET_CODE (op) == SUBREG)
632 op = SUBREG_REG (op);
633 if (GET_CODE (op) == REG)
634 {
635 unsigned int regno = REGNO (op);
636 if (regno < FIRST_PSEUDO_REGISTER)
637 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
638 }
639 return 1;
640 }
641
642 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
643
644 int
645 gr_nonimmediate_operand (op, mode)
646 rtx op;
647 enum machine_mode mode;
648 {
649 if (! nonimmediate_operand (op, mode))
650 return 0;
651 if (GET_CODE (op) == SUBREG)
652 op = SUBREG_REG (op);
653 if (GET_CODE (op) == REG)
654 {
655 unsigned int regno = REGNO (op);
656 if (regno < FIRST_PSEUDO_REGISTER)
657 return GENERAL_REGNO_P (regno);
658 }
659 return 1;
660 }
661
662 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
663
664 int
665 fr_nonimmediate_operand (op, mode)
666 rtx op;
667 enum machine_mode mode;
668 {
669 if (! nonimmediate_operand (op, mode))
670 return 0;
671 if (GET_CODE (op) == SUBREG)
672 op = SUBREG_REG (op);
673 if (GET_CODE (op) == REG)
674 {
675 unsigned int regno = REGNO (op);
676 if (regno < FIRST_PSEUDO_REGISTER)
677 return FR_REGNO_P (regno);
678 }
679 return 1;
680 }
681
682 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
683
684 int
685 grfr_nonimmediate_operand (op, mode)
686 rtx op;
687 enum machine_mode mode;
688 {
689 if (! nonimmediate_operand (op, mode))
690 return 0;
691 if (GET_CODE (op) == SUBREG)
692 op = SUBREG_REG (op);
693 if (GET_CODE (op) == REG)
694 {
695 unsigned int regno = REGNO (op);
696 if (regno < FIRST_PSEUDO_REGISTER)
697 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
698 }
699 return 1;
700 }
701
702 /* Return 1 if OP is a GR register operand, or zero. */
703
704 int
705 gr_reg_or_0_operand (op, mode)
706 rtx op;
707 enum machine_mode mode;
708 {
709 return (op == const0_rtx || gr_register_operand (op, mode));
710 }
711
712 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
713
714 int
715 gr_reg_or_5bit_operand (op, mode)
716 rtx op;
717 enum machine_mode mode;
718 {
719 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
720 || GET_CODE (op) == CONSTANT_P_RTX
721 || gr_register_operand (op, mode));
722 }
723
724 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
725
726 int
727 gr_reg_or_6bit_operand (op, mode)
728 rtx op;
729 enum machine_mode mode;
730 {
731 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
732 || GET_CODE (op) == CONSTANT_P_RTX
733 || gr_register_operand (op, mode));
734 }
735
736 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
737
738 int
739 gr_reg_or_8bit_operand (op, mode)
740 rtx op;
741 enum machine_mode mode;
742 {
743 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
744 || GET_CODE (op) == CONSTANT_P_RTX
745 || gr_register_operand (op, mode));
746 }
747
748 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
749
750 int
751 grfr_reg_or_8bit_operand (op, mode)
752 rtx op;
753 enum machine_mode mode;
754 {
755 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
756 || GET_CODE (op) == CONSTANT_P_RTX
757 || grfr_register_operand (op, mode));
758 }
759
760 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
761 operand. */
762
763 int
764 gr_reg_or_8bit_adjusted_operand (op, mode)
765 rtx op;
766 enum machine_mode mode;
767 {
768 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
769 || GET_CODE (op) == CONSTANT_P_RTX
770 || gr_register_operand (op, mode));
771 }
772
773 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
774 immediate and an 8 bit adjusted immediate operand. This is necessary
775 because when we emit a compare, we don't know what the condition will be,
776 so we need the union of the immediates accepted by GT and LT. */
777
778 int
779 gr_reg_or_8bit_and_adjusted_operand (op, mode)
780 rtx op;
781 enum machine_mode mode;
782 {
783 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
784 && CONST_OK_FOR_L (INTVAL (op)))
785 || GET_CODE (op) == CONSTANT_P_RTX
786 || gr_register_operand (op, mode));
787 }
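/* Concretely: once the branch sense is chosen, a test such as "x >= C" may
   be emitted as "x > C - 1" (and "x < C" as "x <= C - 1"), so the immediate
   must fit in 8 bits both as written (the K check) and after the off-by-one
   adjustment (the L check); hence the double test above.  */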
788
789 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
790
791 int
792 gr_reg_or_14bit_operand (op, mode)
793 rtx op;
794 enum machine_mode mode;
795 {
796 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
797 || GET_CODE (op) == CONSTANT_P_RTX
798 || gr_register_operand (op, mode));
799 }
800
801 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
802
803 int
804 gr_reg_or_22bit_operand (op, mode)
805 rtx op;
806 enum machine_mode mode;
807 {
808 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
809 || GET_CODE (op) == CONSTANT_P_RTX
810 || gr_register_operand (op, mode));
811 }
812
813 /* Return 1 if OP is a 6 bit immediate operand. */
814
815 int
816 shift_count_operand (op, mode)
817 rtx op;
818 enum machine_mode mode ATTRIBUTE_UNUSED;
819 {
820 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
821 || GET_CODE (op) == CONSTANT_P_RTX);
822 }
823
824 /* Return 1 if OP is a 5 bit immediate operand. */
825
826 int
827 shift_32bit_count_operand (op, mode)
828 rtx op;
829 enum machine_mode mode ATTRIBUTE_UNUSED;
830 {
831 return ((GET_CODE (op) == CONST_INT
832 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
833 || GET_CODE (op) == CONSTANT_P_RTX);
834 }
835
836 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
837
838 int
839 shladd_operand (op, mode)
840 rtx op;
841 enum machine_mode mode ATTRIBUTE_UNUSED;
842 {
843 return (GET_CODE (op) == CONST_INT
844 && (INTVAL (op) == 2 || INTVAL (op) == 4
845 || INTVAL (op) == 8 || INTVAL (op) == 16));
846 }
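/* These are the scale factors accepted by shladd; e.g.
       shladd r8 = r32, 2, r33
   computes r8 = (r32 << 2) + r33, i.e. r32 * 4 + r33, so the values 2, 4,
   8 and 16 correspond to the instruction's shift counts 1 through 4.  */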
847
848 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
849
850 int
851 fetchadd_operand (op, mode)
852 rtx op;
853 enum machine_mode mode ATTRIBUTE_UNUSED;
854 {
855 return (GET_CODE (op) == CONST_INT
856 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
857 INTVAL (op) == -4 || INTVAL (op) == -1 ||
858 INTVAL (op) == 1 || INTVAL (op) == 4 ||
859 INTVAL (op) == 8 || INTVAL (op) == 16));
860 }
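/* These are exactly the increments the fetchadd instruction can encode; e.g.
       fetchadd4.acq r8 = [r32], 1
   atomically adds 1 to the 4-byte value addressed by r32 and returns the
   old value in r8.  */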
861
862 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
863
864 int
865 fr_reg_or_fp01_operand (op, mode)
866 rtx op;
867 enum machine_mode mode;
868 {
869 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
870 || fr_register_operand (op, mode));
871 }
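/* Zero and one are special-cased because the IA-64 registers f0 and f1 are
   hard-wired to +0.0 and +1.0, so those constants never need a load.  */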
872
873 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
874 POST_MODIFY with a REG as displacement. */
875
876 int
877 destination_operand (op, mode)
878 rtx op;
879 enum machine_mode mode;
880 {
881 if (! nonimmediate_operand (op, mode))
882 return 0;
883 if (GET_CODE (op) == MEM
884 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
885 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
886 return 0;
887 return 1;
888 }
889
890 /* Like memory_operand, but don't allow post-increments. */
891
892 int
893 not_postinc_memory_operand (op, mode)
894 rtx op;
895 enum machine_mode mode;
896 {
897 return (memory_operand (op, mode)
898 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
899 }
900
901 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
902 signed immediate operand. */
903
904 int
905 normal_comparison_operator (op, mode)
906 register rtx op;
907 enum machine_mode mode;
908 {
909 enum rtx_code code = GET_CODE (op);
910 return ((mode == VOIDmode || GET_MODE (op) == mode)
911 && (code == EQ || code == NE
912 || code == GT || code == LE || code == GTU || code == LEU));
913 }
914
915 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
916 signed immediate operand. */
917
918 int
919 adjusted_comparison_operator (op, mode)
920 register rtx op;
921 enum machine_mode mode;
922 {
923 enum rtx_code code = GET_CODE (op);
924 return ((mode == VOIDmode || GET_MODE (op) == mode)
925 && (code == LT || code == GE || code == LTU || code == GEU));
926 }
927
928 /* Return 1 if this is a signed inequality operator. */
929
930 int
931 signed_inequality_operator (op, mode)
932 register rtx op;
933 enum machine_mode mode;
934 {
935 enum rtx_code code = GET_CODE (op);
936 return ((mode == VOIDmode || GET_MODE (op) == mode)
937 && (code == GE || code == GT
938 || code == LE || code == LT));
939 }
940
941 /* Return 1 if this operator is valid for predication. */
942
943 int
944 predicate_operator (op, mode)
945 register rtx op;
946 enum machine_mode mode;
947 {
948 enum rtx_code code = GET_CODE (op);
949 return ((GET_MODE (op) == mode || mode == VOIDmode)
950 && (code == EQ || code == NE));
951 }
952
953 /* Return 1 if this operator can be used in a conditional operation. */
954
955 int
956 condop_operator (op, mode)
957 register rtx op;
958 enum machine_mode mode;
959 {
960 enum rtx_code code = GET_CODE (op);
961 return ((GET_MODE (op) == mode || mode == VOIDmode)
962 && (code == PLUS || code == MINUS || code == AND
963 || code == IOR || code == XOR));
964 }
965
966 /* Return 1 if this is the ar.lc register. */
967
968 int
969 ar_lc_reg_operand (op, mode)
970 register rtx op;
971 enum machine_mode mode;
972 {
973 return (GET_MODE (op) == DImode
974 && (mode == DImode || mode == VOIDmode)
975 && GET_CODE (op) == REG
976 && REGNO (op) == AR_LC_REGNUM);
977 }
978
979 /* Return 1 if this is the ar.ccv register. */
980
981 int
982 ar_ccv_reg_operand (op, mode)
983 register rtx op;
984 enum machine_mode mode;
985 {
986 return ((GET_MODE (op) == mode || mode == VOIDmode)
987 && GET_CODE (op) == REG
988 && REGNO (op) == AR_CCV_REGNUM);
989 }
990
991 /* Return 1 if this is the ar.pfs register. */
992
993 int
994 ar_pfs_reg_operand (op, mode)
995 register rtx op;
996 enum machine_mode mode;
997 {
998 return ((GET_MODE (op) == mode || mode == VOIDmode)
999 && GET_CODE (op) == REG
1000 && REGNO (op) == AR_PFS_REGNUM);
1001 }
1002
1003 /* Like general_operand, but don't allow (mem (addressof)). */
1004
1005 int
1006 general_tfmode_operand (op, mode)
1007 rtx op;
1008 enum machine_mode mode;
1009 {
1010 if (! general_operand (op, mode))
1011 return 0;
1012 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
1013 return 0;
1014 return 1;
1015 }
1016
1017 /* Similarly. */
1018
1019 int
1020 destination_tfmode_operand (op, mode)
1021 rtx op;
1022 enum machine_mode mode;
1023 {
1024 if (! destination_operand (op, mode))
1025 return 0;
1026 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
1027 return 0;
1028 return 1;
1029 }
1030
1031 /* Similarly. */
1032
1033 int
1034 tfreg_or_fp01_operand (op, mode)
1035 rtx op;
1036 enum machine_mode mode;
1037 {
1038 if (GET_CODE (op) == SUBREG)
1039 return 0;
1040 return fr_reg_or_fp01_operand (op, mode);
1041 }
1042
1043 /* Return 1 if OP is valid as a base register in a reg + offset address. */
1044
1045 int
1046 basereg_operand (op, mode)
1047 rtx op;
1048 enum machine_mode mode;
1049 {
1050 /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
1051 checks from pa.c basereg_operand as well? Seems to be OK without them
1052 in test runs. */
1053
1054 return (register_operand (op, mode) &&
1055 REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
1056 }
1057 \f
1058 typedef enum
1059 {
1060 ADDR_AREA_NORMAL, /* normal address area */
1061 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
1062 }
1063 ia64_addr_area;
1064
1065 static GTY(()) tree small_ident1;
1066 static GTY(()) tree small_ident2;
1067
1068 static void
1069 init_idents (void)
1070 {
1071 if (small_ident1 == 0)
1072 {
1073 small_ident1 = get_identifier ("small");
1074 small_ident2 = get_identifier ("__small__");
1075 }
1076 }
1077
1078 /* Retrieve the address area that has been chosen for the given decl. */
1079
1080 static ia64_addr_area
1081 ia64_get_addr_area (tree decl)
1082 {
1083 tree model_attr;
1084
1085 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
1086 if (model_attr)
1087 {
1088 tree id;
1089
1090 init_idents ();
1091 id = TREE_VALUE (TREE_VALUE (model_attr));
1092 if (id == small_ident1 || id == small_ident2)
1093 return ADDR_AREA_SMALL;
1094 }
1095 return ADDR_AREA_NORMAL;
1096 }
1097
1098 static tree
1099 ia64_handle_model_attribute (tree *node, tree name,
1100 tree args,
1101 int flags ATTRIBUTE_UNUSED,
1102 bool *no_add_attrs)
1103 {
1104 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
1105 ia64_addr_area area;
1106 tree arg, decl = *node;
1107
1108 init_idents ();
1109 arg = TREE_VALUE (args);
1110 if (arg == small_ident1 || arg == small_ident2)
1111 {
1112 addr_area = ADDR_AREA_SMALL;
1113 }
1114 else
1115 {
1116 warning ("invalid argument of `%s' attribute",
1117 IDENTIFIER_POINTER (name));
1118 *no_add_attrs = true;
1119 }
1120
1121 switch (TREE_CODE (decl))
1122 {
1123 case VAR_DECL:
1124 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
1125 == FUNCTION_DECL)
1126 && !TREE_STATIC (decl))
1127 {
1128 error ("%Ha an address area attribute cannot be specified for "
1129 "local variables", &DECL_SOURCE_LOCATION (decl), decl);
1130 *no_add_attrs = true;
1131 }
1132 area = ia64_get_addr_area (decl);
1133 if (area != ADDR_AREA_NORMAL && addr_area != area)
1134 {
1135 error ("%Ha address area of '%s' conflicts with previous "
1136 "declaration", &DECL_SOURCE_LOCATION (decl), decl);
1137 *no_add_attrs = true;
1138 }
1139 break;
1140
1141 case FUNCTION_DECL:
1142 error ("%Ha address area attribute cannot be specified for functions",
1143 &DECL_SOURCE_LOCATION (decl), decl);
1144 *no_add_attrs = true;
1145 break;
1146
1147 default:
1148 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1149 *no_add_attrs = true;
1150 break;
1151 }
1152
1153 return NULL_TREE;
1154 }
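/* Illustrative use of the attribute handled above (the argument is the bare
   identifier checked against "small"/"__small__" in init_idents):

       static int counter __attribute__ ((model (small)));

   This marks COUNTER as living in the small address area reachable by addl,
   so references to it need not go through a movl or a GOT load.  */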
1155
1156 static void
1157 ia64_encode_addr_area (tree decl, rtx symbol)
1158 {
1159 int flags;
1160
1161 flags = SYMBOL_REF_FLAGS (symbol);
1162 switch (ia64_get_addr_area (decl))
1163 {
1164 case ADDR_AREA_NORMAL: break;
1165 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
1166 default: abort ();
1167 }
1168 SYMBOL_REF_FLAGS (symbol) = flags;
1169 }
1170
1171 static void
1172 ia64_encode_section_info (tree decl, rtx rtl, int first)
1173 {
1174 default_encode_section_info (decl, rtl, first);
1175
1176 if (TREE_CODE (decl) == VAR_DECL
1177 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
1178 ia64_encode_addr_area (decl, XEXP (rtl, 0));
1179 }
1180 \f
1181 /* Return 1 if the operands of a move are ok. */
1182
1183 int
1184 ia64_move_ok (dst, src)
1185 rtx dst, src;
1186 {
1187 /* If we're under init_recog_no_volatile, we'll not be able to use
1188 memory_operand. So check the code directly and don't worry about
1189 the validity of the underlying address, which should have been
1190 checked elsewhere anyway. */
1191 if (GET_CODE (dst) != MEM)
1192 return 1;
1193 if (GET_CODE (src) == MEM)
1194 return 0;
1195 if (register_operand (src, VOIDmode))
1196 return 1;
1197
1198 /* Otherwise, this must be a constant: 0 for integer modes, or 0.0 or 1.0 for floating-point modes. */
1199 if (INTEGRAL_MODE_P (GET_MODE (dst)))
1200 return src == const0_rtx;
1201 else
1202 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
1203 }
1204
1205 /* Return 0 if we are doing C++ code. This optimization fails with
1206 C++ because of GNAT c++/6685. */
1207
1208 int
1209 addp4_optimize_ok (op1, op2)
1210 rtx op1, op2;
1211 {
1212
1213 if (!strcmp (lang_hooks.name, "GNU C++"))
1214 return 0;
1215
1216 return (basereg_operand (op1, GET_MODE(op1)) !=
1217 basereg_operand (op2, GET_MODE(op2)));
1218 }
1219
1220 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
1221 Return the length of the field, or <= 0 on failure. */
1222
1223 int
1224 ia64_depz_field_mask (rop, rshift)
1225 rtx rop, rshift;
1226 {
1227 unsigned HOST_WIDE_INT op = INTVAL (rop);
1228 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
1229
1230 /* Get rid of the zero bits we're shifting in. */
1231 op >>= shift;
1232
1233 /* We must now have a solid block of 1's at bit 0. */
1234 return exact_log2 (op + 1);
1235 }
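/* Worked example: for ROP = 0xff0 and RSHIFT = 4, the shift leaves 0xff and
   exact_log2 (0xff + 1) = 8, so the dep.z field is 8 bits wide.  If the
   shifted value is not a solid block of low-order ones (e.g. 0xf0f), then
   op + 1 is not a power of two and exact_log2 returns -1, failing the test.  */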
1236
1237 /* Expand a symbolic constant load. */
1238
1239 void
1240 ia64_expand_load_address (dest, src)
1241 rtx dest, src;
1242 {
1243 if (tls_symbolic_operand (src, VOIDmode))
1244 abort ();
1245 if (GET_CODE (dest) != REG)
1246 abort ();
1247
1248 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1249 having to pointer-extend the value afterward. Other forms of address
1250 computation below are also more natural to compute as 64-bit quantities.
1251 If we've been given an SImode destination register, change it. */
1252 if (GET_MODE (dest) != Pmode)
1253 dest = gen_rtx_REG (Pmode, REGNO (dest));
1254
1255 if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
1256 {
1257 emit_insn (gen_rtx_SET (VOIDmode, dest, src));
1258 return;
1259 }
1260 else if (TARGET_AUTO_PIC)
1261 {
1262 emit_insn (gen_load_gprel64 (dest, src));
1263 return;
1264 }
1265 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1266 {
1267 emit_insn (gen_load_fptr (dest, src));
1268 return;
1269 }
1270 else if (sdata_symbolic_operand (src, VOIDmode))
1271 {
1272 emit_insn (gen_load_gprel (dest, src));
1273 return;
1274 }
1275
1276 if (GET_CODE (src) == CONST
1277 && GET_CODE (XEXP (src, 0)) == PLUS
1278 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
1279 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
1280 {
1281 rtx sym = XEXP (XEXP (src, 0), 0);
1282 HOST_WIDE_INT ofs, hi, lo;
1283
1284 /* Split the offset into a sign extended 14-bit low part
1285 and a complementary high part. */
1286 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
1287 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
1288 hi = ofs - lo;
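      /* Example: for ofs = 0x6000 this gives lo = -0x2000 and hi = 0x8000;
	 lo always lands in the signed 14-bit range [-0x2000, 0x1fff], and
	 hi + lo reassembles the original offset.  */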
1289
1290 ia64_expand_load_address (dest, plus_constant (sym, hi));
1291 emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
1292 }
1293 else
1294 {
1295 rtx tmp;
1296
1297 tmp = gen_rtx_HIGH (Pmode, src);
1298 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1299 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1300
1301 tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
1302 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1303 }
1304 }
1305
1306 static GTY(()) rtx gen_tls_tga;
1307 static rtx
1308 gen_tls_get_addr ()
1309 {
1310 if (!gen_tls_tga)
1311 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1312 return gen_tls_tga;
1313 }
1314
1315 static GTY(()) rtx thread_pointer_rtx;
1316 static rtx
1317 gen_thread_pointer ()
1318 {
1319 if (!thread_pointer_rtx)
1320 {
1321 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1322 RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
1323 }
1324 return thread_pointer_rtx;
1325 }
1326
1327 static rtx
1328 ia64_expand_tls_address (tls_kind, op0, op1)
1329 enum tls_model tls_kind;
1330 rtx op0, op1;
1331 {
1332 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1333
1334 switch (tls_kind)
1335 {
1336 case TLS_MODEL_GLOBAL_DYNAMIC:
1337 start_sequence ();
1338
1339 tga_op1 = gen_reg_rtx (Pmode);
1340 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1341 tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1342 RTX_UNCHANGING_P (tga_op1) = 1;
1343
1344 tga_op2 = gen_reg_rtx (Pmode);
1345 emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
1346 tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
1347 RTX_UNCHANGING_P (tga_op2) = 1;
1348
1349 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1350 LCT_CONST, Pmode, 2, tga_op1,
1351 Pmode, tga_op2, Pmode);
1352
1353 insns = get_insns ();
1354 end_sequence ();
1355
1356 emit_libcall_block (insns, op0, tga_ret, op1);
1357 return NULL_RTX;
1358
1359 case TLS_MODEL_LOCAL_DYNAMIC:
1360 /* ??? This isn't the completely proper way to do local-dynamic
1361 If the call to __tls_get_addr is used only by a single symbol,
1362 then we should (somehow) move the dtprel to the second arg
1363 to avoid the extra add. */
1364 start_sequence ();
1365
1366 tga_op1 = gen_reg_rtx (Pmode);
1367 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1368 tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1369 RTX_UNCHANGING_P (tga_op1) = 1;
1370
1371 tga_op2 = const0_rtx;
1372
1373 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1374 LCT_CONST, Pmode, 2, tga_op1,
1375 Pmode, tga_op2, Pmode);
1376
1377 insns = get_insns ();
1378 end_sequence ();
1379
1380 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1381 UNSPEC_LD_BASE);
1382 tmp = gen_reg_rtx (Pmode);
1383 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1384
1385 if (register_operand (op0, Pmode))
1386 tga_ret = op0;
1387 else
1388 tga_ret = gen_reg_rtx (Pmode);
1389 if (TARGET_TLS64)
1390 {
1391 emit_insn (gen_load_dtprel (tga_ret, op1));
1392 emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
1393 }
1394 else
1395 emit_insn (gen_add_dtprel (tga_ret, tmp, op1));
1396
1397 return (tga_ret == op0 ? NULL_RTX : tga_ret);
1398
1399 case TLS_MODEL_INITIAL_EXEC:
1400 tmp = gen_reg_rtx (Pmode);
1401 emit_insn (gen_load_ltoff_tprel (tmp, op1));
1402 tmp = gen_rtx_MEM (Pmode, tmp);
1403 RTX_UNCHANGING_P (tmp) = 1;
1404 tmp = force_reg (Pmode, tmp);
1405
1406 if (register_operand (op0, Pmode))
1407 op1 = op0;
1408 else
1409 op1 = gen_reg_rtx (Pmode);
1410 emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));
1411
1412 return (op1 == op0 ? NULL_RTX : op1);
1413
1414 case TLS_MODEL_LOCAL_EXEC:
1415 if (register_operand (op0, Pmode))
1416 tmp = op0;
1417 else
1418 tmp = gen_reg_rtx (Pmode);
1419 if (TARGET_TLS64)
1420 {
1421 emit_insn (gen_load_tprel (tmp, op1));
1422 emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
1423 }
1424 else
1425 emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));
1426
1427 return (tmp == op0 ? NULL_RTX : tmp);
1428
1429 default:
1430 abort ();
1431 }
1432 }
1433
1434 rtx
1435 ia64_expand_move (op0, op1)
1436 rtx op0, op1;
1437 {
1438 enum machine_mode mode = GET_MODE (op0);
1439
1440 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1441 op1 = force_reg (mode, op1);
1442
1443 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1444 {
1445 enum tls_model tls_kind;
1446 if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
1447 return ia64_expand_tls_address (tls_kind, op0, op1);
1448
1449 if (!TARGET_NO_PIC && reload_completed)
1450 {
1451 ia64_expand_load_address (op0, op1);
1452 return NULL_RTX;
1453 }
1454 }
1455
1456 return op1;
1457 }
1458
1459 /* Split a move from OP1 to OP0 conditional on COND. */
1460
1461 void
1462 ia64_emit_cond_move (op0, op1, cond)
1463 rtx op0, op1, cond;
1464 {
1465 rtx insn, first = get_last_insn ();
1466
1467 emit_move_insn (op0, op1);
1468
1469 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1470 if (INSN_P (insn))
1471 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1472 PATTERN (insn));
1473 }
1474
1475 /* Split a post-reload TImode reference into two DImode components. */
1476
1477 rtx
1478 ia64_split_timode (out, in, scratch)
1479 rtx out[2];
1480 rtx in, scratch;
1481 {
1482 switch (GET_CODE (in))
1483 {
1484 case REG:
1485 out[0] = gen_rtx_REG (DImode, REGNO (in));
1486 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
1487 return NULL_RTX;
1488
1489 case MEM:
1490 {
1491 rtx base = XEXP (in, 0);
1492
1493 switch (GET_CODE (base))
1494 {
1495 case REG:
1496 out[0] = adjust_address (in, DImode, 0);
1497 break;
1498 case POST_MODIFY:
1499 base = XEXP (base, 0);
1500 out[0] = adjust_address (in, DImode, 0);
1501 break;
1502
1503 /* Since we're changing the mode, we need to change to POST_MODIFY
1504 as well to preserve the size of the increment. Either that or
1505 do the update in two steps, but we've already got this scratch
1506 register handy so let's use it. */
1507 case POST_INC:
1508 base = XEXP (base, 0);
1509 out[0]
1510 = change_address (in, DImode,
1511 gen_rtx_POST_MODIFY
1512 (Pmode, base, plus_constant (base, 16)));
1513 break;
1514 case POST_DEC:
1515 base = XEXP (base, 0);
1516 out[0]
1517 = change_address (in, DImode,
1518 gen_rtx_POST_MODIFY
1519 (Pmode, base, plus_constant (base, -16)));
1520 break;
1521 default:
1522 abort ();
1523 }
1524
1525 if (scratch == NULL_RTX)
1526 abort ();
1527 out[1] = change_address (in, DImode, scratch);
1528 return gen_adddi3 (scratch, base, GEN_INT (8));
1529 }
1530
1531 case CONST_INT:
1532 case CONST_DOUBLE:
1533 split_double (in, &out[0], &out[1]);
1534 return NULL_RTX;
1535
1536 default:
1537 abort ();
1538 }
1539 }
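/* For instance, a TImode value in r32 comes back as the DImode pair
   r32/r33, while a (mem:TI (post_inc ...)) reference is roughly rewritten
   as a POST_MODIFY by 16 for the low word plus an access through SCRATCH
   for the high word, the returned add insn computing scratch = base + 8.  */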
1540
1541 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1542 through memory plus an extra GR scratch register. Except that you can
1543 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1544 SECONDARY_RELOAD_CLASS, but not both.
1545
1546 We got into problems in the first place by allowing a construct like
1547 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1548 This solution attempts to prevent this situation from occurring. When
1549 we see something like the above, we spill the inner register to memory. */
1550
1551 rtx
1552 spill_tfmode_operand (in, force)
1553 rtx in;
1554 int force;
1555 {
1556 if (GET_CODE (in) == SUBREG
1557 && GET_MODE (SUBREG_REG (in)) == TImode
1558 && GET_CODE (SUBREG_REG (in)) == REG)
1559 {
1560 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, /*rescan=*/true);
1561 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1562 }
1563 else if (force && GET_CODE (in) == REG)
1564 {
1565 rtx mem = gen_mem_addressof (in, NULL_TREE, /*rescan=*/true);
1566 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1567 }
1568 else if (GET_CODE (in) == MEM
1569 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1570 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1571 else
1572 return in;
1573 }
1574
1575 /* Emit comparison instruction if necessary, returning the expression
1576 that holds the compare result in the proper mode. */
1577
1578 rtx
1579 ia64_expand_compare (code, mode)
1580 enum rtx_code code;
1581 enum machine_mode mode;
1582 {
1583 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1584 rtx cmp;
1585
1586 /* If we have a BImode input, then we already have a compare result, and
1587 do not need to emit another comparison. */
1588 if (GET_MODE (op0) == BImode)
1589 {
1590 if ((code == NE || code == EQ) && op1 == const0_rtx)
1591 cmp = op0;
1592 else
1593 abort ();
1594 }
1595 else
1596 {
1597 cmp = gen_reg_rtx (BImode);
1598 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1599 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1600 code = NE;
1601 }
1602
1603 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1604 }
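/* So a conditional branch is typically split into a compare that writes a
   predicate pair followed by a predicated branch, roughly:
       cmp.lt p6, p7 = r32, r33
       (p6) br.cond.dptk .Ltaken
   which is why the expression returned here is always an EQ/NE test of a
   BImode predicate register against zero.  */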
1605
1606 /* Emit the appropriate sequence for a call. */
1607
1608 void
1609 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1610 rtx retval;
1611 rtx addr;
1612 rtx nextarg ATTRIBUTE_UNUSED;
1613 int sibcall_p;
1614 {
1615 rtx insn, b0;
1616
1617 addr = XEXP (addr, 0);
1618 b0 = gen_rtx_REG (DImode, R_BR (0));
1619
1620 /* ??? Should do this for functions known to bind local too. */
1621 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1622 {
1623 if (sibcall_p)
1624 insn = gen_sibcall_nogp (addr);
1625 else if (! retval)
1626 insn = gen_call_nogp (addr, b0);
1627 else
1628 insn = gen_call_value_nogp (retval, addr, b0);
1629 insn = emit_call_insn (insn);
1630 }
1631 else
1632 {
1633 if (sibcall_p)
1634 insn = gen_sibcall_gp (addr);
1635 else if (! retval)
1636 insn = gen_call_gp (addr, b0);
1637 else
1638 insn = gen_call_value_gp (retval, addr, b0);
1639 insn = emit_call_insn (insn);
1640
1641 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1642 }
1643
1644 if (sibcall_p)
1645 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1646 }
1647
1648 void
1649 ia64_reload_gp ()
1650 {
1651 rtx tmp;
1652
1653 if (current_frame_info.reg_save_gp)
1654 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1655 else
1656 {
1657 HOST_WIDE_INT offset;
1658
1659 offset = (current_frame_info.spill_cfa_off
1660 + current_frame_info.spill_size);
1661 if (frame_pointer_needed)
1662 {
1663 tmp = hard_frame_pointer_rtx;
1664 offset = -offset;
1665 }
1666 else
1667 {
1668 tmp = stack_pointer_rtx;
1669 offset = current_frame_info.total_size - offset;
1670 }
1671
1672 if (CONST_OK_FOR_I (offset))
1673 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1674 tmp, GEN_INT (offset)));
1675 else
1676 {
1677 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1678 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1679 pic_offset_table_rtx, tmp));
1680 }
1681
1682 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1683 }
1684
1685 emit_move_insn (pic_offset_table_rtx, tmp);
1686 }
1687
1688 void
1689 ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
1690 noreturn_p, sibcall_p)
1691 rtx retval, addr, retaddr, scratch_r, scratch_b;
1692 int noreturn_p, sibcall_p;
1693 {
1694 rtx insn;
1695 bool is_desc = false;
1696
1697 /* If we find we're calling through a register, then we're actually
1698 calling through a descriptor, so load up the values. */
1699 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1700 {
1701 rtx tmp;
1702 bool addr_dead_p;
1703
1704 /* ??? We are currently constrained to *not* use peep2, because
1705 we can legitimately change the global lifetime of the GP
1706 (in the form of killing where previously live). This is
1707 because a call through a descriptor doesn't use the previous
1708 value of the GP, while a direct call does, and we do not
1709 commit to either form until the split here.
1710
1711 That said, this means that we lack precise life info for
1712 whether ADDR is dead after this call. This is not terribly
1713 important, since we can fix things up essentially for free
1714 with the POST_DEC below, but it's nice to not use it when we
1715 can immediately tell it's not necessary. */
1716 addr_dead_p = ((noreturn_p || sibcall_p
1717 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1718 REGNO (addr)))
1719 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1720
1721 /* Load the code address into scratch_b. */
1722 tmp = gen_rtx_POST_INC (Pmode, addr);
1723 tmp = gen_rtx_MEM (Pmode, tmp);
1724 emit_move_insn (scratch_r, tmp);
1725 emit_move_insn (scratch_b, scratch_r);
1726
1727 /* Load the GP address. If ADDR is not dead here, then we must
1728 revert the change made above via the POST_INCREMENT. */
1729 if (!addr_dead_p)
1730 tmp = gen_rtx_POST_DEC (Pmode, addr);
1731 else
1732 tmp = addr;
1733 tmp = gen_rtx_MEM (Pmode, tmp);
1734 emit_move_insn (pic_offset_table_rtx, tmp);
1735
1736 is_desc = true;
1737 addr = scratch_b;
1738 }
1739
1740 if (sibcall_p)
1741 insn = gen_sibcall_nogp (addr);
1742 else if (retval)
1743 insn = gen_call_value_nogp (retval, addr, retaddr);
1744 else
1745 insn = gen_call_nogp (addr, retaddr);
1746 emit_call_insn (insn);
1747
1748 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1749 ia64_reload_gp ();
1750 }
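/* The descriptor dereferenced above is the usual two-word IA-64 function
   descriptor: word 0 holds the entry-point address and word 1 the callee's
   gp.  The emitted sequence therefore looks roughly like:
       ld8 rS = [rA], 8          // code address; rA post-incremented
       mov bS = rS
       ld8 gp = [rA]             // callee's gp (rA restored again if live)
       br.call.sptk.many b0 = bS  */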
1751 \f
1752 /* Begin the assembly file. */
1753
1754 static void
1755 ia64_file_start ()
1756 {
1757 default_file_start ();
1758 emit_safe_across_calls ();
1759 }
1760
1761 void
1762 emit_safe_across_calls ()
1763 {
1764 unsigned int rs, re;
1765 int out_state;
1766
1767 rs = 1;
1768 out_state = 0;
1769 while (1)
1770 {
1771 while (rs < 64 && call_used_regs[PR_REG (rs)])
1772 rs++;
1773 if (rs >= 64)
1774 break;
1775 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1776 continue;
1777 if (out_state == 0)
1778 {
1779 fputs ("\t.pred.safe_across_calls ", asm_out_file);
1780 out_state = 1;
1781 }
1782 else
1783 fputc (',', asm_out_file);
1784 if (re == rs + 1)
1785 fprintf (asm_out_file, "p%u", rs);
1786 else
1787 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
1788 rs = re + 1;
1789 }
1790 if (out_state)
1791 fputc ('\n', asm_out_file);
1792 }
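/* Under the standard IA-64 conventions p1-p5 and p16-p63 are preserved
   across calls while p6-p15 are scratch, so the usual output is:
       .pred.safe_across_calls p1-p5,p16-p63  */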
1793
1794 /* Helper function for ia64_compute_frame_size: find an appropriate general
1795 register to spill some special register to. Registers already allocated
1796 by this routine are recorded in current_frame_info.gr_used_mask. TRY_LOCALS
1797 is true if we should attempt to locate a local regnum. */
1798
1799 static int
1800 find_gr_spill (try_locals)
1801 int try_locals;
1802 {
1803 int regno;
1804
1805 /* If this is a leaf function, first try an otherwise unused
1806 call-clobbered register. */
1807 if (current_function_is_leaf)
1808 {
1809 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1810 if (! regs_ever_live[regno]
1811 && call_used_regs[regno]
1812 && ! fixed_regs[regno]
1813 && ! global_regs[regno]
1814 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1815 {
1816 current_frame_info.gr_used_mask |= 1 << regno;
1817 return regno;
1818 }
1819 }
1820
1821 if (try_locals)
1822 {
1823 regno = current_frame_info.n_local_regs;
1824 /* If there is a frame pointer, then we can't use loc79, because
1825 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1826 reg_name switching code in ia64_expand_prologue. */
1827 if (regno < (80 - frame_pointer_needed))
1828 {
1829 current_frame_info.n_local_regs = regno + 1;
1830 return LOC_REG (0) + regno;
1831 }
1832 }
1833
1834 /* Failed to find a general register to spill to. Must use stack. */
1835 return 0;
1836 }
1837
1838 /* In order to make for nice schedules, we try to allocate every temporary
1839 to a different register. We must of course stay away from call-saved,
1840 fixed, and global registers. We must also stay away from registers
1841 allocated in current_frame_info.gr_used_mask, since those include regs
1842 used all through the prologue.
1843
1844 Any register allocated here must be used immediately. The idea is to
1845 aid scheduling, not to solve data flow problems. */
1846
1847 static int last_scratch_gr_reg;
1848
1849 static int
1850 next_scratch_gr_reg ()
1851 {
1852 int i, regno;
1853
1854 for (i = 0; i < 32; ++i)
1855 {
1856 regno = (last_scratch_gr_reg + i + 1) & 31;
1857 if (call_used_regs[regno]
1858 && ! fixed_regs[regno]
1859 && ! global_regs[regno]
1860 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1861 {
1862 last_scratch_gr_reg = regno;
1863 return regno;
1864 }
1865 }
1866
1867 /* There must be _something_ available. */
1868 abort ();
1869 }
1870
1871 /* Helper function for ia64_compute_frame_size, called through
1872 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1873
1874 static void
1875 mark_reg_gr_used_mask (reg, data)
1876 rtx reg;
1877 void *data ATTRIBUTE_UNUSED;
1878 {
1879 unsigned int regno = REGNO (reg);
1880 if (regno < 32)
1881 {
1882 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1883 for (i = 0; i < n; ++i)
1884 current_frame_info.gr_used_mask |= 1 << (regno + i);
1885 }
1886 }
1887
1888 /* Compute the size and layout of the frame for the current function,
1889 recording the result in current_frame_info. SIZE is the number of bytes
1890 of space needed for local variables. */
1891
1892 static void
1893 ia64_compute_frame_size (size)
1894 HOST_WIDE_INT size;
1895 {
1896 HOST_WIDE_INT total_size;
1897 HOST_WIDE_INT spill_size = 0;
1898 HOST_WIDE_INT extra_spill_size = 0;
1899 HOST_WIDE_INT pretend_args_size;
1900 HARD_REG_SET mask;
1901 int n_spilled = 0;
1902 int spilled_gr_p = 0;
1903 int spilled_fr_p = 0;
1904 unsigned int regno;
1905 int i;
1906
1907 if (current_frame_info.initialized)
1908 return;
1909
1910 memset (&current_frame_info, 0, sizeof current_frame_info);
1911 CLEAR_HARD_REG_SET (mask);
1912
1913 /* Don't allocate scratches to the return register. */
1914 diddle_return_value (mark_reg_gr_used_mask, NULL);
1915
1916 /* Don't allocate scratches to the EH scratch registers. */
1917 if (cfun->machine->ia64_eh_epilogue_sp)
1918 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1919 if (cfun->machine->ia64_eh_epilogue_bsp)
1920 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1921
1922 /* Find the size of the register stack frame. We have only 80 local
1923 registers, because we reserve 8 for the inputs and 8 for the
1924 outputs. */
1925
1926 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1927 since we'll be adjusting that down later. */
1928 regno = LOC_REG (78) + ! frame_pointer_needed;
1929 for (; regno >= LOC_REG (0); regno--)
1930 if (regs_ever_live[regno])
1931 break;
1932 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1933
1934 /* For functions marked with the syscall_linkage attribute, we must mark
1935 all eight input registers as in use, so that locals aren't visible to
1936 the caller. */
1937
1938 if (cfun->machine->n_varargs > 0
1939 || lookup_attribute ("syscall_linkage",
1940 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1941 current_frame_info.n_input_regs = 8;
1942 else
1943 {
1944 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1945 if (regs_ever_live[regno])
1946 break;
1947 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1948 }
1949
1950 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1951 if (regs_ever_live[regno])
1952 break;
1953 i = regno - OUT_REG (0) + 1;
1954
1955 /* When -p profiling, we need one output register for the mcount argument.
1956 Likewise for -a profiling for the bb_init_func argument. For -ax
1957 profiling, we need two output registers for the two bb_init_trace_func
1958 arguments. */
1959 if (current_function_profile)
1960 i = MAX (i, 1);
1961 current_frame_info.n_output_regs = i;
1962
1963 /* ??? No rotating register support yet. */
1964 current_frame_info.n_rotate_regs = 0;
1965
1966 /* Discover which registers need spilling, and how much room that
1967 will take. Begin with floating point and general registers,
1968 which will always wind up on the stack. */
1969
1970 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1971 if (regs_ever_live[regno] && ! call_used_regs[regno])
1972 {
1973 SET_HARD_REG_BIT (mask, regno);
1974 spill_size += 16;
1975 n_spilled += 1;
1976 spilled_fr_p = 1;
1977 }
1978
1979 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1980 if (regs_ever_live[regno] && ! call_used_regs[regno])
1981 {
1982 SET_HARD_REG_BIT (mask, regno);
1983 spill_size += 8;
1984 n_spilled += 1;
1985 spilled_gr_p = 1;
1986 }
1987
1988 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1989 if (regs_ever_live[regno] && ! call_used_regs[regno])
1990 {
1991 SET_HARD_REG_BIT (mask, regno);
1992 spill_size += 8;
1993 n_spilled += 1;
1994 }
1995
1996 /* Now come all special registers that might get saved in other
1997 general registers. */
1998
1999 if (frame_pointer_needed)
2000 {
2001 current_frame_info.reg_fp = find_gr_spill (1);
2002 /* If we did not get a register, then we take LOC79. This is guaranteed
2003 to be free, even if regs_ever_live is already set, because this is
2004 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2005 as we don't count loc79 above. */
2006 if (current_frame_info.reg_fp == 0)
2007 {
2008 current_frame_info.reg_fp = LOC_REG (79);
2009 current_frame_info.n_local_regs++;
2010 }
2011 }
2012
2013 if (! current_function_is_leaf)
2014 {
2015 /* Emit a save of BR0 if we call other functions. Do this even
2016 if this function doesn't return, as EH depends on this to be
2017 able to unwind the stack. */
2018 SET_HARD_REG_BIT (mask, BR_REG (0));
2019
2020 current_frame_info.reg_save_b0 = find_gr_spill (1);
2021 if (current_frame_info.reg_save_b0 == 0)
2022 {
2023 spill_size += 8;
2024 n_spilled += 1;
2025 }
2026
2027 /* Similarly for ar.pfs. */
2028 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2029 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2030 if (current_frame_info.reg_save_ar_pfs == 0)
2031 {
2032 extra_spill_size += 8;
2033 n_spilled += 1;
2034 }
2035
2036 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2037 registers are clobbered, so we fall back to the stack. */
2038 current_frame_info.reg_save_gp
2039 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2040 if (current_frame_info.reg_save_gp == 0)
2041 {
2042 SET_HARD_REG_BIT (mask, GR_REG (1));
2043 spill_size += 8;
2044 n_spilled += 1;
2045 }
2046 }
2047 else
2048 {
2049 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2050 {
2051 SET_HARD_REG_BIT (mask, BR_REG (0));
2052 spill_size += 8;
2053 n_spilled += 1;
2054 }
2055
2056 if (regs_ever_live[AR_PFS_REGNUM])
2057 {
2058 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2059 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2060 if (current_frame_info.reg_save_ar_pfs == 0)
2061 {
2062 extra_spill_size += 8;
2063 n_spilled += 1;
2064 }
2065 }
2066 }
2067
2068 /* Unwind descriptor hackery: things are most efficient if we allocate
2069 consecutive GR save registers for RP, PFS, FP in that order. However,
2070 it is absolutely critical that FP get the only hard register that's
2071 guaranteed to be free, so we allocate it first. If all three happen
2072 to be allocated to hard regs, and they are consecutive, rearrange them
2073 into the preferred order now. */
2074 if (current_frame_info.reg_fp != 0
2075 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2076 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2077 {
2078 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2079 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2080 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2081 }
2082
2083 /* See if we need to store the predicate register block. */
2084 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2085 if (regs_ever_live[regno] && ! call_used_regs[regno])
2086 break;
2087 if (regno <= PR_REG (63))
2088 {
2089 SET_HARD_REG_BIT (mask, PR_REG (0));
2090 current_frame_info.reg_save_pr = find_gr_spill (1);
2091 if (current_frame_info.reg_save_pr == 0)
2092 {
2093 extra_spill_size += 8;
2094 n_spilled += 1;
2095 }
2096
2097 /* ??? Mark them all as used so that register renaming and such
2098 are free to use them. */
2099 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2100 regs_ever_live[regno] = 1;
2101 }
2102
2103 /* If we're forced to use st8.spill, we're forced to save and restore
2104 ar.unat as well. The check for existing liveness allows inline asm
2105 to touch ar.unat. */
2106 if (spilled_gr_p || cfun->machine->n_varargs
2107 || regs_ever_live[AR_UNAT_REGNUM])
2108 {
2109 regs_ever_live[AR_UNAT_REGNUM] = 1;
2110 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2111 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2112 if (current_frame_info.reg_save_ar_unat == 0)
2113 {
2114 extra_spill_size += 8;
2115 n_spilled += 1;
2116 }
2117 }
2118
2119 if (regs_ever_live[AR_LC_REGNUM])
2120 {
2121 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2122 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2123 if (current_frame_info.reg_save_ar_lc == 0)
2124 {
2125 extra_spill_size += 8;
2126 n_spilled += 1;
2127 }
2128 }
2129
2130 /* If we have an odd number of words of pretend arguments written to
2131 the stack, then the FR save area will be unaligned. We round the
2132 size of this area up to keep things 16 byte aligned. */
2133 if (spilled_fr_p)
2134 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2135 else
2136 pretend_args_size = current_function_pretend_args_size;
2137
2138 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2139 + current_function_outgoing_args_size);
2140 total_size = IA64_STACK_ALIGN (total_size);
2141
2142 /* We always use the 16-byte scratch area provided by the caller, but
2143 if we are a leaf function, there's no one to which we need to provide
2144 a scratch area. */
2145 if (current_function_is_leaf)
2146 total_size = MAX (0, total_size - 16);
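  /* Worked example with hypothetical numbers: spill_size 24, extra_spill_size
     8, SIZE 40 and no pretend or outgoing args give 72, which
     IA64_STACK_ALIGN rounds up to 80; a leaf function then drops the 16 byte
     scratch area, leaving a total_size of 64.  */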
2147
2148 current_frame_info.total_size = total_size;
2149 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2150 current_frame_info.spill_size = spill_size;
2151 current_frame_info.extra_spill_size = extra_spill_size;
2152 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2153 current_frame_info.n_spilled = n_spilled;
2154 current_frame_info.initialized = reload_completed;
2155 }
2156
2157 /* Compute the initial difference between the specified pair of registers. */
2158
2159 HOST_WIDE_INT
2160 ia64_initial_elimination_offset (from, to)
2161 int from, to;
2162 {
2163 HOST_WIDE_INT offset;
2164
2165 ia64_compute_frame_size (get_frame_size ());
2166 switch (from)
2167 {
2168 case FRAME_POINTER_REGNUM:
2169 if (to == HARD_FRAME_POINTER_REGNUM)
2170 {
2171 if (current_function_is_leaf)
2172 offset = -current_frame_info.total_size;
2173 else
2174 offset = -(current_frame_info.total_size
2175 - current_function_outgoing_args_size - 16);
2176 }
2177 else if (to == STACK_POINTER_REGNUM)
2178 {
2179 if (current_function_is_leaf)
2180 offset = 0;
2181 else
2182 offset = 16 + current_function_outgoing_args_size;
2183 }
2184 else
2185 abort ();
2186 break;
2187
2188 case ARG_POINTER_REGNUM:
2189 /* Arguments start above the 16 byte save area, unless stdarg,
2190 in which case we store through the 16 byte save area. */
2191 if (to == HARD_FRAME_POINTER_REGNUM)
2192 offset = 16 - current_function_pretend_args_size;
2193 else if (to == STACK_POINTER_REGNUM)
2194 offset = (current_frame_info.total_size
2195 + 16 - current_function_pretend_args_size);
2196 else
2197 abort ();
2198 break;
2199
2200 default:
2201 abort ();
2202 }
2203
2204 return offset;
2205 }
2206
2207 /* If there are more than a trivial number of register spills, we use
2208 two interleaved iterators so that we can get two memory references
2209 per insn group.
2210
2211 In order to simplify things in the prologue and epilogue expanders,
2212 we use helper functions to fix up the memory references after the
2213 fact with the appropriate offsets to a POST_MODIFY memory mode.
2214 The following data structure tracks the state of the two iterators
2215 while insns are being emitted. */
2216
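/* Illustrative usage sketch (hypothetical registers and offsets): the
   prologue and epilogue expanders below drive the iterators roughly as

     setup_spill_pointers (n_spilled, stack_pointer_rtx, 0);
     do_spill (gen_movdi_x, reg_a, cfa_off, reg_a);  cfa_off -= 8;
     do_spill (gen_movdi_x, reg_b, cfa_off, reg_b);  cfa_off -= 8;
     finish_spill_pointers ();

   With more than two spills, two iterator registers are allocated and
   successive spill_restore_mem calls alternate between them, so that two
   memory references can issue in one insn group.  */
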
2217 struct spill_fill_data
2218 {
2219 rtx init_after; /* point at which to emit initializations */
2220 rtx init_reg[2]; /* initial base register */
2221 rtx iter_reg[2]; /* the iterator registers */
2222 rtx *prev_addr[2]; /* address of last memory use */
2223 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2224 HOST_WIDE_INT prev_off[2]; /* last offset */
2225 int n_iter; /* number of iterators in use */
2226 int next_iter; /* next iterator to use */
2227 unsigned int save_gr_used_mask;
2228 };
2229
2230 static struct spill_fill_data spill_fill_data;
2231
2232 static void
2233 setup_spill_pointers (n_spills, init_reg, cfa_off)
2234 int n_spills;
2235 rtx init_reg;
2236 HOST_WIDE_INT cfa_off;
2237 {
2238 int i;
2239
2240 spill_fill_data.init_after = get_last_insn ();
2241 spill_fill_data.init_reg[0] = init_reg;
2242 spill_fill_data.init_reg[1] = init_reg;
2243 spill_fill_data.prev_addr[0] = NULL;
2244 spill_fill_data.prev_addr[1] = NULL;
2245 spill_fill_data.prev_insn[0] = NULL;
2246 spill_fill_data.prev_insn[1] = NULL;
2247 spill_fill_data.prev_off[0] = cfa_off;
2248 spill_fill_data.prev_off[1] = cfa_off;
2249 spill_fill_data.next_iter = 0;
2250 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2251
2252 spill_fill_data.n_iter = 1 + (n_spills > 2);
2253 for (i = 0; i < spill_fill_data.n_iter; ++i)
2254 {
2255 int regno = next_scratch_gr_reg ();
2256 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2257 current_frame_info.gr_used_mask |= 1 << regno;
2258 }
2259 }
2260
2261 static void
2262 finish_spill_pointers ()
2263 {
2264 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2265 }
2266
2267 static rtx
2268 spill_restore_mem (reg, cfa_off)
2269 rtx reg;
2270 HOST_WIDE_INT cfa_off;
2271 {
2272 int iter = spill_fill_data.next_iter;
2273 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2274 rtx disp_rtx = GEN_INT (disp);
2275 rtx mem;
2276
2277 if (spill_fill_data.prev_addr[iter])
2278 {
2279 if (CONST_OK_FOR_N (disp))
2280 {
2281 *spill_fill_data.prev_addr[iter]
2282 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2283 gen_rtx_PLUS (DImode,
2284 spill_fill_data.iter_reg[iter],
2285 disp_rtx));
2286 REG_NOTES (spill_fill_data.prev_insn[iter])
2287 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2288 REG_NOTES (spill_fill_data.prev_insn[iter]));
2289 }
2290 else
2291 {
2292 /* ??? Could use register post_modify for loads. */
2293 if (! CONST_OK_FOR_I (disp))
2294 {
2295 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2296 emit_move_insn (tmp, disp_rtx);
2297 disp_rtx = tmp;
2298 }
2299 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2300 spill_fill_data.iter_reg[iter], disp_rtx));
2301 }
2302 }
2303 /* Micro-optimization: if we've created a frame pointer, it's at
2304 CFA 0, which may allow the real iterator to be initialized lower,
2305 slightly increasing parallelism. Also, if there are few saves
2306 it may eliminate the iterator entirely. */
2307 else if (disp == 0
2308 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2309 && frame_pointer_needed)
2310 {
2311 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2312 set_mem_alias_set (mem, get_varargs_alias_set ());
2313 return mem;
2314 }
2315 else
2316 {
2317 rtx seq, insn;
2318
2319 if (disp == 0)
2320 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2321 spill_fill_data.init_reg[iter]);
2322 else
2323 {
2324 start_sequence ();
2325
2326 if (! CONST_OK_FOR_I (disp))
2327 {
2328 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2329 emit_move_insn (tmp, disp_rtx);
2330 disp_rtx = tmp;
2331 }
2332
2333 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2334 spill_fill_data.init_reg[iter],
2335 disp_rtx));
2336
2337 seq = get_insns ();
2338 end_sequence ();
2339 }
2340
2341 /* Be careful about being the first insn in a sequence. */
2342 if (spill_fill_data.init_after)
2343 insn = emit_insn_after (seq, spill_fill_data.init_after);
2344 else
2345 {
2346 rtx first = get_insns ();
2347 if (first)
2348 insn = emit_insn_before (seq, first);
2349 else
2350 insn = emit_insn (seq);
2351 }
2352 spill_fill_data.init_after = insn;
2353
2354 /* If DISP is 0, we may or may not have a further adjustment
2355 afterward. If we do, then the load/store insn may be modified
2356 to be a post-modify. If we don't, then this copy may be
2357 eliminated by copyprop_hardreg_forward, which makes this
2358 insn garbage, which runs afoul of the sanity check in
2359 propagate_one_insn. So mark this insn as legal to delete. */
2360 if (disp == 0)
2361 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2362 REG_NOTES (insn));
2363 }
2364
2365 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2366
2367 /* ??? Not all of the spills are for varargs, but some of them are.
2368 The rest of the spills belong in an alias set of their own. But
2369 it doesn't actually hurt to include them here. */
2370 set_mem_alias_set (mem, get_varargs_alias_set ());
2371
2372 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2373 spill_fill_data.prev_off[iter] = cfa_off;
2374
2375 if (++iter >= spill_fill_data.n_iter)
2376 iter = 0;
2377 spill_fill_data.next_iter = iter;
2378
2379 return mem;
2380 }
2381
2382 static void
2383 do_spill (move_fn, reg, cfa_off, frame_reg)
2384 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2385 rtx reg, frame_reg;
2386 HOST_WIDE_INT cfa_off;
2387 {
2388 int iter = spill_fill_data.next_iter;
2389 rtx mem, insn;
2390
2391 mem = spill_restore_mem (reg, cfa_off);
2392 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2393 spill_fill_data.prev_insn[iter] = insn;
2394
2395 if (frame_reg)
2396 {
2397 rtx base;
2398 HOST_WIDE_INT off;
2399
2400 RTX_FRAME_RELATED_P (insn) = 1;
2401
2402 /* Don't even pretend that the unwind code can intuit its way
2403 through a pair of interleaved post_modify iterators. Just
2404 provide the correct answer. */
2405
2406 if (frame_pointer_needed)
2407 {
2408 base = hard_frame_pointer_rtx;
2409 off = - cfa_off;
2410 }
2411 else
2412 {
2413 base = stack_pointer_rtx;
2414 off = current_frame_info.total_size - cfa_off;
2415 }
2416
2417 REG_NOTES (insn)
2418 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2419 gen_rtx_SET (VOIDmode,
2420 gen_rtx_MEM (GET_MODE (reg),
2421 plus_constant (base, off)),
2422 frame_reg),
2423 REG_NOTES (insn));
2424 }
2425 }
2426
2427 static void
2428 do_restore (move_fn, reg, cfa_off)
2429 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2430 rtx reg;
2431 HOST_WIDE_INT cfa_off;
2432 {
2433 int iter = spill_fill_data.next_iter;
2434 rtx insn;
2435
2436 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2437 GEN_INT (cfa_off)));
2438 spill_fill_data.prev_insn[iter] = insn;
2439 }
2440
2441 /* Wrapper functions that discard the CONST_INT spill offset. These
2442 exist so that we can give gr_spill/gr_fill the offset they need and
2443 use a consistent function interface. */
2444
2445 static rtx
2446 gen_movdi_x (dest, src, offset)
2447 rtx dest, src;
2448 rtx offset ATTRIBUTE_UNUSED;
2449 {
2450 return gen_movdi (dest, src);
2451 }
2452
2453 static rtx
2454 gen_fr_spill_x (dest, src, offset)
2455 rtx dest, src;
2456 rtx offset ATTRIBUTE_UNUSED;
2457 {
2458 return gen_fr_spill (dest, src);
2459 }
2460
2461 static rtx
2462 gen_fr_restore_x (dest, src, offset)
2463 rtx dest, src;
2464 rtx offset ATTRIBUTE_UNUSED;
2465 {
2466 return gen_fr_restore (dest, src);
2467 }
2468
2469 /* Called after register allocation to add any instructions needed for the
2470 prologue. Using a prologue insn is favored compared to putting all of the
2471 instructions in output_function_prologue(), since it allows the scheduler
2472 to intermix instructions with the saves of the caller saved registers. In
2473 some cases, it might be necessary to emit a barrier instruction as the last
2474 insn to prevent such scheduling.
2475
2476 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2477 so that the debug info generation code can handle them properly.
2478
2479 The register save area is laid out like so:
2480 cfa+16
2481 [ varargs spill area ]
2482 [ fr register spill area ]
2483 [ br register spill area ]
2484 [ ar register spill area ]
2485 [ pr register spill area ]
2486 [ gr register spill area ] */
2487
2488 /* ??? We get inefficient code when the frame size is larger than can fit
2489 in an adds instruction. */
2490
2491 void
2492 ia64_expand_prologue ()
2493 {
2494 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2495 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2496 rtx reg, alt_reg;
2497
2498 ia64_compute_frame_size (get_frame_size ());
2499 last_scratch_gr_reg = 15;
2500
2501 /* If there is no epilogue, then we don't need some prologue insns.
2502 We need to avoid emitting the dead prologue insns, because flow
2503 will complain about them. */
2504 if (optimize)
2505 {
2506 edge e;
2507
2508 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2509 if ((e->flags & EDGE_FAKE) == 0
2510 && (e->flags & EDGE_FALLTHRU) != 0)
2511 break;
2512 epilogue_p = (e != NULL);
2513 }
2514 else
2515 epilogue_p = 1;
2516
2517 /* Set the local, input, and output register names. We need to do this
2518 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2519 half. If we use in/loc/out register names, then we get assembler errors
2520 in crtn.S because there is no alloc insn or regstk directive in there. */
2521 if (! TARGET_REG_NAMES)
2522 {
2523 int inputs = current_frame_info.n_input_regs;
2524 int locals = current_frame_info.n_local_regs;
2525 int outputs = current_frame_info.n_output_regs;
2526
2527 for (i = 0; i < inputs; i++)
2528 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2529 for (i = 0; i < locals; i++)
2530 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2531 for (i = 0; i < outputs; i++)
2532 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2533 }
2534
2535 /* Set the frame pointer register name. The regnum is logically loc79,
2536 but of course we'll not have allocated that many locals. Rather than
2537 worrying about renumbering the existing rtxs, we adjust the name. */
2538 /* ??? This code means that we can never use one local register when
2539 there is a frame pointer. loc79 gets wasted in this case, as it is
2540 renamed to a register that will never be used. See also the try_locals
2541 code in find_gr_spill. */
2542 if (current_frame_info.reg_fp)
2543 {
2544 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2545 reg_names[HARD_FRAME_POINTER_REGNUM]
2546 = reg_names[current_frame_info.reg_fp];
2547 reg_names[current_frame_info.reg_fp] = tmp;
2548 }
2549
2550 /* We don't need an alloc instruction if we've used no outputs or locals. */
2551 if (current_frame_info.n_local_regs == 0
2552 && current_frame_info.n_output_regs == 0
2553 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2554 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2555 {
2556 /* If there is no alloc, but there are input registers used, then we
2557 need a .regstk directive. */
2558 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2559 ar_pfs_save_reg = NULL_RTX;
2560 }
2561 else
2562 {
2563 current_frame_info.need_regstk = 0;
2564
2565 if (current_frame_info.reg_save_ar_pfs)
2566 regno = current_frame_info.reg_save_ar_pfs;
2567 else
2568 regno = next_scratch_gr_reg ();
2569 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2570
2571 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2572 GEN_INT (current_frame_info.n_input_regs),
2573 GEN_INT (current_frame_info.n_local_regs),
2574 GEN_INT (current_frame_info.n_output_regs),
2575 GEN_INT (current_frame_info.n_rotate_regs)));
2576 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2577 }
2578
2579 /* Set up frame pointer, stack pointer, and spill iterators. */
2580
2581 n_varargs = cfun->machine->n_varargs;
2582 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2583 stack_pointer_rtx, 0);
2584
2585 if (frame_pointer_needed)
2586 {
2587 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2588 RTX_FRAME_RELATED_P (insn) = 1;
2589 }
2590
2591 if (current_frame_info.total_size != 0)
2592 {
2593 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2594 rtx offset;
2595
2596 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2597 offset = frame_size_rtx;
2598 else
2599 {
2600 regno = next_scratch_gr_reg ();
2601 offset = gen_rtx_REG (DImode, regno);
2602 emit_move_insn (offset, frame_size_rtx);
2603 }
2604
2605 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2606 stack_pointer_rtx, offset));
2607
2608 if (! frame_pointer_needed)
2609 {
2610 RTX_FRAME_RELATED_P (insn) = 1;
2611 if (GET_CODE (offset) != CONST_INT)
2612 {
2613 REG_NOTES (insn)
2614 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2615 gen_rtx_SET (VOIDmode,
2616 stack_pointer_rtx,
2617 gen_rtx_PLUS (DImode,
2618 stack_pointer_rtx,
2619 frame_size_rtx)),
2620 REG_NOTES (insn));
2621 }
2622 }
2623
2624 /* ??? At this point we must generate a magic insn that appears to
2625 modify the stack pointer, the frame pointer, and all spill
2626 iterators. This would allow the most scheduling freedom. For
2627 now, just hard stop. */
2628 emit_insn (gen_blockage ());
2629 }
2630
2631 /* Must copy out ar.unat before doing any integer spills. */
2632 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2633 {
2634 if (current_frame_info.reg_save_ar_unat)
2635 ar_unat_save_reg
2636 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2637 else
2638 {
2639 alt_regno = next_scratch_gr_reg ();
2640 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2641 current_frame_info.gr_used_mask |= 1 << alt_regno;
2642 }
2643
2644 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2645 insn = emit_move_insn (ar_unat_save_reg, reg);
2646 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2647
2648 /* Even if we're not going to generate an epilogue, we still
2649 need to save the register so that EH works. */
2650 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2651 emit_insn (gen_prologue_use (ar_unat_save_reg));
2652 }
2653 else
2654 ar_unat_save_reg = NULL_RTX;
2655
2656 /* Spill all varargs registers. Do this before spilling any GR registers,
2657 since we want the UNAT bits for the GR registers to override the UNAT
2658 bits from varargs, which we don't care about. */
2659
2660 cfa_off = -16;
2661 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2662 {
2663 reg = gen_rtx_REG (DImode, regno);
2664 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2665 }
2666
2667 /* Locate the bottom of the register save area. */
2668 cfa_off = (current_frame_info.spill_cfa_off
2669 + current_frame_info.spill_size
2670 + current_frame_info.extra_spill_size);
2671
2672 /* Save the predicate register block either in a register or in memory. */
2673 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2674 {
2675 reg = gen_rtx_REG (DImode, PR_REG (0));
2676 if (current_frame_info.reg_save_pr != 0)
2677 {
2678 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2679 insn = emit_move_insn (alt_reg, reg);
2680
2681 /* ??? Denote pr spill/fill by a DImode move that modifies all
2682 64 hard registers. */
2683 RTX_FRAME_RELATED_P (insn) = 1;
2684 REG_NOTES (insn)
2685 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2686 gen_rtx_SET (VOIDmode, alt_reg, reg),
2687 REG_NOTES (insn));
2688
2689 /* Even if we're not going to generate an epilogue, we still
2690 need to save the register so that EH works. */
2691 if (! epilogue_p)
2692 emit_insn (gen_prologue_use (alt_reg));
2693 }
2694 else
2695 {
2696 alt_regno = next_scratch_gr_reg ();
2697 alt_reg = gen_rtx_REG (DImode, alt_regno);
2698 insn = emit_move_insn (alt_reg, reg);
2699 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2700 cfa_off -= 8;
2701 }
2702 }
2703
2704 /* Handle AR regs in numerical order. All of them get special handling. */
2705 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2706 && current_frame_info.reg_save_ar_unat == 0)
2707 {
2708 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2709 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2710 cfa_off -= 8;
2711 }
2712
2713 /* The alloc insn already copied ar.pfs into a general register. The
2714 only thing we have to do now is copy that register to a stack slot
2715 if we'd not allocated a local register for the job. */
2716 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2717 && current_frame_info.reg_save_ar_pfs == 0)
2718 {
2719 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2720 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2721 cfa_off -= 8;
2722 }
2723
2724 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2725 {
2726 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2727 if (current_frame_info.reg_save_ar_lc != 0)
2728 {
2729 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2730 insn = emit_move_insn (alt_reg, reg);
2731 RTX_FRAME_RELATED_P (insn) = 1;
2732
2733 /* Even if we're not going to generate an epilogue, we still
2734 need to save the register so that EH works. */
2735 if (! epilogue_p)
2736 emit_insn (gen_prologue_use (alt_reg));
2737 }
2738 else
2739 {
2740 alt_regno = next_scratch_gr_reg ();
2741 alt_reg = gen_rtx_REG (DImode, alt_regno);
2742 emit_move_insn (alt_reg, reg);
2743 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2744 cfa_off -= 8;
2745 }
2746 }
2747
2748 if (current_frame_info.reg_save_gp)
2749 {
2750 insn = emit_move_insn (gen_rtx_REG (DImode,
2751 current_frame_info.reg_save_gp),
2752 pic_offset_table_rtx);
2753 /* We don't know for sure yet if this is actually needed, since
2754 we've not split the PIC call patterns. If all of the calls
2755 are indirect, and not followed by any uses of the gp, then
2756 this save is dead. Allow it to go away. */
2757 REG_NOTES (insn)
2758 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2759 }
2760
2761 /* We should now be at the base of the gr/br/fr spill area. */
2762 if (cfa_off != (current_frame_info.spill_cfa_off
2763 + current_frame_info.spill_size))
2764 abort ();
2765
2766 /* Spill all general registers. */
2767 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2768 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2769 {
2770 reg = gen_rtx_REG (DImode, regno);
2771 do_spill (gen_gr_spill, reg, cfa_off, reg);
2772 cfa_off -= 8;
2773 }
2774
2775 /* Handle BR0 specially -- it may be getting stored permanently in
2776 some GR register. */
2777 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2778 {
2779 reg = gen_rtx_REG (DImode, BR_REG (0));
2780 if (current_frame_info.reg_save_b0 != 0)
2781 {
2782 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2783 insn = emit_move_insn (alt_reg, reg);
2784 RTX_FRAME_RELATED_P (insn) = 1;
2785
2786 /* Even if we're not going to generate an epilogue, we still
2787 need to save the register so that EH works. */
2788 if (! epilogue_p)
2789 emit_insn (gen_prologue_use (alt_reg));
2790 }
2791 else
2792 {
2793 alt_regno = next_scratch_gr_reg ();
2794 alt_reg = gen_rtx_REG (DImode, alt_regno);
2795 emit_move_insn (alt_reg, reg);
2796 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2797 cfa_off -= 8;
2798 }
2799 }
2800
2801 /* Spill the rest of the BR registers. */
2802 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2803 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2804 {
2805 alt_regno = next_scratch_gr_reg ();
2806 alt_reg = gen_rtx_REG (DImode, alt_regno);
2807 reg = gen_rtx_REG (DImode, regno);
2808 emit_move_insn (alt_reg, reg);
2809 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2810 cfa_off -= 8;
2811 }
2812
2813 /* Align the frame and spill all FR registers. */
2814 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2815 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2816 {
2817 if (cfa_off & 15)
2818 abort ();
2819 reg = gen_rtx_REG (TFmode, regno);
2820 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2821 cfa_off -= 16;
2822 }
2823
2824 if (cfa_off != current_frame_info.spill_cfa_off)
2825 abort ();
2826
2827 finish_spill_pointers ();
2828 }
2829
2830 /* Called after register allocation to add any instructions needed for the
2831 epilogue. Using an epilogue insn is favored compared to putting all of the
2832 instructions in output_function_epilogue(), since it allows the scheduler
2833 to intermix instructions with the restores of the caller saved registers. In
2834 some cases, it might be necessary to emit a barrier instruction as the last
2835 insn to prevent such scheduling. */
2836
2837 void
2838 ia64_expand_epilogue (sibcall_p)
2839 int sibcall_p;
2840 {
2841 rtx insn, reg, alt_reg, ar_unat_save_reg;
2842 int regno, alt_regno, cfa_off;
2843
2844 ia64_compute_frame_size (get_frame_size ());
2845
2846 /* If there is a frame pointer, then we use it instead of the stack
2847 pointer, so that the stack pointer does not need to be valid when
2848 the epilogue starts. See EXIT_IGNORE_STACK. */
2849 if (frame_pointer_needed)
2850 setup_spill_pointers (current_frame_info.n_spilled,
2851 hard_frame_pointer_rtx, 0);
2852 else
2853 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2854 current_frame_info.total_size);
2855
2856 if (current_frame_info.total_size != 0)
2857 {
2858 /* ??? At this point we must generate a magic insn that appears to
2859 modify the spill iterators and the frame pointer. This would
2860 allow the most scheduling freedom. For now, just hard stop. */
2861 emit_insn (gen_blockage ());
2862 }
2863
2864 /* Locate the bottom of the register save area. */
2865 cfa_off = (current_frame_info.spill_cfa_off
2866 + current_frame_info.spill_size
2867 + current_frame_info.extra_spill_size);
2868
2869 /* Restore the predicate registers. */
2870 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2871 {
2872 if (current_frame_info.reg_save_pr != 0)
2873 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2874 else
2875 {
2876 alt_regno = next_scratch_gr_reg ();
2877 alt_reg = gen_rtx_REG (DImode, alt_regno);
2878 do_restore (gen_movdi_x, alt_reg, cfa_off);
2879 cfa_off -= 8;
2880 }
2881 reg = gen_rtx_REG (DImode, PR_REG (0));
2882 emit_move_insn (reg, alt_reg);
2883 }
2884
2885 /* Restore the application registers. */
2886
2887 /* Load the saved unat from the stack, but do not restore it until
2888 after the GRs have been restored. */
2889 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2890 {
2891 if (current_frame_info.reg_save_ar_unat != 0)
2892 ar_unat_save_reg
2893 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2894 else
2895 {
2896 alt_regno = next_scratch_gr_reg ();
2897 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2898 current_frame_info.gr_used_mask |= 1 << alt_regno;
2899 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2900 cfa_off -= 8;
2901 }
2902 }
2903 else
2904 ar_unat_save_reg = NULL_RTX;
2905
2906 if (current_frame_info.reg_save_ar_pfs != 0)
2907 {
2908 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2909 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2910 emit_move_insn (reg, alt_reg);
2911 }
2912 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2913 {
2914 alt_regno = next_scratch_gr_reg ();
2915 alt_reg = gen_rtx_REG (DImode, alt_regno);
2916 do_restore (gen_movdi_x, alt_reg, cfa_off);
2917 cfa_off -= 8;
2918 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2919 emit_move_insn (reg, alt_reg);
2920 }
2921
2922 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2923 {
2924 if (current_frame_info.reg_save_ar_lc != 0)
2925 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2926 else
2927 {
2928 alt_regno = next_scratch_gr_reg ();
2929 alt_reg = gen_rtx_REG (DImode, alt_regno);
2930 do_restore (gen_movdi_x, alt_reg, cfa_off);
2931 cfa_off -= 8;
2932 }
2933 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2934 emit_move_insn (reg, alt_reg);
2935 }
2936
2937 /* We should now be at the base of the gr/br/fr spill area. */
2938 if (cfa_off != (current_frame_info.spill_cfa_off
2939 + current_frame_info.spill_size))
2940 abort ();
2941
2942 /* The GP may be stored on the stack in the prologue, but it's
2943 never restored in the epilogue. Skip the stack slot. */
2944 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2945 cfa_off -= 8;
2946
2947 /* Restore all general registers. */
2948 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2949 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2950 {
2951 reg = gen_rtx_REG (DImode, regno);
2952 do_restore (gen_gr_restore, reg, cfa_off);
2953 cfa_off -= 8;
2954 }
2955
2956 /* Restore the branch registers. Handle B0 specially, as it may
2957 have gotten stored in some GR register. */
2958 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2959 {
2960 if (current_frame_info.reg_save_b0 != 0)
2961 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2962 else
2963 {
2964 alt_regno = next_scratch_gr_reg ();
2965 alt_reg = gen_rtx_REG (DImode, alt_regno);
2966 do_restore (gen_movdi_x, alt_reg, cfa_off);
2967 cfa_off -= 8;
2968 }
2969 reg = gen_rtx_REG (DImode, BR_REG (0));
2970 emit_move_insn (reg, alt_reg);
2971 }
2972
2973 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2974 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2975 {
2976 alt_regno = next_scratch_gr_reg ();
2977 alt_reg = gen_rtx_REG (DImode, alt_regno);
2978 do_restore (gen_movdi_x, alt_reg, cfa_off);
2979 cfa_off -= 8;
2980 reg = gen_rtx_REG (DImode, regno);
2981 emit_move_insn (reg, alt_reg);
2982 }
2983
2984 /* Restore floating point registers. */
2985 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2986 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2987 {
2988 if (cfa_off & 15)
2989 abort ();
2990 reg = gen_rtx_REG (TFmode, regno);
2991 do_restore (gen_fr_restore_x, reg, cfa_off);
2992 cfa_off -= 16;
2993 }
2994
2995 /* Restore ar.unat for real. */
2996 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2997 {
2998 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2999 emit_move_insn (reg, ar_unat_save_reg);
3000 }
3001
3002 if (cfa_off != current_frame_info.spill_cfa_off)
3003 abort ();
3004
3005 finish_spill_pointers ();
3006
3007 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3008 {
3009 /* ??? At this point we must generate a magic insn that appears to
3010 modify the spill iterators, the stack pointer, and the frame
3011 pointer. This would allow the most scheduling freedom. For now,
3012 just hard stop. */
3013 emit_insn (gen_blockage ());
3014 }
3015
3016 if (cfun->machine->ia64_eh_epilogue_sp)
3017 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3018 else if (frame_pointer_needed)
3019 {
3020 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3021 RTX_FRAME_RELATED_P (insn) = 1;
3022 }
3023 else if (current_frame_info.total_size)
3024 {
3025 rtx offset, frame_size_rtx;
3026
3027 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3028 if (CONST_OK_FOR_I (current_frame_info.total_size))
3029 offset = frame_size_rtx;
3030 else
3031 {
3032 regno = next_scratch_gr_reg ();
3033 offset = gen_rtx_REG (DImode, regno);
3034 emit_move_insn (offset, frame_size_rtx);
3035 }
3036
3037 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3038 offset));
3039
3040 RTX_FRAME_RELATED_P (insn) = 1;
3041 if (GET_CODE (offset) != CONST_INT)
3042 {
3043 REG_NOTES (insn)
3044 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3045 gen_rtx_SET (VOIDmode,
3046 stack_pointer_rtx,
3047 gen_rtx_PLUS (DImode,
3048 stack_pointer_rtx,
3049 frame_size_rtx)),
3050 REG_NOTES (insn));
3051 }
3052 }
3053
3054 if (cfun->machine->ia64_eh_epilogue_bsp)
3055 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3056
3057 if (! sibcall_p)
3058 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3059 else
3060 {
3061 int fp = GR_REG (2);
3062 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is
3063 the first available call-clobbered register. If there was a frame
3064 pointer register, we may have swapped the names of r2 and
3065 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using the
3066 string "r2" when emitting the register name for the assembler. */
3067 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3068 fp = HARD_FRAME_POINTER_REGNUM;
3069
3070 /* We must emit an alloc to force the input registers to become output
3071 registers. Otherwise, if the callee tries to pass its parameters
3072 through to another call without an intervening alloc, then these
3073 values get lost. */
3074 /* ??? We don't need to preserve all input registers. We only need to
3075 preserve those input registers used as arguments to the sibling call.
3076 It is unclear how to compute that number here. */
3077 if (current_frame_info.n_input_regs != 0)
3078 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3079 GEN_INT (0), GEN_INT (0),
3080 GEN_INT (current_frame_info.n_input_regs),
3081 GEN_INT (0)));
3082 }
3083 }
3084
3085 /* Return 1 if br.ret can do all the work required to return from a
3086 function. */
3087
3088 int
3089 ia64_direct_return ()
3090 {
3091 if (reload_completed && ! frame_pointer_needed)
3092 {
3093 ia64_compute_frame_size (get_frame_size ());
3094
3095 return (current_frame_info.total_size == 0
3096 && current_frame_info.n_spilled == 0
3097 && current_frame_info.reg_save_b0 == 0
3098 && current_frame_info.reg_save_pr == 0
3099 && current_frame_info.reg_save_ar_pfs == 0
3100 && current_frame_info.reg_save_ar_unat == 0
3101 && current_frame_info.reg_save_ar_lc == 0);
3102 }
3103 return 0;
3104 }
3105
3106 /* Return the magic cookie that we use to hold the return address
3107 during early compilation. */
3108
3109 rtx
3110 ia64_return_addr_rtx (count, frame)
3111 HOST_WIDE_INT count;
3112 rtx frame ATTRIBUTE_UNUSED;
3113 {
3114 if (count != 0)
3115 return NULL;
3116 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3117 }
3118
3119 /* Split this value after reload, now that we know where the return
3120 address is saved. */
3121
3122 void
3123 ia64_split_return_addr_rtx (dest)
3124 rtx dest;
3125 {
3126 rtx src;
3127
3128 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3129 {
3130 if (current_frame_info.reg_save_b0 != 0)
3131 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3132 else
3133 {
3134 HOST_WIDE_INT off;
3135 unsigned int regno;
3136
3137 /* Compute offset from CFA for BR0. */
3138 /* ??? Must be kept in sync with ia64_expand_prologue. */
3139 off = (current_frame_info.spill_cfa_off
3140 + current_frame_info.spill_size);
3141 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3142 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3143 off -= 8;
3144
3145 /* Convert CFA offset to a register based offset. */
3146 if (frame_pointer_needed)
3147 src = hard_frame_pointer_rtx;
3148 else
3149 {
3150 src = stack_pointer_rtx;
3151 off += current_frame_info.total_size;
3152 }
3153
3154 /* Load address into scratch register. */
3155 if (CONST_OK_FOR_I (off))
3156 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3157 else
3158 {
3159 emit_move_insn (dest, GEN_INT (off));
3160 emit_insn (gen_adddi3 (dest, src, dest));
3161 }
3162
3163 src = gen_rtx_MEM (Pmode, dest);
3164 }
3165 }
3166 else
3167 src = gen_rtx_REG (DImode, BR_REG (0));
3168
3169 emit_move_insn (dest, src);
3170 }
3171
3172 int
3173 ia64_hard_regno_rename_ok (from, to)
3174 int from;
3175 int to;
3176 {
3177 /* Don't clobber any of the registers we reserved for the prologue. */
3178 if (to == current_frame_info.reg_fp
3179 || to == current_frame_info.reg_save_b0
3180 || to == current_frame_info.reg_save_pr
3181 || to == current_frame_info.reg_save_ar_pfs
3182 || to == current_frame_info.reg_save_ar_unat
3183 || to == current_frame_info.reg_save_ar_lc)
3184 return 0;
3185
3186 if (from == current_frame_info.reg_fp
3187 || from == current_frame_info.reg_save_b0
3188 || from == current_frame_info.reg_save_pr
3189 || from == current_frame_info.reg_save_ar_pfs
3190 || from == current_frame_info.reg_save_ar_unat
3191 || from == current_frame_info.reg_save_ar_lc)
3192 return 0;
3193
3194 /* Don't use output registers outside the register frame. */
3195 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3196 return 0;
3197
3198 /* Retain even/oddness on predicate register pairs. */
3199 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3200 return (from & 1) == (to & 1);
3201
3202 return 1;
3203 }
3204
3205 /* Target hook for assembling integer objects. Handle word-sized
3206 aligned objects and detect the cases when @fptr is needed. */
3207
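/* Illustrative example (assuming a PIC target): a word-sized, aligned
   reference to a function symbol "foo" in a data initializer is emitted
   as "data8 @fptr(foo)", or "data4 @fptr(foo)" under ILP32; every other
   case falls through to default_assemble_integer.  */
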
3208 static bool
3209 ia64_assemble_integer (x, size, aligned_p)
3210 rtx x;
3211 unsigned int size;
3212 int aligned_p;
3213 {
3214 if (size == (TARGET_ILP32 ? 4 : 8)
3215 && aligned_p
3216 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3217 && GET_CODE (x) == SYMBOL_REF
3218 && SYMBOL_REF_FUNCTION_P (x))
3219 {
3220 if (TARGET_ILP32)
3221 fputs ("\tdata4\t@fptr(", asm_out_file);
3222 else
3223 fputs ("\tdata8\t@fptr(", asm_out_file);
3224 output_addr_const (asm_out_file, x);
3225 fputs (")\n", asm_out_file);
3226 return true;
3227 }
3228 return default_assemble_integer (x, size, aligned_p);
3229 }
3230
3231 /* Emit the function prologue. */
3232
3233 static void
3234 ia64_output_function_prologue (file, size)
3235 FILE *file;
3236 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3237 {
3238 int mask, grsave, grsave_prev;
3239
3240 if (current_frame_info.need_regstk)
3241 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3242 current_frame_info.n_input_regs,
3243 current_frame_info.n_local_regs,
3244 current_frame_info.n_output_regs,
3245 current_frame_info.n_rotate_regs);
3246
3247 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3248 return;
3249
3250 /* Emit the .prologue directive. */
3251
3252 mask = 0;
3253 grsave = grsave_prev = 0;
3254 if (current_frame_info.reg_save_b0 != 0)
3255 {
3256 mask |= 8;
3257 grsave = grsave_prev = current_frame_info.reg_save_b0;
3258 }
3259 if (current_frame_info.reg_save_ar_pfs != 0
3260 && (grsave_prev == 0
3261 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3262 {
3263 mask |= 4;
3264 if (grsave_prev == 0)
3265 grsave = current_frame_info.reg_save_ar_pfs;
3266 grsave_prev = current_frame_info.reg_save_ar_pfs;
3267 }
3268 if (current_frame_info.reg_fp != 0
3269 && (grsave_prev == 0
3270 || current_frame_info.reg_fp == grsave_prev + 1))
3271 {
3272 mask |= 2;
3273 if (grsave_prev == 0)
3274 grsave = HARD_FRAME_POINTER_REGNUM;
3275 grsave_prev = current_frame_info.reg_fp;
3276 }
3277 if (current_frame_info.reg_save_pr != 0
3278 && (grsave_prev == 0
3279 || current_frame_info.reg_save_pr == grsave_prev + 1))
3280 {
3281 mask |= 1;
3282 if (grsave_prev == 0)
3283 grsave = current_frame_info.reg_save_pr;
3284 }
3285
3286 if (mask)
3287 fprintf (file, "\t.prologue %d, %d\n", mask,
3288 ia64_dbx_register_number (grsave));
3289 else
3290 fputs ("\t.prologue\n", file);
3291
3292 /* Emit a .spill directive, if necessary, to relocate the base of
3293 the register spill area. */
3294 if (current_frame_info.spill_cfa_off != -16)
3295 fprintf (file, "\t.spill %ld\n",
3296 (long) (current_frame_info.spill_cfa_off
3297 + current_frame_info.spill_size));
3298 }
3299
3300 /* Emit the .body directive at the scheduled end of the prologue. */
3301
3302 static void
3303 ia64_output_function_end_prologue (file)
3304 FILE *file;
3305 {
3306 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3307 return;
3308
3309 fputs ("\t.body\n", file);
3310 }
3311
3312 /* Emit the function epilogue. */
3313
3314 static void
3315 ia64_output_function_epilogue (file, size)
3316 FILE *file ATTRIBUTE_UNUSED;
3317 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3318 {
3319 int i;
3320
3321 if (current_frame_info.reg_fp)
3322 {
3323 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3324 reg_names[HARD_FRAME_POINTER_REGNUM]
3325 = reg_names[current_frame_info.reg_fp];
3326 reg_names[current_frame_info.reg_fp] = tmp;
3327 }
3328 if (! TARGET_REG_NAMES)
3329 {
3330 for (i = 0; i < current_frame_info.n_input_regs; i++)
3331 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3332 for (i = 0; i < current_frame_info.n_local_regs; i++)
3333 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3334 for (i = 0; i < current_frame_info.n_output_regs; i++)
3335 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3336 }
3337
3338 current_frame_info.initialized = 0;
3339 }
3340
3341 int
3342 ia64_dbx_register_number (regno)
3343 int regno;
3344 {
3345 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3346 from its home at loc79 to something inside the register frame. We
3347 must perform the same renumbering here for the debug info. */
3348 if (current_frame_info.reg_fp)
3349 {
3350 if (regno == HARD_FRAME_POINTER_REGNUM)
3351 regno = current_frame_info.reg_fp;
3352 else if (regno == current_frame_info.reg_fp)
3353 regno = HARD_FRAME_POINTER_REGNUM;
3354 }
3355
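  /* Worked example with hypothetical counts: given 2 input and 3 local
     registers, in1 maps to 33, loc2 to 36 and out0 to 37.  */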
3356 if (IN_REGNO_P (regno))
3357 return 32 + regno - IN_REG (0);
3358 else if (LOC_REGNO_P (regno))
3359 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3360 else if (OUT_REGNO_P (regno))
3361 return (32 + current_frame_info.n_input_regs
3362 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3363 else
3364 return regno;
3365 }
3366
3367 void
3368 ia64_initialize_trampoline (addr, fnaddr, static_chain)
3369 rtx addr, fnaddr, static_chain;
3370 {
3371 rtx addr_reg, eight = GEN_INT (8);
3372
3373 /* Load up our iterator. */
3374 addr_reg = gen_reg_rtx (Pmode);
3375 emit_move_insn (addr_reg, addr);
3376
3377 /* The first two words are the fake descriptor:
3378 __ia64_trampoline, ADDR+16. */
3379 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3380 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3381 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3382
3383 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3384 copy_to_reg (plus_constant (addr, 16)));
3385 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3386
3387 /* The third word is the target descriptor. */
3388 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3389 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3390
3391 /* The fourth word is the static chain. */
3392 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3393 }
3394 \f
3395 /* Do any needed setup for a variadic function. CUM has not been updated
3396 for the last named argument, which has type TYPE and mode MODE.
3397
3398 We generate the actual spill instructions during prologue generation. */
3399
3400 void
3401 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
3402 CUMULATIVE_ARGS cum;
3403 int int_mode;
3404 tree type;
3405 int * pretend_size;
3406 int second_time ATTRIBUTE_UNUSED;
3407 {
3408 /* Skip the current argument. */
3409 ia64_function_arg_advance (&cum, int_mode, type, 1);
3410
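  /* Worked example (hypothetical prototype, assuming the usual 8 argument
     slots of 8 bytes each): for f (int a, ...) the named argument uses one
     slot, so n below is 7 and *pretend_size becomes 56 bytes.  */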
3411 if (cum.words < MAX_ARGUMENT_SLOTS)
3412 {
3413 int n = MAX_ARGUMENT_SLOTS - cum.words;
3414 *pretend_size = n * UNITS_PER_WORD;
3415 cfun->machine->n_varargs = n;
3416 }
3417 }
3418
3419 /* Check whether TYPE is a homogeneous floating point aggregate. If
3420 it is, return the mode of the floating point type that appears
3421 in all leaves. If it is not, return VOIDmode.
3422
3423 An aggregate is a homogeneous floating point aggregate if all
3424 fields/elements in it have the same floating point type (e.g.,
3425 SFmode). 128-bit quad-precision floats are excluded. */
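/* Illustrative examples: struct { float x, y, z; } is an SFmode HFA and
   struct { double d[4]; } is a DFmode HFA, while struct { float f; double d; }
   mixes element types and is therefore not an HFA, so hfa_element_mode
   returns VOIDmode for it.  */
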
3426
3427 static enum machine_mode
3428 hfa_element_mode (type, nested)
3429 tree type;
3430 int nested;
3431 {
3432 enum machine_mode element_mode = VOIDmode;
3433 enum machine_mode mode;
3434 enum tree_code code = TREE_CODE (type);
3435 int know_element_mode = 0;
3436 tree t;
3437
3438 switch (code)
3439 {
3440 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3441 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3442 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3443 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
3444 case FUNCTION_TYPE:
3445 return VOIDmode;
3446
3447 /* Fortran complex types are supposed to be HFAs, so we need to handle
3448 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3449 types though. */
3450 case COMPLEX_TYPE:
3451 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3452 && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT))
3453 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
3454 * BITS_PER_UNIT, MODE_FLOAT, 0);
3455 else
3456 return VOIDmode;
3457
3458 case REAL_TYPE:
3459 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3460 mode if this is contained within an aggregate. */
3461 if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT))
3462 return TYPE_MODE (type);
3463 else
3464 return VOIDmode;
3465
3466 case ARRAY_TYPE:
3467 return hfa_element_mode (TREE_TYPE (type), 1);
3468
3469 case RECORD_TYPE:
3470 case UNION_TYPE:
3471 case QUAL_UNION_TYPE:
3472 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3473 {
3474 if (TREE_CODE (t) != FIELD_DECL)
3475 continue;
3476
3477 mode = hfa_element_mode (TREE_TYPE (t), 1);
3478 if (know_element_mode)
3479 {
3480 if (mode != element_mode)
3481 return VOIDmode;
3482 }
3483 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3484 return VOIDmode;
3485 else
3486 {
3487 know_element_mode = 1;
3488 element_mode = mode;
3489 }
3490 }
3491 return element_mode;
3492
3493 default:
3494 /* If we reach here, we probably have some front-end specific type
3495 that the backend doesn't know about. This can happen via the
3496 aggregate_value_p call in init_function_start. All we can do is
3497 ignore unknown tree types. */
3498 return VOIDmode;
3499 }
3500
3501 return VOIDmode;
3502 }
3503
3504 /* Return rtx for register where argument is passed, or zero if it is passed
3505 on the stack. */
3506
3507 /* ??? 128-bit quad-precision floats are always passed in general
3508 registers. */
3509
3510 rtx
3511 ia64_function_arg (cum, mode, type, named, incoming)
3512 CUMULATIVE_ARGS *cum;
3513 enum machine_mode mode;
3514 tree type;
3515 int named;
3516 int incoming;
3517 {
3518 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3519 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3520 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3521 / UNITS_PER_WORD);
3522 int offset = 0;
3523 enum machine_mode hfa_mode = VOIDmode;
3524
3525 /* Integer and float arguments larger than 8 bytes start at the next even
3526 boundary. Aggregates larger than 8 bytes start at the next even boundary
3527 if the aggregate has 16 byte alignment. Net effect is that types with
3528 alignment greater than 8 start at the next even boundary. */
3529 /* ??? The ABI does not specify how to handle aggregates with alignment from
3530 9 to 15 bytes, or greater than 16. We handle them all as if they had
3531 16 byte alignment. Such aggregates can occur only if gcc extensions are
3532 used. */
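  /* Worked example (illustrative): if cum->words is 3, an odd slot, and the
     argument is a 16-byte-aligned aggregate, offset becomes 1 so that the
     argument starts in slot 4, the next even slot.  */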
3533 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3534 : (words > 1))
3535 && (cum->words & 1))
3536 offset = 1;
3537
3538 /* If all argument slots are used, then it must go on the stack. */
3539 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3540 return 0;
3541
3542 /* Check for and handle homogeneous FP aggregates. */
3543 if (type)
3544 hfa_mode = hfa_element_mode (type, 0);
3545
3546 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3547 and unprototyped hfas are passed specially. */
3548 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3549 {
3550 rtx loc[16];
3551 int i = 0;
3552 int fp_regs = cum->fp_regs;
3553 int int_regs = cum->words + offset;
3554 int hfa_size = GET_MODE_SIZE (hfa_mode);
3555 int byte_size;
3556 int args_byte_size;
3557
3558 /* If prototyped, pass it in FR regs then GR regs.
3559 If not prototyped, pass it in both FR and GR regs.
3560
3561 If this is an SFmode aggregate, then it is possible to run out of
3562 FR regs while GR regs are still left. In that case, we pass the
3563 remaining part in the GR regs. */
3564
3565 /* Fill the FP regs. We do this always. We stop if we reach the end
3566 of the argument, the last FP register, or the last argument slot. */
3567
3568 byte_size = ((mode == BLKmode)
3569 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3570 args_byte_size = int_regs * UNITS_PER_WORD;
3571 offset = 0;
3572 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3573 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3574 {
3575 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3576 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3577 + fp_regs)),
3578 GEN_INT (offset));
3579 offset += hfa_size;
3580 args_byte_size += hfa_size;
3581 fp_regs++;
3582 }
3583
3584 /* If no prototype, then the whole thing must go in GR regs. */
3585 if (! cum->prototype)
3586 offset = 0;
3587 /* If this is an SFmode aggregate, then we might have some left over
3588 that needs to go in GR regs. */
3589 else if (byte_size != offset)
3590 int_regs += offset / UNITS_PER_WORD;
3591
3592 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3593
3594 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3595 {
3596 enum machine_mode gr_mode = DImode;
3597
3598 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3599 then this goes in a GR reg left adjusted/little endian, right
3600 adjusted/big endian. */
3601 /* ??? Currently this is handled wrong, because 4-byte hunks are
3602 always right adjusted/little endian. */
3603 if (offset & 0x4)
3604 gr_mode = SImode;
3605 /* If we have an even 4 byte hunk because the aggregate is a
3606 multiple of 4 bytes in size, then this goes in a GR reg right
3607 adjusted/little endian. */
3608 else if (byte_size - offset == 4)
3609 gr_mode = SImode;
3610 /* Complex floats need to have float mode. */
3611 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3612 gr_mode = hfa_mode;
3613
3614 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3615 gen_rtx_REG (gr_mode, (basereg
3616 + int_regs)),
3617 GEN_INT (offset));
3618 offset += GET_MODE_SIZE (gr_mode);
3619 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3620 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3621 }
3622
3623 /* If we ended up using just one location, just return that one loc. */
3624 if (i == 1)
3625 return XEXP (loc[0], 0);
3626 else
3627 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3628 }
3629
3630 /* Integral types and aggregates go in general registers. If we have run out
3631 of FR registers, then FP values must also go in general registers. This
3632 can happen when we have an SFmode HFA. */
3633 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3634 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3635 {
3636 int byte_size = ((mode == BLKmode)
3637 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3638 if (BYTES_BIG_ENDIAN
3639 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3640 && byte_size < UNITS_PER_WORD
3641 && byte_size > 0)
3642 {
3643 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3644 gen_rtx_REG (DImode,
3645 (basereg + cum->words
3646 + offset)),
3647 const0_rtx);
3648 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3649 }
3650 else
3651 return gen_rtx_REG (mode, basereg + cum->words + offset);
3652
3653 }
3654
3655 /* If there is a prototype, then FP values go in a FR register when
3656 named, and in a GR register when unnamed. */
3657 else if (cum->prototype)
3658 {
3659 if (! named)
3660 return gen_rtx_REG (mode, basereg + cum->words + offset);
3661 else
3662 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3663 }
3664 /* If there is no prototype, then FP values go in both FR and GR
3665 registers. */
3666 else
3667 {
3668 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3669 gen_rtx_REG (mode, (FR_ARG_FIRST
3670 + cum->fp_regs)),
3671 const0_rtx);
3672 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3673 gen_rtx_REG (mode,
3674 (basereg + cum->words
3675 + offset)),
3676 const0_rtx);
3677
3678 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3679 }
3680 }
3681
3682 /* Return the number of words, at the beginning of the argument, that must be
3683 put in registers. 0 if the argument is entirely in registers or entirely
3684 in memory. */
3685
3686 int
3687 ia64_function_arg_partial_nregs (cum, mode, type, named)
3688 CUMULATIVE_ARGS *cum;
3689 enum machine_mode mode;
3690 tree type;
3691 int named ATTRIBUTE_UNUSED;
3692 {
3693 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3694 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3695 / UNITS_PER_WORD);
3696 int offset = 0;
3697
3698 /* Arguments with alignment larger than 8 bytes start at the next even
3699 boundary. */
3700 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3701 : (words > 1))
3702 && (cum->words & 1))
3703 offset = 1;
3704
3705 /* If all argument slots are used, then it must go on the stack. */
3706 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3707 return 0;
3708
3709 /* It doesn't matter whether the argument goes in FR or GR regs. If
3710 it fits within the 8 argument slots, then it goes entirely in
3711 registers. If it extends past the last argument slot, then the rest
3712 goes on the stack. */
3713
3714 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3715 return 0;
3716
3717 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3718 }
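/* A worked example of the calculation above, using a hypothetical type and
assuming 8 byte words with the usual 8 argument slots: the struct below
occupies four slots, so when cum->words is already 6 this function returns
8 - 6 = 2, meaning two words travel in registers and the remaining two go on
the stack.  No even-slot padding applies because the struct is only 8 byte
aligned.  */
#if 0
struct four_words { long a, b, c, d; };	/* 32 bytes, four argument slots */
#endif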
3719
3720 /* Update CUM to point after this argument. This is patterned after
3721 ia64_function_arg. */
3722
3723 void
3724 ia64_function_arg_advance (cum, mode, type, named)
3725 CUMULATIVE_ARGS *cum;
3726 enum machine_mode mode;
3727 tree type;
3728 int named;
3729 {
3730 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3731 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3732 / UNITS_PER_WORD);
3733 int offset = 0;
3734 enum machine_mode hfa_mode = VOIDmode;
3735
3736 /* If all arg slots are already full, then there is nothing to do. */
3737 if (cum->words >= MAX_ARGUMENT_SLOTS)
3738 return;
3739
3740 /* Arguments with alignment larger than 8 bytes start at the next even
3741 boundary. */
3742 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3743 : (words > 1))
3744 && (cum->words & 1))
3745 offset = 1;
3746
3747 cum->words += words + offset;
3748
3749 /* Check for and handle homogeneous FP aggregates. */
3750 if (type)
3751 hfa_mode = hfa_element_mode (type, 0);
3752
3753 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3754 and unprototyped hfas are passed specially. */
3755 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3756 {
3757 int fp_regs = cum->fp_regs;
3758 /* This is the original value of cum->words + offset. */
3759 int int_regs = cum->words - words;
3760 int hfa_size = GET_MODE_SIZE (hfa_mode);
3761 int byte_size;
3762 int args_byte_size;
3763
3764 /* If prototyped, pass it in FR regs then GR regs.
3765 If not prototyped, pass it in both FR and GR regs.
3766
3767 If this is an SFmode aggregate, then it is possible to run out of
3768 FR regs while GR regs are still left. In that case, we pass the
3769 remaining part in the GR regs. */
3770
3771 /* Fill the FP regs. We do this always. We stop if we reach the end
3772 of the argument, the last FP register, or the last argument slot. */
3773
3774 byte_size = ((mode == BLKmode)
3775 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3776 args_byte_size = int_regs * UNITS_PER_WORD;
3777 offset = 0;
3778 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3779 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3780 {
3781 offset += hfa_size;
3782 args_byte_size += hfa_size;
3783 fp_regs++;
3784 }
3785
3786 cum->fp_regs = fp_regs;
3787 }
3788
3789 /* Integral types and aggregates go in general registers. If we have run out
3790 of FR registers, then FP values must also go in general registers. This
3791 can happen when we have an SFmode HFA. */
3792 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3793 cum->int_regs = cum->words;
3794
3795 /* If there is a prototype, then FP values go in a FR register when
3796 named, and in a GR register when unnamed. */
3797 else if (cum->prototype)
3798 {
3799 if (! named)
3800 cum->int_regs = cum->words;
3801 else
3802 /* ??? Complex types should not reach here. */
3803 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3804 }
3805 /* If there is no prototype, then FP values go in both FR and GR
3806 registers. */
3807 else
3808 {
3809 /* ??? Complex types should not reach here. */
3810 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3811 cum->int_regs = cum->words;
3812 }
3813 }
3814
3815 /* Variable sized types are passed by reference. */
3816 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3817
3818 int
3819 ia64_function_arg_pass_by_reference (cum, mode, type, named)
3820 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3821 enum machine_mode mode ATTRIBUTE_UNUSED;
3822 tree type;
3823 int named ATTRIBUTE_UNUSED;
3824 {
3825 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3826 }
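/* For illustration: TYPE_SIZE is not an INTEGER_CST for variably sized
types, for example a type whose size depends on a runtime value, as can
arise from gcc's variable length array extension or from front ends such as
Ada; such arguments are passed by reference, while fixed-size types,
however large, are passed by value.  */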
3827
3828 /* True if it is OK to do sibling call optimization for the specified
3829 call expression EXP. DECL will be the called function, or NULL if
3830 this is an indirect call. */
3831 static bool
3832 ia64_function_ok_for_sibcall (decl, exp)
3833 tree decl;
3834 tree exp ATTRIBUTE_UNUSED;
3835 {
3836 /* We must always return with our current GP. This means we can
3837 only sibcall to functions defined in the current module. */
3838 return decl && (*targetm.binds_local_p) (decl);
3839 }
3840 \f
3841
3842 /* Implement va_arg. */
3843
3844 rtx
3845 ia64_va_arg (valist, type)
3846 tree valist, type;
3847 {
3848 tree t;
3849
3850 /* Variable sized types are passed by reference. */
3851 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3852 {
3853 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3854 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3855 }
3856
3857 /* Arguments with alignment larger than 8 bytes start at the next even
3858 boundary. */
3859 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3860 {
3861 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3862 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3863 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3864 build_int_2 (-2 * UNITS_PER_WORD, -1));
3865 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3866 TREE_SIDE_EFFECTS (t) = 1;
3867 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3868 }
3869
3870 return std_expand_builtin_va_arg (valist, type);
3871 }
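/* Concretely, with 8 byte words the two constants above are 2*8 - 1 = 15 and
-2*8 = -16, so for a 16 byte aligned type the update computes
(valist + 15) & -16, i.e. valist rounded up to the next even slot boundary
before the standard va_arg expansion runs.  */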
3872 \f
3873 /* Return 1 if function return value returned in memory. Return 0 if it is
3874 in a register. */
3875
3876 int
3877 ia64_return_in_memory (valtype)
3878 tree valtype;
3879 {
3880 enum machine_mode mode;
3881 enum machine_mode hfa_mode;
3882 HOST_WIDE_INT byte_size;
3883
3884 mode = TYPE_MODE (valtype);
3885 byte_size = GET_MODE_SIZE (mode);
3886 if (mode == BLKmode)
3887 {
3888 byte_size = int_size_in_bytes (valtype);
3889 if (byte_size < 0)
3890 return 1;
3891 }
3892
3893 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3894
3895 hfa_mode = hfa_element_mode (valtype, 0);
3896 if (hfa_mode != VOIDmode)
3897 {
3898 int hfa_size = GET_MODE_SIZE (hfa_mode);
3899
3900 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3901 return 1;
3902 else
3903 return 0;
3904 }
3905 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3906 return 1;
3907 else
3908 return 0;
3909 }
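/* Worked examples for the checks above, using hypothetical types and
assuming four integer return slots (r8-r11): the first struct below is an
HFA of eight DFmode elements, 64 bytes with an 8 byte element size, and
64 / 8 = 8 does not exceed MAX_ARGUMENT_SLOTS, so it comes back in FP
registers; a ninth double would push it to memory.  The second struct is not
an HFA, and at 64 bytes it exceeds UNITS_PER_WORD * MAX_INT_RETURN_SLOTS, so
it is returned in memory.  */
#if 0
struct hfa8 { double d[8]; };		/* returned in FP registers */
struct mixed { long l; double d[7]; };	/* 64 bytes, not an HFA: memory */
#endif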
3910
3911 /* Return rtx for register that holds the function return value. */
3912
3913 rtx
3914 ia64_function_value (valtype, func)
3915 tree valtype;
3916 tree func ATTRIBUTE_UNUSED;
3917 {
3918 enum machine_mode mode;
3919 enum machine_mode hfa_mode;
3920
3921 mode = TYPE_MODE (valtype);
3922 hfa_mode = hfa_element_mode (valtype, 0);
3923
3924 if (hfa_mode != VOIDmode)
3925 {
3926 rtx loc[8];
3927 int i;
3928 int hfa_size;
3929 int byte_size;
3930 int offset;
3931
3932 hfa_size = GET_MODE_SIZE (hfa_mode);
3933 byte_size = ((mode == BLKmode)
3934 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3935 offset = 0;
3936 for (i = 0; offset < byte_size; i++)
3937 {
3938 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3939 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3940 GEN_INT (offset));
3941 offset += hfa_size;
3942 }
3943
3944 if (i == 1)
3945 return XEXP (loc[0], 0);
3946 else
3947 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3948 }
3949 else if (FLOAT_TYPE_P (valtype)
3950 && ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3951 return gen_rtx_REG (mode, FR_ARG_FIRST);
3952 else
3953 {
3954 if (BYTES_BIG_ENDIAN
3955 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3956 {
3957 rtx loc[8];
3958 int offset;
3959 int bytesize;
3960 int i;
3961
3962 offset = 0;
3963 bytesize = int_size_in_bytes (valtype);
3964 for (i = 0; offset < bytesize; i++)
3965 {
3966 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3967 gen_rtx_REG (DImode,
3968 GR_RET_FIRST + i),
3969 GEN_INT (offset));
3970 offset += UNITS_PER_WORD;
3971 }
3972 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3973 }
3974 else
3975 return gen_rtx_REG (mode, GR_RET_FIRST);
3976 }
3977 }
3978
3979 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3980 We need to emit DTP-relative relocations. */
3981
3982 void
3983 ia64_output_dwarf_dtprel (file, size, x)
3984 FILE *file;
3985 int size;
3986 rtx x;
3987 {
3988 if (size != 8)
3989 abort ();
3990 fputs ("\tdata8.ua\t@dtprel(", file);
3991 output_addr_const (file, x);
3992 fputs (")", file);
3993 }
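/* For example, given a symbol foo this emits something like:

	data8.ua	@dtprel(foo)

with the caller responsible for any text that follows the closing paren.  */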
3994
3995 /* Print a memory address as an operand to reference that memory location. */
3996
3997 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3998 also call this from ia64_print_operand for memory addresses. */
3999
4000 void
4001 ia64_print_operand_address (stream, address)
4002 FILE * stream ATTRIBUTE_UNUSED;
4003 rtx address ATTRIBUTE_UNUSED;
4004 {
4005 }
4006
4007 /* Print an operand to an assembler instruction.
4008 C Swap and print a comparison operator.
4009 D Print an FP comparison operator.
4010 E Print 32 - constant, for SImode shifts as extract.
4011 e Print 64 - constant, for DImode rotates.
4012 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4013 a floating point register emitted normally.
4014 I Invert a predicate register by adding 1.
4015 J Select the proper predicate register for a condition.
4016 j Select the inverse predicate register for a condition.
4017 O Append .acq for volatile load.
4018 P Postincrement of a MEM.
4019 Q Append .rel for volatile store.
4020 S Shift amount for shladd instruction.
4021 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4022 for Intel assembler.
4023 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4024 for Intel assembler.
4025 r Print register name, or constant 0 as r0. HP compatibility for
4026 Linux kernel. */
4027 void
4028 ia64_print_operand (file, x, code)
4029 FILE * file;
4030 rtx x;
4031 int code;
4032 {
4033 const char *str;
4034
4035 switch (code)
4036 {
4037 case 0:
4038 /* Handled below. */
4039 break;
4040
4041 case 'C':
4042 {
4043 enum rtx_code c = swap_condition (GET_CODE (x));
4044 fputs (GET_RTX_NAME (c), file);
4045 return;
4046 }
4047
4048 case 'D':
4049 switch (GET_CODE (x))
4050 {
4051 case NE:
4052 str = "neq";
4053 break;
4054 case UNORDERED:
4055 str = "unord";
4056 break;
4057 case ORDERED:
4058 str = "ord";
4059 break;
4060 default:
4061 str = GET_RTX_NAME (GET_CODE (x));
4062 break;
4063 }
4064 fputs (str, file);
4065 return;
4066
4067 case 'E':
4068 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4069 return;
4070
4071 case 'e':
4072 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4073 return;
4074
4075 case 'F':
4076 if (x == CONST0_RTX (GET_MODE (x)))
4077 str = reg_names [FR_REG (0)];
4078 else if (x == CONST1_RTX (GET_MODE (x)))
4079 str = reg_names [FR_REG (1)];
4080 else if (GET_CODE (x) == REG)
4081 str = reg_names [REGNO (x)];
4082 else
4083 abort ();
4084 fputs (str, file);
4085 return;
4086
4087 case 'I':
4088 fputs (reg_names [REGNO (x) + 1], file);
4089 return;
4090
4091 case 'J':
4092 case 'j':
4093 {
4094 unsigned int regno = REGNO (XEXP (x, 0));
4095 if (GET_CODE (x) == EQ)
4096 regno += 1;
4097 if (code == 'j')
4098 regno ^= 1;
4099 fputs (reg_names [regno], file);
4100 }
4101 return;
4102
4103 case 'O':
4104 if (MEM_VOLATILE_P (x))
4105 fputs(".acq", file);
4106 return;
4107
4108 case 'P':
4109 {
4110 HOST_WIDE_INT value;
4111
4112 switch (GET_CODE (XEXP (x, 0)))
4113 {
4114 default:
4115 return;
4116
4117 case POST_MODIFY:
4118 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4119 if (GET_CODE (x) == CONST_INT)
4120 value = INTVAL (x);
4121 else if (GET_CODE (x) == REG)
4122 {
4123 fprintf (file, ", %s", reg_names[REGNO (x)]);
4124 return;
4125 }
4126 else
4127 abort ();
4128 break;
4129
4130 case POST_INC:
4131 value = GET_MODE_SIZE (GET_MODE (x));
4132 break;
4133
4134 case POST_DEC:
4135 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4136 break;
4137 }
4138
4139 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4140 return;
4141 }
4142
4143 case 'Q':
4144 if (MEM_VOLATILE_P (x))
4145 fputs(".rel", file);
4146 return;
4147
4148 case 'S':
4149 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4150 return;
4151
4152 case 'T':
4153 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4154 {
4155 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4156 return;
4157 }
4158 break;
4159
4160 case 'U':
4161 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4162 {
4163 const char *prefix = "0x";
4164 if (INTVAL (x) & 0x80000000)
4165 {
4166 fprintf (file, "0xffffffff");
4167 prefix = "";
4168 }
4169 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4170 return;
4171 }
4172 break;
4173
4174 case 'r':
4175 /* If this operand is the constant zero, write it as register zero.
4176 Any register, zero, or CONST_INT value is OK here. */
4177 if (GET_CODE (x) == REG)
4178 fputs (reg_names[REGNO (x)], file);
4179 else if (x == CONST0_RTX (GET_MODE (x)))
4180 fputs ("r0", file);
4181 else if (GET_CODE (x) == CONST_INT)
4182 output_addr_const (file, x);
4183 else
4184 output_operand_lossage ("invalid %%r value");
4185 return;
4186
4187 case '+':
4188 {
4189 const char *which;
4190
4191 /* For conditional branches, returns or calls, substitute
4192 sptk, dptk, dpnt, or spnt for %s. */
4193 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4194 if (x)
4195 {
4196 int pred_val = INTVAL (XEXP (x, 0));
4197
4198 /* Guess top and bottom 10% statically predicted. */
4199 if (pred_val < REG_BR_PROB_BASE / 50)
4200 which = ".spnt";
4201 else if (pred_val < REG_BR_PROB_BASE / 2)
4202 which = ".dpnt";
4203 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4204 which = ".dptk";
4205 else
4206 which = ".sptk";
4207 }
4208 else if (GET_CODE (current_output_insn) == CALL_INSN)
4209 which = ".sptk";
4210 else
4211 which = ".dptk";
4212
4213 fputs (which, file);
4214 return;
4215 }
4216
4217 case ',':
4218 x = current_insn_predicate;
4219 if (x)
4220 {
4221 unsigned int regno = REGNO (XEXP (x, 0));
4222 if (GET_CODE (x) == EQ)
4223 regno += 1;
4224 fprintf (file, "(%s) ", reg_names [regno]);
4225 }
4226 return;
4227
4228 default:
4229 output_operand_lossage ("ia64_print_operand: unknown code");
4230 return;
4231 }
4232
4233 switch (GET_CODE (x))
4234 {
4235 /* This happens for the spill/restore instructions. */
4236 case POST_INC:
4237 case POST_DEC:
4238 case POST_MODIFY:
4239 x = XEXP (x, 0);
4240 /* ... fall through ... */
4241
4242 case REG:
4243 fputs (reg_names [REGNO (x)], file);
4244 break;
4245
4246 case MEM:
4247 {
4248 rtx addr = XEXP (x, 0);
4249 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
4250 addr = XEXP (addr, 0);
4251 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4252 break;
4253 }
4254
4255 default:
4256 output_addr_const (file, x);
4257 break;
4258 }
4259
4260 return;
4261 }
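/* For illustration (a made-up template fragment, not a quote from the
machine description): something like "%,br.cond%+\t%l0" would use ',' to
print a qualifying predicate prefix such as "(p6) " and '+' to append a
branch hint such as ".dptk", while "%J0" and "%j0" print the predicate
register, or its complement, selected by a comparison operand.  */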
4262 \f
4263 /* Compute a (partial) cost for rtx X. Return true if the complete
4264 cost has been computed, and false if subexpressions should be
4265 scanned. In either case, *TOTAL contains the cost result. */
4266 /* ??? This is incomplete. */
4267
4268 static bool
4269 ia64_rtx_costs (x, code, outer_code, total)
4270 rtx x;
4271 int code, outer_code;
4272 int *total;
4273 {
4274 switch (code)
4275 {
4276 case CONST_INT:
4277 switch (outer_code)
4278 {
4279 case SET:
4280 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4281 return true;
4282 case PLUS:
4283 if (CONST_OK_FOR_I (INTVAL (x)))
4284 *total = 0;
4285 else if (CONST_OK_FOR_J (INTVAL (x)))
4286 *total = 1;
4287 else
4288 *total = COSTS_N_INSNS (1);
4289 return true;
4290 default:
4291 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4292 *total = 0;
4293 else
4294 *total = COSTS_N_INSNS (1);
4295 return true;
4296 }
4297
4298 case CONST_DOUBLE:
4299 *total = COSTS_N_INSNS (1);
4300 return true;
4301
4302 case CONST:
4303 case SYMBOL_REF:
4304 case LABEL_REF:
4305 *total = COSTS_N_INSNS (3);
4306 return true;
4307
4308 case MULT:
4309 /* For multiplies wider than HImode, we have to go to the FPU,
4310 which normally involves copies. Plus there's the latency
4311 of the multiply itself, and the latency of the instructions to
4312 transfer integer regs to FP regs. */
4313 /* ??? Check for FP mode. */
4314 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4315 *total = COSTS_N_INSNS (10);
4316 else
4317 *total = COSTS_N_INSNS (2);
4318 return true;
4319
4320 case PLUS:
4321 case MINUS:
4322 case ASHIFT:
4323 case ASHIFTRT:
4324 case LSHIFTRT:
4325 *total = COSTS_N_INSNS (1);
4326 return true;
4327
4328 case DIV:
4329 case UDIV:
4330 case MOD:
4331 case UMOD:
4332 /* We make divide expensive, so that divide-by-constant will be
4333 optimized to a multiply. */
4334 *total = COSTS_N_INSNS (60);
4335 return true;
4336
4337 default:
4338 return false;
4339 }
4340 }
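/* For example, assuming the usual ia64 constraint letters (I: 14 bit add
immediate, J: 22 bit addl immediate, K and L: 8 bit immediates for logical
and compare forms): adding a constant that fits the 14 bit form is free, one
that only fits the 22 bit form costs 1, and any wider constant costs a full
extra instruction; all divisions are priced at 60 instructions so that
division by a constant is rewritten as a multiply.  */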
4341
4342 /* Calculate the cost of moving data from a register in class FROM to
4343 one in class TO, using MODE. */
4344
4345 int
4346 ia64_register_move_cost (mode, from, to)
4347 enum machine_mode mode;
4348 enum reg_class from, to;
4349 {
4350 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4351 if (to == ADDL_REGS)
4352 to = GR_REGS;
4353 if (from == ADDL_REGS)
4354 from = GR_REGS;
4355
4356 /* All costs are symmetric, so reduce cases by putting the
4357 lower number class as the destination. */
4358 if (from < to)
4359 {
4360 enum reg_class tmp = to;
4361 to = from, from = tmp;
4362 }
4363
4364 /* Moving from FR<->GR in TFmode must be more expensive than 2,
4365 so that we get secondary memory reloads. Between FR_REGS,
4366 we have to make this at least as expensive as MEMORY_MOVE_COST
4367 to avoid spectacularly poor register class preferencing. */
4368 if (mode == TFmode)
4369 {
4370 if (to != GR_REGS || from != GR_REGS)
4371 return MEMORY_MOVE_COST (mode, to, 0);
4372 else
4373 return 3;
4374 }
4375
4376 switch (to)
4377 {
4378 case PR_REGS:
4379 /* Moving between PR registers takes two insns. */
4380 if (from == PR_REGS)
4381 return 3;
4382 /* Moving between PR and anything but GR is impossible. */
4383 if (from != GR_REGS)
4384 return MEMORY_MOVE_COST (mode, to, 0);
4385 break;
4386
4387 case BR_REGS:
4388 /* Moving between BR and anything but GR is impossible. */
4389 if (from != GR_REGS && from != GR_AND_BR_REGS)
4390 return MEMORY_MOVE_COST (mode, to, 0);
4391 break;
4392
4393 case AR_I_REGS:
4394 case AR_M_REGS:
4395 /* Moving between AR and anything but GR is impossible. */
4396 if (from != GR_REGS)
4397 return MEMORY_MOVE_COST (mode, to, 0);
4398 break;
4399
4400 case GR_REGS:
4401 case FR_REGS:
4402 case GR_AND_FR_REGS:
4403 case GR_AND_BR_REGS:
4404 case ALL_REGS:
4405 break;
4406
4407 default:
4408 abort ();
4409 }
4410
4411 return 2;
4412 }
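/* For illustration: a TFmode move is priced at the memory move cost unless
both classes are GR_REGS, which forces secondary memory reloads for FR<->GR
copies of that mode; a copy between two predicate registers costs 3 because
it takes two instructions; and plain GR or FR copies fall through to the
default cost of 2.  */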
4413
4414 /* This function returns the register class required for a secondary
4415 register when copying between one of the registers in CLASS, and X,
4416 using MODE. A return value of NO_REGS means that no secondary register
4417 is required. */
4418
4419 enum reg_class
4420 ia64_secondary_reload_class (class, mode, x)
4421 enum reg_class class;
4422 enum machine_mode mode ATTRIBUTE_UNUSED;
4423 rtx x;
4424 {
4425 int regno = -1;
4426
4427 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4428 regno = true_regnum (x);
4429
4430 switch (class)
4431 {
4432 case BR_REGS:
4433 case AR_M_REGS:
4434 case AR_I_REGS:
4435 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4436 interaction. We end up with two pseudos with overlapping lifetimes
4437 both of which are equiv to the same constant, and both of which need
4438 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4439 changes depending on the path length, which means the qty_first_reg
4440 check in make_regs_eqv can give different answers at different times.
4441 At some point I'll probably need a reload_indi pattern to handle
4442 this.
4443
4444 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4445 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4446 non-general registers for good measure. */
4447 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4448 return GR_REGS;
4449
4450 /* This is needed if a pseudo used as a call_operand gets spilled to a
4451 stack slot. */
4452 if (GET_CODE (x) == MEM)
4453 return GR_REGS;
4454 break;
4455
4456 case FR_REGS:
4457 /* Need to go through general registers to get to other class regs. */
4458 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4459 return GR_REGS;
4460
4461 /* This can happen when a paradoxical subreg is an operand to the
4462 muldi3 pattern. */
4463 /* ??? This shouldn't be necessary after instruction scheduling is
4464 enabled, because paradoxical subregs are not accepted by
4465 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4466 stop the paradoxical subreg stupidity in the *_operand functions
4467 in recog.c. */
4468 if (GET_CODE (x) == MEM
4469 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4470 || GET_MODE (x) == QImode))
4471 return GR_REGS;
4472
4473 /* This can happen because of the ior/and/etc patterns that accept FP
4474 registers as operands. If the third operand is a constant, then it
4475 needs to be reloaded into a FP register. */
4476 if (GET_CODE (x) == CONST_INT)
4477 return GR_REGS;
4478
4479 /* This can happen because of register elimination in a muldi3 insn.
4480 E.g. `26107 * (unsigned long)&u'. */
4481 if (GET_CODE (x) == PLUS)
4482 return GR_REGS;
4483 break;
4484
4485 case PR_REGS:
4486 /* ??? This happens if we cse/gcse a BImode value across a call,
4487 and the function has a nonlocal goto. This is because global
4488 does not allocate call crossing pseudos to hard registers when
4489 current_function_has_nonlocal_goto is true. This is relatively
4490 common for C++ programs that use exceptions. To reproduce,
4491 return NO_REGS and compile libstdc++. */
4492 if (GET_CODE (x) == MEM)
4493 return GR_REGS;
4494
4495 /* This can happen when we take a BImode subreg of a DImode value,
4496 and that DImode value winds up in some non-GR register. */
4497 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4498 return GR_REGS;
4499 break;
4500
4501 case GR_REGS:
4502 /* Since we have no offsettable memory addresses, we need a temporary
4503 to hold the address of the second word. */
4504 if (mode == TImode)
4505 return GR_REGS;
4506 break;
4507
4508 default:
4509 break;
4510 }
4511
4512 return NO_REGS;
4513 }
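/* Examples of the cases above: a CONST_INT operand feeding an FR-class insn
is first loaded into a general register, a BR-class pseudo spilled to a
stack slot is reloaded through a GR register, and copies that need no
intermediate fall out the bottom with NO_REGS.  */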
4514
4515 \f
4516 /* Emit text to declare externally defined variables and functions, because
4517 the Intel assembler does not support undefined externals. */
4518
4519 void
4520 ia64_asm_output_external (file, decl, name)
4521 FILE *file;
4522 tree decl;
4523 const char *name;
4524 {
4525 int save_referenced;
4526
4527 /* GNU as does not need anything here, but the HP linker does need
4528 something for external functions. */
4529
4530 if (TARGET_GNU_AS
4531 && (!TARGET_HPUX_LD
4532 || TREE_CODE (decl) != FUNCTION_DECL
4533 || strstr(name, "__builtin_") == name))
4534 return;
4535
4536 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4537 the linker when we do this, so we need to be careful not to do this for
4538 builtin functions which have no library equivalent. Unfortunately, we
4539 can't tell here whether or not a function will actually be called by
4540 expand_expr, so we pull in library functions even if we may not need
4541 them later. */
4542 if (! strcmp (name, "__builtin_next_arg")
4543 || ! strcmp (name, "alloca")
4544 || ! strcmp (name, "__builtin_constant_p")
4545 || ! strcmp (name, "__builtin_args_info"))
4546 return;
4547
4548 if (TARGET_HPUX_LD)
4549 ia64_hpux_add_extern_decl (name);
4550 else
4551 {
4552 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4553 restore it. */
4554 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4555 if (TREE_CODE (decl) == FUNCTION_DECL)
4556 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4557 (*targetm.asm_out.globalize_label) (file, name);
4558 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4559 }
4560 }
4561 \f
4562 /* Parse the -mfixed-range= option string. */
4563
4564 static void
4565 fix_range (const_str)
4566 const char *const_str;
4567 {
4568 int i, first, last;
4569 char *str, *dash, *comma;
4570
4571 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4572 REG2 are either register names or register numbers. The effect
4573 of this option is to mark the registers in the range from REG1 to
4574 REG2 as ``fixed'' so they won't be used by the compiler. This is
4575 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4576
4577 i = strlen (const_str);
4578 str = (char *) alloca (i + 1);
4579 memcpy (str, const_str, i + 1);
4580
4581 while (1)
4582 {
4583 dash = strchr (str, '-');
4584 if (!dash)
4585 {
4586 warning ("value of -mfixed-range must have form REG1-REG2");
4587 return;
4588 }
4589 *dash = '\0';
4590
4591 comma = strchr (dash + 1, ',');
4592 if (comma)
4593 *comma = '\0';
4594
4595 first = decode_reg_name (str);
4596 if (first < 0)
4597 {
4598 warning ("unknown register name: %s", str);
4599 return;
4600 }
4601
4602 last = decode_reg_name (dash + 1);
4603 if (last < 0)
4604 {
4605 warning ("unknown register name: %s", dash + 1);
4606 return;
4607 }
4608
4609 *dash = '-';
4610
4611 if (first > last)
4612 {
4613 warning ("%s-%s is an empty range", str, dash + 1);
4614 return;
4615 }
4616
4617 for (i = first; i <= last; ++i)
4618 fixed_regs[i] = call_used_regs[i] = 1;
4619
4620 if (!comma)
4621 break;
4622
4623 *comma = ',';
4624 str = comma + 1;
4625 }
4626 }
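/* Usage example for the parser above: a switch such as
-mfixed-range=f32-f127 marks f32 through f127 as fixed and call-used, and
several ranges may be chained with commas, for instance
-mfixed-range=f32-f127,f12-f15 (the second range here is purely
illustrative).  */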
4627
4628 static struct machine_function *
4629 ia64_init_machine_status ()
4630 {
4631 return ggc_alloc_cleared (sizeof (struct machine_function));
4632 }
4633
4634 /* Handle TARGET_OPTIONS switches. */
4635
4636 void
4637 ia64_override_options ()
4638 {
4639 static struct pta
4640 {
4641 const char *const name; /* processor name or nickname. */
4642 const enum processor_type processor;
4643 }
4644 const processor_alias_table[] =
4645 {
4646 {"itanium", PROCESSOR_ITANIUM},
4647 {"itanium1", PROCESSOR_ITANIUM},
4648 {"merced", PROCESSOR_ITANIUM},
4649 {"itanium2", PROCESSOR_ITANIUM2},
4650 {"mckinley", PROCESSOR_ITANIUM2},
4651 };
4652
4653 int const pta_size = ARRAY_SIZE (processor_alias_table);
4654 int i;
4655
4656 if (TARGET_AUTO_PIC)
4657 target_flags |= MASK_CONST_GP;
4658
4659 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4660 {
4661 warning ("cannot optimize floating point division for both latency and throughput");
4662 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4663 }
4664
4665 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4666 {
4667 warning ("cannot optimize integer division for both latency and throughput");
4668 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4669 }
4670
4671 if (ia64_fixed_range_string)
4672 fix_range (ia64_fixed_range_string);
4673
4674 if (ia64_tls_size_string)
4675 {
4676 char *end;
4677 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4678 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4679 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4680 else
4681 ia64_tls_size = tmp;
4682 }
4683
4684 if (!ia64_tune_string)
4685 ia64_tune_string = "itanium2";
4686
4687 for (i = 0; i < pta_size; i++)
4688 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4689 {
4690 ia64_tune = processor_alias_table[i].processor;
4691 break;
4692 }
4693
4694 if (i == pta_size)
4695 error ("bad value (%s) for -tune= switch", ia64_tune_string);
4696
4697 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4698 flag_schedule_insns_after_reload = 0;
4699
4700 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4701
4702 init_machine_status = ia64_init_machine_status;
4703
4704 /* Tell the compiler which flavor of TFmode we're using. */
4705 if (INTEL_EXTENDED_IEEE_FORMAT)
4706 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
4707 }
4708 \f
4709 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
4710 static enum attr_type ia64_safe_type PARAMS((rtx));
4711
4712 static enum attr_itanium_class
4713 ia64_safe_itanium_class (insn)
4714 rtx insn;
4715 {
4716 if (recog_memoized (insn) >= 0)
4717 return get_attr_itanium_class (insn);
4718 else
4719 return ITANIUM_CLASS_UNKNOWN;
4720 }
4721
4722 static enum attr_type
4723 ia64_safe_type (insn)
4724 rtx insn;
4725 {
4726 if (recog_memoized (insn) >= 0)
4727 return get_attr_type (insn);
4728 else
4729 return TYPE_UNKNOWN;
4730 }
4731 \f
4732 /* The following collection of routines emit instruction group stop bits as
4733 necessary to avoid dependencies. */
4734
4735 /* Need to track some additional registers as far as serialization is
4736 concerned so we can properly handle br.call and br.ret. We could
4737 make these registers visible to gcc, but since these registers are
4738 never explicitly used in gcc generated code, it seems wasteful to
4739 do so (plus it would make the call and return patterns needlessly
4740 complex). */
4741 #define REG_GP (GR_REG (1))
4742 #define REG_RP (BR_REG (0))
4743 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4744 /* This is used for volatile asms which may require a stop bit immediately
4745 before and after them. */
4746 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4747 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4748 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4749
4750 /* For each register, we keep track of how it has been written in the
4751 current instruction group.
4752
4753 If a register is written unconditionally (no qualifying predicate),
4754 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4755
4756 If a register is written if its qualifying predicate P is true, we
4757 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4758 may be written again by the complement of P (P^1) and when this happens,
4759 WRITE_COUNT gets set to 2.
4760
4761 The result of this is that whenever an insn attempts to write a register
4762 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4763
4764 If a predicate register is written by a floating-point insn, we set
4765 WRITTEN_BY_FP to true.
4766
4767 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4768 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
4769
4770 struct reg_write_state
4771 {
4772 unsigned int write_count : 2;
4773 unsigned int first_pred : 16;
4774 unsigned int written_by_fp : 1;
4775 unsigned int written_by_and : 1;
4776 unsigned int written_by_or : 1;
4777 };
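/* A worked example of the state machine described above: after
"(p6) mov r14 = 1" the entry for r14 has WRITE_COUNT 1 and FIRST_PRED p6; a
following "(p7) mov r14 = 2" needs no stop bit provided p7 is the complement
of p6 (the code below assumes the complement of an even predicate P is P+1)
and bumps WRITE_COUNT to 2; any further write of r14 in the same instruction
group then forces a stop bit.  */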
4778
4779 /* Cumulative info for the current instruction group. */
4780 struct reg_write_state rws_sum[NUM_REGS];
4781 /* Info for the current instruction. This gets copied to rws_sum after a
4782 stop bit is emitted. */
4783 struct reg_write_state rws_insn[NUM_REGS];
4784
4785 /* Indicates whether this is the first instruction after a stop bit,
4786 in which case we don't need another stop bit. Without this, we hit
4787 the abort in ia64_variable_issue when scheduling an alloc. */
4788 static int first_instruction;
4789
4790 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4791 RTL for one instruction. */
4792 struct reg_flags
4793 {
4794 unsigned int is_write : 1; /* Is register being written? */
4795 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4796 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4797 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4798 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4799 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4800 };
4801
4802 static void rws_update PARAMS ((struct reg_write_state *, int,
4803 struct reg_flags, int));
4804 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4805 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4806 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4807 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4808 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4809 static void init_insn_group_barriers PARAMS ((void));
4810 static int group_barrier_needed_p PARAMS ((rtx));
4811 static int safe_group_barrier_needed_p PARAMS ((rtx));
4812
4813 /* Update *RWS for REGNO, which is being written by the current instruction,
4814 with predicate PRED, and associated register flags in FLAGS. */
4815
4816 static void
4817 rws_update (rws, regno, flags, pred)
4818 struct reg_write_state *rws;
4819 int regno;
4820 struct reg_flags flags;
4821 int pred;
4822 {
4823 if (pred)
4824 rws[regno].write_count++;
4825 else
4826 rws[regno].write_count = 2;
4827 rws[regno].written_by_fp |= flags.is_fp;
4828 /* ??? Not tracking and/or across differing predicates. */
4829 rws[regno].written_by_and = flags.is_and;
4830 rws[regno].written_by_or = flags.is_or;
4831 rws[regno].first_pred = pred;
4832 }
4833
4834 /* Handle an access to register REGNO of type FLAGS using predicate register
4835 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4836 a dependency with an earlier instruction in the same group. */
4837
4838 static int
4839 rws_access_regno (regno, flags, pred)
4840 int regno;
4841 struct reg_flags flags;
4842 int pred;
4843 {
4844 int need_barrier = 0;
4845
4846 if (regno >= NUM_REGS)
4847 abort ();
4848
4849 if (! PR_REGNO_P (regno))
4850 flags.is_and = flags.is_or = 0;
4851
4852 if (flags.is_write)
4853 {
4854 int write_count;
4855
4856 /* One insn writes same reg multiple times? */
4857 if (rws_insn[regno].write_count > 0)
4858 abort ();
4859
4860 /* Update info for current instruction. */
4861 rws_update (rws_insn, regno, flags, pred);
4862 write_count = rws_sum[regno].write_count;
4863
4864 switch (write_count)
4865 {
4866 case 0:
4867 /* The register has not been written yet. */
4868 rws_update (rws_sum, regno, flags, pred);
4869 break;
4870
4871 case 1:
4872 /* The register has been written via a predicate. If this is
4873 not a complementary predicate, then we need a barrier. */
4874 /* ??? This assumes that P and P+1 are always complementary
4875 predicates for P even. */
4876 if (flags.is_and && rws_sum[regno].written_by_and)
4877 ;
4878 else if (flags.is_or && rws_sum[regno].written_by_or)
4879 ;
4880 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4881 need_barrier = 1;
4882 rws_update (rws_sum, regno, flags, pred);
4883 break;
4884
4885 case 2:
4886 /* The register has been unconditionally written already. We
4887 need a barrier. */
4888 if (flags.is_and && rws_sum[regno].written_by_and)
4889 ;
4890 else if (flags.is_or && rws_sum[regno].written_by_or)
4891 ;
4892 else
4893 need_barrier = 1;
4894 rws_sum[regno].written_by_and = flags.is_and;
4895 rws_sum[regno].written_by_or = flags.is_or;
4896 break;
4897
4898 default:
4899 abort ();
4900 }
4901 }
4902 else
4903 {
4904 if (flags.is_branch)
4905 {
4906 /* Branches have several RAW exceptions that allow us to avoid
4907 barriers. */
4908
4909 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4910 /* RAW dependencies on branch regs are permissible as long
4911 as the writer is a non-branch instruction. Since we
4912 never generate code that uses a branch register written
4913 by a branch instruction, handling this case is
4914 easy. */
4915 return 0;
4916
4917 if (REGNO_REG_CLASS (regno) == PR_REGS
4918 && ! rws_sum[regno].written_by_fp)
4919 /* The predicates of a branch are available within the
4920 same insn group as long as the predicate was written by
4921 something other than a floating-point instruction. */
4922 return 0;
4923 }
4924
4925 if (flags.is_and && rws_sum[regno].written_by_and)
4926 return 0;
4927 if (flags.is_or && rws_sum[regno].written_by_or)
4928 return 0;
4929
4930 switch (rws_sum[regno].write_count)
4931 {
4932 case 0:
4933 /* The register has not been written yet. */
4934 break;
4935
4936 case 1:
4937 /* The register has been written via a predicate. If this is
4938 not a complementary predicate, then we need a barrier. */
4939 /* ??? This assumes that P and P+1 are always complementary
4940 predicates for P even. */
4941 if ((rws_sum[regno].first_pred ^ 1) != pred)
4942 need_barrier = 1;
4943 break;
4944
4945 case 2:
4946 /* The register has been unconditionally written already. We
4947 need a barrier. */
4948 need_barrier = 1;
4949 break;
4950
4951 default:
4952 abort ();
4953 }
4954 }
4955
4956 return need_barrier;
4957 }
4958
4959 static int
4960 rws_access_reg (reg, flags, pred)
4961 rtx reg;
4962 struct reg_flags flags;
4963 int pred;
4964 {
4965 int regno = REGNO (reg);
4966 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4967
4968 if (n == 1)
4969 return rws_access_regno (regno, flags, pred);
4970 else
4971 {
4972 int need_barrier = 0;
4973 while (--n >= 0)
4974 need_barrier |= rws_access_regno (regno + n, flags, pred);
4975 return need_barrier;
4976 }
4977 }
4978
4979 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4980 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4981
4982 static void
4983 update_set_flags (x, pflags, ppred, pcond)
4984 rtx x;
4985 struct reg_flags *pflags;
4986 int *ppred;
4987 rtx *pcond;
4988 {
4989 rtx src = SET_SRC (x);
4990
4991 *pcond = 0;
4992
4993 switch (GET_CODE (src))
4994 {
4995 case CALL:
4996 return;
4997
4998 case IF_THEN_ELSE:
4999 if (SET_DEST (x) == pc_rtx)
5000 /* X is a conditional branch. */
5001 return;
5002 else
5003 {
5004 int is_complemented = 0;
5005
5006 /* X is a conditional move. */
5007 rtx cond = XEXP (src, 0);
5008 if (GET_CODE (cond) == EQ)
5009 is_complemented = 1;
5010 cond = XEXP (cond, 0);
5011 if (GET_CODE (cond) != REG
5012 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5013 abort ();
5014 *pcond = cond;
5015 if (XEXP (src, 1) == SET_DEST (x)
5016 || XEXP (src, 2) == SET_DEST (x))
5017 {
5018 /* X is a conditional move that conditionally writes the
5019 destination. */
5020
5021 /* We need another complement in this case. */
5022 if (XEXP (src, 1) == SET_DEST (x))
5023 is_complemented = ! is_complemented;
5024
5025 *ppred = REGNO (cond);
5026 if (is_complemented)
5027 ++*ppred;
5028 }
5029
5030 /* ??? If this is a conditional write to the dest, then this
5031 instruction does not actually read one source. This probably
5032 doesn't matter, because that source is also the dest. */
5033 /* ??? Multiple writes to predicate registers are allowed
5034 if they are all AND type compares, or if they are all OR
5035 type compares. We do not generate such instructions
5036 currently. */
5037 }
5038 /* ... fall through ... */
5039
5040 default:
5041 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
5042 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
5043 /* Set pflags->is_fp to 1 so that we know we're dealing
5044 with a floating point comparison when processing the
5045 destination of the SET. */
5046 pflags->is_fp = 1;
5047
5048 /* Discover if this is a parallel comparison. We only handle
5049 and.orcm and or.andcm at present, since we must retain a
5050 strict inverse on the predicate pair. */
5051 else if (GET_CODE (src) == AND)
5052 pflags->is_and = 1;
5053 else if (GET_CODE (src) == IOR)
5054 pflags->is_or = 1;
5055
5056 break;
5057 }
5058 }
5059
5060 /* Subroutine of rtx_needs_barrier; this function determines whether the
5061 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5062 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5063 for this insn. */
5064
5065 static int
5066 set_src_needs_barrier (x, flags, pred, cond)
5067 rtx x;
5068 struct reg_flags flags;
5069 int pred;
5070 rtx cond;
5071 {
5072 int need_barrier = 0;
5073 rtx dst;
5074 rtx src = SET_SRC (x);
5075
5076 if (GET_CODE (src) == CALL)
5077 /* We don't need to worry about the result registers that
5078 get written by subroutine call. */
5079 return rtx_needs_barrier (src, flags, pred);
5080 else if (SET_DEST (x) == pc_rtx)
5081 {
5082 /* X is a conditional branch. */
5083 /* ??? This seems redundant, as the caller sets this bit for
5084 all JUMP_INSNs. */
5085 flags.is_branch = 1;
5086 return rtx_needs_barrier (src, flags, pred);
5087 }
5088
5089 need_barrier = rtx_needs_barrier (src, flags, pred);
5090
5091 /* This instruction unconditionally uses a predicate register. */
5092 if (cond)
5093 need_barrier |= rws_access_reg (cond, flags, 0);
5094
5095 dst = SET_DEST (x);
5096 if (GET_CODE (dst) == ZERO_EXTRACT)
5097 {
5098 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5099 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5100 dst = XEXP (dst, 0);
5101 }
5102 return need_barrier;
5103 }
5104
5105 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
5106 Return 1 if this access creates a dependency with an earlier instruction
5107 in the same group. */
5108
5109 static int
5110 rtx_needs_barrier (x, flags, pred)
5111 rtx x;
5112 struct reg_flags flags;
5113 int pred;
5114 {
5115 int i, j;
5116 int is_complemented = 0;
5117 int need_barrier = 0;
5118 const char *format_ptr;
5119 struct reg_flags new_flags;
5120 rtx cond = 0;
5121
5122 if (! x)
5123 return 0;
5124
5125 new_flags = flags;
5126
5127 switch (GET_CODE (x))
5128 {
5129 case SET:
5130 update_set_flags (x, &new_flags, &pred, &cond);
5131 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
5132 if (GET_CODE (SET_SRC (x)) != CALL)
5133 {
5134 new_flags.is_write = 1;
5135 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5136 }
5137 break;
5138
5139 case CALL:
5140 new_flags.is_write = 0;
5141 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5142
5143 /* Avoid multiple register writes, in case this is a pattern with
5144 multiple CALL rtx. This avoids an abort in rws_access_reg. */
5145 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5146 {
5147 new_flags.is_write = 1;
5148 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5149 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5150 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5151 }
5152 break;
5153
5154 case COND_EXEC:
5155 /* X is a predicated instruction. */
5156
5157 cond = COND_EXEC_TEST (x);
5158 if (pred)
5159 abort ();
5160 need_barrier = rtx_needs_barrier (cond, flags, 0);
5161
5162 if (GET_CODE (cond) == EQ)
5163 is_complemented = 1;
5164 cond = XEXP (cond, 0);
5165 if (GET_CODE (cond) != REG
5166 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5167 abort ();
5168 pred = REGNO (cond);
5169 if (is_complemented)
5170 ++pred;
5171
5172 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5173 return need_barrier;
5174
5175 case CLOBBER:
5176 case USE:
5177 /* Clobber & use are for earlier compiler-phases only. */
5178 break;
5179
5180 case ASM_OPERANDS:
5181 case ASM_INPUT:
5182 /* We always emit stop bits for traditional asms. We emit stop bits
5183 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5184 if (GET_CODE (x) != ASM_OPERANDS
5185 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5186 {
5187 /* Avoid writing the register multiple times if we have multiple
5188 asm outputs. This avoids an abort in rws_access_reg. */
5189 if (! rws_insn[REG_VOLATILE].write_count)
5190 {
5191 new_flags.is_write = 1;
5192 rws_access_regno (REG_VOLATILE, new_flags, pred);
5193 }
5194 return 1;
5195 }
5196
5197 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5198 We cannot just fall through here since then we would be confused
5199 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
5200 a traditional asm, unlike its normal usage. */
5201
5202 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5203 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5204 need_barrier = 1;
5205 break;
5206
5207 case PARALLEL:
5208 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5209 {
5210 rtx pat = XVECEXP (x, 0, i);
5211 if (GET_CODE (pat) == SET)
5212 {
5213 update_set_flags (pat, &new_flags, &pred, &cond);
5214 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
5215 }
5216 else if (GET_CODE (pat) == USE
5217 || GET_CODE (pat) == CALL
5218 || GET_CODE (pat) == ASM_OPERANDS)
5219 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5220 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
5221 abort ();
5222 }
5223 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5224 {
5225 rtx pat = XVECEXP (x, 0, i);
5226 if (GET_CODE (pat) == SET)
5227 {
5228 if (GET_CODE (SET_SRC (pat)) != CALL)
5229 {
5230 new_flags.is_write = 1;
5231 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5232 pred);
5233 }
5234 }
5235 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5236 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5237 }
5238 break;
5239
5240 case SUBREG:
5241 x = SUBREG_REG (x);
5242 /* FALLTHRU */
5243 case REG:
5244 if (REGNO (x) == AR_UNAT_REGNUM)
5245 {
5246 for (i = 0; i < 64; ++i)
5247 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5248 }
5249 else
5250 need_barrier = rws_access_reg (x, flags, pred);
5251 break;
5252
5253 case MEM:
5254 /* Find the regs used in memory address computation. */
5255 new_flags.is_write = 0;
5256 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5257 break;
5258
5259 case CONST_INT: case CONST_DOUBLE:
5260 case SYMBOL_REF: case LABEL_REF: case CONST:
5261 break;
5262
5263 /* Operators with side-effects. */
5264 case POST_INC: case POST_DEC:
5265 if (GET_CODE (XEXP (x, 0)) != REG)
5266 abort ();
5267
5268 new_flags.is_write = 0;
5269 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5270 new_flags.is_write = 1;
5271 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5272 break;
5273
5274 case POST_MODIFY:
5275 if (GET_CODE (XEXP (x, 0)) != REG)
5276 abort ();
5277
5278 new_flags.is_write = 0;
5279 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5280 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5281 new_flags.is_write = 1;
5282 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5283 break;
5284
5285 /* Handle common unary and binary ops for efficiency. */
5286 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5287 case MOD: case UDIV: case UMOD: case AND: case IOR:
5288 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5289 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5290 case NE: case EQ: case GE: case GT: case LE:
5291 case LT: case GEU: case GTU: case LEU: case LTU:
5292 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5293 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5294 break;
5295
5296 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5297 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5298 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5299 case SQRT: case FFS: case POPCOUNT:
5300 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5301 break;
5302
5303 case UNSPEC:
5304 switch (XINT (x, 1))
5305 {
5306 case UNSPEC_LTOFF_DTPMOD:
5307 case UNSPEC_LTOFF_DTPREL:
5308 case UNSPEC_DTPREL:
5309 case UNSPEC_LTOFF_TPREL:
5310 case UNSPEC_TPREL:
5311 case UNSPEC_PRED_REL_MUTEX:
5312 case UNSPEC_PIC_CALL:
5313 case UNSPEC_MF:
5314 case UNSPEC_FETCHADD_ACQ:
5315 case UNSPEC_BSP_VALUE:
5316 case UNSPEC_FLUSHRS:
5317 case UNSPEC_BUNDLE_SELECTOR:
5318 break;
5319
5320 case UNSPEC_GR_SPILL:
5321 case UNSPEC_GR_RESTORE:
5322 {
5323 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5324 HOST_WIDE_INT bit = (offset >> 3) & 63;
5325
5326 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5327 new_flags.is_write = (XINT (x, 1) == 1);
5328 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5329 new_flags, pred);
5330 break;
5331 }
5332
5333 case UNSPEC_FR_SPILL:
5334 case UNSPEC_FR_RESTORE:
5335 case UNSPEC_GETF_EXP:
5336 case UNSPEC_ADDP4:
5337 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5338 break;
5339
5340 case UNSPEC_FR_RECIP_APPROX:
5341 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5342 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5343 break;
5344
5345 case UNSPEC_CMPXCHG_ACQ:
5346 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5347 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5348 break;
5349
5350 default:
5351 abort ();
5352 }
5353 break;
5354
5355 case UNSPEC_VOLATILE:
5356 switch (XINT (x, 1))
5357 {
5358 case UNSPECV_ALLOC:
5359 /* Alloc must always be the first instruction of a group.
5360 We force this by always returning true. */
5361 /* ??? We might get better scheduling if we explicitly check for
5362 input/local/output register dependencies, and modify the
5363 scheduler so that alloc is always reordered to the start of
5364 the current group. We could then eliminate all of the
5365 first_instruction code. */
5366 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5367
5368 new_flags.is_write = 1;
5369 rws_access_regno (REG_AR_CFM, new_flags, pred);
5370 return 1;
5371
5372 case UNSPECV_SET_BSP:
5373 need_barrier = 1;
5374 break;
5375
5376 case UNSPECV_BLOCKAGE:
5377 case UNSPECV_INSN_GROUP_BARRIER:
5378 case UNSPECV_BREAK:
5379 case UNSPECV_PSAC_ALL:
5380 case UNSPECV_PSAC_NORMAL:
5381 return 0;
5382
5383 default:
5384 abort ();
5385 }
5386 break;
5387
5388 case RETURN:
5389 new_flags.is_write = 0;
5390 need_barrier = rws_access_regno (REG_RP, flags, pred);
5391 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5392
5393 new_flags.is_write = 1;
5394 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5395 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5396 break;
5397
5398 default:
5399 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5400 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5401 switch (format_ptr[i])
5402 {
5403 case '0': /* unused field */
5404 case 'i': /* integer */
5405 case 'n': /* note */
5406 case 'w': /* wide integer */
5407 case 's': /* pointer to string */
5408 case 'S': /* optional pointer to string */
5409 break;
5410
5411 case 'e':
5412 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5413 need_barrier = 1;
5414 break;
5415
5416 case 'E':
5417 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5418 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5419 need_barrier = 1;
5420 break;
5421
5422 default:
5423 abort ();
5424 }
5425 break;
5426 }
5427 return need_barrier;
5428 }
5429
5430 /* Clear out the state for group_barrier_needed_p at the start of a
5431 sequence of insns. */
5432
5433 static void
5434 init_insn_group_barriers ()
5435 {
5436 memset (rws_sum, 0, sizeof (rws_sum));
5437 first_instruction = 1;
5438 }
5439
5440 /* Given the current state, recorded by previous calls to this function,
5441 determine whether a group barrier (a stop bit) is necessary before INSN.
5442 Return nonzero if so. */
5443
5444 static int
5445 group_barrier_needed_p (insn)
5446 rtx insn;
5447 {
5448 rtx pat;
5449 int need_barrier = 0;
5450 struct reg_flags flags;
5451
5452 memset (&flags, 0, sizeof (flags));
5453 switch (GET_CODE (insn))
5454 {
5455 case NOTE:
5456 break;
5457
5458 case BARRIER:
5459 /* A barrier doesn't imply an instruction group boundary. */
5460 break;
5461
5462 case CODE_LABEL:
5463 memset (rws_insn, 0, sizeof (rws_insn));
5464 return 1;
5465
5466 case CALL_INSN:
5467 flags.is_branch = 1;
5468 flags.is_sibcall = SIBLING_CALL_P (insn);
5469 memset (rws_insn, 0, sizeof (rws_insn));
5470
5471 /* Don't bundle a call following another call. */
5472 if ((pat = prev_active_insn (insn))
5473 && GET_CODE (pat) == CALL_INSN)
5474 {
5475 need_barrier = 1;
5476 break;
5477 }
5478
5479 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5480 break;
5481
5482 case JUMP_INSN:
5483 flags.is_branch = 1;
5484
5485 /* Don't bundle a jump following a call. */
5486 if ((pat = prev_active_insn (insn))
5487 && GET_CODE (pat) == CALL_INSN)
5488 {
5489 need_barrier = 1;
5490 break;
5491 }
5492 /* FALLTHRU */
5493
5494 case INSN:
5495 if (GET_CODE (PATTERN (insn)) == USE
5496 || GET_CODE (PATTERN (insn)) == CLOBBER)
5497 /* Don't care about USE and CLOBBER "insns"---those are used to
5498 indicate to the optimizer that it shouldn't get rid of
5499 certain operations. */
5500 break;
5501
5502 pat = PATTERN (insn);
5503
5504 /* Ug. Hack hacks hacked elsewhere. */
5505 switch (recog_memoized (insn))
5506 {
5507 /* We play dependency tricks with the epilogue in order
5508 to get proper schedules. Undo this for dv analysis. */
5509 case CODE_FOR_epilogue_deallocate_stack:
5510 case CODE_FOR_prologue_allocate_stack:
5511 pat = XVECEXP (pat, 0, 0);
5512 break;
5513
5514 /* The pattern we use for br.cloop confuses the code above.
5515 The second element of the vector is representative. */
5516 case CODE_FOR_doloop_end_internal:
5517 pat = XVECEXP (pat, 0, 1);
5518 break;
5519
5520 /* Doesn't generate code. */
5521 case CODE_FOR_pred_rel_mutex:
5522 case CODE_FOR_prologue_use:
5523 return 0;
5524
5525 default:
5526 break;
5527 }
5528
5529 memset (rws_insn, 0, sizeof (rws_insn));
5530 need_barrier = rtx_needs_barrier (pat, flags, 0);
5531
5532 /* Check to see if the previous instruction was a volatile
5533 asm. */
5534 if (! need_barrier)
5535 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5536 break;
5537
5538 default:
5539 abort ();
5540 }
5541
5542 if (first_instruction && INSN_P (insn)
5543 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5544 && GET_CODE (PATTERN (insn)) != USE
5545 && GET_CODE (PATTERN (insn)) != CLOBBER)
5546 {
5547 need_barrier = 0;
5548 first_instruction = 0;
5549 }
5550
5551 return need_barrier;
5552 }
5553
5554 /* Like group_barrier_needed_p, but do not clobber the current state. */
5555
5556 static int
5557 safe_group_barrier_needed_p (insn)
5558 rtx insn;
5559 {
5560 struct reg_write_state rws_saved[NUM_REGS];
5561 int saved_first_instruction;
5562 int t;
5563
5564 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5565 saved_first_instruction = first_instruction;
5566
5567 t = group_barrier_needed_p (insn);
5568
5569 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5570 first_instruction = saved_first_instruction;
5571
5572 return t;
5573 }
5574
5575 /* Scan the current function and insert stop bits as necessary to
5576 eliminate dependencies. This function assumes that a final
5577 instruction scheduling pass has been run which has already
5578 inserted most of the necessary stop bits. This function only
5579 inserts new ones at basic block boundaries, since these are
5580 invisible to the scheduler. */
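/* (Sketch of the approach used below: the walk remembers the most recent
   label or basic-block note in LAST_LABEL; when an insn is found to need a
   barrier, the stop bit is emitted before LAST_LABEL, the tracking state is
   reset, and the walk resumes from that label, so new barriers land on
   block boundaries rather than in the middle of a block.) */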
5581
5582 static void
5583 emit_insn_group_barriers (dump)
5584 FILE *dump;
5585 {
5586 rtx insn;
5587 rtx last_label = 0;
5588 int insns_since_last_label = 0;
5589
5590 init_insn_group_barriers ();
5591
5592 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5593 {
5594 if (GET_CODE (insn) == CODE_LABEL)
5595 {
5596 if (insns_since_last_label)
5597 last_label = insn;
5598 insns_since_last_label = 0;
5599 }
5600 else if (GET_CODE (insn) == NOTE
5601 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5602 {
5603 if (insns_since_last_label)
5604 last_label = insn;
5605 insns_since_last_label = 0;
5606 }
5607 else if (GET_CODE (insn) == INSN
5608 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5609 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5610 {
5611 init_insn_group_barriers ();
5612 last_label = 0;
5613 }
5614 else if (INSN_P (insn))
5615 {
5616 insns_since_last_label = 1;
5617
5618 if (group_barrier_needed_p (insn))
5619 {
5620 if (last_label)
5621 {
5622 if (dump)
5623 fprintf (dump, "Emitting stop before label %d\n",
5624 INSN_UID (last_label));
5625 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5626 insn = last_label;
5627
5628 init_insn_group_barriers ();
5629 last_label = 0;
5630 }
5631 }
5632 }
5633 }
5634 }
5635
5636 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5637 This function has to emit all necessary group barriers. */
5638
5639 static void
5640 emit_all_insn_group_barriers (dump)
5641 FILE *dump ATTRIBUTE_UNUSED;
5642 {
5643 rtx insn;
5644
5645 init_insn_group_barriers ();
5646
5647 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5648 {
5649 if (GET_CODE (insn) == BARRIER)
5650 {
5651 rtx last = prev_active_insn (insn);
5652
5653 if (! last)
5654 continue;
5655 if (GET_CODE (last) == JUMP_INSN
5656 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5657 last = prev_active_insn (last);
5658 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5659 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5660
5661 init_insn_group_barriers ();
5662 }
5663 else if (INSN_P (insn))
5664 {
5665 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5666 init_insn_group_barriers ();
5667 else if (group_barrier_needed_p (insn))
5668 {
5669 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5670 init_insn_group_barriers ();
5671 group_barrier_needed_p (insn);
5672 }
5673 }
5674 }
5675 }
5676
5677 \f
5678 static int errata_find_address_regs PARAMS ((rtx *, void *));
5679 static void errata_emit_nops PARAMS ((rtx));
5680 static void fixup_errata PARAMS ((void));
5681
5682 /* This structure is used to track some details about the previous insn
5683 groups so we can determine if it may be necessary to insert NOPs to
5684 work around hardware errata. */
5685 static struct group
5686 {
5687 HARD_REG_SET p_reg_set;
5688 HARD_REG_SET gr_reg_conditionally_set;
5689 } last_group[2];
5690
5691 /* Index into the last_group array. */
5692 static int group_idx;
5693
5694 /* Called through for_each_rtx; determines if a hard register that was
5695 conditionally set in the previous group is used as an address register.
5696 It ensures that for_each_rtx returns 1 in that case. */
5697 static int
5698 errata_find_address_regs (xp, data)
5699 rtx *xp;
5700 void *data ATTRIBUTE_UNUSED;
5701 {
5702 rtx x = *xp;
5703 if (GET_CODE (x) != MEM)
5704 return 0;
5705 x = XEXP (x, 0);
5706 if (GET_CODE (x) == POST_MODIFY)
5707 x = XEXP (x, 0);
5708 if (GET_CODE (x) == REG)
5709 {
5710 struct group *prev_group = last_group + (group_idx ^ 1);
5711 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5712 REGNO (x)))
5713 return 1;
5714 return -1;
5715 }
5716 return 0;
5717 }
5718
5719 /* Called for each insn; this function keeps track of the state in
5720 last_group and emits additional NOPs if necessary to work around
5721 an Itanium A/B step erratum. */
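/* (Roughly, the erratum pattern guarded against below is: an F-type insn
   sets a predicate register; a conditional M- or A-type move guarded by
   that predicate writes a general register; and a memory access in a later
   group uses that register as an address.  When such an address use is
   detected, a stop bit, a nop and another stop bit are inserted before the
   offending insn and the tracking state is reset.) */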
5722 static void
5723 errata_emit_nops (insn)
5724 rtx insn;
5725 {
5726 struct group *this_group = last_group + group_idx;
5727 struct group *prev_group = last_group + (group_idx ^ 1);
5728 rtx pat = PATTERN (insn);
5729 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5730 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5731 enum attr_type type;
5732 rtx set = real_pat;
5733
5734 if (GET_CODE (real_pat) == USE
5735 || GET_CODE (real_pat) == CLOBBER
5736 || GET_CODE (real_pat) == ASM_INPUT
5737 || GET_CODE (real_pat) == ADDR_VEC
5738 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5739 || asm_noperands (PATTERN (insn)) >= 0)
5740 return;
5741
5742 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5743 parts of it. */
5744
5745 if (GET_CODE (set) == PARALLEL)
5746 {
5747 int i;
5748 set = XVECEXP (real_pat, 0, 0);
5749 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5750 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5751 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5752 {
5753 set = 0;
5754 break;
5755 }
5756 }
5757
5758 if (set && GET_CODE (set) != SET)
5759 set = 0;
5760
5761 type = get_attr_type (insn);
5762
5763 if (type == TYPE_F
5764 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5765 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5766
5767 if ((type == TYPE_M || type == TYPE_A) && cond && set
5768 && REG_P (SET_DEST (set))
5769 && GET_CODE (SET_SRC (set)) != PLUS
5770 && GET_CODE (SET_SRC (set)) != MINUS
5771 && (GET_CODE (SET_SRC (set)) != ASHIFT
5772 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5773 && (GET_CODE (SET_SRC (set)) != MEM
5774 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5775 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5776 {
5777 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5778 || ! REG_P (XEXP (cond, 0)))
5779 abort ();
5780
5781 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5782 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5783 }
5784 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5785 {
5786 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5787 emit_insn_before (gen_nop (), insn);
5788 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5789 group_idx = 0;
5790 memset (last_group, 0, sizeof last_group);
5791 }
5792 }
5793
5794 /* Emit extra nops if they are required to work around hardware errata. */
5795
5796 static void
5797 fixup_errata ()
5798 {
5799 rtx insn;
5800
5801 if (! TARGET_B_STEP)
5802 return;
5803
5804 group_idx = 0;
5805 memset (last_group, 0, sizeof last_group);
5806
5807 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5808 {
5809 if (!INSN_P (insn))
5810 continue;
5811
5812 if (ia64_safe_type (insn) == TYPE_S)
5813 {
5814 group_idx ^= 1;
5815 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5816 }
5817 else
5818 errata_emit_nops (insn);
5819 }
5820 }
5821 \f
5822
5823 /* Instruction scheduling support. */
5824
5825 #define NR_BUNDLES 10
5826
5827 /* A list of names of all available bundles. */
5828
5829 static const char *bundle_name [NR_BUNDLES] =
5830 {
5831 ".mii",
5832 ".mmi",
5833 ".mfi",
5834 ".mmf",
5835 #if NR_BUNDLES == 10
5836 ".bbb",
5837 ".mbb",
5838 #endif
5839 ".mib",
5840 ".mmb",
5841 ".mfb",
5842 ".mlx"
5843 };
5844
5845 /* Nonzero if we should insert stop bits into the schedule. */
5846
5847 int ia64_final_schedule = 0;
5848
5849 /* Codes of the corresponding queried units: */
5850
5851 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5852 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5853
5854 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5855 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5856
5857 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
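/* (These hold DFA cpu unit codes, filled in by ia64_reorg below via
   get_cpu_unit_code.  The _0.../_1... variables name the template units of
   the first and second bundle of the two-bundle issue window, and
   pos_1 .. pos_6 correspond to the six instruction slots of that window.) */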
5858
5859 /* The following variable value is an insn group barrier. */
5860
5861 static rtx dfa_stop_insn;
5862
5863 /* The following variable value is the last issued insn. */
5864
5865 static rtx last_scheduled_insn;
5866
5867 /* The following variable value is the size of the DFA state. */
5868
5869 static size_t dfa_state_size;
5870
5871 /* The following variable value is a pointer to a DFA state used as
5872 a temporary variable. */
5873
5874 static state_t temp_dfa_state = NULL;
5875
5876 /* The following variable value is the DFA state after issuing the last
5877 insn. */
5878
5879 static state_t prev_cycle_state = NULL;
5880
5881 /* The following array element values are TRUE if the corresponding
5882 insn requires a stop bit to be added before it. */
5883
5884 static char *stops_p;
5885
5886 /* The following variable is used to set up the above-mentioned array. */
5887
5888 static int stop_before_p = 0;
5889
5890 /* The following variable value is the length of the arrays `clocks' and
5891 `add_cycles'. */
5892
5893 static int clocks_length;
5894
5895 /* The following array element values are the cycles on which the
5896 corresponding insn will be issued. The array is used only for
5897 Itanium1. */
5898
5899 static int *clocks;
5900
5901 /* The following array element values are the numbers of cycles that should
5902 be added to improve insn scheduling of MM-insns for Itanium1. */
5903
5904 static int *add_cycles;
5905
5906 static rtx ia64_single_set PARAMS ((rtx));
5907 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5908
5909 /* Map a bundle number to its pseudo-op. */
5910
5911 const char *
5912 get_bundle_name (b)
5913 int b;
5914 {
5915 return bundle_name[b];
5916 }
5917
5918
5919 /* Return the maximum number of instructions a cpu can issue. */
5920
5921 static int
5922 ia64_issue_rate ()
5923 {
5924 return 6;
5925 }
5926
5927 /* Helper function - like single_set, but look inside COND_EXEC. */
5928
5929 static rtx
5930 ia64_single_set (insn)
5931 rtx insn;
5932 {
5933 rtx x = PATTERN (insn), ret;
5934 if (GET_CODE (x) == COND_EXEC)
5935 x = COND_EXEC_CODE (x);
5936 if (GET_CODE (x) == SET)
5937 return x;
5938
5939 /* Special case prologue_allocate_stack and epilogue_deallocate_stack here.
5940 Although they are not classical single sets, the second set is there just
5941 to protect the insn from being moved past FP-relative stack accesses. */
5942 switch (recog_memoized (insn))
5943 {
5944 case CODE_FOR_prologue_allocate_stack:
5945 case CODE_FOR_epilogue_deallocate_stack:
5946 ret = XVECEXP (x, 0, 0);
5947 break;
5948
5949 default:
5950 ret = single_set_2 (insn, x);
5951 break;
5952 }
5953
5954 return ret;
5955 }
5956
5957 /* Adjust the cost of a scheduling dependency. Return the new cost of
5958 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5959
5960 static int
5961 ia64_adjust_cost (insn, link, dep_insn, cost)
5962 rtx insn, link, dep_insn;
5963 int cost;
5964 {
5965 enum attr_itanium_class dep_class;
5966 enum attr_itanium_class insn_class;
5967
5968 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5969 return cost;
5970
5971 insn_class = ia64_safe_itanium_class (insn);
5972 dep_class = ia64_safe_itanium_class (dep_insn);
5973 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5974 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5975 return 0;
5976
5977 return cost;
5978 }
5979
5980 /* Like emit_insn_before, but skip cycle_display notes.
5981 ??? When cycle display notes are implemented, update this. */
5982
5983 static void
5984 ia64_emit_insn_before (insn, before)
5985 rtx insn, before;
5986 {
5987 emit_insn_before (insn, before);
5988 }
5989
5990 /* The following function marks insns that produce addresses for load
5991 and store insns. Such insns will be placed into M slots because that
5992 decreases latency time for Itanium1 (see function
5993 `ia64_produce_address_p' and the DFA descriptions). */
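/* (Implementation note: the insn's `call' bit is reused below as a scratch
   flag meaning "this IALU insn feeds the address of a later load or store";
   ia64_produce_address_p near the end of this file simply reads that flag
   back.) */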
5994
5995 static void
5996 ia64_dependencies_evaluation_hook (head, tail)
5997 rtx head, tail;
5998 {
5999 rtx insn, link, next, next_tail;
6000
6001 next_tail = NEXT_INSN (tail);
6002 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6003 if (INSN_P (insn))
6004 insn->call = 0;
6005 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6006 if (INSN_P (insn)
6007 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6008 {
6009 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6010 {
6011 next = XEXP (link, 0);
6012 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
6013 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
6014 && ia64_st_address_bypass_p (insn, next))
6015 break;
6016 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
6017 || ia64_safe_itanium_class (next)
6018 == ITANIUM_CLASS_FLD)
6019 && ia64_ld_address_bypass_p (insn, next))
6020 break;
6021 }
6022 insn->call = link != 0;
6023 }
6024 }
6025
6026 /* We're beginning a new block. Initialize data structures as necessary. */
6027
6028 static void
6029 ia64_sched_init (dump, sched_verbose, max_ready)
6030 FILE *dump ATTRIBUTE_UNUSED;
6031 int sched_verbose ATTRIBUTE_UNUSED;
6032 int max_ready ATTRIBUTE_UNUSED;
6033 {
6034 #ifdef ENABLE_CHECKING
6035 rtx insn;
6036
6037 if (reload_completed)
6038 for (insn = NEXT_INSN (current_sched_info->prev_head);
6039 insn != current_sched_info->next_tail;
6040 insn = NEXT_INSN (insn))
6041 if (SCHED_GROUP_P (insn))
6042 abort ();
6043 #endif
6044 last_scheduled_insn = NULL_RTX;
6045 init_insn_group_barriers ();
6046 }
6047
6048 /* We are about to begin issuing insns for this clock cycle.
6049 Override the default sort algorithm to better slot instructions. */
6050
6051 static int
6052 ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6053 clock_var, reorder_type)
6054 FILE *dump;
6055 int sched_verbose;
6056 rtx *ready;
6057 int *pn_ready;
6058 int clock_var ATTRIBUTE_UNUSED;
6059 int reorder_type;
6060 {
6061 int n_asms;
6062 int n_ready = *pn_ready;
6063 rtx *e_ready = ready + n_ready;
6064 rtx *insnp;
6065
6066 if (sched_verbose)
6067 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6068
6069 if (reorder_type == 0)
6070 {
6071 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6072 n_asms = 0;
6073 for (insnp = ready; insnp < e_ready; insnp++)
6074 if (insnp < e_ready)
6075 {
6076 rtx insn = *insnp;
6077 enum attr_type t = ia64_safe_type (insn);
6078 if (t == TYPE_UNKNOWN)
6079 {
6080 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6081 || asm_noperands (PATTERN (insn)) >= 0)
6082 {
6083 rtx lowest = ready[n_asms];
6084 ready[n_asms] = insn;
6085 *insnp = lowest;
6086 n_asms++;
6087 }
6088 else
6089 {
6090 rtx highest = ready[n_ready - 1];
6091 ready[n_ready - 1] = insn;
6092 *insnp = highest;
6093 return 1;
6094 }
6095 }
6096 }
6097
6098 if (n_asms < n_ready)
6099 {
6100 /* Some normal insns to process. Skip the asms. */
6101 ready += n_asms;
6102 n_ready -= n_asms;
6103 }
6104 else if (n_ready > 0)
6105 return 1;
6106 }
6107
6108 if (ia64_final_schedule)
6109 {
6110 int deleted = 0;
6111 int nr_need_stop = 0;
6112
6113 for (insnp = ready; insnp < e_ready; insnp++)
6114 if (safe_group_barrier_needed_p (*insnp))
6115 nr_need_stop++;
6116
6117 if (reorder_type == 1 && n_ready == nr_need_stop)
6118 return 0;
6119 if (reorder_type == 0)
6120 return 1;
6121 insnp = e_ready;
6122 /* Move down everything that needs a stop bit, preserving
6123 relative order. */
6124 while (insnp-- > ready + deleted)
6125 while (insnp >= ready + deleted)
6126 {
6127 rtx insn = *insnp;
6128 if (! safe_group_barrier_needed_p (insn))
6129 break;
6130 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6131 *ready = insn;
6132 deleted++;
6133 }
6134 n_ready -= deleted;
6135 ready += deleted;
6136 }
6137
6138 return 1;
6139 }
6140
6141 /* We are about to begin issuing insns for this clock cycle. Override
6142 the default sort algorithm to better slot instructions. */
6143
6144 static int
6145 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6146 FILE *dump;
6147 int sched_verbose;
6148 rtx *ready;
6149 int *pn_ready;
6150 int clock_var;
6151 {
6152 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6153 pn_ready, clock_var, 0);
6154 }
6155
6156 /* Like ia64_sched_reorder, but called after issuing each insn.
6157 Override the default sort algorithm to better slot instructions. */
6158
6159 static int
6160 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6161 FILE *dump ATTRIBUTE_UNUSED;
6162 int sched_verbose ATTRIBUTE_UNUSED;
6163 rtx *ready;
6164 int *pn_ready;
6165 int clock_var;
6166 {
6167 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6168 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6169 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6170 clock_var, 1);
6171 }
6172
6173 /* We are about to issue INSN. Return the number of insns left on the
6174 ready queue that can be issued this cycle. */
6175
6176 static int
6177 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6178 FILE *dump ATTRIBUTE_UNUSED;
6179 int sched_verbose ATTRIBUTE_UNUSED;
6180 rtx insn ATTRIBUTE_UNUSED;
6181 int can_issue_more ATTRIBUTE_UNUSED;
6182 {
6183 last_scheduled_insn = insn;
6184 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6185 if (reload_completed)
6186 {
6187 if (group_barrier_needed_p (insn))
6188 abort ();
6189 if (GET_CODE (insn) == CALL_INSN)
6190 init_insn_group_barriers ();
6191 stops_p [INSN_UID (insn)] = stop_before_p;
6192 stop_before_p = 0;
6193 }
6194 return 1;
6195 }
6196
6197 /* We are choosing an insn from the ready queue. Return nonzero if INSN
6198 can be chosen. */
6199
6200 static int
6201 ia64_first_cycle_multipass_dfa_lookahead_guard (insn)
6202 rtx insn;
6203 {
6204 if (insn == NULL_RTX || !INSN_P (insn))
6205 abort ();
6206 return (!reload_completed
6207 || !safe_group_barrier_needed_p (insn));
6208 }
6209
6210 /* The following variable value is a pseudo-insn used by the DFA insn
6211 scheduler to change the DFA state when the simulated clock is
6212 increased. */
6213
6214 static rtx dfa_pre_cycle_insn;
6215
6216 /* We are about to begin issuing INSN. Return nonzero if we cannot
6217 issue it on the given cycle CLOCK; set *SORT_P to zero if we should
6218 not sort the ready queue on the next clock start. */
6219
6220 static int
6221 ia64_dfa_new_cycle (dump, verbose, insn, last_clock, clock, sort_p)
6222 FILE *dump;
6223 int verbose;
6224 rtx insn;
6225 int last_clock, clock;
6226 int *sort_p;
6227 {
6228 int setup_clocks_p = FALSE;
6229
6230 if (insn == NULL_RTX || !INSN_P (insn))
6231 abort ();
6232 if ((reload_completed && safe_group_barrier_needed_p (insn))
6233 || (last_scheduled_insn
6234 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6235 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6236 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6237 {
6238 init_insn_group_barriers ();
6239 if (verbose && dump)
6240 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6241 last_clock == clock ? " + cycle advance" : "");
6242 stop_before_p = 1;
6243 if (last_clock == clock)
6244 {
6245 state_transition (curr_state, dfa_stop_insn);
6246 if (TARGET_EARLY_STOP_BITS)
6247 *sort_p = (last_scheduled_insn == NULL_RTX
6248 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6249 else
6250 *sort_p = 0;
6251 return 1;
6252 }
6253 else if (reload_completed)
6254 setup_clocks_p = TRUE;
6255 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6256 state_transition (curr_state, dfa_stop_insn);
6257 state_transition (curr_state, dfa_pre_cycle_insn);
6258 state_transition (curr_state, NULL);
6259 }
6260 else if (reload_completed)
6261 setup_clocks_p = TRUE;
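/* (Itanium1 only: if INSN depends on an MM multiply/shift result produced
   fewer than 4 cycles earlier, the code below records 3 - d extra cycles
   in add_cycles[]; bundling () later materializes them as extra nop
   bundles and stop bits in front of INSN.) */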
6262 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM)
6263 {
6264 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6265
6266 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6267 {
6268 rtx link;
6269 int d = -1;
6270
6271 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6272 if (REG_NOTE_KIND (link) == 0)
6273 {
6274 enum attr_itanium_class dep_class;
6275 rtx dep_insn = XEXP (link, 0);
6276
6277 dep_class = ia64_safe_itanium_class (dep_insn);
6278 if ((dep_class == ITANIUM_CLASS_MMMUL
6279 || dep_class == ITANIUM_CLASS_MMSHF)
6280 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6281 && (d < 0
6282 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6283 d = last_clock - clocks [INSN_UID (dep_insn)];
6284 }
6285 if (d >= 0)
6286 add_cycles [INSN_UID (insn)] = 3 - d;
6287 }
6288 }
6289 return 0;
6290 }
6291
6292 \f
6293
6294 /* The following page contains abstract data `bundle states' which are
6295 used for bundling insns (inserting nops and template generation). */
6296
6297 /* The following describes the state of insn bundling. */
6298
6299 struct bundle_state
6300 {
6301 /* Unique bundle state number to identify them in the debugging
6302 output */
6303 int unique_num;
6304 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6305 /* number nops before and after the insn */
6306 short before_nops_num, after_nops_num;
6307 int insn_num; /* insn number (0 - for the initial state, 1 - for the 1st
6308 insn) */
6309 int cost; /* cost of the state in cycles */
6310 int accumulated_insns_num; /* number of all previous insns including
6311 nops. L is considered as 2 insns */
6312 int branch_deviation; /* deviation of previous branches from 3rd slots */
6313 struct bundle_state *next; /* next state with the same insn_num */
6314 struct bundle_state *originator; /* originator (previous insn state) */
6315 /* All bundle states are in the following chain. */
6316 struct bundle_state *allocated_states_chain;
6317 /* The DFA State after issuing the insn and the nops. */
6318 state_t dfa_state;
6319 };
6320
6321 /* The following maps an insn number to the corresponding bundle state. */
6322
6323 static struct bundle_state **index_to_bundle_states;
6324
6325 /* The unique number of the next bundle state. */
6326
6327 static int bundle_states_num;
6328
6329 /* All allocated bundle states are in the following chain. */
6330
6331 static struct bundle_state *allocated_bundle_states_chain;
6332
6333 /* All allocated but not used bundle states are in the following
6334 chain. */
6335
6336 static struct bundle_state *free_bundle_state_chain;
6337
6338
6339 /* The following function returns a free bundle state. */
6340
6341 static struct bundle_state *
6342 get_free_bundle_state ()
6343 {
6344 struct bundle_state *result;
6345
6346 if (free_bundle_state_chain != NULL)
6347 {
6348 result = free_bundle_state_chain;
6349 free_bundle_state_chain = result->next;
6350 }
6351 else
6352 {
6353 result = xmalloc (sizeof (struct bundle_state));
6354 result->dfa_state = xmalloc (dfa_state_size);
6355 result->allocated_states_chain = allocated_bundle_states_chain;
6356 allocated_bundle_states_chain = result;
6357 }
6358 result->unique_num = bundle_states_num++;
6359 return result;
6360
6361 }
6362
6363 /* The following function frees the given bundle state. */
6364
6365 static void
6366 free_bundle_state (state)
6367 struct bundle_state *state;
6368 {
6369 state->next = free_bundle_state_chain;
6370 free_bundle_state_chain = state;
6371 }
6372
6373 /* Start work with abstract data `bundle states'. */
6374
6375 static void
6376 initiate_bundle_states ()
6377 {
6378 bundle_states_num = 0;
6379 free_bundle_state_chain = NULL;
6380 allocated_bundle_states_chain = NULL;
6381 }
6382
6383 /* Finish work with abstract data `bundle states'. */
6384
6385 static void
6386 finish_bundle_states ()
6387 {
6388 struct bundle_state *curr_state, *next_state;
6389
6390 for (curr_state = allocated_bundle_states_chain;
6391 curr_state != NULL;
6392 curr_state = next_state)
6393 {
6394 next_state = curr_state->allocated_states_chain;
6395 free (curr_state->dfa_state);
6396 free (curr_state);
6397 }
6398 }
6399
6400 /* Hash table of the bundle states. The key is dfa_state and insn_num
6401 of the bundle states. */
6402
6403 static htab_t bundle_state_table;
6404
6405 /* The function returns the hash of BUNDLE_STATE. */
6406
6407 static unsigned
6408 bundle_state_hash (bundle_state)
6409 const void *bundle_state;
6410 {
6411 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6412 unsigned result, i;
6413
6414 for (result = i = 0; i < dfa_state_size; i++)
6415 result += (((unsigned char *) state->dfa_state) [i]
6416 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6417 return result + state->insn_num;
6418 }
6419
6420 /* The function returns nonzero if the bundle state keys are equal. */
6421
6422 static int
6423 bundle_state_eq_p (bundle_state_1, bundle_state_2)
6424 const void *bundle_state_1;
6425 const void *bundle_state_2;
6426 {
6427 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6428 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6429
6430 return (state1->insn_num == state2->insn_num
6431 && memcmp (state1->dfa_state, state2->dfa_state,
6432 dfa_state_size) == 0);
6433 }
6434
6435 /* The function inserts the BUNDLE_STATE into the hash table. The
6436 function returns nonzero if the bundle has been inserted into the
6437 table. The table contains the best bundle state with given key. */
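/* ("Best" means, in order of priority: smaller cost, then fewer
   accumulated insns (nops included), then smaller deviation of branches
   from the 3rd slot -- see the comparison in the function body below.) */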
6438
6439 static int
6440 insert_bundle_state (bundle_state)
6441 struct bundle_state *bundle_state;
6442 {
6443 void **entry_ptr;
6444
6445 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6446 if (*entry_ptr == NULL)
6447 {
6448 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6449 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6450 *entry_ptr = (void *) bundle_state;
6451 return TRUE;
6452 }
6453 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6454 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6455 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6456 > bundle_state->accumulated_insns_num
6457 || (((struct bundle_state *)
6458 *entry_ptr)->accumulated_insns_num
6459 == bundle_state->accumulated_insns_num
6460 && ((struct bundle_state *)
6461 *entry_ptr)->branch_deviation
6462 > bundle_state->branch_deviation))))
6463
6464 {
6465 struct bundle_state temp;
6466
6467 temp = *(struct bundle_state *) *entry_ptr;
6468 *(struct bundle_state *) *entry_ptr = *bundle_state;
6469 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6470 *bundle_state = temp;
6471 }
6472 return FALSE;
6473 }
6474
6475 /* Start work with the hash table. */
6476
6477 static void
6478 initiate_bundle_state_table ()
6479 {
6480 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6481 (htab_del) 0);
6482 }
6483
6484 /* Finish work with the hash table. */
6485
6486 static void
6487 finish_bundle_state_table ()
6488 {
6489 htab_delete (bundle_state_table);
6490 }
6491
6492 \f
6493
6494 /* The following variable is an insn `nop' used to check bundle states
6495 with different numbers of inserted nops. */
6496
6497 static rtx ia64_nop;
6498
6499 /* The following function tries to issue NOPS_NUM nops for the current
6500 state without advancing the processor cycle. If it fails, the
6501 function returns FALSE and frees the current state. */
6502
6503 static int
6504 try_issue_nops (curr_state, nops_num)
6505 struct bundle_state *curr_state;
6506 int nops_num;
6507 {
6508 int i;
6509
6510 for (i = 0; i < nops_num; i++)
6511 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6512 {
6513 free_bundle_state (curr_state);
6514 return FALSE;
6515 }
6516 return TRUE;
6517 }
6518
6519 /* The following function tries to issue INSN for the current
6520 state without advancing the processor cycle. If it fails, the
6521 function returns FALSE and frees the current state. */
6522
6523 static int
6524 try_issue_insn (curr_state, insn)
6525 struct bundle_state *curr_state;
6526 rtx insn;
6527 {
6528 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6529 {
6530 free_bundle_state (curr_state);
6531 return FALSE;
6532 }
6533 return TRUE;
6534 }
6535
6536 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6537 starting with ORIGINATOR without advancing the processor cycle. If
6538 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6539 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6540 If successful, the function creates a new bundle state and inserts
6541 it into the hash table and into `index_to_bundle_states'. */
6542
6543 static void
6544 issue_nops_and_insn (originator, before_nops_num, insn, try_bundle_end_p,
6545 only_bundle_end_p)
6546 struct bundle_state *originator;
6547 int before_nops_num;
6548 rtx insn;
6549 int try_bundle_end_p, only_bundle_end_p;
6550 {
6551 struct bundle_state *curr_state;
6552
6553 curr_state = get_free_bundle_state ();
6554 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6555 curr_state->insn = insn;
6556 curr_state->insn_num = originator->insn_num + 1;
6557 curr_state->cost = originator->cost;
6558 curr_state->originator = originator;
6559 curr_state->before_nops_num = before_nops_num;
6560 curr_state->after_nops_num = 0;
6561 curr_state->accumulated_insns_num
6562 = originator->accumulated_insns_num + before_nops_num;
6563 curr_state->branch_deviation = originator->branch_deviation;
6564 if (insn == NULL_RTX)
6565 abort ();
6566 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6567 {
6568 if (GET_MODE (insn) == TImode)
6569 abort ();
6570 if (!try_issue_nops (curr_state, before_nops_num))
6571 return;
6572 if (!try_issue_insn (curr_state, insn))
6573 return;
6574 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6575 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6576 && curr_state->accumulated_insns_num % 3 != 0)
6577 {
6578 free_bundle_state (curr_state);
6579 return;
6580 }
6581 }
6582 else if (GET_MODE (insn) != TImode)
6583 {
6584 if (!try_issue_nops (curr_state, before_nops_num))
6585 return;
6586 if (!try_issue_insn (curr_state, insn))
6587 return;
6588 curr_state->accumulated_insns_num++;
6589 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6590 || asm_noperands (PATTERN (insn)) >= 0)
6591 abort ();
6592 if (ia64_safe_type (insn) == TYPE_L)
6593 curr_state->accumulated_insns_num++;
6594 }
6595 else
6596 {
6597 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6598 state_transition (curr_state->dfa_state, NULL);
6599 curr_state->cost++;
6600 if (!try_issue_nops (curr_state, before_nops_num))
6601 return;
6602 if (!try_issue_insn (curr_state, insn))
6603 return;
6604 curr_state->accumulated_insns_num++;
6605 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6606 || asm_noperands (PATTERN (insn)) >= 0)
6607 {
6608 /* Finish bundle containing asm insn. */
6609 curr_state->after_nops_num
6610 = 3 - curr_state->accumulated_insns_num % 3;
6611 curr_state->accumulated_insns_num
6612 += 3 - curr_state->accumulated_insns_num % 3;
6613 }
6614 else if (ia64_safe_type (insn) == TYPE_L)
6615 curr_state->accumulated_insns_num++;
6616 }
6617 if (ia64_safe_type (insn) == TYPE_B)
6618 curr_state->branch_deviation
6619 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6620 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6621 {
6622 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6623 {
6624 state_t dfa_state;
6625 struct bundle_state *curr_state1;
6626 struct bundle_state *allocated_states_chain;
6627
6628 curr_state1 = get_free_bundle_state ();
6629 dfa_state = curr_state1->dfa_state;
6630 allocated_states_chain = curr_state1->allocated_states_chain;
6631 *curr_state1 = *curr_state;
6632 curr_state1->dfa_state = dfa_state;
6633 curr_state1->allocated_states_chain = allocated_states_chain;
6634 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6635 dfa_state_size);
6636 curr_state = curr_state1;
6637 }
6638 if (!try_issue_nops (curr_state,
6639 3 - curr_state->accumulated_insns_num % 3))
6640 return;
6641 curr_state->after_nops_num
6642 = 3 - curr_state->accumulated_insns_num % 3;
6643 curr_state->accumulated_insns_num
6644 += 3 - curr_state->accumulated_insns_num % 3;
6645 }
6646 if (!insert_bundle_state (curr_state))
6647 free_bundle_state (curr_state);
6648 return;
6649 }
6650
6651 /* The following function returns the position in the two-bundle window
6652 for the given STATE. */
6653
6654 static int
6655 get_max_pos (state)
6656 state_t state;
6657 {
6658 if (cpu_unit_reservation_p (state, pos_6))
6659 return 6;
6660 else if (cpu_unit_reservation_p (state, pos_5))
6661 return 5;
6662 else if (cpu_unit_reservation_p (state, pos_4))
6663 return 4;
6664 else if (cpu_unit_reservation_p (state, pos_3))
6665 return 3;
6666 else if (cpu_unit_reservation_p (state, pos_2))
6667 return 2;
6668 else if (cpu_unit_reservation_p (state, pos_1))
6669 return 1;
6670 else
6671 return 0;
6672 }
6673
6674 /* The function returns the code of a possible template for the given
6675 position and state. The function should be called only with position
6676 values of 3 or 6. */
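/* (The returned code is an index into bundle_name[] above, i.e. 0 stands
   for .mii, 1 for .mmi, ..., 9 for .mlx, and is later emitted through a
   bundle_selector pseudo insn.) */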
6677
6678 static int
6679 get_template (state, pos)
6680 state_t state;
6681 int pos;
6682 {
6683 switch (pos)
6684 {
6685 case 3:
6686 if (cpu_unit_reservation_p (state, _0mii_))
6687 return 0;
6688 else if (cpu_unit_reservation_p (state, _0mmi_))
6689 return 1;
6690 else if (cpu_unit_reservation_p (state, _0mfi_))
6691 return 2;
6692 else if (cpu_unit_reservation_p (state, _0mmf_))
6693 return 3;
6694 else if (cpu_unit_reservation_p (state, _0bbb_))
6695 return 4;
6696 else if (cpu_unit_reservation_p (state, _0mbb_))
6697 return 5;
6698 else if (cpu_unit_reservation_p (state, _0mib_))
6699 return 6;
6700 else if (cpu_unit_reservation_p (state, _0mmb_))
6701 return 7;
6702 else if (cpu_unit_reservation_p (state, _0mfb_))
6703 return 8;
6704 else if (cpu_unit_reservation_p (state, _0mlx_))
6705 return 9;
6706 else
6707 abort ();
6708 case 6:
6709 if (cpu_unit_reservation_p (state, _1mii_))
6710 return 0;
6711 else if (cpu_unit_reservation_p (state, _1mmi_))
6712 return 1;
6713 else if (cpu_unit_reservation_p (state, _1mfi_))
6714 return 2;
6715 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6716 return 3;
6717 else if (cpu_unit_reservation_p (state, _1bbb_))
6718 return 4;
6719 else if (cpu_unit_reservation_p (state, _1mbb_))
6720 return 5;
6721 else if (cpu_unit_reservation_p (state, _1mib_))
6722 return 6;
6723 else if (cpu_unit_reservation_p (state, _1mmb_))
6724 return 7;
6725 else if (cpu_unit_reservation_p (state, _1mfb_))
6726 return 8;
6727 else if (cpu_unit_reservation_p (state, _1mlx_))
6728 return 9;
6729 else
6730 abort ();
6731 default:
6732 abort ();
6733 }
6734 }
6735
6736 /* The following function returns the next insn important for insn
6737 bundling, starting at INSN and stopping before TAIL. */
6738
6739 static rtx
6740 get_next_important_insn (insn, tail)
6741 rtx insn, tail;
6742 {
6743 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6744 if (INSN_P (insn)
6745 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6746 && GET_CODE (PATTERN (insn)) != USE
6747 && GET_CODE (PATTERN (insn)) != CLOBBER)
6748 return insn;
6749 return NULL_RTX;
6750 }
6751
6752 /* The following function does insn bundling. The bundling algorithm is
6753 based on dynamic programming. It tries to insert different numbers of
6754 nop insns before/after the real insns. At the end of the EBB, it chooses
6755 the best alternative and then, moving back through the EBB, inserts
6756 templates for that alternative. The algorithm is directed by information
6757 (changes of the simulated processor cycle) created by the second insn
6758 scheduling pass. */
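/* (A rough sketch of what happens below: the forward pass walks the real
   insns of the EBB and, for each one, tries to issue it preceded by 0, 1
   or -- for F, B, L, S and some M-type insns -- 2 nops, optionally padding
   the current bundle to its end; equivalent partial schedules are pruned
   through insert_bundle_state.  The best final state must occupy a
   multiple of 3 slots.  The backward pass then replays the chosen chain of
   states, emitting the recorded nops and a bundle_selector pseudo insn at
   each bundle boundary.) */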
6759
6760 static void
6761 bundling (dump, verbose, prev_head_insn, tail)
6762 FILE *dump;
6763 int verbose;
6764 rtx prev_head_insn, tail;
6765 {
6766 struct bundle_state *curr_state, *next_state, *best_state;
6767 rtx insn, next_insn;
6768 int insn_num;
6769 int i, bundle_end_p, only_bundle_end_p, asm_p;
6770 int pos = 0, max_pos, template0, template1;
6771 rtx b;
6772 rtx nop;
6773 enum attr_type type;
6774
6775 insn_num = 0;
6776 for (insn = NEXT_INSN (prev_head_insn);
6777 insn && insn != tail;
6778 insn = NEXT_INSN (insn))
6779 if (INSN_P (insn))
6780 insn_num++;
6781 if (insn_num == 0)
6782 return;
6783 bundling_p = 1;
6784 dfa_clean_insn_cache ();
6785 initiate_bundle_state_table ();
6786 index_to_bundle_states = xmalloc ((insn_num + 2)
6787 * sizeof (struct bundle_state *));
6788 /* First (forward) pass -- generates states. */
6789 curr_state = get_free_bundle_state ();
6790 curr_state->insn = NULL;
6791 curr_state->before_nops_num = 0;
6792 curr_state->after_nops_num = 0;
6793 curr_state->insn_num = 0;
6794 curr_state->cost = 0;
6795 curr_state->accumulated_insns_num = 0;
6796 curr_state->branch_deviation = 0;
6797 curr_state->next = NULL;
6798 curr_state->originator = NULL;
6799 state_reset (curr_state->dfa_state);
6800 index_to_bundle_states [0] = curr_state;
6801 insn_num = 0;
6802 for (insn = NEXT_INSN (prev_head_insn);
6803 insn != tail;
6804 insn = NEXT_INSN (insn))
6805 if (INSN_P (insn)
6806 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6807 || GET_CODE (PATTERN (insn)) == USE
6808 || GET_CODE (PATTERN (insn)) == CLOBBER)
6809 && GET_MODE (insn) == TImode)
6810 {
6811 PUT_MODE (insn, VOIDmode);
6812 for (next_insn = NEXT_INSN (insn);
6813 next_insn != tail;
6814 next_insn = NEXT_INSN (next_insn))
6815 if (INSN_P (next_insn)
6816 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6817 && GET_CODE (PATTERN (next_insn)) != USE
6818 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6819 {
6820 PUT_MODE (next_insn, TImode);
6821 break;
6822 }
6823 }
6824 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6825 insn != NULL_RTX;
6826 insn = next_insn)
6827 {
6828 if (!INSN_P (insn)
6829 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6830 || GET_CODE (PATTERN (insn)) == USE
6831 || GET_CODE (PATTERN (insn)) == CLOBBER)
6832 abort ();
6833 type = ia64_safe_type (insn);
6834 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6835 insn_num++;
6836 index_to_bundle_states [insn_num] = NULL;
6837 for (curr_state = index_to_bundle_states [insn_num - 1];
6838 curr_state != NULL;
6839 curr_state = next_state)
6840 {
6841 pos = curr_state->accumulated_insns_num % 3;
6842 next_state = curr_state->next;
6843 /* Finish the current bundle in order to start a subsequent
6844 asm insn in a new bundle. */
6845 only_bundle_end_p
6846 = (next_insn != NULL_RTX
6847 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6848 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6849 bundle_end_p
6850 = (only_bundle_end_p || next_insn == NULL_RTX
6851 || (GET_MODE (next_insn) == TImode
6852 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6853 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6854 || type == TYPE_S
6855 /* We need to insert 2 Nops for cases like M_MII. */
6856 || (type == TYPE_M && ia64_tune == PROCESSOR_ITANIUM
6857 && !bundle_end_p && pos == 1))
6858 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6859 only_bundle_end_p);
6860 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6861 only_bundle_end_p);
6862 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6863 only_bundle_end_p);
6864 }
6865 if (index_to_bundle_states [insn_num] == NULL)
6866 abort ();
6867 for (curr_state = index_to_bundle_states [insn_num];
6868 curr_state != NULL;
6869 curr_state = curr_state->next)
6870 if (verbose >= 2 && dump)
6871 {
6872 struct DFA_chip
6873 {
6874 unsigned short one_automaton_state;
6875 unsigned short oneb_automaton_state;
6876 unsigned short two_automaton_state;
6877 unsigned short twob_automaton_state;
6878 };
6879
6880 fprintf
6881 (dump,
6882 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6883 curr_state->unique_num,
6884 (curr_state->originator == NULL
6885 ? -1 : curr_state->originator->unique_num),
6886 curr_state->cost,
6887 curr_state->before_nops_num, curr_state->after_nops_num,
6888 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6889 (ia64_tune == PROCESSOR_ITANIUM
6890 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6891 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6892 INSN_UID (insn));
6893 }
6894 }
6895 if (index_to_bundle_states [insn_num] == NULL)
6896 abort ();
6897 /* Finding state with a minimal cost: */
6898 best_state = NULL;
6899 for (curr_state = index_to_bundle_states [insn_num];
6900 curr_state != NULL;
6901 curr_state = curr_state->next)
6902 if (curr_state->accumulated_insns_num % 3 == 0
6903 && (best_state == NULL || best_state->cost > curr_state->cost
6904 || (best_state->cost == curr_state->cost
6905 && (curr_state->accumulated_insns_num
6906 < best_state->accumulated_insns_num
6907 || (curr_state->accumulated_insns_num
6908 == best_state->accumulated_insns_num
6909 && curr_state->branch_deviation
6910 < best_state->branch_deviation)))))
6911 best_state = curr_state;
6912 /* Second (backward) pass: adding nops and templates: */
6913 insn_num = best_state->before_nops_num;
6914 template0 = template1 = -1;
6915 for (curr_state = best_state;
6916 curr_state->originator != NULL;
6917 curr_state = curr_state->originator)
6918 {
6919 insn = curr_state->insn;
6920 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6921 || asm_noperands (PATTERN (insn)) >= 0);
6922 insn_num++;
6923 if (verbose >= 2 && dump)
6924 {
6925 struct DFA_chip
6926 {
6927 unsigned short one_automaton_state;
6928 unsigned short oneb_automaton_state;
6929 unsigned short two_automaton_state;
6930 unsigned short twob_automaton_state;
6931 };
6932
6933 fprintf
6934 (dump,
6935 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6936 curr_state->unique_num,
6937 (curr_state->originator == NULL
6938 ? -1 : curr_state->originator->unique_num),
6939 curr_state->cost,
6940 curr_state->before_nops_num, curr_state->after_nops_num,
6941 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6942 (ia64_tune == PROCESSOR_ITANIUM
6943 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6944 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6945 INSN_UID (insn));
6946 }
6947 max_pos = get_max_pos (curr_state->dfa_state);
6948 if (max_pos == 6 || (max_pos == 3 && template0 < 0))
6949 {
6950 pos = max_pos;
6951 if (max_pos == 3)
6952 template0 = get_template (curr_state->dfa_state, 3);
6953 else
6954 {
6955 template1 = get_template (curr_state->dfa_state, 3);
6956 template0 = get_template (curr_state->dfa_state, 6);
6957 }
6958 }
6959 if (max_pos > 3 && template1 < 0)
6960 {
6961 if (pos > 3)
6962 abort ();
6963 template1 = get_template (curr_state->dfa_state, 3);
6964 pos += 3;
6965 }
6966 if (!asm_p)
6967 for (i = 0; i < curr_state->after_nops_num; i++)
6968 {
6969 nop = gen_nop ();
6970 emit_insn_after (nop, insn);
6971 pos--;
6972 if (pos < 0)
6973 abort ();
6974 if (pos % 3 == 0)
6975 {
6976 if (template0 < 0)
6977 abort ();
6978 b = gen_bundle_selector (GEN_INT (template0));
6979 ia64_emit_insn_before (b, nop);
6980 template0 = template1;
6981 template1 = -1;
6982 }
6983 }
6984 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6985 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6986 && asm_noperands (PATTERN (insn)) < 0)
6987 pos--;
6988 if (ia64_safe_type (insn) == TYPE_L)
6989 pos--;
6990 if (pos < 0)
6991 abort ();
6992 if (pos % 3 == 0
6993 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6994 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6995 && asm_noperands (PATTERN (insn)) < 0)
6996 {
6997 if (template0 < 0)
6998 abort ();
6999 b = gen_bundle_selector (GEN_INT (template0));
7000 ia64_emit_insn_before (b, insn);
7001 b = PREV_INSN (insn);
7002 insn = b;
7003 template0 = template1;
7004 template1 = -1;
7005 }
7006 for (i = 0; i < curr_state->before_nops_num; i++)
7007 {
7008 nop = gen_nop ();
7009 ia64_emit_insn_before (nop, insn);
7010 nop = PREV_INSN (insn);
7011 insn = nop;
7012 pos--;
7013 if (pos < 0)
7014 abort ();
7015 if (pos % 3 == 0)
7016 {
7017 if (template0 < 0)
7018 abort ();
7019 b = gen_bundle_selector (GEN_INT (template0));
7020 ia64_emit_insn_before (b, insn);
7021 b = PREV_INSN (insn);
7022 insn = b;
7023 template0 = template1;
7024 template1 = -1;
7025 }
7026 }
7027 }
7028 if (ia64_tune == PROCESSOR_ITANIUM)
7029 /* Insert additional cycles for MM-insns: */
7030 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7031 insn != NULL_RTX;
7032 insn = next_insn)
7033 {
7034 if (!INSN_P (insn)
7035 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7036 || GET_CODE (PATTERN (insn)) == USE
7037 || GET_CODE (PATTERN (insn)) == CLOBBER)
7038 abort ();
7039 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7040 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
7041 {
7042 rtx last;
7043 int i, j, n;
7044 int pred_stop_p;
7045
7046 last = prev_active_insn (insn);
7047 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7048 if (pred_stop_p)
7049 last = prev_active_insn (last);
7050 n = 0;
7051 for (;; last = prev_active_insn (last))
7052 if (recog_memoized (last) == CODE_FOR_bundle_selector)
7053 {
7054 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
7055 if (template0 == 9)
7056 PATTERN (last)
7057 = gen_bundle_selector (GEN_INT (2)); /* -> MFI */
7058 break;
7059 }
7060 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7061 n++;
7062 if ((pred_stop_p && n == 0) || n > 2
7063 || (template0 == 9 && n != 0))
7064 abort ();
7065 for (j = 3 - n; j > 0; j --)
7066 ia64_emit_insn_before (gen_nop (), insn);
7067 add_cycles [INSN_UID (insn)]--;
7068 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7069 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7070 insn);
7071 if (pred_stop_p)
7072 add_cycles [INSN_UID (insn)]--;
7073 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7074 {
7075 /* Insert .MII bundle. */
7076 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)),
7077 insn);
7078 ia64_emit_insn_before (gen_nop (), insn);
7079 ia64_emit_insn_before (gen_nop (), insn);
7080 if (i > 1)
7081 {
7082 ia64_emit_insn_before
7083 (gen_insn_group_barrier (GEN_INT (3)), insn);
7084 i--;
7085 }
7086 ia64_emit_insn_before (gen_nop (), insn);
7087 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7088 insn);
7089 }
7090 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
7091 insn);
7092 for (j = n; j > 0; j --)
7093 ia64_emit_insn_before (gen_nop (), insn);
7094 if (pred_stop_p)
7095 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7096 insn);
7097 }
7098 }
7099 free (index_to_bundle_states);
7100 finish_bundle_state_table ();
7101 bundling_p = 0;
7102 dfa_clean_insn_cache ();
7103 }
7104
7105 /* The following function is called at the end of scheduling BB or
7106 EBB. After reload, it inserts stop bits and does insn bundling. */
7107
7108 static void
7109 ia64_sched_finish (dump, sched_verbose)
7110 FILE *dump;
7111 int sched_verbose;
7112 {
7113 if (sched_verbose)
7114 fprintf (dump, "// Finishing schedule.\n");
7115 if (!reload_completed)
7116 return;
7117 if (reload_completed)
7118 {
7119 final_emit_insn_group_barriers (dump);
7120 bundling (dump, sched_verbose, current_sched_info->prev_head,
7121 current_sched_info->next_tail);
7122 if (sched_verbose && dump)
7123 fprintf (dump, "// finishing %d-%d\n",
7124 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7125 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7126
7127 return;
7128 }
7129 }
7130
7131 /* The following function inserts stop bits in scheduled BB or EBB. */
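/* (When TARGET_EARLY_STOP_BITS is enabled, a required stop bit is not
   placed immediately before the offending insn; instead the code walks
   back to the insn that started the current cycle (a TImode insn recorded
   in stops_p), emits the stop bit just before it, and then replays the
   intervening insns through group_barrier_needed_p to rebuild the
   tracking state.) */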
7132
7133 static void
7134 final_emit_insn_group_barriers (dump)
7135 FILE *dump ATTRIBUTE_UNUSED;
7136 {
7137 rtx insn;
7138 int need_barrier_p = 0;
7139 rtx prev_insn = NULL_RTX;
7140
7141 init_insn_group_barriers ();
7142
7143 for (insn = NEXT_INSN (current_sched_info->prev_head);
7144 insn != current_sched_info->next_tail;
7145 insn = NEXT_INSN (insn))
7146 {
7147 if (GET_CODE (insn) == BARRIER)
7148 {
7149 rtx last = prev_active_insn (insn);
7150
7151 if (! last)
7152 continue;
7153 if (GET_CODE (last) == JUMP_INSN
7154 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7155 last = prev_active_insn (last);
7156 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7157 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7158
7159 init_insn_group_barriers ();
7160 need_barrier_p = 0;
7161 prev_insn = NULL_RTX;
7162 }
7163 else if (INSN_P (insn))
7164 {
7165 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7166 {
7167 init_insn_group_barriers ();
7168 need_barrier_p = 0;
7169 prev_insn = NULL_RTX;
7170 }
7171 else if (need_barrier_p || group_barrier_needed_p (insn))
7172 {
7173 if (TARGET_EARLY_STOP_BITS)
7174 {
7175 rtx last;
7176
7177 for (last = insn;
7178 last != current_sched_info->prev_head;
7179 last = PREV_INSN (last))
7180 if (INSN_P (last) && GET_MODE (last) == TImode
7181 && stops_p [INSN_UID (last)])
7182 break;
7183 if (last == current_sched_info->prev_head)
7184 last = insn;
7185 last = prev_active_insn (last);
7186 if (last
7187 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7188 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7189 last);
7190 init_insn_group_barriers ();
7191 for (last = NEXT_INSN (last);
7192 last != insn;
7193 last = NEXT_INSN (last))
7194 if (INSN_P (last))
7195 group_barrier_needed_p (last);
7196 }
7197 else
7198 {
7199 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7200 insn);
7201 init_insn_group_barriers ();
7202 }
7203 group_barrier_needed_p (insn);
7204 prev_insn = NULL_RTX;
7205 }
7206 else if (recog_memoized (insn) >= 0)
7207 prev_insn = insn;
7208 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7209 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7210 || asm_noperands (PATTERN (insn)) >= 0);
7211 }
7212 }
7213 }
7214
7215 \f
7216
7217 /* If the following function returns TRUE, we will use the DFA
7218 insn scheduler. */
7219
7220 static int
7221 ia64_use_dfa_pipeline_interface ()
7222 {
7223 return 1;
7224 }
7225
7226 /* The following function returns how many insns the DFA insn scheduler
7227 should look ahead when choosing an insn for the first cycle. */
7228
7229 static int
7230 ia64_first_cycle_multipass_dfa_lookahead ()
7231 {
7232 return (reload_completed ? 6 : 4);
7233 }
7234
7235 /* The following function initiates variable `dfa_pre_cycle_insn'. */
7236
7237 static void
7238 ia64_init_dfa_pre_cycle_insn ()
7239 {
7240 if (temp_dfa_state == NULL)
7241 {
7242 dfa_state_size = state_size ();
7243 temp_dfa_state = xmalloc (dfa_state_size);
7244 prev_cycle_state = xmalloc (dfa_state_size);
7245 }
7246 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7247 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7248 recog_memoized (dfa_pre_cycle_insn);
7249 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7250 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7251 recog_memoized (dfa_stop_insn);
7252 }
7253
7254 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7255 used by the DFA insn scheduler. */
7256
7257 static rtx
7258 ia64_dfa_pre_cycle_insn ()
7259 {
7260 return dfa_pre_cycle_insn;
7261 }
7262
7263 /* The following function returns TRUE if PRODUCER (of type ilog or
7264 ld) produces the address for CONSUMER (of type st or stf). */
7265
7266 int
7267 ia64_st_address_bypass_p (producer, consumer)
7268 rtx producer;
7269 rtx consumer;
7270 {
7271 rtx dest, reg, mem;
7272
7273 if (producer == NULL_RTX || consumer == NULL_RTX)
7274 abort ();
7275 dest = ia64_single_set (producer);
7276 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7277 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7278 abort ();
7279 if (GET_CODE (reg) == SUBREG)
7280 reg = SUBREG_REG (reg);
7281 dest = ia64_single_set (consumer);
7282 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
7283 || GET_CODE (mem) != MEM)
7284 abort ();
7285 return reg_mentioned_p (reg, mem);
7286 }
7287
7288 /* The following function returns TRUE if PRODUCER (of type ilog or
7289 ld) produces the address for CONSUMER (of type ld or fld). */
7290
7291 int
7292 ia64_ld_address_bypass_p (producer, consumer)
7293 rtx producer;
7294 rtx consumer;
7295 {
7296 rtx dest, src, reg, mem;
7297
7298 if (producer == NULL_RTX || consumer == NULL_RTX)
7299 abort ();
7300 dest = ia64_single_set (producer);
7301 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7302 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7303 abort ();
7304 if (GET_CODE (reg) == SUBREG)
7305 reg = SUBREG_REG (reg);
7306 src = ia64_single_set (consumer);
7307 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
7308 abort ();
7309 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7310 mem = XVECEXP (mem, 0, 0);
7311 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7312 mem = XEXP (mem, 0);
7313
7314 /* Note that LO_SUM is used for GOT loads. */
7315 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
7316 abort ();
7317
7318 return reg_mentioned_p (reg, mem);
7319 }
7320
7321 /* The following function returns TRUE if INSN produces an address for a
7322 load/store insn. We will place such insns into an M slot because that
7323 decreases their latency time. */
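/* (This simply reads back the flag set by
   ia64_dependencies_evaluation_hook above; the predicate itself is
   referenced from the DFA descriptions.) */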
7324
7325 int
7326 ia64_produce_address_p (insn)
7327 rtx insn;
7328 {
7329 return insn->call;
7330 }
7331
7332 \f
7333 /* Emit pseudo-ops for the assembler to describe predicate relations.
7334 At present this assumes that we only consider predicate pairs to
7335 be mutex, and that the assembler can deduce proper values from
7336 straight-line code. */
7337
7338 static void
7339 emit_predicate_relation_info ()
7340 {
7341 basic_block bb;
7342
7343 FOR_EACH_BB_REVERSE (bb)
7344 {
7345 int r;
7346 rtx head = bb->head;
7347
7348 /* We only need such notes at code labels. */
7349 if (GET_CODE (head) != CODE_LABEL)
7350 continue;
7351 if (GET_CODE (NEXT_INSN (head)) == NOTE
7352 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7353 head = NEXT_INSN (head);
7354
7355 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7356 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7357 {
7358 rtx p = gen_rtx_REG (BImode, r);
7359 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7360 if (head == bb->end)
7361 bb->end = n;
7362 head = n;
7363 }
7364 }
7365
7366 /* Look for conditional calls that do not return, and protect predicate
7367 relations around them. Otherwise the assembler will assume the call
7368 returns, and complain about uses of call-clobbered predicates after
7369 the call. */
7370 FOR_EACH_BB_REVERSE (bb)
7371 {
7372 rtx insn = bb->head;
7373
7374 while (1)
7375 {
7376 if (GET_CODE (insn) == CALL_INSN
7377 && GET_CODE (PATTERN (insn)) == COND_EXEC
7378 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7379 {
7380 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7381 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7382 if (bb->head == insn)
7383 bb->head = b;
7384 if (bb->end == insn)
7385 bb->end = a;
7386 }
7387
7388 if (insn == bb->end)
7389 break;
7390 insn = NEXT_INSN (insn);
7391 }
7392 }
7393 }
7394
7395 /* Perform machine dependent operations on the rtl chain INSNS. */
7396
7397 static void
7398 ia64_reorg ()
7399 {
7400 /* We are freeing block_for_insn in the toplev to keep compatibility
7401 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7402 compute_bb_for_insn ();
7403
7404 /* If optimizing, we'll have split before scheduling. */
7405 if (optimize == 0)
7406 split_all_insns (0);
7407
7408 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7409 non-optimizing bootstrap. */
7410 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7411
7412 if (ia64_flag_schedule_insns2)
7413 {
7414 timevar_push (TV_SCHED2);
7415 ia64_final_schedule = 1;
7416
7417 initiate_bundle_states ();
7418 ia64_nop = make_insn_raw (gen_nop ());
7419 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7420 recog_memoized (ia64_nop);
7421 clocks_length = get_max_uid () + 1;
7422 stops_p = (char *) xmalloc (clocks_length);
7423 memset (stops_p, 0, clocks_length);
7424 if (ia64_tune == PROCESSOR_ITANIUM)
7425 {
7426 clocks = (int *) xmalloc (clocks_length * sizeof (int));
7427 memset (clocks, 0, clocks_length * sizeof (int));
7428 add_cycles = (int *) xmalloc (clocks_length * sizeof (int));
7429 memset (add_cycles, 0, clocks_length * sizeof (int));
7430 }
7431 if (ia64_tune == PROCESSOR_ITANIUM2)
7432 {
7433 pos_1 = get_cpu_unit_code ("2_1");
7434 pos_2 = get_cpu_unit_code ("2_2");
7435 pos_3 = get_cpu_unit_code ("2_3");
7436 pos_4 = get_cpu_unit_code ("2_4");
7437 pos_5 = get_cpu_unit_code ("2_5");
7438 pos_6 = get_cpu_unit_code ("2_6");
7439 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7440 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7441 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7442 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7443 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7444 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7445 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7446 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7447 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7448 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7449 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7450 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7451 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7452 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7453 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7454 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7455 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7456 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7457 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7458 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7459 }
7460 else
7461 {
7462 pos_1 = get_cpu_unit_code ("1_1");
7463 pos_2 = get_cpu_unit_code ("1_2");
7464 pos_3 = get_cpu_unit_code ("1_3");
7465 pos_4 = get_cpu_unit_code ("1_4");
7466 pos_5 = get_cpu_unit_code ("1_5");
7467 pos_6 = get_cpu_unit_code ("1_6");
7468 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7469 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7470 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7471 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7472 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7473 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7474 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7475 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7476 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7477 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7478 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7479 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7480 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7481 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7482 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7483 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7484 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7485 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7486 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7487 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7488 }
7489 schedule_ebbs (rtl_dump_file);
7490 finish_bundle_states ();
7491 if (ia64_tune == PROCESSOR_ITANIUM)
7492 {
7493 free (add_cycles);
7494 free (clocks);
7495 }
7496 free (stops_p);
7497 emit_insn_group_barriers (rtl_dump_file);
7498
7499 ia64_final_schedule = 0;
7500 timevar_pop (TV_SCHED2);
7501 }
7502 else
7503 emit_all_insn_group_barriers (rtl_dump_file);
7504
7505 /* A call must not be the last instruction in a function, so that the
7506 return address is still within the function and unwinding works
7507 properly. Note that IA-64 differs from dwarf2 on this point. */
7508 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7509 {
7510 rtx insn;
7511 int saw_stop = 0;
7512
7513 insn = get_last_insn ();
7514 if (! INSN_P (insn))
7515 insn = prev_active_insn (insn);
7516 if (GET_CODE (insn) == INSN
7517 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7518 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7519 {
7520 saw_stop = 1;
7521 insn = prev_active_insn (insn);
7522 }
7523 if (GET_CODE (insn) == CALL_INSN)
7524 {
7525 if (! saw_stop)
7526 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7527 emit_insn (gen_break_f ());
7528 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7529 }
7530 }
7531
7532 fixup_errata ();
7533 emit_predicate_relation_info ();
7534 }
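/* An illustrative example (editor's sketch): if a function ends in a
   noreturn call, the fixup above appends roughly

       br.call.sptk.many b0 = abort#
       ;;
       break.f 0
       ;;

   so that the return address (the bundle following the call) still
   falls inside this function's unwind region.  */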
7535 \f
7536 /* Return true if REGNO is used by the epilogue. */
7537
7538 int
7539 ia64_epilogue_uses (regno)
7540 int regno;
7541 {
7542 switch (regno)
7543 {
7544 case R_GR (1):
7545 /* With a call to a function in another module, we will write a new
7546 value to "gp". After returning from such a call, we need to make
7547 sure the function restores the original gp-value, even if the
7548 function itself does not use the gp anymore. */
7549 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7550
7551 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7552 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7553 /* For functions defined with the syscall_linkage attribute, all
7554 input registers are marked as live at all function exits. This
7555 prevents the register allocator from using the input registers,
7556 which in turn makes it possible to restart a system call after
7557 an interrupt without having to save/restore the input registers.
7558 This also prevents kernel data from leaking to application code. */
7559 return lookup_attribute ("syscall_linkage",
7560 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7561
7562 case R_BR (0):
7563 /* Conditional return patterns can't represent the use of `b0' as
7564 the return address, so we force the value live this way. */
7565 return 1;
7566
7567 case AR_PFS_REGNUM:
7568 /* Likewise for ar.pfs, which is used by br.ret. */
7569 return 1;
7570
7571 default:
7572 return 0;
7573 }
7574 }
7575
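/* An illustrative example (editor's sketch; the declaration is
   hypothetical): the syscall_linkage case above is what keeps in0-in7
   live at every exit of a function declared like

       long sys_entry (long, long, long, long)
            __attribute__ ((syscall_linkage));

   so a kernel can restart the interrupted system call with its original
   argument registers intact.  */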
7576 /* Return true if REGNO is used by the frame unwinder. */
7577
7578 int
7579 ia64_eh_uses (regno)
7580 int regno;
7581 {
7582 if (! reload_completed)
7583 return 0;
7584
7585 if (current_frame_info.reg_save_b0
7586 && regno == current_frame_info.reg_save_b0)
7587 return 1;
7588 if (current_frame_info.reg_save_pr
7589 && regno == current_frame_info.reg_save_pr)
7590 return 1;
7591 if (current_frame_info.reg_save_ar_pfs
7592 && regno == current_frame_info.reg_save_ar_pfs)
7593 return 1;
7594 if (current_frame_info.reg_save_ar_unat
7595 && regno == current_frame_info.reg_save_ar_unat)
7596 return 1;
7597 if (current_frame_info.reg_save_ar_lc
7598 && regno == current_frame_info.reg_save_ar_lc)
7599 return 1;
7600
7601 return 0;
7602 }
7603 \f
7604 /* Return true if this goes in small data/bss. */
7605
7606 /* ??? We could also support our own long data here, generating
7607 movl/add/ld8 instead of addl/ld8/ld8. This makes the code bigger, but
7608 should make it faster because there is one less load. This would also
7609 cover incomplete types, which can't go in sdata/sbss. */
7610
7611 static bool
7612 ia64_in_small_data_p (exp)
7613 tree exp;
7614 {
7615 if (TARGET_NO_SDATA)
7616 return false;
7617
7618 /* We want to merge strings, so we never consider them small data. */
7619 if (TREE_CODE (exp) == STRING_CST)
7620 return false;
7621
7622 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7623 {
7624 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7625 if (strcmp (section, ".sdata") == 0
7626 || strcmp (section, ".sbss") == 0)
7627 return true;
7628 }
7629 else
7630 {
7631 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7632
7633 /* If this is an incomplete type with size 0, then we can't put it
7634 in sdata because it might be too big when completed. */
7635 if (size > 0 && size <= ia64_section_threshold)
7636 return true;
7637 }
7638
7639 return false;
7640 }
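/* An illustrative example (editor's sketch, assuming the usual 8-byte
   small-data threshold, adjustable with -G): the definitions

       int counter;        // 4 bytes  -> small data, gp-relative addl
       char table[64];     // 64 bytes -> ordinary data/bss
       int fast __attribute__ ((section (".sdata"))) = 1;  // forced small

   show the three cases handled above: small by size, too big by size,
   and explicitly placed in a small-data section by name.  */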
7641 \f
7642 /* Output assembly directives for prologue regions. */
7643
7644 /* The current basic block number. */
7645
7646 static bool last_block;
7647
7648 /* True if we need a copy_state command at the start of the next block. */
7649
7650 static bool need_copy_state;
7651
7652 /* This function emits unwind directives for the start of an epilogue. */
7653
7654 static void
7655 process_epilogue ()
7656 {
7657 /* If this isn't the last block of the function, then we need to label the
7658 current state, and copy it back in at the start of the next block. */
7659
7660 if (!last_block)
7661 {
7662 fprintf (asm_out_file, "\t.label_state 1\n");
7663 need_copy_state = true;
7664 }
7665
7666 fprintf (asm_out_file, "\t.restore sp\n");
7667 }
7668
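/* An illustrative example (editor's sketch): for an epilogue that is not
   in the last block, the directives emitted here cooperate with the
   .copy_state emitted from process_for_unwind_directive:

       .label_state 1      // snapshot the unwind state before the epilogue
       .restore sp         // the epilogue restores sp here
       ...
       br.ret.sptk.many b0
   .L42:                   // hypothetical label for the following block
       .body
       .copy_state 1       // later code still has the saved frame state
   */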
7669 /* This function processes a SET pattern looking for specific patterns
7670 which result in emitting an assembly directive required for unwinding. */
7671
7672 static int
7673 process_set (asm_out_file, pat)
7674 FILE *asm_out_file;
7675 rtx pat;
7676 {
7677 rtx src = SET_SRC (pat);
7678 rtx dest = SET_DEST (pat);
7679 int src_regno, dest_regno;
7680
7681 /* Look for the ALLOC insn. */
7682 if (GET_CODE (src) == UNSPEC_VOLATILE
7683 && XINT (src, 1) == UNSPECV_ALLOC
7684 && GET_CODE (dest) == REG)
7685 {
7686 dest_regno = REGNO (dest);
7687
7688 /* If this isn't the final destination for ar.pfs, the alloc
7689 shouldn't have been marked frame related. */
7690 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7691 abort ();
7692
7693 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7694 ia64_dbx_register_number (dest_regno));
7695 return 1;
7696 }
7697
7698 /* Look for SP = .... */
7699 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7700 {
7701 if (GET_CODE (src) == PLUS)
7702 {
7703 rtx op0 = XEXP (src, 0);
7704 rtx op1 = XEXP (src, 1);
7705 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7706 {
7707 if (INTVAL (op1) < 0)
7708 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7709 -INTVAL (op1));
7710 else
7711 process_epilogue ();
7712 }
7713 else
7714 abort ();
7715 }
7716 else if (GET_CODE (src) == REG
7717 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7718 process_epilogue ();
7719 else
7720 abort ();
7721
7722 return 1;
7723 }
7724
7725 /* Register move we need to look at. */
7726 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7727 {
7728 src_regno = REGNO (src);
7729 dest_regno = REGNO (dest);
7730
7731 switch (src_regno)
7732 {
7733 case BR_REG (0):
7734 /* Saving return address pointer. */
7735 if (dest_regno != current_frame_info.reg_save_b0)
7736 abort ();
7737 fprintf (asm_out_file, "\t.save rp, r%d\n",
7738 ia64_dbx_register_number (dest_regno));
7739 return 1;
7740
7741 case PR_REG (0):
7742 if (dest_regno != current_frame_info.reg_save_pr)
7743 abort ();
7744 fprintf (asm_out_file, "\t.save pr, r%d\n",
7745 ia64_dbx_register_number (dest_regno));
7746 return 1;
7747
7748 case AR_UNAT_REGNUM:
7749 if (dest_regno != current_frame_info.reg_save_ar_unat)
7750 abort ();
7751 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7752 ia64_dbx_register_number (dest_regno));
7753 return 1;
7754
7755 case AR_LC_REGNUM:
7756 if (dest_regno != current_frame_info.reg_save_ar_lc)
7757 abort ();
7758 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7759 ia64_dbx_register_number (dest_regno));
7760 return 1;
7761
7762 case STACK_POINTER_REGNUM:
7763 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7764 || ! frame_pointer_needed)
7765 abort ();
7766 fprintf (asm_out_file, "\t.vframe r%d\n",
7767 ia64_dbx_register_number (dest_regno));
7768 return 1;
7769
7770 default:
7771 /* Everything else should indicate being stored to memory. */
7772 abort ();
7773 }
7774 }
7775
7776 /* Memory store we need to look at. */
7777 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7778 {
7779 long off;
7780 rtx base;
7781 const char *saveop;
7782
7783 if (GET_CODE (XEXP (dest, 0)) == REG)
7784 {
7785 base = XEXP (dest, 0);
7786 off = 0;
7787 }
7788 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7789 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7790 {
7791 base = XEXP (XEXP (dest, 0), 0);
7792 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7793 }
7794 else
7795 abort ();
7796
7797 if (base == hard_frame_pointer_rtx)
7798 {
7799 saveop = ".savepsp";
7800 off = - off;
7801 }
7802 else if (base == stack_pointer_rtx)
7803 saveop = ".savesp";
7804 else
7805 abort ();
7806
7807 src_regno = REGNO (src);
7808 switch (src_regno)
7809 {
7810 case BR_REG (0):
7811 if (current_frame_info.reg_save_b0 != 0)
7812 abort ();
7813 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7814 return 1;
7815
7816 case PR_REG (0):
7817 if (current_frame_info.reg_save_pr != 0)
7818 abort ();
7819 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7820 return 1;
7821
7822 case AR_LC_REGNUM:
7823 if (current_frame_info.reg_save_ar_lc != 0)
7824 abort ();
7825 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7826 return 1;
7827
7828 case AR_PFS_REGNUM:
7829 if (current_frame_info.reg_save_ar_pfs != 0)
7830 abort ();
7831 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7832 return 1;
7833
7834 case AR_UNAT_REGNUM:
7835 if (current_frame_info.reg_save_ar_unat != 0)
7836 abort ();
7837 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7838 return 1;
7839
7840 case GR_REG (4):
7841 case GR_REG (5):
7842 case GR_REG (6):
7843 case GR_REG (7):
7844 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7845 1 << (src_regno - GR_REG (4)));
7846 return 1;
7847
7848 case BR_REG (1):
7849 case BR_REG (2):
7850 case BR_REG (3):
7851 case BR_REG (4):
7852 case BR_REG (5):
7853 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7854 1 << (src_regno - BR_REG (1)));
7855 return 1;
7856
7857 case FR_REG (2):
7858 case FR_REG (3):
7859 case FR_REG (4):
7860 case FR_REG (5):
7861 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7862 1 << (src_regno - FR_REG (2)));
7863 return 1;
7864
7865 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7866 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7867 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7868 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7869 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7870 1 << (src_regno - FR_REG (12)));
7871 return 1;
7872
7873 default:
7874 return 0;
7875 }
7876 }
7877
7878 return 0;
7879 }
7880
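/* An illustrative example (editor's sketch): for a frame-related
   prologue of the form

       alloc r35 = ar.pfs, 0, 3, 1, 0
       mov r34 = b0
       adds r12 = -32, r12

   the SETs handled above come out as the unwind directives

       .save ar.pfs, r35
       .save rp, r34
       .fframe 32
   */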
7881
7882 /* This function looks at a single insn and emits any directives
7883 required to unwind this insn. */
7884 void
7885 process_for_unwind_directive (asm_out_file, insn)
7886 FILE *asm_out_file;
7887 rtx insn;
7888 {
7889 if (flag_unwind_tables
7890 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7891 {
7892 rtx pat;
7893
7894 if (GET_CODE (insn) == NOTE
7895 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7896 {
7897 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7898
7899 /* Restore unwind state from immediately before the epilogue. */
7900 if (need_copy_state)
7901 {
7902 fprintf (asm_out_file, "\t.body\n");
7903 fprintf (asm_out_file, "\t.copy_state 1\n");
7904 need_copy_state = false;
7905 }
7906 }
7907
7908 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7909 return;
7910
7911 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7912 if (pat)
7913 pat = XEXP (pat, 0);
7914 else
7915 pat = PATTERN (insn);
7916
7917 switch (GET_CODE (pat))
7918 {
7919 case SET:
7920 process_set (asm_out_file, pat);
7921 break;
7922
7923 case PARALLEL:
7924 {
7925 int par_index;
7926 int limit = XVECLEN (pat, 0);
7927 for (par_index = 0; par_index < limit; par_index++)
7928 {
7929 rtx x = XVECEXP (pat, 0, par_index);
7930 if (GET_CODE (x) == SET)
7931 process_set (asm_out_file, x);
7932 }
7933 break;
7934 }
7935
7936 default:
7937 abort ();
7938 }
7939 }
7940 }
7941
7942 \f
7943 void
7944 ia64_init_builtins ()
7945 {
7946 tree psi_type_node = build_pointer_type (integer_type_node);
7947 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7948
7949 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7950 tree si_ftype_psi_si_si
7951 = build_function_type_list (integer_type_node,
7952 psi_type_node, integer_type_node,
7953 integer_type_node, NULL_TREE);
7954
7955 /* __sync_val_compare_and_swap_di */
7956 tree di_ftype_pdi_di_di
7957 = build_function_type_list (long_integer_type_node,
7958 pdi_type_node, long_integer_type_node,
7959 long_integer_type_node, NULL_TREE);
7960 /* __sync_bool_compare_and_swap_di */
7961 tree si_ftype_pdi_di_di
7962 = build_function_type_list (integer_type_node,
7963 pdi_type_node, long_integer_type_node,
7964 long_integer_type_node, NULL_TREE);
7965 /* __sync_synchronize */
7966 tree void_ftype_void
7967 = build_function_type (void_type_node, void_list_node);
7968
7969 /* __sync_lock_test_and_set_si */
7970 tree si_ftype_psi_si
7971 = build_function_type_list (integer_type_node,
7972 psi_type_node, integer_type_node, NULL_TREE);
7973
7974 /* __sync_lock_test_and_set_di */
7975 tree di_ftype_pdi_di
7976 = build_function_type_list (long_integer_type_node,
7977 pdi_type_node, long_integer_type_node,
7978 NULL_TREE);
7979
7980 /* __sync_lock_release_si */
7981 tree void_ftype_psi
7982 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7983
7984 /* __sync_lock_release_di */
7985 tree void_ftype_pdi
7986 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7987
7988 #define def_builtin(name, type, code) \
7989 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7990
7991 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7992 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7993 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7994 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7995 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7996 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7997 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
7998 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7999
8000 def_builtin ("__sync_synchronize", void_ftype_void,
8001 IA64_BUILTIN_SYNCHRONIZE);
8002
8003 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
8004 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
8005 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
8006 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
8007 def_builtin ("__sync_lock_release_si", void_ftype_psi,
8008 IA64_BUILTIN_LOCK_RELEASE_SI);
8009 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
8010 IA64_BUILTIN_LOCK_RELEASE_DI);
8011
8012 def_builtin ("__builtin_ia64_bsp",
8013 build_function_type (ptr_type_node, void_list_node),
8014 IA64_BUILTIN_BSP);
8015
8016 def_builtin ("__builtin_ia64_flushrs",
8017 build_function_type (void_type_node, void_list_node),
8018 IA64_BUILTIN_FLUSHRS);
8019
8020 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
8021 IA64_BUILTIN_FETCH_AND_ADD_SI);
8022 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
8023 IA64_BUILTIN_FETCH_AND_SUB_SI);
8024 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
8025 IA64_BUILTIN_FETCH_AND_OR_SI);
8026 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
8027 IA64_BUILTIN_FETCH_AND_AND_SI);
8028 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
8029 IA64_BUILTIN_FETCH_AND_XOR_SI);
8030 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
8031 IA64_BUILTIN_FETCH_AND_NAND_SI);
8032
8033 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
8034 IA64_BUILTIN_ADD_AND_FETCH_SI);
8035 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
8036 IA64_BUILTIN_SUB_AND_FETCH_SI);
8037 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
8038 IA64_BUILTIN_OR_AND_FETCH_SI);
8039 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
8040 IA64_BUILTIN_AND_AND_FETCH_SI);
8041 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
8042 IA64_BUILTIN_XOR_AND_FETCH_SI);
8043 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
8044 IA64_BUILTIN_NAND_AND_FETCH_SI);
8045
8046 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
8047 IA64_BUILTIN_FETCH_AND_ADD_DI);
8048 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
8049 IA64_BUILTIN_FETCH_AND_SUB_DI);
8050 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
8051 IA64_BUILTIN_FETCH_AND_OR_DI);
8052 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
8053 IA64_BUILTIN_FETCH_AND_AND_DI);
8054 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
8055 IA64_BUILTIN_FETCH_AND_XOR_DI);
8056 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
8057 IA64_BUILTIN_FETCH_AND_NAND_DI);
8058
8059 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
8060 IA64_BUILTIN_ADD_AND_FETCH_DI);
8061 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
8062 IA64_BUILTIN_SUB_AND_FETCH_DI);
8063 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
8064 IA64_BUILTIN_OR_AND_FETCH_DI);
8065 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
8066 IA64_BUILTIN_AND_AND_FETCH_DI);
8067 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
8068 IA64_BUILTIN_XOR_AND_FETCH_DI);
8069 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
8070 IA64_BUILTIN_NAND_AND_FETCH_DI);
8071
8072 #undef def_builtin
8073 }
8074
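/* An illustrative example (editor's sketch) of the compare-and-swap
   builtins defined above, using the "bool" flavor as a loop condition:

       static int value;

       int
       bump_if_nonzero (void)
       {
         int old, new;
         do
           {
             old = value;
             if (old == 0)
               return 0;
             new = old + 1;
           }
         while (! __sync_bool_compare_and_swap_si (&value, old, new));
         return new;
       }
   */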
8075 /* Expand fetch_and_op intrinsics. The basic code sequence is:
8076
8077 mf
8078 tmp = [ptr];
8079 do {
8080 ret = tmp;
8081 ar.ccv = tmp;
8082 tmp <op>= value;
8083 cmpxchgsz.acq tmp = [ptr], tmp
8084 } while (tmp != ret)
8085 */
8086
8087 static rtx
8088 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
8089 optab binoptab;
8090 enum machine_mode mode;
8091 tree arglist;
8092 rtx target;
8093 {
8094 rtx ret, label, tmp, ccv, insn, mem, value;
8095 tree arg0, arg1;
8096
8097 arg0 = TREE_VALUE (arglist);
8098 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8099 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8100 #ifdef POINTERS_EXTEND_UNSIGNED
8101 if (GET_MODE (mem) != Pmode)
8102 mem = convert_memory_address (Pmode, mem);
8103 #endif
8104 value = expand_expr (arg1, NULL_RTX, mode, 0);
8105
8106 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8107 MEM_VOLATILE_P (mem) = 1;
8108
8109 if (target && register_operand (target, mode))
8110 ret = target;
8111 else
8112 ret = gen_reg_rtx (mode);
8113
8114 emit_insn (gen_mf ());
8115
8116 /* Special case for fetchadd instructions. */
8117 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
8118 {
8119 if (mode == SImode)
8120 insn = gen_fetchadd_acq_si (ret, mem, value);
8121 else
8122 insn = gen_fetchadd_acq_di (ret, mem, value);
8123 emit_insn (insn);
8124 return ret;
8125 }
8126
8127 tmp = gen_reg_rtx (mode);
8128 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
8129 emit_move_insn (tmp, mem);
8130
8131 label = gen_label_rtx ();
8132 emit_label (label);
8133 emit_move_insn (ret, tmp);
8134 emit_move_insn (ccv, tmp);
8135
8136 /* Perform the specific operation. Special case NAND, which the caller
8137 passes in as one_cmpl_optab: complement TMP here, then AND below. */
8138 if (binoptab == one_cmpl_optab)
8139 {
8140 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8141 binoptab = and_optab;
8142 }
8143 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
8144
8145 if (mode == SImode)
8146 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
8147 else
8148 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
8149 emit_insn (insn);
8150
8151 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
8152
8153 return ret;
8154 }
8155
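/* Editor's note (illustrative): the fetchadd fast path above only
   applies when the addend is one of the increments the hardware accepts
   (-16, -8, -4, -1, 1, 4, 8, 16), which is what fetchadd_operand
   checks.  For example

       __sync_fetch_and_add_si (&n, 1);   // single fetchadd4.acq
       __sync_fetch_and_add_si (&n, 3);   // falls back to the cmpxchg loop
   */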
8156 /* Expand op_and_fetch intrinsics. The basic code sequence is:
8157
8158 mf
8159 tmp = [ptr];
8160 do {
8161 old = tmp;
8162 ar.ccv = tmp;
8163 ret = tmp <op> value;
8164 cmpxchgsz.acq tmp = [ptr], ret
8165 } while (tmp != old)
8166 */
8167
8168 static rtx
8169 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
8170 optab binoptab;
8171 enum machine_mode mode;
8172 tree arglist;
8173 rtx target;
8174 {
8175 rtx old, label, tmp, ret, ccv, insn, mem, value;
8176 tree arg0, arg1;
8177
8178 arg0 = TREE_VALUE (arglist);
8179 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8180 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8181 #ifdef POINTERS_EXTEND_UNSIGNED
8182 if (GET_MODE (mem) != Pmode)
8183 mem = convert_memory_address (Pmode, mem);
8184 #endif
8185
8186 value = expand_expr (arg1, NULL_RTX, mode, 0);
8187
8188 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8189 MEM_VOLATILE_P (mem) = 1;
8190
8191 if (target && ! register_operand (target, mode))
8192 target = NULL_RTX;
8193
8194 emit_insn (gen_mf ());
8195 tmp = gen_reg_rtx (mode);
8196 old = gen_reg_rtx (mode);
8197 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
8198
8199 emit_move_insn (tmp, mem);
8200
8201 label = gen_label_rtx ();
8202 emit_label (label);
8203 emit_move_insn (old, tmp);
8204 emit_move_insn (ccv, tmp);
8205
8206 /* Perform the specific operation. Special case NAND, which the caller
8207 passes in as one_cmpl_optab: complement TMP here, then AND below. */
8208 if (binoptab == one_cmpl_optab)
8209 {
8210 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8211 binoptab = and_optab;
8212 }
8213 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
8214
8215 if (mode == SImode)
8216 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
8217 else
8218 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
8219 emit_insn (insn);
8220
8221 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
8222
8223 return ret;
8224 }
8225
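/* Editor's note (illustrative): the difference from
   ia64_expand_fetch_and_op is which value the builtin hands back; the
   retry loop itself is the same:

       int before = __sync_fetch_and_add_si (&n, 4);  // value before the add
       int after  = __sync_add_and_fetch_si (&n, 4);  // value after the add
   */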
8226 /* Expand val_ and bool_compare_and_swap. For val_ we want:
8227
8228 ar.ccv = oldval
8229 mf
8230 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
8231 return ret
8232
8233 For bool_ it's the same except return ret == oldval.
8234 */
8235
8236 static rtx
8237 ia64_expand_compare_and_swap (rmode, mode, boolp, arglist, target)
8238 enum machine_mode rmode;
8239 enum machine_mode mode;
8240 int boolp;
8241 tree arglist;
8242 rtx target;
8243 {
8244 tree arg0, arg1, arg2;
8245 rtx mem, old, new, ccv, tmp, insn;
8246
8247 arg0 = TREE_VALUE (arglist);
8248 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8249 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8250 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8251 old = expand_expr (arg1, NULL_RTX, mode, 0);
8252 new = expand_expr (arg2, NULL_RTX, mode, 0);
8253
8254 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8255 MEM_VOLATILE_P (mem) = 1;
8256
8257 if (! register_operand (old, mode))
8258 old = copy_to_mode_reg (mode, old);
8259 if (! register_operand (new, mode))
8260 new = copy_to_mode_reg (mode, new);
8261
8262 if (! boolp && target && register_operand (target, mode))
8263 tmp = target;
8264 else
8265 tmp = gen_reg_rtx (mode);
8266
8267 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8268 if (mode == DImode)
8269 emit_move_insn (ccv, old);
8270 else
8271 {
8272 rtx ccvtmp = gen_reg_rtx (DImode);
8273 emit_insn (gen_zero_extendsidi2 (ccvtmp, old));
8274 emit_move_insn (ccv, ccvtmp);
8275 }
8276 emit_insn (gen_mf ());
8277 if (mode == SImode)
8278 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8279 else
8280 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8281 emit_insn (insn);
8282
8283 if (boolp)
8284 {
8285 if (! target)
8286 target = gen_reg_rtx (rmode);
8287 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8288 }
8289 else
8290 return tmp;
8291 }
8292
8293 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
8294
8295 static rtx
8296 ia64_expand_lock_test_and_set (mode, arglist, target)
8297 enum machine_mode mode;
8298 tree arglist;
8299 rtx target;
8300 {
8301 tree arg0, arg1;
8302 rtx mem, new, ret, insn;
8303
8304 arg0 = TREE_VALUE (arglist);
8305 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8306 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8307 new = expand_expr (arg1, NULL_RTX, mode, 0);
8308
8309 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8310 MEM_VOLATILE_P (mem) = 1;
8311 if (! register_operand (new, mode))
8312 new = copy_to_mode_reg (mode, new);
8313
8314 if (target && register_operand (target, mode))
8315 ret = target;
8316 else
8317 ret = gen_reg_rtx (mode);
8318
8319 if (mode == SImode)
8320 insn = gen_xchgsi (ret, mem, new);
8321 else
8322 insn = gen_xchgdi (ret, mem, new);
8323 emit_insn (insn);
8324
8325 return ret;
8326 }
8327
8328 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
8329
8330 static rtx
8331 ia64_expand_lock_release (mode, arglist, target)
8332 enum machine_mode mode;
8333 tree arglist;
8334 rtx target ATTRIBUTE_UNUSED;
8335 {
8336 tree arg0;
8337 rtx mem;
8338
8339 arg0 = TREE_VALUE (arglist);
8340 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8341
8342 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8343 MEM_VOLATILE_P (mem) = 1;
8344
8345 emit_move_insn (mem, const0_rtx);
8346
8347 return const0_rtx;
8348 }
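/* An illustrative example (editor's sketch): together these two builtins
   give the usual IA-64 spin lock, an xchg4 with acquire semantics to
   take the lock and a release store of zero to drop it:

       static int lock;

       void
       take_lock (void)
       {
         while (__sync_lock_test_and_set_si (&lock, 1))
           continue;
       }

       void
       drop_lock (void)
       {
         __sync_lock_release_si (&lock);
       }
   */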
8349
8350 rtx
8351 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
8352 tree exp;
8353 rtx target;
8354 rtx subtarget ATTRIBUTE_UNUSED;
8355 enum machine_mode mode ATTRIBUTE_UNUSED;
8356 int ignore ATTRIBUTE_UNUSED;
8357 {
8358 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8359 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8360 tree arglist = TREE_OPERAND (exp, 1);
8361 enum machine_mode rmode = VOIDmode;
8362
8363 switch (fcode)
8364 {
8365 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8366 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8367 mode = SImode;
8368 rmode = SImode;
8369 break;
8370
8371 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8372 case IA64_BUILTIN_LOCK_RELEASE_SI:
8373 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8374 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8375 case IA64_BUILTIN_FETCH_AND_OR_SI:
8376 case IA64_BUILTIN_FETCH_AND_AND_SI:
8377 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8378 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8379 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8380 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8381 case IA64_BUILTIN_OR_AND_FETCH_SI:
8382 case IA64_BUILTIN_AND_AND_FETCH_SI:
8383 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8384 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8385 mode = SImode;
8386 break;
8387
8388 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8389 mode = DImode;
8390 rmode = SImode;
8391 break;
8392
8393 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8394 mode = DImode;
8395 rmode = DImode;
8396 break;
8397
8398 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8399 case IA64_BUILTIN_LOCK_RELEASE_DI:
8400 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8401 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8402 case IA64_BUILTIN_FETCH_AND_OR_DI:
8403 case IA64_BUILTIN_FETCH_AND_AND_DI:
8404 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8405 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8406 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8407 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8408 case IA64_BUILTIN_OR_AND_FETCH_DI:
8409 case IA64_BUILTIN_AND_AND_FETCH_DI:
8410 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8411 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8412 mode = DImode;
8413 break;
8414
8415 default:
8416 break;
8417 }
8418
8419 switch (fcode)
8420 {
8421 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8422 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8423 return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8424 target);
8425
8426 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8427 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8428 return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8429 target);
8430
8431 case IA64_BUILTIN_SYNCHRONIZE:
8432 emit_insn (gen_mf ());
8433 return const0_rtx;
8434
8435 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8436 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8437 return ia64_expand_lock_test_and_set (mode, arglist, target);
8438
8439 case IA64_BUILTIN_LOCK_RELEASE_SI:
8440 case IA64_BUILTIN_LOCK_RELEASE_DI:
8441 return ia64_expand_lock_release (mode, arglist, target);
8442
8443 case IA64_BUILTIN_BSP:
8444 if (! target || ! register_operand (target, DImode))
8445 target = gen_reg_rtx (DImode);
8446 emit_insn (gen_bsp_value (target));
8447 #ifdef POINTERS_EXTEND_UNSIGNED
8448 target = convert_memory_address (ptr_mode, target);
8449 #endif
8450 return target;
8451
8452 case IA64_BUILTIN_FLUSHRS:
8453 emit_insn (gen_flushrs ());
8454 return const0_rtx;
8455
8456 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8457 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8458 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8459
8460 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8461 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8462 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8463
8464 case IA64_BUILTIN_FETCH_AND_OR_SI:
8465 case IA64_BUILTIN_FETCH_AND_OR_DI:
8466 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8467
8468 case IA64_BUILTIN_FETCH_AND_AND_SI:
8469 case IA64_BUILTIN_FETCH_AND_AND_DI:
8470 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8471
8472 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8473 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8474 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8475
8476 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8477 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8478 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8479
8480 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8481 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8482 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8483
8484 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8485 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8486 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8487
8488 case IA64_BUILTIN_OR_AND_FETCH_SI:
8489 case IA64_BUILTIN_OR_AND_FETCH_DI:
8490 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8491
8492 case IA64_BUILTIN_AND_AND_FETCH_SI:
8493 case IA64_BUILTIN_AND_AND_FETCH_DI:
8494 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8495
8496 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8497 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8498 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8499
8500 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8501 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8502 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8503
8504 default:
8505 break;
8506 }
8507
8508 return NULL_RTX;
8509 }
8510
8511 /* On HP-UX IA64, aggregate parameters are passed in the most significant
8512 bits of the stack slot. */
8513
8514 enum direction
8515 ia64_hpux_function_arg_padding (mode, type)
8516 enum machine_mode mode;
8517 tree type;
8518 {
8519 /* Exception to normal case for structures/unions/etc. */
8520
8521 if (type && AGGREGATE_TYPE_P (type)
8522 && int_size_in_bytes (type) < UNITS_PER_WORD)
8523 return upward;
8524
8525 /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
8526 hardwired to be true. */
8527
8528 return ((mode == BLKmode
8529 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8530 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
8531 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
8532 ? downward : upward);
8533 }
8534
8535 /* Linked list of all external functions that are to be emitted by GCC.
8536 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8537 order to avoid putting out names that are never really used. */
8538
8539 struct extern_func_list
8540 {
8541 struct extern_func_list *next; /* next external */
8542 char *name; /* name of the external */
8543 } *extern_func_head = 0;
8544
8545 static void
8546 ia64_hpux_add_extern_decl (name)
8547 const char *name;
8548 {
8549 struct extern_func_list *p;
8550
8551 p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
8552 p->name = xmalloc (strlen (name) + 1);
8553 strcpy (p->name, name);
8554 p->next = extern_func_head;
8555 extern_func_head = p;
8556 }
8557
8558 /* Print out the list of used global functions. */
8559
8560 static void
8561 ia64_hpux_file_end ()
8562 {
8563 while (extern_func_head)
8564 {
8565 const char *real_name;
8566 tree decl;
8567
8568 real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
8569 decl = maybe_get_identifier (real_name);
8570
8571 if (!decl
8572 || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
8573 {
8574 if (decl)
8575 TREE_ASM_WRITTEN (decl) = 1;
8576 (*targetm.asm_out.globalize_label) (asm_out_file,
8577 extern_func_head->name);
8578 fputs (TYPE_ASM_OP, asm_out_file);
8579 assemble_name (asm_out_file, extern_func_head->name);
8580 putc (',', asm_out_file);
8581 fprintf (asm_out_file, TYPE_OPERAND_FMT, "function");
8582 putc ('\n', asm_out_file);
8583 }
8584 extern_func_head = extern_func_head->next;
8585 }
8586 }
8587
8588 \f
8589 /* Switch to the section to which we should output X. The only thing
8590 special we do here is to honor small data. */
8591
8592 static void
8593 ia64_select_rtx_section (mode, x, align)
8594 enum machine_mode mode;
8595 rtx x;
8596 unsigned HOST_WIDE_INT align;
8597 {
8598 if (GET_MODE_SIZE (mode) > 0
8599 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8600 sdata_section ();
8601 else
8602 default_elf_select_rtx_section (mode, x, align);
8603 }
8604
8605 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8606 Pretend flag_pic is always set. */
8607
8608 static void
8609 ia64_rwreloc_select_section (exp, reloc, align)
8610 tree exp;
8611 int reloc;
8612 unsigned HOST_WIDE_INT align;
8613 {
8614 default_elf_select_section_1 (exp, reloc, align, true);
8615 }
8616
8617 static void
8618 ia64_rwreloc_unique_section (decl, reloc)
8619 tree decl;
8620 int reloc;
8621 {
8622 default_unique_section_1 (decl, reloc, true);
8623 }
8624
8625 static void
8626 ia64_rwreloc_select_rtx_section (mode, x, align)
8627 enum machine_mode mode;
8628 rtx x;
8629 unsigned HOST_WIDE_INT align;
8630 {
8631 int save_pic = flag_pic;
8632 flag_pic = 1;
8633 ia64_select_rtx_section (mode, x, align);
8634 flag_pic = save_pic;
8635 }
8636
8637 static unsigned int
8638 ia64_rwreloc_section_type_flags (decl, name, reloc)
8639 tree decl;
8640 const char *name;
8641 int reloc;
8642 {
8643 return default_section_type_flags_1 (decl, name, reloc, true);
8644 }
8645
8646
8647 /* Output the assembler code for a thunk function. THUNK_DECL is the
8648 declaration for the thunk function itself, FUNCTION is the decl for
8649 the target function. DELTA is an immediate constant offset to be
8650 added to THIS. If VCALL_OFFSET is nonzero, the word at
8651 *(*this + vcall_offset) should be added to THIS. */
8652
8653 static void
8654 ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
8655 FILE *file;
8656 tree thunk ATTRIBUTE_UNUSED;
8657 HOST_WIDE_INT delta;
8658 HOST_WIDE_INT vcall_offset;
8659 tree function;
8660 {
8661 rtx this, insn, funexp;
8662
8663 reload_completed = 1;
8664 epilogue_completed = 1;
8665 no_new_pseudos = 1;
8666
8667 /* Set things up as ia64_expand_prologue might. */
8668 last_scratch_gr_reg = 15;
8669
8670 memset (&current_frame_info, 0, sizeof (current_frame_info));
8671 current_frame_info.spill_cfa_off = -16;
8672 current_frame_info.n_input_regs = 1;
8673 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8674
8675 if (!TARGET_REG_NAMES)
8676 reg_names[IN_REG (0)] = ia64_reg_numbers[0];
8677
8678 /* Mark the end of the (empty) prologue. */
8679 emit_note (NOTE_INSN_PROLOGUE_END);
8680
8681 this = gen_rtx_REG (Pmode, IN_REG (0));
8682
8683 /* Apply the constant offset, if required. */
8684 if (delta)
8685 {
8686 rtx delta_rtx = GEN_INT (delta);
8687
8688 if (!CONST_OK_FOR_I (delta))
8689 {
8690 rtx tmp = gen_rtx_REG (Pmode, 2);
8691 emit_move_insn (tmp, delta_rtx);
8692 delta_rtx = tmp;
8693 }
8694 emit_insn (gen_adddi3 (this, this, delta_rtx));
8695 }
8696
8697 /* Apply the offset from the vtable, if required. */
8698 if (vcall_offset)
8699 {
8700 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8701 rtx tmp = gen_rtx_REG (Pmode, 2);
8702
8703 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8704
8705 if (!CONST_OK_FOR_J (vcall_offset))
8706 {
8707 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8708 emit_move_insn (tmp2, vcall_offset_rtx);
8709 vcall_offset_rtx = tmp2;
8710 }
8711 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8712
8713 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8714
8715 emit_insn (gen_adddi3 (this, this, tmp));
8716 }
8717
8718 /* Generate a tail call to the target function. */
8719 if (! TREE_USED (function))
8720 {
8721 assemble_external (function);
8722 TREE_USED (function) = 1;
8723 }
8724 funexp = XEXP (DECL_RTL (function), 0);
8725 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8726 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8727 insn = get_last_insn ();
8728 SIBLING_CALL_P (insn) = 1;
8729
8730 /* Code generation for calls relies on splitting. */
8731 reload_completed = 1;
8732 epilogue_completed = 1;
8733 try_split (PATTERN (insn), insn, 0);
8734
8735 emit_barrier ();
8736
8737 /* Run just enough of rest_of_compilation to get the insns emitted.
8738 There's not really enough bulk here to make other passes such as
8739 instruction scheduling worthwhile. Note that use_thunk calls
8740 assemble_start_function and assemble_end_function. */
8741
8742 insn_locators_initialize ();
8743 emit_all_insn_group_barriers (NULL);
8744 insn = get_insns ();
8745 shorten_branches (insn);
8746 final_start_function (insn, file, 1);
8747 final (insn, file, 1, 0);
8748 final_end_function ();
8749
8750 reload_completed = 0;
8751 epilogue_completed = 0;
8752 no_new_pseudos = 0;
8753 }
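/* An illustrative example (editor's sketch): the thunk emitted above is
   equivalent to the following pseudo-C, where ret_type, DELTA,
   VCALL_OFFSET and function are placeholders, THIS arrives in in0, and
   the whole body is expanded inline ahead of the sibling call:

       ret_type
       thunk (void *this_ptr, ...)
       {
         this_ptr = (char *) this_ptr + DELTA;
         if (VCALL_OFFSET != 0)
           this_ptr = (char *) this_ptr
                      + *(ptrdiff_t *) (*(char **) this_ptr + VCALL_OFFSET);
         return function (this_ptr, ...);   // emitted as a sibling call
       }
   */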
8754
8755 #include "gt-ia64.h"