1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
5
6 This file is part of GNU CC.
7
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-attr.h"
35 #include "flags.h"
36 #include "recog.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "obstack.h"
40 #include "except.h"
41 #include "function.h"
42 #include "ggc.h"
43 #include "basic-block.h"
44 #include "toplev.h"
45 #include "sched-int.h"
46 #include "timevar.h"
47 #include "target.h"
48 #include "target-def.h"
49
50 /* This is used for communication between ASM_OUTPUT_LABEL and
51 ASM_OUTPUT_LABELREF. */
52 int ia64_asm_output_label = 0;
53
54 /* Define the information needed to generate branch and scc insns. This is
55 stored from the compare operation. */
56 struct rtx_def * ia64_compare_op0;
57 struct rtx_def * ia64_compare_op1;
58
59 /* Register names for ia64_expand_prologue. */
60 static const char * const ia64_reg_numbers[96] =
61 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
62 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
63 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
64 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
65 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
66 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
67 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
68 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
69 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
70 "r104","r105","r106","r107","r108","r109","r110","r111",
71 "r112","r113","r114","r115","r116","r117","r118","r119",
72 "r120","r121","r122","r123","r124","r125","r126","r127"};
73
74 /* ??? These strings could be shared with REGISTER_NAMES. */
75 static const char * const ia64_input_reg_names[8] =
76 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
77
78 /* ??? These strings could be shared with REGISTER_NAMES. */
79 static const char * const ia64_local_reg_names[80] =
80 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
81 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
82 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
83 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
84 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
85 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
86 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
87 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
88 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
89 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
90
91 /* ??? These strings could be shared with REGISTER_NAMES. */
92 static const char * const ia64_output_reg_names[8] =
93 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
94
95 /* String used with the -mfixed-range= option. */
96 const char *ia64_fixed_range_string;
97
98 /* Determines whether we run our final scheduling pass or not. We always
99 avoid the normal second scheduling pass. */
100 static int ia64_flag_schedule_insns2;
101
102 /* Variables which are this size or smaller are put in the sdata/sbss
103 sections. */
104
105 unsigned int ia64_section_threshold;
106 \f
107 static int find_gr_spill PARAMS ((int));
108 static int next_scratch_gr_reg PARAMS ((void));
109 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
110 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
111 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
112 static void finish_spill_pointers PARAMS ((void));
113 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
114 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
115 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
116 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
117 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
118 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
119
120 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
121 static void fix_range PARAMS ((const char *));
122 static void ia64_add_gc_roots PARAMS ((void));
123 static void ia64_init_machine_status PARAMS ((struct function *));
124 static void ia64_mark_machine_status PARAMS ((struct function *));
125 static void ia64_free_machine_status PARAMS ((struct function *));
126 static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
127 static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
128 static void emit_predicate_relation_info PARAMS ((void));
129 static void process_epilogue PARAMS ((void));
130 static int process_set PARAMS ((FILE *, rtx));
131
132 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
133 tree, rtx));
134 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
135 tree, rtx));
136 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
137 tree, rtx));
138 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
139 tree, rtx));
140 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
141 const struct attribute_spec ia64_attribute_table[];
142 static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
143 static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
144 static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
145 static void ia64_output_function_end_prologue PARAMS ((FILE *));
146
147 static int ia64_issue_rate PARAMS ((void));
148 static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
149 static void ia64_sched_init PARAMS ((FILE *, int, int));
150 static void ia64_sched_finish PARAMS ((FILE *, int));
151 static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
152 int *, int, int));
153 static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
154 static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
155 static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
156 static rtx ia64_cycle_display PARAMS ((int, rtx));
157
158 \f
159 /* Initialize the GCC target structure. */
160 #undef TARGET_ATTRIBUTE_TABLE
161 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
162
163 #undef TARGET_INIT_BUILTINS
164 #define TARGET_INIT_BUILTINS ia64_init_builtins
165
166 #undef TARGET_EXPAND_BUILTIN
167 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
168
169 #undef TARGET_ASM_BYTE_OP
170 #define TARGET_ASM_BYTE_OP "\tdata1\t"
171 #undef TARGET_ASM_ALIGNED_HI_OP
172 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
173 #undef TARGET_ASM_ALIGNED_SI_OP
174 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
175 #undef TARGET_ASM_ALIGNED_DI_OP
176 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
177 #undef TARGET_ASM_UNALIGNED_HI_OP
178 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
179 #undef TARGET_ASM_UNALIGNED_SI_OP
180 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
181 #undef TARGET_ASM_UNALIGNED_DI_OP
182 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
183 #undef TARGET_ASM_INTEGER
184 #define TARGET_ASM_INTEGER ia64_assemble_integer
185
186 #undef TARGET_ASM_FUNCTION_PROLOGUE
187 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
188 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
189 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
190 #undef TARGET_ASM_FUNCTION_EPILOGUE
191 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
192
193 #undef TARGET_SCHED_ADJUST_COST
194 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
195 #undef TARGET_SCHED_ISSUE_RATE
196 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
197 #undef TARGET_SCHED_VARIABLE_ISSUE
198 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
199 #undef TARGET_SCHED_INIT
200 #define TARGET_SCHED_INIT ia64_sched_init
201 #undef TARGET_SCHED_FINISH
202 #define TARGET_SCHED_FINISH ia64_sched_finish
203 #undef TARGET_SCHED_REORDER
204 #define TARGET_SCHED_REORDER ia64_sched_reorder
205 #undef TARGET_SCHED_REORDER2
206 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
207 #undef TARGET_SCHED_CYCLE_DISPLAY
208 #define TARGET_SCHED_CYCLE_DISPLAY ia64_cycle_display
209
210 struct gcc_target targetm = TARGET_INITIALIZER;
211 \f
212 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
213
214 int
215 call_operand (op, mode)
216 rtx op;
217 enum machine_mode mode;
218 {
219 if (mode != GET_MODE (op))
220 return 0;
221
222 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
223 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
224 }
225
226 /* Return 1 if OP refers to a symbol in the sdata section. */
227
228 int
229 sdata_symbolic_operand (op, mode)
230 rtx op;
231 enum machine_mode mode ATTRIBUTE_UNUSED;
232 {
233 switch (GET_CODE (op))
234 {
235 case CONST:
236 if (GET_CODE (XEXP (op, 0)) != PLUS
237 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
238 break;
239 op = XEXP (XEXP (op, 0), 0);
240 /* FALLTHRU */
241
242 case SYMBOL_REF:
243 if (CONSTANT_POOL_ADDRESS_P (op))
244 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
245 else
246 return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;
247
248 default:
249 break;
250 }
251
252 return 0;
253 }
254
255 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
256
257 int
258 got_symbolic_operand (op, mode)
259 rtx op;
260 enum machine_mode mode ATTRIBUTE_UNUSED;
261 {
262 switch (GET_CODE (op))
263 {
264 case CONST:
265 op = XEXP (op, 0);
266 if (GET_CODE (op) != PLUS)
267 return 0;
268 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
269 return 0;
270 op = XEXP (op, 1);
271 if (GET_CODE (op) != CONST_INT)
272 return 0;
273
274 return 1;
275
276 /* Ok if we're not using GOT entries at all. */
277 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
278 return 1;
279
280 /* "Ok" while emitting rtl, since otherwise we won't be provided
281 with the entire offset during emission, which makes it very
282 hard to split the offset into high and low parts. */
283 if (rtx_equal_function_value_matters)
284 return 1;
285
286 /* Force the low 14 bits of the constant to zero so that we do not
287 use up so many GOT entries. */
288 return (INTVAL (op) & 0x3fff) == 0;
289
290 case SYMBOL_REF:
291 case LABEL_REF:
292 return 1;
293
294 default:
295 break;
296 }
297 return 0;
298 }
299
300 /* Return 1 if OP refers to a symbol. */
301
302 int
303 symbolic_operand (op, mode)
304 rtx op;
305 enum machine_mode mode ATTRIBUTE_UNUSED;
306 {
307 switch (GET_CODE (op))
308 {
309 case CONST:
310 case SYMBOL_REF:
311 case LABEL_REF:
312 return 1;
313
314 default:
315 break;
316 }
317 return 0;
318 }
319
320 /* Return 1 if OP refers to a function. */
321
322 int
323 function_operand (op, mode)
324 rtx op;
325 enum machine_mode mode ATTRIBUTE_UNUSED;
326 {
327 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
328 return 1;
329 else
330 return 0;
331 }
332
333 /* Return 1 if OP is setjmp or a similar function. */
334
335 /* ??? This is an unsatisfying solution. Should rethink. */
336
337 int
338 setjmp_operand (op, mode)
339 rtx op;
340 enum machine_mode mode ATTRIBUTE_UNUSED;
341 {
342 const char *name;
343 int retval = 0;
344
345 if (GET_CODE (op) != SYMBOL_REF)
346 return 0;
347
348 name = XSTR (op, 0);
349
350 /* The following code is borrowed from special_function_p in calls.c. */
351
352 /* Disregard prefix _, __ or __x. */
353 if (name[0] == '_')
354 {
355 if (name[1] == '_' && name[2] == 'x')
356 name += 3;
357 else if (name[1] == '_')
358 name += 2;
359 else
360 name += 1;
361 }
362
363 if (name[0] == 's')
364 {
365 retval
366 = ((name[1] == 'e'
367 && (! strcmp (name, "setjmp")
368 || ! strcmp (name, "setjmp_syscall")))
369 || (name[1] == 'i'
370 && ! strcmp (name, "sigsetjmp"))
371 || (name[1] == 'a'
372 && ! strcmp (name, "savectx")));
373 }
374 else if ((name[0] == 'q' && name[1] == 's'
375 && ! strcmp (name, "qsetjmp"))
376 || (name[0] == 'v' && name[1] == 'f'
377 && ! strcmp (name, "vfork")))
378 retval = 1;
379
380 return retval;
381 }
382
383 /* Return 1 if OP is a general operand, but when pic exclude symbolic
384 operands. */
385
386 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
387 from PREDICATE_CODES. */
388
389 int
390 move_operand (op, mode)
391 rtx op;
392 enum machine_mode mode;
393 {
394 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
395 return 0;
396
397 return general_operand (op, mode);
398 }
399
400 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
401
402 int
403 gr_register_operand (op, mode)
404 rtx op;
405 enum machine_mode mode;
406 {
407 if (! register_operand (op, mode))
408 return 0;
409 if (GET_CODE (op) == SUBREG)
410 op = SUBREG_REG (op);
411 if (GET_CODE (op) == REG)
412 {
413 unsigned int regno = REGNO (op);
414 if (regno < FIRST_PSEUDO_REGISTER)
415 return GENERAL_REGNO_P (regno);
416 }
417 return 1;
418 }
419
420 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
421
422 int
423 fr_register_operand (op, mode)
424 rtx op;
425 enum machine_mode mode;
426 {
427 if (! register_operand (op, mode))
428 return 0;
429 if (GET_CODE (op) == SUBREG)
430 op = SUBREG_REG (op);
431 if (GET_CODE (op) == REG)
432 {
433 unsigned int regno = REGNO (op);
434 if (regno < FIRST_PSEUDO_REGISTER)
435 return FR_REGNO_P (regno);
436 }
437 return 1;
438 }
439
440 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
441
442 int
443 grfr_register_operand (op, mode)
444 rtx op;
445 enum machine_mode mode;
446 {
447 if (! register_operand (op, mode))
448 return 0;
449 if (GET_CODE (op) == SUBREG)
450 op = SUBREG_REG (op);
451 if (GET_CODE (op) == REG)
452 {
453 unsigned int regno = REGNO (op);
454 if (regno < FIRST_PSEUDO_REGISTER)
455 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
456 }
457 return 1;
458 }
459
460 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
461
462 int
463 gr_nonimmediate_operand (op, mode)
464 rtx op;
465 enum machine_mode mode;
466 {
467 if (! nonimmediate_operand (op, mode))
468 return 0;
469 if (GET_CODE (op) == SUBREG)
470 op = SUBREG_REG (op);
471 if (GET_CODE (op) == REG)
472 {
473 unsigned int regno = REGNO (op);
474 if (regno < FIRST_PSEUDO_REGISTER)
475 return GENERAL_REGNO_P (regno);
476 }
477 return 1;
478 }
479
480 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
481
482 int
483 fr_nonimmediate_operand (op, mode)
484 rtx op;
485 enum machine_mode mode;
486 {
487 if (! nonimmediate_operand (op, mode))
488 return 0;
489 if (GET_CODE (op) == SUBREG)
490 op = SUBREG_REG (op);
491 if (GET_CODE (op) == REG)
492 {
493 unsigned int regno = REGNO (op);
494 if (regno < FIRST_PSEUDO_REGISTER)
495 return FR_REGNO_P (regno);
496 }
497 return 1;
498 }
499
500 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
501
502 int
503 grfr_nonimmediate_operand (op, mode)
504 rtx op;
505 enum machine_mode mode;
506 {
507 if (! nonimmediate_operand (op, mode))
508 return 0;
509 if (GET_CODE (op) == SUBREG)
510 op = SUBREG_REG (op);
511 if (GET_CODE (op) == REG)
512 {
513 unsigned int regno = REGNO (op);
514 if (regno < FIRST_PSEUDO_REGISTER)
515 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
516 }
517 return 1;
518 }
519
520 /* Return 1 if OP is a GR register operand, or zero. */
521
522 int
523 gr_reg_or_0_operand (op, mode)
524 rtx op;
525 enum machine_mode mode;
526 {
527 return (op == const0_rtx || gr_register_operand (op, mode));
528 }
529
530 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
531
532 int
533 gr_reg_or_5bit_operand (op, mode)
534 rtx op;
535 enum machine_mode mode;
536 {
537 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
538 || GET_CODE (op) == CONSTANT_P_RTX
539 || gr_register_operand (op, mode));
540 }
541
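/* A note on the CONST_OK_FOR_* tests used by the predicates below: they
   correspond to the ia64 constraint letters defined in ia64.h.  As a rough
   guide (an assumption based on the standard ia64 constraints, not restated
   in this file): I is a 14 bit signed immediate, J a 22 bit signed
   immediate, K an 8 bit signed immediate, L an 8 bit "adjusted" immediate,
   and M a 6 bit unsigned shift count.  */
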
542 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
543
544 int
545 gr_reg_or_6bit_operand (op, mode)
546 rtx op;
547 enum machine_mode mode;
548 {
549 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
550 || GET_CODE (op) == CONSTANT_P_RTX
551 || gr_register_operand (op, mode));
552 }
553
554 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
555
556 int
557 gr_reg_or_8bit_operand (op, mode)
558 rtx op;
559 enum machine_mode mode;
560 {
561 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
562 || GET_CODE (op) == CONSTANT_P_RTX
563 || gr_register_operand (op, mode));
564 }
565
566 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
567
568 int
569 grfr_reg_or_8bit_operand (op, mode)
570 rtx op;
571 enum machine_mode mode;
572 {
573 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
574 || GET_CODE (op) == CONSTANT_P_RTX
575 || grfr_register_operand (op, mode));
576 }
577
578 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
579 operand. */
580
581 int
582 gr_reg_or_8bit_adjusted_operand (op, mode)
583 rtx op;
584 enum machine_mode mode;
585 {
586 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
587 || GET_CODE (op) == CONSTANT_P_RTX
588 || gr_register_operand (op, mode));
589 }
590
591 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
592 immediate and an 8 bit adjusted immediate operand. This is necessary
593 because when we emit a compare, we don't know what the condition will be,
594 so we need the union of the immediates accepted by GT and LT. */
595
596 int
597 gr_reg_or_8bit_and_adjusted_operand (op, mode)
598 rtx op;
599 enum machine_mode mode;
600 {
601 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
602 && CONST_OK_FOR_L (INTVAL (op)))
603 || GET_CODE (op) == CONSTANT_P_RTX
604 || gr_register_operand (op, mode));
605 }
606
607 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
608
609 int
610 gr_reg_or_14bit_operand (op, mode)
611 rtx op;
612 enum machine_mode mode;
613 {
614 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
615 || GET_CODE (op) == CONSTANT_P_RTX
616 || gr_register_operand (op, mode));
617 }
618
619 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
620
621 int
622 gr_reg_or_22bit_operand (op, mode)
623 rtx op;
624 enum machine_mode mode;
625 {
626 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
627 || GET_CODE (op) == CONSTANT_P_RTX
628 || gr_register_operand (op, mode));
629 }
630
631 /* Return 1 if OP is a 6 bit immediate operand. */
632
633 int
634 shift_count_operand (op, mode)
635 rtx op;
636 enum machine_mode mode ATTRIBUTE_UNUSED;
637 {
638 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
639 || GET_CODE (op) == CONSTANT_P_RTX);
640 }
641
642 /* Return 1 if OP is a 5 bit immediate operand. */
643
644 int
645 shift_32bit_count_operand (op, mode)
646 rtx op;
647 enum machine_mode mode ATTRIBUTE_UNUSED;
648 {
649 return ((GET_CODE (op) == CONST_INT
650 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
651 || GET_CODE (op) == CONSTANT_P_RTX);
652 }
653
654 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
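/* (These are the scale factors supported by the ia64 shladd instruction,
   which shifts by a count of 1 to 4 before adding.)  */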
655
656 int
657 shladd_operand (op, mode)
658 rtx op;
659 enum machine_mode mode ATTRIBUTE_UNUSED;
660 {
661 return (GET_CODE (op) == CONST_INT
662 && (INTVAL (op) == 2 || INTVAL (op) == 4
663 || INTVAL (op) == 8 || INTVAL (op) == 16));
664 }
665
666 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
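/* (These are the only increment values the ia64 fetchadd instruction
   accepts.)  */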
667
668 int
669 fetchadd_operand (op, mode)
670 rtx op;
671 enum machine_mode mode ATTRIBUTE_UNUSED;
672 {
673 return (GET_CODE (op) == CONST_INT
674 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
675 INTVAL (op) == -4 || INTVAL (op) == -1 ||
676 INTVAL (op) == 1 || INTVAL (op) == 4 ||
677 INTVAL (op) == 8 || INTVAL (op) == 16));
678 }
679
680 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
681
682 int
683 fr_reg_or_fp01_operand (op, mode)
684 rtx op;
685 enum machine_mode mode;
686 {
687 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
688 || fr_register_operand (op, mode));
689 }
690
691 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
692 POST_MODIFY with a REG as displacement. */
693
694 int
695 destination_operand (op, mode)
696 rtx op;
697 enum machine_mode mode;
698 {
699 if (! nonimmediate_operand (op, mode))
700 return 0;
701 if (GET_CODE (op) == MEM
702 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
703 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
704 return 0;
705 return 1;
706 }
707
708 /* Like memory_operand, but don't allow post-increments. */
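/* (Rtx class 'a' covers the autoincrement address forms: PRE_INC, PRE_DEC,
   POST_INC, POST_DEC, PRE_MODIFY and POST_MODIFY.)  */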
709
710 int
711 not_postinc_memory_operand (op, mode)
712 rtx op;
713 enum machine_mode mode;
714 {
715 return (memory_operand (op, mode)
716 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
717 }
718
719 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
720 signed immediate operand. */
721
722 int
723 normal_comparison_operator (op, mode)
724 register rtx op;
725 enum machine_mode mode;
726 {
727 enum rtx_code code = GET_CODE (op);
728 return ((mode == VOIDmode || GET_MODE (op) == mode)
729 && (code == EQ || code == NE
730 || code == GT || code == LE || code == GTU || code == LEU));
731 }
732
733 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
734 signed immediate operand. */
735
736 int
737 adjusted_comparison_operator (op, mode)
738 register rtx op;
739 enum machine_mode mode;
740 {
741 enum rtx_code code = GET_CODE (op);
742 return ((mode == VOIDmode || GET_MODE (op) == mode)
743 && (code == LT || code == GE || code == LTU || code == GEU));
744 }
745
746 /* Return 1 if this is a signed inequality operator. */
747
748 int
749 signed_inequality_operator (op, mode)
750 register rtx op;
751 enum machine_mode mode;
752 {
753 enum rtx_code code = GET_CODE (op);
754 return ((mode == VOIDmode || GET_MODE (op) == mode)
755 && (code == GE || code == GT
756 || code == LE || code == LT));
757 }
758
759 /* Return 1 if this operator is valid for predication. */
760
761 int
762 predicate_operator (op, mode)
763 register rtx op;
764 enum machine_mode mode;
765 {
766 enum rtx_code code = GET_CODE (op);
767 return ((GET_MODE (op) == mode || mode == VOIDmode)
768 && (code == EQ || code == NE));
769 }
770
771 /* Return 1 if this operator can be used in a conditional operation. */
772
773 int
774 condop_operator (op, mode)
775 register rtx op;
776 enum machine_mode mode;
777 {
778 enum rtx_code code = GET_CODE (op);
779 return ((GET_MODE (op) == mode || mode == VOIDmode)
780 && (code == PLUS || code == MINUS || code == AND
781 || code == IOR || code == XOR));
782 }
783
784 /* Return 1 if this is the ar.lc register. */
785
786 int
787 ar_lc_reg_operand (op, mode)
788 register rtx op;
789 enum machine_mode mode;
790 {
791 return (GET_MODE (op) == DImode
792 && (mode == DImode || mode == VOIDmode)
793 && GET_CODE (op) == REG
794 && REGNO (op) == AR_LC_REGNUM);
795 }
796
797 /* Return 1 if this is the ar.ccv register. */
798
799 int
800 ar_ccv_reg_operand (op, mode)
801 register rtx op;
802 enum machine_mode mode;
803 {
804 return ((GET_MODE (op) == mode || mode == VOIDmode)
805 && GET_CODE (op) == REG
806 && REGNO (op) == AR_CCV_REGNUM);
807 }
808
809 /* Return 1 if this is the ar.pfs register. */
810
811 int
812 ar_pfs_reg_operand (op, mode)
813 register rtx op;
814 enum machine_mode mode;
815 {
816 return ((GET_MODE (op) == mode || mode == VOIDmode)
817 && GET_CODE (op) == REG
818 && REGNO (op) == AR_PFS_REGNUM);
819 }
820
821 /* Like general_operand, but don't allow (mem (addressof)). */
822
823 int
824 general_tfmode_operand (op, mode)
825 rtx op;
826 enum machine_mode mode;
827 {
828 if (! general_operand (op, mode))
829 return 0;
830 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
831 return 0;
832 return 1;
833 }
834
835 /* Similarly. */
836
837 int
838 destination_tfmode_operand (op, mode)
839 rtx op;
840 enum machine_mode mode;
841 {
842 if (! destination_operand (op, mode))
843 return 0;
844 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
845 return 0;
846 return 1;
847 }
848
849 /* Similarly. */
850
851 int
852 tfreg_or_fp01_operand (op, mode)
853 rtx op;
854 enum machine_mode mode;
855 {
856 if (GET_CODE (op) == SUBREG)
857 return 0;
858 return fr_reg_or_fp01_operand (op, mode);
859 }
860 \f
861 /* Return 1 if the operands of a move are ok. */
862
863 int
864 ia64_move_ok (dst, src)
865 rtx dst, src;
866 {
867 /* If we're under init_recog_no_volatile, we'll not be able to use
868 memory_operand. So check the code directly and don't worry about
869 the validity of the underlying address, which should have been
870 checked elsewhere anyway. */
871 if (GET_CODE (dst) != MEM)
872 return 1;
873 if (GET_CODE (src) == MEM)
874 return 0;
875 if (register_operand (src, VOIDmode))
876 return 1;
877
878   /* Otherwise, this must be a constant, either 0, 0.0, or 1.0.  */
879 if (INTEGRAL_MODE_P (GET_MODE (dst)))
880 return src == const0_rtx;
881 else
882 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
883 }
884
885 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
886 Return the length of the field, or <= 0 on failure. */
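/* Worked example (illustrative): with ROP == 0x7f8 (bits 3..10 set) and
   SHIFT == 3, the shift leaves 0xff, and exact_log2 (0xff + 1) == 8, so the
   field length is 8.  If the shifted value is not a solid block of 1's,
   e.g. 0x5, then exact_log2 (0x6) == -1 and the caller treats it as
   failure.  */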
887
888 int
889 ia64_depz_field_mask (rop, rshift)
890 rtx rop, rshift;
891 {
892 unsigned HOST_WIDE_INT op = INTVAL (rop);
893 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
894
895 /* Get rid of the zero bits we're shifting in. */
896 op >>= shift;
897
898 /* We must now have a solid block of 1's at bit 0. */
899 return exact_log2 (op + 1);
900 }
901
902 /* Expand a symbolic constant load. */
903 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
904
905 void
906 ia64_expand_load_address (dest, src, scratch)
907 rtx dest, src, scratch;
908 {
909 rtx temp;
910
911 /* The destination could be a MEM during initial rtl generation,
912 which isn't a valid destination for the PIC load address patterns. */
913 if (! register_operand (dest, DImode))
914 temp = gen_reg_rtx (DImode);
915 else
916 temp = dest;
917
918 if (TARGET_AUTO_PIC)
919 emit_insn (gen_load_gprel64 (temp, src));
920 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
921 emit_insn (gen_load_fptr (temp, src));
922 else if (sdata_symbolic_operand (src, DImode))
923 emit_insn (gen_load_gprel (temp, src));
924 else if (GET_CODE (src) == CONST
925 && GET_CODE (XEXP (src, 0)) == PLUS
926 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
927 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
928 {
929 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
930 rtx sym = XEXP (XEXP (src, 0), 0);
931 HOST_WIDE_INT ofs, hi, lo;
932
933 /* Split the offset into a sign extended 14-bit low part
934 and a complementary high part. */
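      /* Illustrative example: for an offset of 0x3000, LO becomes -0x1000
	 and HI becomes 0x4000; HI is folded into the symbol address below
	 and LO is added back with a separate add.  */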
935 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
936 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
937 hi = ofs - lo;
938
939 if (! scratch)
940 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
941
942 emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
943 scratch));
944 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
945 }
946 else
947 {
948 rtx insn;
949 if (! scratch)
950 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
951
952 insn = emit_insn (gen_load_symptr (temp, src, scratch));
953 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
954 }
955
956 if (temp != dest)
957 emit_move_insn (dest, temp);
958 }
959
960 rtx
961 ia64_gp_save_reg (setjmp_p)
962 int setjmp_p;
963 {
964 rtx save = cfun->machine->ia64_gp_save;
965
966 if (save != NULL)
967 {
968 /* We can't save GP in a pseudo if we are calling setjmp, because
969 pseudos won't be restored by longjmp. For now, we save it in r4. */
970 /* ??? It would be more efficient to save this directly into a stack
971 slot. Unfortunately, the stack slot address gets cse'd across
972 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
973 place. */
974
975 /* ??? Get the barf bag, Virginia. We've got to replace this thing
976 in place, since this rtx is used in exception handling receivers.
977 Moreover, we must get this rtx out of regno_reg_rtx or reload
978 will do the wrong thing. */
979 unsigned int old_regno = REGNO (save);
980 if (setjmp_p && old_regno != GR_REG (4))
981 {
982 REGNO (save) = GR_REG (4);
983 regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
984 }
985 }
986 else
987 {
988 if (setjmp_p)
989 save = gen_rtx_REG (DImode, GR_REG (4));
990 else if (! optimize)
991 save = gen_rtx_REG (DImode, LOC_REG (0));
992 else
993 save = gen_reg_rtx (DImode);
994 cfun->machine->ia64_gp_save = save;
995 }
996
997 return save;
998 }
999
1000 /* Split a post-reload TImode reference into two DImode components. */
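/* OUT[0] refers to the first doubleword of IN and OUT[1] to the second.
   For the MEM cases the return value is an add insn that the caller must
   emit to point SCRATCH at BASE + 8 before OUT[1] is used; otherwise the
   return value is NULL_RTX.  */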
1001
1002 rtx
1003 ia64_split_timode (out, in, scratch)
1004 rtx out[2];
1005 rtx in, scratch;
1006 {
1007 switch (GET_CODE (in))
1008 {
1009 case REG:
1010 out[0] = gen_rtx_REG (DImode, REGNO (in));
1011 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
1012 return NULL_RTX;
1013
1014 case MEM:
1015 {
1016 rtx base = XEXP (in, 0);
1017
1018 switch (GET_CODE (base))
1019 {
1020 case REG:
1021 out[0] = adjust_address (in, DImode, 0);
1022 break;
1023 case POST_MODIFY:
1024 base = XEXP (base, 0);
1025 out[0] = adjust_address (in, DImode, 0);
1026 break;
1027
1028 /* Since we're changing the mode, we need to change to POST_MODIFY
1029 as well to preserve the size of the increment. Either that or
1030 do the update in two steps, but we've already got this scratch
1031 register handy so let's use it. */
1032 case POST_INC:
1033 base = XEXP (base, 0);
1034 out[0]
1035 = change_address (in, DImode,
1036 gen_rtx_POST_MODIFY
1037 (Pmode, base, plus_constant (base, 16)));
1038 break;
1039 case POST_DEC:
1040 base = XEXP (base, 0);
1041 out[0]
1042 = change_address (in, DImode,
1043 gen_rtx_POST_MODIFY
1044 (Pmode, base, plus_constant (base, -16)));
1045 break;
1046 default:
1047 abort ();
1048 }
1049
1050 if (scratch == NULL_RTX)
1051 abort ();
1052 out[1] = change_address (in, DImode, scratch);
1053 return gen_adddi3 (scratch, base, GEN_INT (8));
1054 }
1055
1056 case CONST_INT:
1057 case CONST_DOUBLE:
1058 split_double (in, &out[0], &out[1]);
1059 return NULL_RTX;
1060
1061 default:
1062 abort ();
1063 }
1064 }
1065
1066 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1067 through memory plus an extra GR scratch register. Except that you can
1068 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1069 SECONDARY_RELOAD_CLASS, but not both.
1070
1071 We got into problems in the first place by allowing a construct like
1072 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1073 This solution attempts to prevent this situation from occurring. When
1074 we see something like the above, we spill the inner register to memory. */
1075
1076 rtx
1077 spill_tfmode_operand (in, force)
1078 rtx in;
1079 int force;
1080 {
1081 if (GET_CODE (in) == SUBREG
1082 && GET_MODE (SUBREG_REG (in)) == TImode
1083 && GET_CODE (SUBREG_REG (in)) == REG)
1084 {
1085 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
1086 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1087 }
1088 else if (force && GET_CODE (in) == REG)
1089 {
1090 rtx mem = gen_mem_addressof (in, NULL_TREE);
1091 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1092 }
1093 else if (GET_CODE (in) == MEM
1094 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1095 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1096 else
1097 return in;
1098 }
1099
1100 /* Emit comparison instruction if necessary, returning the expression
1101 that holds the compare result in the proper mode. */
1102
1103 rtx
1104 ia64_expand_compare (code, mode)
1105 enum rtx_code code;
1106 enum machine_mode mode;
1107 {
1108 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1109 rtx cmp;
1110
1111 /* If we have a BImode input, then we already have a compare result, and
1112 do not need to emit another comparison. */
1113 if (GET_MODE (op0) == BImode)
1114 {
1115 if ((code == NE || code == EQ) && op1 == const0_rtx)
1116 cmp = op0;
1117 else
1118 abort ();
1119 }
1120 else
1121 {
1122 cmp = gen_reg_rtx (BImode);
1123 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1124 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1125 code = NE;
1126 }
1127
1128 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1129 }
1130
1131 /* Emit the appropriate sequence for a call. */
1132
1133 void
1134 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1135 rtx retval;
1136 rtx addr;
1137 rtx nextarg;
1138 int sibcall_p;
1139 {
1140 rtx insn, b0, pfs, gp_save, narg_rtx;
1141 int narg;
1142
1143 addr = XEXP (addr, 0);
1144 b0 = gen_rtx_REG (DImode, R_BR (0));
1145 pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);
1146
1147 if (! nextarg)
1148 narg = 0;
1149 else if (IN_REGNO_P (REGNO (nextarg)))
1150 narg = REGNO (nextarg) - IN_REG (0);
1151 else
1152 narg = REGNO (nextarg) - OUT_REG (0);
1153 narg_rtx = GEN_INT (narg);
1154
1155 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1156 {
1157 if (sibcall_p)
1158 insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
1159 else if (! retval)
1160 insn = gen_call_nopic (addr, narg_rtx, b0);
1161 else
1162 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1163 emit_call_insn (insn);
1164 return;
1165 }
1166
1167 if (sibcall_p)
1168 gp_save = NULL_RTX;
1169 else
1170 gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
1171
1172 /* If this is an indirect call, then we have the address of a descriptor. */
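      /* The descriptor is a pair of 8-byte words: the function's entry point
	 at offset 0 and its gp value at offset 8, which is why we load the
	 new gp from ADDR + 8 below.  */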
1173 if (! symbolic_operand (addr, VOIDmode))
1174 {
1175 rtx dest;
1176
1177 if (! sibcall_p)
1178 emit_move_insn (gp_save, pic_offset_table_rtx);
1179
1180 dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
1181 emit_move_insn (pic_offset_table_rtx,
1182 gen_rtx_MEM (DImode, plus_constant (addr, 8)));
1183
1184 if (sibcall_p)
1185 insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
1186 else if (! retval)
1187 insn = gen_call_pic (dest, narg_rtx, b0);
1188 else
1189 insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
1190 emit_call_insn (insn);
1191
1192 if (! sibcall_p)
1193 emit_move_insn (pic_offset_table_rtx, gp_save);
1194 }
1195 else if (TARGET_CONST_GP)
1196 {
1197 if (sibcall_p)
1198 insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
1199 else if (! retval)
1200 insn = gen_call_nopic (addr, narg_rtx, b0);
1201 else
1202 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1203 emit_call_insn (insn);
1204 }
1205 else
1206 {
1207 if (sibcall_p)
1208 emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0, pfs));
1209 else
1210 {
1211 emit_move_insn (gp_save, pic_offset_table_rtx);
1212
1213 if (! retval)
1214 insn = gen_call_pic (addr, narg_rtx, b0);
1215 else
1216 insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
1217 emit_call_insn (insn);
1218
1219 emit_move_insn (pic_offset_table_rtx, gp_save);
1220 }
1221 }
1222 }
1223 \f
1224 /* Begin the assembly file. */
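/* emit_safe_across_calls lists the predicate registers that are preserved
   across calls; with the usual ia64 conventions this typically comes out as
   a single directive such as
	.pred.safe_across_calls p1-p5,p16-p63
   (illustrative; the exact ranges depend on call_used_regs).  */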
1225
1226 void
1227 emit_safe_across_calls (f)
1228 FILE *f;
1229 {
1230 unsigned int rs, re;
1231 int out_state;
1232
1233 rs = 1;
1234 out_state = 0;
1235 while (1)
1236 {
1237 while (rs < 64 && call_used_regs[PR_REG (rs)])
1238 rs++;
1239 if (rs >= 64)
1240 break;
1241 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1242 continue;
1243 if (out_state == 0)
1244 {
1245 fputs ("\t.pred.safe_across_calls ", f);
1246 out_state = 1;
1247 }
1248 else
1249 fputc (',', f);
1250 if (re == rs + 1)
1251 fprintf (f, "p%u", rs);
1252 else
1253 fprintf (f, "p%u-p%u", rs, re - 1);
1254 rs = re + 1;
1255 }
1256 if (out_state)
1257 fputc ('\n', f);
1258 }
1259
1260
1261 /* Structure to be filled in by ia64_compute_frame_size with register
1262 save masks and offsets for the current function. */
1263
1264 struct ia64_frame_info
1265 {
1266 HOST_WIDE_INT total_size; /* size of the stack frame, not including
1267 the caller's scratch area. */
1268 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
1269 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
1270 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
1271 HARD_REG_SET mask; /* mask of saved registers. */
1272 unsigned int gr_used_mask; /* mask of registers in use as gr spill
1273 registers or long-term scratches. */
1274 int n_spilled; /* number of spilled registers. */
1275 int reg_fp; /* register for fp. */
1276 int reg_save_b0; /* save register for b0. */
1277 int reg_save_pr; /* save register for prs. */
1278 int reg_save_ar_pfs; /* save register for ar.pfs. */
1279 int reg_save_ar_unat; /* save register for ar.unat. */
1280 int reg_save_ar_lc; /* save register for ar.lc. */
1281 int n_input_regs; /* number of input registers used. */
1282 int n_local_regs; /* number of local registers used. */
1283 int n_output_regs; /* number of output registers used. */
1284 int n_rotate_regs; /* number of rotating registers used. */
1285
1286 char need_regstk; /* true if a .regstk directive needed. */
1287 char initialized; /* true if the data is finalized. */
1288 };
1289
1290 /* Current frame information calculated by ia64_compute_frame_size. */
1291 static struct ia64_frame_info current_frame_info;
1292
1293 /* Helper function for ia64_compute_frame_size: find an appropriate general
1294    register to spill some special register to.  The GR0 to GR31 bits of
1295    current_frame_info.gr_used_mask track registers already allocated by this routine.
1296 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1297
1298 static int
1299 find_gr_spill (try_locals)
1300 int try_locals;
1301 {
1302 int regno;
1303
1304 /* If this is a leaf function, first try an otherwise unused
1305 call-clobbered register. */
1306 if (current_function_is_leaf)
1307 {
1308 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1309 if (! regs_ever_live[regno]
1310 && call_used_regs[regno]
1311 && ! fixed_regs[regno]
1312 && ! global_regs[regno]
1313 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1314 {
1315 current_frame_info.gr_used_mask |= 1 << regno;
1316 return regno;
1317 }
1318 }
1319
1320 if (try_locals)
1321 {
1322 regno = current_frame_info.n_local_regs;
1323 /* If there is a frame pointer, then we can't use loc79, because
1324 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1325 reg_name switching code in ia64_expand_prologue. */
1326 if (regno < (80 - frame_pointer_needed))
1327 {
1328 current_frame_info.n_local_regs = regno + 1;
1329 return LOC_REG (0) + regno;
1330 }
1331 }
1332
1333 /* Failed to find a general register to spill to. Must use stack. */
1334 return 0;
1335 }
1336
1337 /* In order to make for nice schedules, we try to allocate every temporary
1338 to a different register. We must of course stay away from call-saved,
1339 fixed, and global registers. We must also stay away from registers
1340 allocated in current_frame_info.gr_used_mask, since those include regs
1341 used all through the prologue.
1342
1343 Any register allocated here must be used immediately. The idea is to
1344 aid scheduling, not to solve data flow problems. */
1345
1346 static int last_scratch_gr_reg;
1347
1348 static int
1349 next_scratch_gr_reg ()
1350 {
1351 int i, regno;
1352
1353 for (i = 0; i < 32; ++i)
1354 {
1355 regno = (last_scratch_gr_reg + i + 1) & 31;
1356 if (call_used_regs[regno]
1357 && ! fixed_regs[regno]
1358 && ! global_regs[regno]
1359 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1360 {
1361 last_scratch_gr_reg = regno;
1362 return regno;
1363 }
1364 }
1365
1366 /* There must be _something_ available. */
1367 abort ();
1368 }
1369
1370 /* Helper function for ia64_compute_frame_size, called through
1371 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1372
1373 static void
1374 mark_reg_gr_used_mask (reg, data)
1375 rtx reg;
1376 void *data ATTRIBUTE_UNUSED;
1377 {
1378 unsigned int regno = REGNO (reg);
1379 if (regno < 32)
1380 {
1381 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1382 for (i = 0; i < n; ++i)
1383 current_frame_info.gr_used_mask |= 1 << (regno + i);
1384 }
1385 }
1386
1387 /* Compute the size of the stack frame for the current function, filling
1388    in current_frame_info.  SIZE is the number of bytes of space
1389 needed for local variables. */
1390
1391 static void
1392 ia64_compute_frame_size (size)
1393 HOST_WIDE_INT size;
1394 {
1395 HOST_WIDE_INT total_size;
1396 HOST_WIDE_INT spill_size = 0;
1397 HOST_WIDE_INT extra_spill_size = 0;
1398 HOST_WIDE_INT pretend_args_size;
1399 HARD_REG_SET mask;
1400 int n_spilled = 0;
1401 int spilled_gr_p = 0;
1402 int spilled_fr_p = 0;
1403 unsigned int regno;
1404 int i;
1405
1406 if (current_frame_info.initialized)
1407 return;
1408
1409 memset (&current_frame_info, 0, sizeof current_frame_info);
1410 CLEAR_HARD_REG_SET (mask);
1411
1412 /* Don't allocate scratches to the return register. */
1413 diddle_return_value (mark_reg_gr_used_mask, NULL);
1414
1415 /* Don't allocate scratches to the EH scratch registers. */
1416 if (cfun->machine->ia64_eh_epilogue_sp)
1417 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1418 if (cfun->machine->ia64_eh_epilogue_bsp)
1419 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1420
1421 /* Find the size of the register stack frame. We have only 80 local
1422 registers, because we reserve 8 for the inputs and 8 for the
1423 outputs. */
1424
1425 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1426 since we'll be adjusting that down later. */
1427 regno = LOC_REG (78) + ! frame_pointer_needed;
1428 for (; regno >= LOC_REG (0); regno--)
1429 if (regs_ever_live[regno])
1430 break;
1431 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1432
1433 /* For functions marked with the syscall_linkage attribute, we must mark
1434 all eight input registers as in use, so that locals aren't visible to
1435 the caller. */
1436
1437 if (cfun->machine->n_varargs > 0
1438 || lookup_attribute ("syscall_linkage",
1439 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1440 current_frame_info.n_input_regs = 8;
1441 else
1442 {
1443 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1444 if (regs_ever_live[regno])
1445 break;
1446 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1447 }
1448
1449 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1450 if (regs_ever_live[regno])
1451 break;
1452 i = regno - OUT_REG (0) + 1;
1453
1454 /* When -p profiling, we need one output register for the mcount argument.
1455    Likewise for -a profiling for the bb_init_func argument.  For -ax
1456 profiling, we need two output registers for the two bb_init_trace_func
1457 arguments. */
1458 if (profile_flag)
1459 i = MAX (i, 1);
1460 current_frame_info.n_output_regs = i;
1461
1462 /* ??? No rotating register support yet. */
1463 current_frame_info.n_rotate_regs = 0;
1464
1465 /* Discover which registers need spilling, and how much room that
1466 will take. Begin with floating point and general registers,
1467 which will always wind up on the stack. */
1468
1469 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1470 if (regs_ever_live[regno] && ! call_used_regs[regno])
1471 {
1472 SET_HARD_REG_BIT (mask, regno);
1473 spill_size += 16;
1474 n_spilled += 1;
1475 spilled_fr_p = 1;
1476 }
1477
1478 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1479 if (regs_ever_live[regno] && ! call_used_regs[regno])
1480 {
1481 SET_HARD_REG_BIT (mask, regno);
1482 spill_size += 8;
1483 n_spilled += 1;
1484 spilled_gr_p = 1;
1485 }
1486
1487 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1488 if (regs_ever_live[regno] && ! call_used_regs[regno])
1489 {
1490 SET_HARD_REG_BIT (mask, regno);
1491 spill_size += 8;
1492 n_spilled += 1;
1493 }
1494
1495 /* Now come all special registers that might get saved in other
1496 general registers. */
1497
1498 if (frame_pointer_needed)
1499 {
1500 current_frame_info.reg_fp = find_gr_spill (1);
1501 /* If we did not get a register, then we take LOC79. This is guaranteed
1502 to be free, even if regs_ever_live is already set, because this is
1503 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1504 as we don't count loc79 above. */
1505 if (current_frame_info.reg_fp == 0)
1506 {
1507 current_frame_info.reg_fp = LOC_REG (79);
1508 current_frame_info.n_local_regs++;
1509 }
1510 }
1511
1512 if (! current_function_is_leaf)
1513 {
1514 /* Emit a save of BR0 if we call other functions. Do this even
1515 if this function doesn't return, as EH depends on this to be
1516 able to unwind the stack. */
1517 SET_HARD_REG_BIT (mask, BR_REG (0));
1518
1519 current_frame_info.reg_save_b0 = find_gr_spill (1);
1520 if (current_frame_info.reg_save_b0 == 0)
1521 {
1522 spill_size += 8;
1523 n_spilled += 1;
1524 }
1525
1526 /* Similarly for ar.pfs. */
1527 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1528 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1529 if (current_frame_info.reg_save_ar_pfs == 0)
1530 {
1531 extra_spill_size += 8;
1532 n_spilled += 1;
1533 }
1534 }
1535 else
1536 {
1537 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1538 {
1539 SET_HARD_REG_BIT (mask, BR_REG (0));
1540 spill_size += 8;
1541 n_spilled += 1;
1542 }
1543 }
1544
1545 /* Unwind descriptor hackery: things are most efficient if we allocate
1546 consecutive GR save registers for RP, PFS, FP in that order. However,
1547 it is absolutely critical that FP get the only hard register that's
1548 guaranteed to be free, so we allocated it first. If all three did
1549 happen to be allocated hard regs, and are consecutive, rearrange them
1550 into the preferred order now. */
1551 if (current_frame_info.reg_fp != 0
1552 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1553 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1554 {
1555 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1556 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1557 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1558 }
1559
1560 /* See if we need to store the predicate register block. */
1561 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1562 if (regs_ever_live[regno] && ! call_used_regs[regno])
1563 break;
1564 if (regno <= PR_REG (63))
1565 {
1566 SET_HARD_REG_BIT (mask, PR_REG (0));
1567 current_frame_info.reg_save_pr = find_gr_spill (1);
1568 if (current_frame_info.reg_save_pr == 0)
1569 {
1570 extra_spill_size += 8;
1571 n_spilled += 1;
1572 }
1573
1574 /* ??? Mark them all as used so that register renaming and such
1575 are free to use them. */
1576 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1577 regs_ever_live[regno] = 1;
1578 }
1579
1580 /* If we're forced to use st8.spill, we're forced to save and restore
1581 ar.unat as well. */
1582 if (spilled_gr_p || cfun->machine->n_varargs)
1583 {
1584 regs_ever_live[AR_UNAT_REGNUM] = 1;
1585 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1586 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1587 if (current_frame_info.reg_save_ar_unat == 0)
1588 {
1589 extra_spill_size += 8;
1590 n_spilled += 1;
1591 }
1592 }
1593
1594 if (regs_ever_live[AR_LC_REGNUM])
1595 {
1596 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1597 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1598 if (current_frame_info.reg_save_ar_lc == 0)
1599 {
1600 extra_spill_size += 8;
1601 n_spilled += 1;
1602 }
1603 }
1604
1605 /* If we have an odd number of words of pretend arguments written to
1606 the stack, then the FR save area will be unaligned. We round the
1607 size of this area up to keep things 16 byte aligned. */
1608 if (spilled_fr_p)
1609 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1610 else
1611 pretend_args_size = current_function_pretend_args_size;
1612
1613 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1614 + current_function_outgoing_args_size);
1615 total_size = IA64_STACK_ALIGN (total_size);
1616
1617 /* We always use the 16-byte scratch area provided by the caller, but
1618 if we are a leaf function, there's no one to which we need to provide
1619 a scratch area. */
1620 if (current_function_is_leaf)
1621 total_size = MAX (0, total_size - 16);
1622
1623 current_frame_info.total_size = total_size;
1624 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1625 current_frame_info.spill_size = spill_size;
1626 current_frame_info.extra_spill_size = extra_spill_size;
1627 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1628 current_frame_info.n_spilled = n_spilled;
1629 current_frame_info.initialized = reload_completed;
1630 }
1631
1632 /* Compute the initial difference between the specified pair of registers. */
1633
1634 HOST_WIDE_INT
1635 ia64_initial_elimination_offset (from, to)
1636 int from, to;
1637 {
1638 HOST_WIDE_INT offset;
1639
1640 ia64_compute_frame_size (get_frame_size ());
1641 switch (from)
1642 {
1643 case FRAME_POINTER_REGNUM:
1644 if (to == HARD_FRAME_POINTER_REGNUM)
1645 {
1646 if (current_function_is_leaf)
1647 offset = -current_frame_info.total_size;
1648 else
1649 offset = -(current_frame_info.total_size
1650 - current_function_outgoing_args_size - 16);
1651 }
1652 else if (to == STACK_POINTER_REGNUM)
1653 {
1654 if (current_function_is_leaf)
1655 offset = 0;
1656 else
1657 offset = 16 + current_function_outgoing_args_size;
1658 }
1659 else
1660 abort ();
1661 break;
1662
1663 case ARG_POINTER_REGNUM:
1664       /* Arguments start above the 16 byte save area, unless stdarg,
1665 in which case we store through the 16 byte save area. */
1666 if (to == HARD_FRAME_POINTER_REGNUM)
1667 offset = 16 - current_function_pretend_args_size;
1668 else if (to == STACK_POINTER_REGNUM)
1669 offset = (current_frame_info.total_size
1670 + 16 - current_function_pretend_args_size);
1671 else
1672 abort ();
1673 break;
1674
1675 case RETURN_ADDRESS_POINTER_REGNUM:
1676 offset = 0;
1677 break;
1678
1679 default:
1680 abort ();
1681 }
1682
1683 return offset;
1684 }
1685
1686 /* If there are more than a trivial number of register spills, we use
1687 two interleaved iterators so that we can get two memory references
1688 per insn group.
1689
1690 In order to simplify things in the prologue and epilogue expanders,
1691 we use helper functions to fix up the memory references after the
1692 fact with the appropriate offsets to a POST_MODIFY memory mode.
1693 The following data structure tracks the state of the two iterators
1694 while insns are being emitted. */
1695
1696 struct spill_fill_data
1697 {
1698 rtx init_after; /* point at which to emit initializations */
1699 rtx init_reg[2]; /* initial base register */
1700 rtx iter_reg[2]; /* the iterator registers */
1701 rtx *prev_addr[2]; /* address of last memory use */
1702 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
1703 HOST_WIDE_INT prev_off[2]; /* last offset */
1704 int n_iter; /* number of iterators in use */
1705 int next_iter; /* next iterator to use */
1706 unsigned int save_gr_used_mask;
1707 };
1708
1709 static struct spill_fill_data spill_fill_data;
1710
1711 static void
1712 setup_spill_pointers (n_spills, init_reg, cfa_off)
1713 int n_spills;
1714 rtx init_reg;
1715 HOST_WIDE_INT cfa_off;
1716 {
1717 int i;
1718
1719 spill_fill_data.init_after = get_last_insn ();
1720 spill_fill_data.init_reg[0] = init_reg;
1721 spill_fill_data.init_reg[1] = init_reg;
1722 spill_fill_data.prev_addr[0] = NULL;
1723 spill_fill_data.prev_addr[1] = NULL;
1724 spill_fill_data.prev_insn[0] = NULL;
1725 spill_fill_data.prev_insn[1] = NULL;
1726 spill_fill_data.prev_off[0] = cfa_off;
1727 spill_fill_data.prev_off[1] = cfa_off;
1728 spill_fill_data.next_iter = 0;
1729 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1730
1731 spill_fill_data.n_iter = 1 + (n_spills > 2);
1732 for (i = 0; i < spill_fill_data.n_iter; ++i)
1733 {
1734 int regno = next_scratch_gr_reg ();
1735 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1736 current_frame_info.gr_used_mask |= 1 << regno;
1737 }
1738 }
1739
1740 static void
1741 finish_spill_pointers ()
1742 {
1743 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1744 }
1745
1746 static rtx
1747 spill_restore_mem (reg, cfa_off)
1748 rtx reg;
1749 HOST_WIDE_INT cfa_off;
1750 {
1751 int iter = spill_fill_data.next_iter;
1752 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1753 rtx disp_rtx = GEN_INT (disp);
1754 rtx mem;
1755
1756 if (spill_fill_data.prev_addr[iter])
1757 {
1758 if (CONST_OK_FOR_N (disp))
1759 {
1760 *spill_fill_data.prev_addr[iter]
1761 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1762 gen_rtx_PLUS (DImode,
1763 spill_fill_data.iter_reg[iter],
1764 disp_rtx));
1765 REG_NOTES (spill_fill_data.prev_insn[iter])
1766 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
1767 REG_NOTES (spill_fill_data.prev_insn[iter]));
1768 }
1769 else
1770 {
1771 /* ??? Could use register post_modify for loads. */
1772 if (! CONST_OK_FOR_I (disp))
1773 {
1774 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1775 emit_move_insn (tmp, disp_rtx);
1776 disp_rtx = tmp;
1777 }
1778 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1779 spill_fill_data.iter_reg[iter], disp_rtx));
1780 }
1781 }
1782 /* Micro-optimization: if we've created a frame pointer, it's at
1783 CFA 0, which may allow the real iterator to be initialized lower,
1784 slightly increasing parallelism. Also, if there are few saves
1785 it may eliminate the iterator entirely. */
1786 else if (disp == 0
1787 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1788 && frame_pointer_needed)
1789 {
1790 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1791 set_mem_alias_set (mem, get_varargs_alias_set ());
1792 return mem;
1793 }
1794 else
1795 {
1796 rtx seq, insn;
1797
1798 if (disp == 0)
1799 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1800 spill_fill_data.init_reg[iter]);
1801 else
1802 {
1803 start_sequence ();
1804
1805 if (! CONST_OK_FOR_I (disp))
1806 {
1807 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1808 emit_move_insn (tmp, disp_rtx);
1809 disp_rtx = tmp;
1810 }
1811
1812 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1813 spill_fill_data.init_reg[iter],
1814 disp_rtx));
1815
1816 seq = gen_sequence ();
1817 end_sequence ();
1818 }
1819
1820 /* Careful for being the first insn in a sequence. */
1821 if (spill_fill_data.init_after)
1822 insn = emit_insn_after (seq, spill_fill_data.init_after);
1823 else
1824 {
1825 rtx first = get_insns ();
1826 if (first)
1827 insn = emit_insn_before (seq, first);
1828 else
1829 insn = emit_insn (seq);
1830 }
1831 spill_fill_data.init_after = insn;
1832
1833 /* If DISP is 0, we may or may not have a further adjustment
1834 afterward. If we do, then the load/store insn may be modified
1835 to be a post-modify. If we don't, then this copy may be
1836 eliminated by copyprop_hardreg_forward, which makes this
1837 insn garbage, which runs afoul of the sanity check in
1838 propagate_one_insn. So mark this insn as legal to delete. */
1839 if (disp == 0)
1840 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1841 REG_NOTES (insn));
1842 }
1843
1844 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1845
1846 /* ??? Not all of the spills are for varargs, but some of them are.
1847 The rest of the spills belong in an alias set of their own. But
1848 it doesn't actually hurt to include them here. */
1849 set_mem_alias_set (mem, get_varargs_alias_set ());
1850
1851 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1852 spill_fill_data.prev_off[iter] = cfa_off;
1853
1854 if (++iter >= spill_fill_data.n_iter)
1855 iter = 0;
1856 spill_fill_data.next_iter = iter;
1857
1858 return mem;
1859 }
1860
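/* Emit a store of REG into the spill slot at CFA_OFF using MOVE_FN.
   If FRAME_REG is nonnull, mark the insn frame related and attach an
   explicit REG_FRAME_RELATED_EXPR note, since the unwinder cannot be
   expected to follow the interleaved post_modify iterators itself.  */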
1861 static void
1862 do_spill (move_fn, reg, cfa_off, frame_reg)
1863 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1864 rtx reg, frame_reg;
1865 HOST_WIDE_INT cfa_off;
1866 {
1867 int iter = spill_fill_data.next_iter;
1868 rtx mem, insn;
1869
1870 mem = spill_restore_mem (reg, cfa_off);
1871 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1872 spill_fill_data.prev_insn[iter] = insn;
1873
1874 if (frame_reg)
1875 {
1876 rtx base;
1877 HOST_WIDE_INT off;
1878
1879 RTX_FRAME_RELATED_P (insn) = 1;
1880
1881 /* Don't even pretend that the unwind code can intuit its way
1882 through a pair of interleaved post_modify iterators. Just
1883 provide the correct answer. */
1884
1885 if (frame_pointer_needed)
1886 {
1887 base = hard_frame_pointer_rtx;
1888 off = - cfa_off;
1889 }
1890 else
1891 {
1892 base = stack_pointer_rtx;
1893 off = current_frame_info.total_size - cfa_off;
1894 }
1895
1896 REG_NOTES (insn)
1897 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1898 gen_rtx_SET (VOIDmode,
1899 gen_rtx_MEM (GET_MODE (reg),
1900 plus_constant (base, off)),
1901 frame_reg),
1902 REG_NOTES (insn));
1903 }
1904 }
1905
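/* Emit a load of REG from the spill slot at CFA_OFF using MOVE_FN.
   Restores are not frame related, so no unwind annotation is added.  */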
1906 static void
1907 do_restore (move_fn, reg, cfa_off)
1908 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1909 rtx reg;
1910 HOST_WIDE_INT cfa_off;
1911 {
1912 int iter = spill_fill_data.next_iter;
1913 rtx insn;
1914
1915 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1916 GEN_INT (cfa_off)));
1917 spill_fill_data.prev_insn[iter] = insn;
1918 }
1919
1920 /* Wrapper functions that discard the CONST_INT spill offset.  These
1921    exist so that we can give gr_spill/gr_fill the offset they need and
1922    use a consistent function interface.  */
1923
1924 static rtx
1925 gen_movdi_x (dest, src, offset)
1926 rtx dest, src;
1927 rtx offset ATTRIBUTE_UNUSED;
1928 {
1929 return gen_movdi (dest, src);
1930 }
1931
1932 static rtx
1933 gen_fr_spill_x (dest, src, offset)
1934 rtx dest, src;
1935 rtx offset ATTRIBUTE_UNUSED;
1936 {
1937 return gen_fr_spill (dest, src);
1938 }
1939
1940 static rtx
1941 gen_fr_restore_x (dest, src, offset)
1942 rtx dest, src;
1943 rtx offset ATTRIBUTE_UNUSED;
1944 {
1945 return gen_fr_restore (dest, src);
1946 }
1947
1948 /* Called after register allocation to add any instructions needed for the
1949 prologue. Using a prologue insn is favored compared to putting all of the
1950 instructions in output_function_prologue(), since it allows the scheduler
1951 to intermix instructions with the saves of the caller saved registers. In
1952 some cases, it might be necessary to emit a barrier instruction as the last
1953 insn to prevent such scheduling.
1954
1955 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1956 so that the debug info generation code can handle them properly.
1957
1958 The register save area is laid out like so:
1959 cfa+16
1960 [ varargs spill area ]
1961 [ fr register spill area ]
1962 [ br register spill area ]
1963 [ ar register spill area ]
1964 [ pr register spill area ]
1965 [ gr register spill area ] */
1966
1967 /* ??? Get inefficient code when the frame size is larger than can fit in an
1968 adds instruction. */
1969
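/* Rough order of events below: compute the frame layout, emit the
   alloc insn when one is required, adjust the stack pointer, copy
   ar.unat aside, spill the varargs GRs, and then fill the register
   save area: pr, the ar registers, the saved GRs, b0 and the
   remaining BRs, and finally the FRs in 16-byte slots.  */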
1970 void
1971 ia64_expand_prologue ()
1972 {
1973 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1974 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1975 rtx reg, alt_reg;
1976
1977 ia64_compute_frame_size (get_frame_size ());
1978 last_scratch_gr_reg = 15;
1979
1980 /* If there is no epilogue, then we don't need some prologue insns.
1981 We need to avoid emitting the dead prologue insns, because flow
1982 will complain about them. */
1983 if (optimize)
1984 {
1985 edge e;
1986
1987 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1988 if ((e->flags & EDGE_FAKE) == 0
1989 && (e->flags & EDGE_FALLTHRU) != 0)
1990 break;
1991 epilogue_p = (e != NULL);
1992 }
1993 else
1994 epilogue_p = 1;
1995
1996 /* Set the local, input, and output register names. We need to do this
1997 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1998 half. If we use in/loc/out register names, then we get assembler errors
1999 in crtn.S because there is no alloc insn or regstk directive in there. */
2000 if (! TARGET_REG_NAMES)
2001 {
2002 int inputs = current_frame_info.n_input_regs;
2003 int locals = current_frame_info.n_local_regs;
2004 int outputs = current_frame_info.n_output_regs;
2005
2006 for (i = 0; i < inputs; i++)
2007 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2008 for (i = 0; i < locals; i++)
2009 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2010 for (i = 0; i < outputs; i++)
2011 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2012 }
2013
2014 /* Set the frame pointer register name. The regnum is logically loc79,
2015 but of course we'll not have allocated that many locals. Rather than
2016 worrying about renumbering the existing rtxs, we adjust the name. */
2017 /* ??? This code means that we can never use one local register when
2018 there is a frame pointer. loc79 gets wasted in this case, as it is
2019 renamed to a register that will never be used. See also the try_locals
2020 code in find_gr_spill. */
2021 if (current_frame_info.reg_fp)
2022 {
2023 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2024 reg_names[HARD_FRAME_POINTER_REGNUM]
2025 = reg_names[current_frame_info.reg_fp];
2026 reg_names[current_frame_info.reg_fp] = tmp;
2027 }
2028
2029 /* Fix up the return address placeholder. */
2030 /* ??? We can fail if __builtin_return_address is used, and we didn't
2031 allocate a register in which to save b0. I can't think of a way to
2032 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2033 then be sure that I got the right one. Further, reload doesn't seem
2034 to care if an eliminable register isn't used, and "eliminates" it
2035 anyway. */
2036 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2037 && current_frame_info.reg_save_b0 != 0)
2038 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2039
2040 /* We don't need an alloc instruction if we've used no outputs or locals. */
2041 if (current_frame_info.n_local_regs == 0
2042 && current_frame_info.n_output_regs == 0
2043 && current_frame_info.n_input_regs <= current_function_args_info.words)
2044 {
2045 /* If there is no alloc, but there are input registers used, then we
2046 need a .regstk directive. */
2047 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2048 ar_pfs_save_reg = NULL_RTX;
2049 }
2050 else
2051 {
2052 current_frame_info.need_regstk = 0;
2053
2054 if (current_frame_info.reg_save_ar_pfs)
2055 regno = current_frame_info.reg_save_ar_pfs;
2056 else
2057 regno = next_scratch_gr_reg ();
2058 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2059
2060 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2061 GEN_INT (current_frame_info.n_input_regs),
2062 GEN_INT (current_frame_info.n_local_regs),
2063 GEN_INT (current_frame_info.n_output_regs),
2064 GEN_INT (current_frame_info.n_rotate_regs)));
2065 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2066 }
2067
2068 /* Set up frame pointer, stack pointer, and spill iterators. */
2069
2070 n_varargs = cfun->machine->n_varargs;
2071 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2072 stack_pointer_rtx, 0);
2073
2074 if (frame_pointer_needed)
2075 {
2076 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2077 RTX_FRAME_RELATED_P (insn) = 1;
2078 }
2079
2080 if (current_frame_info.total_size != 0)
2081 {
2082 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2083 rtx offset;
2084
2085 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2086 offset = frame_size_rtx;
2087 else
2088 {
2089 regno = next_scratch_gr_reg ();
2090 offset = gen_rtx_REG (DImode, regno);
2091 emit_move_insn (offset, frame_size_rtx);
2092 }
2093
2094 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2095 stack_pointer_rtx, offset));
2096
2097 if (! frame_pointer_needed)
2098 {
2099 RTX_FRAME_RELATED_P (insn) = 1;
2100 if (GET_CODE (offset) != CONST_INT)
2101 {
2102 REG_NOTES (insn)
2103 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2104 gen_rtx_SET (VOIDmode,
2105 stack_pointer_rtx,
2106 gen_rtx_PLUS (DImode,
2107 stack_pointer_rtx,
2108 frame_size_rtx)),
2109 REG_NOTES (insn));
2110 }
2111 }
2112
2113 /* ??? At this point we must generate a magic insn that appears to
2114 modify the stack pointer, the frame pointer, and all spill
2115 iterators. This would allow the most scheduling freedom. For
2116 now, just hard stop. */
2117 emit_insn (gen_blockage ());
2118 }
2119
2120 /* Must copy out ar.unat before doing any integer spills. */
2121 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2122 {
2123 if (current_frame_info.reg_save_ar_unat)
2124 ar_unat_save_reg
2125 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2126 else
2127 {
2128 alt_regno = next_scratch_gr_reg ();
2129 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2130 current_frame_info.gr_used_mask |= 1 << alt_regno;
2131 }
2132
2133 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2134 insn = emit_move_insn (ar_unat_save_reg, reg);
2135 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2136
2137 /* Even if we're not going to generate an epilogue, we still
2138 need to save the register so that EH works. */
2139 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2140 emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
2141 }
2142 else
2143 ar_unat_save_reg = NULL_RTX;
2144
2145 /* Spill all varargs registers. Do this before spilling any GR registers,
2146 since we want the UNAT bits for the GR registers to override the UNAT
2147 bits from varargs, which we don't care about. */
2148
2149 cfa_off = -16;
2150 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2151 {
2152 reg = gen_rtx_REG (DImode, regno);
2153 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2154 }
2155
2156 /* Locate the bottom of the register save area. */
2157 cfa_off = (current_frame_info.spill_cfa_off
2158 + current_frame_info.spill_size
2159 + current_frame_info.extra_spill_size);
2160
2161 /* Save the predicate register block either in a register or in memory. */
2162 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2163 {
2164 reg = gen_rtx_REG (DImode, PR_REG (0));
2165 if (current_frame_info.reg_save_pr != 0)
2166 {
2167 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2168 insn = emit_move_insn (alt_reg, reg);
2169
2170 /* ??? Denote pr spill/fill by a DImode move that modifies all
2171 64 hard registers. */
2172 RTX_FRAME_RELATED_P (insn) = 1;
2173 REG_NOTES (insn)
2174 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2175 gen_rtx_SET (VOIDmode, alt_reg, reg),
2176 REG_NOTES (insn));
2177
2178 /* Even if we're not going to generate an epilogue, we still
2179 need to save the register so that EH works. */
2180 if (! epilogue_p)
2181 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2182 }
2183 else
2184 {
2185 alt_regno = next_scratch_gr_reg ();
2186 alt_reg = gen_rtx_REG (DImode, alt_regno);
2187 insn = emit_move_insn (alt_reg, reg);
2188 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2189 cfa_off -= 8;
2190 }
2191 }
2192
2193 /* Handle AR regs in numerical order. All of them get special handling. */
2194 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2195 && current_frame_info.reg_save_ar_unat == 0)
2196 {
2197 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2198 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2199 cfa_off -= 8;
2200 }
2201
2202 /* The alloc insn already copied ar.pfs into a general register. The
2203 only thing we have to do now is copy that register to a stack slot
2204 if we'd not allocated a local register for the job. */
2205 if (current_frame_info.reg_save_ar_pfs == 0
2206 && ! current_function_is_leaf)
2207 {
2208 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2209 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2210 cfa_off -= 8;
2211 }
2212
2213 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2214 {
2215 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2216 if (current_frame_info.reg_save_ar_lc != 0)
2217 {
2218 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2219 insn = emit_move_insn (alt_reg, reg);
2220 RTX_FRAME_RELATED_P (insn) = 1;
2221
2222 /* Even if we're not going to generate an epilogue, we still
2223 need to save the register so that EH works. */
2224 if (! epilogue_p)
2225 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2226 }
2227 else
2228 {
2229 alt_regno = next_scratch_gr_reg ();
2230 alt_reg = gen_rtx_REG (DImode, alt_regno);
2231 emit_move_insn (alt_reg, reg);
2232 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2233 cfa_off -= 8;
2234 }
2235 }
2236
2237 /* We should now be at the base of the gr/br/fr spill area. */
2238 if (cfa_off != (current_frame_info.spill_cfa_off
2239 + current_frame_info.spill_size))
2240 abort ();
2241
2242 /* Spill all general registers. */
2243 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2244 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2245 {
2246 reg = gen_rtx_REG (DImode, regno);
2247 do_spill (gen_gr_spill, reg, cfa_off, reg);
2248 cfa_off -= 8;
2249 }
2250
2251 /* Handle BR0 specially -- it may be getting stored permanently in
2252 some GR register. */
2253 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2254 {
2255 reg = gen_rtx_REG (DImode, BR_REG (0));
2256 if (current_frame_info.reg_save_b0 != 0)
2257 {
2258 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2259 insn = emit_move_insn (alt_reg, reg);
2260 RTX_FRAME_RELATED_P (insn) = 1;
2261
2262 /* Even if we're not going to generate an epilogue, we still
2263 need to save the register so that EH works. */
2264 if (! epilogue_p)
2265 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2266 }
2267 else
2268 {
2269 alt_regno = next_scratch_gr_reg ();
2270 alt_reg = gen_rtx_REG (DImode, alt_regno);
2271 emit_move_insn (alt_reg, reg);
2272 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2273 cfa_off -= 8;
2274 }
2275 }
2276
2277 /* Spill the rest of the BR registers. */
2278 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2279 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2280 {
2281 alt_regno = next_scratch_gr_reg ();
2282 alt_reg = gen_rtx_REG (DImode, alt_regno);
2283 reg = gen_rtx_REG (DImode, regno);
2284 emit_move_insn (alt_reg, reg);
2285 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2286 cfa_off -= 8;
2287 }
2288
2289 /* Align the frame and spill all FR registers. */
2290 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2291 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2292 {
2293 if (cfa_off & 15)
2294 abort ();
2295 reg = gen_rtx_REG (TFmode, regno);
2296 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2297 cfa_off -= 16;
2298 }
2299
2300 if (cfa_off != current_frame_info.spill_cfa_off)
2301 abort ();
2302
2303 finish_spill_pointers ();
2304 }
2305
2306 /* Called after register allocation to add any instructions needed for the
2307 epilogue. Using an epilogue insn is favored compared to putting all of the
2308 instructions in output_function_epilogue(), since it allows the scheduler
2309 to intermix instructions with the restores of the caller saved registers. In
2310 some cases, it might be necessary to emit a barrier instruction as the last
2311 insn to prevent such scheduling. */
2312
2313 void
2314 ia64_expand_epilogue (sibcall_p)
2315 int sibcall_p;
2316 {
2317 rtx insn, reg, alt_reg, ar_unat_save_reg;
2318 int regno, alt_regno, cfa_off;
2319
2320 ia64_compute_frame_size (get_frame_size ());
2321
2322 /* If there is a frame pointer, then we use it instead of the stack
2323 pointer, so that the stack pointer does not need to be valid when
2324 the epilogue starts. See EXIT_IGNORE_STACK. */
2325 if (frame_pointer_needed)
2326 setup_spill_pointers (current_frame_info.n_spilled,
2327 hard_frame_pointer_rtx, 0);
2328 else
2329 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2330 current_frame_info.total_size);
2331
2332 if (current_frame_info.total_size != 0)
2333 {
2334 /* ??? At this point we must generate a magic insn that appears to
2335 modify the spill iterators and the frame pointer. This would
2336 allow the most scheduling freedom. For now, just hard stop. */
2337 emit_insn (gen_blockage ());
2338 }
2339
2340 /* Locate the bottom of the register save area. */
2341 cfa_off = (current_frame_info.spill_cfa_off
2342 + current_frame_info.spill_size
2343 + current_frame_info.extra_spill_size);
2344
2345 /* Restore the predicate registers. */
2346 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2347 {
2348 if (current_frame_info.reg_save_pr != 0)
2349 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2350 else
2351 {
2352 alt_regno = next_scratch_gr_reg ();
2353 alt_reg = gen_rtx_REG (DImode, alt_regno);
2354 do_restore (gen_movdi_x, alt_reg, cfa_off);
2355 cfa_off -= 8;
2356 }
2357 reg = gen_rtx_REG (DImode, PR_REG (0));
2358 emit_move_insn (reg, alt_reg);
2359 }
2360
2361 /* Restore the application registers. */
2362
2363 /* Load the saved unat from the stack, but do not restore it until
2364 after the GRs have been restored. */
2365 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2366 {
2367 if (current_frame_info.reg_save_ar_unat != 0)
2368 ar_unat_save_reg
2369 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2370 else
2371 {
2372 alt_regno = next_scratch_gr_reg ();
2373 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2374 current_frame_info.gr_used_mask |= 1 << alt_regno;
2375 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2376 cfa_off -= 8;
2377 }
2378 }
2379 else
2380 ar_unat_save_reg = NULL_RTX;
2381
2382 if (current_frame_info.reg_save_ar_pfs != 0)
2383 {
2384 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2385 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2386 emit_move_insn (reg, alt_reg);
2387 }
2388 else if (! current_function_is_leaf)
2389 {
2390 alt_regno = next_scratch_gr_reg ();
2391 alt_reg = gen_rtx_REG (DImode, alt_regno);
2392 do_restore (gen_movdi_x, alt_reg, cfa_off);
2393 cfa_off -= 8;
2394 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2395 emit_move_insn (reg, alt_reg);
2396 }
2397
2398 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2399 {
2400 if (current_frame_info.reg_save_ar_lc != 0)
2401 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2402 else
2403 {
2404 alt_regno = next_scratch_gr_reg ();
2405 alt_reg = gen_rtx_REG (DImode, alt_regno);
2406 do_restore (gen_movdi_x, alt_reg, cfa_off);
2407 cfa_off -= 8;
2408 }
2409 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2410 emit_move_insn (reg, alt_reg);
2411 }
2412
2413 /* We should now be at the base of the gr/br/fr spill area. */
2414 if (cfa_off != (current_frame_info.spill_cfa_off
2415 + current_frame_info.spill_size))
2416 abort ();
2417
2418 /* Restore all general registers. */
2419 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2420 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2421 {
2422 reg = gen_rtx_REG (DImode, regno);
2423 do_restore (gen_gr_restore, reg, cfa_off);
2424 cfa_off -= 8;
2425 }
2426
2427 /* Restore the branch registers. Handle B0 specially, as it may
2428 have gotten stored in some GR register. */
2429 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2430 {
2431 if (current_frame_info.reg_save_b0 != 0)
2432 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2433 else
2434 {
2435 alt_regno = next_scratch_gr_reg ();
2436 alt_reg = gen_rtx_REG (DImode, alt_regno);
2437 do_restore (gen_movdi_x, alt_reg, cfa_off);
2438 cfa_off -= 8;
2439 }
2440 reg = gen_rtx_REG (DImode, BR_REG (0));
2441 emit_move_insn (reg, alt_reg);
2442 }
2443
2444 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2445 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2446 {
2447 alt_regno = next_scratch_gr_reg ();
2448 alt_reg = gen_rtx_REG (DImode, alt_regno);
2449 do_restore (gen_movdi_x, alt_reg, cfa_off);
2450 cfa_off -= 8;
2451 reg = gen_rtx_REG (DImode, regno);
2452 emit_move_insn (reg, alt_reg);
2453 }
2454
2455 /* Restore floating point registers. */
2456 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2457 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2458 {
2459 if (cfa_off & 15)
2460 abort ();
2461 reg = gen_rtx_REG (TFmode, regno);
2462 do_restore (gen_fr_restore_x, reg, cfa_off);
2463 cfa_off -= 16;
2464 }
2465
2466 /* Restore ar.unat for real. */
2467 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2468 {
2469 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2470 emit_move_insn (reg, ar_unat_save_reg);
2471 }
2472
2473 if (cfa_off != current_frame_info.spill_cfa_off)
2474 abort ();
2475
2476 finish_spill_pointers ();
2477
2478 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2479 {
2480 /* ??? At this point we must generate a magic insn that appears to
2481 modify the spill iterators, the stack pointer, and the frame
2482 pointer. This would allow the most scheduling freedom. For now,
2483 just hard stop. */
2484 emit_insn (gen_blockage ());
2485 }
2486
2487 if (cfun->machine->ia64_eh_epilogue_sp)
2488 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2489 else if (frame_pointer_needed)
2490 {
2491 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2492 RTX_FRAME_RELATED_P (insn) = 1;
2493 }
2494 else if (current_frame_info.total_size)
2495 {
2496 rtx offset, frame_size_rtx;
2497
2498 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2499 if (CONST_OK_FOR_I (current_frame_info.total_size))
2500 offset = frame_size_rtx;
2501 else
2502 {
2503 regno = next_scratch_gr_reg ();
2504 offset = gen_rtx_REG (DImode, regno);
2505 emit_move_insn (offset, frame_size_rtx);
2506 }
2507
2508 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2509 offset));
2510
2511 RTX_FRAME_RELATED_P (insn) = 1;
2512 if (GET_CODE (offset) != CONST_INT)
2513 {
2514 REG_NOTES (insn)
2515 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2516 gen_rtx_SET (VOIDmode,
2517 stack_pointer_rtx,
2518 gen_rtx_PLUS (DImode,
2519 stack_pointer_rtx,
2520 frame_size_rtx)),
2521 REG_NOTES (insn));
2522 }
2523 }
2524
2525 if (cfun->machine->ia64_eh_epilogue_bsp)
2526 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2527
2528 if (! sibcall_p)
2529 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2530 else
2531 {
2532 int fp = GR_REG (2);
2533 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
2534 first available call-clobbered register.  If there was a frame pointer
2535 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2536 so we have to make sure we're using the string "r2" when emitting
2537 the register name for the assembler.  */
2538 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2539 fp = HARD_FRAME_POINTER_REGNUM;
2540
2541 /* We must emit an alloc to force the input registers to become output
2542 registers. Otherwise, if the callee tries to pass its parameters
2543 through to another call without an intervening alloc, then these
2544 values get lost. */
2545 /* ??? We don't need to preserve all input registers. We only need to
2546 preserve those input registers used as arguments to the sibling call.
2547 It is unclear how to compute that number here. */
2548 if (current_frame_info.n_input_regs != 0)
2549 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2550 GEN_INT (0), GEN_INT (0),
2551 GEN_INT (current_frame_info.n_input_regs),
2552 GEN_INT (0)));
2553 }
2554 }
2555
2556 /* Return 1 if br.ret can do all the work required to return from a
2557 function. */
2558
2559 int
2560 ia64_direct_return ()
2561 {
2562 if (reload_completed && ! frame_pointer_needed)
2563 {
2564 ia64_compute_frame_size (get_frame_size ());
2565
2566 return (current_frame_info.total_size == 0
2567 && current_frame_info.n_spilled == 0
2568 && current_frame_info.reg_save_b0 == 0
2569 && current_frame_info.reg_save_pr == 0
2570 && current_frame_info.reg_save_ar_pfs == 0
2571 && current_frame_info.reg_save_ar_unat == 0
2572 && current_frame_info.reg_save_ar_lc == 0);
2573 }
2574 return 0;
2575 }
2576
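/* Return nonzero if it is safe for the register renaming pass to
   rename a use of hard register FROM to hard register TO in the
   current function.  */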
2577 int
2578 ia64_hard_regno_rename_ok (from, to)
2579 int from;
2580 int to;
2581 {
2582 /* Don't clobber any of the registers we reserved for the prologue. */
2583 if (to == current_frame_info.reg_fp
2584 || to == current_frame_info.reg_save_b0
2585 || to == current_frame_info.reg_save_pr
2586 || to == current_frame_info.reg_save_ar_pfs
2587 || to == current_frame_info.reg_save_ar_unat
2588 || to == current_frame_info.reg_save_ar_lc)
2589 return 0;
2590
2591 if (from == current_frame_info.reg_fp
2592 || from == current_frame_info.reg_save_b0
2593 || from == current_frame_info.reg_save_pr
2594 || from == current_frame_info.reg_save_ar_pfs
2595 || from == current_frame_info.reg_save_ar_unat
2596 || from == current_frame_info.reg_save_ar_lc)
2597 return 0;
2598
2599 /* Don't use output registers outside the register frame. */
2600 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2601 return 0;
2602
2603 /* Retain even/oddness on predicate register pairs. */
2604 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2605 return (from & 1) == (to & 1);
2606
2607 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2608 if (from == GR_REG (4) && current_function_calls_setjmp)
2609 return 0;
2610
2611 return 1;
2612 }
2613
2614 /* Target hook for assembling integer objects. Handle word-sized
2615 aligned objects and detect the cases when @fptr is needed. */
2616
2617 static bool
2618 ia64_assemble_integer (x, size, aligned_p)
2619 rtx x;
2620 unsigned int size;
2621 int aligned_p;
2622 {
2623 if (size == UNITS_PER_WORD && aligned_p
2624 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2625 && GET_CODE (x) == SYMBOL_REF
2626 && SYMBOL_REF_FLAG (x))
2627 {
2628 fputs ("\tdata8\t@fptr(", asm_out_file);
2629 output_addr_const (asm_out_file, x);
2630 fputs (")\n", asm_out_file);
2631 return true;
2632 }
2633 return default_assemble_integer (x, size, aligned_p);
2634 }
2635
2636 /* Emit the function prologue. */
2637
2638 static void
2639 ia64_output_function_prologue (file, size)
2640 FILE *file;
2641 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2642 {
2643 int mask, grsave, grsave_prev;
2644
2645 if (current_frame_info.need_regstk)
2646 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2647 current_frame_info.n_input_regs,
2648 current_frame_info.n_local_regs,
2649 current_frame_info.n_output_regs,
2650 current_frame_info.n_rotate_regs);
2651
2652 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2653 return;
2654
2655 /* Emit the .prologue directive. */
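/* The mask passed to .prologue records which of b0, ar.pfs, the frame
   pointer and the predicates were saved to consecutive general registers
   starting at GRSAVE: bit 8 for b0, 4 for ar.pfs, 2 for the frame pointer
   and 1 for the predicate block, mirroring the tests below.  */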
2656
2657 mask = 0;
2658 grsave = grsave_prev = 0;
2659 if (current_frame_info.reg_save_b0 != 0)
2660 {
2661 mask |= 8;
2662 grsave = grsave_prev = current_frame_info.reg_save_b0;
2663 }
2664 if (current_frame_info.reg_save_ar_pfs != 0
2665 && (grsave_prev == 0
2666 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2667 {
2668 mask |= 4;
2669 if (grsave_prev == 0)
2670 grsave = current_frame_info.reg_save_ar_pfs;
2671 grsave_prev = current_frame_info.reg_save_ar_pfs;
2672 }
2673 if (current_frame_info.reg_fp != 0
2674 && (grsave_prev == 0
2675 || current_frame_info.reg_fp == grsave_prev + 1))
2676 {
2677 mask |= 2;
2678 if (grsave_prev == 0)
2679 grsave = HARD_FRAME_POINTER_REGNUM;
2680 grsave_prev = current_frame_info.reg_fp;
2681 }
2682 if (current_frame_info.reg_save_pr != 0
2683 && (grsave_prev == 0
2684 || current_frame_info.reg_save_pr == grsave_prev + 1))
2685 {
2686 mask |= 1;
2687 if (grsave_prev == 0)
2688 grsave = current_frame_info.reg_save_pr;
2689 }
2690
2691 if (mask)
2692 fprintf (file, "\t.prologue %d, %d\n", mask,
2693 ia64_dbx_register_number (grsave));
2694 else
2695 fputs ("\t.prologue\n", file);
2696
2697 /* Emit a .spill directive, if necessary, to relocate the base of
2698 the register spill area. */
2699 if (current_frame_info.spill_cfa_off != -16)
2700 fprintf (file, "\t.spill %ld\n",
2701 (long) (current_frame_info.spill_cfa_off
2702 + current_frame_info.spill_size));
2703 }
2704
2705 /* Emit the .body directive at the scheduled end of the prologue. */
2706
2707 static void
2708 ia64_output_function_end_prologue (file)
2709 FILE *file;
2710 {
2711 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2712 return;
2713
2714 fputs ("\t.body\n", file);
2715 }
2716
2717 /* Emit the function epilogue. */
2718
2719 static void
2720 ia64_output_function_epilogue (file, size)
2721 FILE *file ATTRIBUTE_UNUSED;
2722 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2723 {
2724 int i;
2725
2726 /* Reset from the function's potential modifications. */
2727 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2728
2729 if (current_frame_info.reg_fp)
2730 {
2731 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2732 reg_names[HARD_FRAME_POINTER_REGNUM]
2733 = reg_names[current_frame_info.reg_fp];
2734 reg_names[current_frame_info.reg_fp] = tmp;
2735 }
2736 if (! TARGET_REG_NAMES)
2737 {
2738 for (i = 0; i < current_frame_info.n_input_regs; i++)
2739 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2740 for (i = 0; i < current_frame_info.n_local_regs; i++)
2741 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2742 for (i = 0; i < current_frame_info.n_output_regs; i++)
2743 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2744 }
2745
2746 current_frame_info.initialized = 0;
2747 }
2748
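/* Map REGNO from gcc's internal numbering onto the dbx/debug register
   numbering, folding the stacked in/loc/out registers back onto a dense
   block starting at 32.  For example (illustrative numbers only), with
   2 input and 3 local registers, in1 maps to 33, loc0 to 34 and out0
   to 37.  */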
2749 int
2750 ia64_dbx_register_number (regno)
2751 int regno;
2752 {
2753 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2754 from its home at loc79 to something inside the register frame. We
2755 must perform the same renumbering here for the debug info. */
2756 if (current_frame_info.reg_fp)
2757 {
2758 if (regno == HARD_FRAME_POINTER_REGNUM)
2759 regno = current_frame_info.reg_fp;
2760 else if (regno == current_frame_info.reg_fp)
2761 regno = HARD_FRAME_POINTER_REGNUM;
2762 }
2763
2764 if (IN_REGNO_P (regno))
2765 return 32 + regno - IN_REG (0);
2766 else if (LOC_REGNO_P (regno))
2767 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2768 else if (OUT_REGNO_P (regno))
2769 return (32 + current_frame_info.n_input_regs
2770 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2771 else
2772 return regno;
2773 }
2774
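/* Fill in the trampoline at ADDR so that it transfers control to FNADDR
   with STATIC_CHAIN.  The block is four words long: a fake function
   descriptor naming __ia64_trampoline and ADDR+16, followed by the real
   target descriptor and the static chain.  */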
2775 void
2776 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2777 rtx addr, fnaddr, static_chain;
2778 {
2779 rtx addr_reg, eight = GEN_INT (8);
2780
2781 /* Load up our iterator. */
2782 addr_reg = gen_reg_rtx (Pmode);
2783 emit_move_insn (addr_reg, addr);
2784
2785 /* The first two words are the fake descriptor:
2786 __ia64_trampoline, ADDR+16. */
2787 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2788 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2789 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2790
2791 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2792 copy_to_reg (plus_constant (addr, 16)));
2793 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2794
2795 /* The third word is the target descriptor. */
2796 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2797 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2798
2799 /* The fourth word is the static chain. */
2800 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2801 }
2802 \f
2803 /* Do any needed setup for a variadic function. CUM has not been updated
2804 for the last named argument which has type TYPE and mode MODE.
2805
2806 We generate the actual spill instructions during prologue generation. */
2807
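/* For example (figures are illustrative), a stdarg function whose named
   arguments occupy three slots is left with n_varargs = 5 and a
   pretend_size of 5 * UNITS_PER_WORD; ia64_expand_prologue then spills
   those five incoming GRs into the varargs area of the frame.  */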
2808 void
2809 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2810 CUMULATIVE_ARGS cum;
2811 int int_mode;
2812 tree type;
2813 int * pretend_size;
2814 int second_time ATTRIBUTE_UNUSED;
2815 {
2816 /* If this is a stdarg function, then skip the current argument. */
2817 if (! current_function_varargs)
2818 ia64_function_arg_advance (&cum, int_mode, type, 1);
2819
2820 if (cum.words < MAX_ARGUMENT_SLOTS)
2821 {
2822 int n = MAX_ARGUMENT_SLOTS - cum.words;
2823 *pretend_size = n * UNITS_PER_WORD;
2824 cfun->machine->n_varargs = n;
2825 }
2826 }
2827
2828 /* Check whether TYPE is a homogeneous floating point aggregate.  If
2829    it is, return the mode of the floating point type that appears
2830    in all leaves.  If it is not, return VOIDmode.
2831
2832    An aggregate is a homogeneous floating point aggregate if all
2833    fields/elements in it have the same floating point type (e.g.,
2834    SFmode).  128-bit quad-precision floats are excluded.  */
2835
2836 static enum machine_mode
2837 hfa_element_mode (type, nested)
2838 tree type;
2839 int nested;
2840 {
2841 enum machine_mode element_mode = VOIDmode;
2842 enum machine_mode mode;
2843 enum tree_code code = TREE_CODE (type);
2844 int know_element_mode = 0;
2845 tree t;
2846
2847 switch (code)
2848 {
2849 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2850 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2851 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2852 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2853 case FUNCTION_TYPE:
2854 return VOIDmode;
2855
2856 /* Fortran complex types are supposed to be HFAs, so we need to handle
2857 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2858 types though. */
2859 case COMPLEX_TYPE:
2860 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2861 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2862 * BITS_PER_UNIT, MODE_FLOAT, 0);
2863 else
2864 return VOIDmode;
2865
2866 case REAL_TYPE:
2867 /* ??? Should exclude 128-bit long double here. */
2868 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2869 mode if this is contained within an aggregate. */
2870 if (nested)
2871 return TYPE_MODE (type);
2872 else
2873 return VOIDmode;
2874
2875 case ARRAY_TYPE:
2876 return TYPE_MODE (TREE_TYPE (type));
2877
2878 case RECORD_TYPE:
2879 case UNION_TYPE:
2880 case QUAL_UNION_TYPE:
2881 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2882 {
2883 if (TREE_CODE (t) != FIELD_DECL)
2884 continue;
2885
2886 mode = hfa_element_mode (TREE_TYPE (t), 1);
2887 if (know_element_mode)
2888 {
2889 if (mode != element_mode)
2890 return VOIDmode;
2891 }
2892 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2893 return VOIDmode;
2894 else
2895 {
2896 know_element_mode = 1;
2897 element_mode = mode;
2898 }
2899 }
2900 return element_mode;
2901
2902 default:
2903 /* If we reach here, we probably have some front-end specific type
2904 that the backend doesn't know about. This can happen via the
2905 aggregate_value_p call in init_function_start. All we can do is
2906 ignore unknown tree types. */
2907 return VOIDmode;
2908 }
2909
2910 return VOIDmode;
2911 }
2912
2913 /* Return rtx for register where argument is passed, or zero if it is passed
2914 on the stack. */
2915
2916 /* ??? 128-bit quad-precision floats are always passed in general
2917 registers. */
2918
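/* Illustrative example: a named, prototyped struct { float a, b, c; }
   argument is an SFmode HFA, so it comes back from this function as a
   PARALLEL of three SFmode FR argument registers; once the FP argument
   registers or argument slots run out, the remainder is passed in GR
   registers in DImode (or SImode) chunks, as the loops below arrange.  */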
2919 rtx
2920 ia64_function_arg (cum, mode, type, named, incoming)
2921 CUMULATIVE_ARGS *cum;
2922 enum machine_mode mode;
2923 tree type;
2924 int named;
2925 int incoming;
2926 {
2927 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2928 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2929 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2930 / UNITS_PER_WORD);
2931 int offset = 0;
2932 enum machine_mode hfa_mode = VOIDmode;
2933
2934 /* Integer and float arguments larger than 8 bytes start at the next even
2935 boundary. Aggregates larger than 8 bytes start at the next even boundary
2936 if the aggregate has 16 byte alignment. Net effect is that types with
2937 alignment greater than 8 start at the next even boundary. */
2938 /* ??? The ABI does not specify how to handle aggregates with alignment from
2939 9 to 15 bytes, or greater than 16. We handle them all as if they had
2940 16 byte alignment. Such aggregates can occur only if gcc extensions are
2941 used. */
2942 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2943 : (words > 1))
2944 && (cum->words & 1))
2945 offset = 1;
2946
2947 /* If all argument slots are used, then it must go on the stack. */
2948 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2949 return 0;
2950
2951 /* Check for and handle homogeneous FP aggregates. */
2952 if (type)
2953 hfa_mode = hfa_element_mode (type, 0);
2954
2955 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2956 and unprototyped hfas are passed specially. */
2957 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2958 {
2959 rtx loc[16];
2960 int i = 0;
2961 int fp_regs = cum->fp_regs;
2962 int int_regs = cum->words + offset;
2963 int hfa_size = GET_MODE_SIZE (hfa_mode);
2964 int byte_size;
2965 int args_byte_size;
2966
2967 /* If prototyped, pass it in FR regs then GR regs.
2968 If not prototyped, pass it in both FR and GR regs.
2969
2970 If this is an SFmode aggregate, then it is possible to run out of
2971 FR regs while GR regs are still left. In that case, we pass the
2972 remaining part in the GR regs. */
2973
2974 /* Fill the FP regs. We do this always. We stop if we reach the end
2975 of the argument, the last FP register, or the last argument slot. */
2976
2977 byte_size = ((mode == BLKmode)
2978 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2979 args_byte_size = int_regs * UNITS_PER_WORD;
2980 offset = 0;
2981 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2982 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2983 {
2984 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2985 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2986 + fp_regs)),
2987 GEN_INT (offset));
2988 offset += hfa_size;
2989 args_byte_size += hfa_size;
2990 fp_regs++;
2991 }
2992
2993 /* If no prototype, then the whole thing must go in GR regs. */
2994 if (! cum->prototype)
2995 offset = 0;
2996 /* If this is an SFmode aggregate, then we might have some left over
2997 that needs to go in GR regs. */
2998 else if (byte_size != offset)
2999 int_regs += offset / UNITS_PER_WORD;
3000
3001 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3002
3003 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3004 {
3005 enum machine_mode gr_mode = DImode;
3006
3007 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3008 then this goes in a GR reg left adjusted for little endian, right
3009 adjusted for big endian.  */
3010 /* ??? Currently this is handled wrong, because 4-byte hunks are
3011 always right adjusted/little endian. */
3012 if (offset & 0x4)
3013 gr_mode = SImode;
3014 /* If we have an even 4 byte hunk because the aggregate is a
3015 multiple of 4 bytes in size, then this goes in a GR reg right
3016 adjusted/little endian. */
3017 else if (byte_size - offset == 4)
3018 gr_mode = SImode;
3019 /* Complex floats need to have float mode. */
3020 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3021 gr_mode = hfa_mode;
3022
3023 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3024 gen_rtx_REG (gr_mode, (basereg
3025 + int_regs)),
3026 GEN_INT (offset));
3027 offset += GET_MODE_SIZE (gr_mode);
3028 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3029 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3030 }
3031
3032 /* If we ended up using just one location, just return that one loc. */
3033 if (i == 1)
3034 return XEXP (loc[0], 0);
3035 else
3036 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3037 }
3038
3039 /* Integral and aggregates go in general registers. If we have run out of
3040 FR registers, then FP values must also go in general registers. This can
3041 happen when we have a SFmode HFA. */
3042 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3043 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3044 return gen_rtx_REG (mode, basereg + cum->words + offset);
3045
3046 /* If there is a prototype, then FP values go in a FR register when
3047 named, and in a GR register when unnamed.  */
3048 else if (cum->prototype)
3049 {
3050 if (! named)
3051 return gen_rtx_REG (mode, basereg + cum->words + offset);
3052 else
3053 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3054 }
3055 /* If there is no prototype, then FP values go in both FR and GR
3056 registers. */
3057 else
3058 {
3059 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3060 gen_rtx_REG (mode, (FR_ARG_FIRST
3061 + cum->fp_regs)),
3062 const0_rtx);
3063 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3064 gen_rtx_REG (mode,
3065 (basereg + cum->words
3066 + offset)),
3067 const0_rtx);
3068
3069 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3070 }
3071 }
3072
3073 /* Return number of words, at the beginning of the argument, that must be
3074 put in registers.  0 if the argument is entirely in registers or entirely
3075 in memory. */
3076
3077 int
3078 ia64_function_arg_partial_nregs (cum, mode, type, named)
3079 CUMULATIVE_ARGS *cum;
3080 enum machine_mode mode;
3081 tree type;
3082 int named ATTRIBUTE_UNUSED;
3083 {
3084 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3085 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3086 / UNITS_PER_WORD);
3087 int offset = 0;
3088
3089 /* Arguments with alignment larger than 8 bytes start at the next even
3090 boundary. */
3091 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3092 : (words > 1))
3093 && (cum->words & 1))
3094 offset = 1;
3095
3096 /* If all argument slots are used, then it must go on the stack. */
3097 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3098 return 0;
3099
3100 /* It doesn't matter whether the argument goes in FR or GR regs. If
3101 it fits within the 8 argument slots, then it goes entirely in
3102 registers. If it extends past the last argument slot, then the rest
3103 goes on the stack. */
3104
3105 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3106 return 0;
3107
3108 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3109 }
3110
3111 /* Update CUM to point after this argument. This is patterned after
3112 ia64_function_arg. */
3113
3114 void
3115 ia64_function_arg_advance (cum, mode, type, named)
3116 CUMULATIVE_ARGS *cum;
3117 enum machine_mode mode;
3118 tree type;
3119 int named;
3120 {
3121 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3122 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3123 / UNITS_PER_WORD);
3124 int offset = 0;
3125 enum machine_mode hfa_mode = VOIDmode;
3126
3127 /* If all arg slots are already full, then there is nothing to do. */
3128 if (cum->words >= MAX_ARGUMENT_SLOTS)
3129 return;
3130
3131 /* Arguments with alignment larger than 8 bytes start at the next even
3132 boundary. */
3133 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3134 : (words > 1))
3135 && (cum->words & 1))
3136 offset = 1;
3137
3138 cum->words += words + offset;
3139
3140 /* Check for and handle homogeneous FP aggregates. */
3141 if (type)
3142 hfa_mode = hfa_element_mode (type, 0);
3143
3144 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3145 and unprototyped hfas are passed specially. */
3146 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3147 {
3148 int fp_regs = cum->fp_regs;
3149 /* This is the original value of cum->words + offset. */
3150 int int_regs = cum->words - words;
3151 int hfa_size = GET_MODE_SIZE (hfa_mode);
3152 int byte_size;
3153 int args_byte_size;
3154
3155 /* If prototyped, pass it in FR regs then GR regs.
3156 If not prototyped, pass it in both FR and GR regs.
3157
3158 If this is an SFmode aggregate, then it is possible to run out of
3159 FR regs while GR regs are still left. In that case, we pass the
3160 remaining part in the GR regs. */
3161
3162 /* Fill the FP regs. We do this always. We stop if we reach the end
3163 of the argument, the last FP register, or the last argument slot. */
3164
3165 byte_size = ((mode == BLKmode)
3166 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3167 args_byte_size = int_regs * UNITS_PER_WORD;
3168 offset = 0;
3169 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3170 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3171 {
3172 offset += hfa_size;
3173 args_byte_size += hfa_size;
3174 fp_regs++;
3175 }
3176
3177 cum->fp_regs = fp_regs;
3178 }
3179
3180 /* Integral and aggregates go in general registers. If we have run out of
3181 FR registers, then FP values must also go in general registers. This can
3182 happen when we have a SFmode HFA. */
3183 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3184 return;
3185
3186 /* If there is a prototype, then FP values go in a FR register when
3187 named, and in a GR register when unnamed.  */
3188 else if (cum->prototype)
3189 {
3190 if (! named)
3191 return;
3192 else
3193 /* ??? Complex types should not reach here. */
3194 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3195 }
3196 /* If there is no prototype, then FP values go in both FR and GR
3197 registers. */
3198 else
3199 /* ??? Complex types should not reach here. */
3200 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3201
3202 return;
3203 }
3204 \f
3205 /* Implement va_start. */
3206
3207 void
3208 ia64_va_start (stdarg_p, valist, nextarg)
3209 int stdarg_p;
3210 tree valist;
3211 rtx nextarg;
3212 {
3213 int arg_words;
3214 int ofs;
3215
3216 arg_words = current_function_args_info.words;
3217
3218 if (stdarg_p)
3219 ofs = 0;
3220 else
3221 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3222
3223 nextarg = plus_constant (nextarg, ofs);
3224 std_expand_builtin_va_start (1, valist, nextarg);
3225 }
3226
3227 /* Implement va_arg. */
3228
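/* Arguments whose type is aligned to more than 8 bytes first round the
   va_list pointer up to the next 16-byte boundary (2 * UNITS_PER_WORD)
   and then defer to the generic std_expand_builtin_va_arg handling.  */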
3229 rtx
3230 ia64_va_arg (valist, type)
3231 tree valist, type;
3232 {
3233 tree t;
3234
3235 /* Arguments with alignment larger than 8 bytes start at the next even
3236 boundary. */
3237 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3238 {
3239 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3240 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3241 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3242 build_int_2 (-2 * UNITS_PER_WORD, -1));
3243 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3244 TREE_SIDE_EFFECTS (t) = 1;
3245 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3246 }
3247
3248 return std_expand_builtin_va_arg (valist, type);
3249 }
3250 \f
3251 /* Return 1 if the function return value is returned in memory.  Return 0
3252    if it is in a register.  */
3253
3254 int
3255 ia64_return_in_memory (valtype)
3256 tree valtype;
3257 {
3258 enum machine_mode mode;
3259 enum machine_mode hfa_mode;
3260 HOST_WIDE_INT byte_size;
3261
3262 mode = TYPE_MODE (valtype);
3263 byte_size = GET_MODE_SIZE (mode);
3264 if (mode == BLKmode)
3265 {
3266 byte_size = int_size_in_bytes (valtype);
3267 if (byte_size < 0)
3268 return 1;
3269 }
3270
3271 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3272
3273 hfa_mode = hfa_element_mode (valtype, 0);
3274 if (hfa_mode != VOIDmode)
3275 {
3276 int hfa_size = GET_MODE_SIZE (hfa_mode);
3277
3278 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3279 return 1;
3280 else
3281 return 0;
3282 }
3283 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3284 return 1;
3285 else
3286 return 0;
3287 }
3288
3289 /* Return rtx for register that holds the function return value. */
3290
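/* Illustrative example: an HFA of four floats is returned as a PARALLEL
   of four SFmode registers starting at FR_ARG_FIRST; aggregates too large
   for this scheme were already judged to be returned in memory by
   ia64_return_in_memory.  */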
3291 rtx
3292 ia64_function_value (valtype, func)
3293 tree valtype;
3294 tree func ATTRIBUTE_UNUSED;
3295 {
3296 enum machine_mode mode;
3297 enum machine_mode hfa_mode;
3298
3299 mode = TYPE_MODE (valtype);
3300 hfa_mode = hfa_element_mode (valtype, 0);
3301
3302 if (hfa_mode != VOIDmode)
3303 {
3304 rtx loc[8];
3305 int i;
3306 int hfa_size;
3307 int byte_size;
3308 int offset;
3309
3310 hfa_size = GET_MODE_SIZE (hfa_mode);
3311 byte_size = ((mode == BLKmode)
3312 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3313 offset = 0;
3314 for (i = 0; offset < byte_size; i++)
3315 {
3316 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3317 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3318 GEN_INT (offset));
3319 offset += hfa_size;
3320 }
3321
3322 if (i == 1)
3323 return XEXP (loc[0], 0);
3324 else
3325 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3326 }
3327 else if (FLOAT_TYPE_P (valtype) &&
3328 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3329 return gen_rtx_REG (mode, FR_ARG_FIRST);
3330 else
3331 return gen_rtx_REG (mode, GR_RET_FIRST);
3332 }
3333
3334 /* Print a memory address as an operand to reference that memory location. */
3335
3336 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3337 also call this from ia64_print_operand for memory addresses. */
3338
3339 void
3340 ia64_print_operand_address (stream, address)
3341 FILE * stream ATTRIBUTE_UNUSED;
3342 rtx address ATTRIBUTE_UNUSED;
3343 {
3344 }
3345
3346 /* Print an operand to an assembler instruction.
3347 C Swap and print a comparison operator.
3348 D Print an FP comparison operator.
3349 E Print 32 - constant, for SImode shifts as extract.
3350 e Print 64 - constant, for DImode rotates.
3351 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3352 a floating point register emitted normally.
3353 I Invert a predicate register by adding 1.
3354 J Select the proper predicate register for a condition.
3355 j Select the inverse predicate register for a condition.
3356 O Append .acq for volatile load.
3357 P Postincrement of a MEM.
3358 Q Append .rel for volatile store.
3359 S Shift amount for shladd instruction.
3360 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3361 for Intel assembler.
3362 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3363 for Intel assembler.
3364 r Print register name, or constant 0 as r0. HP compatibility for
3365 Linux kernel. */
3366 void
3367 ia64_print_operand (file, x, code)
3368 FILE * file;
3369 rtx x;
3370 int code;
3371 {
3372 const char *str;
3373
3374 switch (code)
3375 {
3376 case 0:
3377 /* Handled below. */
3378 break;
3379
3380 case 'C':
3381 {
3382 enum rtx_code c = swap_condition (GET_CODE (x));
3383 fputs (GET_RTX_NAME (c), file);
3384 return;
3385 }
3386
3387 case 'D':
3388 switch (GET_CODE (x))
3389 {
3390 case NE:
3391 str = "neq";
3392 break;
3393 case UNORDERED:
3394 str = "unord";
3395 break;
3396 case ORDERED:
3397 str = "ord";
3398 break;
3399 default:
3400 str = GET_RTX_NAME (GET_CODE (x));
3401 break;
3402 }
3403 fputs (str, file);
3404 return;
3405
3406 case 'E':
3407 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3408 return;
3409
3410 case 'e':
3411 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3412 return;
3413
3414 case 'F':
3415 if (x == CONST0_RTX (GET_MODE (x)))
3416 str = reg_names [FR_REG (0)];
3417 else if (x == CONST1_RTX (GET_MODE (x)))
3418 str = reg_names [FR_REG (1)];
3419 else if (GET_CODE (x) == REG)
3420 str = reg_names [REGNO (x)];
3421 else
3422 abort ();
3423 fputs (str, file);
3424 return;
3425
3426 case 'I':
3427 fputs (reg_names [REGNO (x) + 1], file);
3428 return;
3429
3430 case 'J':
3431 case 'j':
3432 {
3433 unsigned int regno = REGNO (XEXP (x, 0));
3434 if (GET_CODE (x) == EQ)
3435 regno += 1;
3436 if (code == 'j')
3437 regno ^= 1;
3438 fputs (reg_names [regno], file);
3439 }
3440 return;
3441
3442 case 'O':
3443 if (MEM_VOLATILE_P (x))
3444 fputs(".acq", file);
3445 return;
3446
3447 case 'P':
3448 {
3449 HOST_WIDE_INT value;
3450
3451 switch (GET_CODE (XEXP (x, 0)))
3452 {
3453 default:
3454 return;
3455
3456 case POST_MODIFY:
3457 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3458 if (GET_CODE (x) == CONST_INT)
3459 value = INTVAL (x);
3460 else if (GET_CODE (x) == REG)
3461 {
3462 fprintf (file, ", %s", reg_names[REGNO (x)]);
3463 return;
3464 }
3465 else
3466 abort ();
3467 break;
3468
3469 case POST_INC:
3470 value = GET_MODE_SIZE (GET_MODE (x));
3471 break;
3472
3473 case POST_DEC:
3474 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3475 break;
3476 }
3477
3478 putc (',', file);
3479 putc (' ', file);
3480 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3481 return;
3482 }
3483
3484 case 'Q':
3485 if (MEM_VOLATILE_P (x))
3486 fputs(".rel", file);
3487 return;
3488
3489 case 'S':
3490 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3491 return;
3492
3493 case 'T':
3494 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3495 {
3496 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3497 return;
3498 }
3499 break;
3500
3501 case 'U':
3502 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3503 {
3504 const char *prefix = "0x";
3505 if (INTVAL (x) & 0x80000000)
3506 {
3507 fprintf (file, "0xffffffff");
3508 prefix = "";
3509 }
3510 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3511 return;
3512 }
3513 break;
3514
3515 case 'r':
3516 /* If this operand is the constant zero, write it as register zero.
3517 Any register, zero, or CONST_INT value is OK here. */
3518 if (GET_CODE (x) == REG)
3519 fputs (reg_names[REGNO (x)], file);
3520 else if (x == CONST0_RTX (GET_MODE (x)))
3521 fputs ("r0", file);
3522 else if (GET_CODE (x) == CONST_INT)
3523 output_addr_const (file, x);
3524 else
3525 output_operand_lossage ("invalid %%r value");
3526 return;
3527
3528 case '+':
3529 {
3530 const char *which;
3531
3532 /* For conditional branches, returns or calls, substitute
3533 sptk, dptk, dpnt, or spnt for %s. */
3534 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3535 if (x)
3536 {
3537 int pred_val = INTVAL (XEXP (x, 0));
3538
3539 /* Guess top and bottom 2% statically predicted.  */
3540 if (pred_val < REG_BR_PROB_BASE / 50)
3541 which = ".spnt";
3542 else if (pred_val < REG_BR_PROB_BASE / 2)
3543 which = ".dpnt";
3544 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3545 which = ".dptk";
3546 else
3547 which = ".sptk";
3548 }
3549 else if (GET_CODE (current_output_insn) == CALL_INSN)
3550 which = ".sptk";
3551 else
3552 which = ".dptk";
3553
3554 fputs (which, file);
3555 return;
3556 }
3557
3558 case ',':
3559 x = current_insn_predicate;
3560 if (x)
3561 {
3562 unsigned int regno = REGNO (XEXP (x, 0));
3563 if (GET_CODE (x) == EQ)
3564 regno += 1;
3565 fprintf (file, "(%s) ", reg_names [regno]);
3566 }
3567 return;
3568
3569 default:
3570 output_operand_lossage ("ia64_print_operand: unknown code");
3571 return;
3572 }
3573
3574 switch (GET_CODE (x))
3575 {
3576 /* This happens for the spill/restore instructions. */
3577 case POST_INC:
3578 case POST_DEC:
3579 case POST_MODIFY:
3580 x = XEXP (x, 0);
3581 /* ... fall through ... */
3582
3583 case REG:
3584 fputs (reg_names [REGNO (x)], file);
3585 break;
3586
3587 case MEM:
3588 {
3589 rtx addr = XEXP (x, 0);
3590 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3591 addr = XEXP (addr, 0);
3592 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3593 break;
3594 }
3595
3596 default:
3597 output_addr_const (file, x);
3598 break;
3599 }
3600
3601 return;
3602 }
3603 \f
3604 /* Calculate the cost of moving data from a register in class FROM to
3605 one in class TO, using MODE. */
3606
3607 int
3608 ia64_register_move_cost (mode, from, to)
3609 enum machine_mode mode;
3610 enum reg_class from, to;
3611 {
3612 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3613 if (to == ADDL_REGS)
3614 to = GR_REGS;
3615 if (from == ADDL_REGS)
3616 from = GR_REGS;
3617
3618 /* All costs are symmetric, so reduce cases by putting the
3619 lower number class as the destination. */
3620 if (from < to)
3621 {
3622 enum reg_class tmp = to;
3623 to = from, from = tmp;
3624 }
3625
3626 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3627 so that we get secondary memory reloads. Between FR_REGS,
3628 we have to make this at least as expensive as MEMORY_MOVE_COST
3629 to avoid spectacularly poor register class preferencing. */
3630 if (mode == TFmode)
3631 {
3632 if (to != GR_REGS || from != GR_REGS)
3633 return MEMORY_MOVE_COST (mode, to, 0);
3634 else
3635 return 3;
3636 }
3637
3638 switch (to)
3639 {
3640 case PR_REGS:
3641 /* Moving between PR registers takes two insns. */
3642 if (from == PR_REGS)
3643 return 3;
3644 /* Moving between PR and anything but GR is impossible. */
3645 if (from != GR_REGS)
3646 return MEMORY_MOVE_COST (mode, to, 0);
3647 break;
3648
3649 case BR_REGS:
3650 /* Moving between BR and anything but GR is impossible. */
3651 if (from != GR_REGS && from != GR_AND_BR_REGS)
3652 return MEMORY_MOVE_COST (mode, to, 0);
3653 break;
3654
3655 case AR_I_REGS:
3656 case AR_M_REGS:
3657 /* Moving between AR and anything but GR is impossible. */
3658 if (from != GR_REGS)
3659 return MEMORY_MOVE_COST (mode, to, 0);
3660 break;
3661
3662 case GR_REGS:
3663 case FR_REGS:
3664 case GR_AND_FR_REGS:
3665 case GR_AND_BR_REGS:
3666 case ALL_REGS:
3667 break;
3668
3669 default:
3670 abort ();
3671 }
3672
3673 return 2;
3674 }
3675
3676 /* This function returns the register class required for a secondary
3677 register when copying between one of the registers in CLASS, and X,
3678 using MODE. A return value of NO_REGS means that no secondary register
3679 is required. */
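/* For instance, a CONST_INT cannot be moved directly into an FR register;
   the FR_REGS case below requests a GR_REGS secondary so that the constant
   is materialized in a general register first.  */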
3680
3681 enum reg_class
3682 ia64_secondary_reload_class (class, mode, x)
3683 enum reg_class class;
3684 enum machine_mode mode ATTRIBUTE_UNUSED;
3685 rtx x;
3686 {
3687 int regno = -1;
3688
3689 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3690 regno = true_regnum (x);
3691
3692 switch (class)
3693 {
3694 case BR_REGS:
3695 case AR_M_REGS:
3696 case AR_I_REGS:
3697 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3698 interaction. We end up with two pseudos with overlapping lifetimes
3699 both of which are equiv to the same constant, and both which need
3700 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3701 changes depending on the path length, which means the qty_first_reg
3702 check in make_regs_eqv can give different answers at different times.
3703 At some point I'll probably need a reload_indi pattern to handle
3704 this.
3705
3706 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3707 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3708 non-general registers for good measure. */
3709 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3710 return GR_REGS;
3711
3712 /* This is needed if a pseudo used as a call_operand gets spilled to a
3713 stack slot. */
3714 if (GET_CODE (x) == MEM)
3715 return GR_REGS;
3716 break;
3717
3718 case FR_REGS:
3719 /* Need to go through general registers to get to other class regs. */
3720 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3721 return GR_REGS;
3722
3723 /* This can happen when a paradoxical subreg is an operand to the
3724 muldi3 pattern. */
3725 /* ??? This shouldn't be necessary after instruction scheduling is
3726 enabled, because paradoxical subregs are not accepted by
3727 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3728 stop the paradoxical subreg stupidity in the *_operand functions
3729 in recog.c. */
3730 if (GET_CODE (x) == MEM
3731 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3732 || GET_MODE (x) == QImode))
3733 return GR_REGS;
3734
3735 /* This can happen because of the ior/and/etc patterns that accept FP
3736 registers as operands. If the third operand is a constant, then it
3737 needs to be reloaded into a FP register. */
3738 if (GET_CODE (x) == CONST_INT)
3739 return GR_REGS;
3740
3741 /* This can happen because of register elimination in a muldi3 insn.
3742 E.g. `26107 * (unsigned long)&u'. */
3743 if (GET_CODE (x) == PLUS)
3744 return GR_REGS;
3745 break;
3746
3747 case PR_REGS:
3748 /* ??? This happens if we cse/gcse a BImode value across a call,
3749 and the function has a nonlocal goto. This is because global
3750 does not allocate call crossing pseudos to hard registers when
3751 current_function_has_nonlocal_goto is true. This is relatively
3752 common for C++ programs that use exceptions. To reproduce,
3753 return NO_REGS and compile libstdc++. */
3754 if (GET_CODE (x) == MEM)
3755 return GR_REGS;
3756
3757 /* This can happen when we take a BImode subreg of a DImode value,
3758 and that DImode value winds up in some non-GR register. */
3759 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3760 return GR_REGS;
3761 break;
3762
3763 case GR_REGS:
3764 /* Since we have no offsettable memory addresses, we need a temporary
3765 to hold the address of the second word. */
3766 if (mode == TImode)
3767 return GR_REGS;
3768 break;
3769
3770 default:
3771 break;
3772 }
3773
3774 return NO_REGS;
3775 }
3776
3777 \f
3778 /* Emit text to declare externally defined variables and functions, because
3779 the Intel assembler does not support undefined externals. */
3780
3781 void
3782 ia64_asm_output_external (file, decl, name)
3783 FILE *file;
3784 tree decl;
3785 const char *name;
3786 {
3787 int save_referenced;
3788
3789 /* GNU as does not need anything here. */
3790 if (TARGET_GNU_AS)
3791 return;
3792
3793 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3794 the linker when we do this, so we need to be careful not to do this for
3795 builtin functions which have no library equivalent. Unfortunately, we
3796 can't tell here whether or not a function will actually be called by
3797 expand_expr, so we pull in library functions even if we may not need
3798 them later. */
3799 if (! strcmp (name, "__builtin_next_arg")
3800 || ! strcmp (name, "alloca")
3801 || ! strcmp (name, "__builtin_constant_p")
3802 || ! strcmp (name, "__builtin_args_info"))
3803 return;
3804
3805 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3806 restore it. */
3807 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3808 if (TREE_CODE (decl) == FUNCTION_DECL)
3809 {
3810 fprintf (file, "%s", TYPE_ASM_OP);
3811 assemble_name (file, name);
3812 putc (',', file);
3813 fprintf (file, TYPE_OPERAND_FMT, "function");
3814 putc ('\n', file);
3815 }
3816 ASM_GLOBALIZE_LABEL (file, name);
3817 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3818 }
3819 \f
3820 /* Parse the -mfixed-range= option string. */
3821
3822 static void
3823 fix_range (const_str)
3824 const char *const_str;
3825 {
3826 int i, first, last;
3827 char *str, *dash, *comma;
3828
3829 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3830 REG2 are either register names or register numbers. The effect
3831 of this option is to mark the registers in the range from REG1 to
3832 REG2 as ``fixed'' so they won't be used by the compiler. This is
3833 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
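/* Several ranges may be given, separated by commas, e.g.
   "f32-f127,f16-f31" (an arbitrary example); each iteration of the loop
   below handles one range.  */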
3834
3835 i = strlen (const_str);
3836 str = (char *) alloca (i + 1);
3837 memcpy (str, const_str, i + 1);
3838
3839 while (1)
3840 {
3841 dash = strchr (str, '-');
3842 if (!dash)
3843 {
3844 warning ("value of -mfixed-range must have form REG1-REG2");
3845 return;
3846 }
3847 *dash = '\0';
3848
3849 comma = strchr (dash + 1, ',');
3850 if (comma)
3851 *comma = '\0';
3852
3853 first = decode_reg_name (str);
3854 if (first < 0)
3855 {
3856 warning ("unknown register name: %s", str);
3857 return;
3858 }
3859
3860 last = decode_reg_name (dash + 1);
3861 if (last < 0)
3862 {
3863 warning ("unknown register name: %s", dash + 1);
3864 return;
3865 }
3866
3867 *dash = '-';
3868
3869 if (first > last)
3870 {
3871 warning ("%s-%s is an empty range", str, dash + 1);
3872 return;
3873 }
3874
3875 for (i = first; i <= last; ++i)
3876 fixed_regs[i] = call_used_regs[i] = 1;
3877
3878 if (!comma)
3879 break;
3880
3881 *comma = ',';
3882 str = comma + 1;
3883 }
3884 }
3885
3886 /* Called to register all of our global variables with the garbage
3887 collector. */
3888
3889 static void
3890 ia64_add_gc_roots ()
3891 {
3892 ggc_add_rtx_root (&ia64_compare_op0, 1);
3893 ggc_add_rtx_root (&ia64_compare_op1, 1);
3894 }
3895
3896 static void
3897 ia64_init_machine_status (p)
3898 struct function *p;
3899 {
3900 p->machine =
3901 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3902 }
3903
3904 static void
3905 ia64_mark_machine_status (p)
3906 struct function *p;
3907 {
3908 struct machine_function *machine = p->machine;
3909
3910 if (machine)
3911 {
3912 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3913 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3914 ggc_mark_rtx (machine->ia64_gp_save);
3915 }
3916 }
3917
3918 static void
3919 ia64_free_machine_status (p)
3920 struct function *p;
3921 {
3922 free (p->machine);
3923 p->machine = NULL;
3924 }
3925
3926 /* Handle TARGET_OPTIONS switches. */
3927
3928 void
3929 ia64_override_options ()
3930 {
3931 if (TARGET_AUTO_PIC)
3932 target_flags |= MASK_CONST_GP;
3933
3934 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3935 {
3936 warning ("cannot optimize division for both latency and throughput");
3937 target_flags &= ~MASK_INLINE_DIV_THR;
3938 }
3939
3940 if (ia64_fixed_range_string)
3941 fix_range (ia64_fixed_range_string);
3942
3943 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3944 flag_schedule_insns_after_reload = 0;
3945
3946 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3947
3948 init_machine_status = ia64_init_machine_status;
3949 mark_machine_status = ia64_mark_machine_status;
3950 free_machine_status = ia64_free_machine_status;
3951
3952 ia64_add_gc_roots ();
3953 }
3954 \f
3955 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3956 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3957 static enum attr_type ia64_safe_type PARAMS((rtx));
3958
3959 static enum attr_itanium_requires_unit0
3960 ia64_safe_itanium_requires_unit0 (insn)
3961 rtx insn;
3962 {
3963 if (recog_memoized (insn) >= 0)
3964 return get_attr_itanium_requires_unit0 (insn);
3965 else
3966 return ITANIUM_REQUIRES_UNIT0_NO;
3967 }
3968
3969 static enum attr_itanium_class
3970 ia64_safe_itanium_class (insn)
3971 rtx insn;
3972 {
3973 if (recog_memoized (insn) >= 0)
3974 return get_attr_itanium_class (insn);
3975 else
3976 return ITANIUM_CLASS_UNKNOWN;
3977 }
3978
3979 static enum attr_type
3980 ia64_safe_type (insn)
3981 rtx insn;
3982 {
3983 if (recog_memoized (insn) >= 0)
3984 return get_attr_type (insn);
3985 else
3986 return TYPE_UNKNOWN;
3987 }
3988 \f
3989 /* The following collection of routines emit instruction group stop bits as
3990 necessary to avoid dependencies. */
3991
3992 /* Need to track some additional registers as far as serialization is
3993 concerned so we can properly handle br.call and br.ret. We could
3994 make these registers visible to gcc, but since these registers are
3995 never explicitly used in gcc generated code, it seems wasteful to
3996 do so (plus it would make the call and return patterns needlessly
3997 complex). */
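/* For example, a br.call writes b0 (REG_RP) and ar.pfs and modifies the
   current frame marker, even though no gcc-generated rtl mentions those
   registers; the CALL case in rtx_needs_barrier records writes to REG_RP,
   AR_PFS_REGNUM and REG_AR_CFM so that later readers in the same group
   force a stop bit.  */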
3998 #define REG_GP (GR_REG (1))
3999 #define REG_RP (BR_REG (0))
4000 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4001 /* This is used for volatile asms which may require a stop bit immediately
4002 before and after them. */
4003 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4004 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4005 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4006
4007 /* For each register, we keep track of how it has been written in the
4008 current instruction group.
4009
4010 If a register is written unconditionally (no qualifying predicate),
4011 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4012
4013 If a register is written if its qualifying predicate P is true, we
4014 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4015 may be written again by the complement of P (P^1) and when this happens,
4016 WRITE_COUNT gets set to 2.
4017
4018 The result of this is that whenever an insn attempts to write a register
4019 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4020
4021 If a predicate register is written by a floating-point insn, we set
4022 WRITTEN_BY_FP to true.
4023
4024 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4025 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
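/* A concrete (hypothetical) sequence: "(p6) mov r14 = 1" sets r14's
   WRITE_COUNT to 1 with FIRST_PRED p6; a following "(p7) mov r14 = 2",
   with p7 the complement of p6, raises WRITE_COUNT to 2; any further
   write to r14 within the group then requires a stop bit first.  */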
4026
4027 struct reg_write_state
4028 {
4029 unsigned int write_count : 2;
4030 unsigned int first_pred : 16;
4031 unsigned int written_by_fp : 1;
4032 unsigned int written_by_and : 1;
4033 unsigned int written_by_or : 1;
4034 };
4035
4036 /* Cumulative info for the current instruction group. */
4037 struct reg_write_state rws_sum[NUM_REGS];
4038 /* Info for the current instruction. This gets copied to rws_sum after a
4039 stop bit is emitted. */
4040 struct reg_write_state rws_insn[NUM_REGS];
4041
4042 /* Indicates whether this is the first instruction after a stop bit,
4043 in which case we don't need another stop bit. Without this, we hit
4044 the abort in ia64_variable_issue when scheduling an alloc. */
4045 static int first_instruction;
4046
4047 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4048 RTL for one instruction. */
4049 struct reg_flags
4050 {
4051 unsigned int is_write : 1; /* Is register being written? */
4052 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4053 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4054 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4055 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4056 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4057 };
4058
4059 static void rws_update PARAMS ((struct reg_write_state *, int,
4060 struct reg_flags, int));
4061 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4062 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4063 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4064 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4065 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4066 static void init_insn_group_barriers PARAMS ((void));
4067 static int group_barrier_needed_p PARAMS ((rtx));
4068 static int safe_group_barrier_needed_p PARAMS ((rtx));
4069
4070 /* Update *RWS for REGNO, which is being written by the current instruction,
4071 with predicate PRED, and associated register flags in FLAGS. */
4072
4073 static void
4074 rws_update (rws, regno, flags, pred)
4075 struct reg_write_state *rws;
4076 int regno;
4077 struct reg_flags flags;
4078 int pred;
4079 {
4080 if (pred)
4081 rws[regno].write_count++;
4082 else
4083 rws[regno].write_count = 2;
4084 rws[regno].written_by_fp |= flags.is_fp;
4085 /* ??? Not tracking and/or across differing predicates. */
4086 rws[regno].written_by_and = flags.is_and;
4087 rws[regno].written_by_or = flags.is_or;
4088 rws[regno].first_pred = pred;
4089 }
4090
4091 /* Handle an access to register REGNO of type FLAGS using predicate register
4092 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4093 a dependency with an earlier instruction in the same group. */
4094
4095 static int
4096 rws_access_regno (regno, flags, pred)
4097 int regno;
4098 struct reg_flags flags;
4099 int pred;
4100 {
4101 int need_barrier = 0;
4102
4103 if (regno >= NUM_REGS)
4104 abort ();
4105
4106 if (! PR_REGNO_P (regno))
4107 flags.is_and = flags.is_or = 0;
4108
4109 if (flags.is_write)
4110 {
4111 int write_count;
4112
4113 /* One insn writes same reg multiple times? */
4114 if (rws_insn[regno].write_count > 0)
4115 abort ();
4116
4117 /* Update info for current instruction. */
4118 rws_update (rws_insn, regno, flags, pred);
4119 write_count = rws_sum[regno].write_count;
4120
4121 switch (write_count)
4122 {
4123 case 0:
4124 /* The register has not been written yet. */
4125 rws_update (rws_sum, regno, flags, pred);
4126 break;
4127
4128 case 1:
4129 /* The register has been written via a predicate. If this is
4130 not a complementary predicate, then we need a barrier. */
4131 /* ??? This assumes that P and P+1 are always complementary
4132 predicates for P even. */
4133 if (flags.is_and && rws_sum[regno].written_by_and)
4134 ;
4135 else if (flags.is_or && rws_sum[regno].written_by_or)
4136 ;
4137 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4138 need_barrier = 1;
4139 rws_update (rws_sum, regno, flags, pred);
4140 break;
4141
4142 case 2:
4143 /* The register has been unconditionally written already. We
4144 need a barrier. */
4145 if (flags.is_and && rws_sum[regno].written_by_and)
4146 ;
4147 else if (flags.is_or && rws_sum[regno].written_by_or)
4148 ;
4149 else
4150 need_barrier = 1;
4151 rws_sum[regno].written_by_and = flags.is_and;
4152 rws_sum[regno].written_by_or = flags.is_or;
4153 break;
4154
4155 default:
4156 abort ();
4157 }
4158 }
4159 else
4160 {
4161 if (flags.is_branch)
4162 {
4163 /* Branches have several RAW exceptions that allow us to avoid
4164 barriers. */
4165
4166 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4167 /* RAW dependencies on branch regs are permissible as long
4168 as the writer is a non-branch instruction. Since we
4169 never generate code that uses a branch register written
4170 by a branch instruction, handling this case is
4171 easy. */
4172 return 0;
4173
4174 if (REGNO_REG_CLASS (regno) == PR_REGS
4175 && ! rws_sum[regno].written_by_fp)
4176 /* The predicates of a branch are available within the
4177 same insn group as long as the predicate was written by
4178 something other than a floating-point instruction. */
4179 return 0;
4180 }
4181
4182 if (flags.is_and && rws_sum[regno].written_by_and)
4183 return 0;
4184 if (flags.is_or && rws_sum[regno].written_by_or)
4185 return 0;
4186
4187 switch (rws_sum[regno].write_count)
4188 {
4189 case 0:
4190 /* The register has not been written yet. */
4191 break;
4192
4193 case 1:
4194 /* The register has been written via a predicate. If this is
4195 not a complementary predicate, then we need a barrier. */
4196 /* ??? This assumes that P and P+1 are always complementary
4197 predicates for P even. */
4198 if ((rws_sum[regno].first_pred ^ 1) != pred)
4199 need_barrier = 1;
4200 break;
4201
4202 case 2:
4203 /* The register has been unconditionally written already. We
4204 need a barrier. */
4205 need_barrier = 1;
4206 break;
4207
4208 default:
4209 abort ();
4210 }
4211 }
4212
4213 return need_barrier;
4214 }
4215
4216 static int
4217 rws_access_reg (reg, flags, pred)
4218 rtx reg;
4219 struct reg_flags flags;
4220 int pred;
4221 {
4222 int regno = REGNO (reg);
4223 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4224
4225 if (n == 1)
4226 return rws_access_regno (regno, flags, pred);
4227 else
4228 {
4229 int need_barrier = 0;
4230 while (--n >= 0)
4231 need_barrier |= rws_access_regno (regno + n, flags, pred);
4232 return need_barrier;
4233 }
4234 }
4235
4236 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4237 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4238
4239 static void
4240 update_set_flags (x, pflags, ppred, pcond)
4241 rtx x;
4242 struct reg_flags *pflags;
4243 int *ppred;
4244 rtx *pcond;
4245 {
4246 rtx src = SET_SRC (x);
4247
4248 *pcond = 0;
4249
4250 switch (GET_CODE (src))
4251 {
4252 case CALL:
4253 return;
4254
4255 case IF_THEN_ELSE:
4256 if (SET_DEST (x) == pc_rtx)
4257 /* X is a conditional branch. */
4258 return;
4259 else
4260 {
4261 int is_complemented = 0;
4262
4263 /* X is a conditional move. */
4264 rtx cond = XEXP (src, 0);
4265 if (GET_CODE (cond) == EQ)
4266 is_complemented = 1;
4267 cond = XEXP (cond, 0);
4268 if (GET_CODE (cond) != REG
4269 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4270 abort ();
4271 *pcond = cond;
4272 if (XEXP (src, 1) == SET_DEST (x)
4273 || XEXP (src, 2) == SET_DEST (x))
4274 {
4275 /* X is a conditional move that conditionally writes the
4276 destination. */
4277
4278 /* We need another complement in this case. */
4279 if (XEXP (src, 1) == SET_DEST (x))
4280 is_complemented = ! is_complemented;
4281
4282 *ppred = REGNO (cond);
4283 if (is_complemented)
4284 ++*ppred;
4285 }
4286
4287 /* ??? If this is a conditional write to the dest, then this
4288 instruction does not actually read one source. This probably
4289 doesn't matter, because that source is also the dest. */
4290 /* ??? Multiple writes to predicate registers are allowed
4291 if they are all AND type compares, or if they are all OR
4292 type compares. We do not generate such instructions
4293 currently. */
4294 }
4295 /* ... fall through ... */
4296
4297 default:
4298 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4299 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4300 /* Set pflags->is_fp to 1 so that we know we're dealing
4301 with a floating point comparison when processing the
4302 destination of the SET. */
4303 pflags->is_fp = 1;
4304
4305 /* Discover if this is a parallel comparison. We only handle
4306 and.orcm and or.andcm at present, since we must retain a
4307 strict inverse on the predicate pair. */
4308 else if (GET_CODE (src) == AND)
4309 pflags->is_and = 1;
4310 else if (GET_CODE (src) == IOR)
4311 pflags->is_or = 1;
4312
4313 break;
4314 }
4315 }
4316
4317 /* Subroutine of rtx_needs_barrier; this function determines whether the
4318 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4319 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4320 for this insn. */
4321
4322 static int
4323 set_src_needs_barrier (x, flags, pred, cond)
4324 rtx x;
4325 struct reg_flags flags;
4326 int pred;
4327 rtx cond;
4328 {
4329 int need_barrier = 0;
4330 rtx dst;
4331 rtx src = SET_SRC (x);
4332
4333 if (GET_CODE (src) == CALL)
4334 /* We don't need to worry about the result registers that
4335 get written by subroutine call. */
4336 return rtx_needs_barrier (src, flags, pred);
4337 else if (SET_DEST (x) == pc_rtx)
4338 {
4339 /* X is a conditional branch. */
4340 /* ??? This seems redundant, as the caller sets this bit for
4341 all JUMP_INSNs. */
4342 flags.is_branch = 1;
4343 return rtx_needs_barrier (src, flags, pred);
4344 }
4345
4346 need_barrier = rtx_needs_barrier (src, flags, pred);
4347
4348 /* This instruction unconditionally uses a predicate register. */
4349 if (cond)
4350 need_barrier |= rws_access_reg (cond, flags, 0);
4351
4352 dst = SET_DEST (x);
4353 if (GET_CODE (dst) == ZERO_EXTRACT)
4354 {
4355 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4356 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4357 dst = XEXP (dst, 0);
4358 }
4359 return need_barrier;
4360 }
4361
4362 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4363 Return 1 if this access creates a dependency with an earlier instruction
4364 in the same group. */
4365
4366 static int
4367 rtx_needs_barrier (x, flags, pred)
4368 rtx x;
4369 struct reg_flags flags;
4370 int pred;
4371 {
4372 int i, j;
4373 int is_complemented = 0;
4374 int need_barrier = 0;
4375 const char *format_ptr;
4376 struct reg_flags new_flags;
4377 rtx cond = 0;
4378
4379 if (! x)
4380 return 0;
4381
4382 new_flags = flags;
4383
4384 switch (GET_CODE (x))
4385 {
4386 case SET:
4387 update_set_flags (x, &new_flags, &pred, &cond);
4388 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4389 if (GET_CODE (SET_SRC (x)) != CALL)
4390 {
4391 new_flags.is_write = 1;
4392 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4393 }
4394 break;
4395
4396 case CALL:
4397 new_flags.is_write = 0;
4398 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4399
4400 /* Avoid multiple register writes, in case this is a pattern with
4401 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4402 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4403 {
4404 new_flags.is_write = 1;
4405 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4406 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4407 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4408 }
4409 break;
4410
4411 case COND_EXEC:
4412 /* X is a predicated instruction. */
4413
4414 cond = COND_EXEC_TEST (x);
4415 if (pred)
4416 abort ();
4417 need_barrier = rtx_needs_barrier (cond, flags, 0);
4418
4419 if (GET_CODE (cond) == EQ)
4420 is_complemented = 1;
4421 cond = XEXP (cond, 0);
4422 if (GET_CODE (cond) != REG
4423 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4424 abort ();
4425 pred = REGNO (cond);
4426 if (is_complemented)
4427 ++pred;
4428
4429 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4430 return need_barrier;
4431
4432 case CLOBBER:
4433 case USE:
4434 /* Clobber & use are for earlier compiler-phases only. */
4435 break;
4436
4437 case ASM_OPERANDS:
4438 case ASM_INPUT:
4439 /* We always emit stop bits for traditional asms. We emit stop bits
4440 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4441 if (GET_CODE (x) != ASM_OPERANDS
4442 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4443 {
4444 /* Avoid writing the register multiple times if we have multiple
4445 asm outputs. This avoids an abort in rws_access_reg. */
4446 if (! rws_insn[REG_VOLATILE].write_count)
4447 {
4448 new_flags.is_write = 1;
4449 rws_access_regno (REG_VOLATILE, new_flags, pred);
4450 }
4451 return 1;
4452 }
4453
4454 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4455 We cannot just fall through here, since we would then be confused
4456 by the ASM_INPUT rtxs inside the ASM_OPERANDS, which, unlike their
4457 normal usage, do not indicate traditional asms. */
4458
4459 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4460 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4461 need_barrier = 1;
4462 break;
4463
4464 case PARALLEL:
4465 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4466 {
4467 rtx pat = XVECEXP (x, 0, i);
4468 if (GET_CODE (pat) == SET)
4469 {
4470 update_set_flags (pat, &new_flags, &pred, &cond);
4471 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4472 }
4473 else if (GET_CODE (pat) == USE
4474 || GET_CODE (pat) == CALL
4475 || GET_CODE (pat) == ASM_OPERANDS)
4476 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4477 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4478 abort ();
4479 }
4480 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4481 {
4482 rtx pat = XVECEXP (x, 0, i);
4483 if (GET_CODE (pat) == SET)
4484 {
4485 if (GET_CODE (SET_SRC (pat)) != CALL)
4486 {
4487 new_flags.is_write = 1;
4488 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4489 pred);
4490 }
4491 }
4492 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4493 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4494 }
4495 break;
4496
4497 case SUBREG:
4498 x = SUBREG_REG (x);
4499 /* FALLTHRU */
4500 case REG:
4501 if (REGNO (x) == AR_UNAT_REGNUM)
4502 {
4503 for (i = 0; i < 64; ++i)
4504 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4505 }
4506 else
4507 need_barrier = rws_access_reg (x, flags, pred);
4508 break;
4509
4510 case MEM:
4511 /* Find the regs used in memory address computation. */
4512 new_flags.is_write = 0;
4513 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4514 break;
4515
4516 case CONST_INT: case CONST_DOUBLE:
4517 case SYMBOL_REF: case LABEL_REF: case CONST:
4518 break;
4519
4520 /* Operators with side-effects. */
4521 case POST_INC: case POST_DEC:
4522 if (GET_CODE (XEXP (x, 0)) != REG)
4523 abort ();
4524
4525 new_flags.is_write = 0;
4526 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4527 new_flags.is_write = 1;
4528 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4529 break;
4530
4531 case POST_MODIFY:
4532 if (GET_CODE (XEXP (x, 0)) != REG)
4533 abort ();
4534
4535 new_flags.is_write = 0;
4536 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4537 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4538 new_flags.is_write = 1;
4539 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4540 break;
4541
4542 /* Handle common unary and binary ops for efficiency. */
4543 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4544 case MOD: case UDIV: case UMOD: case AND: case IOR:
4545 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4546 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4547 case NE: case EQ: case GE: case GT: case LE:
4548 case LT: case GEU: case GTU: case LEU: case LTU:
4549 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4550 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4551 break;
4552
4553 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4554 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4555 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4556 case SQRT: case FFS:
4557 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4558 break;
4559
4560 case UNSPEC:
4561 switch (XINT (x, 1))
4562 {
4563 case 1: /* st8.spill */
4564 case 2: /* ld8.fill */
4565 {
4566 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4567 HOST_WIDE_INT bit = (offset >> 3) & 63;
4568
4569 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4570 new_flags.is_write = (XINT (x, 1) == 1);
4571 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4572 new_flags, pred);
4573 break;
4574 }
4575
4576 case 3: /* stf.spill */
4577 case 4: /* ldf.spill */
4578 case 8: /* popcnt */
4579 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4580 break;
4581
4582 case 7: /* pred_rel_mutex */
4583 case 9: /* pic call */
4584 case 12: /* mf */
4585 case 19: /* fetchadd_acq */
4586 case 20: /* mov = ar.bsp */
4587 case 21: /* flushrs */
4588 case 22: /* bundle selector */
4589 case 23: /* cycle display */
4590 break;
4591
4592 case 24: /* addp4 */
4593 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4594 break;
4595
4596 case 5: /* recip_approx */
4597 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4598 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4599 break;
4600
4601 case 13: /* cmpxchg_acq */
4602 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4603 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4604 break;
4605
4606 default:
4607 abort ();
4608 }
4609 break;
4610
4611 case UNSPEC_VOLATILE:
4612 switch (XINT (x, 1))
4613 {
4614 case 0: /* alloc */
4615 /* Alloc must always be the first instruction of a group.
4616 We force this by always returning true. */
4617 /* ??? We might get better scheduling if we explicitly check for
4618 input/local/output register dependencies, and modify the
4619 scheduler so that alloc is always reordered to the start of
4620 the current group. We could then eliminate all of the
4621 first_instruction code. */
4622 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4623
4624 new_flags.is_write = 1;
4625 rws_access_regno (REG_AR_CFM, new_flags, pred);
4626 return 1;
4627
4628 case 1: /* blockage */
4629 case 2: /* insn group barrier */
4630 return 0;
4631
4632 case 5: /* set_bsp */
4633 need_barrier = 1;
4634 break;
4635
4636 case 7: /* pred.rel.mutex */
4637 case 8: /* safe_across_calls all */
4638 case 9: /* safe_across_calls normal */
4639 return 0;
4640
4641 default:
4642 abort ();
4643 }
4644 break;
4645
4646 case RETURN:
4647 new_flags.is_write = 0;
4648 need_barrier = rws_access_regno (REG_RP, flags, pred);
4649 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4650
4651 new_flags.is_write = 1;
4652 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4653 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4654 break;
4655
4656 default:
4657 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4658 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4659 switch (format_ptr[i])
4660 {
4661 case '0': /* unused field */
4662 case 'i': /* integer */
4663 case 'n': /* note */
4664 case 'w': /* wide integer */
4665 case 's': /* pointer to string */
4666 case 'S': /* optional pointer to string */
4667 break;
4668
4669 case 'e':
4670 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4671 need_barrier = 1;
4672 break;
4673
4674 case 'E':
4675 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4676 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4677 need_barrier = 1;
4678 break;
4679
4680 default:
4681 abort ();
4682 }
4683 break;
4684 }
4685 return need_barrier;
4686 }
4687
4688 /* Clear out the state for group_barrier_needed_p at the start of a
4689 sequence of insns. */
4690
4691 static void
4692 init_insn_group_barriers ()
4693 {
4694 memset (rws_sum, 0, sizeof (rws_sum));
4695 first_instruction = 1;
4696 }
4697
4698 /* Given the current state, recorded by previous calls to this function,
4699 determine whether a group barrier (a stop bit) is necessary before INSN.
4700 Return nonzero if so. */
4701
4702 static int
4703 group_barrier_needed_p (insn)
4704 rtx insn;
4705 {
4706 rtx pat;
4707 int need_barrier = 0;
4708 struct reg_flags flags;
4709
4710 memset (&flags, 0, sizeof (flags));
4711 switch (GET_CODE (insn))
4712 {
4713 case NOTE:
4714 break;
4715
4716 case BARRIER:
4717 /* A barrier doesn't imply an instruction group boundary. */
4718 break;
4719
4720 case CODE_LABEL:
4721 memset (rws_insn, 0, sizeof (rws_insn));
4722 return 1;
4723
4724 case CALL_INSN:
4725 flags.is_branch = 1;
4726 flags.is_sibcall = SIBLING_CALL_P (insn);
4727 memset (rws_insn, 0, sizeof (rws_insn));
4728
4729 /* Don't bundle a call following another call. */
4730 if ((pat = prev_active_insn (insn))
4731 && GET_CODE (pat) == CALL_INSN)
4732 {
4733 need_barrier = 1;
4734 break;
4735 }
4736
4737 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4738 break;
4739
4740 case JUMP_INSN:
4741 flags.is_branch = 1;
4742
4743 /* Don't bundle a jump following a call. */
4744 if ((pat = prev_active_insn (insn))
4745 && GET_CODE (pat) == CALL_INSN)
4746 {
4747 need_barrier = 1;
4748 break;
4749 }
4750 /* FALLTHRU */
4751
4752 case INSN:
4753 if (GET_CODE (PATTERN (insn)) == USE
4754 || GET_CODE (PATTERN (insn)) == CLOBBER)
4755 /* Don't care about USE and CLOBBER "insns"---those are used to
4756 indicate to the optimizer that it shouldn't get rid of
4757 certain operations. */
4758 break;
4759
4760 pat = PATTERN (insn);
4761
4762 /* Ug. Hack hacks hacked elsewhere. */
4763 switch (recog_memoized (insn))
4764 {
4765 /* We play dependency tricks with the epilogue in order
4766 to get proper schedules. Undo this for dv analysis. */
4767 case CODE_FOR_epilogue_deallocate_stack:
4768 pat = XVECEXP (pat, 0, 0);
4769 break;
4770
4771 /* The pattern we use for br.cloop confuses the code above.
4772 The second element of the vector is representative. */
4773 case CODE_FOR_doloop_end_internal:
4774 pat = XVECEXP (pat, 0, 1);
4775 break;
4776
4777 /* Doesn't generate code. */
4778 case CODE_FOR_pred_rel_mutex:
4779 return 0;
4780
4781 default:
4782 break;
4783 }
4784
4785 memset (rws_insn, 0, sizeof (rws_insn));
4786 need_barrier = rtx_needs_barrier (pat, flags, 0);
4787
4788 /* Check to see if the previous instruction was a volatile
4789 asm. */
4790 if (! need_barrier)
4791 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4792 break;
4793
4794 default:
4795 abort ();
4796 }
4797
4798 if (first_instruction)
4799 {
4800 need_barrier = 0;
4801 first_instruction = 0;
4802 }
4803
4804 return need_barrier;
4805 }
4806
4807 /* Like group_barrier_needed_p, but do not clobber the current state. */
4808
4809 static int
4810 safe_group_barrier_needed_p (insn)
4811 rtx insn;
4812 {
4813 struct reg_write_state rws_saved[NUM_REGS];
4814 int saved_first_instruction;
4815 int t;
4816
4817 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4818 saved_first_instruction = first_instruction;
4819
4820 t = group_barrier_needed_p (insn);
4821
4822 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4823 first_instruction = saved_first_instruction;
4824
4825 return t;
4826 }
4827
4828 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
4829 as necessary to eliminate dependencies. This function assumes that
4830 a final instruction scheduling pass has been run which has already
4831 inserted most of the necessary stop bits. This function only inserts
4832 new ones at basic block boundaries, since these are invisible to the
4833 scheduler. */
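/* Roughly: we scan forward remembering the last label (or basic block
   note) seen; when an insn turns out to need a barrier, we back up, emit
   the stop bit before that label, and rescan from the label with a fresh
   serialization state.  */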
4834
4835 static void
4836 emit_insn_group_barriers (dump, insns)
4837 FILE *dump;
4838 rtx insns;
4839 {
4840 rtx insn;
4841 rtx last_label = 0;
4842 int insns_since_last_label = 0;
4843
4844 init_insn_group_barriers ();
4845
4846 for (insn = insns; insn; insn = NEXT_INSN (insn))
4847 {
4848 if (GET_CODE (insn) == CODE_LABEL)
4849 {
4850 if (insns_since_last_label)
4851 last_label = insn;
4852 insns_since_last_label = 0;
4853 }
4854 else if (GET_CODE (insn) == NOTE
4855 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4856 {
4857 if (insns_since_last_label)
4858 last_label = insn;
4859 insns_since_last_label = 0;
4860 }
4861 else if (GET_CODE (insn) == INSN
4862 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4863 && XINT (PATTERN (insn), 1) == 2)
4864 {
4865 init_insn_group_barriers ();
4866 last_label = 0;
4867 }
4868 else if (INSN_P (insn))
4869 {
4870 insns_since_last_label = 1;
4871
4872 if (group_barrier_needed_p (insn))
4873 {
4874 if (last_label)
4875 {
4876 if (dump)
4877 fprintf (dump, "Emitting stop before label %d\n",
4878 INSN_UID (last_label));
4879 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4880 insn = last_label;
4881
4882 init_insn_group_barriers ();
4883 last_label = 0;
4884 }
4885 }
4886 }
4887 }
4888 }
4889
4890 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4891 This function has to emit all necessary group barriers. */
4892
4893 static void
4894 emit_all_insn_group_barriers (dump, insns)
4895 FILE *dump ATTRIBUTE_UNUSED;
4896 rtx insns;
4897 {
4898 rtx insn;
4899
4900 init_insn_group_barriers ();
4901
4902 for (insn = insns; insn; insn = NEXT_INSN (insn))
4903 {
4904 if (GET_CODE (insn) == BARRIER)
4905 {
4906 rtx last = prev_active_insn (insn);
4907
4908 if (! last)
4909 continue;
4910 if (GET_CODE (last) == JUMP_INSN
4911 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
4912 last = prev_active_insn (last);
4913 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
4914 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
4915
4916 init_insn_group_barriers ();
4917 }
4918 else if (INSN_P (insn))
4919 {
4920 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
4921 init_insn_group_barriers ();
4922 else if (group_barrier_needed_p (insn))
4923 {
4924 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4925 init_insn_group_barriers ();
4926 group_barrier_needed_p (insn);
4927 }
4928 }
4929 }
4930 }
4931 \f
4932 static int errata_find_address_regs PARAMS ((rtx *, void *));
4933 static void errata_emit_nops PARAMS ((rtx));
4934 static void fixup_errata PARAMS ((void));
4935
4936 /* This structure is used to track some details about the previous insn
4937 groups so we can determine if it may be necessary to insert NOPs to
4938 work around hardware errata. */
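/* The situation being avoided is a general register that is conditionally
   written in one instruction group and then used as a memory address in
   the next; errata_emit_nops below inserts a stop/nop/stop sequence in
   front of the offending insn when it detects this.  */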
4939 static struct group
4940 {
4941 HARD_REG_SET p_reg_set;
4942 HARD_REG_SET gr_reg_conditionally_set;
4943 } last_group[2];
4944
4945 /* Index into the last_group array. */
4946 static int group_idx;
4947
4948 /* Called through for_each_rtx; determines if a hard register that was
4949 conditionally set in the previous group is used as an address register.
4950 It ensures that for_each_rtx returns 1 in that case. */
4951 static int
4952 errata_find_address_regs (xp, data)
4953 rtx *xp;
4954 void *data ATTRIBUTE_UNUSED;
4955 {
4956 rtx x = *xp;
4957 if (GET_CODE (x) != MEM)
4958 return 0;
4959 x = XEXP (x, 0);
4960 if (GET_CODE (x) == POST_MODIFY)
4961 x = XEXP (x, 0);
4962 if (GET_CODE (x) == REG)
4963 {
4964 struct group *prev_group = last_group + (group_idx ^ 1);
4965 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4966 REGNO (x)))
4967 return 1;
4968 return -1;
4969 }
4970 return 0;
4971 }
4972
4973 /* Called for each insn; this function keeps track of the state in
4974 last_group and emits additional NOPs if necessary to work around
4975 an Itanium A/B step erratum. */
4976 static void
4977 errata_emit_nops (insn)
4978 rtx insn;
4979 {
4980 struct group *this_group = last_group + group_idx;
4981 struct group *prev_group = last_group + (group_idx ^ 1);
4982 rtx pat = PATTERN (insn);
4983 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4984 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4985 enum attr_type type;
4986 rtx set = real_pat;
4987
4988 if (GET_CODE (real_pat) == USE
4989 || GET_CODE (real_pat) == CLOBBER
4990 || GET_CODE (real_pat) == ASM_INPUT
4991 || GET_CODE (real_pat) == ADDR_VEC
4992 || GET_CODE (real_pat) == ADDR_DIFF_VEC
4993 || asm_noperands (PATTERN (insn)) >= 0)
4994 return;
4995
4996 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4997 parts of it. */
4998
4999 if (GET_CODE (set) == PARALLEL)
5000 {
5001 int i;
5002 set = XVECEXP (real_pat, 0, 0);
5003 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5004 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5005 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5006 {
5007 set = 0;
5008 break;
5009 }
5010 }
5011
5012 if (set && GET_CODE (set) != SET)
5013 set = 0;
5014
5015 type = get_attr_type (insn);
5016
5017 if (type == TYPE_F
5018 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5019 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5020
5021 if ((type == TYPE_M || type == TYPE_A) && cond && set
5022 && REG_P (SET_DEST (set))
5023 && GET_CODE (SET_SRC (set)) != PLUS
5024 && GET_CODE (SET_SRC (set)) != MINUS
5025 && (GET_CODE (SET_SRC (set)) != ASHIFT
5026 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5027 && (GET_CODE (SET_SRC (set)) != MEM
5028 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5029 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5030 {
5031 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5032 || ! REG_P (XEXP (cond, 0)))
5033 abort ();
5034
5035 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5036 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5037 }
5038 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5039 {
5040 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5041 emit_insn_before (gen_nop (), insn);
5042 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5043 group_idx = 0;
5044 memset (last_group, 0, sizeof last_group);
5045 }
5046 }
5047
5048 /* Emit extra nops if they are required to work around hardware errata. */
5049
5050 static void
5051 fixup_errata ()
5052 {
5053 rtx insn;
5054
5055 if (! TARGET_B_STEP)
5056 return;
5057
5058 group_idx = 0;
5059 memset (last_group, 0, sizeof last_group);
5060
5061 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5062 {
5063 if (!INSN_P (insn))
5064 continue;
5065
5066 if (ia64_safe_type (insn) == TYPE_S)
5067 {
5068 group_idx ^= 1;
5069 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5070 }
5071 else
5072 errata_emit_nops (insn);
5073 }
5074 }
5075 \f
5076 /* Instruction scheduling support. */
5077 /* Describe one bundle. */
5078
5079 struct bundle
5080 {
5081 /* Zero if there's no possibility of a stop in this bundle other than
5082 at the end, otherwise the position of the optional stop bit. */
5083 int possible_stop;
5084 /* The types of the three slots. */
5085 enum attr_type t[3];
5086 /* The pseudo op to be emitted into the assembler output. */
5087 const char *name;
5088 };
5089
5090 #define NR_BUNDLES 10
5091
5092 /* A list of all available bundles. */
5093
5094 static const struct bundle bundle[NR_BUNDLES] =
5095 {
5096 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5097 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5098 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5099 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5100 #if NR_BUNDLES == 10
5101 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5102 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5103 #endif
5104 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5105 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5106 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5107 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5108 it matches an L type insn. Otherwise we'll try to generate L type
5109 nops. */
5110 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5111 };
5112
5113 /* Describe a packet of instructions. Packets consist of two bundles that
5114 are visible to the hardware in one scheduling window. */
5115
5116 struct ia64_packet
5117 {
5118 const struct bundle *t1, *t2;
5119 /* Precomputed value of the first split issue in this packet if a cycle
5120 starts at its beginning. */
5121 int first_split;
5122 /* For convenience, the insn types are replicated here so we don't have
5123 to go through T1 and T2 all the time. */
5124 enum attr_type t[6];
5125 };
5126
5127 /* An array containing all possible packets. */
5128 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5129 static struct ia64_packet packets[NR_PACKETS];
5130
5131 /* Map attr_type to a string with the name. */
5132
5133 static const char *const type_names[] =
5134 {
5135 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5136 };
5137
5138 /* Nonzero if we should insert stop bits into the schedule. */
5139 int ia64_final_schedule = 0;
5140
5141 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5142 static rtx ia64_single_set PARAMS ((rtx));
5143 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5144 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5145 static void maybe_rotate PARAMS ((FILE *));
5146 static void finish_last_head PARAMS ((FILE *, int));
5147 static void rotate_one_bundle PARAMS ((FILE *));
5148 static void rotate_two_bundles PARAMS ((FILE *));
5149 static void nop_cycles_until PARAMS ((int, FILE *));
5150 static void cycle_end_fill_slots PARAMS ((FILE *));
5151 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5152 static int get_split PARAMS ((const struct ia64_packet *, int));
5153 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5154 const struct ia64_packet *, int));
5155 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5156 rtx *, enum attr_type *, int));
5157 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5158 static void dump_current_packet PARAMS ((FILE *));
5159 static void schedule_stop PARAMS ((FILE *));
5160 static rtx gen_nop_type PARAMS ((enum attr_type));
5161 static void ia64_emit_nops PARAMS ((void));
5162
5163 /* Map a bundle number to its pseudo-op. */
5164
5165 const char *
5166 get_bundle_name (b)
5167 int b;
5168 {
5169 return bundle[b].name;
5170 }
5171
5172 /* Compute the slot which will cause a split issue in packet P if the
5173 current cycle begins at slot BEGIN. */
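/* For example, for two .mii bundles issued starting at slot 0, the
   counting loop below finds that the insn in slot 4 would be the third
   I-unit instruction of the cycle, so the split issue occurs at slot 4.  */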
5174
5175 static int
5176 itanium_split_issue (p, begin)
5177 const struct ia64_packet *p;
5178 int begin;
5179 {
5180 int type_count[TYPE_S];
5181 int i;
5182 int split = 6;
5183
5184 if (begin < 3)
5185 {
5186 /* Always split before and after MMF. */
5187 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5188 return 3;
5189 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5190 return 3;
5191 /* Always split after MBB and BBB. */
5192 if (p->t[1] == TYPE_B)
5193 return 3;
5194 /* Split after first bundle in MIB BBB combination. */
5195 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5196 return 3;
5197 }
5198
5199 memset (type_count, 0, sizeof type_count);
5200 for (i = begin; i < split; i++)
5201 {
5202 enum attr_type t0 = p->t[i];
5203 /* An MLX bundle reserves the same units as an MFI bundle. */
5204 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5205 : t0 == TYPE_X ? TYPE_I
5206 : t0);
5207
5208 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5209 2 integer per cycle. */
5210 int max = (t == TYPE_B ? 3 : 2);
5211 if (type_count[t] == max)
5212 return i;
5213
5214 type_count[t]++;
5215 }
5216 return split;
5217 }
5218
5219 /* Return the maximum number of instructions a cpu can issue. */
5220
5221 static int
5222 ia64_issue_rate ()
5223 {
5224 return 6;
5225 }
5226
5227 /* Helper function - like single_set, but look inside COND_EXEC. */
5228
5229 static rtx
5230 ia64_single_set (insn)
5231 rtx insn;
5232 {
5233 rtx x = PATTERN (insn), ret;
5234 if (GET_CODE (x) == COND_EXEC)
5235 x = COND_EXEC_CODE (x);
5236 if (GET_CODE (x) == SET)
5237 return x;
5238 ret = single_set_2 (insn, x);
5239 if (ret == NULL && GET_CODE (x) == PARALLEL)
5240 {
5241 /* Special case here prologue_allocate_stack and
5242 epilogue_deallocate_stack. Although it is not a classical
5243 single set, the second set is there just to protect it
5244 from moving past FP-relative stack accesses. */
5245 if (XVECLEN (x, 0) == 2
5246 && GET_CODE (XVECEXP (x, 0, 0)) == SET
5247 && GET_CODE (XVECEXP (x, 0, 1)) == SET
5248 && GET_CODE (SET_DEST (XVECEXP (x, 0, 1))) == REG
5249 && SET_DEST (XVECEXP (x, 0, 1)) == SET_SRC (XVECEXP (x, 0, 1))
5250 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5251 ret = XVECEXP (x, 0, 0);
5252 }
5253 return ret;
5254 }
5255
5256 /* Adjust the cost of a scheduling dependency. Return the new cost of
5257 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5258
5259 static int
5260 ia64_adjust_cost (insn, link, dep_insn, cost)
5261 rtx insn, link, dep_insn;
5262 int cost;
5263 {
5264 enum attr_type dep_type;
5265 enum attr_itanium_class dep_class;
5266 enum attr_itanium_class insn_class;
5267 rtx dep_set, set, src, addr;
5268
5269 if (GET_CODE (PATTERN (insn)) == CLOBBER
5270 || GET_CODE (PATTERN (insn)) == USE
5271 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5272 || GET_CODE (PATTERN (dep_insn)) == USE
5273 /* @@@ Not accurate for indirect calls. */
5274 || GET_CODE (insn) == CALL_INSN
5275 || ia64_safe_type (insn) == TYPE_S)
5276 return 0;
5277
5278 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5279 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5280 return 0;
5281
5282 dep_type = ia64_safe_type (dep_insn);
5283 dep_class = ia64_safe_itanium_class (dep_insn);
5284 insn_class = ia64_safe_itanium_class (insn);
5285
5286 /* Compares that feed a conditional branch can execute in the same
5287 cycle. */
5288 dep_set = ia64_single_set (dep_insn);
5289 set = ia64_single_set (insn);
5290
5291 if (dep_type != TYPE_F
5292 && dep_set
5293 && GET_CODE (SET_DEST (dep_set)) == REG
5294 && PR_REG (REGNO (SET_DEST (dep_set)))
5295 && GET_CODE (insn) == JUMP_INSN)
5296 return 0;
5297
5298 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5299 {
5300 /* ??? Can't find any information in the documentation about whether
5301 a sequence
5302 st [rx] = ra
5303 ld rb = [ry]
5304 splits issue. Assume it doesn't. */
5305 return 0;
5306 }
5307
5308 src = set ? SET_SRC (set) : 0;
5309 addr = 0;
5310 if (set)
5311 {
5312 if (GET_CODE (SET_DEST (set)) == MEM)
5313 addr = XEXP (SET_DEST (set), 0);
5314 else if (GET_CODE (SET_DEST (set)) == SUBREG
5315 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5316 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5317 else
5318 {
5319 addr = src;
5320 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5321 addr = XVECEXP (addr, 0, 0);
5322 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5323 addr = XEXP (addr, 0);
5324 if (GET_CODE (addr) == MEM)
5325 addr = XEXP (addr, 0);
5326 else
5327 addr = 0;
5328 }
5329 }
5330
5331 if (addr && GET_CODE (addr) == POST_MODIFY)
5332 addr = XEXP (addr, 0);
5333
5334 set = ia64_single_set (dep_insn);
5335
5336 if ((dep_class == ITANIUM_CLASS_IALU
5337 || dep_class == ITANIUM_CLASS_ILOG
5338 || dep_class == ITANIUM_CLASS_LD)
5339 && (insn_class == ITANIUM_CLASS_LD
5340 || insn_class == ITANIUM_CLASS_ST))
5341 {
5342 if (! addr || ! set)
5343 abort ();
5344 /* This isn't completely correct - an IALU that feeds an address has
5345 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5346 otherwise. Unfortunately there's no good way to describe this. */
5347 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5348 return cost + 1;
5349 }
5350 if ((dep_class == ITANIUM_CLASS_IALU
5351 || dep_class == ITANIUM_CLASS_ILOG
5352 || dep_class == ITANIUM_CLASS_LD)
5353 && (insn_class == ITANIUM_CLASS_MMMUL
5354 || insn_class == ITANIUM_CLASS_MMSHF
5355 || insn_class == ITANIUM_CLASS_MMSHFI))
5356 return 3;
5357 if (dep_class == ITANIUM_CLASS_FMAC
5358 && (insn_class == ITANIUM_CLASS_FMISC
5359 || insn_class == ITANIUM_CLASS_FCVTFX
5360 || insn_class == ITANIUM_CLASS_XMPY))
5361 return 7;
5362 if ((dep_class == ITANIUM_CLASS_FMAC
5363 || dep_class == ITANIUM_CLASS_FMISC
5364 || dep_class == ITANIUM_CLASS_FCVTFX
5365 || dep_class == ITANIUM_CLASS_XMPY)
5366 && insn_class == ITANIUM_CLASS_STF)
5367 return 8;
5368 if ((dep_class == ITANIUM_CLASS_MMMUL
5369 || dep_class == ITANIUM_CLASS_MMSHF
5370 || dep_class == ITANIUM_CLASS_MMSHFI)
5371 && (insn_class == ITANIUM_CLASS_LD
5372 || insn_class == ITANIUM_CLASS_ST
5373 || insn_class == ITANIUM_CLASS_IALU
5374 || insn_class == ITANIUM_CLASS_ILOG
5375 || insn_class == ITANIUM_CLASS_ISHF))
5376 return 4;
5377
5378 return cost;
5379 }
5380
5381 /* Describe the current state of the Itanium pipeline. */
5382 static struct
5383 {
5384 /* The first slot that is used in the current cycle. */
5385 int first_slot;
5386 /* The next slot to fill. */
5387 int cur;
5388 /* The packet we have selected for the current issue window. */
5389 const struct ia64_packet *packet;
5390 /* The position of the split issue that occurs due to issue width
5391 limitations (6 if there's no split issue). */
5392 int split;
5393 /* Record data about the insns scheduled so far in the same issue
5394 window. The elements up to but not including FIRST_SLOT belong
5395 to the previous cycle, the ones starting with FIRST_SLOT belong
5396 to the current cycle. */
5397 enum attr_type types[6];
5398 rtx insns[6];
5399 int stopbit[6];
5400 /* Nonzero if we decided to schedule a stop bit. */
5401 int last_was_stop;
5402 } sched_data;
5403
5404 /* Temporary arrays; they have enough elements to hold all insns that
5405 can be ready at the same time during scheduling of the current block.
5406 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5407 static rtx *sched_ready;
5408 static enum attr_type *sched_types;
5409
5410 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5411 of packet P. */
5412
5413 static int
5414 insn_matches_slot (p, itype, slot, insn)
5415 const struct ia64_packet *p;
5416 enum attr_type itype;
5417 int slot;
5418 rtx insn;
5419 {
5420 enum attr_itanium_requires_unit0 u0;
5421 enum attr_type stype = p->t[slot];
5422
5423 if (insn)
5424 {
5425 u0 = ia64_safe_itanium_requires_unit0 (insn);
5426 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5427 {
5428 int i;
5429 for (i = sched_data.first_slot; i < slot; i++)
5430 if (p->t[i] == stype
5431 || (stype == TYPE_F && p->t[i] == TYPE_L)
5432 || (stype == TYPE_I && p->t[i] == TYPE_X))
5433 return 0;
5434 }
5435 if (GET_CODE (insn) == CALL_INSN)
5436 {
5437 /* Reject calls in multiway branch packets. We want to limit
5438 the number of multiway branches we generate (since the branch
5439 predictor is limited), and this seems to work fairly well.
5440 (If we didn't do this, we'd have to add another test here to
5441 force calls into the third slot of the bundle.) */
5442 if (slot < 3)
5443 {
5444 if (p->t[1] == TYPE_B)
5445 return 0;
5446 }
5447 else
5448 {
5449 if (p->t[4] == TYPE_B)
5450 return 0;
5451 }
5452 }
5453 }
5454
5455 if (itype == stype)
5456 return 1;
5457 if (itype == TYPE_A)
5458 return stype == TYPE_M || stype == TYPE_I;
5459 return 0;
5460 }
5461
5462 /* Like emit_insn_before, but skip cycle_display insns. This makes the
5463 assembly output a bit prettier. */
5464
5465 static void
5466 ia64_emit_insn_before (insn, before)
5467 rtx insn, before;
5468 {
5469 rtx prev = PREV_INSN (before);
5470 if (prev && GET_CODE (prev) == INSN
5471 && GET_CODE (PATTERN (prev)) == UNSPEC
5472 && XINT (PATTERN (prev), 1) == 23)
5473 before = prev;
5474 emit_insn_before (insn, before);
5475 }
5476
5477 #if 0
5478 /* Generate a nop insn of the given type. Note we never generate L type
5479 nops. */
5480
5481 static rtx
5482 gen_nop_type (t)
5483 enum attr_type t;
5484 {
5485 switch (t)
5486 {
5487 case TYPE_M:
5488 return gen_nop_m ();
5489 case TYPE_I:
5490 return gen_nop_i ();
5491 case TYPE_B:
5492 return gen_nop_b ();
5493 case TYPE_F:
5494 return gen_nop_f ();
5495 case TYPE_X:
5496 return gen_nop_x ();
5497 default:
5498 abort ();
5499 }
5500 }
5501 #endif
5502
5503 /* When rotating a bundle out of the issue window, insert a bundle selector
5504 insn in front of it. DUMP is the scheduling dump file or NULL. START
5505 is either 0 or 3, depending on whether we want to emit a bundle selector
5506 for the first bundle or the second bundle in the current issue window.
5507
5508 The selector insns are emitted this late because the selected packet can
5509 be changed until parts of it get rotated out. */
5510
5511 static void
5512 finish_last_head (dump, start)
5513 FILE *dump;
5514 int start;
5515 {
5516 const struct ia64_packet *p = sched_data.packet;
5517 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5518 int bundle_type = b - bundle;
5519 rtx insn;
5520 int i;
5521
5522 if (! ia64_final_schedule)
5523 return;
5524
5525 for (i = start; sched_data.insns[i] == 0; i++)
5526 if (i == start + 3)
5527 abort ();
5528 insn = sched_data.insns[i];
5529
5530 if (dump)
5531 fprintf (dump, "// Emitting template before %d: %s\n",
5532 INSN_UID (insn), b->name);
5533
5534 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5535 }
5536
5537 /* We can't schedule more insns this cycle. Fix up the scheduling state
5538 and advance FIRST_SLOT and CUR.
5539 We have to distribute the insns that are currently found between
5540 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5541 far, they are stored successively in the fields starting at FIRST_SLOT;
5542 now they must be moved to the correct slots.
5543 DUMP is the current scheduling dump file, or NULL. */
5544
5545 static void
5546 cycle_end_fill_slots (dump)
5547 FILE *dump;
5548 {
5549 const struct ia64_packet *packet = sched_data.packet;
5550 int slot, i;
5551 enum attr_type tmp_types[6];
5552 rtx tmp_insns[6];
5553
5554 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5555 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5556
5557 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5558 {
5559 enum attr_type t = tmp_types[i];
5560 if (t != ia64_safe_type (tmp_insns[i]))
5561 abort ();
5562 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5563 {
5564 if (slot > sched_data.split)
5565 abort ();
5566 if (dump)
5567 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5568 type_names[t]);
5569 sched_data.types[slot] = packet->t[slot];
5570 sched_data.insns[slot] = 0;
5571 sched_data.stopbit[slot] = 0;
5572
5573 /* ??? TYPE_L instructions always fill up two slots, but we don't
5574 support TYPE_L nops. */
5575 if (packet->t[slot] == TYPE_L)
5576 abort ();
5577
5578 slot++;
5579 }
5580 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5581 actual slot type later. */
5582 sched_data.types[slot] = packet->t[slot];
5583 sched_data.insns[slot] = tmp_insns[i];
5584 sched_data.stopbit[slot] = 0;
5585 slot++;
5586 /* TYPE_L instructions always fill up two slots. */
5587 if (t == TYPE_L)
5588 slot++;
5589 }
5590
5591 /* This isn't right - there's no need to pad out until the forced split;
5592 the CPU will automatically split if an insn isn't ready. */
5593 #if 0
5594 while (slot < sched_data.split)
5595 {
5596 sched_data.types[slot] = packet->t[slot];
5597 sched_data.insns[slot] = 0;
5598 sched_data.stopbit[slot] = 0;
5599 slot++;
5600 }
5601 #endif
5602
5603 sched_data.first_slot = sched_data.cur = slot;
5604 }
5605
5606 /* Bundle rotations, as described in the Itanium optimization manual.
5607 We can rotate either one or both bundles out of the issue window.
5608 DUMP is the current scheduling dump file, or NULL. */
5609
5610 static void
5611 rotate_one_bundle (dump)
5612 FILE *dump;
5613 {
5614 if (dump)
5615 fprintf (dump, "// Rotating one bundle.\n");
5616
5617 finish_last_head (dump, 0);
5618 if (sched_data.cur > 3)
5619 {
5620 sched_data.cur -= 3;
5621 sched_data.first_slot -= 3;
5622 memmove (sched_data.types,
5623 sched_data.types + 3,
5624 sched_data.cur * sizeof *sched_data.types);
5625 memmove (sched_data.stopbit,
5626 sched_data.stopbit + 3,
5627 sched_data.cur * sizeof *sched_data.stopbit);
5628 memmove (sched_data.insns,
5629 sched_data.insns + 3,
5630 sched_data.cur * sizeof *sched_data.insns);
5631 }
5632 else
5633 {
5634 sched_data.cur = 0;
5635 sched_data.first_slot = 0;
5636 }
5637 }
5638
5639 static void
5640 rotate_two_bundles (dump)
5641 FILE *dump;
5642 {
5643 if (dump)
5644 fprintf (dump, "// Rotating two bundles.\n");
5645
5646 if (sched_data.cur == 0)
5647 return;
5648
5649 finish_last_head (dump, 0);
5650 if (sched_data.cur > 3)
5651 finish_last_head (dump, 3);
5652 sched_data.cur = 0;
5653 sched_data.first_slot = 0;
5654 }
5655
5656 /* We're beginning a new block. Initialize data structures as necessary. */
5657
5658 static void
5659 ia64_sched_init (dump, sched_verbose, max_ready)
5660 FILE *dump ATTRIBUTE_UNUSED;
5661 int sched_verbose ATTRIBUTE_UNUSED;
5662 int max_ready;
5663 {
5664 static int initialized = 0;
5665
5666 if (! initialized)
5667 {
5668 int b1, b2, i;
5669
5670 initialized = 1;
5671
5672 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5673 {
5674 const struct bundle *t1 = bundle + b1;
5675 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5676 {
5677 const struct bundle *t2 = bundle + b2;
5678
5679 packets[i].t1 = t1;
5680 packets[i].t2 = t2;
5681 }
5682 }
5683 for (i = 0; i < NR_PACKETS; i++)
5684 {
5685 int j;
5686 for (j = 0; j < 3; j++)
5687 packets[i].t[j] = packets[i].t1->t[j];
5688 for (j = 0; j < 3; j++)
5689 packets[i].t[j + 3] = packets[i].t2->t[j];
5690 packets[i].first_split = itanium_split_issue (packets + i, 0);
5691 }
5692
5693 }
5694
5695 init_insn_group_barriers ();
5696
5697 memset (&sched_data, 0, sizeof sched_data);
5698 sched_types = (enum attr_type *) xmalloc (max_ready
5699 * sizeof (enum attr_type));
5700 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5701 }
5702
5703 /* See if the packet P can match the insns we have already scheduled. Return
5704 nonzero if so. In *PSLOT, we store the first slot that is available for
5705 more instructions if we choose this packet.
5706    SPLIT holds the last slot we can use; there's a split issue after it, so
5707    scheduling beyond it would cause us to use more than one cycle.  */
5708
5709 static int
5710 packet_matches_p (p, split, pslot)
5711 const struct ia64_packet *p;
5712 int split;
5713 int *pslot;
5714 {
5715 int filled = sched_data.cur;
5716 int first = sched_data.first_slot;
5717 int i, slot;
5718
5719 /* First, check if the first of the two bundles must be a specific one (due
5720 to stop bits). */
5721 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5722 return 0;
5723 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5724 return 0;
5725
5726 for (i = 0; i < first; i++)
5727 if (! insn_matches_slot (p, sched_data.types[i], i,
5728 sched_data.insns[i]))
5729 return 0;
5730 for (i = slot = first; i < filled; i++)
5731 {
5732 while (slot < split)
5733 {
5734 if (insn_matches_slot (p, sched_data.types[i], slot,
5735 sched_data.insns[i]))
5736 break;
5737 slot++;
5738 }
5739 if (slot == split)
5740 return 0;
5741 slot++;
5742 }
5743
5744 if (pslot)
5745 *pslot = slot;
5746 return 1;
5747 }
5748
5749 /* A frontend for itanium_split_issue. For a packet P and a slot
5750 number FIRST that describes the start of the current clock cycle,
5751 return the slot number of the first split issue. This function
5752 uses the cached number found in P if possible. */
5753
5754 static int
5755 get_split (p, first)
5756 const struct ia64_packet *p;
5757 int first;
5758 {
5759 if (first == 0)
5760 return p->first_split;
5761 return itanium_split_issue (p, first);
5762 }
5763
5764 /* Given N_READY insns in the array READY, whose types are found in the
5765    corresponding array TYPES, return the index of the insn that is best
5766    suited to be scheduled in slot SLOT of packet P, or -1 if none fits.  */
5767
5768 static int
5769 find_best_insn (ready, types, n_ready, p, slot)
5770 rtx *ready;
5771 enum attr_type *types;
5772 int n_ready;
5773 const struct ia64_packet *p;
5774 int slot;
5775 {
5776 int best = -1;
5777 int best_pri = 0;
5778 while (n_ready-- > 0)
5779 {
5780 rtx insn = ready[n_ready];
5781 if (! insn)
5782 continue;
5783 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5784 break;
5785 /* If we have equally good insns, one of which has a stricter
5786 slot requirement, prefer the one with the stricter requirement. */
5787 if (best >= 0 && types[n_ready] == TYPE_A)
5788 continue;
5789 if (insn_matches_slot (p, types[n_ready], slot, insn))
5790 {
5791 best = n_ready;
5792 best_pri = INSN_PRIORITY (ready[best]);
5793
5794 /* If there's no way we could get a stricter requirement, stop
5795 looking now. */
5796 if (types[n_ready] != TYPE_A
5797 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5798 break;
5799 break;
5800 }
5801 }
5802 return best;
5803 }
5804
5805 /* Select the best packet to use given the current scheduler state and the
5806 current ready list.
5807 READY is an array holding N_READY ready insns; TYPES is a corresponding
5808 array that holds their types. Store the best packet in *PPACKET and the
5809 number of insns that can be scheduled in the current cycle in *PBEST. */
5810
5811 static void
5812 find_best_packet (pbest, ppacket, ready, types, n_ready)
5813 int *pbest;
5814 const struct ia64_packet **ppacket;
5815 rtx *ready;
5816 enum attr_type *types;
5817 int n_ready;
5818 {
5819 int first = sched_data.first_slot;
5820 int best = 0;
5821 int lowest_end = 6;
5822 const struct ia64_packet *best_packet = NULL;
5823 int i;
5824
5825 for (i = 0; i < NR_PACKETS; i++)
5826 {
5827 const struct ia64_packet *p = packets + i;
5828 int slot;
5829 int split = get_split (p, first);
5830 int win = 0;
5831 int first_slot, last_slot;
5832 int b_nops = 0;
5833
5834 if (! packet_matches_p (p, split, &first_slot))
5835 continue;
5836
5837 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5838
5839 win = 0;
5840 last_slot = 6;
5841 for (slot = first_slot; slot < split; slot++)
5842 {
5843 int insn_nr;
5844
5845 /* Disallow a degenerate case where the first bundle doesn't
5846 contain anything but NOPs! */
5847 if (first_slot == 0 && win == 0 && slot == 3)
5848 {
5849 win = -1;
5850 break;
5851 }
5852
5853 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5854 if (insn_nr >= 0)
5855 {
5856 sched_ready[insn_nr] = 0;
5857 last_slot = slot;
5858 win++;
5859 }
5860 else if (p->t[slot] == TYPE_B)
5861 b_nops++;
5862 }
5863 /* We must disallow MBB/BBB packets if any of their B slots would be
5864 filled with nops. */
5865 if (last_slot < 3)
5866 {
5867 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5868 win = -1;
5869 }
5870 else
5871 {
5872 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5873 win = -1;
5874 }
5875
5876 if (win > best
5877 || (win == best && last_slot < lowest_end))
5878 {
5879 best = win;
5880 lowest_end = last_slot;
5881 best_packet = p;
5882 }
5883 }
5884 *pbest = best;
5885 *ppacket = best_packet;
5886 }
5887
5888 /* Reorder the ready list so that the insns that can be issued in this cycle
5889 are found in the correct order at the end of the list.
5890 DUMP is the scheduling dump file, or NULL. READY points to the start,
5891 E_READY to the end of the ready list. MAY_FAIL determines what should be
5892 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5893 otherwise we return 0.
5894 Return 1 if any insns can be scheduled in this cycle. */
5895
5896 static int
5897 itanium_reorder (dump, ready, e_ready, may_fail)
5898 FILE *dump;
5899 rtx *ready;
5900 rtx *e_ready;
5901 int may_fail;
5902 {
5903 const struct ia64_packet *best_packet;
5904 int n_ready = e_ready - ready;
5905 int first = sched_data.first_slot;
5906 int i, best, best_split, filled;
5907
5908 for (i = 0; i < n_ready; i++)
5909 sched_types[i] = ia64_safe_type (ready[i]);
5910
5911 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5912
5913 if (best == 0)
5914 {
5915 if (may_fail)
5916 return 0;
5917 abort ();
5918 }
5919
5920 if (dump)
5921 {
5922 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5923 best_packet->t1->name,
5924 best_packet->t2 ? best_packet->t2->name : NULL, best);
5925 }
5926
5927 best_split = itanium_split_issue (best_packet, first);
5928 packet_matches_p (best_packet, best_split, &filled);
5929
5930 for (i = filled; i < best_split; i++)
5931 {
5932 int insn_nr;
5933
5934 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5935 if (insn_nr >= 0)
5936 {
5937 rtx insn = ready[insn_nr];
5938 memmove (ready + insn_nr, ready + insn_nr + 1,
5939 (n_ready - insn_nr - 1) * sizeof (rtx));
5940 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5941 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5942 ready[--n_ready] = insn;
5943 }
5944 }
5945
5946 sched_data.packet = best_packet;
5947 sched_data.split = best_split;
5948 return 1;
5949 }
5950
5951 /* Dump information about the current scheduling state to file DUMP. */
5952
5953 static void
5954 dump_current_packet (dump)
5955 FILE *dump;
5956 {
5957 int i;
5958 fprintf (dump, "// %d slots filled:", sched_data.cur);
5959 for (i = 0; i < sched_data.first_slot; i++)
5960 {
5961 rtx insn = sched_data.insns[i];
5962 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5963 if (insn)
5964 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5965 if (sched_data.stopbit[i])
5966 fprintf (dump, " ;;");
5967 }
5968 fprintf (dump, " :::");
5969 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5970 {
5971 rtx insn = sched_data.insns[i];
5972 enum attr_type t = ia64_safe_type (insn);
5973 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5974 }
5975 fprintf (dump, "\n");
5976 }
5977
5978 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5979 NULL. */
5980
5981 static void
5982 schedule_stop (dump)
5983 FILE *dump;
5984 {
5985 const struct ia64_packet *best = sched_data.packet;
5986 int i;
5987 int best_stop = 6;
5988
5989 if (dump)
5990 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5991
5992 if (sched_data.cur == 0)
5993 {
5994 if (dump)
5995 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5996
5997 rotate_two_bundles (NULL);
5998 return;
5999 }
6000
6001 for (i = -1; i < NR_PACKETS; i++)
6002 {
6003 /* This is a slight hack to give the current packet the first chance.
6004 This is done to avoid e.g. switching from MIB to MBB bundles. */
6005 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6006 int split = get_split (p, sched_data.first_slot);
6007 const struct bundle *compare;
6008 int next, stoppos;
6009
6010 if (! packet_matches_p (p, split, &next))
6011 continue;
6012
6013 compare = next > 3 ? p->t2 : p->t1;
6014
6015 stoppos = 3;
6016 if (compare->possible_stop)
6017 stoppos = compare->possible_stop;
6018 if (next > 3)
6019 stoppos += 3;
6020
6021 if (stoppos < next || stoppos >= best_stop)
6022 {
6023 if (compare->possible_stop == 0)
6024 continue;
6025 stoppos = (next > 3 ? 6 : 3);
6026 }
6027 if (stoppos < next || stoppos >= best_stop)
6028 continue;
6029
6030 if (dump)
6031 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
6032 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6033 stoppos);
6034
6035 best_stop = stoppos;
6036 best = p;
6037 }
6038
6039 sched_data.packet = best;
6040 cycle_end_fill_slots (dump);
6041 while (sched_data.cur < best_stop)
6042 {
6043 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6044 sched_data.insns[sched_data.cur] = 0;
6045 sched_data.stopbit[sched_data.cur] = 0;
6046 sched_data.cur++;
6047 }
6048 sched_data.stopbit[sched_data.cur - 1] = 1;
6049 sched_data.first_slot = best_stop;
6050
6051 if (dump)
6052 dump_current_packet (dump);
6053 }
6054
6055 /* If necessary, perform one or two rotations on the scheduling state.
6056 This should only be called if we are starting a new cycle. */
6057
6058 static void
6059 maybe_rotate (dump)
6060 FILE *dump;
6061 {
6062 if (sched_data.cur == 6)
6063 rotate_two_bundles (dump);
6064 else if (sched_data.cur >= 3)
6065 rotate_one_bundle (dump);
6066 sched_data.first_slot = sched_data.cur;
6067 }
6068
6069 /* The clock cycle when ia64_sched_reorder was last called. */
6070 static int prev_cycle;
6071
6072 /* The first slot used when scheduling insns in the previous cycle.  This
6073    is the saved value of sched_data.first_slot.  */
6074 static int prev_first;
6075
6076 /* The last insn that has been scheduled. At the start of a new cycle
6077 we know that we can emit new insns after it; the main scheduling code
6078 has already emitted a cycle_display insn after it and is using that
6079 as its current last insn. */
6080 static rtx last_issued;
6081
6082 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6083 pad out the delay between MM (shifts, etc.) and integer operations. */
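/* Concretely: a partially filled bundle is first padded with nops of the
   types its template calls for and closed with a stop bit; each remaining
   idle cycle is then covered by nops in a fresh bundle (selector 0: an M
   nop followed by two I nops), with a stop bit ending every idle cycle.  */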
6084
6085 static void
6086 nop_cycles_until (clock_var, dump)
6087 int clock_var;
6088 FILE *dump;
6089 {
6090 int prev_clock = prev_cycle;
6091 int cycles_left = clock_var - prev_clock;
6092
6093 /* Finish the previous cycle; pad it out with NOPs. */
6094 if (sched_data.cur == 3)
6095 {
6096 rtx t = gen_insn_group_barrier (GEN_INT (3));
6097 last_issued = emit_insn_after (t, last_issued);
6098 maybe_rotate (dump);
6099 }
6100 else if (sched_data.cur > 0)
6101 {
6102 int need_stop = 0;
6103 int split = itanium_split_issue (sched_data.packet, prev_first);
6104
6105 if (sched_data.cur < 3 && split > 3)
6106 {
6107 split = 3;
6108 need_stop = 1;
6109 }
6110
6111 if (split > sched_data.cur)
6112 {
6113 int i;
6114 for (i = sched_data.cur; i < split; i++)
6115 {
6116 rtx t;
6117
6118 t = gen_nop_type (sched_data.packet->t[i]);
6119 last_issued = emit_insn_after (t, last_issued);
6120 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
6121 sched_data.insns[i] = last_issued;
6122 sched_data.stopbit[i] = 0;
6123 }
6124 sched_data.cur = split;
6125 }
6126
6127 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6128 && cycles_left > 1)
6129 {
6130 int i;
6131 for (i = sched_data.cur; i < 6; i++)
6132 {
6133 rtx t;
6134
6135 t = gen_nop_type (sched_data.packet->t[i]);
6136 last_issued = emit_insn_after (t, last_issued);
6137 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
6138 sched_data.insns[i] = last_issued;
6139 sched_data.stopbit[i] = 0;
6140 }
6141 sched_data.cur = 6;
6142 cycles_left--;
6143 need_stop = 1;
6144 }
6145
6146 if (need_stop || sched_data.cur == 6)
6147 {
6148 rtx t = gen_insn_group_barrier (GEN_INT (3));
6149 last_issued = emit_insn_after (t, last_issued);
6150 }
6151 maybe_rotate (dump);
6152 }
6153
6154 cycles_left--;
6155 while (cycles_left > 0)
6156 {
6157 rtx t = gen_bundle_selector (GEN_INT (0));
6158 last_issued = emit_insn_after (t, last_issued);
6159 t = gen_nop_type (TYPE_M);
6160 last_issued = emit_insn_after (t, last_issued);
6161 t = gen_nop_type (TYPE_I);
6162 last_issued = emit_insn_after (t, last_issued);
6163 if (cycles_left > 1)
6164 {
6165 t = gen_insn_group_barrier (GEN_INT (2));
6166 last_issued = emit_insn_after (t, last_issued);
6167 cycles_left--;
6168 }
6169 t = gen_nop_type (TYPE_I);
6170 last_issued = emit_insn_after (t, last_issued);
6171 t = gen_insn_group_barrier (GEN_INT (3));
6172 last_issued = emit_insn_after (t, last_issued);
6173 cycles_left--;
6174 }
6175 }
6176
6177 /* We are about to begin issuing insns for this clock cycle.
6178 Override the default sort algorithm to better slot instructions. */
6179
6180 static int
6181 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6182 reorder_type, clock_var)
6183 FILE *dump ATTRIBUTE_UNUSED;
6184 int sched_verbose ATTRIBUTE_UNUSED;
6185 rtx *ready;
6186 int *pn_ready;
6187 int reorder_type, clock_var;
6188 {
6189 int n_asms;
6190 int n_ready = *pn_ready;
6191 rtx *e_ready = ready + n_ready;
6192 rtx *insnp;
6193
6194 if (sched_verbose)
6195 {
6196 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6197 dump_current_packet (dump);
6198 }
6199
6200 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6201 {
6202 for (insnp = ready; insnp < e_ready; insnp++)
6203 {
6204 rtx insn = *insnp;
6205 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6206 if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
6207 || t == ITANIUM_CLASS_ILOG
6208 || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
6209 {
6210 rtx link;
6211 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6212 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
6213 && REG_NOTE_KIND (link) != REG_DEP_ANTI)
6214 {
6215 rtx other = XEXP (link, 0);
6216 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6217 if (t0 == ITANIUM_CLASS_MMSHF
6218 || t0 == ITANIUM_CLASS_MMMUL)
6219 {
6220 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6221 goto out;
6222 }
6223 }
6224 }
6225 }
6226 }
6227 out:
6228
6229 prev_first = sched_data.first_slot;
6230 prev_cycle = clock_var;
6231
6232 if (reorder_type == 0)
6233 maybe_rotate (sched_verbose ? dump : NULL);
6234
6235 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6236 n_asms = 0;
6237 for (insnp = ready; insnp < e_ready; insnp++)
6238 if (insnp < e_ready)
6239 {
6240 rtx insn = *insnp;
6241 enum attr_type t = ia64_safe_type (insn);
6242 if (t == TYPE_UNKNOWN)
6243 {
6244 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6245 || asm_noperands (PATTERN (insn)) >= 0)
6246 {
6247 rtx lowest = ready[n_asms];
6248 ready[n_asms] = insn;
6249 *insnp = lowest;
6250 n_asms++;
6251 }
6252 else
6253 {
6254 rtx highest = ready[n_ready - 1];
6255 ready[n_ready - 1] = insn;
6256 *insnp = highest;
6257 if (ia64_final_schedule && group_barrier_needed_p (insn))
6258 {
6259 schedule_stop (sched_verbose ? dump : NULL);
6260 sched_data.last_was_stop = 1;
6261 maybe_rotate (sched_verbose ? dump : NULL);
6262 }
6263
6264 return 1;
6265 }
6266 }
6267 }
6268 if (n_asms < n_ready)
6269 {
6270 /* Some normal insns to process. Skip the asms. */
6271 ready += n_asms;
6272 n_ready -= n_asms;
6273 }
6274 else if (n_ready > 0)
6275 {
6276 /* Only asm insns left. */
6277 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6278 {
6279 schedule_stop (sched_verbose ? dump : NULL);
6280 sched_data.last_was_stop = 1;
6281 maybe_rotate (sched_verbose ? dump : NULL);
6282 }
6283 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6284 return 1;
6285 }
6286
6287 if (ia64_final_schedule)
6288 {
6289 int nr_need_stop = 0;
6290
6291 for (insnp = ready; insnp < e_ready; insnp++)
6292 if (safe_group_barrier_needed_p (*insnp))
6293 nr_need_stop++;
6294
6295 /* Schedule a stop bit if
6296 - all insns require a stop bit, or
6297 - we are starting a new cycle and _any_ insns require a stop bit.
6298 The reason for the latter is that if our schedule is accurate, then
6299 the additional stop won't decrease performance at this point (since
6300 there's a split issue at this point anyway), but it gives us more
6301 freedom when scheduling the currently ready insns. */
6302 if ((reorder_type == 0 && nr_need_stop)
6303 || (reorder_type == 1 && n_ready == nr_need_stop))
6304 {
6305 schedule_stop (sched_verbose ? dump : NULL);
6306 sched_data.last_was_stop = 1;
6307 maybe_rotate (sched_verbose ? dump : NULL);
6308 if (reorder_type == 1)
6309 return 0;
6310 }
6311 else
6312 {
6313 int deleted = 0;
6314 insnp = e_ready;
6315 /* Move down everything that needs a stop bit, preserving relative
6316 order. */
6317 while (insnp-- > ready + deleted)
6318 while (insnp >= ready + deleted)
6319 {
6320 rtx insn = *insnp;
6321 if (! safe_group_barrier_needed_p (insn))
6322 break;
6323 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6324 *ready = insn;
6325 deleted++;
6326 }
6327 n_ready -= deleted;
6328 ready += deleted;
6329 if (deleted != nr_need_stop)
6330 abort ();
6331 }
6332 }
6333
6334 return itanium_reorder (sched_verbose ? dump : NULL,
6335 ready, e_ready, reorder_type == 1);
6336 }
6337
6338 static int
6339 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6340 FILE *dump;
6341 int sched_verbose;
6342 rtx *ready;
6343 int *pn_ready;
6344 int clock_var;
6345 {
6346 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6347 pn_ready, 0, clock_var);
6348 }
6349
6350 /* Like ia64_sched_reorder, but called after issuing each insn.
6351 Override the default sort algorithm to better slot instructions. */
6352
6353 static int
6354 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6355 FILE *dump ATTRIBUTE_UNUSED;
6356 int sched_verbose ATTRIBUTE_UNUSED;
6357 rtx *ready;
6358 int *pn_ready;
6359 int clock_var;
6360 {
6361 if (sched_data.last_was_stop)
6362 return 0;
6363
6364 /* Detect one special case and try to optimize it.
6365 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6366 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
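  /* In other words, the insns currently sitting in slots 1 and up are moved
     down two slots into the second bundle, the first bundle is rewritten as
     MFB (its unused slots get nops later), and the stop bit moves from after
     slot 0 to the end of that first bundle.  */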
6367 if (sched_data.first_slot == 1
6368 && sched_data.stopbit[0]
6369 && ((sched_data.cur == 4
6370 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6371 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6372 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6373 || (sched_data.cur == 3
6374 && (sched_data.types[1] == TYPE_M
6375 || sched_data.types[1] == TYPE_A)
6376 && (sched_data.types[2] != TYPE_M
6377 && sched_data.types[2] != TYPE_I
6378 && sched_data.types[2] != TYPE_A))))
6379
6380 {
6381 int i, best;
6382 rtx stop = sched_data.insns[1];
6383
6384 /* Search backward for the stop bit that must be there. */
6385 while (1)
6386 {
6387 int insn_code;
6388
6389 stop = PREV_INSN (stop);
6390 if (GET_CODE (stop) != INSN)
6391 abort ();
6392 insn_code = recog_memoized (stop);
6393
6394 /* Ignore cycle displays and .pred.rel.mutex. */
6395 if (insn_code == CODE_FOR_cycle_display
6396 || insn_code == CODE_FOR_pred_rel_mutex)
6397 continue;
6398
6399 if (insn_code == CODE_FOR_insn_group_barrier)
6400 break;
6401 abort ();
6402 }
6403
6404 /* Adjust the stop bit's slot selector. */
6405 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6406 abort ();
6407 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6408
6409 sched_data.stopbit[0] = 0;
6410 sched_data.stopbit[2] = 1;
6411
6412 sched_data.types[5] = sched_data.types[3];
6413 sched_data.types[4] = sched_data.types[2];
6414 sched_data.types[3] = sched_data.types[1];
6415 sched_data.insns[5] = sched_data.insns[3];
6416 sched_data.insns[4] = sched_data.insns[2];
6417 sched_data.insns[3] = sched_data.insns[1];
6418 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6419 sched_data.cur += 2;
6420 sched_data.first_slot = 3;
6421 for (i = 0; i < NR_PACKETS; i++)
6422 {
6423 const struct ia64_packet *p = packets + i;
6424 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6425 {
6426 sched_data.packet = p;
6427 break;
6428 }
6429 }
6430 rotate_one_bundle (sched_verbose ? dump : NULL);
6431
6432 best = 6;
6433 for (i = 0; i < NR_PACKETS; i++)
6434 {
6435 const struct ia64_packet *p = packets + i;
6436 int split = get_split (p, sched_data.first_slot);
6437 int next;
6438
6439 /* Disallow multiway branches here. */
6440 if (p->t[1] == TYPE_B)
6441 continue;
6442
6443 if (packet_matches_p (p, split, &next) && next < best)
6444 {
6445 best = next;
6446 sched_data.packet = p;
6447 sched_data.split = split;
6448 }
6449 }
6450 if (best == 6)
6451 abort ();
6452 }
6453
6454 if (*pn_ready > 0)
6455 {
6456 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6457 ready, pn_ready, 1,
6458 clock_var);
6459 if (more)
6460 return more;
6461 /* Did we schedule a stop? If so, finish this cycle. */
6462 if (sched_data.cur == sched_data.first_slot)
6463 return 0;
6464 }
6465
6466 if (sched_verbose)
6467 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6468
6469 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6470 if (sched_verbose)
6471 dump_current_packet (dump);
6472 return 0;
6473 }
6474
6475 /* We are about to issue INSN. Return the number of insns left on the
6476 ready queue that can be issued this cycle. */
6477
6478 static int
6479 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6480 FILE *dump;
6481 int sched_verbose;
6482 rtx insn;
6483 int can_issue_more ATTRIBUTE_UNUSED;
6484 {
6485 enum attr_type t = ia64_safe_type (insn);
6486
6487 last_issued = insn;
6488
6489 if (sched_data.last_was_stop)
6490 {
6491 int t = sched_data.first_slot;
6492 if (t == 0)
6493 t = 3;
6494 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6495 init_insn_group_barriers ();
6496 sched_data.last_was_stop = 0;
6497 }
6498
6499 if (t == TYPE_UNKNOWN)
6500 {
6501 if (sched_verbose)
6502 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6503 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6504 || asm_noperands (PATTERN (insn)) >= 0)
6505 {
6506 /* This must be some kind of asm. Clear the scheduling state. */
6507 rotate_two_bundles (sched_verbose ? dump : NULL);
6508 if (ia64_final_schedule)
6509 group_barrier_needed_p (insn);
6510 }
6511 return 1;
6512 }
6513
6514 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6515 important state info. Don't delete this test. */
6516 if (ia64_final_schedule
6517 && group_barrier_needed_p (insn))
6518 abort ();
6519
6520 sched_data.stopbit[sched_data.cur] = 0;
6521 sched_data.insns[sched_data.cur] = insn;
6522 sched_data.types[sched_data.cur] = t;
6523
6524 sched_data.cur++;
6525 if (sched_verbose)
6526 fprintf (dump, "// Scheduling insn %d of type %s\n",
6527 INSN_UID (insn), type_names[t]);
6528
6529 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6530 {
6531 schedule_stop (sched_verbose ? dump : NULL);
6532 sched_data.last_was_stop = 1;
6533 }
6534
6535 return 1;
6536 }
6537
6538 /* Free data allocated by ia64_sched_init. */
6539
6540 static void
6541 ia64_sched_finish (dump, sched_verbose)
6542 FILE *dump;
6543 int sched_verbose;
6544 {
6545 if (sched_verbose)
6546 fprintf (dump, "// Finishing schedule.\n");
6547 rotate_two_bundles (NULL);
6548 free (sched_types);
6549 free (sched_ready);
6550 }
6551
6552 static rtx
6553 ia64_cycle_display (clock, last)
6554 int clock;
6555 rtx last;
6556 {
6557 if (ia64_final_schedule)
6558 return emit_insn_after (gen_cycle_display (GEN_INT (clock)), last);
6559 else
6560 return last;
6561 }
6562 \f
6563 /* Emit pseudo-ops for the assembler to describe predicate relations.
6564 At present this assumes that we only consider predicate pairs to
6565 be mutex, and that the assembler can deduce proper values from
6566 straight-line code. */
6567
6568 static void
6569 emit_predicate_relation_info ()
6570 {
6571 int i;
6572
6573 for (i = n_basic_blocks - 1; i >= 0; --i)
6574 {
6575 basic_block bb = BASIC_BLOCK (i);
6576 int r;
6577 rtx head = bb->head;
6578
6579 /* We only need such notes at code labels. */
6580 if (GET_CODE (head) != CODE_LABEL)
6581 continue;
6582 if (GET_CODE (NEXT_INSN (head)) == NOTE
6583 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6584 head = NEXT_INSN (head);
6585
6586 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6587 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6588 {
6589 rtx p = gen_rtx_REG (BImode, r);
6590 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6591 if (head == bb->end)
6592 bb->end = n;
6593 head = n;
6594 }
6595 }
6596
6597 /* Look for conditional calls that do not return, and protect predicate
6598 relations around them. Otherwise the assembler will assume the call
6599 returns, and complain about uses of call-clobbered predicates after
6600 the call. */
6601 for (i = n_basic_blocks - 1; i >= 0; --i)
6602 {
6603 basic_block bb = BASIC_BLOCK (i);
6604 rtx insn = bb->head;
6605
6606 while (1)
6607 {
6608 if (GET_CODE (insn) == CALL_INSN
6609 && GET_CODE (PATTERN (insn)) == COND_EXEC
6610 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6611 {
6612 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6613 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6614 if (bb->head == insn)
6615 bb->head = b;
6616 if (bb->end == insn)
6617 bb->end = a;
6618 }
6619
6620 if (insn == bb->end)
6621 break;
6622 insn = NEXT_INSN (insn);
6623 }
6624 }
6625 }
6626
6627 /* Generate a NOP instruction of type T. We will never generate L type
6628 nops. */
6629
6630 static rtx
6631 gen_nop_type (t)
6632 enum attr_type t;
6633 {
6634 switch (t)
6635 {
6636 case TYPE_M:
6637 return gen_nop_m ();
6638 case TYPE_I:
6639 return gen_nop_i ();
6640 case TYPE_B:
6641 return gen_nop_b ();
6642 case TYPE_F:
6643 return gen_nop_f ();
6644 case TYPE_X:
6645 return gen_nop_x ();
6646 default:
6647 abort ();
6648 }
6649 }
6650
6651 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6652 here than while scheduling. */
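/* We walk the whole insn stream, using the bundle selector insns emitted by
   the scheduler to track the current bundle template and how many of its
   slots are already used; whenever a label, stop bit, asm or mismatched insn
   forces us past the rest of the bundle, the remaining slots are filled with
   nops of the types the template requires.  */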
6653
6654 static void
6655 ia64_emit_nops ()
6656 {
6657 rtx insn;
6658 const struct bundle *b = 0;
6659 int bundle_pos = 0;
6660
6661 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6662 {
6663 rtx pat;
6664 enum attr_type t;
6665 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6666 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6667 continue;
6668 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6669 || GET_CODE (insn) == CODE_LABEL)
6670 {
6671 if (b)
6672 while (bundle_pos < 3)
6673 {
6674 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6675 bundle_pos++;
6676 }
6677 if (GET_CODE (insn) != CODE_LABEL)
6678 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6679 else
6680 b = 0;
6681 bundle_pos = 0;
6682 continue;
6683 }
6684 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6685 {
6686 int t = INTVAL (XVECEXP (pat, 0, 0));
6687 if (b)
6688 while (bundle_pos < t)
6689 {
6690 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6691 bundle_pos++;
6692 }
6693 continue;
6694 }
6695
6696 if (bundle_pos == 3)
6697 b = 0;
6698
6699 if (b && INSN_P (insn))
6700 {
6701 t = ia64_safe_type (insn);
6702 if (asm_noperands (PATTERN (insn)) >= 0
6703 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6704 {
6705 while (bundle_pos < 3)
6706 {
6707 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6708 bundle_pos++;
6709 }
6710 continue;
6711 }
6712
6713 if (t == TYPE_UNKNOWN)
6714 continue;
6715 while (bundle_pos < 3)
6716 {
6717 if (t == b->t[bundle_pos]
6718 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6719 || b->t[bundle_pos] == TYPE_I)))
6720 break;
6721
6722 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6723 bundle_pos++;
6724 }
6725 if (bundle_pos < 3)
6726 bundle_pos++;
6727 }
6728 }
6729 }
6730
6731 /* Perform machine dependent operations on the rtl chain INSNS. */
6732
6733 void
6734 ia64_reorg (insns)
6735 rtx insns;
6736 {
6737 /* If optimizing, we'll have split before scheduling. */
6738 if (optimize == 0)
6739 split_all_insns_noflow ();
6740
6741 /* Make sure the CFG and global_live_at_start are correct
6742 for emit_predicate_relation_info. */
6743 find_basic_blocks (insns, max_reg_num (), NULL);
6744 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6745
6746 if (ia64_flag_schedule_insns2)
6747 {
6748 timevar_push (TV_SCHED2);
6749 ia64_final_schedule = 1;
6750 schedule_ebbs (rtl_dump_file);
6751 ia64_final_schedule = 0;
6752 timevar_pop (TV_SCHED2);
6753
6754     /* This relies on the NOTE_INSN_BASIC_BLOCK notes being in the same
6755 place as they were during scheduling.  */
6756 emit_insn_group_barriers (rtl_dump_file, insns);
6757 ia64_emit_nops ();
6758 }
6759 else
6760 emit_all_insn_group_barriers (rtl_dump_file, insns);
6761
6762   /* A call must not be the last instruction in a function: the return
6763      address must remain within the function so that unwinding works
6764      properly.  Note that IA-64 differs from dwarf2 on this point.  */
6765 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6766 {
6767 rtx insn;
6768 int saw_stop = 0;
6769
6770 insn = get_last_insn ();
6771 if (! INSN_P (insn))
6772 insn = prev_active_insn (insn);
6773 if (GET_CODE (insn) == INSN
6774 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6775 && XINT (PATTERN (insn), 1) == 2)
6776 {
6777 saw_stop = 1;
6778 insn = prev_active_insn (insn);
6779 }
6780 if (GET_CODE (insn) == CALL_INSN)
6781 {
6782 if (! saw_stop)
6783 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6784 emit_insn (gen_break_f ());
6785 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6786 }
6787 }
6788
6789 fixup_errata ();
6790 emit_predicate_relation_info ();
6791 }
6792 \f
6793 /* Return true if REGNO is used by the epilogue. */
6794
6795 int
6796 ia64_epilogue_uses (regno)
6797 int regno;
6798 {
6799 switch (regno)
6800 {
6801 case R_GR (1):
6802 /* When a function makes a call through a function descriptor, we
6803 will write a (potentially) new value to "gp". After returning
6804 from such a call, we need to make sure the function restores the
6805 original gp-value, even if the function itself does not use the
6806 gp anymore. */
6807 return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));
6808
6809 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
6810 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
6811 /* For functions defined with the syscall_linkage attribute, all
6812 input registers are marked as live at all function exits. This
6813 prevents the register allocator from using the input registers,
6814 which in turn makes it possible to restart a system call after
6815 an interrupt without having to save/restore the input registers.
6816 This also prevents kernel data from leaking to application code. */
6817 return lookup_attribute ("syscall_linkage",
6818 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
6819
6820 case R_BR (0):
6821 /* Conditional return patterns can't represent the use of `b0' as
6822 the return address, so we force the value live this way. */
6823 return 1;
6824
6825 case AR_PFS_REGNUM:
6826 /* Likewise for ar.pfs, which is used by br.ret. */
6827 return 1;
6828
6829 default:
6830 return 0;
6831 }
6832 }
6833
6834 /* Table of valid machine attributes. */
6835 const struct attribute_spec ia64_attribute_table[] =
6836 {
6837 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6838 { "syscall_linkage", 0, 0, false, true, true, NULL },
6839 { NULL, 0, 0, false, false, false, NULL }
6840 };
6841 \f
6842 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6843
6844 We add @ to the name if this goes in small data/bss. We can only put
6845 a variable in small data/bss if it is defined in this module or a module
6846 that we are statically linked with. We can't check the second condition,
6847 but TREE_STATIC gives us the first one. */
6848
6849 /* ??? If we had IPA, we could check the second condition. We could support
6850 programmer added section attributes if the variable is not defined in this
6851 module. */
6852
6853 /* ??? See the v850 port for a cleaner way to do this. */
6854
6855 /* ??? We could also support own long data here. Generating movl/add/ld8
6856 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6857 code faster because there is one less load. This also includes incomplete
6858 types which can't go in sdata/sbss. */
6859
6860 /* ??? See select_section. We must put short own readonly variables in
6861 sdata/sbss instead of the more natural rodata, because we can't perform
6862 the DECL_READONLY_SECTION test here. */
6863
6864 extern struct obstack * saveable_obstack;
6865
6866 void
6867 ia64_encode_section_info (decl)
6868 tree decl;
6869 {
6870 const char *symbol_str;
6871
6872 if (TREE_CODE (decl) == FUNCTION_DECL)
6873 {
6874 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6875 return;
6876 }
6877
6878 /* Careful not to prod global register variables. */
6879 if (TREE_CODE (decl) != VAR_DECL
6880 || GET_CODE (DECL_RTL (decl)) != MEM
6881 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6882 return;
6883
6884 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6885
6886 /* We assume that -fpic is used only to create a shared library (dso).
6887 With -fpic, no global data can ever be sdata.
6888 Without -fpic, global common uninitialized data can never be sdata, since
6889 it can unify with a real definition in a dso. */
6890 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6891 to access them. The linker may then be able to do linker relaxation to
6892 optimize references to them. Currently sdata implies use of gprel. */
6893 /* We need the DECL_EXTERNAL check for C++. static class data members get
6894 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6895 statically allocated, but the space is allocated somewhere else. Such
6896 decls can not be own data. */
6897 if (! TARGET_NO_SDATA
6898 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6899 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6900 && ! (TREE_PUBLIC (decl)
6901 && (flag_pic
6902 || (DECL_COMMON (decl)
6903 && (DECL_INITIAL (decl) == 0
6904 || DECL_INITIAL (decl) == error_mark_node))))
6905 /* Either the variable must be declared without a section attribute,
6906 or the section must be sdata or sbss. */
6907 && (DECL_SECTION_NAME (decl) == 0
6908 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6909 ".sdata")
6910 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6911 ".sbss")))
6912 {
6913 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
6914
6915 /* If the variable has already been defined in the output file, then it
6916 is too late to put it in sdata if it wasn't put there in the first
6917 place. The test is here rather than above, because if it is already
6918 in sdata, then it can stay there. */
6919
6920 if (TREE_ASM_WRITTEN (decl))
6921 ;
6922
6923 /* If this is an incomplete type with size 0, then we can't put it in
6924 sdata because it might be too big when completed. */
6925 else if (size > 0
6926 && size <= (HOST_WIDE_INT) ia64_section_threshold
6927 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
6928 {
6929 size_t len = strlen (symbol_str);
6930 char *newstr = alloca (len + 1);
6931 const char *string;
6932
6933 *newstr = SDATA_NAME_FLAG_CHAR;
6934 memcpy (newstr + 1, symbol_str, len + 1);
6935
6936 string = ggc_alloc_string (newstr, len + 1);
6937 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
6938 }
6939 }
6940 /* This decl is marked as being in small data/bss but it shouldn't
6941 be; one likely explanation for this is that the decl has been
6942 moved into a different section from the one it was in when
6943      ENCODE_SECTION_INFO was first called.  Remove the '@'.  */
6944 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6945 {
6946 XSTR (XEXP (DECL_RTL (decl), 0), 0)
6947 = ggc_strdup (symbol_str + 1);
6948 }
6949 }
6950 \f
6951 /* Output assembly directives for prologue regions. */
6952
6953 /* The current basic block number. */
6954
6955 static int block_num;
6956
6957 /* True if we need a copy_state command at the start of the next block. */
6958
6959 static int need_copy_state;
6960
6961 /* The function emits unwind directives for the start of an epilogue. */
6962
6963 static void
6964 process_epilogue ()
6965 {
6966 /* If this isn't the last block of the function, then we need to label the
6967 current state, and copy it back in at the start of the next block. */
6968
6969 if (block_num != n_basic_blocks - 1)
6970 {
6971 fprintf (asm_out_file, "\t.label_state 1\n");
6972 need_copy_state = 1;
6973 }
6974
6975 fprintf (asm_out_file, "\t.restore sp\n");
6976 }
6977
6978 /* This function processes a SET pattern looking for specific patterns
6979 which result in emitting an assembly directive required for unwinding. */
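/* For example, the prologue insn "sp = sp - N" produces ".fframe N", copying
   b0 into its GR save register produces ".save rp, rNN", restoring sp from
   the hard frame pointer produces ".restore sp", and a store of a call-saved
   GR, FR or BR into the memory save area produces the corresponding
   ".save.g", ".save.f" or ".save.b" mask directive.  */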
6980
6981 static int
6982 process_set (asm_out_file, pat)
6983 FILE *asm_out_file;
6984 rtx pat;
6985 {
6986 rtx src = SET_SRC (pat);
6987 rtx dest = SET_DEST (pat);
6988 int src_regno, dest_regno;
6989
6990 /* Look for the ALLOC insn. */
6991 if (GET_CODE (src) == UNSPEC_VOLATILE
6992 && XINT (src, 1) == 0
6993 && GET_CODE (dest) == REG)
6994 {
6995 dest_regno = REGNO (dest);
6996
6997 /* If this isn't the final destination for ar.pfs, the alloc
6998 shouldn't have been marked frame related. */
6999 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7000 abort ();
7001
7002 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7003 ia64_dbx_register_number (dest_regno));
7004 return 1;
7005 }
7006
7007 /* Look for SP = .... */
7008 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7009 {
7010 if (GET_CODE (src) == PLUS)
7011 {
7012 rtx op0 = XEXP (src, 0);
7013 rtx op1 = XEXP (src, 1);
7014 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7015 {
7016 if (INTVAL (op1) < 0)
7017 {
7018 fputs ("\t.fframe ", asm_out_file);
7019 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
7020 -INTVAL (op1));
7021 fputc ('\n', asm_out_file);
7022 }
7023 else
7024 process_epilogue ();
7025 }
7026 else
7027 abort ();
7028 }
7029 else if (GET_CODE (src) == REG
7030 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7031 process_epilogue ();
7032 else
7033 abort ();
7034
7035 return 1;
7036 }
7037
7038 /* Register move we need to look at. */
7039 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7040 {
7041 src_regno = REGNO (src);
7042 dest_regno = REGNO (dest);
7043
7044 switch (src_regno)
7045 {
7046 case BR_REG (0):
7047 /* Saving return address pointer. */
7048 if (dest_regno != current_frame_info.reg_save_b0)
7049 abort ();
7050 fprintf (asm_out_file, "\t.save rp, r%d\n",
7051 ia64_dbx_register_number (dest_regno));
7052 return 1;
7053
7054 case PR_REG (0):
7055 if (dest_regno != current_frame_info.reg_save_pr)
7056 abort ();
7057 fprintf (asm_out_file, "\t.save pr, r%d\n",
7058 ia64_dbx_register_number (dest_regno));
7059 return 1;
7060
7061 case AR_UNAT_REGNUM:
7062 if (dest_regno != current_frame_info.reg_save_ar_unat)
7063 abort ();
7064 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7065 ia64_dbx_register_number (dest_regno));
7066 return 1;
7067
7068 case AR_LC_REGNUM:
7069 if (dest_regno != current_frame_info.reg_save_ar_lc)
7070 abort ();
7071 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7072 ia64_dbx_register_number (dest_regno));
7073 return 1;
7074
7075 case STACK_POINTER_REGNUM:
7076 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7077 || ! frame_pointer_needed)
7078 abort ();
7079 fprintf (asm_out_file, "\t.vframe r%d\n",
7080 ia64_dbx_register_number (dest_regno));
7081 return 1;
7082
7083 default:
7084 /* Everything else should indicate being stored to memory. */
7085 abort ();
7086 }
7087 }
7088
7089 /* Memory store we need to look at. */
7090 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7091 {
7092 long off;
7093 rtx base;
7094 const char *saveop;
7095
7096 if (GET_CODE (XEXP (dest, 0)) == REG)
7097 {
7098 base = XEXP (dest, 0);
7099 off = 0;
7100 }
7101 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7102 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7103 {
7104 base = XEXP (XEXP (dest, 0), 0);
7105 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7106 }
7107 else
7108 abort ();
7109
7110 if (base == hard_frame_pointer_rtx)
7111 {
7112 saveop = ".savepsp";
7113 off = - off;
7114 }
7115 else if (base == stack_pointer_rtx)
7116 saveop = ".savesp";
7117 else
7118 abort ();
7119
7120 src_regno = REGNO (src);
7121 switch (src_regno)
7122 {
7123 case BR_REG (0):
7124 if (current_frame_info.reg_save_b0 != 0)
7125 abort ();
7126 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7127 return 1;
7128
7129 case PR_REG (0):
7130 if (current_frame_info.reg_save_pr != 0)
7131 abort ();
7132 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7133 return 1;
7134
7135 case AR_LC_REGNUM:
7136 if (current_frame_info.reg_save_ar_lc != 0)
7137 abort ();
7138 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7139 return 1;
7140
7141 case AR_PFS_REGNUM:
7142 if (current_frame_info.reg_save_ar_pfs != 0)
7143 abort ();
7144 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7145 return 1;
7146
7147 case AR_UNAT_REGNUM:
7148 if (current_frame_info.reg_save_ar_unat != 0)
7149 abort ();
7150 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7151 return 1;
7152
7153 case GR_REG (4):
7154 case GR_REG (5):
7155 case GR_REG (6):
7156 case GR_REG (7):
7157 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7158 1 << (src_regno - GR_REG (4)));
7159 return 1;
7160
7161 case BR_REG (1):
7162 case BR_REG (2):
7163 case BR_REG (3):
7164 case BR_REG (4):
7165 case BR_REG (5):
7166 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7167 1 << (src_regno - BR_REG (1)));
7168 return 1;
7169
7170 case FR_REG (2):
7171 case FR_REG (3):
7172 case FR_REG (4):
7173 case FR_REG (5):
7174 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7175 1 << (src_regno - FR_REG (2)));
7176 return 1;
7177
7178 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7179 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7180 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7181 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7182 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7183 1 << (src_regno - FR_REG (12)));
7184 return 1;
7185
7186 default:
7187 return 0;
7188 }
7189 }
7190
7191 return 0;
7192 }
7193
7194
7195 /* This function looks at a single insn and emits any directives
7196 required to unwind this insn. */
7197 void
7198 process_for_unwind_directive (asm_out_file, insn)
7199 FILE *asm_out_file;
7200 rtx insn;
7201 {
7202 if (flag_unwind_tables
7203 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7204 {
7205 rtx pat;
7206
7207 if (GET_CODE (insn) == NOTE
7208 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7209 {
7210 block_num = NOTE_BASIC_BLOCK (insn)->index;
7211
7212 /* Restore unwind state from immediately before the epilogue. */
7213 if (need_copy_state)
7214 {
7215 fprintf (asm_out_file, "\t.body\n");
7216 fprintf (asm_out_file, "\t.copy_state 1\n");
7217 need_copy_state = 0;
7218 }
7219 }
7220
7221 if (! RTX_FRAME_RELATED_P (insn))
7222 return;
7223
7224 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7225 if (pat)
7226 pat = XEXP (pat, 0);
7227 else
7228 pat = PATTERN (insn);
7229
7230 switch (GET_CODE (pat))
7231 {
7232 case SET:
7233 process_set (asm_out_file, pat);
7234 break;
7235
7236 case PARALLEL:
7237 {
7238 int par_index;
7239 int limit = XVECLEN (pat, 0);
7240 for (par_index = 0; par_index < limit; par_index++)
7241 {
7242 rtx x = XVECEXP (pat, 0, par_index);
7243 if (GET_CODE (x) == SET)
7244 process_set (asm_out_file, x);
7245 }
7246 break;
7247 }
7248
7249 default:
7250 abort ();
7251 }
7252 }
7253 }
7254
7255 \f
7256 void
7257 ia64_init_builtins ()
7258 {
7259 tree psi_type_node = build_pointer_type (integer_type_node);
7260 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7261 tree endlink = void_list_node;
7262
7263 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7264 tree si_ftype_psi_si_si
7265 = build_function_type (integer_type_node,
7266 tree_cons (NULL_TREE, psi_type_node,
7267 tree_cons (NULL_TREE, integer_type_node,
7268 tree_cons (NULL_TREE,
7269 integer_type_node,
7270 endlink))));
7271
7272 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7273 tree di_ftype_pdi_di_di
7274 = build_function_type (long_integer_type_node,
7275 tree_cons (NULL_TREE, pdi_type_node,
7276 tree_cons (NULL_TREE,
7277 long_integer_type_node,
7278 tree_cons (NULL_TREE,
7279 long_integer_type_node,
7280 endlink))));
7281 /* __sync_synchronize */
7282 tree void_ftype_void
7283 = build_function_type (void_type_node, endlink);
7284
7285 /* __sync_lock_test_and_set_si */
7286 tree si_ftype_psi_si
7287 = build_function_type (integer_type_node,
7288 tree_cons (NULL_TREE, psi_type_node,
7289 tree_cons (NULL_TREE, integer_type_node, endlink)));
7290
7291 /* __sync_lock_test_and_set_di */
7292 tree di_ftype_pdi_di
7293 = build_function_type (long_integer_type_node,
7294 tree_cons (NULL_TREE, pdi_type_node,
7295 tree_cons (NULL_TREE, long_integer_type_node,
7296 endlink)));
7297
7298 /* __sync_lock_release_si */
7299 tree void_ftype_psi
7300 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
7301 endlink));
7302
7303 /* __sync_lock_release_di */
7304 tree void_ftype_pdi
7305 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
7306 endlink));
7307
7308 #define def_builtin(name, type, code) \
7309 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
7310
7311 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7312 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7313 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7314 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7315 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7316 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7317 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7318 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7319
7320 def_builtin ("__sync_synchronize", void_ftype_void,
7321 IA64_BUILTIN_SYNCHRONIZE);
7322
7323 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7324 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7325 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7326 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7327 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7328 IA64_BUILTIN_LOCK_RELEASE_SI);
7329 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7330 IA64_BUILTIN_LOCK_RELEASE_DI);
7331
7332 def_builtin ("__builtin_ia64_bsp",
7333 build_function_type (ptr_type_node, endlink),
7334 IA64_BUILTIN_BSP);
7335
7336 def_builtin ("__builtin_ia64_flushrs",
7337 build_function_type (void_type_node, endlink),
7338 IA64_BUILTIN_FLUSHRS);
7339
7340 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7341 IA64_BUILTIN_FETCH_AND_ADD_SI);
7342 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7343 IA64_BUILTIN_FETCH_AND_SUB_SI);
7344 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7345 IA64_BUILTIN_FETCH_AND_OR_SI);
7346 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7347 IA64_BUILTIN_FETCH_AND_AND_SI);
7348 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7349 IA64_BUILTIN_FETCH_AND_XOR_SI);
7350 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7351 IA64_BUILTIN_FETCH_AND_NAND_SI);
7352
7353 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7354 IA64_BUILTIN_ADD_AND_FETCH_SI);
7355 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7356 IA64_BUILTIN_SUB_AND_FETCH_SI);
7357 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7358 IA64_BUILTIN_OR_AND_FETCH_SI);
7359 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7360 IA64_BUILTIN_AND_AND_FETCH_SI);
7361 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7362 IA64_BUILTIN_XOR_AND_FETCH_SI);
7363 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7364 IA64_BUILTIN_NAND_AND_FETCH_SI);
7365
7366 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7367 IA64_BUILTIN_FETCH_AND_ADD_DI);
7368 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7369 IA64_BUILTIN_FETCH_AND_SUB_DI);
7370 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7371 IA64_BUILTIN_FETCH_AND_OR_DI);
7372 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7373 IA64_BUILTIN_FETCH_AND_AND_DI);
7374 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7375 IA64_BUILTIN_FETCH_AND_XOR_DI);
7376 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7377 IA64_BUILTIN_FETCH_AND_NAND_DI);
7378
7379 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7380 IA64_BUILTIN_ADD_AND_FETCH_DI);
7381 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7382 IA64_BUILTIN_SUB_AND_FETCH_DI);
7383 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7384 IA64_BUILTIN_OR_AND_FETCH_DI);
7385 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7386 IA64_BUILTIN_AND_AND_FETCH_DI);
7387 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7388 IA64_BUILTIN_XOR_AND_FETCH_DI);
7389 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7390 IA64_BUILTIN_NAND_AND_FETCH_DI);
7391
7392 #undef def_builtin
7393 }
7394
7395 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7396
7397 mf
7398 tmp = [ptr];
7399 do {
7400 ret = tmp;
7401 ar.ccv = tmp;
7402 tmp <op>= value;
7403 cmpxchgsz.acq tmp = [ptr], tmp
7404 } while (tmp != ret)
7405 */
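/* The loop retries until the cmpxchg sees the same value that was used to
   compute the update, so RET ends up holding the memory contents from before
   the atomically applied operation -- e.g. __sync_fetch_and_add_si (p, v)
   returns the old *p.  */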
7406
7407 static rtx
7408 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7409 optab binoptab;
7410 enum machine_mode mode;
7411 tree arglist;
7412 rtx target;
7413 {
7414 rtx ret, label, tmp, ccv, insn, mem, value;
7415 tree arg0, arg1;
7416
7417 arg0 = TREE_VALUE (arglist);
7418 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7419 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7420 value = expand_expr (arg1, NULL_RTX, mode, 0);
7421
7422 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7423 MEM_VOLATILE_P (mem) = 1;
7424
7425 if (target && register_operand (target, mode))
7426 ret = target;
7427 else
7428 ret = gen_reg_rtx (mode);
7429
7430 emit_insn (gen_mf ());
7431
7432 /* Special case for fetchadd instructions. */
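  /* fetchadd can only add one of the few immediate values the instruction
     encodes (checked by fetchadd_operand), but when it applies it avoids the
     compare-and-swap retry loop entirely.  */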
7433 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7434 {
7435 if (mode == SImode)
7436 insn = gen_fetchadd_acq_si (ret, mem, value);
7437 else
7438 insn = gen_fetchadd_acq_di (ret, mem, value);
7439 emit_insn (insn);
7440 return ret;
7441 }
7442
7443 tmp = gen_reg_rtx (mode);
7444 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7445 emit_move_insn (tmp, mem);
7446
7447 label = gen_label_rtx ();
7448 emit_label (label);
7449 emit_move_insn (ret, tmp);
7450 emit_move_insn (ccv, tmp);
7451
7452   /* Perform the specific operation.  NAND is signalled by passing
7453      one_cmpl_optab; complement TMP first, then fall through to AND.  */
7454 if (binoptab == one_cmpl_optab)
7455 {
7456 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7457 binoptab = and_optab;
7458 }
7459 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7460
7461 if (mode == SImode)
7462 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7463 else
7464 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7465 emit_insn (insn);
7466
7467 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7468
7469 return ret;
7470 }
7471
7472 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7473
7474 mf
7475 tmp = [ptr];
7476 do {
7477 old = tmp;
7478 ar.ccv = tmp;
7479 ret = tmp + value;
7480 cmpxchgsz.acq tmp = [ptr], ret
7481 } while (tmp != old)
7482 */
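
/* Illustration only: the same compare-and-swap loop as fetch_and_op above,
   except that the value handed back to the caller is RET, the result of
   the operation, rather than the previous contents of the location:

       tmp = *ptr;
       do {
         old = tmp;
         ret = old <op> value;
         tmp = CMPXCHG (ptr, old, ret);
       } while (tmp != old);
       return ret;
*/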
7483
7484 static rtx
7485 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7486 optab binoptab;
7487 enum machine_mode mode;
7488 tree arglist;
7489 rtx target;
7490 {
7491 rtx old, label, tmp, ret, ccv, insn, mem, value;
7492 tree arg0, arg1;
7493
7494 arg0 = TREE_VALUE (arglist);
7495 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7496 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7497 value = expand_expr (arg1, NULL_RTX, mode, 0);
7498
7499 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7500 MEM_VOLATILE_P (mem) = 1;
7501
7502 if (target && ! register_operand (target, mode))
7503 target = NULL_RTX;
7504
7505 emit_insn (gen_mf ());
7506 tmp = gen_reg_rtx (mode);
7507 old = gen_reg_rtx (mode);
7508 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7509
7510 emit_move_insn (tmp, mem);
7511
7512 label = gen_label_rtx ();
7513 emit_label (label);
7514 emit_move_insn (old, tmp);
7515 emit_move_insn (ccv, tmp);
7516
7517 /* Perform the specific operation. The NAND case is flagged by the caller
7518 passing one_cmpl_optab: complement TMP first, then AND it with VALUE. */
7519 if (binoptab == one_cmpl_optab)
7520 {
7521 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7522 binoptab = and_optab;
7523 }
7524 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7525
7526 if (mode == SImode)
7527 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7528 else
7529 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7530 emit_insn (insn);
7531
7532 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7533
7534 return ret;
7535 }
7536
7537 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7538
7539 ar.ccv = oldval
7540 mf
7541 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7542 return ret
7543
7544 For bool_ it's the same except return ret == oldval.
7545 */
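
/* A hedged usage sketch (assuming the user-visible spellings registered
   earlier by ia64_init_builtins are __sync_val_compare_and_swap_si and
   __sync_bool_compare_and_swap_si):

       int prev = __sync_val_compare_and_swap_si (&x, oldval, newval);
       int done = __sync_bool_compare_and_swap_si (&x, oldval, newval);

   PREV is the previous contents of X, which were replaced by NEWVAL only
   if they equalled OLDVAL; DONE is nonzero exactly when that replacement
   happened.  */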
7546
7547 static rtx
7548 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7549 enum machine_mode mode;
7550 int boolp;
7551 tree arglist;
7552 rtx target;
7553 {
7554 tree arg0, arg1, arg2;
7555 rtx mem, old, new, ccv, tmp, insn;
7556
7557 arg0 = TREE_VALUE (arglist);
7558 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7559 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7560 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7561 old = expand_expr (arg1, NULL_RTX, mode, 0);
7562 new = expand_expr (arg2, NULL_RTX, mode, 0);
7563
7564 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7565 MEM_VOLATILE_P (mem) = 1;
7566
7567 if (! register_operand (old, mode))
7568 old = copy_to_mode_reg (mode, old);
7569 if (! register_operand (new, mode))
7570 new = copy_to_mode_reg (mode, new);
7571
7572 if (! boolp && target && register_operand (target, mode))
7573 tmp = target;
7574 else
7575 tmp = gen_reg_rtx (mode);
7576
7577 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7578 emit_move_insn (ccv, old);
7579 emit_insn (gen_mf ());
7580 if (mode == SImode)
7581 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7582 else
7583 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7584 emit_insn (insn);
7585
7586 if (boolp)
7587 {
7588 if (! target)
7589 target = gen_reg_rtx (mode);
7590 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7591 }
7592 else
7593 return tmp;
7594 }
7595
7596 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7597
7598 static rtx
7599 ia64_expand_lock_test_and_set (mode, arglist, target)
7600 enum machine_mode mode;
7601 tree arglist;
7602 rtx target;
7603 {
7604 tree arg0, arg1;
7605 rtx mem, new, ret, insn;
7606
7607 arg0 = TREE_VALUE (arglist);
7608 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7609 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7610 new = expand_expr (arg1, NULL_RTX, mode, 0);
7611
7612 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7613 MEM_VOLATILE_P (mem) = 1;
7614 if (! register_operand (new, mode))
7615 new = copy_to_mode_reg (mode, new);
7616
7617 if (target && register_operand (target, mode))
7618 ret = target;
7619 else
7620 ret = gen_reg_rtx (mode);
7621
7622 if (mode == SImode)
7623 insn = gen_xchgsi (ret, mem, new);
7624 else
7625 insn = gen_xchgdi (ret, mem, new);
7626 emit_insn (insn);
7627
7628 return ret;
7629 }
7630
7631 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7632
7633 static rtx
7634 ia64_expand_lock_release (mode, arglist, target)
7635 enum machine_mode mode;
7636 tree arglist;
7637 rtx target ATTRIBUTE_UNUSED;
7638 {
7639 tree arg0;
7640 rtx mem;
7641
7642 arg0 = TREE_VALUE (arglist);
7643 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7644
7645 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7646 MEM_VOLATILE_P (mem) = 1;
7647
7648 emit_move_insn (mem, const0_rtx);
7649
7650 return const0_rtx;
7651 }
7652
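/* Taken together, the two expanders above provide a simple spin lock; a
   usage sketch, assuming the spellings registered earlier by
   ia64_init_builtins are __sync_lock_test_and_set_si and
   __sync_lock_release_si:

       int lock;

       void acquire (void)
       {
         while (__sync_lock_test_and_set_si (&lock, 1) != 0)
           ;
       }

       void release (void)
       {
         __sync_lock_release_si (&lock);
       }

   The acquire loop spins until the exchange returns 0 (the lock was free);
   the release is the store of zero emitted above, intended to have release
   semantics per the comment preceding ia64_expand_lock_release.  */
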
7653 rtx
7654 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
7655 tree exp;
7656 rtx target;
7657 rtx subtarget ATTRIBUTE_UNUSED;
7658 enum machine_mode mode ATTRIBUTE_UNUSED;
7659 int ignore ATTRIBUTE_UNUSED;
7660 {
7661 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7662 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7663 tree arglist = TREE_OPERAND (exp, 1);
7664
7665 switch (fcode)
7666 {
7667 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7668 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7669 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7670 case IA64_BUILTIN_LOCK_RELEASE_SI:
7671 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7672 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7673 case IA64_BUILTIN_FETCH_AND_OR_SI:
7674 case IA64_BUILTIN_FETCH_AND_AND_SI:
7675 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7676 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7677 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7678 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7679 case IA64_BUILTIN_OR_AND_FETCH_SI:
7680 case IA64_BUILTIN_AND_AND_FETCH_SI:
7681 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7682 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7683 mode = SImode;
7684 break;
7685
7686 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7687 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7688 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7689 case IA64_BUILTIN_LOCK_RELEASE_DI:
7690 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7691 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7692 case IA64_BUILTIN_FETCH_AND_OR_DI:
7693 case IA64_BUILTIN_FETCH_AND_AND_DI:
7694 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7695 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7696 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7697 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7698 case IA64_BUILTIN_OR_AND_FETCH_DI:
7699 case IA64_BUILTIN_AND_AND_FETCH_DI:
7700 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7701 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7702 mode = DImode;
7703 break;
7704
7705 default:
7706 break;
7707 }
7708
7709 switch (fcode)
7710 {
7711 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7712 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7713 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
7714
7715 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7716 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7717 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
7718
7719 case IA64_BUILTIN_SYNCHRONIZE:
7720 emit_insn (gen_mf ());
7721 return const0_rtx;
7722
7723 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7724 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7725 return ia64_expand_lock_test_and_set (mode, arglist, target);
7726
7727 case IA64_BUILTIN_LOCK_RELEASE_SI:
7728 case IA64_BUILTIN_LOCK_RELEASE_DI:
7729 return ia64_expand_lock_release (mode, arglist, target);
7730
7731 case IA64_BUILTIN_BSP:
7732 if (! target || ! register_operand (target, DImode))
7733 target = gen_reg_rtx (DImode);
7734 emit_insn (gen_bsp_value (target));
7735 return target;
7736
7737 case IA64_BUILTIN_FLUSHRS:
7738 emit_insn (gen_flushrs ());
7739 return const0_rtx;
7740
7741 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7742 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7743 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
7744
7745 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7746 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7747 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
7748
7749 case IA64_BUILTIN_FETCH_AND_OR_SI:
7750 case IA64_BUILTIN_FETCH_AND_OR_DI:
7751 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
7752
7753 case IA64_BUILTIN_FETCH_AND_AND_SI:
7754 case IA64_BUILTIN_FETCH_AND_AND_DI:
7755 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
7756
7757 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7758 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7759 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
7760
7761 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7762 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7763 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
7764
7765 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7766 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7767 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
7768
7769 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7770 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7771 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
7772
7773 case IA64_BUILTIN_OR_AND_FETCH_SI:
7774 case IA64_BUILTIN_OR_AND_FETCH_DI:
7775 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
7776
7777 case IA64_BUILTIN_AND_AND_FETCH_SI:
7778 case IA64_BUILTIN_AND_AND_FETCH_DI:
7779 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
7780
7781 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7782 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7783 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
7784
7785 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7786 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7787 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
7788
7789 default:
7790 break;
7791 }
7792
7793 return NULL_RTX;
7794 }
7795
7796 /* On HP-UX IA64, aggregate parameters are passed stored in the most
7797 significant bits of the stack slot. */
7798
7799 enum direction
7800 ia64_hpux_function_arg_padding (mode, type)
7801 enum machine_mode mode;
7802 tree type;
7803 {
7804 /* Exception to normal case for structures/unions/etc. */
7805
7806 if (type && AGGREGATE_TYPE_P (type)
7807 && int_size_in_bytes (type) < UNITS_PER_WORD)
7808 return upward;
7809
7810 /* This is the standard FUNCTION_ARG_PADDING with BYTES_BIG_ENDIAN
7811 hardwired to true. */
7812
7813 return ((mode == BLKmode
7814 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7815 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
7816 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
7817 ? downward : upward);
7818 }
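
/* For example (illustration only): a three-byte struct argument satisfies
   the aggregate test above (3 < UNITS_PER_WORD) and is therefore padded
   upward, landing in the most significant bytes of its slot, while a plain
   `int' falls through to the generic test and, being narrower than
   PARM_BOUNDARY, is padded downward.  */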