1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
5
6 This file is part of GNU CC.
7
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-attr.h"
35 #include "flags.h"
36 #include "recog.h"
37 #include "expr.h"
38 #include "obstack.h"
39 #include "except.h"
40 #include "function.h"
41 #include "ggc.h"
42 #include "basic-block.h"
43 #include "toplev.h"
44 #include "sched-int.h"
45 #include "target.h"
46 #include "target-def.h"
47
48 /* This is used for communication between ASM_OUTPUT_LABEL and
49 ASM_OUTPUT_LABELREF. */
50 int ia64_asm_output_label = 0;
51
52 /* Define the information needed to generate branch and scc insns. This is
53 stored from the compare operation. */
54 struct rtx_def * ia64_compare_op0;
55 struct rtx_def * ia64_compare_op1;
56
57 /* Register names for ia64_expand_prologue. */
58 static const char * const ia64_reg_numbers[96] =
59 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
60 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
61 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
62 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
63 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
64 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
65 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
66 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
67 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
68 "r104","r105","r106","r107","r108","r109","r110","r111",
69 "r112","r113","r114","r115","r116","r117","r118","r119",
70 "r120","r121","r122","r123","r124","r125","r126","r127"};
71
72 /* ??? These strings could be shared with REGISTER_NAMES. */
73 static const char * const ia64_input_reg_names[8] =
74 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
75
76 /* ??? These strings could be shared with REGISTER_NAMES. */
77 static const char * const ia64_local_reg_names[80] =
78 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
79 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
80 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
81 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
82 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
83 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
84 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
85 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
86 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
87 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
88
89 /* ??? These strings could be shared with REGISTER_NAMES. */
90 static const char * const ia64_output_reg_names[8] =
91 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
92
93 /* String used with the -mfixed-range= option. */
94 const char *ia64_fixed_range_string;
95
96 /* Determines whether we run our final scheduling pass or not. We always
97 avoid the normal second scheduling pass. */
98 static int ia64_flag_schedule_insns2;
99
100 /* Variables which are this size or smaller are put in the sdata/sbss
101 sections. */
102
103 unsigned int ia64_section_threshold;
104 \f
105 static int find_gr_spill PARAMS ((int));
106 static int next_scratch_gr_reg PARAMS ((void));
107 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
108 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
109 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
110 static void finish_spill_pointers PARAMS ((void));
111 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
112 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
113 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
114 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
115 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
116 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
117
118 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
119 static void fix_range PARAMS ((const char *));
120 static void ia64_add_gc_roots PARAMS ((void));
121 static void ia64_init_machine_status PARAMS ((struct function *));
122 static void ia64_mark_machine_status PARAMS ((struct function *));
123 static void ia64_free_machine_status PARAMS ((struct function *));
124 static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
125 static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
126 static void emit_predicate_relation_info PARAMS ((void));
127 static void process_epilogue PARAMS ((void));
128 static int process_set PARAMS ((FILE *, rtx));
129
130 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
131 tree, rtx));
132 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
133 tree, rtx));
134 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
135 tree, rtx));
136 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
137 tree, rtx));
138 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
139 static int ia64_valid_type_attribute PARAMS((tree, tree, tree, tree));
140 static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
141 static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
142 static void ia64_output_function_end_prologue PARAMS ((FILE *));
143 \f
144 /* Initialize the GCC target structure. */
145 #undef TARGET_VALID_TYPE_ATTRIBUTE
146 #define TARGET_VALID_TYPE_ATTRIBUTE ia64_valid_type_attribute
147
148 #undef TARGET_INIT_BUILTINS
149 #define TARGET_INIT_BUILTINS ia64_init_builtins
150
151 #undef TARGET_EXPAND_BUILTIN
152 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
153
154 #undef TARGET_ASM_FUNCTION_PROLOGUE
155 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
156 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
157 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
158 #undef TARGET_ASM_FUNCTION_EPILOGUE
159 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
160
161 struct gcc_target targetm = TARGET_INITIALIZER;
162 \f
163 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
164
165 int
166 call_operand (op, mode)
167 rtx op;
168 enum machine_mode mode;
169 {
170 if (mode != GET_MODE (op))
171 return 0;
172
173 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
174 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
175 }
176
177 /* Return 1 if OP refers to a symbol in the sdata section. */
178
179 int
180 sdata_symbolic_operand (op, mode)
181 rtx op;
182 enum machine_mode mode ATTRIBUTE_UNUSED;
183 {
184 switch (GET_CODE (op))
185 {
186 case CONST:
187 if (GET_CODE (XEXP (op, 0)) != PLUS
188 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
189 break;
190 op = XEXP (XEXP (op, 0), 0);
191 /* FALLTHRU */
192
193 case SYMBOL_REF:
194 if (CONSTANT_POOL_ADDRESS_P (op))
195 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
196 else
197 return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;
198
199 default:
200 break;
201 }
202
203 return 0;
204 }
205
206 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
207
208 int
209 got_symbolic_operand (op, mode)
210 rtx op;
211 enum machine_mode mode ATTRIBUTE_UNUSED;
212 {
213 switch (GET_CODE (op))
214 {
215 case CONST:
216 op = XEXP (op, 0);
217 if (GET_CODE (op) != PLUS)
218 return 0;
219 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
220 return 0;
221 op = XEXP (op, 1);
222 if (GET_CODE (op) != CONST_INT)
223 return 0;
224
225 return 1;
226
227 /* Ok if we're not using GOT entries at all. */
228 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
229 return 1;
230
231 /* "Ok" while emitting rtl, since otherwise we won't be provided
232 with the entire offset during emission, which makes it very
233 hard to split the offset into high and low parts. */
234 if (rtx_equal_function_value_matters)
235 return 1;
236
237 /* Force the low 14 bits of the constant to zero so that we do not
238 use up so many GOT entries. */
239 return (INTVAL (op) & 0x3fff) == 0;
240
241 case SYMBOL_REF:
242 case LABEL_REF:
243 return 1;
244
245 default:
246 break;
247 }
248 return 0;
249 }
250
251 /* Return 1 if OP refers to a symbol. */
252
253 int
254 symbolic_operand (op, mode)
255 rtx op;
256 enum machine_mode mode ATTRIBUTE_UNUSED;
257 {
258 switch (GET_CODE (op))
259 {
260 case CONST:
261 case SYMBOL_REF:
262 case LABEL_REF:
263 return 1;
264
265 default:
266 break;
267 }
268 return 0;
269 }
270
271 /* Return 1 if OP refers to a function. */
272
273 int
274 function_operand (op, mode)
275 rtx op;
276 enum machine_mode mode ATTRIBUTE_UNUSED;
277 {
278 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
279 return 1;
280 else
281 return 0;
282 }
283
284 /* Return 1 if OP is setjmp or a similar function. */
285
286 /* ??? This is an unsatisfying solution. Should rethink. */
287
288 int
289 setjmp_operand (op, mode)
290 rtx op;
291 enum machine_mode mode ATTRIBUTE_UNUSED;
292 {
293 const char *name;
294 int retval = 0;
295
296 if (GET_CODE (op) != SYMBOL_REF)
297 return 0;
298
299 name = XSTR (op, 0);
300
301 /* The following code is borrowed from special_function_p in calls.c. */
302
303 /* Disregard prefix _, __ or __x. */
304 if (name[0] == '_')
305 {
306 if (name[1] == '_' && name[2] == 'x')
307 name += 3;
308 else if (name[1] == '_')
309 name += 2;
310 else
311 name += 1;
312 }
313
314 if (name[0] == 's')
315 {
316 retval
317 = ((name[1] == 'e'
318 && (! strcmp (name, "setjmp")
319 || ! strcmp (name, "setjmp_syscall")))
320 || (name[1] == 'i'
321 && ! strcmp (name, "sigsetjmp"))
322 || (name[1] == 'a'
323 && ! strcmp (name, "savectx")));
324 }
325 else if ((name[0] == 'q' && name[1] == 's'
326 && ! strcmp (name, "qsetjmp"))
327 || (name[0] == 'v' && name[1] == 'f'
328 && ! strcmp (name, "vfork")))
329 retval = 1;
330
331 return retval;
332 }
333
334 /* Return 1 if OP is a general operand, but exclude symbolic operands
335 when generating PIC code. */
336
337 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
338 from PREDICATE_CODES. */
339
340 int
341 move_operand (op, mode)
342 rtx op;
343 enum machine_mode mode;
344 {
345 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
346 return 0;
347
348 return general_operand (op, mode);
349 }
350
351 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
352
353 int
354 gr_register_operand (op, mode)
355 rtx op;
356 enum machine_mode mode;
357 {
358 if (! register_operand (op, mode))
359 return 0;
360 if (GET_CODE (op) == SUBREG)
361 op = SUBREG_REG (op);
362 if (GET_CODE (op) == REG)
363 {
364 unsigned int regno = REGNO (op);
365 if (regno < FIRST_PSEUDO_REGISTER)
366 return GENERAL_REGNO_P (regno);
367 }
368 return 1;
369 }
370
371 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
372
373 int
374 fr_register_operand (op, mode)
375 rtx op;
376 enum machine_mode mode;
377 {
378 if (! register_operand (op, mode))
379 return 0;
380 if (GET_CODE (op) == SUBREG)
381 op = SUBREG_REG (op);
382 if (GET_CODE (op) == REG)
383 {
384 unsigned int regno = REGNO (op);
385 if (regno < FIRST_PSEUDO_REGISTER)
386 return FR_REGNO_P (regno);
387 }
388 return 1;
389 }
390
391 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
392
393 int
394 grfr_register_operand (op, mode)
395 rtx op;
396 enum machine_mode mode;
397 {
398 if (! register_operand (op, mode))
399 return 0;
400 if (GET_CODE (op) == SUBREG)
401 op = SUBREG_REG (op);
402 if (GET_CODE (op) == REG)
403 {
404 unsigned int regno = REGNO (op);
405 if (regno < FIRST_PSEUDO_REGISTER)
406 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
407 }
408 return 1;
409 }
410
411 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
412
413 int
414 gr_nonimmediate_operand (op, mode)
415 rtx op;
416 enum machine_mode mode;
417 {
418 if (! nonimmediate_operand (op, mode))
419 return 0;
420 if (GET_CODE (op) == SUBREG)
421 op = SUBREG_REG (op);
422 if (GET_CODE (op) == REG)
423 {
424 unsigned int regno = REGNO (op);
425 if (regno < FIRST_PSEUDO_REGISTER)
426 return GENERAL_REGNO_P (regno);
427 }
428 return 1;
429 }
430
431 /* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg. */
432
433 int
434 fr_nonimmediate_operand (op, mode)
435 rtx op;
436 enum machine_mode mode;
437 {
438 if (! nonimmediate_operand (op, mode))
439 return 0;
440 if (GET_CODE (op) == SUBREG)
441 op = SUBREG_REG (op);
442 if (GET_CODE (op) == REG)
443 {
444 unsigned int regno = REGNO (op);
445 if (regno < FIRST_PSEUDO_REGISTER)
446 return FR_REGNO_P (regno);
447 }
448 return 1;
449 }
450
451 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
452
453 int
454 grfr_nonimmediate_operand (op, mode)
455 rtx op;
456 enum machine_mode mode;
457 {
458 if (! nonimmediate_operand (op, mode))
459 return 0;
460 if (GET_CODE (op) == SUBREG)
461 op = SUBREG_REG (op);
462 if (GET_CODE (op) == REG)
463 {
464 unsigned int regno = REGNO (op);
465 if (regno < FIRST_PSEUDO_REGISTER)
466 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
467 }
468 return 1;
469 }
470
471 /* Return 1 if OP is a GR register operand, or the constant zero. */
472
473 int
474 gr_reg_or_0_operand (op, mode)
475 rtx op;
476 enum machine_mode mode;
477 {
478 return (op == const0_rtx || gr_register_operand (op, mode));
479 }
480
481 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
482
483 int
484 gr_reg_or_5bit_operand (op, mode)
485 rtx op;
486 enum machine_mode mode;
487 {
488 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
489 || GET_CODE (op) == CONSTANT_P_RTX
490 || gr_register_operand (op, mode));
491 }
492
493 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
494
495 int
496 gr_reg_or_6bit_operand (op, mode)
497 rtx op;
498 enum machine_mode mode;
499 {
500 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
501 || GET_CODE (op) == CONSTANT_P_RTX
502 || gr_register_operand (op, mode));
503 }
504
505 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
506
507 int
508 gr_reg_or_8bit_operand (op, mode)
509 rtx op;
510 enum machine_mode mode;
511 {
512 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
513 || GET_CODE (op) == CONSTANT_P_RTX
514 || gr_register_operand (op, mode));
515 }
516
517 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
518
519 int
520 grfr_reg_or_8bit_operand (op, mode)
521 rtx op;
522 enum machine_mode mode;
523 {
524 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
525 || GET_CODE (op) == CONSTANT_P_RTX
526 || grfr_register_operand (op, mode));
527 }
528
529 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
530 operand. */
531
532 int
533 gr_reg_or_8bit_adjusted_operand (op, mode)
534 rtx op;
535 enum machine_mode mode;
536 {
537 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
538 || GET_CODE (op) == CONSTANT_P_RTX
539 || gr_register_operand (op, mode));
540 }
541
542 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
543 immediate and an 8 bit adjusted immediate operand. This is necessary
544 because when we emit a compare, we don't know what the condition will be,
545 so the operand must be acceptable to both GT and LT comparisons. */
546
547 int
548 gr_reg_or_8bit_and_adjusted_operand (op, mode)
549 rtx op;
550 enum machine_mode mode;
551 {
552 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
553 && CONST_OK_FOR_L (INTVAL (op)))
554 || GET_CODE (op) == CONSTANT_P_RTX
555 || gr_register_operand (op, mode));
556 }
557
558 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
559
560 int
561 gr_reg_or_14bit_operand (op, mode)
562 rtx op;
563 enum machine_mode mode;
564 {
565 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
566 || GET_CODE (op) == CONSTANT_P_RTX
567 || gr_register_operand (op, mode));
568 }
569
570 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
571
572 int
573 gr_reg_or_22bit_operand (op, mode)
574 rtx op;
575 enum machine_mode mode;
576 {
577 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
578 || GET_CODE (op) == CONSTANT_P_RTX
579 || gr_register_operand (op, mode));
580 }
581
582 /* Return 1 if OP is a 6 bit immediate operand. */
583
584 int
585 shift_count_operand (op, mode)
586 rtx op;
587 enum machine_mode mode ATTRIBUTE_UNUSED;
588 {
589 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
590 || GET_CODE (op) == CONSTANT_P_RTX);
591 }
592
593 /* Return 1 if OP is a 5 bit immediate operand. */
594
595 int
596 shift_32bit_count_operand (op, mode)
597 rtx op;
598 enum machine_mode mode ATTRIBUTE_UNUSED;
599 {
600 return ((GET_CODE (op) == CONST_INT
601 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
602 || GET_CODE (op) == CONSTANT_P_RTX);
603 }
604
605 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
606
607 int
608 shladd_operand (op, mode)
609 rtx op;
610 enum machine_mode mode ATTRIBUTE_UNUSED;
611 {
612 return (GET_CODE (op) == CONST_INT
613 && (INTVAL (op) == 2 || INTVAL (op) == 4
614 || INTVAL (op) == 8 || INTVAL (op) == 16));
615 }
616
617 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
618
619 int
620 fetchadd_operand (op, mode)
621 rtx op;
622 enum machine_mode mode ATTRIBUTE_UNUSED;
623 {
624 return (GET_CODE (op) == CONST_INT
625 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
626 INTVAL (op) == -4 || INTVAL (op) == -1 ||
627 INTVAL (op) == 1 || INTVAL (op) == 4 ||
628 INTVAL (op) == 8 || INTVAL (op) == 16));
629 }
630
631 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
632
633 int
634 fr_reg_or_fp01_operand (op, mode)
635 rtx op;
636 enum machine_mode mode;
637 {
638 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
639 || fr_register_operand (op, mode));
640 }
641
642 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
643 POST_MODIFY with a REG as displacement. */
644
645 int
646 destination_operand (op, mode)
647 rtx op;
648 enum machine_mode mode;
649 {
650 if (! nonimmediate_operand (op, mode))
651 return 0;
652 if (GET_CODE (op) == MEM
653 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
654 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
655 return 0;
656 return 1;
657 }
658
659 /* Like memory_operand, but don't allow post-increments. */
660
661 int
662 not_postinc_memory_operand (op, mode)
663 rtx op;
664 enum machine_mode mode;
665 {
666 return (memory_operand (op, mode)
667 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
668 }
669
670 /* Return 1 if this is a comparison operator that accepts a normal 8-bit
671 signed immediate operand. */
672
673 int
674 normal_comparison_operator (op, mode)
675 register rtx op;
676 enum machine_mode mode;
677 {
678 enum rtx_code code = GET_CODE (op);
679 return ((mode == VOIDmode || GET_MODE (op) == mode)
680 && (code == EQ || code == NE
681 || code == GT || code == LE || code == GTU || code == LEU));
682 }
683
684 /* Return 1 if this is a comparison operator that accepts an adjusted 8-bit
685 signed immediate operand. */
686
687 int
688 adjusted_comparison_operator (op, mode)
689 register rtx op;
690 enum machine_mode mode;
691 {
692 enum rtx_code code = GET_CODE (op);
693 return ((mode == VOIDmode || GET_MODE (op) == mode)
694 && (code == LT || code == GE || code == LTU || code == GEU));
695 }
696
697 /* Return 1 if this is a signed inequality operator. */
698
699 int
700 signed_inequality_operator (op, mode)
701 register rtx op;
702 enum machine_mode mode;
703 {
704 enum rtx_code code = GET_CODE (op);
705 return ((mode == VOIDmode || GET_MODE (op) == mode)
706 && (code == GE || code == GT
707 || code == LE || code == LT));
708 }
709
710 /* Return 1 if this operator is valid for predication. */
711
712 int
713 predicate_operator (op, mode)
714 register rtx op;
715 enum machine_mode mode;
716 {
717 enum rtx_code code = GET_CODE (op);
718 return ((GET_MODE (op) == mode || mode == VOIDmode)
719 && (code == EQ || code == NE));
720 }
721
722 /* Return 1 if this operator can be used in a conditional operation. */
723
724 int
725 condop_operator (op, mode)
726 register rtx op;
727 enum machine_mode mode;
728 {
729 enum rtx_code code = GET_CODE (op);
730 return ((GET_MODE (op) == mode || mode == VOIDmode)
731 && (code == PLUS || code == MINUS || code == AND
732 || code == IOR || code == XOR));
733 }
734
735 /* Return 1 if this is the ar.lc register. */
736
737 int
738 ar_lc_reg_operand (op, mode)
739 register rtx op;
740 enum machine_mode mode;
741 {
742 return (GET_MODE (op) == DImode
743 && (mode == DImode || mode == VOIDmode)
744 && GET_CODE (op) == REG
745 && REGNO (op) == AR_LC_REGNUM);
746 }
747
748 /* Return 1 if this is the ar.ccv register. */
749
750 int
751 ar_ccv_reg_operand (op, mode)
752 register rtx op;
753 enum machine_mode mode;
754 {
755 return ((GET_MODE (op) == mode || mode == VOIDmode)
756 && GET_CODE (op) == REG
757 && REGNO (op) == AR_CCV_REGNUM);
758 }
759
760 /* Like general_operand, but don't allow (mem (addressof)). */
761
762 int
763 general_tfmode_operand (op, mode)
764 rtx op;
765 enum machine_mode mode;
766 {
767 if (! general_operand (op, mode))
768 return 0;
769 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
770 return 0;
771 return 1;
772 }
773
774 /* Similarly. */
775
776 int
777 destination_tfmode_operand (op, mode)
778 rtx op;
779 enum machine_mode mode;
780 {
781 if (! destination_operand (op, mode))
782 return 0;
783 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
784 return 0;
785 return 1;
786 }
787
788 /* Similarly. */
789
790 int
791 tfreg_or_fp01_operand (op, mode)
792 rtx op;
793 enum machine_mode mode;
794 {
795 if (GET_CODE (op) == SUBREG)
796 return 0;
797 return fr_reg_or_fp01_operand (op, mode);
798 }
799 \f
800 /* Return 1 if the operands of a move are ok. */
801
802 int
803 ia64_move_ok (dst, src)
804 rtx dst, src;
805 {
806 /* If we're under init_recog_no_volatile, we'll not be able to use
807 memory_operand. So check the code directly and don't worry about
808 the validity of the underlying address, which should have been
809 checked elsewhere anyway. */
810 if (GET_CODE (dst) != MEM)
811 return 1;
812 if (GET_CODE (src) == MEM)
813 return 0;
814 if (register_operand (src, VOIDmode))
815 return 1;
816
817 /* Otherwise, this must be a constant, and that is either 0, 0.0 or 1.0. */
818 if (INTEGRAL_MODE_P (GET_MODE (dst)))
819 return src == const0_rtx;
820 else
821 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
822 }
823
824 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
825 Return the length of the field, or <= 0 on failure. */
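/* For example, with ROP == 0xff0 and RSHIFT == 4, the shift strips the
   four zero bits and leaves 0xff; exact_log2 (0xff + 1) then gives a field
   length of 8.  If the shifted value is not a solid block of 1's starting
   at bit 0, exact_log2 returns -1.  */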
826
827 int
828 ia64_depz_field_mask (rop, rshift)
829 rtx rop, rshift;
830 {
831 unsigned HOST_WIDE_INT op = INTVAL (rop);
832 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
833
834 /* Get rid of the zero bits we're shifting in. */
835 op >>= shift;
836
837 /* We must now have a solid block of 1's at bit 0. */
838 return exact_log2 (op + 1);
839 }
840
841 /* Expand a symbolic constant load. */
842 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
843
844 void
845 ia64_expand_load_address (dest, src, scratch)
846 rtx dest, src, scratch;
847 {
848 rtx temp;
849
850 /* The destination could be a MEM during initial rtl generation,
851 which isn't a valid destination for the PIC load address patterns. */
852 if (! register_operand (dest, DImode))
853 temp = gen_reg_rtx (DImode);
854 else
855 temp = dest;
856
857 if (TARGET_AUTO_PIC)
858 emit_insn (gen_load_gprel64 (temp, src));
859 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
860 emit_insn (gen_load_fptr (temp, src));
861 else if (sdata_symbolic_operand (src, DImode))
862 emit_insn (gen_load_gprel (temp, src));
863 else if (GET_CODE (src) == CONST
864 && GET_CODE (XEXP (src, 0)) == PLUS
865 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
866 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
867 {
868 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
869 rtx sym = XEXP (XEXP (src, 0), 0);
870 HOST_WIDE_INT ofs, hi, lo;
871
872 /* Split the offset into a sign extended 14-bit low part
873 and a complementary high part. */
874 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
875 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
876 hi = ofs - lo;
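/* For example, OFS == 0x3fff splits into LO == -1 and HI == 0x4000;
   HI + LO reconstructs the original offset, while LO fits in the
   sign-extended 14-bit immediate field of an add.  */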
877
878 if (! scratch)
879 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
880
881 emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
882 scratch));
883 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
884 }
885 else
886 {
887 rtx insn;
888 if (! scratch)
889 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
890
891 insn = emit_insn (gen_load_symptr (temp, src, scratch));
892 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
893 }
894
895 if (temp != dest)
896 emit_move_insn (dest, temp);
897 }
898
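/* Return the rtx used to save the global pointer around calls in the
   current function, creating it on first use.  When SETJMP_P, the save
   must live in r4 rather than a pseudo, since pseudos are not restored
   by longjmp.  */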
899 rtx
900 ia64_gp_save_reg (setjmp_p)
901 int setjmp_p;
902 {
903 rtx save = cfun->machine->ia64_gp_save;
904
905 if (save != NULL)
906 {
907 /* We can't save GP in a pseudo if we are calling setjmp, because
908 pseudos won't be restored by longjmp. For now, we save it in r4. */
909 /* ??? It would be more efficient to save this directly into a stack
910 slot. Unfortunately, the stack slot address gets cse'd across
911 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
912 place. */
913
914 /* ??? Get the barf bag, Virginia. We've got to replace this thing
915 in place, since this rtx is used in exception handling receivers.
916 Moreover, we must get this rtx out of regno_reg_rtx or reload
917 will do the wrong thing. */
918 unsigned int old_regno = REGNO (save);
919 if (setjmp_p && old_regno != GR_REG (4))
920 {
921 REGNO (save) = GR_REG (4);
922 regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
923 }
924 }
925 else
926 {
927 if (setjmp_p)
928 save = gen_rtx_REG (DImode, GR_REG (4));
929 else if (! optimize)
930 save = gen_rtx_REG (DImode, LOC_REG (0));
931 else
932 save = gen_reg_rtx (DImode);
933 cfun->machine->ia64_gp_save = save;
934 }
935
936 return save;
937 }
938
939 /* Split a post-reload TImode reference into two DImode components. */
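/* OUT receives the two DImode halves, IN is the TImode operand, and
   SCRATCH is a spare DImode register used to address the second word when
   IN is a MEM.  Returns an add pattern that the caller must emit to advance
   SCRATCH to the high word's address, or NULL_RTX if no fixup is needed.  */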
940
941 rtx
942 ia64_split_timode (out, in, scratch)
943 rtx out[2];
944 rtx in, scratch;
945 {
946 switch (GET_CODE (in))
947 {
948 case REG:
949 out[0] = gen_rtx_REG (DImode, REGNO (in));
950 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
951 return NULL_RTX;
952
953 case MEM:
954 {
955 rtx base = XEXP (in, 0);
956
957 switch (GET_CODE (base))
958 {
959 case REG:
960 out[0] = adjust_address (in, DImode, 0);
961 break;
962 case POST_MODIFY:
963 base = XEXP (base, 0);
964 out[0] = adjust_address (in, DImode, 0);
965 break;
966
967 /* Since we're changing the mode, we need to change to POST_MODIFY
968 as well to preserve the size of the increment. Either that or
969 do the update in two steps, but we've already got this scratch
970 register handy so let's use it. */
971 case POST_INC:
972 base = XEXP (base, 0);
973 out[0]
974 = change_address (in, DImode,
975 gen_rtx_POST_MODIFY
976 (Pmode, base, plus_constant (base, 16)));
977 break;
978 case POST_DEC:
979 base = XEXP (base, 0);
980 out[0]
981 = change_address (in, DImode,
982 gen_rtx_POST_MODIFY
983 (Pmode, base, plus_constant (base, -16)));
984 break;
985 default:
986 abort ();
987 }
988
989 if (scratch == NULL_RTX)
990 abort ();
991 out[1] = change_address (in, DImode, scratch);
992 return gen_adddi3 (scratch, base, GEN_INT (8));
993 }
994
995 case CONST_INT:
996 case CONST_DOUBLE:
997 split_double (in, &out[0], &out[1]);
998 return NULL_RTX;
999
1000 default:
1001 abort ();
1002 }
1003 }
1004
1005 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1006 through memory plus an extra GR scratch register. Except that you can
1007 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1008 SECONDARY_RELOAD_CLASS, but not both.
1009
1010 We got into problems in the first place by allowing a construct like
1011 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1012 This solution attempts to prevent this situation from occurring. When
1013 we see something like the above, we spill the inner register to memory. */
1014
1015 rtx
1016 spill_tfmode_operand (in, force)
1017 rtx in;
1018 int force;
1019 {
1020 if (GET_CODE (in) == SUBREG
1021 && GET_MODE (SUBREG_REG (in)) == TImode
1022 && GET_CODE (SUBREG_REG (in)) == REG)
1023 {
1024 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
1025 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1026 }
1027 else if (force && GET_CODE (in) == REG)
1028 {
1029 rtx mem = gen_mem_addressof (in, NULL_TREE);
1030 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1031 }
1032 else if (GET_CODE (in) == MEM
1033 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1034 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1035 else
1036 return in;
1037 }
1038
1039 /* Emit comparison instruction if necessary, returning the expression
1040 that holds the compare result in the proper mode. */
1041
1042 rtx
1043 ia64_expand_compare (code, mode)
1044 enum rtx_code code;
1045 enum machine_mode mode;
1046 {
1047 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1048 rtx cmp;
1049
1050 /* If we have a BImode input, then we already have a compare result, and
1051 do not need to emit another comparison. */
1052 if (GET_MODE (op0) == BImode)
1053 {
1054 if ((code == NE || code == EQ) && op1 == const0_rtx)
1055 cmp = op0;
1056 else
1057 abort ();
1058 }
1059 else
1060 {
1061 cmp = gen_reg_rtx (BImode);
1062 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1063 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1064 code = NE;
1065 }
1066
1067 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1068 }
1069
1070 /* Emit the appropriate sequence for a call. */
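/* RETVAL is the destination of the returned value, or null for a call with
   no result.  ADDR is the MEM wrapping the address of the function to call.
   NEXTARG is the next unused argument register, from which the number of
   argument registers in use is computed.  SIBCALL_P is nonzero for a
   sibling call.  */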
1071
1072 void
1073 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1074 rtx retval;
1075 rtx addr;
1076 rtx nextarg;
1077 int sibcall_p;
1078 {
1079 rtx insn, b0, gp_save, narg_rtx;
1080 int narg;
1081
1082 addr = XEXP (addr, 0);
1083 b0 = gen_rtx_REG (DImode, R_BR (0));
1084
1085 if (! nextarg)
1086 narg = 0;
1087 else if (IN_REGNO_P (REGNO (nextarg)))
1088 narg = REGNO (nextarg) - IN_REG (0);
1089 else
1090 narg = REGNO (nextarg) - OUT_REG (0);
1091 narg_rtx = GEN_INT (narg);
1092
1093 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1094 {
1095 if (sibcall_p)
1096 insn = gen_sibcall_nopic (addr, narg_rtx, b0);
1097 else if (! retval)
1098 insn = gen_call_nopic (addr, narg_rtx, b0);
1099 else
1100 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1101 emit_call_insn (insn);
1102 return;
1103 }
1104
1105 if (sibcall_p)
1106 gp_save = NULL_RTX;
1107 else
1108 gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
1109
1110 /* If this is an indirect call, then we have the address of a descriptor. */
1111 if (! symbolic_operand (addr, VOIDmode))
1112 {
1113 rtx dest;
1114
1115 if (! sibcall_p)
1116 emit_move_insn (gp_save, pic_offset_table_rtx);
1117
1118 dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
1119 emit_move_insn (pic_offset_table_rtx,
1120 gen_rtx_MEM (DImode, plus_constant (addr, 8)));
1121
1122 if (sibcall_p)
1123 insn = gen_sibcall_pic (dest, narg_rtx, b0);
1124 else if (! retval)
1125 insn = gen_call_pic (dest, narg_rtx, b0);
1126 else
1127 insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
1128 emit_call_insn (insn);
1129
1130 if (! sibcall_p)
1131 emit_move_insn (pic_offset_table_rtx, gp_save);
1132 }
1133 else if (TARGET_CONST_GP)
1134 {
1135 if (sibcall_p)
1136 insn = gen_sibcall_nopic (addr, narg_rtx, b0);
1137 else if (! retval)
1138 insn = gen_call_nopic (addr, narg_rtx, b0);
1139 else
1140 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1141 emit_call_insn (insn);
1142 }
1143 else
1144 {
1145 if (sibcall_p)
1146 emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0));
1147 else
1148 {
1149 emit_move_insn (gp_save, pic_offset_table_rtx);
1150
1151 if (! retval)
1152 insn = gen_call_pic (addr, narg_rtx, b0);
1153 else
1154 insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
1155 emit_call_insn (insn);
1156
1157 emit_move_insn (pic_offset_table_rtx, gp_save);
1158 }
1159 }
1160 }
1161 \f
1162 /* Called when beginning the assembly file.  Emit the .pred.safe_across_calls directive naming the predicate registers preserved across calls (e.g. p1-p5 and p16-p63 under the usual ia64 conventions). */
1163
1164 void
1165 emit_safe_across_calls (f)
1166 FILE *f;
1167 {
1168 unsigned int rs, re;
1169 int out_state;
1170
1171 rs = 1;
1172 out_state = 0;
1173 while (1)
1174 {
1175 while (rs < 64 && call_used_regs[PR_REG (rs)])
1176 rs++;
1177 if (rs >= 64)
1178 break;
1179 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1180 continue;
1181 if (out_state == 0)
1182 {
1183 fputs ("\t.pred.safe_across_calls ", f);
1184 out_state = 1;
1185 }
1186 else
1187 fputc (',', f);
1188 if (re == rs + 1)
1189 fprintf (f, "p%u", rs);
1190 else
1191 fprintf (f, "p%u-p%u", rs, re - 1);
1192 rs = re + 1;
1193 }
1194 if (out_state)
1195 fputc ('\n', f);
1196 }
1197
1198
1199 /* Structure to be filled in by ia64_compute_frame_size with register
1200 save masks and offsets for the current function. */
1201
1202 struct ia64_frame_info
1203 {
1204 HOST_WIDE_INT total_size; /* size of the stack frame, not including
1205 the caller's scratch area. */
1206 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
1207 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
1208 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
1209 HARD_REG_SET mask; /* mask of saved registers. */
1210 unsigned int gr_used_mask; /* mask of registers in use as gr spill
1211 registers or long-term scratches. */
1212 int n_spilled; /* number of spilled registers. */
1213 int reg_fp; /* register for fp. */
1214 int reg_save_b0; /* save register for b0. */
1215 int reg_save_pr; /* save register for prs. */
1216 int reg_save_ar_pfs; /* save register for ar.pfs. */
1217 int reg_save_ar_unat; /* save register for ar.unat. */
1218 int reg_save_ar_lc; /* save register for ar.lc. */
1219 int n_input_regs; /* number of input registers used. */
1220 int n_local_regs; /* number of local registers used. */
1221 int n_output_regs; /* number of output registers used. */
1222 int n_rotate_regs; /* number of rotating registers used. */
1223
1224 char need_regstk; /* true if a .regstk directive needed. */
1225 char initialized; /* true if the data is finalized. */
1226 };
1227
1228 /* Current frame information calculated by ia64_compute_frame_size. */
1229 static struct ia64_frame_info current_frame_info;
1230
1231 /* Helper function for ia64_compute_frame_size: find an appropriate general
1232 register to spill some special register to. SPECIAL_SPILL_MASK contains
1233 bits in GR0 to GR31 that have already been allocated by this routine.
1234 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1235
1236 static int
1237 find_gr_spill (try_locals)
1238 int try_locals;
1239 {
1240 int regno;
1241
1242 /* If this is a leaf function, first try an otherwise unused
1243 call-clobbered register. */
1244 if (current_function_is_leaf)
1245 {
1246 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1247 if (! regs_ever_live[regno]
1248 && call_used_regs[regno]
1249 && ! fixed_regs[regno]
1250 && ! global_regs[regno]
1251 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1252 {
1253 current_frame_info.gr_used_mask |= 1 << regno;
1254 return regno;
1255 }
1256 }
1257
1258 if (try_locals)
1259 {
1260 regno = current_frame_info.n_local_regs;
1261 /* If there is a frame pointer, then we can't use loc79, because
1262 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1263 reg_name switching code in ia64_expand_prologue. */
1264 if (regno < (80 - frame_pointer_needed))
1265 {
1266 current_frame_info.n_local_regs = regno + 1;
1267 return LOC_REG (0) + regno;
1268 }
1269 }
1270
1271 /* Failed to find a general register to spill to. Must use stack. */
1272 return 0;
1273 }
1274
1275 /* In order to make for nice schedules, we try to allocate every temporary
1276 to a different register. We must of course stay away from call-saved,
1277 fixed, and global registers. We must also stay away from registers
1278 allocated in current_frame_info.gr_used_mask, since those include regs
1279 used all through the prologue.
1280
1281 Any register allocated here must be used immediately. The idea is to
1282 aid scheduling, not to solve data flow problems. */
1283
1284 static int last_scratch_gr_reg;
1285
1286 static int
1287 next_scratch_gr_reg ()
1288 {
1289 int i, regno;
1290
1291 for (i = 0; i < 32; ++i)
1292 {
1293 regno = (last_scratch_gr_reg + i + 1) & 31;
1294 if (call_used_regs[regno]
1295 && ! fixed_regs[regno]
1296 && ! global_regs[regno]
1297 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1298 {
1299 last_scratch_gr_reg = regno;
1300 return regno;
1301 }
1302 }
1303
1304 /* There must be _something_ available. */
1305 abort ();
1306 }
1307
1308 /* Helper function for ia64_compute_frame_size, called through
1309 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1310
1311 static void
1312 mark_reg_gr_used_mask (reg, data)
1313 rtx reg;
1314 void *data ATTRIBUTE_UNUSED;
1315 {
1316 unsigned int regno = REGNO (reg);
1317 if (regno < 32)
1318 current_frame_info.gr_used_mask |= 1 << regno;
1319 }
1320
1321 /* Compute the frame layout for the current function and record it in
1322 current_frame_info. SIZE is the number of bytes of space
1323 needed for local variables. */
1324
1325 static void
1326 ia64_compute_frame_size (size)
1327 HOST_WIDE_INT size;
1328 {
1329 HOST_WIDE_INT total_size;
1330 HOST_WIDE_INT spill_size = 0;
1331 HOST_WIDE_INT extra_spill_size = 0;
1332 HOST_WIDE_INT pretend_args_size;
1333 HARD_REG_SET mask;
1334 int n_spilled = 0;
1335 int spilled_gr_p = 0;
1336 int spilled_fr_p = 0;
1337 unsigned int regno;
1338 int i;
1339
1340 if (current_frame_info.initialized)
1341 return;
1342
1343 memset (&current_frame_info, 0, sizeof current_frame_info);
1344 CLEAR_HARD_REG_SET (mask);
1345
1346 /* Don't allocate scratches to the return register. */
1347 diddle_return_value (mark_reg_gr_used_mask, NULL);
1348
1349 /* Don't allocate scratches to the EH scratch registers. */
1350 if (cfun->machine->ia64_eh_epilogue_sp)
1351 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1352 if (cfun->machine->ia64_eh_epilogue_bsp)
1353 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1354
1355 /* Find the size of the register stack frame. We have only 80 local
1356 registers, because we reserve 8 for the inputs and 8 for the
1357 outputs. */
1358
1359 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1360 since we'll be adjusting that down later. */
1361 regno = LOC_REG (78) + ! frame_pointer_needed;
1362 for (; regno >= LOC_REG (0); regno--)
1363 if (regs_ever_live[regno])
1364 break;
1365 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1366
1367 /* For functions marked with the syscall_linkage attribute, we must mark
1368 all eight input registers as in use, so that locals aren't visible to
1369 the caller. */
1370
1371 if (cfun->machine->n_varargs > 0
1372 || lookup_attribute ("syscall_linkage",
1373 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1374 current_frame_info.n_input_regs = 8;
1375 else
1376 {
1377 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1378 if (regs_ever_live[regno])
1379 break;
1380 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1381 }
1382
1383 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1384 if (regs_ever_live[regno])
1385 break;
1386 i = regno - OUT_REG (0) + 1;
1387
1388 /* When -p profiling, we need one output register for the mcount argument.
1389 Likewise for -a profiling, for the bb_init_func argument. For -ax
1390 profiling, we need two output registers for the two bb_init_trace_func
1391 arguments. */
1392 if (profile_flag || profile_block_flag == 1)
1393 i = MAX (i, 1);
1394 else if (profile_block_flag == 2)
1395 i = MAX (i, 2);
1396 current_frame_info.n_output_regs = i;
1397
1398 /* ??? No rotating register support yet. */
1399 current_frame_info.n_rotate_regs = 0;
1400
1401 /* Discover which registers need spilling, and how much room that
1402 will take. Begin with floating point and general registers,
1403 which will always wind up on the stack. */
1404
1405 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1406 if (regs_ever_live[regno] && ! call_used_regs[regno])
1407 {
1408 SET_HARD_REG_BIT (mask, regno);
1409 spill_size += 16;
1410 n_spilled += 1;
1411 spilled_fr_p = 1;
1412 }
1413
1414 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1415 if (regs_ever_live[regno] && ! call_used_regs[regno])
1416 {
1417 SET_HARD_REG_BIT (mask, regno);
1418 spill_size += 8;
1419 n_spilled += 1;
1420 spilled_gr_p = 1;
1421 }
1422
1423 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1424 if (regs_ever_live[regno] && ! call_used_regs[regno])
1425 {
1426 SET_HARD_REG_BIT (mask, regno);
1427 spill_size += 8;
1428 n_spilled += 1;
1429 }
1430
1431 /* Now come all special registers that might get saved in other
1432 general registers. */
1433
1434 if (frame_pointer_needed)
1435 {
1436 current_frame_info.reg_fp = find_gr_spill (1);
1437 /* If we did not get a register, then we take LOC79. This is guaranteed
1438 to be free, even if regs_ever_live is already set, because this is
1439 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1440 as we don't count loc79 above. */
1441 if (current_frame_info.reg_fp == 0)
1442 {
1443 current_frame_info.reg_fp = LOC_REG (79);
1444 current_frame_info.n_local_regs++;
1445 }
1446 }
1447
1448 if (! current_function_is_leaf)
1449 {
1450 /* Emit a save of BR0 if we call other functions. Do this even
1451 if this function doesn't return, as EH depends on this to be
1452 able to unwind the stack. */
1453 SET_HARD_REG_BIT (mask, BR_REG (0));
1454
1455 current_frame_info.reg_save_b0 = find_gr_spill (1);
1456 if (current_frame_info.reg_save_b0 == 0)
1457 {
1458 spill_size += 8;
1459 n_spilled += 1;
1460 }
1461
1462 /* Similarly for ar.pfs. */
1463 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1464 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1465 if (current_frame_info.reg_save_ar_pfs == 0)
1466 {
1467 extra_spill_size += 8;
1468 n_spilled += 1;
1469 }
1470 }
1471 else
1472 {
1473 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1474 {
1475 SET_HARD_REG_BIT (mask, BR_REG (0));
1476 spill_size += 8;
1477 n_spilled += 1;
1478 }
1479 }
1480
1481 /* Unwind descriptor hackery: things are most efficient if we allocate
1482 consecutive GR save registers for RP, PFS, FP in that order. However,
1483 it is absolutely critical that FP get the only hard register that's
1484 guaranteed to be free, so we allocated it first. If all three did
1485 happen to be allocated hard regs, and are consecutive, rearrange them
1486 into the preferred order now. */
1487 if (current_frame_info.reg_fp != 0
1488 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1489 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1490 {
1491 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1492 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1493 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1494 }
1495
1496 /* See if we need to store the predicate register block. */
1497 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1498 if (regs_ever_live[regno] && ! call_used_regs[regno])
1499 break;
1500 if (regno <= PR_REG (63))
1501 {
1502 SET_HARD_REG_BIT (mask, PR_REG (0));
1503 current_frame_info.reg_save_pr = find_gr_spill (1);
1504 if (current_frame_info.reg_save_pr == 0)
1505 {
1506 extra_spill_size += 8;
1507 n_spilled += 1;
1508 }
1509
1510 /* ??? Mark them all as used so that register renaming and such
1511 are free to use them. */
1512 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1513 regs_ever_live[regno] = 1;
1514 }
1515
1516 /* If we're forced to use st8.spill, we're forced to save and restore
1517 ar.unat as well. */
1518 if (spilled_gr_p || cfun->machine->n_varargs)
1519 {
1520 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1521 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1522 if (current_frame_info.reg_save_ar_unat == 0)
1523 {
1524 extra_spill_size += 8;
1525 n_spilled += 1;
1526 }
1527 }
1528
1529 if (regs_ever_live[AR_LC_REGNUM])
1530 {
1531 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1532 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1533 if (current_frame_info.reg_save_ar_lc == 0)
1534 {
1535 extra_spill_size += 8;
1536 n_spilled += 1;
1537 }
1538 }
1539
1540 /* If we have an odd number of words of pretend arguments written to
1541 the stack, then the FR save area will be unaligned. We round the
1542 size of this area up to keep things 16 byte aligned. */
1543 if (spilled_fr_p)
1544 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1545 else
1546 pretend_args_size = current_function_pretend_args_size;
1547
1548 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1549 + current_function_outgoing_args_size);
1550 total_size = IA64_STACK_ALIGN (total_size);
1551
1552 /* We always use the 16-byte scratch area provided by the caller, but
1553 if we are a leaf function, there's no one to which we need to provide
1554 a scratch area. */
1555 if (current_function_is_leaf)
1556 total_size = MAX (0, total_size - 16);
1557
1558 current_frame_info.total_size = total_size;
1559 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1560 current_frame_info.spill_size = spill_size;
1561 current_frame_info.extra_spill_size = extra_spill_size;
1562 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1563 current_frame_info.n_spilled = n_spilled;
1564 current_frame_info.initialized = reload_completed;
1565 }
1566
1567 /* Compute the initial difference between the specified pair of registers. */
1568
1569 HOST_WIDE_INT
1570 ia64_initial_elimination_offset (from, to)
1571 int from, to;
1572 {
1573 HOST_WIDE_INT offset;
1574
1575 ia64_compute_frame_size (get_frame_size ());
1576 switch (from)
1577 {
1578 case FRAME_POINTER_REGNUM:
1579 if (to == HARD_FRAME_POINTER_REGNUM)
1580 {
1581 if (current_function_is_leaf)
1582 offset = -current_frame_info.total_size;
1583 else
1584 offset = -(current_frame_info.total_size
1585 - current_function_outgoing_args_size - 16);
1586 }
1587 else if (to == STACK_POINTER_REGNUM)
1588 {
1589 if (current_function_is_leaf)
1590 offset = 0;
1591 else
1592 offset = 16 + current_function_outgoing_args_size;
1593 }
1594 else
1595 abort ();
1596 break;
1597
1598 case ARG_POINTER_REGNUM:
1599 /* Arguments start above the 16 byte save area, unless stdarg
1600 in which case we store through the 16 byte save area. */
1601 if (to == HARD_FRAME_POINTER_REGNUM)
1602 offset = 16 - current_function_pretend_args_size;
1603 else if (to == STACK_POINTER_REGNUM)
1604 offset = (current_frame_info.total_size
1605 + 16 - current_function_pretend_args_size);
1606 else
1607 abort ();
1608 break;
1609
1610 case RETURN_ADDRESS_POINTER_REGNUM:
1611 offset = 0;
1612 break;
1613
1614 default:
1615 abort ();
1616 }
1617
1618 return offset;
1619 }
1620
1621 /* If there are more than a trivial number of register spills, we use
1622 two interleaved iterators so that we can get two memory references
1623 per insn group.
1624
1625 In order to simplify things in the prologue and epilogue expanders,
1626 we use helper functions to fix up the memory references after the
1627 fact with the appropriate offsets to a POST_MODIFY memory mode.
1628 The following data structure tracks the state of the two iterators
1629 while insns are being emitted. */
1630
1631 struct spill_fill_data
1632 {
1633 rtx init_after; /* point at which to emit initializations */
1634 rtx init_reg[2]; /* initial base register */
1635 rtx iter_reg[2]; /* the iterator registers */
1636 rtx *prev_addr[2]; /* address of last memory use */
1637 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
1638 HOST_WIDE_INT prev_off[2]; /* last offset */
1639 int n_iter; /* number of iterators in use */
1640 int next_iter; /* next iterator to use */
1641 unsigned int save_gr_used_mask;
1642 };
1643
1644 static struct spill_fill_data spill_fill_data;
1645
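/* Set up the spill/fill iterators for N_SPILLS register saves addressed
   relative to INIT_REG at offset CFA_OFF.  A second iterator register is
   allocated only when there are more than two spills; the scratch registers
   chosen are marked in current_frame_info.gr_used_mask until
   finish_spill_pointers is called.  */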
1646 static void
1647 setup_spill_pointers (n_spills, init_reg, cfa_off)
1648 int n_spills;
1649 rtx init_reg;
1650 HOST_WIDE_INT cfa_off;
1651 {
1652 int i;
1653
1654 spill_fill_data.init_after = get_last_insn ();
1655 spill_fill_data.init_reg[0] = init_reg;
1656 spill_fill_data.init_reg[1] = init_reg;
1657 spill_fill_data.prev_addr[0] = NULL;
1658 spill_fill_data.prev_addr[1] = NULL;
1659 spill_fill_data.prev_insn[0] = NULL;
1660 spill_fill_data.prev_insn[1] = NULL;
1661 spill_fill_data.prev_off[0] = cfa_off;
1662 spill_fill_data.prev_off[1] = cfa_off;
1663 spill_fill_data.next_iter = 0;
1664 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1665
1666 spill_fill_data.n_iter = 1 + (n_spills > 2);
1667 for (i = 0; i < spill_fill_data.n_iter; ++i)
1668 {
1669 int regno = next_scratch_gr_reg ();
1670 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1671 current_frame_info.gr_used_mask |= 1 << regno;
1672 }
1673 }
1674
1675 static void
1676 finish_spill_pointers ()
1677 {
1678 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1679 }
1680
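/* Return a MEM through which REG is spilled or restored at CFA_OFF, using
   the next iterator register as its address.  The previous memory reference
   made through that iterator is rewritten into a POST_MODIFY (or an explicit
   add is emitted) so that the iterator steps forward to the new offset.  */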
1681 static rtx
1682 spill_restore_mem (reg, cfa_off)
1683 rtx reg;
1684 HOST_WIDE_INT cfa_off;
1685 {
1686 int iter = spill_fill_data.next_iter;
1687 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1688 rtx disp_rtx = GEN_INT (disp);
1689 rtx mem;
1690
1691 if (spill_fill_data.prev_addr[iter])
1692 {
1693 if (CONST_OK_FOR_N (disp))
1694 {
1695 *spill_fill_data.prev_addr[iter]
1696 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1697 gen_rtx_PLUS (DImode,
1698 spill_fill_data.iter_reg[iter],
1699 disp_rtx));
1700 REG_NOTES (spill_fill_data.prev_insn[iter])
1701 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
1702 REG_NOTES (spill_fill_data.prev_insn[iter]));
1703 }
1704 else
1705 {
1706 /* ??? Could use register post_modify for loads. */
1707 if (! CONST_OK_FOR_I (disp))
1708 {
1709 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1710 emit_move_insn (tmp, disp_rtx);
1711 disp_rtx = tmp;
1712 }
1713 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1714 spill_fill_data.iter_reg[iter], disp_rtx));
1715 }
1716 }
1717 /* Micro-optimization: if we've created a frame pointer, it's at
1718 CFA 0, which may allow the real iterator to be initialized lower,
1719 slightly increasing parallelism. Also, if there are few saves
1720 it may eliminate the iterator entirely. */
1721 else if (disp == 0
1722 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1723 && frame_pointer_needed)
1724 {
1725 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1726 set_mem_alias_set (mem, get_varargs_alias_set ());
1727 return mem;
1728 }
1729 else
1730 {
1731 rtx seq;
1732
1733 if (disp == 0)
1734 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1735 spill_fill_data.init_reg[iter]);
1736 else
1737 {
1738 start_sequence ();
1739
1740 if (! CONST_OK_FOR_I (disp))
1741 {
1742 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1743 emit_move_insn (tmp, disp_rtx);
1744 disp_rtx = tmp;
1745 }
1746
1747 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1748 spill_fill_data.init_reg[iter],
1749 disp_rtx));
1750
1751 seq = gen_sequence ();
1752 end_sequence ();
1753 }
1754
1755 /* Careful for being the first insn in a sequence. */
1756 if (spill_fill_data.init_after)
1757 spill_fill_data.init_after
1758 = emit_insn_after (seq, spill_fill_data.init_after);
1759 else
1760 {
1761 rtx first = get_insns ();
1762 if (first)
1763 spill_fill_data.init_after
1764 = emit_insn_before (seq, first);
1765 else
1766 spill_fill_data.init_after = emit_insn (seq);
1767 }
1768 }
1769
1770 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1771
1772 /* ??? Not all of the spills are for varargs, but some of them are.
1773 The rest of the spills belong in an alias set of their own. But
1774 it doesn't actually hurt to include them here. */
1775 set_mem_alias_set (mem, get_varargs_alias_set ());
1776
1777 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1778 spill_fill_data.prev_off[iter] = cfa_off;
1779
1780 if (++iter >= spill_fill_data.n_iter)
1781 iter = 0;
1782 spill_fill_data.next_iter = iter;
1783
1784 return mem;
1785 }
1786
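/* Spill REG to the save area at CFA_OFF, emitting the store via MOVE_FN.
   If FRAME_REG is non-null, mark the insn frame related and attach a
   REG_FRAME_RELATED_EXPR note describing the save at a constant offset from
   the CFA, since the unwinder cannot follow the iterator registers.  */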
1787 static void
1788 do_spill (move_fn, reg, cfa_off, frame_reg)
1789 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1790 rtx reg, frame_reg;
1791 HOST_WIDE_INT cfa_off;
1792 {
1793 int iter = spill_fill_data.next_iter;
1794 rtx mem, insn;
1795
1796 mem = spill_restore_mem (reg, cfa_off);
1797 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1798 spill_fill_data.prev_insn[iter] = insn;
1799
1800 if (frame_reg)
1801 {
1802 rtx base;
1803 HOST_WIDE_INT off;
1804
1805 RTX_FRAME_RELATED_P (insn) = 1;
1806
1807 /* Don't even pretend that the unwind code can intuit its way
1808 through a pair of interleaved post_modify iterators. Just
1809 provide the correct answer. */
1810
1811 if (frame_pointer_needed)
1812 {
1813 base = hard_frame_pointer_rtx;
1814 off = - cfa_off;
1815 }
1816 else
1817 {
1818 base = stack_pointer_rtx;
1819 off = current_frame_info.total_size - cfa_off;
1820 }
1821
1822 REG_NOTES (insn)
1823 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1824 gen_rtx_SET (VOIDmode,
1825 gen_rtx_MEM (GET_MODE (reg),
1826 plus_constant (base, off)),
1827 frame_reg),
1828 REG_NOTES (insn));
1829 }
1830 }
1831
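/* Restore REG from the save area at CFA_OFF, emitting the load via
   MOVE_FN.  */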
1832 static void
1833 do_restore (move_fn, reg, cfa_off)
1834 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1835 rtx reg;
1836 HOST_WIDE_INT cfa_off;
1837 {
1838 int iter = spill_fill_data.next_iter;
1839 rtx insn;
1840
1841 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1842 GEN_INT (cfa_off)));
1843 spill_fill_data.prev_insn[iter] = insn;
1844 }
1845
1846 /* Wrapper functions that discard the CONST_INT spill offset. These
1847 exist so that we can give gr_spill/gr_fill the offset they need and
1848 use a consistent function interface. */
1849
1850 static rtx
1851 gen_movdi_x (dest, src, offset)
1852 rtx dest, src;
1853 rtx offset ATTRIBUTE_UNUSED;
1854 {
1855 return gen_movdi (dest, src);
1856 }
1857
1858 static rtx
1859 gen_fr_spill_x (dest, src, offset)
1860 rtx dest, src;
1861 rtx offset ATTRIBUTE_UNUSED;
1862 {
1863 return gen_fr_spill (dest, src);
1864 }
1865
1866 static rtx
1867 gen_fr_restore_x (dest, src, offset)
1868 rtx dest, src;
1869 rtx offset ATTRIBUTE_UNUSED;
1870 {
1871 return gen_fr_restore (dest, src);
1872 }
1873
1874 /* Called after register allocation to add any instructions needed for the
1875 prologue. Using a prologue insn is favored compared to putting all of the
1876 instructions in output_function_prologue(), since it allows the scheduler
1877 to intermix instructions with the saves of the caller saved registers. In
1878 some cases, it might be necessary to emit a barrier instruction as the last
1879 insn to prevent such scheduling.
1880
1881 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1882 so that the debug info generation code can handle them properly.
1883
1884 The register save area is laid out like so:
1885 cfa+16
1886 [ varargs spill area ]
1887 [ fr register spill area ]
1888 [ br register spill area ]
1889 [ ar register spill area ]
1890 [ pr register spill area ]
1891 [ gr register spill area ] */
1892
1893 /* ??? Get inefficient code when the frame size is larger than can fit in an
1894 adds instruction. */
1895
1896 void
1897 ia64_expand_prologue ()
1898 {
1899 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1900 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1901 rtx reg, alt_reg;
1902
1903 ia64_compute_frame_size (get_frame_size ());
1904 last_scratch_gr_reg = 15;
1905
1906 /* If there is no epilogue, then we don't need some prologue insns.
1907 We need to avoid emitting the dead prologue insns, because flow
1908 will complain about them. */
1909 if (optimize)
1910 {
1911 edge e;
1912
1913 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1914 if ((e->flags & EDGE_FAKE) == 0
1915 && (e->flags & EDGE_FALLTHRU) != 0)
1916 break;
1917 epilogue_p = (e != NULL);
1918 }
1919 else
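/* When not optimizing, play it safe and assume an epilogue is always
   needed.  */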
1920 epilogue_p = 1;
1921
1922 /* Set the local, input, and output register names. We need to do this
1923 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1924 half. If we use in/loc/out register names, then we get assembler errors
1925 in crtn.S because there is no alloc insn or regstk directive in there. */
1926 if (! TARGET_REG_NAMES)
1927 {
1928 int inputs = current_frame_info.n_input_regs;
1929 int locals = current_frame_info.n_local_regs;
1930 int outputs = current_frame_info.n_output_regs;
1931
1932 for (i = 0; i < inputs; i++)
1933 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
1934 for (i = 0; i < locals; i++)
1935 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
1936 for (i = 0; i < outputs; i++)
1937 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
1938 }
1939
1940 /* Set the frame pointer register name. The regnum is logically loc79,
1941 but of course we'll not have allocated that many locals. Rather than
1942 worrying about renumbering the existing rtxs, we adjust the name. */
1943 /* ??? This code means that we can never use one local register when
1944 there is a frame pointer. loc79 gets wasted in this case, as it is
1945 renamed to a register that will never be used. See also the try_locals
1946 code in find_gr_spill. */
1947 if (current_frame_info.reg_fp)
1948 {
1949 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
1950 reg_names[HARD_FRAME_POINTER_REGNUM]
1951 = reg_names[current_frame_info.reg_fp];
1952 reg_names[current_frame_info.reg_fp] = tmp;
1953 }
1954
1955 /* Fix up the return address placeholder. */
1956 /* ??? We can fail if __builtin_return_address is used, and we didn't
1957 allocate a register in which to save b0. I can't think of a way to
1958 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
1959 then be sure that I got the right one. Further, reload doesn't seem
1960 to care if an eliminable register isn't used, and "eliminates" it
1961 anyway. */
1962 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
1963 && current_frame_info.reg_save_b0 != 0)
1964 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
1965
1966 /* We don't need an alloc instruction if we've used no outputs or locals. */
1967 if (current_frame_info.n_local_regs == 0
1968 && current_frame_info.n_output_regs == 0
1969 && current_frame_info.n_input_regs <= current_function_args_info.words)
1970 {
1971 /* If there is no alloc, but there are input registers used, then we
1972 need a .regstk directive. */
1973 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
1974 ar_pfs_save_reg = NULL_RTX;
1975 }
1976 else
1977 {
1978 current_frame_info.need_regstk = 0;
1979
1980 if (current_frame_info.reg_save_ar_pfs)
1981 regno = current_frame_info.reg_save_ar_pfs;
1982 else
1983 regno = next_scratch_gr_reg ();
1984 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
1985
1986 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
1987 GEN_INT (current_frame_info.n_input_regs),
1988 GEN_INT (current_frame_info.n_local_regs),
1989 GEN_INT (current_frame_info.n_output_regs),
1990 GEN_INT (current_frame_info.n_rotate_regs)));
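/* The alloc carries unwind information only when ar.pfs is being kept
   in a call-preserved register; otherwise, for a non-leaf function, the
   scratch copy made here is spilled to memory below and that store is
   marked frame related instead.  */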
1991 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
1992 }
1993
1994 /* Set up frame pointer, stack pointer, and spill iterators. */
1995
1996 n_varargs = cfun->machine->n_varargs;
1997 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
1998 stack_pointer_rtx, 0);
1999
2000 if (frame_pointer_needed)
2001 {
2002 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2003 RTX_FRAME_RELATED_P (insn) = 1;
2004 }
2005
2006 if (current_frame_info.total_size != 0)
2007 {
2008 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2009 rtx offset;
2010
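/* Use the frame size directly if it fits in the immediate field of an
   adds instruction; otherwise load it into a scratch register first.  */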
2011 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2012 offset = frame_size_rtx;
2013 else
2014 {
2015 regno = next_scratch_gr_reg ();
2016 offset = gen_rtx_REG (DImode, regno);
2017 emit_move_insn (offset, frame_size_rtx);
2018 }
2019
2020 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2021 stack_pointer_rtx, offset));
2022
2023 if (! frame_pointer_needed)
2024 {
2025 RTX_FRAME_RELATED_P (insn) = 1;
2026 if (GET_CODE (offset) != CONST_INT)
2027 {
2028 REG_NOTES (insn)
2029 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2030 gen_rtx_SET (VOIDmode,
2031 stack_pointer_rtx,
2032 gen_rtx_PLUS (DImode,
2033 stack_pointer_rtx,
2034 frame_size_rtx)),
2035 REG_NOTES (insn));
2036 }
2037 }
2038
2039 /* ??? At this point we must generate a magic insn that appears to
2040 modify the stack pointer, the frame pointer, and all spill
2041 iterators. This would allow the most scheduling freedom. For
2042 now, just hard stop. */
2043 emit_insn (gen_blockage ());
2044 }
2045
2046 /* Must copy out ar.unat before doing any integer spills. */
2047 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2048 {
2049 if (current_frame_info.reg_save_ar_unat)
2050 ar_unat_save_reg
2051 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2052 else
2053 {
2054 alt_regno = next_scratch_gr_reg ();
2055 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2056 current_frame_info.gr_used_mask |= 1 << alt_regno;
2057 }
2058
2059 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2060 insn = emit_move_insn (ar_unat_save_reg, reg);
2061 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2062
2063 /* Even if we're not going to generate an epilogue, we still
2064 need to save the register so that EH works. */
2065 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2066 emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
2067 }
2068 else
2069 ar_unat_save_reg = NULL_RTX;
2070
2071 /* Spill all varargs registers. Do this before spilling any GR registers,
2072 since we want the UNAT bits for the GR registers to override the UNAT
2073 bits from varargs, which we don't care about. */
2074
2075 cfa_off = -16;
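/* The unnamed arguments occupy the last N_VARARGS argument registers,
   so walk downward from the highest-numbered one.  */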
2076 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2077 {
2078 reg = gen_rtx_REG (DImode, regno);
2079 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2080 }
2081
2082 /* Locate the bottom of the register save area. */
2083 cfa_off = (current_frame_info.spill_cfa_off
2084 + current_frame_info.spill_size
2085 + current_frame_info.extra_spill_size);
2086
2087 /* Save the predicate register block either in a register or in memory. */
2088 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2089 {
2090 reg = gen_rtx_REG (DImode, PR_REG (0));
2091 if (current_frame_info.reg_save_pr != 0)
2092 {
2093 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2094 insn = emit_move_insn (alt_reg, reg);
2095
2096 /* ??? Denote pr spill/fill by a DImode move that modifies all
2097 64 hard registers. */
2098 RTX_FRAME_RELATED_P (insn) = 1;
2099 REG_NOTES (insn)
2100 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2101 gen_rtx_SET (VOIDmode, alt_reg, reg),
2102 REG_NOTES (insn));
2103
2104 /* Even if we're not going to generate an epilogue, we still
2105 need to save the register so that EH works. */
2106 if (! epilogue_p)
2107 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2108 }
2109 else
2110 {
2111 alt_regno = next_scratch_gr_reg ();
2112 alt_reg = gen_rtx_REG (DImode, alt_regno);
2113 insn = emit_move_insn (alt_reg, reg);
2114 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2115 cfa_off -= 8;
2116 }
2117 }
2118
2119 /* Handle AR regs in numerical order. All of them get special handling. */
2120 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2121 && current_frame_info.reg_save_ar_unat == 0)
2122 {
2123 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2124 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2125 cfa_off -= 8;
2126 }
2127
2128 /* The alloc insn already copied ar.pfs into a general register. The
2129 only thing we have to do now is copy that register to a stack slot
2130 if we'd not allocated a local register for the job. */
2131 if (current_frame_info.reg_save_ar_pfs == 0
2132 && ! current_function_is_leaf)
2133 {
2134 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2135 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2136 cfa_off -= 8;
2137 }
2138
2139 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2140 {
2141 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2142 if (current_frame_info.reg_save_ar_lc != 0)
2143 {
2144 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2145 insn = emit_move_insn (alt_reg, reg);
2146 RTX_FRAME_RELATED_P (insn) = 1;
2147
2148 /* Even if we're not going to generate an epilogue, we still
2149 need to save the register so that EH works. */
2150 if (! epilogue_p)
2151 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2152 }
2153 else
2154 {
2155 alt_regno = next_scratch_gr_reg ();
2156 alt_reg = gen_rtx_REG (DImode, alt_regno);
2157 emit_move_insn (alt_reg, reg);
2158 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2159 cfa_off -= 8;
2160 }
2161 }
2162
2163 /* We should now be at the base of the gr/br/fr spill area. */
2164 if (cfa_off != (current_frame_info.spill_cfa_off
2165 + current_frame_info.spill_size))
2166 abort ();
2167
2168 /* Spill all general registers. */
2169 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2170 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2171 {
2172 reg = gen_rtx_REG (DImode, regno);
2173 do_spill (gen_gr_spill, reg, cfa_off, reg);
2174 cfa_off -= 8;
2175 }
2176
2177 /* Handle BR0 specially -- it may be getting stored permanently in
2178 some GR register. */
2179 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2180 {
2181 reg = gen_rtx_REG (DImode, BR_REG (0));
2182 if (current_frame_info.reg_save_b0 != 0)
2183 {
2184 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2185 insn = emit_move_insn (alt_reg, reg);
2186 RTX_FRAME_RELATED_P (insn) = 1;
2187
2188 /* Even if we're not going to generate an epilogue, we still
2189 need to save the register so that EH works. */
2190 if (! epilogue_p)
2191 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2192 }
2193 else
2194 {
2195 alt_regno = next_scratch_gr_reg ();
2196 alt_reg = gen_rtx_REG (DImode, alt_regno);
2197 emit_move_insn (alt_reg, reg);
2198 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2199 cfa_off -= 8;
2200 }
2201 }
2202
2203 /* Spill the rest of the BR registers. */
2204 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2205 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2206 {
2207 alt_regno = next_scratch_gr_reg ();
2208 alt_reg = gen_rtx_REG (DImode, alt_regno);
2209 reg = gen_rtx_REG (DImode, regno);
2210 emit_move_insn (alt_reg, reg);
2211 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2212 cfa_off -= 8;
2213 }
2214
2215 /* Align the frame and spill all FR registers. */
2216 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2217 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2218 {
2219 if (cfa_off & 15)
2220 abort ();
2221 reg = gen_rtx_REG (TFmode, regno);
2222 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2223 cfa_off -= 16;
2224 }
2225
2226 if (cfa_off != current_frame_info.spill_cfa_off)
2227 abort ();
2228
2229 finish_spill_pointers ();
2230 }
2231
2232 /* Called after register allocation to add any instructions needed for the
2233 epilogue. Using an epilogue insn is favored compared to putting all of the
2234 instructions in output_function_epilogue(), since it allows the scheduler
2235 to intermix instructions with the restores of the caller saved registers. In
2236 some cases, it might be necessary to emit a barrier instruction as the last
2237 insn to prevent such scheduling. */
2238
2239 void
2240 ia64_expand_epilogue (sibcall_p)
2241 int sibcall_p;
2242 {
2243 rtx insn, reg, alt_reg, ar_unat_save_reg;
2244 int regno, alt_regno, cfa_off;
2245
2246 ia64_compute_frame_size (get_frame_size ());
2247
2248 /* If there is a frame pointer, then we use it instead of the stack
2249 pointer, so that the stack pointer does not need to be valid when
2250 the epilogue starts. See EXIT_IGNORE_STACK. */
2251 if (frame_pointer_needed)
2252 setup_spill_pointers (current_frame_info.n_spilled,
2253 hard_frame_pointer_rtx, 0);
2254 else
2255 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2256 current_frame_info.total_size);
2257
2258 if (current_frame_info.total_size != 0)
2259 {
2260 /* ??? At this point we must generate a magic insn that appears to
2261 modify the spill iterators and the frame pointer. This would
2262 allow the most scheduling freedom. For now, just hard stop. */
2263 emit_insn (gen_blockage ());
2264 }
2265
2266 /* Locate the bottom of the register save area. */
2267 cfa_off = (current_frame_info.spill_cfa_off
2268 + current_frame_info.spill_size
2269 + current_frame_info.extra_spill_size);
2270
2271 /* Restore the predicate registers. */
2272 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2273 {
2274 if (current_frame_info.reg_save_pr != 0)
2275 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2276 else
2277 {
2278 alt_regno = next_scratch_gr_reg ();
2279 alt_reg = gen_rtx_REG (DImode, alt_regno);
2280 do_restore (gen_movdi_x, alt_reg, cfa_off);
2281 cfa_off -= 8;
2282 }
2283 reg = gen_rtx_REG (DImode, PR_REG (0));
2284 emit_move_insn (reg, alt_reg);
2285 }
2286
2287 /* Restore the application registers. */
2288
2289 /* Load the saved unat from the stack, but do not restore it until
2290 after the GRs have been restored. */
2291 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2292 {
2293 if (current_frame_info.reg_save_ar_unat != 0)
2294 ar_unat_save_reg
2295 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2296 else
2297 {
2298 alt_regno = next_scratch_gr_reg ();
2299 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2300 current_frame_info.gr_used_mask |= 1 << alt_regno;
2301 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2302 cfa_off -= 8;
2303 }
2304 }
2305 else
2306 ar_unat_save_reg = NULL_RTX;
2307
2308 if (current_frame_info.reg_save_ar_pfs != 0)
2309 {
2310 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2311 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2312 emit_move_insn (reg, alt_reg);
2313 }
2314 else if (! current_function_is_leaf)
2315 {
2316 alt_regno = next_scratch_gr_reg ();
2317 alt_reg = gen_rtx_REG (DImode, alt_regno);
2318 do_restore (gen_movdi_x, alt_reg, cfa_off);
2319 cfa_off -= 8;
2320 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2321 emit_move_insn (reg, alt_reg);
2322 }
2323
2324 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2325 {
2326 if (current_frame_info.reg_save_ar_lc != 0)
2327 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2328 else
2329 {
2330 alt_regno = next_scratch_gr_reg ();
2331 alt_reg = gen_rtx_REG (DImode, alt_regno);
2332 do_restore (gen_movdi_x, alt_reg, cfa_off);
2333 cfa_off -= 8;
2334 }
2335 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2336 emit_move_insn (reg, alt_reg);
2337 }
2338
2339 /* We should now be at the base of the gr/br/fr spill area. */
2340 if (cfa_off != (current_frame_info.spill_cfa_off
2341 + current_frame_info.spill_size))
2342 abort ();
2343
2344 /* Restore all general registers. */
2345 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2346 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2347 {
2348 reg = gen_rtx_REG (DImode, regno);
2349 do_restore (gen_gr_restore, reg, cfa_off);
2350 cfa_off -= 8;
2351 }
2352
2353 /* Restore the branch registers. Handle B0 specially, as it may
2354 have gotten stored in some GR register. */
2355 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2356 {
2357 if (current_frame_info.reg_save_b0 != 0)
2358 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2359 else
2360 {
2361 alt_regno = next_scratch_gr_reg ();
2362 alt_reg = gen_rtx_REG (DImode, alt_regno);
2363 do_restore (gen_movdi_x, alt_reg, cfa_off);
2364 cfa_off -= 8;
2365 }
2366 reg = gen_rtx_REG (DImode, BR_REG (0));
2367 emit_move_insn (reg, alt_reg);
2368 }
2369
2370 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2371 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2372 {
2373 alt_regno = next_scratch_gr_reg ();
2374 alt_reg = gen_rtx_REG (DImode, alt_regno);
2375 do_restore (gen_movdi_x, alt_reg, cfa_off);
2376 cfa_off -= 8;
2377 reg = gen_rtx_REG (DImode, regno);
2378 emit_move_insn (reg, alt_reg);
2379 }
2380
2381 /* Restore floating point registers. */
2382 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2383 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2384 {
2385 if (cfa_off & 15)
2386 abort ();
2387 reg = gen_rtx_REG (TFmode, regno);
2388 do_restore (gen_fr_restore_x, reg, cfa_off);
2389 cfa_off -= 16;
2390 }
2391
2392 /* Restore ar.unat for real. */
2393 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2394 {
2395 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2396 emit_move_insn (reg, ar_unat_save_reg);
2397 }
2398
2399 if (cfa_off != current_frame_info.spill_cfa_off)
2400 abort ();
2401
2402 finish_spill_pointers ();
2403
2404 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2405 {
2406 /* ??? At this point we must generate a magic insn that appears to
2407 modify the spill iterators, the stack pointer, and the frame
2408 pointer. This would allow the most scheduling freedom. For now,
2409 just hard stop. */
2410 emit_insn (gen_blockage ());
2411 }
2412
2413 if (cfun->machine->ia64_eh_epilogue_sp)
2414 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2415 else if (frame_pointer_needed)
2416 {
2417 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2418 RTX_FRAME_RELATED_P (insn) = 1;
2419 }
2420 else if (current_frame_info.total_size)
2421 {
2422 rtx offset, frame_size_rtx;
2423
2424 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2425 if (CONST_OK_FOR_I (current_frame_info.total_size))
2426 offset = frame_size_rtx;
2427 else
2428 {
2429 regno = next_scratch_gr_reg ();
2430 offset = gen_rtx_REG (DImode, regno);
2431 emit_move_insn (offset, frame_size_rtx);
2432 }
2433
2434 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2435 offset));
2436
2437 RTX_FRAME_RELATED_P (insn) = 1;
2438 if (GET_CODE (offset) != CONST_INT)
2439 {
2440 REG_NOTES (insn)
2441 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2442 gen_rtx_SET (VOIDmode,
2443 stack_pointer_rtx,
2444 gen_rtx_PLUS (DImode,
2445 stack_pointer_rtx,
2446 frame_size_rtx)),
2447 REG_NOTES (insn));
2448 }
2449 }
2450
2451 if (cfun->machine->ia64_eh_epilogue_bsp)
2452 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2453
2454 if (! sibcall_p)
2455 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2456 else
2457 {
2458 int fp = GR_REG (2);
2459 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
2460 first available call-clobbered register. If there was a frame pointer
2461 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2462 so we have to make sure we're using the string "r2" when emitting
2463 the register name for the assembler. */
2464 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2465 fp = HARD_FRAME_POINTER_REGNUM;
2466
2467 /* We must emit an alloc to force the input registers to become output
2468 registers. Otherwise, if the callee tries to pass its parameters
2469 through to another call without an intervening alloc, then these
2470 values get lost. */
2471 /* ??? We don't need to preserve all input registers. We only need to
2472 preserve those input registers used as arguments to the sibling call.
2473 It is unclear how to compute that number here. */
2474 if (current_frame_info.n_input_regs != 0)
2475 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2476 GEN_INT (0), GEN_INT (0),
2477 GEN_INT (current_frame_info.n_input_regs),
2478 GEN_INT (0)));
2479 }
2480 }
2481
2482 /* Return 1 if br.ret can do all the work required to return from a
2483 function. */
2484
2485 int
2486 ia64_direct_return ()
2487 {
2488 if (reload_completed && ! frame_pointer_needed)
2489 {
2490 ia64_compute_frame_size (get_frame_size ());
2491
2492 return (current_frame_info.total_size == 0
2493 && current_frame_info.n_spilled == 0
2494 && current_frame_info.reg_save_b0 == 0
2495 && current_frame_info.reg_save_pr == 0
2496 && current_frame_info.reg_save_ar_pfs == 0
2497 && current_frame_info.reg_save_ar_unat == 0
2498 && current_frame_info.reg_save_ar_lc == 0);
2499 }
2500 return 0;
2501 }
2502
2503 int
2504 ia64_hard_regno_rename_ok (from, to)
2505 int from;
2506 int to;
2507 {
2508 /* Don't clobber any of the registers we reserved for the prologue. */
2509 if (to == current_frame_info.reg_fp
2510 || to == current_frame_info.reg_save_b0
2511 || to == current_frame_info.reg_save_pr
2512 || to == current_frame_info.reg_save_ar_pfs
2513 || to == current_frame_info.reg_save_ar_unat
2514 || to == current_frame_info.reg_save_ar_lc)
2515 return 0;
2516
2517 if (from == current_frame_info.reg_fp
2518 || from == current_frame_info.reg_save_b0
2519 || from == current_frame_info.reg_save_pr
2520 || from == current_frame_info.reg_save_ar_pfs
2521 || from == current_frame_info.reg_save_ar_unat
2522 || from == current_frame_info.reg_save_ar_lc)
2523 return 0;
2524
2525 /* Don't use output registers outside the register frame. */
2526 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2527 return 0;
2528
2529 /* Retain even/oddness on predicate register pairs. */
2530 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2531 return (from & 1) == (to & 1);
2532
2533 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2534 if (from == GR_REG (4) && current_function_calls_setjmp)
2535 return 0;
2536
2537 return 1;
2538 }
2539
2540 /* Emit the function prologue. */
2541
2542 static void
2543 ia64_output_function_prologue (file, size)
2544 FILE *file;
2545 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2546 {
2547 int mask, grsave, grsave_prev;
2548
2549 if (current_frame_info.need_regstk)
2550 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2551 current_frame_info.n_input_regs,
2552 current_frame_info.n_local_regs,
2553 current_frame_info.n_output_regs,
2554 current_frame_info.n_rotate_regs);
2555
2556 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2557 return;
2558
2559 /* Emit the .prologue directive. */
2560
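/* The mask bits indicate which of b0, ar.pfs, the frame pointer, and the
   predicates are saved in consecutive general registers starting at
   GRSAVE.  */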
2561 mask = 0;
2562 grsave = grsave_prev = 0;
2563 if (current_frame_info.reg_save_b0 != 0)
2564 {
2565 mask |= 8;
2566 grsave = grsave_prev = current_frame_info.reg_save_b0;
2567 }
2568 if (current_frame_info.reg_save_ar_pfs != 0
2569 && (grsave_prev == 0
2570 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2571 {
2572 mask |= 4;
2573 if (grsave_prev == 0)
2574 grsave = current_frame_info.reg_save_ar_pfs;
2575 grsave_prev = current_frame_info.reg_save_ar_pfs;
2576 }
2577 if (current_frame_info.reg_fp != 0
2578 && (grsave_prev == 0
2579 || current_frame_info.reg_fp == grsave_prev + 1))
2580 {
2581 mask |= 2;
2582 if (grsave_prev == 0)
2583 grsave = HARD_FRAME_POINTER_REGNUM;
2584 grsave_prev = current_frame_info.reg_fp;
2585 }
2586 if (current_frame_info.reg_save_pr != 0
2587 && (grsave_prev == 0
2588 || current_frame_info.reg_save_pr == grsave_prev + 1))
2589 {
2590 mask |= 1;
2591 if (grsave_prev == 0)
2592 grsave = current_frame_info.reg_save_pr;
2593 }
2594
2595 if (mask)
2596 fprintf (file, "\t.prologue %d, %d\n", mask,
2597 ia64_dbx_register_number (grsave));
2598 else
2599 fputs ("\t.prologue\n", file);
2600
2601 /* Emit a .spill directive, if necessary, to relocate the base of
2602 the register spill area. */
2603 if (current_frame_info.spill_cfa_off != -16)
2604 fprintf (file, "\t.spill %ld\n",
2605 (long) (current_frame_info.spill_cfa_off
2606 + current_frame_info.spill_size));
2607 }
2608
2609 /* Emit the .body directive at the scheduled end of the prologue. */
2610
2611 static void
2612 ia64_output_function_end_prologue (file)
2613 FILE *file;
2614 {
2615 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2616 return;
2617
2618 fputs ("\t.body\n", file);
2619 }
2620
2621 /* Emit the function epilogue. */
2622
2623 static void
2624 ia64_output_function_epilogue (file, size)
2625 FILE *file ATTRIBUTE_UNUSED;
2626 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2627 {
2628 int i;
2629
2630 /* Reset from the function's potential modifications. */
2631 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2632
2633 if (current_frame_info.reg_fp)
2634 {
2635 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2636 reg_names[HARD_FRAME_POINTER_REGNUM]
2637 = reg_names[current_frame_info.reg_fp];
2638 reg_names[current_frame_info.reg_fp] = tmp;
2639 }
2640 if (! TARGET_REG_NAMES)
2641 {
2642 for (i = 0; i < current_frame_info.n_input_regs; i++)
2643 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2644 for (i = 0; i < current_frame_info.n_local_regs; i++)
2645 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2646 for (i = 0; i < current_frame_info.n_output_regs; i++)
2647 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2648 }
2649
2650 current_frame_info.initialized = 0;
2651 }
2652
2653 int
2654 ia64_dbx_register_number (regno)
2655 int regno;
2656 {
2657 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2658 from its home at loc79 to something inside the register frame. We
2659 must perform the same renumbering here for the debug info. */
2660 if (current_frame_info.reg_fp)
2661 {
2662 if (regno == HARD_FRAME_POINTER_REGNUM)
2663 regno = current_frame_info.reg_fp;
2664 else if (regno == current_frame_info.reg_fp)
2665 regno = HARD_FRAME_POINTER_REGNUM;
2666 }
2667
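/* The stacked registers are numbered from 32 upward for the debugger,
   in the order inputs, locals, outputs.  */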
2668 if (IN_REGNO_P (regno))
2669 return 32 + regno - IN_REG (0);
2670 else if (LOC_REGNO_P (regno))
2671 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2672 else if (OUT_REGNO_P (regno))
2673 return (32 + current_frame_info.n_input_regs
2674 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2675 else
2676 return regno;
2677 }
2678
2679 void
2680 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2681 rtx addr, fnaddr, static_chain;
2682 {
2683 rtx addr_reg, eight = GEN_INT (8);
2684
2685 /* Load up our iterator. */
2686 addr_reg = gen_reg_rtx (Pmode);
2687 emit_move_insn (addr_reg, addr);
2688
2689 /* The first two words are the fake descriptor:
2690 __ia64_trampoline, ADDR+16. */
2691 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2692 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2693 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2694
2695 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2696 copy_to_reg (plus_constant (addr, 16)));
2697 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2698
2699 /* The third word is the target descriptor. */
2700 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2701 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2702
2703 /* The fourth word is the static chain. */
2704 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2705 }
2706 \f
2707 /* Do any needed setup for a variadic function. CUM has not been updated
2708 for the last named argument which has type TYPE and mode MODE.
2709
2710 We generate the actual spill instructions during prologue generation. */
2711
2712 void
2713 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2714 CUMULATIVE_ARGS cum;
2715 int int_mode;
2716 tree type;
2717 int * pretend_size;
2718 int second_time ATTRIBUTE_UNUSED;
2719 {
2720 /* If this is a stdarg function, then skip the current argument. */
2721 if (! current_function_varargs)
2722 ia64_function_arg_advance (&cum, int_mode, type, 1);
2723
2724 if (cum.words < MAX_ARGUMENT_SLOTS)
2725 {
2726 int n = MAX_ARGUMENT_SLOTS - cum.words;
2727 *pretend_size = n * UNITS_PER_WORD;
2728 cfun->machine->n_varargs = n;
2729 }
2730 }
2731
2732 /* Check whether TYPE is a homogeneous floating point aggregate. If
2733 it is, return the mode of the floating point type that appears
2734 in all leaves. If it is not, return VOIDmode.
2735
2736 An aggregate is a homogeneous floating point aggregate if all
2737 fields/elements in it have the same floating point type (e.g.,
2738 SFmode). 128-bit quad-precision floats are excluded. */
2739
2740 static enum machine_mode
2741 hfa_element_mode (type, nested)
2742 tree type;
2743 int nested;
2744 {
2745 enum machine_mode element_mode = VOIDmode;
2746 enum machine_mode mode;
2747 enum tree_code code = TREE_CODE (type);
2748 int know_element_mode = 0;
2749 tree t;
2750
2751 switch (code)
2752 {
2753 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2754 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2755 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2756 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2757 case FUNCTION_TYPE:
2758 return VOIDmode;
2759
2760 /* Fortran complex types are supposed to be HFAs, so we need to handle
2761 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2762 types though. */
2763 case COMPLEX_TYPE:
2764 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2765 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2766 * BITS_PER_UNIT, MODE_FLOAT, 0);
2767 else
2768 return VOIDmode;
2769
2770 case REAL_TYPE:
2771 /* ??? Should exclude 128-bit long double here. */
2772 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2773 mode if this is contained within an aggregate. */
2774 if (nested)
2775 return TYPE_MODE (type);
2776 else
2777 return VOIDmode;
2778
2779 case ARRAY_TYPE:
2780 return TYPE_MODE (TREE_TYPE (type));
2781
2782 case RECORD_TYPE:
2783 case UNION_TYPE:
2784 case QUAL_UNION_TYPE:
2785 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2786 {
2787 if (TREE_CODE (t) != FIELD_DECL)
2788 continue;
2789
2790 mode = hfa_element_mode (TREE_TYPE (t), 1);
2791 if (know_element_mode)
2792 {
2793 if (mode != element_mode)
2794 return VOIDmode;
2795 }
2796 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2797 return VOIDmode;
2798 else
2799 {
2800 know_element_mode = 1;
2801 element_mode = mode;
2802 }
2803 }
2804 return element_mode;
2805
2806 default:
2807 /* If we reach here, we probably have some front-end specific type
2808 that the backend doesn't know about. This can happen via the
2809 aggregate_value_p call in init_function_start. All we can do is
2810 ignore unknown tree types. */
2811 return VOIDmode;
2812 }
2813
2814 return VOIDmode;
2815 }
2816
2817 /* Return rtx for register where argument is passed, or zero if it is passed
2818 on the stack. */
2819
2820 /* ??? 128-bit quad-precision floats are always passed in general
2821 registers. */
2822
2823 rtx
2824 ia64_function_arg (cum, mode, type, named, incoming)
2825 CUMULATIVE_ARGS *cum;
2826 enum machine_mode mode;
2827 tree type;
2828 int named;
2829 int incoming;
2830 {
2831 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2832 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2833 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2834 / UNITS_PER_WORD);
2835 int offset = 0;
2836 enum machine_mode hfa_mode = VOIDmode;
2837
2838 /* Integer and float arguments larger than 8 bytes start at the next even
2839 boundary. Aggregates larger than 8 bytes start at the next even boundary
2840 if the aggregate has 16 byte alignment. Net effect is that types with
2841 alignment greater than 8 start at the next even boundary. */
2842 /* ??? The ABI does not specify how to handle aggregates with alignment from
2843 9 to 15 bytes, or greater than 16. We handle them all as if they had
2844 16 byte alignment. Such aggregates can occur only if gcc extensions are
2845 used. */
2846 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2847 : (words > 1))
2848 && (cum->words & 1))
2849 offset = 1;
2850
2851 /* If all argument slots are used, then it must go on the stack. */
2852 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2853 return 0;
2854
2855 /* Check for and handle homogeneous FP aggregates. */
2856 if (type)
2857 hfa_mode = hfa_element_mode (type, 0);
2858
2859 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2860 and unprototyped hfas are passed specially. */
2861 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2862 {
2863 rtx loc[16];
2864 int i = 0;
2865 int fp_regs = cum->fp_regs;
2866 int int_regs = cum->words + offset;
2867 int hfa_size = GET_MODE_SIZE (hfa_mode);
2868 int byte_size;
2869 int args_byte_size;
2870
2871 /* If prototyped, pass it in FR regs then GR regs.
2872 If not prototyped, pass it in both FR and GR regs.
2873
2874 If this is an SFmode aggregate, then it is possible to run out of
2875 FR regs while GR regs are still left. In that case, we pass the
2876 remaining part in the GR regs. */
2877
2878 /* Fill the FP regs. We do this always. We stop if we reach the end
2879 of the argument, the last FP register, or the last argument slot. */
2880
2881 byte_size = ((mode == BLKmode)
2882 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2883 args_byte_size = int_regs * UNITS_PER_WORD;
2884 offset = 0;
2885 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2886 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2887 {
2888 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2889 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2890 + fp_regs)),
2891 GEN_INT (offset));
2892 offset += hfa_size;
2893 args_byte_size += hfa_size;
2894 fp_regs++;
2895 }
2896
2897 /* If no prototype, then the whole thing must go in GR regs. */
2898 if (! cum->prototype)
2899 offset = 0;
2900 /* If this is an SFmode aggregate, then we might have some left over
2901 that needs to go in GR regs. */
2902 else if (byte_size != offset)
2903 int_regs += offset / UNITS_PER_WORD;
2904
2905 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2906
2907 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
2908 {
2909 enum machine_mode gr_mode = DImode;
2910
2911 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2912 then this goes in a GR reg left adjusted/little endian, right
2913 adjusted/big endian. */
2914 /* ??? Currently this is handled wrong, because 4-byte hunks are
2915 always right adjusted/little endian. */
2916 if (offset & 0x4)
2917 gr_mode = SImode;
2918 /* If we have an even 4 byte hunk because the aggregate is a
2919 multiple of 4 bytes in size, then this goes in a GR reg right
2920 adjusted/little endian. */
2921 else if (byte_size - offset == 4)
2922 gr_mode = SImode;
2923 /* Complex floats need to have float mode. */
2924 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
2925 gr_mode = hfa_mode;
2926
2927 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2928 gen_rtx_REG (gr_mode, (basereg
2929 + int_regs)),
2930 GEN_INT (offset));
2931 offset += GET_MODE_SIZE (gr_mode);
2932 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
2933 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
2934 }
2935
2936 /* If we ended up using just one location, just return that one loc. */
2937 if (i == 1)
2938 return XEXP (loc[0], 0);
2939 else
2940 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
2941 }
2942
2943 /* Integral and aggregates go in general registers. If we have run out of
2944 FR registers, then FP values must also go in general registers. This can
2945 happen when we have a SFmode HFA. */
2946 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
2947 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
2948 return gen_rtx_REG (mode, basereg + cum->words + offset);
2949
2950 /* If there is a prototype, then FP values go in a FR register when
2951 named, and in a GR register when unnamed. */
2952 else if (cum->prototype)
2953 {
2954 if (! named)
2955 return gen_rtx_REG (mode, basereg + cum->words + offset);
2956 else
2957 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
2958 }
2959 /* If there is no prototype, then FP values go in both FR and GR
2960 registers. */
2961 else
2962 {
2963 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
2964 gen_rtx_REG (mode, (FR_ARG_FIRST
2965 + cum->fp_regs)),
2966 const0_rtx);
2967 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2968 gen_rtx_REG (mode,
2969 (basereg + cum->words
2970 + offset)),
2971 const0_rtx);
2972
2973 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
2974 }
2975 }
2976
2977 /* Return number of words, at the beginning of the argument, that must be
2978 put in registers. 0 if the argument is entirely in registers or entirely
2979 in memory. */
2980
2981 int
2982 ia64_function_arg_partial_nregs (cum, mode, type, named)
2983 CUMULATIVE_ARGS *cum;
2984 enum machine_mode mode;
2985 tree type;
2986 int named ATTRIBUTE_UNUSED;
2987 {
2988 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2989 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2990 / UNITS_PER_WORD);
2991 int offset = 0;
2992
2993 /* Arguments with alignment larger than 8 bytes start at the next even
2994 boundary. */
2995 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2996 : (words > 1))
2997 && (cum->words & 1))
2998 offset = 1;
2999
3000 /* If all argument slots are used, then it must go on the stack. */
3001 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3002 return 0;
3003
3004 /* It doesn't matter whether the argument goes in FR or GR regs. If
3005 it fits within the 8 argument slots, then it goes entirely in
3006 registers. If it extends past the last argument slot, then the rest
3007 goes on the stack. */
3008
3009 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3010 return 0;
3011
3012 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3013 }
3014
3015 /* Update CUM to point after this argument. This is patterned after
3016 ia64_function_arg. */
3017
3018 void
3019 ia64_function_arg_advance (cum, mode, type, named)
3020 CUMULATIVE_ARGS *cum;
3021 enum machine_mode mode;
3022 tree type;
3023 int named;
3024 {
3025 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3026 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3027 / UNITS_PER_WORD);
3028 int offset = 0;
3029 enum machine_mode hfa_mode = VOIDmode;
3030
3031 /* If all arg slots are already full, then there is nothing to do. */
3032 if (cum->words >= MAX_ARGUMENT_SLOTS)
3033 return;
3034
3035 /* Arguments with alignment larger than 8 bytes start at the next even
3036 boundary. */
3037 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3038 : (words > 1))
3039 && (cum->words & 1))
3040 offset = 1;
3041
3042 cum->words += words + offset;
3043
3044 /* Check for and handle homogeneous FP aggregates. */
3045 if (type)
3046 hfa_mode = hfa_element_mode (type, 0);
3047
3048 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3049 and unprototyped hfas are passed specially. */
3050 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3051 {
3052 int fp_regs = cum->fp_regs;
3053 /* This is the original value of cum->words + offset. */
3054 int int_regs = cum->words - words;
3055 int hfa_size = GET_MODE_SIZE (hfa_mode);
3056 int byte_size;
3057 int args_byte_size;
3058
3059 /* If prototyped, pass it in FR regs then GR regs.
3060 If not prototyped, pass it in both FR and GR regs.
3061
3062 If this is an SFmode aggregate, then it is possible to run out of
3063 FR regs while GR regs are still left. In that case, we pass the
3064 remaining part in the GR regs. */
3065
3066 /* Fill the FP regs. We do this always. We stop if we reach the end
3067 of the argument, the last FP register, or the last argument slot. */
3068
3069 byte_size = ((mode == BLKmode)
3070 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3071 args_byte_size = int_regs * UNITS_PER_WORD;
3072 offset = 0;
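/* Unlike ia64_function_arg, nothing is emitted here; we only count how
   many FR registers the HFA consumes.  */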
3073 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3074 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3075 {
3076 offset += hfa_size;
3077 args_byte_size += hfa_size;
3078 fp_regs++;
3079 }
3080
3081 cum->fp_regs = fp_regs;
3082 }
3083
3084 /* Integral and aggregates go in general registers. If we have run out of
3085 FR registers, then FP values must also go in general registers. This can
3086 happen when we have a SFmode HFA. */
3087 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3088 return;
3089
3090 /* If there is a prototype, then FP values go in a FR register when
3091 named, and in a GR register when unnamed. */
3092 else if (cum->prototype)
3093 {
3094 if (! named)
3095 return;
3096 else
3097 /* ??? Complex types should not reach here. */
3098 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3099 }
3100 /* If there is no prototype, then FP values go in both FR and GR
3101 registers. */
3102 else
3103 /* ??? Complex types should not reach here. */
3104 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3105
3106 return;
3107 }
3108 \f
3109 /* Implement va_start. */
3110
3111 void
3112 ia64_va_start (stdarg_p, valist, nextarg)
3113 int stdarg_p;
3114 tree valist;
3115 rtx nextarg;
3116 {
3117 int arg_words;
3118 int ofs;
3119
3120 arg_words = current_function_args_info.words;
3121
3122 if (stdarg_p)
3123 ofs = 0;
3124 else
3125 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3126
3127 nextarg = plus_constant (nextarg, ofs);
3128 std_expand_builtin_va_start (1, valist, nextarg);
3129 }
3130
3131 /* Implement va_arg. */
3132
3133 rtx
3134 ia64_va_arg (valist, type)
3135 tree valist, type;
3136 {
3137 tree t;
3138
3139 /* Arguments with alignment larger than 8 bytes start at the next even
3140 boundary. */
3141 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3142 {
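/* Round the pointer up to the next 2 * UNITS_PER_WORD (16 byte)
   boundary.  */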
3143 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3144 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3145 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3146 build_int_2 (-2 * UNITS_PER_WORD, -1));
3147 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3148 TREE_SIDE_EFFECTS (t) = 1;
3149 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3150 }
3151
3152 return std_expand_builtin_va_arg (valist, type);
3153 }
3154 \f
3155 /* Return 1 if the function return value is returned in memory. Return 0 if it is
3156 in a register. */
3157
3158 int
3159 ia64_return_in_memory (valtype)
3160 tree valtype;
3161 {
3162 enum machine_mode mode;
3163 enum machine_mode hfa_mode;
3164 int byte_size;
3165
3166 mode = TYPE_MODE (valtype);
3167 byte_size = ((mode == BLKmode)
3168 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3169
3170 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3171
3172 hfa_mode = hfa_element_mode (valtype, 0);
3173 if (hfa_mode != VOIDmode)
3174 {
3175 int hfa_size = GET_MODE_SIZE (hfa_mode);
3176
3177 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3178 return 1;
3179 else
3180 return 0;
3181 }
3182
3183 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3184 return 1;
3185 else
3186 return 0;
3187 }
3188
3189 /* Return rtx for register that holds the function return value. */
3190
3191 rtx
3192 ia64_function_value (valtype, func)
3193 tree valtype;
3194 tree func ATTRIBUTE_UNUSED;
3195 {
3196 enum machine_mode mode;
3197 enum machine_mode hfa_mode;
3198
3199 mode = TYPE_MODE (valtype);
3200 hfa_mode = hfa_element_mode (valtype, 0);
3201
3202 if (hfa_mode != VOIDmode)
3203 {
3204 rtx loc[8];
3205 int i;
3206 int hfa_size;
3207 int byte_size;
3208 int offset;
3209
3210 hfa_size = GET_MODE_SIZE (hfa_mode);
3211 byte_size = ((mode == BLKmode)
3212 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3213 offset = 0;
3214 for (i = 0; offset < byte_size; i++)
3215 {
3216 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3217 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3218 GEN_INT (offset));
3219 offset += hfa_size;
3220 }
3221
3222 if (i == 1)
3223 return XEXP (loc[0], 0);
3224 else
3225 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3226 }
3227 else if (FLOAT_TYPE_P (valtype) &&
3228 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3229 return gen_rtx_REG (mode, FR_ARG_FIRST);
3230 else
3231 return gen_rtx_REG (mode, GR_RET_FIRST);
3232 }
3233
3234 /* Print a memory address as an operand to reference that memory location. */
3235
3236 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3237 also call this from ia64_print_operand for memory addresses. */
3238
3239 void
3240 ia64_print_operand_address (stream, address)
3241 FILE * stream ATTRIBUTE_UNUSED;
3242 rtx address ATTRIBUTE_UNUSED;
3243 {
3244 }
3245
3246 /* Print an operand to an assembler instruction.
3247 C Swap and print a comparison operator.
3248 D Print an FP comparison operator.
3249 E Print 32 - constant, for SImode shifts as extract.
3250 e Print 64 - constant, for DImode rotates.
3251 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3252 a floating point register emitted normally.
3253 I Invert a predicate register by adding 1.
3254 J Select the proper predicate register for a condition.
3255 j Select the inverse predicate register for a condition.
3256 O Append .acq for volatile load.
3257 P Postincrement of a MEM.
3258 Q Append .rel for volatile store.
3259 S Shift amount for shladd instruction.
3260 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3261 for Intel assembler.
3262 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3263 for Intel assembler.
3264 r Print register name, or constant 0 as r0. HP compatibility for
3265 Linux kernel. */
3266 void
3267 ia64_print_operand (file, x, code)
3268 FILE * file;
3269 rtx x;
3270 int code;
3271 {
3272 const char *str;
3273
3274 switch (code)
3275 {
3276 case 0:
3277 /* Handled below. */
3278 break;
3279
3280 case 'C':
3281 {
3282 enum rtx_code c = swap_condition (GET_CODE (x));
3283 fputs (GET_RTX_NAME (c), file);
3284 return;
3285 }
3286
3287 case 'D':
3288 switch (GET_CODE (x))
3289 {
3290 case NE:
3291 str = "neq";
3292 break;
3293 case UNORDERED:
3294 str = "unord";
3295 break;
3296 case ORDERED:
3297 str = "ord";
3298 break;
3299 default:
3300 str = GET_RTX_NAME (GET_CODE (x));
3301 break;
3302 }
3303 fputs (str, file);
3304 return;
3305
3306 case 'E':
3307 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3308 return;
3309
3310 case 'e':
3311 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3312 return;
3313
3314 case 'F':
3315 if (x == CONST0_RTX (GET_MODE (x)))
3316 str = reg_names [FR_REG (0)];
3317 else if (x == CONST1_RTX (GET_MODE (x)))
3318 str = reg_names [FR_REG (1)];
3319 else if (GET_CODE (x) == REG)
3320 str = reg_names [REGNO (x)];
3321 else
3322 abort ();
3323 fputs (str, file);
3324 return;
3325
3326 case 'I':
3327 fputs (reg_names [REGNO (x) + 1], file);
3328 return;
3329
3330 case 'J':
3331 case 'j':
3332 {
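/* Predicate registers are allocated in complementary pairs; REGNO + 1
   holds the inverse of REGNO, so pick whichever matches the sense
   required.  */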
3333 unsigned int regno = REGNO (XEXP (x, 0));
3334 if (GET_CODE (x) == EQ)
3335 regno += 1;
3336 if (code == 'j')
3337 regno ^= 1;
3338 fputs (reg_names [regno], file);
3339 }
3340 return;
3341
3342 case 'O':
3343 if (MEM_VOLATILE_P (x))
3344 fputs(".acq", file);
3345 return;
3346
3347 case 'P':
3348 {
3349 HOST_WIDE_INT value;
3350
3351 switch (GET_CODE (XEXP (x, 0)))
3352 {
3353 default:
3354 return;
3355
3356 case POST_MODIFY:
3357 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3358 if (GET_CODE (x) == CONST_INT)
3359 value = INTVAL (x);
3360 else if (GET_CODE (x) == REG)
3361 {
3362 fprintf (file, ", %s", reg_names[REGNO (x)]);
3363 return;
3364 }
3365 else
3366 abort ();
3367 break;
3368
3369 case POST_INC:
3370 value = GET_MODE_SIZE (GET_MODE (x));
3371 break;
3372
3373 case POST_DEC:
3374 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3375 break;
3376 }
3377
3378 putc (',', file);
3379 putc (' ', file);
3380 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3381 return;
3382 }
3383
3384 case 'Q':
3385 if (MEM_VOLATILE_P (x))
3386 fputs(".rel", file);
3387 return;
3388
3389 case 'S':
3390 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3391 return;
3392
3393 case 'T':
3394 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3395 {
3396 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3397 return;
3398 }
3399 break;
3400
3401 case 'U':
3402 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3403 {
3404 const char *prefix = "0x";
3405 if (INTVAL (x) & 0x80000000)
3406 {
3407 fprintf (file, "0xffffffff");
3408 prefix = "";
3409 }
3410 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3411 return;
3412 }
3413 break;
3414
3415 case 'r':
3416 /* If this operand is the constant zero, write it as register zero.
3417 Any register, zero, or CONST_INT value is OK here. */
3418 if (GET_CODE (x) == REG)
3419 fputs (reg_names[REGNO (x)], file);
3420 else if (x == CONST0_RTX (GET_MODE (x)))
3421 fputs ("r0", file);
3422 else if (GET_CODE (x) == CONST_INT)
3423 output_addr_const (file, x);
3424 else
3425 output_operand_lossage ("invalid %%r value");
3426 return;
3427
3428 case '+':
3429 {
3430 const char *which;
3431
3432 /* For conditional branches, returns or calls, substitute
3433 sptk, dptk, dpnt, or spnt for %s. */
3434 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3435 if (x)
3436 {
3437 int pred_val = INTVAL (XEXP (x, 0));
3438
3439 /* Guess top and bottom 2% statically predicted. */
3440 if (pred_val < REG_BR_PROB_BASE / 50)
3441 which = ".spnt";
3442 else if (pred_val < REG_BR_PROB_BASE / 2)
3443 which = ".dpnt";
3444 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3445 which = ".dptk";
3446 else
3447 which = ".sptk";
3448 }
3449 else if (GET_CODE (current_output_insn) == CALL_INSN)
3450 which = ".sptk";
3451 else
3452 which = ".dptk";
3453
3454 fputs (which, file);
3455 return;
3456 }
3457
3458 case ',':
3459 x = current_insn_predicate;
3460 if (x)
3461 {
3462 unsigned int regno = REGNO (XEXP (x, 0));
3463 if (GET_CODE (x) == EQ)
3464 regno += 1;
3465 fprintf (file, "(%s) ", reg_names [regno]);
3466 }
3467 return;
3468
3469 default:
3470 output_operand_lossage ("ia64_print_operand: unknown code");
3471 return;
3472 }
3473
3474 switch (GET_CODE (x))
3475 {
3476 /* This happens for the spill/restore instructions. */
3477 case POST_INC:
3478 case POST_DEC:
3479 case POST_MODIFY:
3480 x = XEXP (x, 0);
3481 /* ... fall through ... */
3482
3483 case REG:
3484 fputs (reg_names [REGNO (x)], file);
3485 break;
3486
3487 case MEM:
3488 {
3489 rtx addr = XEXP (x, 0);
3490 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3491 addr = XEXP (addr, 0);
3492 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3493 break;
3494 }
3495
3496 default:
3497 output_addr_const (file, x);
3498 break;
3499 }
3500
3501 return;
3502 }
3503 \f
3504 /* Calculate the cost of moving data from a register in class FROM to
3505 one in class TO. */
3506
3507 int
3508 ia64_register_move_cost (from, to)
3509 enum reg_class from, to;
3510 {
3511 int from_hard, to_hard;
3512 int from_gr, to_gr;
3513 int from_fr, to_fr;
3514 int from_pr, to_pr;
3515
3516 from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
3517 to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
3518 from_gr = (from == GENERAL_REGS);
3519 to_gr = (to == GENERAL_REGS);
3520 from_fr = (from == FR_REGS);
3521 to_fr = (to == FR_REGS);
3522 from_pr = (from == PR_REGS);
3523 to_pr = (to == PR_REGS);
3524
3525 if (from_hard && to_hard)
3526 return 8;
3527 else if ((from_hard && !to_gr) || (!from_gr && to_hard))
3528 return 6;
3529
3530 /* Moving between PR registers takes two insns. */
3531 else if (from_pr && to_pr)
3532 return 3;
3533 /* Moving between PR and anything but GR is impossible. */
3534 else if ((from_pr && !to_gr) || (!from_gr && to_pr))
3535 return 6;
3536
3537 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3538 secondary memory reloads for TFmode moves. Unfortunately, we don't
3539 have the mode here, so we can't check that. */
3540 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3541 to avoid spectacularly poor register class preferencing for TFmode. */
3542 else if (from_fr != to_fr)
3543 return 5;
3544
3545 return 2;
3546 }
3547
3548 /* This function returns the register class required for a secondary
3549 register when copying between one of the registers in CLASS and X,
3550 using MODE. A return value of NO_REGS means that no secondary register
3551 is required. */
3552
3553 enum reg_class
3554 ia64_secondary_reload_class (class, mode, x)
3555 enum reg_class class;
3556 enum machine_mode mode ATTRIBUTE_UNUSED;
3557 rtx x;
3558 {
3559 int regno = -1;
3560
3561 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3562 regno = true_regnum (x);
3563
3564 switch (class)
3565 {
3566 case BR_REGS:
3567 /* ??? This is required because of a bad gcse/cse/global interaction.
3568 We end up with two pseudos with overlapping lifetimes both of which
3569 are equiv to the same constant, and both which need to be in BR_REGS.
3570 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3571 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3572 This seems to be a cse bug. cse_basic_block_end changes depending
3573 on the path length, which means the qty_first_reg check in
3574 make_regs_eqv can give different answers at different times. */
3575 /* ??? At some point I'll probably need a reload_indi pattern to handle
3576 this. */
3577 if (BR_REGNO_P (regno))
3578 return GR_REGS;
3579
3580 /* This is needed if a pseudo used as a call_operand gets spilled to a
3581 stack slot. */
3582 if (GET_CODE (x) == MEM)
3583 return GR_REGS;
3584 break;
3585
3586 case FR_REGS:
3587 /* This can happen when a paradoxical subreg is an operand to the
3588 muldi3 pattern. */
3589 /* ??? This shouldn't be necessary after instruction scheduling is
3590 enabled, because paradoxical subregs are not accepted by
3591 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3592 stop the paradoxical subreg stupidity in the *_operand functions
3593 in recog.c. */
3594 if (GET_CODE (x) == MEM
3595 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3596 || GET_MODE (x) == QImode))
3597 return GR_REGS;
3598
3599 /* This can happen because of the ior/and/etc patterns that accept FP
3600 registers as operands. If the third operand is a constant, then it
3601 needs to be reloaded into a FP register. */
3602 if (GET_CODE (x) == CONST_INT)
3603 return GR_REGS;
3604
3605 /* This can happen because of register elimination in a muldi3 insn.
3606 E.g. `26107 * (unsigned long)&u'. */
3607 if (GET_CODE (x) == PLUS)
3608 return GR_REGS;
3609 break;
3610
3611 case PR_REGS:
3612 /* ??? This happens if we cse/gcse a BImode value across a call,
3613 and the function has a nonlocal goto. This is because global
3614 does not allocate call crossing pseudos to hard registers when
3615 current_function_has_nonlocal_goto is true. This is relatively
3616 common for C++ programs that use exceptions. To reproduce,
3617 return NO_REGS and compile libstdc++. */
3618 if (GET_CODE (x) == MEM)
3619 return GR_REGS;
3620
3621 /* This can happen when we take a BImode subreg of a DImode value,
3622 and that DImode value winds up in some non-GR register. */
3623 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3624 return GR_REGS;
3625 break;
3626
3627 case GR_REGS:
3628 /* Since we have no offsettable memory addresses, we need a temporary
3629 to hold the address of the second word. */
3630 if (mode == TImode)
3631 return GR_REGS;
3632 break;
3633
3634 default:
3635 break;
3636 }
3637
3638 return NO_REGS;
3639 }
3640
3641 \f
3642 /* Emit text to declare externally defined variables and functions, because
3643 the Intel assembler does not support undefined externals. */
3644
3645 void
3646 ia64_asm_output_external (file, decl, name)
3647 FILE *file;
3648 tree decl;
3649 const char *name;
3650 {
3651 int save_referenced;
3652
3653 /* GNU as does not need anything here. */
3654 if (TARGET_GNU_AS)
3655 return;
3656
3657 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3658 the linker when we do this, so we need to be careful not to do this for
3659 builtin functions which have no library equivalent. Unfortunately, we
3660 can't tell here whether or not a function will actually be called by
3661 expand_expr, so we pull in library functions even if we may not need
3662 them later. */
3663 if (! strcmp (name, "__builtin_next_arg")
3664 || ! strcmp (name, "alloca")
3665 || ! strcmp (name, "__builtin_constant_p")
3666 || ! strcmp (name, "__builtin_args_info"))
3667 return;
3668
3669 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3670 restore it. */
3671 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3672 if (TREE_CODE (decl) == FUNCTION_DECL)
3673 {
3674 fprintf (file, "%s", TYPE_ASM_OP);
3675 assemble_name (file, name);
3676 putc (',', file);
3677 fprintf (file, TYPE_OPERAND_FMT, "function");
3678 putc ('\n', file);
3679 }
3680 ASM_GLOBALIZE_LABEL (file, name);
3681 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3682 }
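/* As an illustration only (the exact directives depend on TYPE_ASM_OP,
   TYPE_OPERAND_FMT and ASM_GLOBALIZE_LABEL for the target), declaring an
   external function `foo' for the Intel assembler typically produces
   something along the lines of:

	.type	foo,@function
	.global	foo

   which gives the assembler a declaration for the otherwise undefined
   external.  */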
3683 \f
3684 /* Parse the -mfixed-range= option string. */
3685
3686 static void
3687 fix_range (const_str)
3688 const char *const_str;
3689 {
3690 int i, first, last;
3691 char *str, *dash, *comma;
3692
3693 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3694 REG2 are either register names or register numbers. The effect
3695 of this option is to mark the registers in the range from REG1 to
3696 REG2 as ``fixed'' so they won't be used by the compiler. This is
3697 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3698
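/* For example, a hypothetical invocation such as

	-mfixed-range=f32-f127

   reserves the upper floating-point registers; several ranges may be
   given separated by commas, e.g. -mfixed-range=f32-f63,f96-f127.  */
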
3699 i = strlen (const_str);
3700 str = (char *) alloca (i + 1);
3701 memcpy (str, const_str, i + 1);
3702
3703 while (1)
3704 {
3705 dash = strchr (str, '-');
3706 if (!dash)
3707 {
3708 warning ("value of -mfixed-range must have form REG1-REG2");
3709 return;
3710 }
3711 *dash = '\0';
3712
3713 comma = strchr (dash + 1, ',');
3714 if (comma)
3715 *comma = '\0';
3716
3717 first = decode_reg_name (str);
3718 if (first < 0)
3719 {
3720 warning ("unknown register name: %s", str);
3721 return;
3722 }
3723
3724 last = decode_reg_name (dash + 1);
3725 if (last < 0)
3726 {
3727 warning ("unknown register name: %s", dash + 1);
3728 return;
3729 }
3730
3731 *dash = '-';
3732
3733 if (first > last)
3734 {
3735 warning ("%s-%s is an empty range", str, dash + 1);
3736 return;
3737 }
3738
3739 for (i = first; i <= last; ++i)
3740 fixed_regs[i] = call_used_regs[i] = 1;
3741
3742 if (!comma)
3743 break;
3744
3745 *comma = ',';
3746 str = comma + 1;
3747 }
3748 }
3749
3750 /* Called to register all of our global variables with the garbage
3751 collector. */
3752
3753 static void
3754 ia64_add_gc_roots ()
3755 {
3756 ggc_add_rtx_root (&ia64_compare_op0, 1);
3757 ggc_add_rtx_root (&ia64_compare_op1, 1);
3758 }
3759
3760 static void
3761 ia64_init_machine_status (p)
3762 struct function *p;
3763 {
3764 p->machine =
3765 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3766 }
3767
3768 static void
3769 ia64_mark_machine_status (p)
3770 struct function *p;
3771 {
3772 struct machine_function *machine = p->machine;
3773
3774 if (machine)
3775 {
3776 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3777 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3778 ggc_mark_rtx (machine->ia64_gp_save);
3779 }
3780 }
3781
3782 static void
3783 ia64_free_machine_status (p)
3784 struct function *p;
3785 {
3786 free (p->machine);
3787 p->machine = NULL;
3788 }
3789
3790 /* Handle TARGET_OPTIONS switches. */
3791
3792 void
3793 ia64_override_options ()
3794 {
3795 if (TARGET_AUTO_PIC)
3796 target_flags |= MASK_CONST_GP;
3797
3798 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3799 {
3800 warning ("cannot optimize division for both latency and throughput");
3801 target_flags &= ~MASK_INLINE_DIV_THR;
3802 }
3803
3804 if (ia64_fixed_range_string)
3805 fix_range (ia64_fixed_range_string);
3806
3807 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3808 flag_schedule_insns_after_reload = 0;
3809
3810 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3811
3812 init_machine_status = ia64_init_machine_status;
3813 mark_machine_status = ia64_mark_machine_status;
3814 free_machine_status = ia64_free_machine_status;
3815
3816 ia64_add_gc_roots ();
3817 }
3818 \f
3819 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3820 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3821 static enum attr_type ia64_safe_type PARAMS((rtx));
3822
3823 static enum attr_itanium_requires_unit0
3824 ia64_safe_itanium_requires_unit0 (insn)
3825 rtx insn;
3826 {
3827 if (recog_memoized (insn) >= 0)
3828 return get_attr_itanium_requires_unit0 (insn);
3829 else
3830 return ITANIUM_REQUIRES_UNIT0_NO;
3831 }
3832
3833 static enum attr_itanium_class
3834 ia64_safe_itanium_class (insn)
3835 rtx insn;
3836 {
3837 if (recog_memoized (insn) >= 0)
3838 return get_attr_itanium_class (insn);
3839 else
3840 return ITANIUM_CLASS_UNKNOWN;
3841 }
3842
3843 static enum attr_type
3844 ia64_safe_type (insn)
3845 rtx insn;
3846 {
3847 if (recog_memoized (insn) >= 0)
3848 return get_attr_type (insn);
3849 else
3850 return TYPE_UNKNOWN;
3851 }
3852 \f
3853 /* The following collection of routines emit instruction group stop bits as
3854 necessary to avoid dependencies. */
3855
3856 /* Need to track some additional registers as far as serialization is
3857 concerned so we can properly handle br.call and br.ret. We could
3858 make these registers visible to gcc, but since these registers are
3859 never explicitly used in gcc generated code, it seems wasteful to
3860 do so (plus it would make the call and return patterns needlessly
3861 complex). */
3862 #define REG_GP (GR_REG (1))
3863 #define REG_RP (BR_REG (0))
3864 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
3865 /* This is used for volatile asms which may require a stop bit immediately
3866 before and after them. */
3867 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
3868 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3869 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
3870
3871 /* For each register, we keep track of how it has been written in the
3872 current instruction group.
3873
3874 If a register is written unconditionally (no qualifying predicate),
3875 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3876
3877 If a register is written if its qualifying predicate P is true, we
3878 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3879 may be written again by the complement of P (P^1) and when this happens,
3880 WRITE_COUNT gets set to 2.
3881
3882 The result of this is that whenever an insn attempts to write a register
3883 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
3884
3885 If a predicate register is written by a floating-point insn, we set
3886 WRITTEN_BY_FP to true.
3887
3888 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3889 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
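
/* A minimal illustration of the above (assuming, as the code below does,
   that an even predicate P and its successor P+1 are complements, e.g. p6
   and p7):

	(p6) mov r8 = 1		// WRITE_COUNT for r8 becomes 1, FIRST_PRED = p6
	(p7) mov r8 = 2		// complement of p6; WRITE_COUNT becomes 2, no stop
	     mov r8 = 3		// unconditional write while WRITE_COUNT == 2,
				// so a stop bit (";;") is required before it.  */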
3890
3891 struct reg_write_state
3892 {
3893 unsigned int write_count : 2;
3894 unsigned int first_pred : 16;
3895 unsigned int written_by_fp : 1;
3896 unsigned int written_by_and : 1;
3897 unsigned int written_by_or : 1;
3898 };
3899
3900 /* Cumulative info for the current instruction group. */
3901 struct reg_write_state rws_sum[NUM_REGS];
3902 /* Info for the current instruction. This gets copied to rws_sum after a
3903 stop bit is emitted. */
3904 struct reg_write_state rws_insn[NUM_REGS];
3905
3906 /* Indicates whether this is the first instruction after a stop bit,
3907 in which case we don't need another stop bit. Without this, we hit
3908 the abort in ia64_variable_issue when scheduling an alloc. */
3909 static int first_instruction;
3910
3911 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
3912 RTL for one instruction. */
3913 struct reg_flags
3914 {
3915 unsigned int is_write : 1; /* Is register being written? */
3916 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
3917 unsigned int is_branch : 1; /* Is register used as part of a branch? */
3918 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
3919 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
3920 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
3921 };
3922
3923 static void rws_update PARAMS ((struct reg_write_state *, int,
3924 struct reg_flags, int));
3925 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
3926 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
3927 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
3928 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
3929 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
3930 static void init_insn_group_barriers PARAMS ((void));
3931 static int group_barrier_needed_p PARAMS ((rtx));
3932 static int safe_group_barrier_needed_p PARAMS ((rtx));
3933
3934 /* Update *RWS for REGNO, which is being written by the current instruction,
3935 with predicate PRED, and associated register flags in FLAGS. */
3936
3937 static void
3938 rws_update (rws, regno, flags, pred)
3939 struct reg_write_state *rws;
3940 int regno;
3941 struct reg_flags flags;
3942 int pred;
3943 {
3944 rws[regno].write_count += pred ? 1 : 2;
3945 rws[regno].written_by_fp |= flags.is_fp;
3946 /* ??? Not tracking and/or across differing predicates. */
3947 rws[regno].written_by_and = flags.is_and;
3948 rws[regno].written_by_or = flags.is_or;
3949 rws[regno].first_pred = pred;
3950 }
3951
3952 /* Handle an access to register REGNO of type FLAGS using predicate register
3953 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
3954 a dependency with an earlier instruction in the same group. */
3955
3956 static int
3957 rws_access_regno (regno, flags, pred)
3958 int regno;
3959 struct reg_flags flags;
3960 int pred;
3961 {
3962 int need_barrier = 0;
3963
3964 if (regno >= NUM_REGS)
3965 abort ();
3966
3967 if (! PR_REGNO_P (regno))
3968 flags.is_and = flags.is_or = 0;
3969
3970 if (flags.is_write)
3971 {
3972 int write_count;
3973
3974 /* One insn writes same reg multiple times? */
3975 if (rws_insn[regno].write_count > 0)
3976 abort ();
3977
3978 /* Update info for current instruction. */
3979 rws_update (rws_insn, regno, flags, pred);
3980 write_count = rws_sum[regno].write_count;
3981
3982 switch (write_count)
3983 {
3984 case 0:
3985 /* The register has not been written yet. */
3986 rws_update (rws_sum, regno, flags, pred);
3987 break;
3988
3989 case 1:
3990 /* The register has been written via a predicate. If this is
3991 not a complementary predicate, then we need a barrier. */
3992 /* ??? This assumes that P and P+1 are always complementary
3993 predicates for P even. */
3994 if (flags.is_and && rws_sum[regno].written_by_and)
3995 ;
3996 else if (flags.is_or && rws_sum[regno].written_by_or)
3997 ;
3998 else if ((rws_sum[regno].first_pred ^ 1) != pred)
3999 need_barrier = 1;
4000 rws_update (rws_sum, regno, flags, pred);
4001 break;
4002
4003 case 2:
4004 /* The register has been unconditionally written already. We
4005 need a barrier. */
4006 if (flags.is_and && rws_sum[regno].written_by_and)
4007 ;
4008 else if (flags.is_or && rws_sum[regno].written_by_or)
4009 ;
4010 else
4011 need_barrier = 1;
4012 rws_sum[regno].written_by_and = flags.is_and;
4013 rws_sum[regno].written_by_or = flags.is_or;
4014 break;
4015
4016 default:
4017 abort ();
4018 }
4019 }
4020 else
4021 {
4022 if (flags.is_branch)
4023 {
4024 /* Branches have several RAW exceptions that allow us to avoid
4025 barriers. */
4026
4027 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4028 /* RAW dependencies on branch regs are permissible as long
4029 as the writer is a non-branch instruction. Since we
4030 never generate code that uses a branch register written
4031 by a branch instruction, handling this case is
4032 easy. */
4033 return 0;
4034
4035 if (REGNO_REG_CLASS (regno) == PR_REGS
4036 && ! rws_sum[regno].written_by_fp)
4037 /* The predicates of a branch are available within the
4038 same insn group as long as the predicate was written by
4039 something other than a floating-point instruction. */
4040 return 0;
4041 }
4042
4043 if (flags.is_and && rws_sum[regno].written_by_and)
4044 return 0;
4045 if (flags.is_or && rws_sum[regno].written_by_or)
4046 return 0;
4047
4048 switch (rws_sum[regno].write_count)
4049 {
4050 case 0:
4051 /* The register has not been written yet. */
4052 break;
4053
4054 case 1:
4055 /* The register has been written via a predicate. If this is
4056 not a complementary predicate, then we need a barrier. */
4057 /* ??? This assumes that P and P+1 are always complementary
4058 predicates for P even. */
4059 if ((rws_sum[regno].first_pred ^ 1) != pred)
4060 need_barrier = 1;
4061 break;
4062
4063 case 2:
4064 /* The register has been unconditionally written already. We
4065 need a barrier. */
4066 need_barrier = 1;
4067 break;
4068
4069 default:
4070 abort ();
4071 }
4072 }
4073
4074 return need_barrier;
4075 }
4076
4077 static int
4078 rws_access_reg (reg, flags, pred)
4079 rtx reg;
4080 struct reg_flags flags;
4081 int pred;
4082 {
4083 int regno = REGNO (reg);
4084 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4085
4086 if (n == 1)
4087 return rws_access_regno (regno, flags, pred);
4088 else
4089 {
4090 int need_barrier = 0;
4091 while (--n >= 0)
4092 need_barrier |= rws_access_regno (regno + n, flags, pred);
4093 return need_barrier;
4094 }
4095 }
4096
4097 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4098 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4099
4100 static void
4101 update_set_flags (x, pflags, ppred, pcond)
4102 rtx x;
4103 struct reg_flags *pflags;
4104 int *ppred;
4105 rtx *pcond;
4106 {
4107 rtx src = SET_SRC (x);
4108
4109 *pcond = 0;
4110
4111 switch (GET_CODE (src))
4112 {
4113 case CALL:
4114 return;
4115
4116 case IF_THEN_ELSE:
4117 if (SET_DEST (x) == pc_rtx)
4118 /* X is a conditional branch. */
4119 return;
4120 else
4121 {
4122 int is_complemented = 0;
4123
4124 /* X is a conditional move. */
4125 rtx cond = XEXP (src, 0);
4126 if (GET_CODE (cond) == EQ)
4127 is_complemented = 1;
4128 cond = XEXP (cond, 0);
4129 if (GET_CODE (cond) != REG
4130 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4131 abort ();
4132 *pcond = cond;
4133 if (XEXP (src, 1) == SET_DEST (x)
4134 || XEXP (src, 2) == SET_DEST (x))
4135 {
4136 /* X is a conditional move that conditionally writes the
4137 destination. */
4138
4139 /* We need another complement in this case. */
4140 if (XEXP (src, 1) == SET_DEST (x))
4141 is_complemented = ! is_complemented;
4142
4143 *ppred = REGNO (cond);
4144 if (is_complemented)
4145 ++*ppred;
4146 }
4147
4148 /* ??? If this is a conditional write to the dest, then this
4149 instruction does not actually read one source. This probably
4150 doesn't matter, because that source is also the dest. */
4151 /* ??? Multiple writes to predicate registers are allowed
4152 if they are all AND type compares, or if they are all OR
4153 type compares. We do not generate such instructions
4154 currently. */
4155 }
4156 /* ... fall through ... */
4157
4158 default:
4159 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4160 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4161 /* Set pflags->is_fp to 1 so that we know we're dealing
4162 with a floating point comparison when processing the
4163 destination of the SET. */
4164 pflags->is_fp = 1;
4165
4166 /* Discover if this is a parallel comparison. We only handle
4167 and.orcm and or.andcm at present, since we must retain a
4168 strict inverse on the predicate pair. */
4169 else if (GET_CODE (src) == AND)
4170 pflags->is_and = 1;
4171 else if (GET_CODE (src) == IOR)
4172 pflags->is_or = 1;
4173
4174 break;
4175 }
4176 }
4177
4178 /* Subroutine of rtx_needs_barrier; this function determines whether the
4179 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4180 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4181 for this insn. */
4182
4183 static int
4184 set_src_needs_barrier (x, flags, pred, cond)
4185 rtx x;
4186 struct reg_flags flags;
4187 int pred;
4188 rtx cond;
4189 {
4190 int need_barrier = 0;
4191 rtx dst;
4192 rtx src = SET_SRC (x);
4193
4194 if (GET_CODE (src) == CALL)
4195 /* We don't need to worry about the result registers that
4196 get written by the subroutine call. */
4197 return rtx_needs_barrier (src, flags, pred);
4198 else if (SET_DEST (x) == pc_rtx)
4199 {
4200 /* X is a conditional branch. */
4201 /* ??? This seems redundant, as the caller sets this bit for
4202 all JUMP_INSNs. */
4203 flags.is_branch = 1;
4204 return rtx_needs_barrier (src, flags, pred);
4205 }
4206
4207 need_barrier = rtx_needs_barrier (src, flags, pred);
4208
4209 /* This instruction unconditionally uses a predicate register. */
4210 if (cond)
4211 need_barrier |= rws_access_reg (cond, flags, 0);
4212
4213 dst = SET_DEST (x);
4214 if (GET_CODE (dst) == ZERO_EXTRACT)
4215 {
4216 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4217 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4218 dst = XEXP (dst, 0);
4219 }
4220 return need_barrier;
4221 }
4222
4223 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4224 Return 1 if this access creates a dependency with an earlier instruction
4225 in the same group. */
4226
4227 static int
4228 rtx_needs_barrier (x, flags, pred)
4229 rtx x;
4230 struct reg_flags flags;
4231 int pred;
4232 {
4233 int i, j;
4234 int is_complemented = 0;
4235 int need_barrier = 0;
4236 const char *format_ptr;
4237 struct reg_flags new_flags;
4238 rtx cond = 0;
4239
4240 if (! x)
4241 return 0;
4242
4243 new_flags = flags;
4244
4245 switch (GET_CODE (x))
4246 {
4247 case SET:
4248 update_set_flags (x, &new_flags, &pred, &cond);
4249 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4250 if (GET_CODE (SET_SRC (x)) != CALL)
4251 {
4252 new_flags.is_write = 1;
4253 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4254 }
4255 break;
4256
4257 case CALL:
4258 new_flags.is_write = 0;
4259 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4260
4261 /* Avoid multiple register writes, in case this is a pattern with
4262 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4263 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4264 {
4265 new_flags.is_write = 1;
4266 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4267 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4268 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4269 }
4270 break;
4271
4272 case COND_EXEC:
4273 /* X is a predicated instruction. */
4274
4275 cond = COND_EXEC_TEST (x);
4276 if (pred)
4277 abort ();
4278 need_barrier = rtx_needs_barrier (cond, flags, 0);
4279
4280 if (GET_CODE (cond) == EQ)
4281 is_complemented = 1;
4282 cond = XEXP (cond, 0);
4283 if (GET_CODE (cond) != REG
4284 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4285 abort ();
4286 pred = REGNO (cond);
4287 if (is_complemented)
4288 ++pred;
4289
4290 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4291 return need_barrier;
4292
4293 case CLOBBER:
4294 case USE:
4295 /* Clobber & use are for earlier compiler-phases only. */
4296 break;
4297
4298 case ASM_OPERANDS:
4299 case ASM_INPUT:
4300 /* We always emit stop bits for traditional asms. We emit stop bits
4301 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4302 if (GET_CODE (x) != ASM_OPERANDS
4303 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4304 {
4305 /* Avoid writing the register multiple times if we have multiple
4306 asm outputs. This avoids an abort in rws_access_reg. */
4307 if (! rws_insn[REG_VOLATILE].write_count)
4308 {
4309 new_flags.is_write = 1;
4310 rws_access_regno (REG_VOLATILE, new_flags, pred);
4311 }
4312 return 1;
4313 }
4314
4315 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4316 We cannot just fall through here, since we would then be confused
4317 by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate
4318 traditional asms the way a stand-alone ASM_INPUT does. */
4319
4320 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4321 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4322 need_barrier = 1;
4323 break;
4324
4325 case PARALLEL:
4326 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4327 {
4328 rtx pat = XVECEXP (x, 0, i);
4329 if (GET_CODE (pat) == SET)
4330 {
4331 update_set_flags (pat, &new_flags, &pred, &cond);
4332 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4333 }
4334 else if (GET_CODE (pat) == USE
4335 || GET_CODE (pat) == CALL
4336 || GET_CODE (pat) == ASM_OPERANDS)
4337 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4338 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4339 abort ();
4340 }
4341 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4342 {
4343 rtx pat = XVECEXP (x, 0, i);
4344 if (GET_CODE (pat) == SET)
4345 {
4346 if (GET_CODE (SET_SRC (pat)) != CALL)
4347 {
4348 new_flags.is_write = 1;
4349 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4350 pred);
4351 }
4352 }
4353 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4354 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4355 }
4356 break;
4357
4358 case SUBREG:
4359 x = SUBREG_REG (x);
4360 /* FALLTHRU */
4361 case REG:
4362 if (REGNO (x) == AR_UNAT_REGNUM)
4363 {
4364 for (i = 0; i < 64; ++i)
4365 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4366 }
4367 else
4368 need_barrier = rws_access_reg (x, flags, pred);
4369 break;
4370
4371 case MEM:
4372 /* Find the regs used in memory address computation. */
4373 new_flags.is_write = 0;
4374 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4375 break;
4376
4377 case CONST_INT: case CONST_DOUBLE:
4378 case SYMBOL_REF: case LABEL_REF: case CONST:
4379 break;
4380
4381 /* Operators with side-effects. */
4382 case POST_INC: case POST_DEC:
4383 if (GET_CODE (XEXP (x, 0)) != REG)
4384 abort ();
4385
4386 new_flags.is_write = 0;
4387 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4388 new_flags.is_write = 1;
4389 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4390 break;
4391
4392 case POST_MODIFY:
4393 if (GET_CODE (XEXP (x, 0)) != REG)
4394 abort ();
4395
4396 new_flags.is_write = 0;
4397 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4398 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4399 new_flags.is_write = 1;
4400 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4401 break;
4402
4403 /* Handle common unary and binary ops for efficiency. */
4404 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4405 case MOD: case UDIV: case UMOD: case AND: case IOR:
4406 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4407 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4408 case NE: case EQ: case GE: case GT: case LE:
4409 case LT: case GEU: case GTU: case LEU: case LTU:
4410 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4411 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4412 break;
4413
4414 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4415 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4416 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4417 case SQRT: case FFS:
4418 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4419 break;
4420
4421 case UNSPEC:
4422 switch (XINT (x, 1))
4423 {
4424 case 1: /* st8.spill */
4425 case 2: /* ld8.fill */
4426 {
4427 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4428 HOST_WIDE_INT bit = (offset >> 3) & 63;
4429
4430 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4431 new_flags.is_write = (XINT (x, 1) == 1);
4432 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4433 new_flags, pred);
4434 break;
4435 }
4436
4437 case 3: /* stf.spill */
4438 case 4: /* ldf.spill */
4439 case 8: /* popcnt */
4440 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4441 break;
4442
4443 case 7: /* pred_rel_mutex */
4444 case 9: /* pic call */
4445 case 12: /* mf */
4446 case 19: /* fetchadd_acq */
4447 case 20: /* mov = ar.bsp */
4448 case 21: /* flushrs */
4449 case 22: /* bundle selector */
4450 case 23: /* cycle display */
4451 break;
4452
4453 case 24: /* addp4 */
4454 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4455 break;
4456
4457 case 5: /* recip_approx */
4458 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4459 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4460 break;
4461
4462 case 13: /* cmpxchg_acq */
4463 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4464 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4465 break;
4466
4467 default:
4468 abort ();
4469 }
4470 break;
4471
4472 case UNSPEC_VOLATILE:
4473 switch (XINT (x, 1))
4474 {
4475 case 0: /* alloc */
4476 /* Alloc must always be the first instruction of a group.
4477 We force this by always returning true. */
4478 /* ??? We might get better scheduling if we explicitly check for
4479 input/local/output register dependencies, and modify the
4480 scheduler so that alloc is always reordered to the start of
4481 the current group. We could then eliminate all of the
4482 first_instruction code. */
4483 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4484
4485 new_flags.is_write = 1;
4486 rws_access_regno (REG_AR_CFM, new_flags, pred);
4487 return 1;
4488
4489 case 1: /* blockage */
4490 case 2: /* insn group barrier */
4491 return 0;
4492
4493 case 5: /* set_bsp */
4494 need_barrier = 1;
4495 break;
4496
4497 case 7: /* pred.rel.mutex */
4498 case 8: /* safe_across_calls all */
4499 case 9: /* safe_across_calls normal */
4500 return 0;
4501
4502 default:
4503 abort ();
4504 }
4505 break;
4506
4507 case RETURN:
4508 new_flags.is_write = 0;
4509 need_barrier = rws_access_regno (REG_RP, flags, pred);
4510 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4511
4512 new_flags.is_write = 1;
4513 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4514 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4515 break;
4516
4517 default:
4518 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4519 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4520 switch (format_ptr[i])
4521 {
4522 case '0': /* unused field */
4523 case 'i': /* integer */
4524 case 'n': /* note */
4525 case 'w': /* wide integer */
4526 case 's': /* pointer to string */
4527 case 'S': /* optional pointer to string */
4528 break;
4529
4530 case 'e':
4531 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4532 need_barrier = 1;
4533 break;
4534
4535 case 'E':
4536 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4537 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4538 need_barrier = 1;
4539 break;
4540
4541 default:
4542 abort ();
4543 }
4544 break;
4545 }
4546 return need_barrier;
4547 }
4548
4549 /* Clear out the state for group_barrier_needed_p at the start of a
4550 sequence of insns. */
4551
4552 static void
4553 init_insn_group_barriers ()
4554 {
4555 memset (rws_sum, 0, sizeof (rws_sum));
4556 first_instruction = 1;
4557 }
4558
4559 /* Given the current state, recorded by previous calls to this function,
4560 determine whether a group barrier (a stop bit) is necessary before INSN.
4561 Return nonzero if so. */
4562
4563 static int
4564 group_barrier_needed_p (insn)
4565 rtx insn;
4566 {
4567 rtx pat;
4568 int need_barrier = 0;
4569 struct reg_flags flags;
4570
4571 memset (&flags, 0, sizeof (flags));
4572 switch (GET_CODE (insn))
4573 {
4574 case NOTE:
4575 break;
4576
4577 case BARRIER:
4578 /* A barrier doesn't imply an instruction group boundary. */
4579 break;
4580
4581 case CODE_LABEL:
4582 memset (rws_insn, 0, sizeof (rws_insn));
4583 return 1;
4584
4585 case CALL_INSN:
4586 flags.is_branch = 1;
4587 flags.is_sibcall = SIBLING_CALL_P (insn);
4588 memset (rws_insn, 0, sizeof (rws_insn));
4589
4590 /* Don't bundle a call following another call. */
4591 if ((pat = prev_active_insn (insn))
4592 && GET_CODE (pat) == CALL_INSN)
4593 {
4594 need_barrier = 1;
4595 break;
4596 }
4597
4598 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4599 break;
4600
4601 case JUMP_INSN:
4602 flags.is_branch = 1;
4603
4604 /* Don't bundle a jump following a call. */
4605 if ((pat = prev_active_insn (insn))
4606 && GET_CODE (pat) == CALL_INSN)
4607 {
4608 need_barrier = 1;
4609 break;
4610 }
4611 /* FALLTHRU */
4612
4613 case INSN:
4614 if (GET_CODE (PATTERN (insn)) == USE
4615 || GET_CODE (PATTERN (insn)) == CLOBBER)
4616 /* Don't care about USE and CLOBBER "insns"---those are used to
4617 indicate to the optimizer that it shouldn't get rid of
4618 certain operations. */
4619 break;
4620
4621 pat = PATTERN (insn);
4622
4623 /* Ug. Hack hacks hacked elsewhere. */
4624 switch (recog_memoized (insn))
4625 {
4626 /* We play dependency tricks with the epilogue in order
4627 to get proper schedules. Undo this for dv analysis. */
4628 case CODE_FOR_epilogue_deallocate_stack:
4629 pat = XVECEXP (pat, 0, 0);
4630 break;
4631
4632 /* The pattern we use for br.cloop confuses the code above.
4633 The second element of the vector is representative. */
4634 case CODE_FOR_doloop_end_internal:
4635 pat = XVECEXP (pat, 0, 1);
4636 break;
4637
4638 /* Doesn't generate code. */
4639 case CODE_FOR_pred_rel_mutex:
4640 return 0;
4641
4642 default:
4643 break;
4644 }
4645
4646 memset (rws_insn, 0, sizeof (rws_insn));
4647 need_barrier = rtx_needs_barrier (pat, flags, 0);
4648
4649 /* Check to see if the previous instruction was a volatile
4650 asm. */
4651 if (! need_barrier)
4652 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4653 break;
4654
4655 default:
4656 abort ();
4657 }
4658
4659 if (first_instruction)
4660 {
4661 need_barrier = 0;
4662 first_instruction = 0;
4663 }
4664
4665 return need_barrier;
4666 }
4667
4668 /* Like group_barrier_needed_p, but do not clobber the current state. */
4669
4670 static int
4671 safe_group_barrier_needed_p (insn)
4672 rtx insn;
4673 {
4674 struct reg_write_state rws_saved[NUM_REGS];
4675 int saved_first_instruction;
4676 int t;
4677
4678 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4679 saved_first_instruction = first_instruction;
4680
4681 t = group_barrier_needed_p (insn);
4682
4683 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4684 first_instruction = saved_first_instruction;
4685
4686 return t;
4687 }
4688
4689 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
4690 as necessary to eliminate dependencies. This function assumes that
4691 a final instruction scheduling pass has been run which has already
4692 inserted most of the necessary stop bits. This function only inserts
4693 new ones at basic block boundaries, since these are invisible to the
4694 scheduler. */
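
/* Note: the stop bits inserted here take the form of insn_group_barrier
   insns, which (as far as the assembly output is concerned) print as the
   `;;' group terminator of IA-64 assembly.  */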
4695
4696 static void
4697 emit_insn_group_barriers (dump, insns)
4698 FILE *dump;
4699 rtx insns;
4700 {
4701 rtx insn;
4702 rtx last_label = 0;
4703 int insns_since_last_label = 0;
4704
4705 init_insn_group_barriers ();
4706
4707 for (insn = insns; insn; insn = NEXT_INSN (insn))
4708 {
4709 if (GET_CODE (insn) == CODE_LABEL)
4710 {
4711 if (insns_since_last_label)
4712 last_label = insn;
4713 insns_since_last_label = 0;
4714 }
4715 else if (GET_CODE (insn) == NOTE
4716 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4717 {
4718 if (insns_since_last_label)
4719 last_label = insn;
4720 insns_since_last_label = 0;
4721 }
4722 else if (GET_CODE (insn) == INSN
4723 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4724 && XINT (PATTERN (insn), 1) == 2)
4725 {
4726 init_insn_group_barriers ();
4727 last_label = 0;
4728 }
4729 else if (INSN_P (insn))
4730 {
4731 insns_since_last_label = 1;
4732
4733 if (group_barrier_needed_p (insn))
4734 {
4735 if (last_label)
4736 {
4737 if (dump)
4738 fprintf (dump, "Emitting stop before label %d\n",
4739 INSN_UID (last_label));
4740 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4741 insn = last_label;
4742
4743 init_insn_group_barriers ();
4744 last_label = 0;
4745 }
4746 }
4747 }
4748 }
4749 }
4750
4751 /* Like emit_insn_group_barriers, but run when no final scheduling pass has
4752 been run. In that case this function has to emit all necessary group barriers. */
4753
4754 static void
4755 emit_all_insn_group_barriers (dump, insns)
4756 FILE *dump ATTRIBUTE_UNUSED;
4757 rtx insns;
4758 {
4759 rtx insn;
4760
4761 init_insn_group_barriers ();
4762
4763 for (insn = insns; insn; insn = NEXT_INSN (insn))
4764 {
4765 if (GET_CODE (insn) == INSN
4766 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4767 && XINT (PATTERN (insn), 1) == 2)
4768 init_insn_group_barriers ();
4769 else if (INSN_P (insn))
4770 {
4771 if (group_barrier_needed_p (insn))
4772 {
4773 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4774 init_insn_group_barriers ();
4775 group_barrier_needed_p (insn);
4776 }
4777 }
4778 }
4779 }
4780 \f
4781 static int errata_find_address_regs PARAMS ((rtx *, void *));
4782 static void errata_emit_nops PARAMS ((rtx));
4783 static void fixup_errata PARAMS ((void));
4784
4785 /* This structure is used to track some details about the previous insn
4786 groups so we can determine if it may be necessary to insert NOPs to
4787 work around hardware errata. */
4788 static struct group
4789 {
4790 HARD_REG_SET p_reg_set;
4791 HARD_REG_SET gr_reg_conditionally_set;
4792 } last_group[2];
4793
4794 /* Index into the last_group array. */
4795 static int group_idx;
4796
4797 /* Called through for_each_rtx; determines if a hard register that was
4798 conditionally set in the previous group is used as an address register.
4799 It ensures that for_each_rtx returns 1 in that case. */
4800 static int
4801 errata_find_address_regs (xp, data)
4802 rtx *xp;
4803 void *data ATTRIBUTE_UNUSED;
4804 {
4805 rtx x = *xp;
4806 if (GET_CODE (x) != MEM)
4807 return 0;
4808 x = XEXP (x, 0);
4809 if (GET_CODE (x) == POST_MODIFY)
4810 x = XEXP (x, 0);
4811 if (GET_CODE (x) == REG)
4812 {
4813 struct group *prev_group = last_group + (group_idx ^ 1);
4814 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4815 REGNO (x)))
4816 return 1;
4817 return -1;
4818 }
4819 return 0;
4820 }
4821
4822 /* Called for each insn; this function keeps track of the state in
4823 last_group and emits additional NOPs if necessary to work around
4824 an Itanium A/B step erratum. */
4825 static void
4826 errata_emit_nops (insn)
4827 rtx insn;
4828 {
4829 struct group *this_group = last_group + group_idx;
4830 struct group *prev_group = last_group + (group_idx ^ 1);
4831 rtx pat = PATTERN (insn);
4832 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4833 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4834 enum attr_type type;
4835 rtx set = real_pat;
4836
4837 if (GET_CODE (real_pat) == USE
4838 || GET_CODE (real_pat) == CLOBBER
4839 || GET_CODE (real_pat) == ASM_INPUT
4840 || GET_CODE (real_pat) == ADDR_VEC
4841 || GET_CODE (real_pat) == ADDR_DIFF_VEC
4842 || asm_noperands (PATTERN (insn)) >= 0)
4843 return;
4844
4845 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4846 parts of it. */
4847
4848 if (GET_CODE (set) == PARALLEL)
4849 {
4850 int i;
4851 set = XVECEXP (real_pat, 0, 0);
4852 for (i = 1; i < XVECLEN (real_pat, 0); i++)
4853 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
4854 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
4855 {
4856 set = 0;
4857 break;
4858 }
4859 }
4860
4861 if (set && GET_CODE (set) != SET)
4862 set = 0;
4863
4864 type = get_attr_type (insn);
4865
4866 if (type == TYPE_F
4867 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
4868 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
4869
4870 if ((type == TYPE_M || type == TYPE_A) && cond && set
4871 && REG_P (SET_DEST (set))
4872 && GET_CODE (SET_SRC (set)) != PLUS
4873 && GET_CODE (SET_SRC (set)) != MINUS
4874 && (GET_CODE (SET_SRC (set)) != ASHIFT
4875 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
4876 && (GET_CODE (SET_SRC (set)) != MEM
4877 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
4878 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
4879 {
4880 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
4881 || ! REG_P (XEXP (cond, 0)))
4882 abort ();
4883
4884 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
4885 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
4886 }
4887 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
4888 {
4889 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4890 emit_insn_before (gen_nop (), insn);
4891 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4892 group_idx = 0;
4893 memset (last_group, 0, sizeof last_group);
4894 }
4895 }
4896
4897 /* Emit extra nops if they are required to work around hardware errata. */
4898
4899 static void
4900 fixup_errata ()
4901 {
4902 rtx insn;
4903
4904 if (! TARGET_B_STEP)
4905 return;
4906
4907 group_idx = 0;
4908 memset (last_group, 0, sizeof last_group);
4909
4910 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4911 {
4912 if (!INSN_P (insn))
4913 continue;
4914
4915 if (ia64_safe_type (insn) == TYPE_S)
4916 {
4917 group_idx ^= 1;
4918 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
4919 }
4920 else
4921 errata_emit_nops (insn);
4922 }
4923 }
4924 \f
4925 /* Instruction scheduling support. */
4926 /* Describe one bundle. */
4927
4928 struct bundle
4929 {
4930 /* Zero if there's no possibility of a stop in this bundle other than
4931 at the end, otherwise the position of the optional stop bit. */
4932 int possible_stop;
4933 /* The types of the three slots. */
4934 enum attr_type t[3];
4935 /* The pseudo op to be emitted into the assembler output. */
4936 const char *name;
4937 };
4938
4939 #define NR_BUNDLES 10
4940
4941 /* A list of all available bundles. */
4942
4943 static const struct bundle bundle[NR_BUNDLES] =
4944 {
4945 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
4946 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
4947 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
4948 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
4949 #if NR_BUNDLES == 10
4950 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
4951 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
4952 #endif
4953 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
4954 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
4955 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
4956 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
4957 it matches an L type insn. Otherwise we'll try to generate L type
4958 nops. */
4959 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
4960 };
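
/* An interpretation, derived from how packet_matches_p uses this field
   rather than from any manual: possible_stop is the slot index *before*
   which the optional stop may appear.  So the value 2 for .mii corresponds
   to the MI;;I template variant, and the value 1 for .mmi to M;;MI.  */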
4961
4962 /* Describe a packet of instructions. Packets consist of two bundles that
4963 are visible to the hardware in one scheduling window. */
4964
4965 struct ia64_packet
4966 {
4967 const struct bundle *t1, *t2;
4968 /* Precomputed value of the first split issue in this packet if a cycle
4969 starts at its beginning. */
4970 int first_split;
4971 /* For convenience, the insn types are replicated here so we don't have
4972 to go through T1 and T2 all the time. */
4973 enum attr_type t[6];
4974 };
4975
4976 /* An array containing all possible packets. */
4977 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
4978 static struct ia64_packet packets[NR_PACKETS];
4979
4980 /* Map attr_type to a string with the name. */
4981
4982 static const char *type_names[] =
4983 {
4984 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
4985 };
4986
4987 /* Nonzero if we should insert stop bits into the schedule. */
4988 int ia64_final_schedule = 0;
4989
4990 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
4991 static rtx ia64_single_set PARAMS ((rtx));
4992 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
4993 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
4994 static void maybe_rotate PARAMS ((FILE *));
4995 static void finish_last_head PARAMS ((FILE *, int));
4996 static void rotate_one_bundle PARAMS ((FILE *));
4997 static void rotate_two_bundles PARAMS ((FILE *));
4998 static void nop_cycles_until PARAMS ((int, FILE *));
4999 static void cycle_end_fill_slots PARAMS ((FILE *));
5000 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5001 static int get_split PARAMS ((const struct ia64_packet *, int));
5002 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5003 const struct ia64_packet *, int));
5004 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5005 rtx *, enum attr_type *, int));
5006 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5007 static void dump_current_packet PARAMS ((FILE *));
5008 static void schedule_stop PARAMS ((FILE *));
5009 static rtx gen_nop_type PARAMS ((enum attr_type));
5010 static void ia64_emit_nops PARAMS ((void));
5011
5012 /* Map a bundle number to its pseudo-op. */
5013
5014 const char *
5015 get_bundle_name (b)
5016 int b;
5017 {
5018 return bundle[b].name;
5019 }
5020
5021 /* Compute the slot which will cause a split issue in packet P if the
5022 current cycle begins at slot BEGIN. */
5023
5024 static int
5025 itanium_split_issue (p, begin)
5026 const struct ia64_packet *p;
5027 int begin;
5028 {
5029 int type_count[TYPE_S];
5030 int i;
5031 int split = 6;
5032
5033 if (begin < 3)
5034 {
5035 /* Always split before and after MMF. */
5036 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5037 return 3;
5038 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5039 return 3;
5040 /* Always split after MBB and BBB. */
5041 if (p->t[1] == TYPE_B)
5042 return 3;
5043 /* Split after first bundle in MIB BBB combination. */
5044 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5045 return 3;
5046 }
5047
5048 memset (type_count, 0, sizeof type_count);
5049 for (i = begin; i < split; i++)
5050 {
5051 enum attr_type t0 = p->t[i];
5052 /* An MLX bundle reserves the same units as an MFI bundle. */
5053 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5054 : t0 == TYPE_X ? TYPE_I
5055 : t0);
5056 int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
5057 if (type_count[t] == max)
5058 return i;
5059 type_count[t]++;
5060 }
5061 return split;
5062 }
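
/* A worked example, derived purely from the limits encoded above: a packet
   pairing .mii with .mii has slot types M I I M I I.  Starting from slot 0,
   the two-I limit is reached at slot 4, so itanium_split_issue returns 4:
   the second bundle's first I slot starts a new cycle.  */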
5063
5064 /* Return the maximum number of instructions a cpu can issue. */
5065
5066 int
5067 ia64_issue_rate ()
5068 {
5069 return 6;
5070 }
5071
5072 /* Helper function - like single_set, but look inside COND_EXEC. */
5073
5074 static rtx
5075 ia64_single_set (insn)
5076 rtx insn;
5077 {
5078 rtx x = PATTERN (insn);
5079 if (GET_CODE (x) == COND_EXEC)
5080 x = COND_EXEC_CODE (x);
5081 if (GET_CODE (x) == SET)
5082 return x;
5083 return single_set_2 (insn, x);
5084 }
5085
5086 /* Adjust the cost of a scheduling dependency. Return the new cost of
5087 a dependency LINK of INSN on DEP_INSN. COST is the current cost. */
5088
5089 int
5090 ia64_adjust_cost (insn, link, dep_insn, cost)
5091 rtx insn, link, dep_insn;
5092 int cost;
5093 {
5094 enum attr_type dep_type;
5095 enum attr_itanium_class dep_class;
5096 enum attr_itanium_class insn_class;
5097 rtx dep_set, set, src, addr;
5098
5099 if (GET_CODE (PATTERN (insn)) == CLOBBER
5100 || GET_CODE (PATTERN (insn)) == USE
5101 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5102 || GET_CODE (PATTERN (dep_insn)) == USE
5103 /* @@@ Not accurate for indirect calls. */
5104 || GET_CODE (insn) == CALL_INSN
5105 || ia64_safe_type (insn) == TYPE_S)
5106 return 0;
5107
5108 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5109 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5110 return 0;
5111
5112 dep_type = ia64_safe_type (dep_insn);
5113 dep_class = ia64_safe_itanium_class (dep_insn);
5114 insn_class = ia64_safe_itanium_class (insn);
5115
5116 /* Compares that feed a conditional branch can execute in the same
5117 cycle. */
5118 dep_set = ia64_single_set (dep_insn);
5119 set = ia64_single_set (insn);
5120
5121 if (dep_type != TYPE_F
5122 && dep_set
5123 && GET_CODE (SET_DEST (dep_set)) == REG
5124 && PR_REG (REGNO (SET_DEST (dep_set)))
5125 && GET_CODE (insn) == JUMP_INSN)
5126 return 0;
5127
5128 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5129 {
5130 /* ??? Can't find any information in the documentation about whether
5131 a sequence
5132 st [rx] = ra
5133 ld rb = [ry]
5134 splits issue. Assume it doesn't. */
5135 return 0;
5136 }
5137
5138 src = set ? SET_SRC (set) : 0;
5139 addr = 0;
5140 if (set && GET_CODE (SET_DEST (set)) == MEM)
5141 addr = XEXP (SET_DEST (set), 0);
5142 else if (set && GET_CODE (src) == MEM)
5143 addr = XEXP (src, 0);
5144 else if (set && GET_CODE (src) == ZERO_EXTEND
5145 && GET_CODE (XEXP (src, 0)) == MEM)
5146 addr = XEXP (XEXP (src, 0), 0);
5147 else if (set && GET_CODE (src) == UNSPEC
5148 && XVECLEN (XEXP (src, 0), 0) > 0
5149 && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
5150 addr = XEXP (XVECEXP (src, 0, 0), 0);
5151 if (addr && GET_CODE (addr) == POST_MODIFY)
5152 addr = XEXP (addr, 0);
5153
5154 set = ia64_single_set (dep_insn);
5155
5156 if ((dep_class == ITANIUM_CLASS_IALU
5157 || dep_class == ITANIUM_CLASS_ILOG
5158 || dep_class == ITANIUM_CLASS_LD)
5159 && (insn_class == ITANIUM_CLASS_LD
5160 || insn_class == ITANIUM_CLASS_ST))
5161 {
5162 if (! addr || ! set)
5163 abort ();
5164 /* This isn't completely correct - an IALU that feeds an address has
5165 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5166 otherwise. Unfortunately there's no good way to describe this. */
5167 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5168 return cost + 1;
5169 }
5170 if ((dep_class == ITANIUM_CLASS_IALU
5171 || dep_class == ITANIUM_CLASS_ILOG
5172 || dep_class == ITANIUM_CLASS_LD)
5173 && (insn_class == ITANIUM_CLASS_MMMUL
5174 || insn_class == ITANIUM_CLASS_MMSHF
5175 || insn_class == ITANIUM_CLASS_MMSHFI))
5176 return 3;
5177 if (dep_class == ITANIUM_CLASS_FMAC
5178 && (insn_class == ITANIUM_CLASS_FMISC
5179 || insn_class == ITANIUM_CLASS_FCVTFX
5180 || insn_class == ITANIUM_CLASS_XMPY))
5181 return 7;
5182 if ((dep_class == ITANIUM_CLASS_FMAC
5183 || dep_class == ITANIUM_CLASS_FMISC
5184 || dep_class == ITANIUM_CLASS_FCVTFX
5185 || dep_class == ITANIUM_CLASS_XMPY)
5186 && insn_class == ITANIUM_CLASS_STF)
5187 return 8;
5188 if ((dep_class == ITANIUM_CLASS_MMMUL
5189 || dep_class == ITANIUM_CLASS_MMSHF
5190 || dep_class == ITANIUM_CLASS_MMSHFI)
5191 && (insn_class == ITANIUM_CLASS_LD
5192 || insn_class == ITANIUM_CLASS_ST
5193 || insn_class == ITANIUM_CLASS_IALU
5194 || insn_class == ITANIUM_CLASS_ILOG
5195 || insn_class == ITANIUM_CLASS_ISHF))
5196 return 4;
5197
5198 return cost;
5199 }
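
/* For example, reading off the cases above: an IALU add whose result is
   used as the address of a following load or store costs COST + 1, while
   an FMAC result consumed by an stf costs a flat 8 cycles.  */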
5200
5201 /* Describe the current state of the Itanium pipeline. */
5202 static struct
5203 {
5204 /* The first slot that is used in the current cycle. */
5205 int first_slot;
5206 /* The next slot to fill. */
5207 int cur;
5208 /* The packet we have selected for the current issue window. */
5209 const struct ia64_packet *packet;
5210 /* The position of the split issue that occurs due to issue width
5211 limitations (6 if there's no split issue). */
5212 int split;
5213 /* Record data about the insns scheduled so far in the same issue
5214 window. The elements up to but not including FIRST_SLOT belong
5215 to the previous cycle, the ones starting with FIRST_SLOT belong
5216 to the current cycle. */
5217 enum attr_type types[6];
5218 rtx insns[6];
5219 int stopbit[6];
5220 /* Nonzero if we decided to schedule a stop bit. */
5221 int last_was_stop;
5222 } sched_data;
5223
5224 /* Temporary arrays; they have enough elements to hold all insns that
5225 can be ready at the same time during scheduling of the current block.
5226 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5227 static rtx *sched_ready;
5228 static enum attr_type *sched_types;
5229
5230 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5231 of packet P. */
5232
5233 static int
5234 insn_matches_slot (p, itype, slot, insn)
5235 const struct ia64_packet *p;
5236 enum attr_type itype;
5237 int slot;
5238 rtx insn;
5239 {
5240 enum attr_itanium_requires_unit0 u0;
5241 enum attr_type stype = p->t[slot];
5242
5243 if (insn)
5244 {
5245 u0 = ia64_safe_itanium_requires_unit0 (insn);
5246 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5247 {
5248 int i;
5249 for (i = sched_data.first_slot; i < slot; i++)
5250 if (p->t[i] == stype)
5251 return 0;
5252 }
5253 if (GET_CODE (insn) == CALL_INSN)
5254 {
5255 /* Reject calls in multiway branch packets. We want to limit
5256 the number of multiway branches we generate (since the branch
5257 predictor is limited), and this seems to work fairly well.
5258 (If we didn't do this, we'd have to add another test here to
5259 force calls into the third slot of the bundle.) */
5260 if (slot < 3)
5261 {
5262 if (p->t[1] == TYPE_B)
5263 return 0;
5264 }
5265 else
5266 {
5267 if (p->t[4] == TYPE_B)
5268 return 0;
5269 }
5270 }
5271 }
5272
5273 if (itype == stype)
5274 return 1;
5275 if (itype == TYPE_A)
5276 return stype == TYPE_M || stype == TYPE_I;
5277 return 0;
5278 }
5279
5280 /* Like emit_insn_before, but skip cycle_display insns. This makes the
5281 assembly output a bit prettier. */
5282
5283 static void
5284 ia64_emit_insn_before (insn, before)
5285 rtx insn, before;
5286 {
5287 rtx prev = PREV_INSN (before);
5288 if (prev && GET_CODE (prev) == INSN
5289 && GET_CODE (PATTERN (prev)) == UNSPEC
5290 && XINT (PATTERN (prev), 1) == 23)
5291 before = prev;
5292 emit_insn_before (insn, before);
5293 }
5294
5295 #if 0
5296 /* Generate a nop insn of the given type. Note we never generate L type
5297 nops. */
5298
5299 static rtx
5300 gen_nop_type (t)
5301 enum attr_type t;
5302 {
5303 switch (t)
5304 {
5305 case TYPE_M:
5306 return gen_nop_m ();
5307 case TYPE_I:
5308 return gen_nop_i ();
5309 case TYPE_B:
5310 return gen_nop_b ();
5311 case TYPE_F:
5312 return gen_nop_f ();
5313 case TYPE_X:
5314 return gen_nop_x ();
5315 default:
5316 abort ();
5317 }
5318 }
5319 #endif
5320
5321 /* When rotating a bundle out of the issue window, insert a bundle selector
5322 insn in front of it. DUMP is the scheduling dump file or NULL. START
5323 is either 0 or 3, depending on whether we want to emit a bundle selector
5324 for the first bundle or the second bundle in the current issue window.
5325
5326 The selector insns are emitted this late because the selected packet can
5327 be changed until parts of it get rotated out. */
5328
5329 static void
5330 finish_last_head (dump, start)
5331 FILE *dump;
5332 int start;
5333 {
5334 const struct ia64_packet *p = sched_data.packet;
5335 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5336 int bundle_type = b - bundle;
5337 rtx insn;
5338 int i;
5339
5340 if (! ia64_final_schedule)
5341 return;
5342
5343 for (i = start; sched_data.insns[i] == 0; i++)
5344 if (i == start + 3)
5345 abort ();
5346 insn = sched_data.insns[i];
5347
5348 if (dump)
5349 fprintf (dump, "// Emitting template before %d: %s\n",
5350 INSN_UID (insn), b->name);
5351
5352 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5353 }
5354
5355 /* We can't schedule more insns this cycle. Fix up the scheduling state
5356 and advance FIRST_SLOT and CUR.
5357 We have to distribute the insns that are currently found between
5358 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5359 far, they are stored successively in the fields starting at FIRST_SLOT;
5360 now they must be moved to the correct slots.
5361 DUMP is the current scheduling dump file, or NULL. */
5362
5363 static void
5364 cycle_end_fill_slots (dump)
5365 FILE *dump;
5366 {
5367 const struct ia64_packet *packet = sched_data.packet;
5368 int slot, i;
5369 enum attr_type tmp_types[6];
5370 rtx tmp_insns[6];
5371
5372 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5373 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5374
5375 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5376 {
5377 enum attr_type t = tmp_types[i];
5378 if (t != ia64_safe_type (tmp_insns[i]))
5379 abort ();
5380 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5381 {
5382 if (slot > sched_data.split)
5383 abort ();
5384 if (dump)
5385 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5386 type_names[t]);
5387 sched_data.types[slot] = packet->t[slot];
5388 sched_data.insns[slot] = 0;
5389 sched_data.stopbit[slot] = 0;
5390 slot++;
5391 }
5392 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5393 actual slot type later. */
5394 sched_data.types[slot] = packet->t[slot];
5395 sched_data.insns[slot] = tmp_insns[i];
5396 sched_data.stopbit[slot] = 0;
5397 slot++;
5398 }
5399
5400 /* This isn't right - there's no need to pad out until the forced split;
5401 the CPU will automatically split if an insn isn't ready. */
5402 #if 0
5403 while (slot < sched_data.split)
5404 {
5405 sched_data.types[slot] = packet->t[slot];
5406 sched_data.insns[slot] = 0;
5407 sched_data.stopbit[slot] = 0;
5408 slot++;
5409 }
5410 #endif
5411
5412 sched_data.first_slot = sched_data.cur = slot;
5413 }
5414
5415 /* Bundle rotations, as described in the Itanium optimization manual.
5416 We can rotate either one or both bundles out of the issue window.
5417 DUMP is the current scheduling dump file, or NULL. */
5418
5419 static void
5420 rotate_one_bundle (dump)
5421 FILE *dump;
5422 {
5423 if (dump)
5424 fprintf (dump, "// Rotating one bundle.\n");
5425
5426 finish_last_head (dump, 0);
5427 if (sched_data.cur > 3)
5428 {
5429 sched_data.cur -= 3;
5430 sched_data.first_slot -= 3;
5431 memmove (sched_data.types,
5432 sched_data.types + 3,
5433 sched_data.cur * sizeof *sched_data.types);
5434 memmove (sched_data.stopbit,
5435 sched_data.stopbit + 3,
5436 sched_data.cur * sizeof *sched_data.stopbit);
5437 memmove (sched_data.insns,
5438 sched_data.insns + 3,
5439 sched_data.cur * sizeof *sched_data.insns);
5440 }
5441 else
5442 {
5443 sched_data.cur = 0;
5444 sched_data.first_slot = 0;
5445 }
5446 }
5447
5448 static void
5449 rotate_two_bundles (dump)
5450 FILE *dump;
5451 {
5452 if (dump)
5453 fprintf (dump, "// Rotating two bundles.\n");
5454
5455 if (sched_data.cur == 0)
5456 return;
5457
5458 finish_last_head (dump, 0);
5459 if (sched_data.cur > 3)
5460 finish_last_head (dump, 3);
5461 sched_data.cur = 0;
5462 sched_data.first_slot = 0;
5463 }
5464
5465 /* We're beginning a new block. Initialize data structures as necessary. */
5466
5467 void
5468 ia64_sched_init (dump, sched_verbose, max_ready)
5469 FILE *dump ATTRIBUTE_UNUSED;
5470 int sched_verbose ATTRIBUTE_UNUSED;
5471 int max_ready;
5472 {
5473 static int initialized = 0;
5474
5475 if (! initialized)
5476 {
5477 int b1, b2, i;
5478
5479 initialized = 1;
5480
5481 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5482 {
5483 const struct bundle *t1 = bundle + b1;
5484 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5485 {
5486 const struct bundle *t2 = bundle + b2;
5487
5488 packets[i].t1 = t1;
5489 packets[i].t2 = t2;
5490 }
5491 }
5492 for (i = 0; i < NR_PACKETS; i++)
5493 {
5494 int j;
5495 for (j = 0; j < 3; j++)
5496 packets[i].t[j] = packets[i].t1->t[j];
5497 for (j = 0; j < 3; j++)
5498 packets[i].t[j + 3] = packets[i].t2->t[j];
5499 packets[i].first_split = itanium_split_issue (packets + i, 0);
5500 }
5501
5502 }
5503
5504 init_insn_group_barriers ();
5505
5506 memset (&sched_data, 0, sizeof sched_data);
5507 sched_types = (enum attr_type *) xmalloc (max_ready
5508 * sizeof (enum attr_type));
5509 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5510 }
5511
5512 /* See if the packet P can match the insns we have already scheduled. Return
5513 nonzero if so. In *PSLOT, we store the first slot that is available for
5514 more instructions if we choose this packet.
5515 SPLIT holds the last slot we can use; there's a split issue after it, so
5516 scheduling beyond it would cause us to use more than one cycle. */
5517
5518 static int
5519 packet_matches_p (p, split, pslot)
5520 const struct ia64_packet *p;
5521 int split;
5522 int *pslot;
5523 {
5524 int filled = sched_data.cur;
5525 int first = sched_data.first_slot;
5526 int i, slot;
5527
5528 /* First, check if the first of the two bundles must be a specific one (due
5529 to stop bits). */
5530 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5531 return 0;
5532 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5533 return 0;
5534
5535 for (i = 0; i < first; i++)
5536 if (! insn_matches_slot (p, sched_data.types[i], i,
5537 sched_data.insns[i]))
5538 return 0;
5539 for (i = slot = first; i < filled; i++)
5540 {
5541 while (slot < split)
5542 {
5543 if (insn_matches_slot (p, sched_data.types[i], slot,
5544 sched_data.insns[i]))
5545 break;
5546 slot++;
5547 }
5548 if (slot == split)
5549 return 0;
5550 slot++;
5551 }
5552
5553 if (pslot)
5554 *pslot = slot;
5555 return 1;
5556 }
5557
5558 /* A frontend for itanium_split_issue. For a packet P and a slot
5559 number FIRST that describes the start of the current clock cycle,
5560 return the slot number of the first split issue. This function
5561 uses the cached number found in P if possible. */
5562
5563 static int
5564 get_split (p, first)
5565 const struct ia64_packet *p;
5566 int first;
5567 {
5568 if (first == 0)
5569 return p->first_split;
5570 return itanium_split_issue (p, first);
5571 }
5572
5573 /* Given N_READY insns in the array READY, whose types are found in the
5574 corresponding array TYPES, return the insn that is best suited to be
5575 scheduled in slot SLOT of packet P. */
5576
5577 static int
5578 find_best_insn (ready, types, n_ready, p, slot)
5579 rtx *ready;
5580 enum attr_type *types;
5581 int n_ready;
5582 const struct ia64_packet *p;
5583 int slot;
5584 {
5585 int best = -1;
5586 int best_pri = 0;
5587 while (n_ready-- > 0)
5588 {
5589 rtx insn = ready[n_ready];
5590 if (! insn)
5591 continue;
5592 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5593 break;
5594 /* If we have equally good insns, one of which has a stricter
5595 slot requirement, prefer the one with the stricter requirement. */
5596 if (best >= 0 && types[n_ready] == TYPE_A)
5597 continue;
5598 if (insn_matches_slot (p, types[n_ready], slot, insn))
5599 {
5600 best = n_ready;
5601 best_pri = INSN_PRIORITY (ready[best]);
5602
5603 /* If there's no way we could get a stricter requirement, stop
5604 looking now. */
5605 if (types[n_ready] != TYPE_A
5606 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5607 break;
5609 }
5610 }
5611 return best;
5612 }
5613
5614 /* Select the best packet to use given the current scheduler state and the
5615 current ready list.
5616 READY is an array holding N_READY ready insns; TYPES is a corresponding
5617 array that holds their types. Store the best packet in *PPACKET and the
5618 number of insns that can be scheduled in the current cycle in *PBEST. */
5619
5620 static void
5621 find_best_packet (pbest, ppacket, ready, types, n_ready)
5622 int *pbest;
5623 const struct ia64_packet **ppacket;
5624 rtx *ready;
5625 enum attr_type *types;
5626 int n_ready;
5627 {
5628 int first = sched_data.first_slot;
5629 int best = 0;
5630 int lowest_end = 6;
5631 const struct ia64_packet *best_packet = NULL;
5632 int i;
5633
5634 for (i = 0; i < NR_PACKETS; i++)
5635 {
5636 const struct ia64_packet *p = packets + i;
5637 int slot;
5638 int split = get_split (p, first);
5639 int win = 0;
5640 int first_slot, last_slot;
5641 int b_nops = 0;
5642
5643 if (! packet_matches_p (p, split, &first_slot))
5644 continue;
5645
5646 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5647
5648 win = 0;
5649 last_slot = 6;
5650 for (slot = first_slot; slot < split; slot++)
5651 {
5652 int insn_nr;
5653
5654 /* Disallow a degenerate case where the first bundle doesn't
5655 contain anything but NOPs! */
5656 if (first_slot == 0 && win == 0 && slot == 3)
5657 {
5658 win = -1;
5659 break;
5660 }
5661
5662 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5663 if (insn_nr >= 0)
5664 {
5665 sched_ready[insn_nr] = 0;
5666 last_slot = slot;
5667 win++;
5668 }
5669 else if (p->t[slot] == TYPE_B)
5670 b_nops++;
5671 }
5672 /* We must disallow MBB/BBB packets if any of their B slots would be
5673 filled with nops. */
5674 if (last_slot < 3)
5675 {
5676 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5677 win = -1;
5678 }
5679 else
5680 {
5681 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5682 win = -1;
5683 }
5684
5685 if (win > best
5686 || (win == best && last_slot < lowest_end))
5687 {
5688 best = win;
5689 lowest_end = last_slot;
5690 best_packet = p;
5691 }
5692 }
5693 *pbest = best;
5694 *ppacket = best_packet;
5695 }
5696
5697 /* Reorder the ready list so that the insns that can be issued in this cycle
5698 are found in the correct order at the end of the list.
5699 DUMP is the scheduling dump file, or NULL. READY points to the start,
5700 E_READY to the end of the ready list. MAY_FAIL determines what should be
5701 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5702 otherwise we return 0.
5703 Return 1 if any insns can be scheduled in this cycle. */
5704
5705 static int
5706 itanium_reorder (dump, ready, e_ready, may_fail)
5707 FILE *dump;
5708 rtx *ready;
5709 rtx *e_ready;
5710 int may_fail;
5711 {
5712 const struct ia64_packet *best_packet;
5713 int n_ready = e_ready - ready;
5714 int first = sched_data.first_slot;
5715 int i, best, best_split, filled;
5716
5717 for (i = 0; i < n_ready; i++)
5718 sched_types[i] = ia64_safe_type (ready[i]);
5719
5720 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5721
5722 if (best == 0)
5723 {
5724 if (may_fail)
5725 return 0;
5726 abort ();
5727 }
5728
5729 if (dump)
5730 {
5731 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5732 best_packet->t1->name,
5733 best_packet->t2 ? best_packet->t2->name : NULL, best);
5734 }
5735
5736 best_split = itanium_split_issue (best_packet, first);
5737 packet_matches_p (best_packet, best_split, &filled);
5738
5739 for (i = filled; i < best_split; i++)
5740 {
5741 int insn_nr;
5742
5743 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5744 if (insn_nr >= 0)
5745 {
5746 rtx insn = ready[insn_nr];
5747 memmove (ready + insn_nr, ready + insn_nr + 1,
5748 (n_ready - insn_nr - 1) * sizeof (rtx));
5749 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5750 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5751 ready[--n_ready] = insn;
5752 }
5753 }
5754
5755 sched_data.packet = best_packet;
5756 sched_data.split = best_split;
5757 return 1;
5758 }
5759
5760 /* Dump information about the current scheduling state to file DUMP. */
5761
5762 static void
5763 dump_current_packet (dump)
5764 FILE *dump;
5765 {
5766 int i;
5767 fprintf (dump, "// %d slots filled:", sched_data.cur);
5768 for (i = 0; i < sched_data.first_slot; i++)
5769 {
5770 rtx insn = sched_data.insns[i];
5771 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5772 if (insn)
5773 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5774 if (sched_data.stopbit[i])
5775 fprintf (dump, " ;;");
5776 }
5777 fprintf (dump, " :::");
5778 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5779 {
5780 rtx insn = sched_data.insns[i];
5781 enum attr_type t = ia64_safe_type (insn);
5782 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5783 }
5784 fprintf (dump, "\n");
5785 }
5786
5787 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5788 NULL. */
5789
5790 static void
5791 schedule_stop (dump)
5792 FILE *dump;
5793 {
5794 const struct ia64_packet *best = sched_data.packet;
5795 int i;
5796 int best_stop = 6;
5797
5798 if (dump)
5799 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5800
5801 if (sched_data.cur == 0)
5802 {
5803 if (dump)
5804 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5805
5806 rotate_two_bundles (NULL);
5807 return;
5808 }
5809
5810 for (i = -1; i < NR_PACKETS; i++)
5811 {
5812 /* This is a slight hack to give the current packet the first chance.
5813 This is done to avoid e.g. switching from MIB to MBB bundles. */
5814 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
5815 int split = get_split (p, sched_data.first_slot);
5816 const struct bundle *compare;
5817 int next, stoppos;
5818
5819 if (! packet_matches_p (p, split, &next))
5820 continue;
5821
5822 compare = next > 3 ? p->t2 : p->t1;
5823
5824 stoppos = 3;
5825 if (compare->possible_stop)
5826 stoppos = compare->possible_stop;
5827 if (next > 3)
5828 stoppos += 3;
5829
5830 if (stoppos < next || stoppos >= best_stop)
5831 {
5832 if (compare->possible_stop == 0)
5833 continue;
5834 stoppos = (next > 3 ? 6 : 3);
5835 }
5836 if (stoppos < next || stoppos >= best_stop)
5837 continue;
5838
5839 if (dump)
5840 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
5841 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
5842 stoppos);
5843
5844 best_stop = stoppos;
5845 best = p;
5846 }
5847
5848 sched_data.packet = best;
5849 cycle_end_fill_slots (dump);
5850 while (sched_data.cur < best_stop)
5851 {
5852 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
5853 sched_data.insns[sched_data.cur] = 0;
5854 sched_data.stopbit[sched_data.cur] = 0;
5855 sched_data.cur++;
5856 }
5857 sched_data.stopbit[sched_data.cur - 1] = 1;
5858 sched_data.first_slot = best_stop;
5859
5860 if (dump)
5861 dump_current_packet (dump);
5862 }
5863
5864 /* If necessary, perform one or two rotations on the scheduling state.
5865 This should only be called if we are starting a new cycle. */
5866
5867 static void
5868 maybe_rotate (dump)
5869 FILE *dump;
5870 {
5871 if (sched_data.cur == 6)
5872 rotate_two_bundles (dump);
5873 else if (sched_data.cur >= 3)
5874 rotate_one_bundle (dump);
5875 sched_data.first_slot = sched_data.cur;
5876 }
5877
5878 /* The clock cycle when ia64_sched_reorder was last called. */
5879 static int prev_cycle;
5880
5881 /* The value sched_data.first_slot had when ia64_sched_reorder was last
5882 called, i.e. the first slot used in the previous cycle. */
5883 static int prev_first;
5884
5885 /* The last insn that has been scheduled. At the start of a new cycle
5886 we know that we can emit new insns after it; the main scheduling code
5887 has already emitted a cycle_display insn after it and is using that
5888 as its current last insn. */
5889 static rtx last_issued;
5890
5891 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
5892 pad out the delay between MM (shifts, etc.) and integer operations. */
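/* Roughly: the partially filled current bundle is padded out to its next
   split issue point (or to the end of both bundles) with NOPs of the
   matching slot types, a stop bit is emitted where required, and each
   remaining fully idle cycle is filled with an empty bundle (bundle
   selector 0: one M NOP and two I NOPs) terminated by a stop. */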
5893
5894 static void
5895 nop_cycles_until (clock_var, dump)
5896 int clock_var;
5897 FILE *dump;
5898 {
5899 int prev_clock = prev_cycle;
5900 int cycles_left = clock_var - prev_clock;
5901
5902 /* Finish the previous cycle; pad it out with NOPs. */
5903 if (sched_data.cur == 3)
5904 {
5905 rtx t = gen_insn_group_barrier (GEN_INT (3));
5906 last_issued = emit_insn_after (t, last_issued);
5907 maybe_rotate (dump);
5908 }
5909 else if (sched_data.cur > 0)
5910 {
5911 int need_stop = 0;
5912 int split = itanium_split_issue (sched_data.packet, prev_first);
5913
5914 if (sched_data.cur < 3 && split > 3)
5915 {
5916 split = 3;
5917 need_stop = 1;
5918 }
5919
5920 if (split > sched_data.cur)
5921 {
5922 int i;
5923 for (i = sched_data.cur; i < split; i++)
5924 {
5925 rtx t;
5926
5927 t = gen_nop_type (sched_data.packet->t[i]);
5928 last_issued = emit_insn_after (t, last_issued);
5929 sched_data.types[i] = sched_data.packet->t[i];
5930 sched_data.insns[i] = last_issued;
5931 sched_data.stopbit[i] = 0;
5932 }
5933 sched_data.cur = split;
5934 }
5935
5936 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
5937 && cycles_left > 1)
5938 {
5939 int i;
5940 for (i = sched_data.cur; i < 6; i++)
5941 {
5942 rtx t;
5943
5944 t = gen_nop_type (sched_data.packet->t[i]);
5945 last_issued = emit_insn_after (t, last_issued);
5946 sched_data.types[i] = sched_data.packet->t[i];
5947 sched_data.insns[i] = last_issued;
5948 sched_data.stopbit[i] = 0;
5949 }
5950 sched_data.cur = 6;
5951 cycles_left--;
5952 need_stop = 1;
5953 }
5954
5955 if (need_stop || sched_data.cur == 6)
5956 {
5957 rtx t = gen_insn_group_barrier (GEN_INT (3));
5958 last_issued = emit_insn_after (t, last_issued);
5959 }
5960 maybe_rotate (dump);
5961 }
5962
5963 cycles_left--;
5964 while (cycles_left > 0)
5965 {
5966 rtx t = gen_bundle_selector (GEN_INT (0));
5967 last_issued = emit_insn_after (t, last_issued);
5968 t = gen_nop_type (TYPE_M);
5969 last_issued = emit_insn_after (t, last_issued);
5970 t = gen_nop_type (TYPE_I);
5971 last_issued = emit_insn_after (t, last_issued);
5972 if (cycles_left > 1)
5973 {
5974 t = gen_insn_group_barrier (GEN_INT (2));
5975 last_issued = emit_insn_after (t, last_issued);
5976 cycles_left--;
5977 }
5978 t = gen_nop_type (TYPE_I);
5979 last_issued = emit_insn_after (t, last_issued);
5980 t = gen_insn_group_barrier (GEN_INT (3));
5981 last_issued = emit_insn_after (t, last_issued);
5982 cycles_left--;
5983 }
5984 }
5985
5986 /* We are about to begin issuing insns for this clock cycle.
5987 Override the default sort algorithm to better slot instructions. */
5988
5989 int
5990 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready,
5991 reorder_type, clock_var)
5992 FILE *dump ATTRIBUTE_UNUSED;
5993 int sched_verbose ATTRIBUTE_UNUSED;
5994 rtx *ready;
5995 int *pn_ready;
5996 int reorder_type, clock_var;
5997 {
5998 int n_asms;
5999 int n_ready = *pn_ready;
6000 rtx *e_ready = ready + n_ready;
6001 rtx *insnp;
6002
6003 if (sched_verbose)
6004 {
6005 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6006 dump_current_packet (dump);
6007 }
6008
6009 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6010 {
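/* Scan the ready list for integer ALU, shift, logical, load or store
   insns that depend on the result of a multimedia shift or multiply;
   such results reach the integer units only after extra delay, so pad
   the schedule with NOP cycles up to CLOCK_VAR before issuing them. */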
6011 for (insnp = ready; insnp < e_ready; insnp++)
6012 {
6013 rtx insn = *insnp;
6014 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6015 if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
6016 || t == ITANIUM_CLASS_ILOG
6017 || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
6018 {
6019 rtx link;
6020 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6021 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
6022 && REG_NOTE_KIND (link) != REG_DEP_ANTI)
6023 {
6024 rtx other = XEXP (link, 0);
6025 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6026 if (t0 == ITANIUM_CLASS_MMSHF
6027 || t0 == ITANIUM_CLASS_MMMUL)
6028 {
6029 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6030 goto out;
6031 }
6032 }
6033 }
6034 }
6035 }
6036 out:
6037
6038 prev_first = sched_data.first_slot;
6039 prev_cycle = clock_var;
6040
6041 if (reorder_type == 0)
6042 maybe_rotate (sched_verbose ? dump : NULL);
6043
6044 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6045 n_asms = 0;
6046 for (insnp = ready; insnp < e_ready; insnp++)
6047 if (insnp < e_ready)
6048 {
6049 rtx insn = *insnp;
6050 enum attr_type t = ia64_safe_type (insn);
6051 if (t == TYPE_UNKNOWN)
6052 {
6053 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6054 || asm_noperands (PATTERN (insn)) >= 0)
6055 {
6056 rtx lowest = ready[0];
6057 ready[0] = insn;
6058 *insnp = lowest;
6059 n_asms++;
6060 }
6061 else
6062 {
6063 rtx highest = ready[n_ready - 1];
6064 ready[n_ready - 1] = insn;
6065 *insnp = highest;
6066 if (ia64_final_schedule && group_barrier_needed_p (insn))
6067 {
6068 schedule_stop (sched_verbose ? dump : NULL);
6069 sched_data.last_was_stop = 1;
6070 maybe_rotate (sched_verbose ? dump : NULL);
6071 }
6072
6073 return 1;
6074 }
6075 }
6076 }
6077 if (n_asms < n_ready)
6078 {
6079 /* Some normal insns to process. Skip the asms. */
6080 ready += n_asms;
6081 n_ready -= n_asms;
6082 }
6083 else if (n_ready > 0)
6084 {
6085 /* Only asm insns left. */
6086 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6087 return 1;
6088 }
6089
6090 if (ia64_final_schedule)
6091 {
6092 int nr_need_stop = 0;
6093
6094 for (insnp = ready; insnp < e_ready; insnp++)
6095 if (safe_group_barrier_needed_p (*insnp))
6096 nr_need_stop++;
6097
6098 /* Schedule a stop bit if
6099 - all insns require a stop bit, or
6100 - we are starting a new cycle and _any_ insns require a stop bit.
6101 The reason for the latter is that if our schedule is accurate, then
6102 the additional stop won't decrease performance at this point (since
6103 there's a split issue at this point anyway), but it gives us more
6104 freedom when scheduling the currently ready insns. */
6105 if ((reorder_type == 0 && nr_need_stop)
6106 || (reorder_type == 1 && n_ready == nr_need_stop))
6107 {
6108 schedule_stop (sched_verbose ? dump : NULL);
6109 sched_data.last_was_stop = 1;
6110 maybe_rotate (sched_verbose ? dump : NULL);
6111 if (reorder_type == 1)
6112 return 0;
6113 }
6114 else
6115 {
6116 int deleted = 0;
6117 insnp = e_ready;
6118 /* Move down everything that needs a stop bit, preserving relative
6119 order. */
6120 while (insnp-- > ready + deleted)
6121 while (insnp >= ready + deleted)
6122 {
6123 rtx insn = *insnp;
6124 if (! safe_group_barrier_needed_p (insn))
6125 break;
6126 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6127 *ready = insn;
6128 deleted++;
6129 }
6130 n_ready -= deleted;
6131 ready += deleted;
6132 if (deleted != nr_need_stop)
6133 abort ();
6134 }
6135 }
6136
6137 return itanium_reorder (sched_verbose ? dump : NULL,
6138 ready, e_ready, reorder_type == 1);
6139 }
6140
6141 /* Like ia64_sched_reorder, but called after issuing each insn.
6142 Override the default sort algorithm to better slot instructions. */
6143
6144 int
6145 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6146 FILE *dump ATTRIBUTE_UNUSED;
6147 int sched_verbose ATTRIBUTE_UNUSED;
6148 rtx *ready;
6149 int *pn_ready;
6150 int clock_var;
6151 {
6152 if (sched_data.last_was_stop)
6153 return 0;
6154
6155 /* Detect one special case and try to optimize it.
6156 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6157 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
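/* The transformation below moves the stop bit from slot 0 to the end of
   the first bundle (rewriting the existing insn_group_barrier operand
   from 1 to 3), shifts the already-scheduled insns up into slots 3-5,
   selects an MFB packet so the first bundle keeps the original M insn
   and gets F and B NOPs in its remaining slots, rotates that bundle out,
   and finally picks a new packet (without a multiway branch) for what is
   now the current bundle. */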
6158 if (sched_data.first_slot == 1
6159 && sched_data.stopbit[0]
6160 && ((sched_data.cur == 4
6161 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6162 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6163 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6164 || (sched_data.cur == 3
6165 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6166 && (sched_data.types[2] != TYPE_M && sched_data.types[2] != TYPE_I
6167 && sched_data.types[2] != TYPE_A))))
6169 {
6170 int i, best;
6171 rtx stop = PREV_INSN (sched_data.insns[1]);
6172 rtx pat;
6173
6174 sched_data.stopbit[0] = 0;
6175 sched_data.stopbit[2] = 1;
6176 if (GET_CODE (stop) != INSN)
6177 abort ();
6178
6179 pat = PATTERN (stop);
6180 /* Ignore cycle displays. */
6181 if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
6182 stop = PREV_INSN (stop);
6183 pat = PATTERN (stop);
6184 if (GET_CODE (pat) != UNSPEC_VOLATILE
6185 || XINT (pat, 1) != 2
6186 || INTVAL (XVECEXP (pat, 0, 0)) != 1)
6187 abort ();
6188 XVECEXP (pat, 0, 0) = GEN_INT (3);
6189
6190 sched_data.types[5] = sched_data.types[3];
6191 sched_data.types[4] = sched_data.types[2];
6192 sched_data.types[3] = sched_data.types[1];
6193 sched_data.insns[5] = sched_data.insns[3];
6194 sched_data.insns[4] = sched_data.insns[2];
6195 sched_data.insns[3] = sched_data.insns[1];
6196 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6197 sched_data.cur += 2;
6198 sched_data.first_slot = 3;
6199 for (i = 0; i < NR_PACKETS; i++)
6200 {
6201 const struct ia64_packet *p = packets + i;
6202 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6203 {
6204 sched_data.packet = p;
6205 break;
6206 }
6207 }
6208 rotate_one_bundle (sched_verbose ? dump : NULL);
6209
6210 best = 6;
6211 for (i = 0; i < NR_PACKETS; i++)
6212 {
6213 const struct ia64_packet *p = packets + i;
6214 int split = get_split (p, sched_data.first_slot);
6215 int next;
6216
6217 /* Disallow multiway branches here. */
6218 if (p->t[1] == TYPE_B)
6219 continue;
6220
6221 if (packet_matches_p (p, split, &next) && next < best)
6222 {
6223 best = next;
6224 sched_data.packet = p;
6225 sched_data.split = split;
6226 }
6227 }
6228 if (best == 6)
6229 abort ();
6230 }
6231
6232 if (*pn_ready > 0)
6233 {
6234 int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1,
6235 clock_var);
6236 if (more)
6237 return more;
6238 /* Did we schedule a stop? If so, finish this cycle. */
6239 if (sched_data.cur == sched_data.first_slot)
6240 return 0;
6241 }
6242
6243 if (sched_verbose)
6244 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6245
6246 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6247 if (sched_verbose)
6248 dump_current_packet (dump);
6249 return 0;
6250 }
6251
6252 /* We are about to issue INSN. Return the number of insns left on the
6253 ready queue that can be issued this cycle. */
6254
6255 int
6256 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6257 FILE *dump;
6258 int sched_verbose;
6259 rtx insn;
6260 int can_issue_more ATTRIBUTE_UNUSED;
6261 {
6262 enum attr_type t = ia64_safe_type (insn);
6263
6264 last_issued = insn;
6265
6266 if (sched_data.last_was_stop)
6267 {
6268 int t = sched_data.first_slot;
6269 if (t == 0)
6270 t = 3;
6271 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6272 init_insn_group_barriers ();
6273 sched_data.last_was_stop = 0;
6274 }
6275
6276 if (t == TYPE_UNKNOWN)
6277 {
6278 if (sched_verbose)
6279 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6280 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6281 || asm_noperands (PATTERN (insn)) >= 0)
6282 {
6283 /* This must be some kind of asm. Clear the scheduling state. */
6284 rotate_two_bundles (sched_verbose ? dump : NULL);
6285 if (ia64_final_schedule)
6286 group_barrier_needed_p (insn);
6287 }
6288 return 1;
6289 }
6290
6291 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6292 important state info. Don't delete this test. */
6293 if (ia64_final_schedule
6294 && group_barrier_needed_p (insn))
6295 abort ();
6296
6297 sched_data.stopbit[sched_data.cur] = 0;
6298 sched_data.insns[sched_data.cur] = insn;
6299 sched_data.types[sched_data.cur] = t;
6300
6301 sched_data.cur++;
6302 if (sched_verbose)
6303 fprintf (dump, "// Scheduling insn %d of type %s\n",
6304 INSN_UID (insn), type_names[t]);
6305
6306 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6307 {
6308 schedule_stop (sched_verbose ? dump : NULL);
6309 sched_data.last_was_stop = 1;
6310 }
6311
6312 return 1;
6313 }
6314
6315 /* Free data allocated by ia64_sched_init. */
6316
6317 void
6318 ia64_sched_finish (dump, sched_verbose)
6319 FILE *dump;
6320 int sched_verbose;
6321 {
6322 if (sched_verbose)
6323 fprintf (dump, "// Finishing schedule.\n");
6324 rotate_two_bundles (NULL);
6325 free (sched_types);
6326 free (sched_ready);
6327 }
6328 \f
6329 /* Emit pseudo-ops for the assembler to describe predicate relations.
6330 At present this assumes that we only consider predicate pairs to
6331 be mutex, and that the assembler can deduce proper values from
6332 straight-line code. */
6333
6334 static void
6335 emit_predicate_relation_info ()
6336 {
6337 int i;
6338
6339 for (i = n_basic_blocks - 1; i >= 0; --i)
6340 {
6341 basic_block bb = BASIC_BLOCK (i);
6342 int r;
6343 rtx head = bb->head;
6344
6345 /* We only need such notes at code labels. */
6346 if (GET_CODE (head) != CODE_LABEL)
6347 continue;
6348 if (GET_CODE (NEXT_INSN (head)) == NOTE
6349 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6350 head = NEXT_INSN (head);
6351
6352 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6353 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6354 {
6355 rtx p = gen_rtx_REG (BImode, r);
6356 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6357 if (head == bb->end)
6358 bb->end = n;
6359 head = n;
6360 }
6361 }
6362
6363 /* Look for conditional calls that do not return, and protect predicate
6364 relations around them. Otherwise the assembler will assume the call
6365 returns, and complain about uses of call-clobbered predicates after
6366 the call. */
6367 for (i = n_basic_blocks - 1; i >= 0; --i)
6368 {
6369 basic_block bb = BASIC_BLOCK (i);
6370 rtx insn = bb->head;
6371
6372 while (1)
6373 {
6374 if (GET_CODE (insn) == CALL_INSN
6375 && GET_CODE (PATTERN (insn)) == COND_EXEC
6376 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6377 {
6378 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6379 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6380 if (bb->head == insn)
6381 bb->head = b;
6382 if (bb->end == insn)
6383 bb->end = a;
6384 }
6385
6386 if (insn == bb->end)
6387 break;
6388 insn = NEXT_INSN (insn);
6389 }
6390 }
6391 }
6392
6393 /* Generate a NOP instruction of type T. We will never generate L type
6394 nops. */
6395
6396 static rtx
6397 gen_nop_type (t)
6398 enum attr_type t;
6399 {
6400 switch (t)
6401 {
6402 case TYPE_M:
6403 return gen_nop_m ();
6404 case TYPE_I:
6405 return gen_nop_i ();
6406 case TYPE_B:
6407 return gen_nop_b ();
6408 case TYPE_F:
6409 return gen_nop_f ();
6410 case TYPE_X:
6411 return gen_nop_x ();
6412 default:
6413 abort ();
6414 }
6415 }
6416
6417 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6418 here than while scheduling. */
6419
6420 static void
6421 ia64_emit_nops ()
6422 {
6423 rtx insn;
6424 const struct bundle *b = 0;
6425 int bundle_pos = 0;
6426
6427 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6428 {
6429 rtx pat;
6430 enum attr_type t;
6431 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6432 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6433 continue;
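/* A bundle selector (unspec 22) or a code label closes the current
   bundle: pad its remaining slots with NOPs of the bundle's slot types,
   then start tracking the newly selected bundle (or none, at a label). */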
6434 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6435 || GET_CODE (insn) == CODE_LABEL)
6436 {
6437 if (b)
6438 while (bundle_pos < 3)
6439 {
6440 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6441 bundle_pos++;
6442 }
6443 if (GET_CODE (insn) != CODE_LABEL)
6444 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6445 else
6446 b = 0;
6447 bundle_pos = 0;
6448 continue;
6449 }
6450 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6451 {
6452 int t = INTVAL (XVECEXP (pat, 0, 0));
6453 if (b)
6454 while (bundle_pos < t)
6455 {
6456 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6457 bundle_pos++;
6458 }
6459 continue;
6460 }
6461
6462 if (bundle_pos == 3)
6463 b = 0;
6464
6465 if (b && INSN_P (insn))
6466 {
6467 t = ia64_safe_type (insn);
6468 if (asm_noperands (PATTERN (insn)) >= 0
6469 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6470 {
6471 while (bundle_pos < 3)
6472 {
6473 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6474 bundle_pos++;
6475 }
6476 continue;
6477 }
6478
6479 if (t == TYPE_UNKNOWN)
6480 continue;
6481 while (bundle_pos < 3)
6482 {
6483 if (t == b->t[bundle_pos]
6484 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6485 || b->t[bundle_pos] == TYPE_I)))
6486 break;
6487
6488 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6489 bundle_pos++;
6490 }
6491 if (bundle_pos < 3)
6492 bundle_pos++;
6493 }
6494 }
6495 }
6496
6497 /* Perform machine dependent operations on the rtl chain INSNS. */
6498
6499 void
6500 ia64_reorg (insns)
6501 rtx insns;
6502 {
6503 /* If optimizing, we'll have split before scheduling. */
6504 if (optimize == 0)
6505 split_all_insns_noflow ();
6506
6507 /* Make sure the CFG and global_live_at_start are correct
6508 for emit_predicate_relation_info. */
6509 find_basic_blocks (insns, max_reg_num (), NULL);
6510 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6511
6512 if (ia64_flag_schedule_insns2)
6513 {
6514 ia64_final_schedule = 1;
6515 schedule_ebbs (rtl_dump_file);
6516 ia64_final_schedule = 0;
6517
6518 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6519 place as they were during scheduling. */
6520 emit_insn_group_barriers (rtl_dump_file, insns);
6521 ia64_emit_nops ();
6522 }
6523 else
6524 emit_all_insn_group_barriers (rtl_dump_file, insns);
6525
6526 /* A call must not be the last instruction in a function, so that the
6527 return address is still within the function and unwinding works
6528 properly. Note that IA-64 differs from dwarf2 on this point. */
6529 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6530 {
6531 rtx insn;
6532 int saw_stop = 0;
6533
6534 insn = get_last_insn ();
6535 if (! INSN_P (insn))
6536 insn = prev_active_insn (insn);
6537 if (GET_CODE (insn) == INSN
6538 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6539 && XINT (PATTERN (insn), 1) == 2)
6540 {
6541 saw_stop = 1;
6542 insn = prev_active_insn (insn);
6543 }
6544 if (GET_CODE (insn) == CALL_INSN)
6545 {
6546 if (! saw_stop)
6547 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6548 emit_insn (gen_break_f ());
6549 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6550 }
6551 }
6552
6553 fixup_errata ();
6554 emit_predicate_relation_info ();
6555 }
6556 \f
6557 /* Return true if REGNO is used by the epilogue. */
6558
6559 int
6560 ia64_epilogue_uses (regno)
6561 int regno;
6562 {
6563 /* When a function makes a call through a function descriptor, we
6564 will write a (potentially) new value to "gp". After returning
6565 from such a call, we need to make sure the function restores the
6566 original gp-value, even if the function itself does not use the
6567 gp anymore. */
6568 if (regno == R_GR (1)
6569 && TARGET_CONST_GP
6570 && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
6571 return 1;
6572
6573 /* For functions defined with the syscall_linkage attribute, all input
6574 registers are marked as live at all function exits. This prevents the
6575 register allocator from using the input registers, which in turn makes it
6576 possible to restart a system call after an interrupt without having to
6577 save/restore the input registers. This also prevents kernel data from
6578 leaking to application code. */
6579
6580 if (IN_REGNO_P (regno)
6581 && lookup_attribute ("syscall_linkage",
6582 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6583 return 1;
6584
6585 /* Conditional return patterns can't represent the use of `b0' as
6586 the return address, so we force the value live this way. */
6587 if (regno == R_BR (0))
6588 return 1;
6589
6590 if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
6591 return 1;
6592 if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
6593 return 1;
6594 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6595 && regno == AR_UNAT_REGNUM)
6596 return 1;
6597
6598 return 0;
6599 }
6600
6601 /* Return true if IDENTIFIER is a valid attribute for TYPE. */
6602
6603 static int
6604 ia64_valid_type_attribute (type, attributes, identifier, args)
6605 tree type;
6606 tree attributes ATTRIBUTE_UNUSED;
6607 tree identifier;
6608 tree args;
6609 {
6610 /* We only support an attribute for function calls. */
6611
6612 if (TREE_CODE (type) != FUNCTION_TYPE
6613 && TREE_CODE (type) != METHOD_TYPE)
6614 return 0;
6615
6616 /* The "syscall_linkage" attribute says the callee is a system call entry
6617 point. This affects ia64_epilogue_uses. */
6618
6619 if (is_attribute_p ("syscall_linkage", identifier))
6620 return args == NULL_TREE;
6621
6622 return 0;
6623 }
6624 \f
6625 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6626
6627 We add @ to the name if this goes in small data/bss. We can only put
6628 a variable in small data/bss if it is defined in this module or a module
6629 that we are statically linked with. We can't check the second condition,
6630 but TREE_STATIC gives us the first one. */
6631
6632 /* ??? If we had IPA, we could check the second condition. We could support
6633 programmer added section attributes if the variable is not defined in this
6634 module. */
6635
6636 /* ??? See the v850 port for a cleaner way to do this. */
6637
6638 /* ??? We could also support own long data here. Generating movl/add/ld8
6639 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6640 code faster because there is one less load. This also includes incomplete
6641 types which can't go in sdata/sbss. */
6642
6643 /* ??? See select_section. We must put short own readonly variables in
6644 sdata/sbss instead of the more natural rodata, because we can't perform
6645 the DECL_READONLY_SECTION test here. */
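/* The net effect of the function below: small defined variables get
   SDATA_NAME_FLAG_CHAR ('@', as described above) prepended to their
   assembler names so they can be placed in sdata/sbss and, per the
   comments above, addressed gp-relative; the final else branch strips
   the '@' again if a decl no longer qualifies. */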
6646
6647 extern struct obstack * saveable_obstack;
6648
6649 void
6650 ia64_encode_section_info (decl)
6651 tree decl;
6652 {
6653 const char *symbol_str;
6654
6655 if (TREE_CODE (decl) == FUNCTION_DECL)
6656 {
6657 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6658 return;
6659 }
6660
6661 /* Careful not to prod global register variables. */
6662 if (TREE_CODE (decl) != VAR_DECL
6663 || GET_CODE (DECL_RTL (decl)) != MEM
6664 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6665 return;
6666
6667 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6668
6669 /* We assume that -fpic is used only to create a shared library (dso).
6670 With -fpic, no global data can ever be sdata.
6671 Without -fpic, global common uninitialized data can never be sdata, since
6672 it can unify with a real definition in a dso. */
6673 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6674 to access them. The linker may then be able to do linker relaxation to
6675 optimize references to them. Currently sdata implies use of gprel. */
6676 /* We need the DECL_EXTERNAL check for C++. static class data members get
6677 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6678 statically allocated, but the space is allocated somewhere else. Such
6679 decls can not be own data. */
6680 if (! TARGET_NO_SDATA
6681 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6682 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6683 && ! (TREE_PUBLIC (decl)
6684 && (flag_pic
6685 || (DECL_COMMON (decl)
6686 && (DECL_INITIAL (decl) == 0
6687 || DECL_INITIAL (decl) == error_mark_node))))
6688 /* Either the variable must be declared without a section attribute,
6689 or the section must be sdata or sbss. */
6690 && (DECL_SECTION_NAME (decl) == 0
6691 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6692 ".sdata")
6693 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6694 ".sbss")))
6695 {
6696 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
6697
6698 /* If the variable has already been defined in the output file, then it
6699 is too late to put it in sdata if it wasn't put there in the first
6700 place. The test is here rather than above, because if it is already
6701 in sdata, then it can stay there. */
6702
6703 if (TREE_ASM_WRITTEN (decl))
6704 ;
6705
6706 /* If this is an incomplete type with size 0, then we can't put it in
6707 sdata because it might be too big when completed. */
6708 else if (size > 0
6709 && size <= (HOST_WIDE_INT) ia64_section_threshold
6710 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
6711 {
6712 size_t len = strlen (symbol_str);
6713 char *newstr = alloca (len + 1);
6714 const char *string;
6715
6716 *newstr = SDATA_NAME_FLAG_CHAR;
6717 memcpy (newstr + 1, symbol_str, len + 1);
6718
6719 string = ggc_alloc_string (newstr, len + 1);
6720 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
6721 }
6722 }
6723 /* This decl is marked as being in small data/bss but it shouldn't
6724 be; one likely explanation for this is that the decl has been
6725 moved into a different section from the one it was in when
6726 ENCODE_SECTION_INFO was first called. Remove the '@'. */
6727 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6728 {
6729 XSTR (XEXP (DECL_RTL (decl), 0), 0)
6730 = ggc_strdup (symbol_str + 1);
6731 }
6732 }
6733 \f
6734 /* Output assembly directives for prologue regions. */
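/* The directives emitted by process_set below include .fframe and .vframe
   for the frame, .save rp/ar.pfs/ar.unat/ar.lc/pr for registers saved into
   other registers, .savesp and .savepsp for saves to memory, and the
   .save.g/.save.f/.save.b/.save.gf masks for preserved general, floating
   point and branch registers. process_epilogue adds .label_state and
   .restore sp, and process_for_unwind_directive emits the matching
   .body/.copy_state at the start of the following block. */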
6735
6736 /* The current basic block number. */
6737
6738 static int block_num;
6739
6740 /* True if we need a copy_state command at the start of the next block. */
6741
6742 static int need_copy_state;
6743
6744 /* The function emits unwind directives for the start of an epilogue. */
6745
6746 static void
6747 process_epilogue ()
6748 {
6749 /* If this isn't the last block of the function, then we need to label the
6750 current state, and copy it back in at the start of the next block. */
6751
6752 if (block_num != n_basic_blocks - 1)
6753 {
6754 fprintf (asm_out_file, "\t.label_state 1\n");
6755 need_copy_state = 1;
6756 }
6757
6758 fprintf (asm_out_file, "\t.restore sp\n");
6759 }
6760
6761 /* This function processes a SET pattern, looking for the specific forms
6762 that require an assembly directive to be emitted for unwinding. */
6763
6764 static int
6765 process_set (asm_out_file, pat)
6766 FILE *asm_out_file;
6767 rtx pat;
6768 {
6769 rtx src = SET_SRC (pat);
6770 rtx dest = SET_DEST (pat);
6771 int src_regno, dest_regno;
6772
6773 /* Look for the ALLOC insn. */
6774 if (GET_CODE (src) == UNSPEC_VOLATILE
6775 && XINT (src, 1) == 0
6776 && GET_CODE (dest) == REG)
6777 {
6778 dest_regno = REGNO (dest);
6779
6780 /* If this isn't the final destination for ar.pfs, the alloc
6781 shouldn't have been marked frame related. */
6782 if (dest_regno != current_frame_info.reg_save_ar_pfs)
6783 abort ();
6784
6785 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
6786 ia64_dbx_register_number (dest_regno));
6787 return 1;
6788 }
6789
6790 /* Look for SP = .... */
6791 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
6792 {
6793 if (GET_CODE (src) == PLUS)
6794 {
6795 rtx op0 = XEXP (src, 0);
6796 rtx op1 = XEXP (src, 1);
6797 if (op0 == dest && GET_CODE (op1) == CONST_INT)
6798 {
6799 if (INTVAL (op1) < 0)
6800 {
6801 fputs ("\t.fframe ", asm_out_file);
6802 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
6803 -INTVAL (op1));
6804 fputc ('\n', asm_out_file);
6805 }
6806 else
6807 process_epilogue ();
6808 }
6809 else
6810 abort ();
6811 }
6812 else if (GET_CODE (src) == REG
6813 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
6814 process_epilogue ();
6815 else
6816 abort ();
6817
6818 return 1;
6819 }
6820
6821 /* Register move we need to look at. */
6822 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
6823 {
6824 src_regno = REGNO (src);
6825 dest_regno = REGNO (dest);
6826
6827 switch (src_regno)
6828 {
6829 case BR_REG (0):
6830 /* Saving return address pointer. */
6831 if (dest_regno != current_frame_info.reg_save_b0)
6832 abort ();
6833 fprintf (asm_out_file, "\t.save rp, r%d\n",
6834 ia64_dbx_register_number (dest_regno));
6835 return 1;
6836
6837 case PR_REG (0):
6838 if (dest_regno != current_frame_info.reg_save_pr)
6839 abort ();
6840 fprintf (asm_out_file, "\t.save pr, r%d\n",
6841 ia64_dbx_register_number (dest_regno));
6842 return 1;
6843
6844 case AR_UNAT_REGNUM:
6845 if (dest_regno != current_frame_info.reg_save_ar_unat)
6846 abort ();
6847 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
6848 ia64_dbx_register_number (dest_regno));
6849 return 1;
6850
6851 case AR_LC_REGNUM:
6852 if (dest_regno != current_frame_info.reg_save_ar_lc)
6853 abort ();
6854 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
6855 ia64_dbx_register_number (dest_regno));
6856 return 1;
6857
6858 case STACK_POINTER_REGNUM:
6859 if (dest_regno != HARD_FRAME_POINTER_REGNUM
6860 || ! frame_pointer_needed)
6861 abort ();
6862 fprintf (asm_out_file, "\t.vframe r%d\n",
6863 ia64_dbx_register_number (dest_regno));
6864 return 1;
6865
6866 default:
6867 /* Everything else should indicate being stored to memory. */
6868 abort ();
6869 }
6870 }
6871
6872 /* Memory store we need to look at. */
6873 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
6874 {
6875 long off;
6876 rtx base;
6877 const char *saveop;
6878
6879 if (GET_CODE (XEXP (dest, 0)) == REG)
6880 {
6881 base = XEXP (dest, 0);
6882 off = 0;
6883 }
6884 else if (GET_CODE (XEXP (dest, 0)) == PLUS
6885 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
6886 {
6887 base = XEXP (XEXP (dest, 0), 0);
6888 off = INTVAL (XEXP (XEXP (dest, 0), 1));
6889 }
6890 else
6891 abort ();
6892
6893 if (base == hard_frame_pointer_rtx)
6894 {
6895 saveop = ".savepsp";
6896 off = - off;
6897 }
6898 else if (base == stack_pointer_rtx)
6899 saveop = ".savesp";
6900 else
6901 abort ();
6902
6903 src_regno = REGNO (src);
6904 switch (src_regno)
6905 {
6906 case BR_REG (0):
6907 if (current_frame_info.reg_save_b0 != 0)
6908 abort ();
6909 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
6910 return 1;
6911
6912 case PR_REG (0):
6913 if (current_frame_info.reg_save_pr != 0)
6914 abort ();
6915 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
6916 return 1;
6917
6918 case AR_LC_REGNUM:
6919 if (current_frame_info.reg_save_ar_lc != 0)
6920 abort ();
6921 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
6922 return 1;
6923
6924 case AR_PFS_REGNUM:
6925 if (current_frame_info.reg_save_ar_pfs != 0)
6926 abort ();
6927 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
6928 return 1;
6929
6930 case AR_UNAT_REGNUM:
6931 if (current_frame_info.reg_save_ar_unat != 0)
6932 abort ();
6933 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
6934 return 1;
6935
6936 case GR_REG (4):
6937 case GR_REG (5):
6938 case GR_REG (6):
6939 case GR_REG (7):
6940 fprintf (asm_out_file, "\t.save.g 0x%x\n",
6941 1 << (src_regno - GR_REG (4)));
6942 return 1;
6943
6944 case BR_REG (1):
6945 case BR_REG (2):
6946 case BR_REG (3):
6947 case BR_REG (4):
6948 case BR_REG (5):
6949 fprintf (asm_out_file, "\t.save.b 0x%x\n",
6950 1 << (src_regno - BR_REG (1)));
6951 return 1;
6952
6953 case FR_REG (2):
6954 case FR_REG (3):
6955 case FR_REG (4):
6956 case FR_REG (5):
6957 fprintf (asm_out_file, "\t.save.f 0x%x\n",
6958 1 << (src_regno - FR_REG (2)));
6959 return 1;
6960
6961 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
6962 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
6963 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
6964 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
6965 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
6966 1 << (src_regno - FR_REG (12)));
6967 return 1;
6968
6969 default:
6970 return 0;
6971 }
6972 }
6973
6974 return 0;
6975 }
6976
6977
6978 /* This function looks at a single insn and emits any directives
6979 required to unwind this insn. */
6980 void
6981 process_for_unwind_directive (asm_out_file, insn)
6982 FILE *asm_out_file;
6983 rtx insn;
6984 {
6985 if (flag_unwind_tables
6986 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6987 {
6988 rtx pat;
6989
6990 if (GET_CODE (insn) == NOTE
6991 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
6992 {
6993 block_num = NOTE_BASIC_BLOCK (insn)->index;
6994
6995 /* Restore unwind state from immediately before the epilogue. */
6996 if (need_copy_state)
6997 {
6998 fprintf (asm_out_file, "\t.body\n");
6999 fprintf (asm_out_file, "\t.copy_state 1\n");
7000 need_copy_state = 0;
7001 }
7002 }
7003
7004 if (! RTX_FRAME_RELATED_P (insn))
7005 return;
7006
7007 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7008 if (pat)
7009 pat = XEXP (pat, 0);
7010 else
7011 pat = PATTERN (insn);
7012
7013 switch (GET_CODE (pat))
7014 {
7015 case SET:
7016 process_set (asm_out_file, pat);
7017 break;
7018
7019 case PARALLEL:
7020 {
7021 int par_index;
7022 int limit = XVECLEN (pat, 0);
7023 for (par_index = 0; par_index < limit; par_index++)
7024 {
7025 rtx x = XVECEXP (pat, 0, par_index);
7026 if (GET_CODE (x) == SET)
7027 process_set (asm_out_file, x);
7028 }
7029 break;
7030 }
7031
7032 default:
7033 abort ();
7034 }
7035 }
7036 }
7037
7038 \f
7039 void
7040 ia64_init_builtins ()
7041 {
7042 tree psi_type_node = build_pointer_type (integer_type_node);
7043 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7044 tree endlink = void_list_node;
7045
7046 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7047 tree si_ftype_psi_si_si
7048 = build_function_type (integer_type_node,
7049 tree_cons (NULL_TREE, psi_type_node,
7050 tree_cons (NULL_TREE, integer_type_node,
7051 tree_cons (NULL_TREE,
7052 integer_type_node,
7053 endlink))));
7054
7055 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7056 tree di_ftype_pdi_di_di
7057 = build_function_type (long_integer_type_node,
7058 tree_cons (NULL_TREE, pdi_type_node,
7059 tree_cons (NULL_TREE,
7060 long_integer_type_node,
7061 tree_cons (NULL_TREE,
7062 long_integer_type_node,
7063 endlink))));
7064 /* __sync_synchronize */
7065 tree void_ftype_void
7066 = build_function_type (void_type_node, endlink);
7067
7068 /* __sync_lock_test_and_set_si */
7069 tree si_ftype_psi_si
7070 = build_function_type (integer_type_node,
7071 tree_cons (NULL_TREE, psi_type_node,
7072 tree_cons (NULL_TREE, integer_type_node, endlink)));
7073
7074 /* __sync_lock_test_and_set_di */
7075 tree di_ftype_pdi_di
7076 = build_function_type (long_integer_type_node,
7077 tree_cons (NULL_TREE, pdi_type_node,
7078 tree_cons (NULL_TREE, long_integer_type_node,
7079 endlink)));
7080
7081 /* __sync_lock_release_si */
7082 tree void_ftype_psi
7083 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
7084 endlink));
7085
7086 /* __sync_lock_release_di */
7087 tree void_ftype_pdi
7088 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
7089 endlink));
7090
7091 #define def_builtin(name, type, code) \
7092 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
7093
7094 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7095 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7096 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7097 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7098 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7099 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7100 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7101 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7102
7103 def_builtin ("__sync_synchronize", void_ftype_void,
7104 IA64_BUILTIN_SYNCHRONIZE);
7105
7106 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7107 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7108 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7109 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7110 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7111 IA64_BUILTIN_LOCK_RELEASE_SI);
7112 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7113 IA64_BUILTIN_LOCK_RELEASE_DI);
7114
7115 def_builtin ("__builtin_ia64_bsp",
7116 build_function_type (ptr_type_node, endlink),
7117 IA64_BUILTIN_BSP);
7118
7119 def_builtin ("__builtin_ia64_flushrs",
7120 build_function_type (void_type_node, endlink),
7121 IA64_BUILTIN_FLUSHRS);
7122
7123 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7124 IA64_BUILTIN_FETCH_AND_ADD_SI);
7125 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7126 IA64_BUILTIN_FETCH_AND_SUB_SI);
7127 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7128 IA64_BUILTIN_FETCH_AND_OR_SI);
7129 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7130 IA64_BUILTIN_FETCH_AND_AND_SI);
7131 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7132 IA64_BUILTIN_FETCH_AND_XOR_SI);
7133 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7134 IA64_BUILTIN_FETCH_AND_NAND_SI);
7135
7136 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7137 IA64_BUILTIN_ADD_AND_FETCH_SI);
7138 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7139 IA64_BUILTIN_SUB_AND_FETCH_SI);
7140 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7141 IA64_BUILTIN_OR_AND_FETCH_SI);
7142 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7143 IA64_BUILTIN_AND_AND_FETCH_SI);
7144 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7145 IA64_BUILTIN_XOR_AND_FETCH_SI);
7146 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7147 IA64_BUILTIN_NAND_AND_FETCH_SI);
7148
7149 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7150 IA64_BUILTIN_FETCH_AND_ADD_DI);
7151 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7152 IA64_BUILTIN_FETCH_AND_SUB_DI);
7153 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7154 IA64_BUILTIN_FETCH_AND_OR_DI);
7155 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7156 IA64_BUILTIN_FETCH_AND_AND_DI);
7157 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7158 IA64_BUILTIN_FETCH_AND_XOR_DI);
7159 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7160 IA64_BUILTIN_FETCH_AND_NAND_DI);
7161
7162 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7163 IA64_BUILTIN_ADD_AND_FETCH_DI);
7164 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7165 IA64_BUILTIN_SUB_AND_FETCH_DI);
7166 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7167 IA64_BUILTIN_OR_AND_FETCH_DI);
7168 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7169 IA64_BUILTIN_AND_AND_FETCH_DI);
7170 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7171 IA64_BUILTIN_XOR_AND_FETCH_DI);
7172 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7173 IA64_BUILTIN_NAND_AND_FETCH_DI);
7174
7175 #undef def_builtin
7176 }
7177
7178 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7179
7180 mf
7181 tmp = [ptr];
7182 do {
7183 ret = tmp;
7184 ar.ccv = tmp;
7185 tmp <op>= value;
7186 cmpxchgsz.acq tmp = [ptr], tmp
7187 } while (tmp != ret)
7188 */
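/* As an illustrative example (assuming a plain int COUNTER):

     int old = __sync_fetch_and_add_si (&counter, 1);

   returns the value COUNTER held before the addition. When the addend
   satisfies fetchadd_operand (small constants such as 1), the special
   case below emits a single fetchadd.acq instead of the cmpxchg loop. */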
7189
7190 static rtx
7191 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7192 optab binoptab;
7193 enum machine_mode mode;
7194 tree arglist;
7195 rtx target;
7196 {
7197 rtx ret, label, tmp, ccv, insn, mem, value;
7198 tree arg0, arg1;
7199
7200 arg0 = TREE_VALUE (arglist);
7201 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7202 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7203 value = expand_expr (arg1, NULL_RTX, mode, 0);
7204
7205 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7206 MEM_VOLATILE_P (mem) = 1;
7207
7208 if (target && register_operand (target, mode))
7209 ret = target;
7210 else
7211 ret = gen_reg_rtx (mode);
7212
7213 emit_insn (gen_mf ());
7214
7215 /* Special case for fetchadd instructions. */
7216 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7217 {
7218 if (mode == SImode)
7219 insn = gen_fetchadd_acq_si (ret, mem, value);
7220 else
7221 insn = gen_fetchadd_acq_di (ret, mem, value);
7222 emit_insn (insn);
7223 return ret;
7224 }
7225
7226 tmp = gen_reg_rtx (mode);
7227 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7228 emit_move_insn (tmp, mem);
7229
7230 label = gen_label_rtx ();
7231 emit_label (label);
7232 emit_move_insn (ret, tmp);
7233 emit_move_insn (ccv, tmp);
7234
7235 /* Perform the specific operation. NAND is requested by passing
7236 one_cmpl_optab; complement TMP first, then AND it with VALUE. */
7237 if (binoptab == one_cmpl_optab)
7238 {
7239 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7240 binoptab = and_optab;
7241 }
7242 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7243
7244 if (mode == SImode)
7245 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7246 else
7247 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7248 emit_insn (insn);
7249
7250 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);
7251
7252 return ret;
7253 }
7254
7255 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7256
7257 mf
7258 tmp = [ptr];
7259 do {
7260 old = tmp;
7261 ar.ccv = tmp;
7262 ret = tmp + value;
7263 cmpxchgsz.acq tmp = [ptr], ret
7264 } while (tmp != old)
7265 */
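/* Note the difference from the fetch_and_op expansion above: here the
   value returned is the result of the operation (the new value written
   back), not the original contents of the memory location. */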
7266
7267 static rtx
7268 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7269 optab binoptab;
7270 enum machine_mode mode;
7271 tree arglist;
7272 rtx target;
7273 {
7274 rtx old, label, tmp, ret, ccv, insn, mem, value;
7275 tree arg0, arg1;
7276
7277 arg0 = TREE_VALUE (arglist);
7278 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7279 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7280 value = expand_expr (arg1, NULL_RTX, mode, 0);
7281
7282 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7283 MEM_VOLATILE_P (mem) = 1;
7284
7285 if (target && ! register_operand (target, mode))
7286 target = NULL_RTX;
7287
7288 emit_insn (gen_mf ());
7289 tmp = gen_reg_rtx (mode);
7290 old = gen_reg_rtx (mode);
7291 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7292
7293 emit_move_insn (tmp, mem);
7294
7295 label = gen_label_rtx ();
7296 emit_label (label);
7297 emit_move_insn (old, tmp);
7298 emit_move_insn (ccv, tmp);
7299
7300 /* Perform the specific operation. NAND is requested by passing
7301 one_cmpl_optab; complement TMP first, then AND it with VALUE. */
7302 if (binoptab == one_cmpl_optab)
7303 {
7304 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7305 binoptab = and_optab;
7306 }
7307 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7308
7309 if (mode == SImode)
7310 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7311 else
7312 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7313 emit_insn (insn);
7314
7315 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);
7316
7317 return ret;
7318 }
7319
7320 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7321
7322 ar.ccv = oldval
7323 mf
7324 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7325 return ret
7326
7327 For bool_ it's the same except return ret == oldval.
7328 */
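/* Illustrative use (assuming an int LOCK initialized to 0):

     if (__sync_bool_compare_and_swap_si (&lock, 0, 1))
       ... we stored 1 and own the lock ...

   The val_ variant instead returns the value found at *PTR, which equals
   OLDVAL exactly when the store happened. */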
7329
7330 static rtx
7331 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7332 enum machine_mode mode;
7333 int boolp;
7334 tree arglist;
7335 rtx target;
7336 {
7337 tree arg0, arg1, arg2;
7338 rtx mem, old, new, ccv, tmp, insn;
7339
7340 arg0 = TREE_VALUE (arglist);
7341 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7342 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7343 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7344 old = expand_expr (arg1, NULL_RTX, mode, 0);
7345 new = expand_expr (arg2, NULL_RTX, mode, 0);
7346
7347 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7348 MEM_VOLATILE_P (mem) = 1;
7349
7350 if (! register_operand (old, mode))
7351 old = copy_to_mode_reg (mode, old);
7352 if (! register_operand (new, mode))
7353 new = copy_to_mode_reg (mode, new);
7354
7355 if (! boolp && target && register_operand (target, mode))
7356 tmp = target;
7357 else
7358 tmp = gen_reg_rtx (mode);
7359
7360 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7361 emit_move_insn (ccv, old);
7362 emit_insn (gen_mf ());
7363 if (mode == SImode)
7364 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7365 else
7366 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7367 emit_insn (insn);
7368
7369 if (boolp)
7370 {
7371 if (! target)
7372 target = gen_reg_rtx (mode);
7373 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7374 }
7375 else
7376 return tmp;
7377 }
7378
7379 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7380
7381 static rtx
7382 ia64_expand_lock_test_and_set (mode, arglist, target)
7383 enum machine_mode mode;
7384 tree arglist;
7385 rtx target;
7386 {
7387 tree arg0, arg1;
7388 rtx mem, new, ret, insn;
7389
7390 arg0 = TREE_VALUE (arglist);
7391 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7392 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7393 new = expand_expr (arg1, NULL_RTX, mode, 0);
7394
7395 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7396 MEM_VOLATILE_P (mem) = 1;
7397 if (! register_operand (new, mode))
7398 new = copy_to_mode_reg (mode, new);
7399
7400 if (target && register_operand (target, mode))
7401 ret = target;
7402 else
7403 ret = gen_reg_rtx (mode);
7404
7405 if (mode == SImode)
7406 insn = gen_xchgsi (ret, mem, new);
7407 else
7408 insn = gen_xchgdi (ret, mem, new);
7409 emit_insn (insn);
7410
7411 return ret;
7412 }
7413
7414 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7415
7416 static rtx
7417 ia64_expand_lock_release (mode, arglist, target)
7418 enum machine_mode mode;
7419 tree arglist;
7420 rtx target ATTRIBUTE_UNUSED;
7421 {
7422 tree arg0;
7423 rtx mem;
7424
7425 arg0 = TREE_VALUE (arglist);
7426 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7427
7428 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7429 MEM_VOLATILE_P (mem) = 1;
7430
7431 emit_move_insn (mem, const0_rtx);
7432
7433 return const0_rtx;
7434 }
7435
7436 rtx
7437 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
7438 tree exp;
7439 rtx target;
7440 rtx subtarget ATTRIBUTE_UNUSED;
7441 enum machine_mode mode ATTRIBUTE_UNUSED;
7442 int ignore ATTRIBUTE_UNUSED;
7443 {
7444 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7445 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7446 tree arglist = TREE_OPERAND (exp, 1);
7447
7448 switch (fcode)
7449 {
7450 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7451 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7452 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7453 case IA64_BUILTIN_LOCK_RELEASE_SI:
7454 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7455 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7456 case IA64_BUILTIN_FETCH_AND_OR_SI:
7457 case IA64_BUILTIN_FETCH_AND_AND_SI:
7458 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7459 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7460 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7461 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7462 case IA64_BUILTIN_OR_AND_FETCH_SI:
7463 case IA64_BUILTIN_AND_AND_FETCH_SI:
7464 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7465 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7466 mode = SImode;
7467 break;
7468
7469 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7470 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7471 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7472 case IA64_BUILTIN_LOCK_RELEASE_DI:
7473 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7474 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7475 case IA64_BUILTIN_FETCH_AND_OR_DI:
7476 case IA64_BUILTIN_FETCH_AND_AND_DI:
7477 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7478 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7479 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7480 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7481 case IA64_BUILTIN_OR_AND_FETCH_DI:
7482 case IA64_BUILTIN_AND_AND_FETCH_DI:
7483 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7484 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7485 mode = DImode;
7486 break;
7487
7488 default:
7489 break;
7490 }
7491
7492 switch (fcode)
7493 {
7494 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7495 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7496 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
7497
7498 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7499 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7500 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
7501
7502 case IA64_BUILTIN_SYNCHRONIZE:
7503 emit_insn (gen_mf ());
7504 return const0_rtx;
7505
7506 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7507 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7508 return ia64_expand_lock_test_and_set (mode, arglist, target);
7509
7510 case IA64_BUILTIN_LOCK_RELEASE_SI:
7511 case IA64_BUILTIN_LOCK_RELEASE_DI:
7512 return ia64_expand_lock_release (mode, arglist, target);
7513
7514 case IA64_BUILTIN_BSP:
7515 if (! target || ! register_operand (target, DImode))
7516 target = gen_reg_rtx (DImode);
7517 emit_insn (gen_bsp_value (target));
7518 return target;
7519
7520 case IA64_BUILTIN_FLUSHRS:
7521 emit_insn (gen_flushrs ());
7522 return const0_rtx;
7523
7524 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7525 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7526 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
7527
7528 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7529 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7530 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
7531
7532 case IA64_BUILTIN_FETCH_AND_OR_SI:
7533 case IA64_BUILTIN_FETCH_AND_OR_DI:
7534 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
7535
7536 case IA64_BUILTIN_FETCH_AND_AND_SI:
7537 case IA64_BUILTIN_FETCH_AND_AND_DI:
7538 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
7539
7540 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7541 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7542 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
7543
7544 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7545 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7546 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
7547
7548 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7549 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7550 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
7551
7552 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7553 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7554 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
7555
7556 case IA64_BUILTIN_OR_AND_FETCH_SI:
7557 case IA64_BUILTIN_OR_AND_FETCH_DI:
7558 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
7559
7560 case IA64_BUILTIN_AND_AND_FETCH_SI:
7561 case IA64_BUILTIN_AND_AND_FETCH_DI:
7562 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
7563
7564 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7565 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7566 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
7567
7568 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7569 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7570 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
7571
7572 default:
7573 break;
7574 }
7575
7576 return NULL_RTX;
7577 }