1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
5
6 This file is part of GNU CC.
7
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-attr.h"
35 #include "flags.h"
36 #include "recog.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "obstack.h"
40 #include "except.h"
41 #include "function.h"
42 #include "ggc.h"
43 #include "basic-block.h"
44 #include "toplev.h"
45 #include "sched-int.h"
46 #include "timevar.h"
47 #include "target.h"
48 #include "target-def.h"
49
50 /* This is used for communication between ASM_OUTPUT_LABEL and
51 ASM_OUTPUT_LABELREF. */
52 int ia64_asm_output_label = 0;
53
54 /* Define the information needed to generate branch and scc insns. This is
55 stored from the compare operation. */
56 struct rtx_def * ia64_compare_op0;
57 struct rtx_def * ia64_compare_op1;
58
59 /* Register names for ia64_expand_prologue. */
60 static const char * const ia64_reg_numbers[96] =
61 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
62 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
63 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
64 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
65 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
66 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
67 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
68 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
69 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
70 "r104","r105","r106","r107","r108","r109","r110","r111",
71 "r112","r113","r114","r115","r116","r117","r118","r119",
72 "r120","r121","r122","r123","r124","r125","r126","r127"};
73
74 /* ??? These strings could be shared with REGISTER_NAMES. */
75 static const char * const ia64_input_reg_names[8] =
76 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
77
78 /* ??? These strings could be shared with REGISTER_NAMES. */
79 static const char * const ia64_local_reg_names[80] =
80 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
81 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
82 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
83 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
84 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
85 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
86 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
87 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
88 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
89 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
90
91 /* ??? These strings could be shared with REGISTER_NAMES. */
92 static const char * const ia64_output_reg_names[8] =
93 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
94
95 /* String used with the -mfixed-range= option. */
96 const char *ia64_fixed_range_string;
97
98 /* Determines whether we run our final scheduling pass or not. We always
99 avoid the normal second scheduling pass. */
100 static int ia64_flag_schedule_insns2;
101
102 /* Variables which are this size or smaller are put in the sdata/sbss
103 sections. */
104
105 unsigned int ia64_section_threshold;
106 \f
107 static int find_gr_spill PARAMS ((int));
108 static int next_scratch_gr_reg PARAMS ((void));
109 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
110 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
111 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
112 static void finish_spill_pointers PARAMS ((void));
113 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
114 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
115 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
116 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
117 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
118 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
119
120 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
121 static void fix_range PARAMS ((const char *));
122 static void ia64_add_gc_roots PARAMS ((void));
123 static void ia64_init_machine_status PARAMS ((struct function *));
124 static void ia64_mark_machine_status PARAMS ((struct function *));
125 static void ia64_free_machine_status PARAMS ((struct function *));
126 static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
127 static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
128 static void emit_predicate_relation_info PARAMS ((void));
129 static void process_epilogue PARAMS ((void));
130 static int process_set PARAMS ((FILE *, rtx));
131
132 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
133 tree, rtx));
134 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
135 tree, rtx));
136 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
137 tree, rtx));
138 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
139 tree, rtx));
140 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
141 const struct attribute_spec ia64_attribute_table[];
142 static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
143 static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
144 static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
145 static void ia64_output_function_end_prologue PARAMS ((FILE *));
146
147 static int ia64_issue_rate PARAMS ((void));
148 static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
149 static void ia64_sched_init PARAMS ((FILE *, int, int));
150 static void ia64_sched_finish PARAMS ((FILE *, int));
151 static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
152 int *, int, int));
153 static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
154 static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
155 static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
156 static rtx ia64_cycle_display PARAMS ((int, rtx));
157
158 \f
159 /* Initialize the GCC target structure. */
160 #undef TARGET_ATTRIBUTE_TABLE
161 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
162
163 #undef TARGET_INIT_BUILTINS
164 #define TARGET_INIT_BUILTINS ia64_init_builtins
165
166 #undef TARGET_EXPAND_BUILTIN
167 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
168
169 #undef TARGET_ASM_BYTE_OP
170 #define TARGET_ASM_BYTE_OP "\tdata1\t"
171 #undef TARGET_ASM_ALIGNED_HI_OP
172 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
173 #undef TARGET_ASM_ALIGNED_SI_OP
174 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
175 #undef TARGET_ASM_ALIGNED_DI_OP
176 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
177 #undef TARGET_ASM_UNALIGNED_HI_OP
178 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
179 #undef TARGET_ASM_UNALIGNED_SI_OP
180 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
181 #undef TARGET_ASM_UNALIGNED_DI_OP
182 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
183 #undef TARGET_ASM_INTEGER
184 #define TARGET_ASM_INTEGER ia64_assemble_integer
185
186 #undef TARGET_ASM_FUNCTION_PROLOGUE
187 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
188 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
189 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
190 #undef TARGET_ASM_FUNCTION_EPILOGUE
191 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
192
193 #undef TARGET_SCHED_ADJUST_COST
194 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
195 #undef TARGET_SCHED_ISSUE_RATE
196 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
197 #undef TARGET_SCHED_VARIABLE_ISSUE
198 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
199 #undef TARGET_SCHED_INIT
200 #define TARGET_SCHED_INIT ia64_sched_init
201 #undef TARGET_SCHED_FINISH
202 #define TARGET_SCHED_FINISH ia64_sched_finish
203 #undef TARGET_SCHED_REORDER
204 #define TARGET_SCHED_REORDER ia64_sched_reorder
205 #undef TARGET_SCHED_REORDER2
206 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
207 #undef TARGET_SCHED_CYCLE_DISPLAY
208 #define TARGET_SCHED_CYCLE_DISPLAY ia64_cycle_display
209
210 struct gcc_target targetm = TARGET_INITIALIZER;
211 \f
212 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
213
214 int
215 call_operand (op, mode)
216 rtx op;
217 enum machine_mode mode;
218 {
219 if (mode != GET_MODE (op))
220 return 0;
221
222 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
223 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
224 }
225
226 /* Return 1 if OP refers to a symbol in the sdata section. */
227
228 int
229 sdata_symbolic_operand (op, mode)
230 rtx op;
231 enum machine_mode mode ATTRIBUTE_UNUSED;
232 {
233 switch (GET_CODE (op))
234 {
235 case CONST:
236 if (GET_CODE (XEXP (op, 0)) != PLUS
237 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
238 break;
239 op = XEXP (XEXP (op, 0), 0);
240 /* FALLTHRU */
241
242 case SYMBOL_REF:
243 if (CONSTANT_POOL_ADDRESS_P (op))
244 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
245 else
246 return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;
247
248 default:
249 break;
250 }
251
252 return 0;
253 }
254
255 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
256
257 int
258 got_symbolic_operand (op, mode)
259 rtx op;
260 enum machine_mode mode ATTRIBUTE_UNUSED;
261 {
262 switch (GET_CODE (op))
263 {
264 case CONST:
265 op = XEXP (op, 0);
266 if (GET_CODE (op) != PLUS)
267 return 0;
268 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
269 return 0;
270 op = XEXP (op, 1);
271 if (GET_CODE (op) != CONST_INT)
272 return 0;
273
274 return 1;
275
276 /* Ok if we're not using GOT entries at all. */
277 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
278 return 1;
279
280 /* "Ok" while emitting rtl, since otherwise we won't be provided
281 with the entire offset during emission, which makes it very
282 hard to split the offset into high and low parts. */
283 if (rtx_equal_function_value_matters)
284 return 1;
285
286 /* Force the low 14 bits of the constant to zero so that we do not
287 use up so many GOT entries. */
288 return (INTVAL (op) & 0x3fff) == 0;
289
290 case SYMBOL_REF:
291 case LABEL_REF:
292 return 1;
293
294 default:
295 break;
296 }
297 return 0;
298 }
299
300 /* Return 1 if OP refers to a symbol. */
301
302 int
303 symbolic_operand (op, mode)
304 rtx op;
305 enum machine_mode mode ATTRIBUTE_UNUSED;
306 {
307 switch (GET_CODE (op))
308 {
309 case CONST:
310 case SYMBOL_REF:
311 case LABEL_REF:
312 return 1;
313
314 default:
315 break;
316 }
317 return 0;
318 }
319
320 /* Return 1 if OP refers to a function. */
321
322 int
323 function_operand (op, mode)
324 rtx op;
325 enum machine_mode mode ATTRIBUTE_UNUSED;
326 {
327 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
328 return 1;
329 else
330 return 0;
331 }
332
333 /* Return 1 if OP is setjmp or a similar function. */
334
335 /* ??? This is an unsatisfying solution. Should rethink. */
336
337 int
338 setjmp_operand (op, mode)
339 rtx op;
340 enum machine_mode mode ATTRIBUTE_UNUSED;
341 {
342 const char *name;
343 int retval = 0;
344
345 if (GET_CODE (op) != SYMBOL_REF)
346 return 0;
347
348 name = XSTR (op, 0);
349
350 /* The following code is borrowed from special_function_p in calls.c. */
351
352 /* Disregard prefix _, __ or __x. */
353 if (name[0] == '_')
354 {
355 if (name[1] == '_' && name[2] == 'x')
356 name += 3;
357 else if (name[1] == '_')
358 name += 2;
359 else
360 name += 1;
361 }
362
363 if (name[0] == 's')
364 {
365 retval
366 = ((name[1] == 'e'
367 && (! strcmp (name, "setjmp")
368 || ! strcmp (name, "setjmp_syscall")))
369 || (name[1] == 'i'
370 && ! strcmp (name, "sigsetjmp"))
371 || (name[1] == 'a'
372 && ! strcmp (name, "savectx")));
373 }
374 else if ((name[0] == 'q' && name[1] == 's'
375 && ! strcmp (name, "qsetjmp"))
376 || (name[0] == 'v' && name[1] == 'f'
377 && ! strcmp (name, "vfork")))
378 retval = 1;
379
380 return retval;
381 }
382
383 /* Return 1 if OP is a general operand, but exclude symbolic operands
384    when generating PIC code.  */
385
386 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
387 from PREDICATE_CODES. */
388
389 int
390 move_operand (op, mode)
391 rtx op;
392 enum machine_mode mode;
393 {
394 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
395 return 0;
396
397 return general_operand (op, mode);
398 }
399
400 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
401
402 int
403 gr_register_operand (op, mode)
404 rtx op;
405 enum machine_mode mode;
406 {
407 if (! register_operand (op, mode))
408 return 0;
409 if (GET_CODE (op) == SUBREG)
410 op = SUBREG_REG (op);
411 if (GET_CODE (op) == REG)
412 {
413 unsigned int regno = REGNO (op);
414 if (regno < FIRST_PSEUDO_REGISTER)
415 return GENERAL_REGNO_P (regno);
416 }
417 return 1;
418 }
419
420 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
421
422 int
423 fr_register_operand (op, mode)
424 rtx op;
425 enum machine_mode mode;
426 {
427 if (! register_operand (op, mode))
428 return 0;
429 if (GET_CODE (op) == SUBREG)
430 op = SUBREG_REG (op);
431 if (GET_CODE (op) == REG)
432 {
433 unsigned int regno = REGNO (op);
434 if (regno < FIRST_PSEUDO_REGISTER)
435 return FR_REGNO_P (regno);
436 }
437 return 1;
438 }
439
440 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
441
442 int
443 grfr_register_operand (op, mode)
444 rtx op;
445 enum machine_mode mode;
446 {
447 if (! register_operand (op, mode))
448 return 0;
449 if (GET_CODE (op) == SUBREG)
450 op = SUBREG_REG (op);
451 if (GET_CODE (op) == REG)
452 {
453 unsigned int regno = REGNO (op);
454 if (regno < FIRST_PSEUDO_REGISTER)
455 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
456 }
457 return 1;
458 }
459
460 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
461
462 int
463 gr_nonimmediate_operand (op, mode)
464 rtx op;
465 enum machine_mode mode;
466 {
467 if (! nonimmediate_operand (op, mode))
468 return 0;
469 if (GET_CODE (op) == SUBREG)
470 op = SUBREG_REG (op);
471 if (GET_CODE (op) == REG)
472 {
473 unsigned int regno = REGNO (op);
474 if (regno < FIRST_PSEUDO_REGISTER)
475 return GENERAL_REGNO_P (regno);
476 }
477 return 1;
478 }
479
480 /* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */
481
482 int
483 fr_nonimmediate_operand (op, mode)
484 rtx op;
485 enum machine_mode mode;
486 {
487 if (! nonimmediate_operand (op, mode))
488 return 0;
489 if (GET_CODE (op) == SUBREG)
490 op = SUBREG_REG (op);
491 if (GET_CODE (op) == REG)
492 {
493 unsigned int regno = REGNO (op);
494 if (regno < FIRST_PSEUDO_REGISTER)
495 return FR_REGNO_P (regno);
496 }
497 return 1;
498 }
499
500 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
501
502 int
503 grfr_nonimmediate_operand (op, mode)
504 rtx op;
505 enum machine_mode mode;
506 {
507 if (! nonimmediate_operand (op, mode))
508 return 0;
509 if (GET_CODE (op) == SUBREG)
510 op = SUBREG_REG (op);
511 if (GET_CODE (op) == REG)
512 {
513 unsigned int regno = REGNO (op);
514 if (regno < FIRST_PSEUDO_REGISTER)
515 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
516 }
517 return 1;
518 }
519
520 /* Return 1 if OP is a GR register operand, or zero. */
521
522 int
523 gr_reg_or_0_operand (op, mode)
524 rtx op;
525 enum machine_mode mode;
526 {
527 return (op == const0_rtx || gr_register_operand (op, mode));
528 }
529
530 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
531
532 int
533 gr_reg_or_5bit_operand (op, mode)
534 rtx op;
535 enum machine_mode mode;
536 {
537 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
538 || GET_CODE (op) == CONSTANT_P_RTX
539 || gr_register_operand (op, mode));
540 }
541
542 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
543
544 int
545 gr_reg_or_6bit_operand (op, mode)
546 rtx op;
547 enum machine_mode mode;
548 {
549 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
550 || GET_CODE (op) == CONSTANT_P_RTX
551 || gr_register_operand (op, mode));
552 }
553
554 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
555
556 int
557 gr_reg_or_8bit_operand (op, mode)
558 rtx op;
559 enum machine_mode mode;
560 {
561 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
562 || GET_CODE (op) == CONSTANT_P_RTX
563 || gr_register_operand (op, mode));
564 }
565
566 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
567
568 int
569 grfr_reg_or_8bit_operand (op, mode)
570 rtx op;
571 enum machine_mode mode;
572 {
573 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
574 || GET_CODE (op) == CONSTANT_P_RTX
575 || grfr_register_operand (op, mode));
576 }
577
578 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
579 operand. */
580
581 int
582 gr_reg_or_8bit_adjusted_operand (op, mode)
583 rtx op;
584 enum machine_mode mode;
585 {
586 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
587 || GET_CODE (op) == CONSTANT_P_RTX
588 || gr_register_operand (op, mode));
589 }
590
591 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
592 immediate and an 8 bit adjusted immediate operand. This is necessary
593 because when we emit a compare, we don't know what the condition will be,
594    so we need the intersection of the immediates accepted by GT and LT.  */
595
596 int
597 gr_reg_or_8bit_and_adjusted_operand (op, mode)
598 rtx op;
599 enum machine_mode mode;
600 {
601 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
602 && CONST_OK_FOR_L (INTVAL (op)))
603 || GET_CODE (op) == CONSTANT_P_RTX
604 || gr_register_operand (op, mode));
605 }
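
/* For illustration (a sketch only, assuming the usual IA-64 constraint
   ranges of K = -128..127 for the normal 8-bit immediate and L = -127..128
   for the adjusted one): the constants accepted above lie in both ranges,
   i.e. -127..127, so the operand stays valid however the comparison is
   finally canonicalized.  */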
606
607 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
608
609 int
610 gr_reg_or_14bit_operand (op, mode)
611 rtx op;
612 enum machine_mode mode;
613 {
614 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
615 || GET_CODE (op) == CONSTANT_P_RTX
616 || gr_register_operand (op, mode));
617 }
618
619 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
620
621 int
622 gr_reg_or_22bit_operand (op, mode)
623 rtx op;
624 enum machine_mode mode;
625 {
626 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
627 || GET_CODE (op) == CONSTANT_P_RTX
628 || gr_register_operand (op, mode));
629 }
630
631 /* Return 1 if OP is a 6 bit immediate operand. */
632
633 int
634 shift_count_operand (op, mode)
635 rtx op;
636 enum machine_mode mode ATTRIBUTE_UNUSED;
637 {
638 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
639 || GET_CODE (op) == CONSTANT_P_RTX);
640 }
641
642 /* Return 1 if OP is a 5 bit immediate operand. */
643
644 int
645 shift_32bit_count_operand (op, mode)
646 rtx op;
647 enum machine_mode mode ATTRIBUTE_UNUSED;
648 {
649 return ((GET_CODE (op) == CONST_INT
650 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
651 || GET_CODE (op) == CONSTANT_P_RTX);
652 }
653
654 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
655
656 int
657 shladd_operand (op, mode)
658 rtx op;
659 enum machine_mode mode ATTRIBUTE_UNUSED;
660 {
661 return (GET_CODE (op) == CONST_INT
662 && (INTVAL (op) == 2 || INTVAL (op) == 4
663 || INTVAL (op) == 8 || INTVAL (op) == 16));
664 }
665
666 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
667
668 int
669 fetchadd_operand (op, mode)
670 rtx op;
671 enum machine_mode mode ATTRIBUTE_UNUSED;
672 {
673 return (GET_CODE (op) == CONST_INT
674 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
675 INTVAL (op) == -4 || INTVAL (op) == -1 ||
676 INTVAL (op) == 1 || INTVAL (op) == 4 ||
677 INTVAL (op) == 8 || INTVAL (op) == 16));
678 }
679
680 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
681
682 int
683 fr_reg_or_fp01_operand (op, mode)
684 rtx op;
685 enum machine_mode mode;
686 {
687 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
688 || fr_register_operand (op, mode));
689 }
690
691 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
692 POST_MODIFY with a REG as displacement. */
693
694 int
695 destination_operand (op, mode)
696 rtx op;
697 enum machine_mode mode;
698 {
699 if (! nonimmediate_operand (op, mode))
700 return 0;
701 if (GET_CODE (op) == MEM
702 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
703 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
704 return 0;
705 return 1;
706 }
707
708 /* Like memory_operand, but don't allow post-increments. */
709
710 int
711 not_postinc_memory_operand (op, mode)
712 rtx op;
713 enum machine_mode mode;
714 {
715 return (memory_operand (op, mode)
716 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
717 }
718
719 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
720 signed immediate operand. */
721
722 int
723 normal_comparison_operator (op, mode)
724 register rtx op;
725 enum machine_mode mode;
726 {
727 enum rtx_code code = GET_CODE (op);
728 return ((mode == VOIDmode || GET_MODE (op) == mode)
729 && (code == EQ || code == NE
730 || code == GT || code == LE || code == GTU || code == LEU));
731 }
732
733 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
734 signed immediate operand. */
735
736 int
737 adjusted_comparison_operator (op, mode)
738 register rtx op;
739 enum machine_mode mode;
740 {
741 enum rtx_code code = GET_CODE (op);
742 return ((mode == VOIDmode || GET_MODE (op) == mode)
743 && (code == LT || code == GE || code == LTU || code == GEU));
744 }
745
746 /* Return 1 if this is a signed inequality operator. */
747
748 int
749 signed_inequality_operator (op, mode)
750 register rtx op;
751 enum machine_mode mode;
752 {
753 enum rtx_code code = GET_CODE (op);
754 return ((mode == VOIDmode || GET_MODE (op) == mode)
755 && (code == GE || code == GT
756 || code == LE || code == LT));
757 }
758
759 /* Return 1 if this operator is valid for predication. */
760
761 int
762 predicate_operator (op, mode)
763 register rtx op;
764 enum machine_mode mode;
765 {
766 enum rtx_code code = GET_CODE (op);
767 return ((GET_MODE (op) == mode || mode == VOIDmode)
768 && (code == EQ || code == NE));
769 }
770
771 /* Return 1 if this operator can be used in a conditional operation. */
772
773 int
774 condop_operator (op, mode)
775 register rtx op;
776 enum machine_mode mode;
777 {
778 enum rtx_code code = GET_CODE (op);
779 return ((GET_MODE (op) == mode || mode == VOIDmode)
780 && (code == PLUS || code == MINUS || code == AND
781 || code == IOR || code == XOR));
782 }
783
784 /* Return 1 if this is the ar.lc register. */
785
786 int
787 ar_lc_reg_operand (op, mode)
788 register rtx op;
789 enum machine_mode mode;
790 {
791 return (GET_MODE (op) == DImode
792 && (mode == DImode || mode == VOIDmode)
793 && GET_CODE (op) == REG
794 && REGNO (op) == AR_LC_REGNUM);
795 }
796
797 /* Return 1 if this is the ar.ccv register. */
798
799 int
800 ar_ccv_reg_operand (op, mode)
801 register rtx op;
802 enum machine_mode mode;
803 {
804 return ((GET_MODE (op) == mode || mode == VOIDmode)
805 && GET_CODE (op) == REG
806 && REGNO (op) == AR_CCV_REGNUM);
807 }
808
809 /* Return 1 if this is the ar.pfs register. */
810
811 int
812 ar_pfs_reg_operand (op, mode)
813 register rtx op;
814 enum machine_mode mode;
815 {
816 return ((GET_MODE (op) == mode || mode == VOIDmode)
817 && GET_CODE (op) == REG
818 && REGNO (op) == AR_PFS_REGNUM);
819 }
820
821 /* Like general_operand, but don't allow (mem (addressof)). */
822
823 int
824 general_tfmode_operand (op, mode)
825 rtx op;
826 enum machine_mode mode;
827 {
828 if (! general_operand (op, mode))
829 return 0;
830 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
831 return 0;
832 return 1;
833 }
834
835 /* Similarly. */
836
837 int
838 destination_tfmode_operand (op, mode)
839 rtx op;
840 enum machine_mode mode;
841 {
842 if (! destination_operand (op, mode))
843 return 0;
844 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
845 return 0;
846 return 1;
847 }
848
849 /* Similarly. */
850
851 int
852 tfreg_or_fp01_operand (op, mode)
853 rtx op;
854 enum machine_mode mode;
855 {
856 if (GET_CODE (op) == SUBREG)
857 return 0;
858 return fr_reg_or_fp01_operand (op, mode);
859 }
860 \f
861 /* Return 1 if the operands of a move are ok. */
862
863 int
864 ia64_move_ok (dst, src)
865 rtx dst, src;
866 {
867 /* If we're under init_recog_no_volatile, we'll not be able to use
868 memory_operand. So check the code directly and don't worry about
869 the validity of the underlying address, which should have been
870 checked elsewhere anyway. */
871 if (GET_CODE (dst) != MEM)
872 return 1;
873 if (GET_CODE (src) == MEM)
874 return 0;
875 if (register_operand (src, VOIDmode))
876 return 1;
877
878   /* Otherwise, this must be a constant, and one that is either 0, 0.0 or 1.0.  */
879 if (INTEGRAL_MODE_P (GET_MODE (dst)))
880 return src == const0_rtx;
881 else
882 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
883 }
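
/* Illustrative summary of the cases above (not an exhaustive list): any
   move whose destination is a register is accepted; memory-to-memory is
   rejected; a constant store is accepted only when the value can come
   straight from a hardwired register, i.e. integer zero via r0 or the
   floating-point constants 0.0 and 1.0 via f0 and f1.  */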
884
885 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
886 Return the length of the field, or <= 0 on failure. */
887
888 int
889 ia64_depz_field_mask (rop, rshift)
890 rtx rop, rshift;
891 {
892 unsigned HOST_WIDE_INT op = INTVAL (rop);
893 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
894
895 /* Get rid of the zero bits we're shifting in. */
896 op >>= shift;
897
898 /* We must now have a solid block of 1's at bit 0. */
899 return exact_log2 (op + 1);
900 }
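
/* A worked example (illustrative only): for a mask of 0xff0 with a shift
   of 4, op becomes 0xff0 >> 4 = 0xff and exact_log2 (0xff + 1) = 8, so the
   dep.z field is 8 bits wide.  A mask such as 0xf0f fails: 0xf0f >> 4 is
   0xf0, and 0xf1 is not a power of two, so the result is negative.  */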
901
902 /* Expand a symbolic constant load. */
903 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
904
905 void
906 ia64_expand_load_address (dest, src, scratch)
907 rtx dest, src, scratch;
908 {
909 rtx temp;
910
911 /* The destination could be a MEM during initial rtl generation,
912 which isn't a valid destination for the PIC load address patterns. */
913 if (! register_operand (dest, DImode))
914 temp = gen_reg_rtx (DImode);
915 else
916 temp = dest;
917
918 if (TARGET_AUTO_PIC)
919 emit_insn (gen_load_gprel64 (temp, src));
920 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
921 emit_insn (gen_load_fptr (temp, src));
922 else if (sdata_symbolic_operand (src, DImode))
923 emit_insn (gen_load_gprel (temp, src));
924 else if (GET_CODE (src) == CONST
925 && GET_CODE (XEXP (src, 0)) == PLUS
926 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
927 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
928 {
929 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
930 rtx sym = XEXP (XEXP (src, 0), 0);
931 HOST_WIDE_INT ofs, hi, lo;
932
933 /* Split the offset into a sign extended 14-bit low part
934 and a complementary high part. */
935 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
936 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
937 hi = ofs - lo;
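
      /* A quick sanity example (illustrative only): for ofs = 0x3000 the
	 low 14 bits sign-extend to lo = -0x1000, giving hi = 0x4000; hi has
	 its low 14 bits clear and hi + lo reconstructs the original offset,
	 so sym+hi can be loaded first and lo added with a 14-bit add.  */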
938
939 if (! scratch)
940 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
941
942 emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
943 scratch));
944 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
945 }
946 else
947 {
948 rtx insn;
949 if (! scratch)
950 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
951
952 insn = emit_insn (gen_load_symptr (temp, src, scratch));
953 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
954 }
955
956 if (temp != dest)
957 emit_move_insn (dest, temp);
958 }
959
960 rtx
961 ia64_gp_save_reg (setjmp_p)
962 int setjmp_p;
963 {
964 rtx save = cfun->machine->ia64_gp_save;
965
966 if (save != NULL)
967 {
968 /* We can't save GP in a pseudo if we are calling setjmp, because
969 pseudos won't be restored by longjmp. For now, we save it in r4. */
970 /* ??? It would be more efficient to save this directly into a stack
971 slot. Unfortunately, the stack slot address gets cse'd across
972 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
973 place. */
974
975 /* ??? Get the barf bag, Virginia. We've got to replace this thing
976 in place, since this rtx is used in exception handling receivers.
977 Moreover, we must get this rtx out of regno_reg_rtx or reload
978 will do the wrong thing. */
979 unsigned int old_regno = REGNO (save);
980 if (setjmp_p && old_regno != GR_REG (4))
981 {
982 REGNO (save) = GR_REG (4);
983 regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
984 }
985 }
986 else
987 {
988 if (setjmp_p)
989 save = gen_rtx_REG (DImode, GR_REG (4));
990 else if (! optimize)
991 save = gen_rtx_REG (DImode, LOC_REG (0));
992 else
993 save = gen_reg_rtx (DImode);
994 cfun->machine->ia64_gp_save = save;
995 }
996
997 return save;
998 }
999
1000 /* Split a post-reload TImode reference into two DImode components. */
1001
1002 rtx
1003 ia64_split_timode (out, in, scratch)
1004 rtx out[2];
1005 rtx in, scratch;
1006 {
1007 switch (GET_CODE (in))
1008 {
1009 case REG:
1010 out[0] = gen_rtx_REG (DImode, REGNO (in));
1011 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
1012 return NULL_RTX;
1013
1014 case MEM:
1015 {
1016 rtx base = XEXP (in, 0);
1017
1018 switch (GET_CODE (base))
1019 {
1020 case REG:
1021 out[0] = adjust_address (in, DImode, 0);
1022 break;
1023 case POST_MODIFY:
1024 base = XEXP (base, 0);
1025 out[0] = adjust_address (in, DImode, 0);
1026 break;
1027
1028 /* Since we're changing the mode, we need to change to POST_MODIFY
1029 as well to preserve the size of the increment. Either that or
1030 do the update in two steps, but we've already got this scratch
1031 register handy so let's use it. */
1032 case POST_INC:
1033 base = XEXP (base, 0);
1034 out[0]
1035 = change_address (in, DImode,
1036 gen_rtx_POST_MODIFY
1037 (Pmode, base, plus_constant (base, 16)));
1038 break;
1039 case POST_DEC:
1040 base = XEXP (base, 0);
1041 out[0]
1042 = change_address (in, DImode,
1043 gen_rtx_POST_MODIFY
1044 (Pmode, base, plus_constant (base, -16)));
1045 break;
1046 default:
1047 abort ();
1048 }
1049
1050 if (scratch == NULL_RTX)
1051 abort ();
1052 out[1] = change_address (in, DImode, scratch);
1053 return gen_adddi3 (scratch, base, GEN_INT (8));
1054 }
1055
1056 case CONST_INT:
1057 case CONST_DOUBLE:
1058 split_double (in, &out[0], &out[1]);
1059 return NULL_RTX;
1060
1061 default:
1062 abort ();
1063 }
1064 }
1065
1066 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1067 through memory plus an extra GR scratch register. Except that you can
1068 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1069 SECONDARY_RELOAD_CLASS, but not both.
1070
1071 We got into problems in the first place by allowing a construct like
1072 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1073 This solution attempts to prevent this situation from occurring. When
1074 we see something like the above, we spill the inner register to memory. */
1075
1076 rtx
1077 spill_tfmode_operand (in, force)
1078 rtx in;
1079 int force;
1080 {
1081 if (GET_CODE (in) == SUBREG
1082 && GET_MODE (SUBREG_REG (in)) == TImode
1083 && GET_CODE (SUBREG_REG (in)) == REG)
1084 {
1085 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
1086 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1087 }
1088 else if (force && GET_CODE (in) == REG)
1089 {
1090 rtx mem = gen_mem_addressof (in, NULL_TREE);
1091 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1092 }
1093 else if (GET_CODE (in) == MEM
1094 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1095 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1096 else
1097 return in;
1098 }
1099
1100 /* Emit comparison instruction if necessary, returning the expression
1101 that holds the compare result in the proper mode. */
1102
1103 rtx
1104 ia64_expand_compare (code, mode)
1105 enum rtx_code code;
1106 enum machine_mode mode;
1107 {
1108 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1109 rtx cmp;
1110
1111 /* If we have a BImode input, then we already have a compare result, and
1112 do not need to emit another comparison. */
1113 if (GET_MODE (op0) == BImode)
1114 {
1115 if ((code == NE || code == EQ) && op1 == const0_rtx)
1116 cmp = op0;
1117 else
1118 abort ();
1119 }
1120 else
1121 {
1122 cmp = gen_reg_rtx (BImode);
1123 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1124 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1125 code = NE;
1126 }
1127
1128 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1129 }
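
/* For example (a sketch of the non-BImode path above): expanding a DImode
   test "op0 < op1" sets a fresh BImode pseudo CMP to (lt op0 op1), which
   ultimately becomes a cmp.lt writing a predicate register, and the
   expression handed back to the branch/scc expander is simply
   (ne CMP 0).  */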
1130
1131 /* Emit the appropriate sequence for a call. */
1132
1133 void
1134 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1135 rtx retval;
1136 rtx addr;
1137 rtx nextarg;
1138 int sibcall_p;
1139 {
1140 rtx insn, b0, pfs, gp_save, narg_rtx;
1141 int narg;
1142
1143 addr = XEXP (addr, 0);
1144 b0 = gen_rtx_REG (DImode, R_BR (0));
1145 pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);
1146
1147 if (! nextarg)
1148 narg = 0;
1149 else if (IN_REGNO_P (REGNO (nextarg)))
1150 narg = REGNO (nextarg) - IN_REG (0);
1151 else
1152 narg = REGNO (nextarg) - OUT_REG (0);
1153 narg_rtx = GEN_INT (narg);
1154
1155 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1156 {
1157 if (sibcall_p)
1158 insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
1159 else if (! retval)
1160 insn = gen_call_nopic (addr, narg_rtx, b0);
1161 else
1162 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1163 emit_call_insn (insn);
1164 return;
1165 }
1166
1167 if (sibcall_p)
1168 gp_save = NULL_RTX;
1169 else
1170 gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
1171
1172 /* If this is an indirect call, then we have the address of a descriptor. */
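  /* (As a reminder of the convention assumed here: an IA-64 function
     descriptor is a pair of 8-byte words holding the entry address and the
     callee's gp value, which is why the code below loads the branch target
     from [addr] and the new gp from [addr + 8].)  */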
1173 if (! symbolic_operand (addr, VOIDmode))
1174 {
1175 rtx dest;
1176
1177 if (! sibcall_p)
1178 emit_move_insn (gp_save, pic_offset_table_rtx);
1179
1180 dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
1181 emit_move_insn (pic_offset_table_rtx,
1182 gen_rtx_MEM (DImode, plus_constant (addr, 8)));
1183
1184 if (sibcall_p)
1185 insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
1186 else if (! retval)
1187 insn = gen_call_pic (dest, narg_rtx, b0);
1188 else
1189 insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
1190 emit_call_insn (insn);
1191
1192 if (! sibcall_p)
1193 emit_move_insn (pic_offset_table_rtx, gp_save);
1194 }
1195 else if (TARGET_CONST_GP)
1196 {
1197 if (sibcall_p)
1198 insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
1199 else if (! retval)
1200 insn = gen_call_nopic (addr, narg_rtx, b0);
1201 else
1202 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1203 emit_call_insn (insn);
1204 }
1205 else
1206 {
1207 if (sibcall_p)
1208 emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0, pfs));
1209 else
1210 {
1211 emit_move_insn (gp_save, pic_offset_table_rtx);
1212
1213 if (! retval)
1214 insn = gen_call_pic (addr, narg_rtx, b0);
1215 else
1216 insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
1217 emit_call_insn (insn);
1218
1219 emit_move_insn (pic_offset_table_rtx, gp_save);
1220 }
1221 }
1222 }
1223 \f
1224 /* Begin the assembly file. */
1225
1226 void
1227 emit_safe_across_calls (f)
1228 FILE *f;
1229 {
1230 unsigned int rs, re;
1231 int out_state;
1232
1233 rs = 1;
1234 out_state = 0;
1235 while (1)
1236 {
1237 while (rs < 64 && call_used_regs[PR_REG (rs)])
1238 rs++;
1239 if (rs >= 64)
1240 break;
1241 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1242 continue;
1243 if (out_state == 0)
1244 {
1245 fputs ("\t.pred.safe_across_calls ", f);
1246 out_state = 1;
1247 }
1248 else
1249 fputc (',', f);
1250 if (re == rs + 1)
1251 fprintf (f, "p%u", rs);
1252 else
1253 fprintf (f, "p%u-p%u", rs, re - 1);
1254 rs = re + 1;
1255 }
1256 if (out_state)
1257 fputc ('\n', f);
1258 }
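
/* As an illustration (hypothetical output; the exact ranges depend on which
   predicate registers end up call-saved), the loop above typically emits a
   single directive such as

	.pred.safe_across_calls p1-p5,p16-p63

   naming the predicate registers the assembler may assume survive calls.  */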
1259
1260
1261 /* Structure to be filled in by ia64_compute_frame_size with register
1262 save masks and offsets for the current function. */
1263
1264 struct ia64_frame_info
1265 {
1266 HOST_WIDE_INT total_size; /* size of the stack frame, not including
1267 the caller's scratch area. */
1268 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
1269 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
1270 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
1271 HARD_REG_SET mask; /* mask of saved registers. */
1272 unsigned int gr_used_mask; /* mask of registers in use as gr spill
1273 registers or long-term scratches. */
1274 int n_spilled; /* number of spilled registers. */
1275 int reg_fp; /* register for fp. */
1276 int reg_save_b0; /* save register for b0. */
1277 int reg_save_pr; /* save register for prs. */
1278 int reg_save_ar_pfs; /* save register for ar.pfs. */
1279 int reg_save_ar_unat; /* save register for ar.unat. */
1280 int reg_save_ar_lc; /* save register for ar.lc. */
1281 int n_input_regs; /* number of input registers used. */
1282 int n_local_regs; /* number of local registers used. */
1283 int n_output_regs; /* number of output registers used. */
1284 int n_rotate_regs; /* number of rotating registers used. */
1285
1286 char need_regstk; /* true if a .regstk directive needed. */
1287 char initialized; /* true if the data is finalized. */
1288 };
1289
1290 /* Current frame information calculated by ia64_compute_frame_size. */
1291 static struct ia64_frame_info current_frame_info;
1292
1293 /* Helper function for ia64_compute_frame_size: find an appropriate general
1294    register to spill some special register to.  GR0 to GR31 registers already
1295    allocated by this routine are tracked in current_frame_info.gr_used_mask.
1296 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1297
1298 static int
1299 find_gr_spill (try_locals)
1300 int try_locals;
1301 {
1302 int regno;
1303
1304 /* If this is a leaf function, first try an otherwise unused
1305 call-clobbered register. */
1306 if (current_function_is_leaf)
1307 {
1308 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1309 if (! regs_ever_live[regno]
1310 && call_used_regs[regno]
1311 && ! fixed_regs[regno]
1312 && ! global_regs[regno]
1313 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1314 {
1315 current_frame_info.gr_used_mask |= 1 << regno;
1316 return regno;
1317 }
1318 }
1319
1320 if (try_locals)
1321 {
1322 regno = current_frame_info.n_local_regs;
1323 /* If there is a frame pointer, then we can't use loc79, because
1324 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1325 reg_name switching code in ia64_expand_prologue. */
1326 if (regno < (80 - frame_pointer_needed))
1327 {
1328 current_frame_info.n_local_regs = regno + 1;
1329 return LOC_REG (0) + regno;
1330 }
1331 }
1332
1333 /* Failed to find a general register to spill to. Must use stack. */
1334 return 0;
1335 }
1336
1337 /* In order to make for nice schedules, we try to allocate every temporary
1338 to a different register. We must of course stay away from call-saved,
1339 fixed, and global registers. We must also stay away from registers
1340 allocated in current_frame_info.gr_used_mask, since those include regs
1341 used all through the prologue.
1342
1343 Any register allocated here must be used immediately. The idea is to
1344 aid scheduling, not to solve data flow problems. */
1345
1346 static int last_scratch_gr_reg;
1347
1348 static int
1349 next_scratch_gr_reg ()
1350 {
1351 int i, regno;
1352
1353 for (i = 0; i < 32; ++i)
1354 {
1355 regno = (last_scratch_gr_reg + i + 1) & 31;
1356 if (call_used_regs[regno]
1357 && ! fixed_regs[regno]
1358 && ! global_regs[regno]
1359 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1360 {
1361 last_scratch_gr_reg = regno;
1362 return regno;
1363 }
1364 }
1365
1366 /* There must be _something_ available. */
1367 abort ();
1368 }
1369
1370 /* Helper function for ia64_compute_frame_size, called through
1371 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1372
1373 static void
1374 mark_reg_gr_used_mask (reg, data)
1375 rtx reg;
1376 void *data ATTRIBUTE_UNUSED;
1377 {
1378 unsigned int regno = REGNO (reg);
1379 if (regno < 32)
1380 {
1381 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1382 for (i = 0; i < n; ++i)
1383 current_frame_info.gr_used_mask |= 1 << (regno + i);
1384 }
1385 }
1386
1387 /* Compute the size and layout of the stack frame for the current function,
1388    recording the results in current_frame_info.  SIZE is the number of bytes
1389    of space needed for local variables.  */
1390
1391 static void
1392 ia64_compute_frame_size (size)
1393 HOST_WIDE_INT size;
1394 {
1395 HOST_WIDE_INT total_size;
1396 HOST_WIDE_INT spill_size = 0;
1397 HOST_WIDE_INT extra_spill_size = 0;
1398 HOST_WIDE_INT pretend_args_size;
1399 HARD_REG_SET mask;
1400 int n_spilled = 0;
1401 int spilled_gr_p = 0;
1402 int spilled_fr_p = 0;
1403 unsigned int regno;
1404 int i;
1405
1406 if (current_frame_info.initialized)
1407 return;
1408
1409 memset (&current_frame_info, 0, sizeof current_frame_info);
1410 CLEAR_HARD_REG_SET (mask);
1411
1412 /* Don't allocate scratches to the return register. */
1413 diddle_return_value (mark_reg_gr_used_mask, NULL);
1414
1415 /* Don't allocate scratches to the EH scratch registers. */
1416 if (cfun->machine->ia64_eh_epilogue_sp)
1417 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1418 if (cfun->machine->ia64_eh_epilogue_bsp)
1419 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1420
1421 /* Find the size of the register stack frame. We have only 80 local
1422 registers, because we reserve 8 for the inputs and 8 for the
1423 outputs. */
1424
1425 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1426 since we'll be adjusting that down later. */
1427 regno = LOC_REG (78) + ! frame_pointer_needed;
1428 for (; regno >= LOC_REG (0); regno--)
1429 if (regs_ever_live[regno])
1430 break;
1431 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1432
1433 /* For functions marked with the syscall_linkage attribute, we must mark
1434 all eight input registers as in use, so that locals aren't visible to
1435 the caller. */
1436
1437 if (cfun->machine->n_varargs > 0
1438 || lookup_attribute ("syscall_linkage",
1439 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1440 current_frame_info.n_input_regs = 8;
1441 else
1442 {
1443 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1444 if (regs_ever_live[regno])
1445 break;
1446 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1447 }
1448
1449 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1450 if (regs_ever_live[regno])
1451 break;
1452 i = regno - OUT_REG (0) + 1;
1453
1454 /* When -p profiling, we need one output register for the mcount argument.
1455      Likewise for -a profiling for the bb_init_func argument.  For -ax
1456 profiling, we need two output registers for the two bb_init_trace_func
1457 arguments. */
1458 if (current_function_profile)
1459 i = MAX (i, 1);
1460 current_frame_info.n_output_regs = i;
1461
1462 /* ??? No rotating register support yet. */
1463 current_frame_info.n_rotate_regs = 0;
1464
1465 /* Discover which registers need spilling, and how much room that
1466 will take. Begin with floating point and general registers,
1467 which will always wind up on the stack. */
1468
1469 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1470 if (regs_ever_live[regno] && ! call_used_regs[regno])
1471 {
1472 SET_HARD_REG_BIT (mask, regno);
1473 spill_size += 16;
1474 n_spilled += 1;
1475 spilled_fr_p = 1;
1476 }
1477
1478 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1479 if (regs_ever_live[regno] && ! call_used_regs[regno])
1480 {
1481 SET_HARD_REG_BIT (mask, regno);
1482 spill_size += 8;
1483 n_spilled += 1;
1484 spilled_gr_p = 1;
1485 }
1486
1487 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1488 if (regs_ever_live[regno] && ! call_used_regs[regno])
1489 {
1490 SET_HARD_REG_BIT (mask, regno);
1491 spill_size += 8;
1492 n_spilled += 1;
1493 }
1494
1495 /* Now come all special registers that might get saved in other
1496 general registers. */
1497
1498 if (frame_pointer_needed)
1499 {
1500 current_frame_info.reg_fp = find_gr_spill (1);
1501 /* If we did not get a register, then we take LOC79. This is guaranteed
1502 to be free, even if regs_ever_live is already set, because this is
1503 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1504 as we don't count loc79 above. */
1505 if (current_frame_info.reg_fp == 0)
1506 {
1507 current_frame_info.reg_fp = LOC_REG (79);
1508 current_frame_info.n_local_regs++;
1509 }
1510 }
1511
1512 if (! current_function_is_leaf)
1513 {
1514 /* Emit a save of BR0 if we call other functions. Do this even
1515 if this function doesn't return, as EH depends on this to be
1516 able to unwind the stack. */
1517 SET_HARD_REG_BIT (mask, BR_REG (0));
1518
1519 current_frame_info.reg_save_b0 = find_gr_spill (1);
1520 if (current_frame_info.reg_save_b0 == 0)
1521 {
1522 spill_size += 8;
1523 n_spilled += 1;
1524 }
1525
1526 /* Similarly for ar.pfs. */
1527 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1528 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1529 if (current_frame_info.reg_save_ar_pfs == 0)
1530 {
1531 extra_spill_size += 8;
1532 n_spilled += 1;
1533 }
1534 }
1535 else
1536 {
1537 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1538 {
1539 SET_HARD_REG_BIT (mask, BR_REG (0));
1540 spill_size += 8;
1541 n_spilled += 1;
1542 }
1543 }
1544
1545 /* Unwind descriptor hackery: things are most efficient if we allocate
1546 consecutive GR save registers for RP, PFS, FP in that order. However,
1547 it is absolutely critical that FP get the only hard register that's
1548 guaranteed to be free, so we allocated it first. If all three did
1549 happen to be allocated hard regs, and are consecutive, rearrange them
1550 into the preferred order now. */
1551 if (current_frame_info.reg_fp != 0
1552 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1553 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1554 {
1555 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1556 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1557 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1558 }
1559
1560 /* See if we need to store the predicate register block. */
1561 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1562 if (regs_ever_live[regno] && ! call_used_regs[regno])
1563 break;
1564 if (regno <= PR_REG (63))
1565 {
1566 SET_HARD_REG_BIT (mask, PR_REG (0));
1567 current_frame_info.reg_save_pr = find_gr_spill (1);
1568 if (current_frame_info.reg_save_pr == 0)
1569 {
1570 extra_spill_size += 8;
1571 n_spilled += 1;
1572 }
1573
1574 /* ??? Mark them all as used so that register renaming and such
1575 are free to use them. */
1576 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1577 regs_ever_live[regno] = 1;
1578 }
1579
1580 /* If we're forced to use st8.spill, we're forced to save and restore
1581 ar.unat as well. */
1582 if (spilled_gr_p || cfun->machine->n_varargs)
1583 {
1584 regs_ever_live[AR_UNAT_REGNUM] = 1;
1585 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1586 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1587 if (current_frame_info.reg_save_ar_unat == 0)
1588 {
1589 extra_spill_size += 8;
1590 n_spilled += 1;
1591 }
1592 }
1593
1594 if (regs_ever_live[AR_LC_REGNUM])
1595 {
1596 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1597 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1598 if (current_frame_info.reg_save_ar_lc == 0)
1599 {
1600 extra_spill_size += 8;
1601 n_spilled += 1;
1602 }
1603 }
1604
1605 /* If we have an odd number of words of pretend arguments written to
1606 the stack, then the FR save area will be unaligned. We round the
1607 size of this area up to keep things 16 byte aligned. */
1608 if (spilled_fr_p)
1609 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1610 else
1611 pretend_args_size = current_function_pretend_args_size;
1612
1613 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1614 + current_function_outgoing_args_size);
1615 total_size = IA64_STACK_ALIGN (total_size);
1616
1617 /* We always use the 16-byte scratch area provided by the caller, but
1618 if we are a leaf function, there's no one to which we need to provide
1619 a scratch area. */
1620 if (current_function_is_leaf)
1621 total_size = MAX (0, total_size - 16);
1622
1623 current_frame_info.total_size = total_size;
1624 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1625 current_frame_info.spill_size = spill_size;
1626 current_frame_info.extra_spill_size = extra_spill_size;
1627 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1628 current_frame_info.n_spilled = n_spilled;
1629 current_frame_info.initialized = reload_completed;
1630 }
1631
1632 /* Compute the initial difference between the specified pair of registers. */
1633
1634 HOST_WIDE_INT
1635 ia64_initial_elimination_offset (from, to)
1636 int from, to;
1637 {
1638 HOST_WIDE_INT offset;
1639
1640 ia64_compute_frame_size (get_frame_size ());
1641 switch (from)
1642 {
1643 case FRAME_POINTER_REGNUM:
1644 if (to == HARD_FRAME_POINTER_REGNUM)
1645 {
1646 if (current_function_is_leaf)
1647 offset = -current_frame_info.total_size;
1648 else
1649 offset = -(current_frame_info.total_size
1650 - current_function_outgoing_args_size - 16);
1651 }
1652 else if (to == STACK_POINTER_REGNUM)
1653 {
1654 if (current_function_is_leaf)
1655 offset = 0;
1656 else
1657 offset = 16 + current_function_outgoing_args_size;
1658 }
1659 else
1660 abort ();
1661 break;
1662
1663 case ARG_POINTER_REGNUM:
1664      /* Arguments start above the 16 byte save area, unless stdarg is used,
1665	 in which case we store through the 16 byte save area.  */
1666 if (to == HARD_FRAME_POINTER_REGNUM)
1667 offset = 16 - current_function_pretend_args_size;
1668 else if (to == STACK_POINTER_REGNUM)
1669 offset = (current_frame_info.total_size
1670 + 16 - current_function_pretend_args_size);
1671 else
1672 abort ();
1673 break;
1674
1675 case RETURN_ADDRESS_POINTER_REGNUM:
1676 offset = 0;
1677 break;
1678
1679 default:
1680 abort ();
1681 }
1682
1683 return offset;
1684 }
1685
1686 /* If there are more than a trivial number of register spills, we use
1687 two interleaved iterators so that we can get two memory references
1688 per insn group.
1689
1690 In order to simplify things in the prologue and epilogue expanders,
1691 we use helper functions to fix up the memory references after the
1692 fact with the appropriate offsets to a POST_MODIFY memory mode.
1693 The following data structure tracks the state of the two iterators
1694 while insns are being emitted. */
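
/* For instance (a simplified sketch of the scheme above): with more than
   two spills, n_iter is 2 and successive spills alternate between
   iter_reg[0] and iter_reg[1].  The resulting st8.spill/stf.spill insns
   then use two independent, separately post-incremented base registers,
   so a pair of them can issue in one insn group rather than serializing
   on a single pointer update.  */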
1695
1696 struct spill_fill_data
1697 {
1698 rtx init_after; /* point at which to emit initializations */
1699 rtx init_reg[2]; /* initial base register */
1700 rtx iter_reg[2]; /* the iterator registers */
1701 rtx *prev_addr[2]; /* address of last memory use */
1702 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
1703 HOST_WIDE_INT prev_off[2]; /* last offset */
1704 int n_iter; /* number of iterators in use */
1705 int next_iter; /* next iterator to use */
1706 unsigned int save_gr_used_mask;
1707 };
1708
1709 static struct spill_fill_data spill_fill_data;
1710
1711 static void
1712 setup_spill_pointers (n_spills, init_reg, cfa_off)
1713 int n_spills;
1714 rtx init_reg;
1715 HOST_WIDE_INT cfa_off;
1716 {
1717 int i;
1718
1719 spill_fill_data.init_after = get_last_insn ();
1720 spill_fill_data.init_reg[0] = init_reg;
1721 spill_fill_data.init_reg[1] = init_reg;
1722 spill_fill_data.prev_addr[0] = NULL;
1723 spill_fill_data.prev_addr[1] = NULL;
1724 spill_fill_data.prev_insn[0] = NULL;
1725 spill_fill_data.prev_insn[1] = NULL;
1726 spill_fill_data.prev_off[0] = cfa_off;
1727 spill_fill_data.prev_off[1] = cfa_off;
1728 spill_fill_data.next_iter = 0;
1729 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1730
1731 spill_fill_data.n_iter = 1 + (n_spills > 2);
1732 for (i = 0; i < spill_fill_data.n_iter; ++i)
1733 {
1734 int regno = next_scratch_gr_reg ();
1735 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1736 current_frame_info.gr_used_mask |= 1 << regno;
1737 }
1738 }
1739
1740 static void
1741 finish_spill_pointers ()
1742 {
1743 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1744 }
1745
1746 static rtx
1747 spill_restore_mem (reg, cfa_off)
1748 rtx reg;
1749 HOST_WIDE_INT cfa_off;
1750 {
1751 int iter = spill_fill_data.next_iter;
1752 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1753 rtx disp_rtx = GEN_INT (disp);
1754 rtx mem;
1755
1756 if (spill_fill_data.prev_addr[iter])
1757 {
1758 if (CONST_OK_FOR_N (disp))
1759 {
1760 *spill_fill_data.prev_addr[iter]
1761 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1762 gen_rtx_PLUS (DImode,
1763 spill_fill_data.iter_reg[iter],
1764 disp_rtx));
1765 REG_NOTES (spill_fill_data.prev_insn[iter])
1766 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
1767 REG_NOTES (spill_fill_data.prev_insn[iter]));
1768 }
1769 else
1770 {
1771 /* ??? Could use register post_modify for loads. */
1772 if (! CONST_OK_FOR_I (disp))
1773 {
1774 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1775 emit_move_insn (tmp, disp_rtx);
1776 disp_rtx = tmp;
1777 }
1778 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1779 spill_fill_data.iter_reg[iter], disp_rtx));
1780 }
1781 }
1782 /* Micro-optimization: if we've created a frame pointer, it's at
1783 CFA 0, which may allow the real iterator to be initialized lower,
1784 slightly increasing parallelism. Also, if there are few saves
1785 it may eliminate the iterator entirely. */
1786 else if (disp == 0
1787 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1788 && frame_pointer_needed)
1789 {
1790 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1791 set_mem_alias_set (mem, get_varargs_alias_set ());
1792 return mem;
1793 }
1794 else
1795 {
1796 rtx seq, insn;
1797
1798 if (disp == 0)
1799 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1800 spill_fill_data.init_reg[iter]);
1801 else
1802 {
1803 start_sequence ();
1804
1805 if (! CONST_OK_FOR_I (disp))
1806 {
1807 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1808 emit_move_insn (tmp, disp_rtx);
1809 disp_rtx = tmp;
1810 }
1811
1812 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1813 spill_fill_data.init_reg[iter],
1814 disp_rtx));
1815
1816 seq = gen_sequence ();
1817 end_sequence ();
1818 }
1819
1820 /* Careful for being the first insn in a sequence. */
1821 if (spill_fill_data.init_after)
1822 insn = emit_insn_after (seq, spill_fill_data.init_after);
1823 else
1824 {
1825 rtx first = get_insns ();
1826 if (first)
1827 insn = emit_insn_before (seq, first);
1828 else
1829 insn = emit_insn (seq);
1830 }
1831 spill_fill_data.init_after = insn;
1832
1833 /* If DISP is 0, we may or may not have a further adjustment
1834 afterward. If we do, then the load/store insn may be modified
1835 to be a post-modify. If we don't, then this copy may be
1836 eliminated by copyprop_hardreg_forward, which makes this
1837 insn garbage, which runs afoul of the sanity check in
1838 propagate_one_insn. So mark this insn as legal to delete. */
1839 if (disp == 0)
1840 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1841 REG_NOTES (insn));
1842 }
1843
1844 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1845
1846 /* ??? Not all of the spills are for varargs, but some of them are.
1847 The rest of the spills belong in an alias set of their own. But
1848 it doesn't actually hurt to include them here. */
1849 set_mem_alias_set (mem, get_varargs_alias_set ());
1850
1851 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1852 spill_fill_data.prev_off[iter] = cfa_off;
1853
1854 if (++iter >= spill_fill_data.n_iter)
1855 iter = 0;
1856 spill_fill_data.next_iter = iter;
1857
1858 return mem;
1859 }
1860
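/* Emit, via MOVE_FN, a store of REG into the save area slot at offset
   CFA_OFF from the CFA.  If FRAME_REG is nonnull, mark the insn frame
   related and attach a REG_FRAME_RELATED_EXPR note giving the save
   address in terms of the stack or frame pointer, since the unwind
   code cannot follow the post_modify iterators itself.  */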
1861 static void
1862 do_spill (move_fn, reg, cfa_off, frame_reg)
1863 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1864 rtx reg, frame_reg;
1865 HOST_WIDE_INT cfa_off;
1866 {
1867 int iter = spill_fill_data.next_iter;
1868 rtx mem, insn;
1869
1870 mem = spill_restore_mem (reg, cfa_off);
1871 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1872 spill_fill_data.prev_insn[iter] = insn;
1873
1874 if (frame_reg)
1875 {
1876 rtx base;
1877 HOST_WIDE_INT off;
1878
1879 RTX_FRAME_RELATED_P (insn) = 1;
1880
1881 /* Don't even pretend that the unwind code can intuit its way
1882 through a pair of interleaved post_modify iterators. Just
1883 provide the correct answer. */
1884
1885 if (frame_pointer_needed)
1886 {
1887 base = hard_frame_pointer_rtx;
1888 off = - cfa_off;
1889 }
1890 else
1891 {
1892 base = stack_pointer_rtx;
1893 off = current_frame_info.total_size - cfa_off;
1894 }
1895
1896 REG_NOTES (insn)
1897 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1898 gen_rtx_SET (VOIDmode,
1899 gen_rtx_MEM (GET_MODE (reg),
1900 plus_constant (base, off)),
1901 frame_reg),
1902 REG_NOTES (insn));
1903 }
1904 }
1905
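/* Counterpart of do_spill: emit, via MOVE_FN, a load of REG from the
   save area slot at offset CFA_OFF from the CFA.  */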
1906 static void
1907 do_restore (move_fn, reg, cfa_off)
1908 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1909 rtx reg;
1910 HOST_WIDE_INT cfa_off;
1911 {
1912 int iter = spill_fill_data.next_iter;
1913 rtx insn;
1914
1915 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1916 GEN_INT (cfa_off)));
1917 spill_fill_data.prev_insn[iter] = insn;
1918 }
1919
1920 /* Wrapper functions that discard the CONST_INT spill offset. These
1921 exist so that we can give gr_spill/gr_fill the offset they need and
1922 use a consistent function interface. */
1923
1924 static rtx
1925 gen_movdi_x (dest, src, offset)
1926 rtx dest, src;
1927 rtx offset ATTRIBUTE_UNUSED;
1928 {
1929 return gen_movdi (dest, src);
1930 }
1931
1932 static rtx
1933 gen_fr_spill_x (dest, src, offset)
1934 rtx dest, src;
1935 rtx offset ATTRIBUTE_UNUSED;
1936 {
1937 return gen_fr_spill (dest, src);
1938 }
1939
1940 static rtx
1941 gen_fr_restore_x (dest, src, offset)
1942 rtx dest, src;
1943 rtx offset ATTRIBUTE_UNUSED;
1944 {
1945 return gen_fr_restore (dest, src);
1946 }
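/* For example, the prologue code below uses
     do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
   for plain 8 byte stores, where the offset argument is simply dropped,
   while
     do_spill (gen_gr_spill, reg, cfa_off, reg);
   passes the offset through, since the gr_spill/gr_fill patterns really
   do consume it.  */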
1947
1948 /* Called after register allocation to add any instructions needed for the
1949 prologue. Using a prologue insn is favored compared to putting all of the
1950 instructions in output_function_prologue(), since it allows the scheduler
1951 to intermix instructions with the saves of the caller saved registers. In
1952 some cases, it might be necessary to emit a barrier instruction as the last
1953 insn to prevent such scheduling.
1954
1955 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1956 so that the debug info generation code can handle them properly.
1957
1958 The register save area is laid out like so:
1959 cfa+16
1960 [ varargs spill area ]
1961 [ fr register spill area ]
1962 [ br register spill area ]
1963 [ ar register spill area ]
1964 [ pr register spill area ]
1965 [ gr register spill area ] */
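/* Once the varargs registers are spilled, CFA_OFF below tracks the
   offset of the next save slot relative to the CFA: it starts at
   spill_cfa_off + spill_size + extra_spill_size and is decremented by
   8 for each GR/BR/AR/PR slot and by 16 for each FR slot, with abort
   checks that it lands back on spill_cfa_off + spill_size at the base
   of the gr/br/fr area and on spill_cfa_off at the end.  */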
1966
1967 /* ??? Get inefficient code when the frame size is larger than can fit in an
1968 adds instruction. */
1969
1970 void
1971 ia64_expand_prologue ()
1972 {
1973 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1974 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1975 rtx reg, alt_reg;
1976
1977 ia64_compute_frame_size (get_frame_size ());
1978 last_scratch_gr_reg = 15;
1979
1980 /* If there is no epilogue, then we don't need some prologue insns.
1981 We need to avoid emitting the dead prologue insns, because flow
1982 will complain about them. */
1983 if (optimize)
1984 {
1985 edge e;
1986
1987 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1988 if ((e->flags & EDGE_FAKE) == 0
1989 && (e->flags & EDGE_FALLTHRU) != 0)
1990 break;
1991 epilogue_p = (e != NULL);
1992 }
1993 else
1994 epilogue_p = 1;
1995
1996 /* Set the local, input, and output register names. We need to do this
1997 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1998 half. If we use in/loc/out register names, then we get assembler errors
1999 in crtn.S because there is no alloc insn or regstk directive in there. */
2000 if (! TARGET_REG_NAMES)
2001 {
2002 int inputs = current_frame_info.n_input_regs;
2003 int locals = current_frame_info.n_local_regs;
2004 int outputs = current_frame_info.n_output_regs;
2005
2006 for (i = 0; i < inputs; i++)
2007 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2008 for (i = 0; i < locals; i++)
2009 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2010 for (i = 0; i < outputs; i++)
2011 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2012 }
2013
2014 /* Set the frame pointer register name. The regnum is logically loc79,
2015 but of course we'll not have allocated that many locals. Rather than
2016 worrying about renumbering the existing rtxs, we adjust the name. */
2017 /* ??? This code means that we can never use one local register when
2018 there is a frame pointer. loc79 gets wasted in this case, as it is
2019 renamed to a register that will never be used. See also the try_locals
2020 code in find_gr_spill. */
2021 if (current_frame_info.reg_fp)
2022 {
2023 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2024 reg_names[HARD_FRAME_POINTER_REGNUM]
2025 = reg_names[current_frame_info.reg_fp];
2026 reg_names[current_frame_info.reg_fp] = tmp;
2027 }
2028
2029 /* Fix up the return address placeholder. */
2030 /* ??? We can fail if __builtin_return_address is used, and we didn't
2031 allocate a register in which to save b0. I can't think of a way to
2032 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2033 then be sure that I got the right one. Further, reload doesn't seem
2034 to care if an eliminable register isn't used, and "eliminates" it
2035 anyway. */
2036 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2037 && current_frame_info.reg_save_b0 != 0)
2038 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2039
2040 /* We don't need an alloc instruction if we've used no outputs or locals. */
2041 if (current_frame_info.n_local_regs == 0
2042 && current_frame_info.n_output_regs == 0
2043 && current_frame_info.n_input_regs <= current_function_args_info.words)
2044 {
2045 /* If there is no alloc, but there are input registers used, then we
2046 need a .regstk directive. */
2047 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2048 ar_pfs_save_reg = NULL_RTX;
2049 }
2050 else
2051 {
2052 current_frame_info.need_regstk = 0;
2053
2054 if (current_frame_info.reg_save_ar_pfs)
2055 regno = current_frame_info.reg_save_ar_pfs;
2056 else
2057 regno = next_scratch_gr_reg ();
2058 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2059
2060 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2061 GEN_INT (current_frame_info.n_input_regs),
2062 GEN_INT (current_frame_info.n_local_regs),
2063 GEN_INT (current_frame_info.n_output_regs),
2064 GEN_INT (current_frame_info.n_rotate_regs)));
2065 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2066 }
2067
2068 /* Set up frame pointer, stack pointer, and spill iterators. */
2069
2070 n_varargs = cfun->machine->n_varargs;
2071 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2072 stack_pointer_rtx, 0);
2073
2074 if (frame_pointer_needed)
2075 {
2076 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2077 RTX_FRAME_RELATED_P (insn) = 1;
2078 }
2079
2080 if (current_frame_info.total_size != 0)
2081 {
2082 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2083 rtx offset;
2084
2085 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2086 offset = frame_size_rtx;
2087 else
2088 {
2089 regno = next_scratch_gr_reg ();
2090 offset = gen_rtx_REG (DImode, regno);
2091 emit_move_insn (offset, frame_size_rtx);
2092 }
2093
2094 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2095 stack_pointer_rtx, offset));
2096
2097 if (! frame_pointer_needed)
2098 {
2099 RTX_FRAME_RELATED_P (insn) = 1;
2100 if (GET_CODE (offset) != CONST_INT)
2101 {
2102 REG_NOTES (insn)
2103 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2104 gen_rtx_SET (VOIDmode,
2105 stack_pointer_rtx,
2106 gen_rtx_PLUS (DImode,
2107 stack_pointer_rtx,
2108 frame_size_rtx)),
2109 REG_NOTES (insn));
2110 }
2111 }
2112
2113 /* ??? At this point we must generate a magic insn that appears to
2114 modify the stack pointer, the frame pointer, and all spill
2115 iterators. This would allow the most scheduling freedom. For
2116 now, just hard stop. */
2117 emit_insn (gen_blockage ());
2118 }
2119
2120 /* Must copy out ar.unat before doing any integer spills. */
2121 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2122 {
2123 if (current_frame_info.reg_save_ar_unat)
2124 ar_unat_save_reg
2125 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2126 else
2127 {
2128 alt_regno = next_scratch_gr_reg ();
2129 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2130 current_frame_info.gr_used_mask |= 1 << alt_regno;
2131 }
2132
2133 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2134 insn = emit_move_insn (ar_unat_save_reg, reg);
2135 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2136
2137 /* Even if we're not going to generate an epilogue, we still
2138 need to save the register so that EH works. */
2139 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2140 emit_insn (gen_prologue_use (ar_unat_save_reg));
2141 }
2142 else
2143 ar_unat_save_reg = NULL_RTX;
2144
2145 /* Spill all varargs registers. Do this before spilling any GR registers,
2146 since we want the UNAT bits for the GR registers to override the UNAT
2147 bits from varargs, which we don't care about. */
2148
2149 cfa_off = -16;
2150 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2151 {
2152 reg = gen_rtx_REG (DImode, regno);
2153 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2154 }
2155
2156 /* Locate the bottom of the register save area. */
2157 cfa_off = (current_frame_info.spill_cfa_off
2158 + current_frame_info.spill_size
2159 + current_frame_info.extra_spill_size);
2160
2161 /* Save the predicate register block either in a register or in memory. */
2162 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2163 {
2164 reg = gen_rtx_REG (DImode, PR_REG (0));
2165 if (current_frame_info.reg_save_pr != 0)
2166 {
2167 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2168 insn = emit_move_insn (alt_reg, reg);
2169
2170 /* ??? Denote pr spill/fill by a DImode move that modifies all
2171 64 hard registers. */
2172 RTX_FRAME_RELATED_P (insn) = 1;
2173 REG_NOTES (insn)
2174 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2175 gen_rtx_SET (VOIDmode, alt_reg, reg),
2176 REG_NOTES (insn));
2177
2178 /* Even if we're not going to generate an epilogue, we still
2179 need to save the register so that EH works. */
2180 if (! epilogue_p)
2181 emit_insn (gen_prologue_use (alt_reg));
2182 }
2183 else
2184 {
2185 alt_regno = next_scratch_gr_reg ();
2186 alt_reg = gen_rtx_REG (DImode, alt_regno);
2187 insn = emit_move_insn (alt_reg, reg);
2188 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2189 cfa_off -= 8;
2190 }
2191 }
2192
2193 /* Handle AR regs in numerical order. All of them get special handling. */
2194 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2195 && current_frame_info.reg_save_ar_unat == 0)
2196 {
2197 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2198 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2199 cfa_off -= 8;
2200 }
2201
2202 /* The alloc insn already copied ar.pfs into a general register. The
2203 only thing we have to do now is copy that register to a stack slot
2204 if we'd not allocated a local register for the job. */
2205 if (current_frame_info.reg_save_ar_pfs == 0
2206 && ! current_function_is_leaf)
2207 {
2208 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2209 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2210 cfa_off -= 8;
2211 }
2212
2213 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2214 {
2215 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2216 if (current_frame_info.reg_save_ar_lc != 0)
2217 {
2218 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2219 insn = emit_move_insn (alt_reg, reg);
2220 RTX_FRAME_RELATED_P (insn) = 1;
2221
2222 /* Even if we're not going to generate an epilogue, we still
2223 need to save the register so that EH works. */
2224 if (! epilogue_p)
2225 emit_insn (gen_prologue_use (alt_reg));
2226 }
2227 else
2228 {
2229 alt_regno = next_scratch_gr_reg ();
2230 alt_reg = gen_rtx_REG (DImode, alt_regno);
2231 emit_move_insn (alt_reg, reg);
2232 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2233 cfa_off -= 8;
2234 }
2235 }
2236
2237 /* We should now be at the base of the gr/br/fr spill area. */
2238 if (cfa_off != (current_frame_info.spill_cfa_off
2239 + current_frame_info.spill_size))
2240 abort ();
2241
2242 /* Spill all general registers. */
2243 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2244 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2245 {
2246 reg = gen_rtx_REG (DImode, regno);
2247 do_spill (gen_gr_spill, reg, cfa_off, reg);
2248 cfa_off -= 8;
2249 }
2250
2251 /* Handle BR0 specially -- it may be getting stored permanently in
2252 some GR register. */
2253 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2254 {
2255 reg = gen_rtx_REG (DImode, BR_REG (0));
2256 if (current_frame_info.reg_save_b0 != 0)
2257 {
2258 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2259 insn = emit_move_insn (alt_reg, reg);
2260 RTX_FRAME_RELATED_P (insn) = 1;
2261
2262 /* Even if we're not going to generate an epilogue, we still
2263 need to save the register so that EH works. */
2264 if (! epilogue_p)
2265 emit_insn (gen_prologue_use (alt_reg));
2266 }
2267 else
2268 {
2269 alt_regno = next_scratch_gr_reg ();
2270 alt_reg = gen_rtx_REG (DImode, alt_regno);
2271 emit_move_insn (alt_reg, reg);
2272 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2273 cfa_off -= 8;
2274 }
2275 }
2276
2277 /* Spill the rest of the BR registers. */
2278 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2279 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2280 {
2281 alt_regno = next_scratch_gr_reg ();
2282 alt_reg = gen_rtx_REG (DImode, alt_regno);
2283 reg = gen_rtx_REG (DImode, regno);
2284 emit_move_insn (alt_reg, reg);
2285 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2286 cfa_off -= 8;
2287 }
2288
2289 /* Align the frame and spill all FR registers. */
2290 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2291 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2292 {
2293 if (cfa_off & 15)
2294 abort ();
2295 reg = gen_rtx_REG (TFmode, regno);
2296 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2297 cfa_off -= 16;
2298 }
2299
2300 if (cfa_off != current_frame_info.spill_cfa_off)
2301 abort ();
2302
2303 finish_spill_pointers ();
2304 }
2305
2306 /* Called after register allocation to add any instructions needed for the
2307 epilogue. Using an epilogue insn is favored compared to putting all of the
2308 instructions in output_function_prologue(), since it allows the scheduler
2309 to intermix instructions with the saves of the caller saved registers. In
2310 some cases, it might be necessary to emit a barrier instruction as the last
2311 insn to prevent such scheduling. */
2312
2313 void
2314 ia64_expand_epilogue (sibcall_p)
2315 int sibcall_p;
2316 {
2317 rtx insn, reg, alt_reg, ar_unat_save_reg;
2318 int regno, alt_regno, cfa_off;
2319
2320 ia64_compute_frame_size (get_frame_size ());
2321
2322 /* If there is a frame pointer, then we use it instead of the stack
2323 pointer, so that the stack pointer does not need to be valid when
2324 the epilogue starts. See EXIT_IGNORE_STACK. */
2325 if (frame_pointer_needed)
2326 setup_spill_pointers (current_frame_info.n_spilled,
2327 hard_frame_pointer_rtx, 0);
2328 else
2329 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2330 current_frame_info.total_size);
2331
2332 if (current_frame_info.total_size != 0)
2333 {
2334 /* ??? At this point we must generate a magic insn that appears to
2335 modify the spill iterators and the frame pointer. This would
2336 allow the most scheduling freedom. For now, just hard stop. */
2337 emit_insn (gen_blockage ());
2338 }
2339
2340 /* Locate the bottom of the register save area. */
2341 cfa_off = (current_frame_info.spill_cfa_off
2342 + current_frame_info.spill_size
2343 + current_frame_info.extra_spill_size);
2344
2345 /* Restore the predicate registers. */
2346 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2347 {
2348 if (current_frame_info.reg_save_pr != 0)
2349 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2350 else
2351 {
2352 alt_regno = next_scratch_gr_reg ();
2353 alt_reg = gen_rtx_REG (DImode, alt_regno);
2354 do_restore (gen_movdi_x, alt_reg, cfa_off);
2355 cfa_off -= 8;
2356 }
2357 reg = gen_rtx_REG (DImode, PR_REG (0));
2358 emit_move_insn (reg, alt_reg);
2359 }
2360
2361 /* Restore the application registers. */
2362
2363 /* Load the saved unat from the stack, but do not restore it until
2364 after the GRs have been restored. */
2365 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2366 {
2367 if (current_frame_info.reg_save_ar_unat != 0)
2368 ar_unat_save_reg
2369 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2370 else
2371 {
2372 alt_regno = next_scratch_gr_reg ();
2373 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2374 current_frame_info.gr_used_mask |= 1 << alt_regno;
2375 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2376 cfa_off -= 8;
2377 }
2378 }
2379 else
2380 ar_unat_save_reg = NULL_RTX;
2381
2382 if (current_frame_info.reg_save_ar_pfs != 0)
2383 {
2384 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2385 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2386 emit_move_insn (reg, alt_reg);
2387 }
2388 else if (! current_function_is_leaf)
2389 {
2390 alt_regno = next_scratch_gr_reg ();
2391 alt_reg = gen_rtx_REG (DImode, alt_regno);
2392 do_restore (gen_movdi_x, alt_reg, cfa_off);
2393 cfa_off -= 8;
2394 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2395 emit_move_insn (reg, alt_reg);
2396 }
2397
2398 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2399 {
2400 if (current_frame_info.reg_save_ar_lc != 0)
2401 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2402 else
2403 {
2404 alt_regno = next_scratch_gr_reg ();
2405 alt_reg = gen_rtx_REG (DImode, alt_regno);
2406 do_restore (gen_movdi_x, alt_reg, cfa_off);
2407 cfa_off -= 8;
2408 }
2409 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2410 emit_move_insn (reg, alt_reg);
2411 }
2412
2413 /* We should now be at the base of the gr/br/fr spill area. */
2414 if (cfa_off != (current_frame_info.spill_cfa_off
2415 + current_frame_info.spill_size))
2416 abort ();
2417
2418 /* Restore all general registers. */
2419 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2420 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2421 {
2422 reg = gen_rtx_REG (DImode, regno);
2423 do_restore (gen_gr_restore, reg, cfa_off);
2424 cfa_off -= 8;
2425 }
2426
2427 /* Restore the branch registers. Handle B0 specially, as it may
2428 have gotten stored in some GR register. */
2429 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2430 {
2431 if (current_frame_info.reg_save_b0 != 0)
2432 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2433 else
2434 {
2435 alt_regno = next_scratch_gr_reg ();
2436 alt_reg = gen_rtx_REG (DImode, alt_regno);
2437 do_restore (gen_movdi_x, alt_reg, cfa_off);
2438 cfa_off -= 8;
2439 }
2440 reg = gen_rtx_REG (DImode, BR_REG (0));
2441 emit_move_insn (reg, alt_reg);
2442 }
2443
2444 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2445 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2446 {
2447 alt_regno = next_scratch_gr_reg ();
2448 alt_reg = gen_rtx_REG (DImode, alt_regno);
2449 do_restore (gen_movdi_x, alt_reg, cfa_off);
2450 cfa_off -= 8;
2451 reg = gen_rtx_REG (DImode, regno);
2452 emit_move_insn (reg, alt_reg);
2453 }
2454
2455 /* Restore floating point registers. */
2456 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2457 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2458 {
2459 if (cfa_off & 15)
2460 abort ();
2461 reg = gen_rtx_REG (TFmode, regno);
2462 do_restore (gen_fr_restore_x, reg, cfa_off);
2463 cfa_off -= 16;
2464 }
2465
2466 /* Restore ar.unat for real. */
2467 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2468 {
2469 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2470 emit_move_insn (reg, ar_unat_save_reg);
2471 }
2472
2473 if (cfa_off != current_frame_info.spill_cfa_off)
2474 abort ();
2475
2476 finish_spill_pointers ();
2477
2478 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2479 {
2480 /* ??? At this point we must generate a magic insn that appears to
2481 modify the spill iterators, the stack pointer, and the frame
2482 pointer. This would allow the most scheduling freedom. For now,
2483 just hard stop. */
2484 emit_insn (gen_blockage ());
2485 }
2486
2487 if (cfun->machine->ia64_eh_epilogue_sp)
2488 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2489 else if (frame_pointer_needed)
2490 {
2491 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2492 RTX_FRAME_RELATED_P (insn) = 1;
2493 }
2494 else if (current_frame_info.total_size)
2495 {
2496 rtx offset, frame_size_rtx;
2497
2498 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2499 if (CONST_OK_FOR_I (current_frame_info.total_size))
2500 offset = frame_size_rtx;
2501 else
2502 {
2503 regno = next_scratch_gr_reg ();
2504 offset = gen_rtx_REG (DImode, regno);
2505 emit_move_insn (offset, frame_size_rtx);
2506 }
2507
2508 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2509 offset));
2510
2511 RTX_FRAME_RELATED_P (insn) = 1;
2512 if (GET_CODE (offset) != CONST_INT)
2513 {
2514 REG_NOTES (insn)
2515 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2516 gen_rtx_SET (VOIDmode,
2517 stack_pointer_rtx,
2518 gen_rtx_PLUS (DImode,
2519 stack_pointer_rtx,
2520 frame_size_rtx)),
2521 REG_NOTES (insn));
2522 }
2523 }
2524
2525 if (cfun->machine->ia64_eh_epilogue_bsp)
2526 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2527
2528 if (! sibcall_p)
2529 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2530 else
2531 {
2532 int fp = GR_REG (2);
2533 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is
2534 the first available call-clobbered register. If there was a frame
2535 pointer register, we may have swapped the names of r2 and
2536 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using the
2537 string "r2" when emitting the register name for the assembler. */
2538 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2539 fp = HARD_FRAME_POINTER_REGNUM;
2540
2541 /* We must emit an alloc to force the input registers to become output
2542 registers. Otherwise, if the callee tries to pass its parameters
2543 through to another call without an intervening alloc, then these
2544 values get lost. */
2545 /* ??? We don't need to preserve all input registers. We only need to
2546 preserve those input registers used as arguments to the sibling call.
2547 It is unclear how to compute that number here. */
2548 if (current_frame_info.n_input_regs != 0)
2549 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2550 GEN_INT (0), GEN_INT (0),
2551 GEN_INT (current_frame_info.n_input_regs),
2552 GEN_INT (0)));
2553 }
2554 }
2555
2556 /* Return 1 if br.ret can do all the work required to return from a
2557 function. */
2558
2559 int
2560 ia64_direct_return ()
2561 {
2562 if (reload_completed && ! frame_pointer_needed)
2563 {
2564 ia64_compute_frame_size (get_frame_size ());
2565
2566 return (current_frame_info.total_size == 0
2567 && current_frame_info.n_spilled == 0
2568 && current_frame_info.reg_save_b0 == 0
2569 && current_frame_info.reg_save_pr == 0
2570 && current_frame_info.reg_save_ar_pfs == 0
2571 && current_frame_info.reg_save_ar_unat == 0
2572 && current_frame_info.reg_save_ar_lc == 0);
2573 }
2574 return 0;
2575 }
2576
2577 int
2578 ia64_hard_regno_rename_ok (from, to)
2579 int from;
2580 int to;
2581 {
2582 /* Don't clobber any of the registers we reserved for the prologue. */
2583 if (to == current_frame_info.reg_fp
2584 || to == current_frame_info.reg_save_b0
2585 || to == current_frame_info.reg_save_pr
2586 || to == current_frame_info.reg_save_ar_pfs
2587 || to == current_frame_info.reg_save_ar_unat
2588 || to == current_frame_info.reg_save_ar_lc)
2589 return 0;
2590
2591 if (from == current_frame_info.reg_fp
2592 || from == current_frame_info.reg_save_b0
2593 || from == current_frame_info.reg_save_pr
2594 || from == current_frame_info.reg_save_ar_pfs
2595 || from == current_frame_info.reg_save_ar_unat
2596 || from == current_frame_info.reg_save_ar_lc)
2597 return 0;
2598
2599 /* Don't use output registers outside the register frame. */
2600 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2601 return 0;
2602
2603 /* Retain even/oddness on predicate register pairs. */
2604 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2605 return (from & 1) == (to & 1);
2606
2607 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2608 if (from == GR_REG (4) && current_function_calls_setjmp)
2609 return 0;
2610
2611 return 1;
2612 }
2613
2614 /* Target hook for assembling integer objects. Handle word-sized
2615 aligned objects and detect the cases when @fptr is needed. */
2616
2617 static bool
2618 ia64_assemble_integer (x, size, aligned_p)
2619 rtx x;
2620 unsigned int size;
2621 int aligned_p;
2622 {
2623 if (size == UNITS_PER_WORD && aligned_p
2624 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2625 && GET_CODE (x) == SYMBOL_REF
2626 && SYMBOL_REF_FLAG (x))
2627 {
2628 fputs ("\tdata8\t@fptr(", asm_out_file);
2629 output_addr_const (asm_out_file, x);
2630 fputs (")\n", asm_out_file);
2631 return true;
2632 }
2633 return default_assemble_integer (x, size, aligned_p);
2634 }
2635
2636 /* Emit the function prologue. */
2637
2638 static void
2639 ia64_output_function_prologue (file, size)
2640 FILE *file;
2641 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2642 {
2643 int mask, grsave, grsave_prev;
2644
2645 if (current_frame_info.need_regstk)
2646 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2647 current_frame_info.n_input_regs,
2648 current_frame_info.n_local_regs,
2649 current_frame_info.n_output_regs,
2650 current_frame_info.n_rotate_regs);
2651
2652 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2653 return;
2654
2655 /* Emit the .prologue directive. */
2656
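/* As assembled below, the mask argument of .prologue has bit 3 set when
   b0 is saved in a general register, bit 2 for ar.pfs, bit 1 for the
   frame pointer and bit 0 for the predicates, and GRSAVE names the
   first register of that consecutive block.  */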
2657 mask = 0;
2658 grsave = grsave_prev = 0;
2659 if (current_frame_info.reg_save_b0 != 0)
2660 {
2661 mask |= 8;
2662 grsave = grsave_prev = current_frame_info.reg_save_b0;
2663 }
2664 if (current_frame_info.reg_save_ar_pfs != 0
2665 && (grsave_prev == 0
2666 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2667 {
2668 mask |= 4;
2669 if (grsave_prev == 0)
2670 grsave = current_frame_info.reg_save_ar_pfs;
2671 grsave_prev = current_frame_info.reg_save_ar_pfs;
2672 }
2673 if (current_frame_info.reg_fp != 0
2674 && (grsave_prev == 0
2675 || current_frame_info.reg_fp == grsave_prev + 1))
2676 {
2677 mask |= 2;
2678 if (grsave_prev == 0)
2679 grsave = HARD_FRAME_POINTER_REGNUM;
2680 grsave_prev = current_frame_info.reg_fp;
2681 }
2682 if (current_frame_info.reg_save_pr != 0
2683 && (grsave_prev == 0
2684 || current_frame_info.reg_save_pr == grsave_prev + 1))
2685 {
2686 mask |= 1;
2687 if (grsave_prev == 0)
2688 grsave = current_frame_info.reg_save_pr;
2689 }
2690
2691 if (mask)
2692 fprintf (file, "\t.prologue %d, %d\n", mask,
2693 ia64_dbx_register_number (grsave));
2694 else
2695 fputs ("\t.prologue\n", file);
2696
2697 /* Emit a .spill directive, if necessary, to relocate the base of
2698 the register spill area. */
2699 if (current_frame_info.spill_cfa_off != -16)
2700 fprintf (file, "\t.spill %ld\n",
2701 (long) (current_frame_info.spill_cfa_off
2702 + current_frame_info.spill_size));
2703 }
2704
2705 /* Emit the .body directive at the scheduled end of the prologue. */
2706
2707 static void
2708 ia64_output_function_end_prologue (file)
2709 FILE *file;
2710 {
2711 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2712 return;
2713
2714 fputs ("\t.body\n", file);
2715 }
2716
2717 /* Emit the function epilogue. */
2718
2719 static void
2720 ia64_output_function_epilogue (file, size)
2721 FILE *file ATTRIBUTE_UNUSED;
2722 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2723 {
2724 int i;
2725
2726 /* Reset from the function's potential modifications. */
2727 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2728
2729 if (current_frame_info.reg_fp)
2730 {
2731 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2732 reg_names[HARD_FRAME_POINTER_REGNUM]
2733 = reg_names[current_frame_info.reg_fp];
2734 reg_names[current_frame_info.reg_fp] = tmp;
2735 }
2736 if (! TARGET_REG_NAMES)
2737 {
2738 for (i = 0; i < current_frame_info.n_input_regs; i++)
2739 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2740 for (i = 0; i < current_frame_info.n_local_regs; i++)
2741 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2742 for (i = 0; i < current_frame_info.n_output_regs; i++)
2743 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2744 }
2745
2746 current_frame_info.initialized = 0;
2747 }
2748
2749 int
2750 ia64_dbx_register_number (regno)
2751 int regno;
2752 {
2753 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2754 from its home at loc79 to something inside the register frame. We
2755 must perform the same renumbering here for the debug info. */
2756 if (current_frame_info.reg_fp)
2757 {
2758 if (regno == HARD_FRAME_POINTER_REGNUM)
2759 regno = current_frame_info.reg_fp;
2760 else if (regno == current_frame_info.reg_fp)
2761 regno = HARD_FRAME_POINTER_REGNUM;
2762 }
2763
2764 if (IN_REGNO_P (regno))
2765 return 32 + regno - IN_REG (0);
2766 else if (LOC_REGNO_P (regno))
2767 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2768 else if (OUT_REGNO_P (regno))
2769 return (32 + current_frame_info.n_input_regs
2770 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2771 else
2772 return regno;
2773 }
2774
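/* Fill in the 32 byte trampoline at ADDR.  Words 0 and 1 form a fake
   function descriptor whose entry point is __ia64_trampoline and whose
   gp is ADDR + 16; word 2 holds the target descriptor FNADDR and word 3
   the STATIC_CHAIN.  */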
2775 void
2776 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2777 rtx addr, fnaddr, static_chain;
2778 {
2779 rtx addr_reg, eight = GEN_INT (8);
2780
2781 /* Load up our iterator. */
2782 addr_reg = gen_reg_rtx (Pmode);
2783 emit_move_insn (addr_reg, addr);
2784
2785 /* The first two words are the fake descriptor:
2786 __ia64_trampoline, ADDR+16. */
2787 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2788 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2789 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2790
2791 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2792 copy_to_reg (plus_constant (addr, 16)));
2793 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2794
2795 /* The third word is the target descriptor. */
2796 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2797 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2798
2799 /* The fourth word is the static chain. */
2800 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2801 }
2802 \f
2803 /* Do any needed setup for a variadic function. CUM has not been updated
2804 for the last named argument which has type TYPE and mode MODE.
2805
2806 We generate the actual spill instructions during prologue generation. */
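/* For example, for "void f (int a, ...)" the argument advance below
   leaves cum.words at 1, so n is 7, *pretend_size is 7 * UNITS_PER_WORD,
   and ia64_expand_prologue will spill the seven unnamed argument
   registers.  */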
2807
2808 void
2809 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2810 CUMULATIVE_ARGS cum;
2811 int int_mode;
2812 tree type;
2813 int * pretend_size;
2814 int second_time ATTRIBUTE_UNUSED;
2815 {
2816 /* If this is a stdarg function, then skip the current argument. */
2817 if (! current_function_varargs)
2818 ia64_function_arg_advance (&cum, int_mode, type, 1);
2819
2820 if (cum.words < MAX_ARGUMENT_SLOTS)
2821 {
2822 int n = MAX_ARGUMENT_SLOTS - cum.words;
2823 *pretend_size = n * UNITS_PER_WORD;
2824 cfun->machine->n_varargs = n;
2825 }
2826 }
2827
2828 /* Check whether TYPE is a homogeneous floating point aggregate. If
2829 it is, return the mode of the floating point type that appears
2830 in all leaves. If it is not, return VOIDmode.
2831
2832 An aggregate is a homogeneous floating point aggregate if all
2833 fields/elements in it have the same floating point type (e.g.,
2834 SFmode). 128-bit quad-precision floats are excluded. */
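/* For example, under this definition

     struct rgb { float r, g, b; };

   is an HFA with element mode SFmode, while

     struct mixed { float f; double d; };

   is not (its leaves have different floating point modes) and

     struct pair { float f; int i; };

   is not (one leaf is integral).  The struct names are only
   illustrative.  */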
2835
2836 static enum machine_mode
2837 hfa_element_mode (type, nested)
2838 tree type;
2839 int nested;
2840 {
2841 enum machine_mode element_mode = VOIDmode;
2842 enum machine_mode mode;
2843 enum tree_code code = TREE_CODE (type);
2844 int know_element_mode = 0;
2845 tree t;
2846
2847 switch (code)
2848 {
2849 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2850 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2851 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2852 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2853 case FUNCTION_TYPE:
2854 return VOIDmode;
2855
2856 /* Fortran complex types are supposed to be HFAs, so we need to handle
2857 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2858 types though. */
2859 case COMPLEX_TYPE:
2860 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2861 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2862 * BITS_PER_UNIT, MODE_FLOAT, 0);
2863 else
2864 return VOIDmode;
2865
2866 case REAL_TYPE:
2867 /* ??? Should exclude 128-bit long double here. */
2868 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2869 mode if this is contained within an aggregate. */
2870 if (nested)
2871 return TYPE_MODE (type);
2872 else
2873 return VOIDmode;
2874
2875 case ARRAY_TYPE:
2876 return TYPE_MODE (TREE_TYPE (type));
2877
2878 case RECORD_TYPE:
2879 case UNION_TYPE:
2880 case QUAL_UNION_TYPE:
2881 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2882 {
2883 if (TREE_CODE (t) != FIELD_DECL)
2884 continue;
2885
2886 mode = hfa_element_mode (TREE_TYPE (t), 1);
2887 if (know_element_mode)
2888 {
2889 if (mode != element_mode)
2890 return VOIDmode;
2891 }
2892 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2893 return VOIDmode;
2894 else
2895 {
2896 know_element_mode = 1;
2897 element_mode = mode;
2898 }
2899 }
2900 return element_mode;
2901
2902 default:
2903 /* If we reach here, we probably have some front-end specific type
2904 that the backend doesn't know about. This can happen via the
2905 aggregate_value_p call in init_function_start. All we can do is
2906 ignore unknown tree types. */
2907 return VOIDmode;
2908 }
2909
2910 return VOIDmode;
2911 }
2912
2913 /* Return rtx for register where argument is passed, or zero if it is passed
2914 on the stack. */
2915
2916 /* ??? 128-bit quad-precision floats are always passed in general
2917 registers. */
2918
2919 rtx
2920 ia64_function_arg (cum, mode, type, named, incoming)
2921 CUMULATIVE_ARGS *cum;
2922 enum machine_mode mode;
2923 tree type;
2924 int named;
2925 int incoming;
2926 {
2927 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2928 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2929 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2930 / UNITS_PER_WORD);
2931 int offset = 0;
2932 enum machine_mode hfa_mode = VOIDmode;
2933
2934 /* Integer and float arguments larger than 8 bytes start at the next even
2935 boundary. Aggregates larger than 8 bytes start at the next even boundary
2936 if the aggregate has 16 byte alignment. Net effect is that types with
2937 alignment greater than 8 start at the next even boundary. */
2938 /* ??? The ABI does not specify how to handle aggregates with alignment from
2939 9 to 15 bytes, or greater than 16. We handle them all as if they had
2940 16 byte alignment. Such aggregates can occur only if gcc extensions are
2941 used. */
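/* For example, with a 16 byte aligned aggregate arriving when cum->words
   is 3 (an odd slot), OFFSET becomes 1 and the aggregate starts in slot 4;
   a plain DImode argument in the same position would start in slot 3.  */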
2942 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2943 : (words > 1))
2944 && (cum->words & 1))
2945 offset = 1;
2946
2947 /* If all argument slots are used, then it must go on the stack. */
2948 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2949 return 0;
2950
2951 /* Check for and handle homogeneous FP aggregates. */
2952 if (type)
2953 hfa_mode = hfa_element_mode (type, 0);
2954
2955 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2956 and unprototyped hfas are passed specially. */
2957 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2958 {
2959 rtx loc[16];
2960 int i = 0;
2961 int fp_regs = cum->fp_regs;
2962 int int_regs = cum->words + offset;
2963 int hfa_size = GET_MODE_SIZE (hfa_mode);
2964 int byte_size;
2965 int args_byte_size;
2966
2967 /* If prototyped, pass it in FR regs then GR regs.
2968 If not prototyped, pass it in both FR and GR regs.
2969
2970 If this is an SFmode aggregate, then it is possible to run out of
2971 FR regs while GR regs are still left. In that case, we pass the
2972 remaining part in the GR regs. */
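/* For example, a named struct of four floats in a prototyped call comes
   back as a PARALLEL of four SFmode pieces in consecutive FP argument
   registers starting at FR_ARG_FIRST + cum->fp_regs; the matching GR
   slots stay unused but still count against MAX_ARGUMENT_SLOTS.  */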
2973
2974 /* Fill the FP regs. We do this always. We stop if we reach the end
2975 of the argument, the last FP register, or the last argument slot. */
2976
2977 byte_size = ((mode == BLKmode)
2978 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2979 args_byte_size = int_regs * UNITS_PER_WORD;
2980 offset = 0;
2981 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2982 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2983 {
2984 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2985 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2986 + fp_regs)),
2987 GEN_INT (offset));
2988 offset += hfa_size;
2989 args_byte_size += hfa_size;
2990 fp_regs++;
2991 }
2992
2993 /* If no prototype, then the whole thing must go in GR regs. */
2994 if (! cum->prototype)
2995 offset = 0;
2996 /* If this is an SFmode aggregate, then we might have some left over
2997 that needs to go in GR regs. */
2998 else if (byte_size != offset)
2999 int_regs += offset / UNITS_PER_WORD;
3000
3001 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3002
3003 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3004 {
3005 enum machine_mode gr_mode = DImode;
3006
3007 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3008 then this goes in a GR reg left adjusted/little endian, right
3009 adjusted/big endian. */
3010 /* ??? Currently this is handled wrong, because 4-byte hunks are
3011 always right adjusted/little endian. */
3012 if (offset & 0x4)
3013 gr_mode = SImode;
3014 /* If we have an even 4 byte hunk because the aggregate is a
3015 multiple of 4 bytes in size, then this goes in a GR reg right
3016 adjusted/little endian. */
3017 else if (byte_size - offset == 4)
3018 gr_mode = SImode;
3019 /* Complex floats need to have float mode. */
3020 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3021 gr_mode = hfa_mode;
3022
3023 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3024 gen_rtx_REG (gr_mode, (basereg
3025 + int_regs)),
3026 GEN_INT (offset));
3027 offset += GET_MODE_SIZE (gr_mode);
3028 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3029 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3030 }
3031
3032 /* If we ended up using just one location, just return that one loc. */
3033 if (i == 1)
3034 return XEXP (loc[0], 0);
3035 else
3036 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3037 }
3038
3039 /* Integral and aggregates go in general registers. If we have run out of
3040 FR registers, then FP values must also go in general registers. This can
3041 happen when we have a SFmode HFA. */
3042 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3043 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3044 return gen_rtx_REG (mode, basereg + cum->words + offset);
3045
3046 /* If there is a prototype, then FP values go in a FR register when
3047 named, and in a GR register when unnamed. */
3048 else if (cum->prototype)
3049 {
3050 if (! named)
3051 return gen_rtx_REG (mode, basereg + cum->words + offset);
3052 else
3053 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3054 }
3055 /* If there is no prototype, then FP values go in both FR and GR
3056 registers. */
3057 else
3058 {
3059 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3060 gen_rtx_REG (mode, (FR_ARG_FIRST
3061 + cum->fp_regs)),
3062 const0_rtx);
3063 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3064 gen_rtx_REG (mode,
3065 (basereg + cum->words
3066 + offset)),
3067 const0_rtx);
3068
3069 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3070 }
3071 }
3072
3073 /* Return number of words, at the beginning of the argument, that must be
3074 put in registers. 0 if the argument is entirely in registers or entirely
3075 in memory. */
3076
3077 int
3078 ia64_function_arg_partial_nregs (cum, mode, type, named)
3079 CUMULATIVE_ARGS *cum;
3080 enum machine_mode mode;
3081 tree type;
3082 int named ATTRIBUTE_UNUSED;
3083 {
3084 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3085 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3086 / UNITS_PER_WORD);
3087 int offset = 0;
3088
3089 /* Arguments with alignment larger than 8 bytes start at the next even
3090 boundary. */
3091 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3092 : (words > 1))
3093 && (cum->words & 1))
3094 offset = 1;
3095
3096 /* If all argument slots are used, then it must go on the stack. */
3097 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3098 return 0;
3099
3100 /* It doesn't matter whether the argument goes in FR or GR regs. If
3101 it fits within the 8 argument slots, then it goes entirely in
3102 registers. If it extends past the last argument slot, then the rest
3103 goes on the stack. */
3104
3105 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3106 return 0;
3107
3108 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3109 }
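/* For example, an 8 byte aligned four word aggregate arriving when
   cum->words is 6 returns 8 - 6 = 2: its first two words travel in the
   last two argument slots and the remaining two words go on the stack.  */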
3110
3111 /* Update CUM to point after this argument. This is patterned after
3112 ia64_function_arg. */
3113
3114 void
3115 ia64_function_arg_advance (cum, mode, type, named)
3116 CUMULATIVE_ARGS *cum;
3117 enum machine_mode mode;
3118 tree type;
3119 int named;
3120 {
3121 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3122 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3123 / UNITS_PER_WORD);
3124 int offset = 0;
3125 enum machine_mode hfa_mode = VOIDmode;
3126
3127 /* If all arg slots are already full, then there is nothing to do. */
3128 if (cum->words >= MAX_ARGUMENT_SLOTS)
3129 return;
3130
3131 /* Arguments with alignment larger than 8 bytes start at the next even
3132 boundary. */
3133 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3134 : (words > 1))
3135 && (cum->words & 1))
3136 offset = 1;
3137
3138 cum->words += words + offset;
3139
3140 /* Check for and handle homogeneous FP aggregates. */
3141 if (type)
3142 hfa_mode = hfa_element_mode (type, 0);
3143
3144 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3145 and unprototyped hfas are passed specially. */
3146 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3147 {
3148 int fp_regs = cum->fp_regs;
3149 /* This is the original value of cum->words + offset. */
3150 int int_regs = cum->words - words;
3151 int hfa_size = GET_MODE_SIZE (hfa_mode);
3152 int byte_size;
3153 int args_byte_size;
3154
3155 /* If prototyped, pass it in FR regs then GR regs.
3156 If not prototyped, pass it in both FR and GR regs.
3157
3158 If this is an SFmode aggregate, then it is possible to run out of
3159 FR regs while GR regs are still left. In that case, we pass the
3160 remaining part in the GR regs. */
3161
3162 /* Fill the FP regs. We do this always. We stop if we reach the end
3163 of the argument, the last FP register, or the last argument slot. */
3164
3165 byte_size = ((mode == BLKmode)
3166 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3167 args_byte_size = int_regs * UNITS_PER_WORD;
3168 offset = 0;
3169 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3170 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3171 {
3172 offset += hfa_size;
3173 args_byte_size += hfa_size;
3174 fp_regs++;
3175 }
3176
3177 cum->fp_regs = fp_regs;
3178 }
3179
3180 /* Integral and aggregates go in general registers. If we have run out of
3181 FR registers, then FP values must also go in general registers. This can
3182 happen when we have a SFmode HFA. */
3183 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3184 return;
3185
3186 /* If there is a prototype, then FP values go in a FR register when
3187 named, and in a GR register when unnamed. */
3188 else if (cum->prototype)
3189 {
3190 if (! named)
3191 return;
3192 else
3193 /* ??? Complex types should not reach here. */
3194 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3195 }
3196 /* If there is no prototype, then FP values go in both FR and GR
3197 registers. */
3198 else
3199 /* ??? Complex types should not reach here. */
3200 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3201
3202 return;
3203 }
3204 \f
3205 /* Implement va_start. */
3206
3207 void
3208 ia64_va_start (stdarg_p, valist, nextarg)
3209 int stdarg_p;
3210 tree valist;
3211 rtx nextarg;
3212 {
3213 int arg_words;
3214 int ofs;
3215
3216 arg_words = current_function_args_info.words;
3217
3218 if (stdarg_p)
3219 ofs = 0;
3220 else
3221 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3222
3223 nextarg = plus_constant (nextarg, ofs);
3224 std_expand_builtin_va_start (1, valist, nextarg);
3225 }
3226
3227 /* Implement va_arg. */
3228
3229 rtx
3230 ia64_va_arg (valist, type)
3231 tree valist, type;
3232 {
3233 tree t;
3234
3235 /* Arguments with alignment larger than 8 bytes start at the next even
3236 boundary. */
3237 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3238 {
3239 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3240 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3241 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3242 build_int_2 (-2 * UNITS_PER_WORD, -1));
3243 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3244 TREE_SIDE_EFFECTS (t) = 1;
3245 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3246 }
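/* With UNITS_PER_WORD == 8 the expression built above amounts to
   valist = (valist + 15) & -16, i.e. rounding the argument pointer up
   to the next 16 byte boundary before the standard va_arg expansion.  */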
3247
3248 return std_expand_builtin_va_arg (valist, type);
3249 }
3250 \f
3251 /* Return 1 if the function return value is returned in memory. Return 0 if it is
3252 in a register. */
3253
3254 int
3255 ia64_return_in_memory (valtype)
3256 tree valtype;
3257 {
3258 enum machine_mode mode;
3259 enum machine_mode hfa_mode;
3260 HOST_WIDE_INT byte_size;
3261
3262 mode = TYPE_MODE (valtype);
3263 byte_size = GET_MODE_SIZE (mode);
3264 if (mode == BLKmode)
3265 {
3266 byte_size = int_size_in_bytes (valtype);
3267 if (byte_size < 0)
3268 return 1;
3269 }
3270
3271 /* HFAs with up to 8 elements are returned in the FP argument registers. */
3272
3273 hfa_mode = hfa_element_mode (valtype, 0);
3274 if (hfa_mode != VOIDmode)
3275 {
3276 int hfa_size = GET_MODE_SIZE (hfa_mode);
3277
3278 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3279 return 1;
3280 else
3281 return 0;
3282 }
3283 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3284 return 1;
3285 else
3286 return 0;
3287 }
3288
3289 /* Return rtx for register that holds the function return value. */
3290
3291 rtx
3292 ia64_function_value (valtype, func)
3293 tree valtype;
3294 tree func ATTRIBUTE_UNUSED;
3295 {
3296 enum machine_mode mode;
3297 enum machine_mode hfa_mode;
3298
3299 mode = TYPE_MODE (valtype);
3300 hfa_mode = hfa_element_mode (valtype, 0);
3301
3302 if (hfa_mode != VOIDmode)
3303 {
3304 rtx loc[8];
3305 int i;
3306 int hfa_size;
3307 int byte_size;
3308 int offset;
3309
3310 hfa_size = GET_MODE_SIZE (hfa_mode);
3311 byte_size = ((mode == BLKmode)
3312 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3313 offset = 0;
3314 for (i = 0; offset < byte_size; i++)
3315 {
3316 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3317 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3318 GEN_INT (offset));
3319 offset += hfa_size;
3320 }
3321
3322 if (i == 1)
3323 return XEXP (loc[0], 0);
3324 else
3325 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3326 }
3327 else if (FLOAT_TYPE_P (valtype)
3328 && ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3329 return gen_rtx_REG (mode, FR_ARG_FIRST);
3330 else
3331 return gen_rtx_REG (mode, GR_RET_FIRST);
3332 }
3333
3334 /* Print a memory address as an operand to reference that memory location. */
3335
3336 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3337 also call this from ia64_print_operand for memory addresses. */
3338
3339 void
3340 ia64_print_operand_address (stream, address)
3341 FILE * stream ATTRIBUTE_UNUSED;
3342 rtx address ATTRIBUTE_UNUSED;
3343 {
3344 }
3345
3346 /* Print an operand to an assembler instruction.
3347 C Swap and print a comparison operator.
3348 D Print an FP comparison operator.
3349 E Print 32 - constant, for SImode shifts as extract.
3350 e Print 64 - constant, for DImode rotates.
3351 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3352 a floating point register emitted normally.
3353 I Invert a predicate register by adding 1.
3354 J Select the proper predicate register for a condition.
3355 j Select the inverse predicate register for a condition.
3356 O Append .acq for volatile load.
3357 P Postincrement of a MEM.
3358 Q Append .rel for volatile store.
3359 S Shift amount for shladd instruction.
3360 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3361 for Intel assembler.
3362 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3363 for Intel assembler.
3364 r Print register name, or constant 0 as r0. HP compatibility for
3365 Linux kernel. */
3366 void
3367 ia64_print_operand (file, x, code)
3368 FILE * file;
3369 rtx x;
3370 int code;
3371 {
3372 const char *str;
3373
3374 switch (code)
3375 {
3376 case 0:
3377 /* Handled below. */
3378 break;
3379
3380 case 'C':
3381 {
3382 enum rtx_code c = swap_condition (GET_CODE (x));
3383 fputs (GET_RTX_NAME (c), file);
3384 return;
3385 }
3386
3387 case 'D':
3388 switch (GET_CODE (x))
3389 {
3390 case NE:
3391 str = "neq";
3392 break;
3393 case UNORDERED:
3394 str = "unord";
3395 break;
3396 case ORDERED:
3397 str = "ord";
3398 break;
3399 default:
3400 str = GET_RTX_NAME (GET_CODE (x));
3401 break;
3402 }
3403 fputs (str, file);
3404 return;
3405
3406 case 'E':
3407 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3408 return;
3409
3410 case 'e':
3411 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3412 return;
3413
3414 case 'F':
3415 if (x == CONST0_RTX (GET_MODE (x)))
3416 str = reg_names [FR_REG (0)];
3417 else if (x == CONST1_RTX (GET_MODE (x)))
3418 str = reg_names [FR_REG (1)];
3419 else if (GET_CODE (x) == REG)
3420 str = reg_names [REGNO (x)];
3421 else
3422 abort ();
3423 fputs (str, file);
3424 return;
3425
3426 case 'I':
3427 fputs (reg_names [REGNO (x) + 1], file);
3428 return;
3429
3430 case 'J':
3431 case 'j':
3432 {
3433 unsigned int regno = REGNO (XEXP (x, 0));
3434 if (GET_CODE (x) == EQ)
3435 regno += 1;
3436 if (code == 'j')
3437 regno ^= 1;
3438 fputs (reg_names [regno], file);
3439 }
3440 return;
3441
3442 case 'O':
3443 if (MEM_VOLATILE_P (x))
3444 fputs (".acq", file);
3445 return;
3446
3447 case 'P':
3448 {
3449 HOST_WIDE_INT value;
3450
3451 switch (GET_CODE (XEXP (x, 0)))
3452 {
3453 default:
3454 return;
3455
3456 case POST_MODIFY:
3457 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3458 if (GET_CODE (x) == CONST_INT)
3459 value = INTVAL (x);
3460 else if (GET_CODE (x) == REG)
3461 {
3462 fprintf (file, ", %s", reg_names[REGNO (x)]);
3463 return;
3464 }
3465 else
3466 abort ();
3467 break;
3468
3469 case POST_INC:
3470 value = GET_MODE_SIZE (GET_MODE (x));
3471 break;
3472
3473 case POST_DEC:
3474 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3475 break;
3476 }
3477
3478 putc (',', file);
3479 putc (' ', file);
3480 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3481 return;
3482 }
3483
3484 case 'Q':
3485 if (MEM_VOLATILE_P (x))
3486 fputs (".rel", file);
3487 return;
3488
3489 case 'S':
3490 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3491 return;
3492
3493 case 'T':
3494 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3495 {
3496 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3497 return;
3498 }
3499 break;
3500
3501 case 'U':
3502 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3503 {
3504 const char *prefix = "0x";
3505 if (INTVAL (x) & 0x80000000)
3506 {
3507 fprintf (file, "0xffffffff");
3508 prefix = "";
3509 }
3510 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3511 return;
3512 }
3513 break;
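/* For example, with the Intel assembler a CONST_INT of -5 is printed as
   0xfffffffb by %T and as 0xfffffffffffffffb by %U.  */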
3514
3515 case 'r':
3516 /* If this operand is the constant zero, write it as register zero.
3517 Any register, zero, or CONST_INT value is OK here. */
3518 if (GET_CODE (x) == REG)
3519 fputs (reg_names[REGNO (x)], file);
3520 else if (x == CONST0_RTX (GET_MODE (x)))
3521 fputs ("r0", file);
3522 else if (GET_CODE (x) == CONST_INT)
3523 output_addr_const (file, x);
3524 else
3525 output_operand_lossage ("invalid %%r value");
3526 return;
3527
3528 case '+':
3529 {
3530 const char *which;
3531
3532 /* For conditional branches, returns or calls, substitute
3533 sptk, dptk, dpnt, or spnt for %s. */
3534 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3535 if (x)
3536 {
3537 int pred_val = INTVAL (XEXP (x, 0));
3538
3539 /* Guess top and bottom 2% statically predicted. */
3540 if (pred_val < REG_BR_PROB_BASE / 50)
3541 which = ".spnt";
3542 else if (pred_val < REG_BR_PROB_BASE / 2)
3543 which = ".dpnt";
3544 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3545 which = ".dptk";
3546 else
3547 which = ".sptk";
3548 }
3549 else if (GET_CODE (current_output_insn) == CALL_INSN)
3550 which = ".sptk";
3551 else
3552 which = ".dptk";
3553
3554 fputs (which, file);
3555 return;
3556 }
3557
3558 case ',':
3559 x = current_insn_predicate;
3560 if (x)
3561 {
3562 unsigned int regno = REGNO (XEXP (x, 0));
3563 if (GET_CODE (x) == EQ)
3564 regno += 1;
3565 fprintf (file, "(%s) ", reg_names [regno]);
3566 }
3567 return;
3568
3569 default:
3570 output_operand_lossage ("ia64_print_operand: unknown code");
3571 return;
3572 }
3573
3574 switch (GET_CODE (x))
3575 {
3576 /* This happens for the spill/restore instructions. */
3577 case POST_INC:
3578 case POST_DEC:
3579 case POST_MODIFY:
3580 x = XEXP (x, 0);
3581 /* ... fall through ... */
3582
3583 case REG:
3584 fputs (reg_names [REGNO (x)], file);
3585 break;
3586
3587 case MEM:
3588 {
3589 rtx addr = XEXP (x, 0);
3590 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3591 addr = XEXP (addr, 0);
3592 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3593 break;
3594 }
3595
3596 default:
3597 output_addr_const (file, x);
3598 break;
3599 }
3600
3601 return;
3602 }
3603 \f
3604 /* Calculate the cost of moving data from a register in class FROM to
3605 one in class TO, using MODE. */
3606
3607 int
3608 ia64_register_move_cost (mode, from, to)
3609 enum machine_mode mode;
3610 enum reg_class from, to;
3611 {
3612 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3613 if (to == ADDL_REGS)
3614 to = GR_REGS;
3615 if (from == ADDL_REGS)
3616 from = GR_REGS;
3617
3618 /* All costs are symmetric, so reduce cases by putting the
3619 lower number class as the destination. */
3620 if (from < to)
3621 {
3622 enum reg_class tmp = to;
3623 to = from, from = tmp;
3624 }
3625
3626 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3627 so that we get secondary memory reloads. Between FR_REGS,
3628 we have to make this at least as expensive as MEMORY_MOVE_COST
3629 to avoid spectacularly poor register class preferencing. */
3630 if (mode == TFmode)
3631 {
3632 if (to != GR_REGS || from != GR_REGS)
3633 return MEMORY_MOVE_COST (mode, to, 0);
3634 else
3635 return 3;
3636 }
3637
3638 switch (to)
3639 {
3640 case PR_REGS:
3641 /* Moving between PR registers takes two insns. */
3642 if (from == PR_REGS)
3643 return 3;
3644 /* Moving between PR and anything but GR is impossible. */
3645 if (from != GR_REGS)
3646 return MEMORY_MOVE_COST (mode, to, 0);
3647 break;
3648
3649 case BR_REGS:
3650 /* Moving between BR and anything but GR is impossible. */
3651 if (from != GR_REGS && from != GR_AND_BR_REGS)
3652 return MEMORY_MOVE_COST (mode, to, 0);
3653 break;
3654
3655 case AR_I_REGS:
3656 case AR_M_REGS:
3657 /* Moving between AR and anything but GR is impossible. */
3658 if (from != GR_REGS)
3659 return MEMORY_MOVE_COST (mode, to, 0);
3660 break;
3661
3662 case GR_REGS:
3663 case FR_REGS:
3664 case GR_AND_FR_REGS:
3665 case GR_AND_BR_REGS:
3666 case ALL_REGS:
3667 break;
3668
3669 default:
3670 abort ();
3671 }
3672
3673 return 2;
3674 }
3675
3676 /* This function returns the register class required for a secondary
3677 register when copying between one of the registers in CLASS, and X,
3678 using MODE. A return value of NO_REGS means that no secondary register
3679 is required. */
3680
3681 enum reg_class
3682 ia64_secondary_reload_class (class, mode, x)
3683 enum reg_class class;
3684 enum machine_mode mode ATTRIBUTE_UNUSED;
3685 rtx x;
3686 {
3687 int regno = -1;
3688
3689 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3690 regno = true_regnum (x);
3691
3692 switch (class)
3693 {
3694 case BR_REGS:
3695 case AR_M_REGS:
3696 case AR_I_REGS:
3697 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3698 interaction. We end up with two pseudos with overlapping lifetimes
3699 both of which are equiv to the same constant, and both of which need
3700 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3701 changes depending on the path length, which means the qty_first_reg
3702 check in make_regs_eqv can give different answers at different times.
3703 At some point I'll probably need a reload_indi pattern to handle
3704 this.
3705
3706 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3707 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3708 non-general registers for good measure. */
3709 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3710 return GR_REGS;
3711
3712 /* This is needed if a pseudo used as a call_operand gets spilled to a
3713 stack slot. */
3714 if (GET_CODE (x) == MEM)
3715 return GR_REGS;
3716 break;
3717
3718 case FR_REGS:
3719 /* Need to go through general registers to get to other class regs. */
3720 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3721 return GR_REGS;
3722
3723 /* This can happen when a paradoxical subreg is an operand to the
3724 muldi3 pattern. */
3725 /* ??? This shouldn't be necessary after instruction scheduling is
3726 enabled, because paradoxical subregs are not accepted by
3727 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3728 stop the paradoxical subreg stupidity in the *_operand functions
3729 in recog.c. */
3730 if (GET_CODE (x) == MEM
3731 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3732 || GET_MODE (x) == QImode))
3733 return GR_REGS;
3734
3735 /* This can happen because of the ior/and/etc patterns that accept FP
3736 registers as operands. If the third operand is a constant, then it
3737 needs to be reloaded into a FP register. */
3738 if (GET_CODE (x) == CONST_INT)
3739 return GR_REGS;
3740
3741 /* This can happen because of register elimination in a muldi3 insn.
3742 E.g. `26107 * (unsigned long)&u'. */
3743 if (GET_CODE (x) == PLUS)
3744 return GR_REGS;
3745 break;
3746
3747 case PR_REGS:
3748 /* ??? This happens if we cse/gcse a BImode value across a call,
3749 and the function has a nonlocal goto. This is because global
3750 does not allocate call crossing pseudos to hard registers when
3751 current_function_has_nonlocal_goto is true. This is relatively
3752 common for C++ programs that use exceptions. To reproduce,
3753 return NO_REGS and compile libstdc++. */
3754 if (GET_CODE (x) == MEM)
3755 return GR_REGS;
3756
3757 /* This can happen when we take a BImode subreg of a DImode value,
3758 and that DImode value winds up in some non-GR register. */
3759 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3760 return GR_REGS;
3761 break;
3762
3763 case GR_REGS:
3764 /* Since we have no offsettable memory addresses, we need a temporary
3765 to hold the address of the second word. */
3766 if (mode == TImode)
3767 return GR_REGS;
3768 break;
3769
3770 default:
3771 break;
3772 }
3773
3774 return NO_REGS;
3775 }
3776
3777 \f
3778 /* Emit text to declare externally defined variables and functions, because
3779 the Intel assembler does not support undefined externals. */
3780
3781 void
3782 ia64_asm_output_external (file, decl, name)
3783 FILE *file;
3784 tree decl;
3785 const char *name;
3786 {
3787 int save_referenced;
3788
3789 /* GNU as does not need anything here. */
3790 if (TARGET_GNU_AS)
3791 return;
3792
3793 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3794 the linker when we do this, so we need to be careful not to do this for
3795 builtin functions which have no library equivalent. Unfortunately, we
3796 can't tell here whether or not a function will actually be called by
3797 expand_expr, so we pull in library functions even if we may not need
3798 them later. */
3799 if (! strcmp (name, "__builtin_next_arg")
3800 || ! strcmp (name, "alloca")
3801 || ! strcmp (name, "__builtin_constant_p")
3802 || ! strcmp (name, "__builtin_args_info"))
3803 return;
3804
3805 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3806 restore it. */
3807 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3808 if (TREE_CODE (decl) == FUNCTION_DECL)
3809 {
3810 fprintf (file, "%s", TYPE_ASM_OP);
3811 assemble_name (file, name);
3812 putc (',', file);
3813 fprintf (file, TYPE_OPERAND_FMT, "function");
3814 putc ('\n', file);
3815 }
3816 ASM_GLOBALIZE_LABEL (file, name);
3817 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3818 }
3819 \f
3820 /* Parse the -mfixed-range= option string. */
3821
3822 static void
3823 fix_range (const_str)
3824 const char *const_str;
3825 {
3826 int i, first, last;
3827 char *str, *dash, *comma;
3828
3829 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3830 REG2 are either register names or register numbers. The effect
3831 of this option is to mark the registers in the range from REG1 to
3832 REG2 as ``fixed'' so they won't be used by the compiler. This is
3833 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3834
3835 i = strlen (const_str);
3836 str = (char *) alloca (i + 1);
3837 memcpy (str, const_str, i + 1);
3838
3839 while (1)
3840 {
3841 dash = strchr (str, '-');
3842 if (!dash)
3843 {
3844 warning ("value of -mfixed-range must have form REG1-REG2");
3845 return;
3846 }
3847 *dash = '\0';
3848
3849 comma = strchr (dash + 1, ',');
3850 if (comma)
3851 *comma = '\0';
3852
3853 first = decode_reg_name (str);
3854 if (first < 0)
3855 {
3856 warning ("unknown register name: %s", str);
3857 return;
3858 }
3859
3860 last = decode_reg_name (dash + 1);
3861 if (last < 0)
3862 {
3863 warning ("unknown register name: %s", dash + 1);
3864 return;
3865 }
3866
3867 *dash = '-';
3868
3869 if (first > last)
3870 {
3871 warning ("%s-%s is an empty range", str, dash + 1);
3872 return;
3873 }
3874
3875 for (i = first; i <= last; ++i)
3876 fixed_regs[i] = call_used_regs[i] = 1;
3877
3878 if (!comma)
3879 break;
3880
3881 *comma = ',';
3882 str = comma + 1;
3883 }
3884 }
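/* Example usage (hypothetical option strings, for illustration only):

     -mfixed-range=f32-f127
     -mfixed-range=f32-f127,f2-f5

   Each comma-separated REG1-REG2 pair is marked both fixed and call-used,
   so the register allocator will never use those registers.  */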
3885
3886 /* Called to register all of our global variables with the garbage
3887 collector. */
3888
3889 static void
3890 ia64_add_gc_roots ()
3891 {
3892 ggc_add_rtx_root (&ia64_compare_op0, 1);
3893 ggc_add_rtx_root (&ia64_compare_op1, 1);
3894 }
3895
3896 static void
3897 ia64_init_machine_status (p)
3898 struct function *p;
3899 {
3900 p->machine =
3901 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3902 }
3903
3904 static void
3905 ia64_mark_machine_status (p)
3906 struct function *p;
3907 {
3908 struct machine_function *machine = p->machine;
3909
3910 if (machine)
3911 {
3912 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3913 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3914 ggc_mark_rtx (machine->ia64_gp_save);
3915 }
3916 }
3917
3918 static void
3919 ia64_free_machine_status (p)
3920 struct function *p;
3921 {
3922 free (p->machine);
3923 p->machine = NULL;
3924 }
3925
3926 /* Handle TARGET_OPTIONS switches. */
3927
3928 void
3929 ia64_override_options ()
3930 {
3931 if (TARGET_AUTO_PIC)
3932 target_flags |= MASK_CONST_GP;
3933
3934 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3935 {
3936 warning ("cannot optimize division for both latency and throughput");
3937 target_flags &= ~MASK_INLINE_DIV_THR;
3938 }
3939
3940 if (ia64_fixed_range_string)
3941 fix_range (ia64_fixed_range_string);
3942
3943 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3944 flag_schedule_insns_after_reload = 0;
3945
3946 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3947
3948 init_machine_status = ia64_init_machine_status;
3949 mark_machine_status = ia64_mark_machine_status;
3950 free_machine_status = ia64_free_machine_status;
3951
3952 ia64_add_gc_roots ();
3953 }
3954 \f
3955 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3956 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3957 static enum attr_type ia64_safe_type PARAMS((rtx));
3958
3959 static enum attr_itanium_requires_unit0
3960 ia64_safe_itanium_requires_unit0 (insn)
3961 rtx insn;
3962 {
3963 if (recog_memoized (insn) >= 0)
3964 return get_attr_itanium_requires_unit0 (insn);
3965 else
3966 return ITANIUM_REQUIRES_UNIT0_NO;
3967 }
3968
3969 static enum attr_itanium_class
3970 ia64_safe_itanium_class (insn)
3971 rtx insn;
3972 {
3973 if (recog_memoized (insn) >= 0)
3974 return get_attr_itanium_class (insn);
3975 else
3976 return ITANIUM_CLASS_UNKNOWN;
3977 }
3978
3979 static enum attr_type
3980 ia64_safe_type (insn)
3981 rtx insn;
3982 {
3983 if (recog_memoized (insn) >= 0)
3984 return get_attr_type (insn);
3985 else
3986 return TYPE_UNKNOWN;
3987 }
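/* The ia64_safe_* wrappers above exist because recog_memoized returns a
   negative insn code for anything that matches no pattern (an asm, for
   example); the get_attr_* accessors cannot be called in that case, so a
   neutral "unknown"/"no" value is returned instead.  */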
3988 \f
3989 /* The following collection of routines emits instruction group stop bits as
3990 necessary to avoid dependencies. */
3991
3992 /* Need to track some additional registers as far as serialization is
3993 concerned so we can properly handle br.call and br.ret. We could
3994 make these registers visible to gcc, but since these registers are
3995 never explicitly used in gcc generated code, it seems wasteful to
3996 do so (plus it would make the call and return patterns needlessly
3997 complex). */
3998 #define REG_GP (GR_REG (1))
3999 #define REG_RP (BR_REG (0))
4000 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4001 /* This is used for volatile asms which may require a stop bit immediately
4002 before and after them. */
4003 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4004 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4005 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
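/* AR_UNAT_BIT_0 through AR_UNAT_BIT_0 + 63 model the 64 individual bits of
   ar.unat separately, so that spills and fills that touch different UNAT
   bits (see the st8.spill/ld8.fill handling in rtx_needs_barrier) are not
   treated as conflicting writes to a single register.  */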
4006
4007 /* For each register, we keep track of how it has been written in the
4008 current instruction group.
4009
4010 If a register is written unconditionally (no qualifying predicate),
4011 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4012
4013 If a register is written if its qualifying predicate P is true, we
4014 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4015 may be written again by the complement of P (P^1) and when this happens,
4016 WRITE_COUNT gets set to 2.
4017
4018 The result of this is that whenever an insn attempts to write a register
4019 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4020
4021 If a predicate register is written by a floating-point insn, we set
4022 WRITTEN_BY_FP to true.
4023
4024 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4025 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
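/* For example (a hypothetical sequence, for illustration only):

     cmp.eq p6, p7 = r8, r9 ;;
     (p6) mov r14 = r2
     (p7) mov r14 = r3

   The first predicated move sets r14's WRITE_COUNT to 1 and FIRST_PRED to
   p6; the second writes r14 under the complementary predicate p7, so
   WRITE_COUNT becomes 2 and no stop bit is needed between the two moves.  */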
4026
4027 struct reg_write_state
4028 {
4029 unsigned int write_count : 2;
4030 unsigned int first_pred : 16;
4031 unsigned int written_by_fp : 1;
4032 unsigned int written_by_and : 1;
4033 unsigned int written_by_or : 1;
4034 };
4035
4036 /* Cumulative info for the current instruction group. */
4037 struct reg_write_state rws_sum[NUM_REGS];
4038 /* Info for the current instruction. This gets copied to rws_sum after a
4039 stop bit is emitted. */
4040 struct reg_write_state rws_insn[NUM_REGS];
4041
4042 /* Indicates whether this is the first instruction after a stop bit,
4043 in which case we don't need another stop bit. Without this, we hit
4044 the abort in ia64_variable_issue when scheduling an alloc. */
4045 static int first_instruction;
4046
4047 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4048 RTL for one instruction. */
4049 struct reg_flags
4050 {
4051 unsigned int is_write : 1; /* Is register being written? */
4052 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4053 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4054 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4055 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4056 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4057 };
4058
4059 static void rws_update PARAMS ((struct reg_write_state *, int,
4060 struct reg_flags, int));
4061 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4062 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4063 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4064 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4065 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4066 static void init_insn_group_barriers PARAMS ((void));
4067 static int group_barrier_needed_p PARAMS ((rtx));
4068 static int safe_group_barrier_needed_p PARAMS ((rtx));
4069
4070 /* Update *RWS for REGNO, which is being written by the current instruction,
4071 with predicate PRED, and associated register flags in FLAGS. */
4072
4073 static void
4074 rws_update (rws, regno, flags, pred)
4075 struct reg_write_state *rws;
4076 int regno;
4077 struct reg_flags flags;
4078 int pred;
4079 {
4080 if (pred)
4081 rws[regno].write_count++;
4082 else
4083 rws[regno].write_count = 2;
4084 rws[regno].written_by_fp |= flags.is_fp;
4085 /* ??? Not tracking and/or across differing predicates. */
4086 rws[regno].written_by_and = flags.is_and;
4087 rws[regno].written_by_or = flags.is_or;
4088 rws[regno].first_pred = pred;
4089 }
4090
4091 /* Handle an access to register REGNO of type FLAGS using predicate register
4092 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4093 a dependency with an earlier instruction in the same group. */
4094
4095 static int
4096 rws_access_regno (regno, flags, pred)
4097 int regno;
4098 struct reg_flags flags;
4099 int pred;
4100 {
4101 int need_barrier = 0;
4102
4103 if (regno >= NUM_REGS)
4104 abort ();
4105
4106 if (! PR_REGNO_P (regno))
4107 flags.is_and = flags.is_or = 0;
4108
4109 if (flags.is_write)
4110 {
4111 int write_count;
4112
4113 /* One insn writes same reg multiple times? */
4114 if (rws_insn[regno].write_count > 0)
4115 abort ();
4116
4117 /* Update info for current instruction. */
4118 rws_update (rws_insn, regno, flags, pred);
4119 write_count = rws_sum[regno].write_count;
4120
4121 switch (write_count)
4122 {
4123 case 0:
4124 /* The register has not been written yet. */
4125 rws_update (rws_sum, regno, flags, pred);
4126 break;
4127
4128 case 1:
4129 /* The register has been written via a predicate. If this is
4130 not a complementary predicate, then we need a barrier. */
4131 /* ??? This assumes that P and P+1 are always complementary
4132 predicates for P even. */
4133 if (flags.is_and && rws_sum[regno].written_by_and)
4134 ;
4135 else if (flags.is_or && rws_sum[regno].written_by_or)
4136 ;
4137 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4138 need_barrier = 1;
4139 rws_update (rws_sum, regno, flags, pred);
4140 break;
4141
4142 case 2:
4143 /* The register has been unconditionally written already. We
4144 need a barrier. */
4145 if (flags.is_and && rws_sum[regno].written_by_and)
4146 ;
4147 else if (flags.is_or && rws_sum[regno].written_by_or)
4148 ;
4149 else
4150 need_barrier = 1;
4151 rws_sum[regno].written_by_and = flags.is_and;
4152 rws_sum[regno].written_by_or = flags.is_or;
4153 break;
4154
4155 default:
4156 abort ();
4157 }
4158 }
4159 else
4160 {
4161 if (flags.is_branch)
4162 {
4163 /* Branches have several RAW exceptions that allow us to avoid
4164 barriers. */
4165
4166 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4167 /* RAW dependencies on branch regs are permissible as long
4168 as the writer is a non-branch instruction. Since we
4169 never generate code that uses a branch register written
4170 by a branch instruction, handling this case is
4171 easy. */
4172 return 0;
4173
4174 if (REGNO_REG_CLASS (regno) == PR_REGS
4175 && ! rws_sum[regno].written_by_fp)
4176 /* The predicates of a branch are available within the
4177 same insn group as long as the predicate was written by
4178 something other than a floating-point instruction. */
4179 return 0;
4180 }
4181
4182 if (flags.is_and && rws_sum[regno].written_by_and)
4183 return 0;
4184 if (flags.is_or && rws_sum[regno].written_by_or)
4185 return 0;
4186
4187 switch (rws_sum[regno].write_count)
4188 {
4189 case 0:
4190 /* The register has not been written yet. */
4191 break;
4192
4193 case 1:
4194 /* The register has been written via a predicate. If this is
4195 not a complementary predicate, then we need a barrier. */
4196 /* ??? This assumes that P and P+1 are always complementary
4197 predicates for P even. */
4198 if ((rws_sum[regno].first_pred ^ 1) != pred)
4199 need_barrier = 1;
4200 break;
4201
4202 case 2:
4203 /* The register has been unconditionally written already. We
4204 need a barrier. */
4205 need_barrier = 1;
4206 break;
4207
4208 default:
4209 abort ();
4210 }
4211 }
4212
4213 return need_barrier;
4214 }
4215
4216 static int
4217 rws_access_reg (reg, flags, pred)
4218 rtx reg;
4219 struct reg_flags flags;
4220 int pred;
4221 {
4222 int regno = REGNO (reg);
4223 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4224
4225 if (n == 1)
4226 return rws_access_regno (regno, flags, pred);
4227 else
4228 {
4229 int need_barrier = 0;
4230 while (--n >= 0)
4231 need_barrier |= rws_access_regno (regno + n, flags, pred);
4232 return need_barrier;
4233 }
4234 }
4235
4236 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4237 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4238
4239 static void
4240 update_set_flags (x, pflags, ppred, pcond)
4241 rtx x;
4242 struct reg_flags *pflags;
4243 int *ppred;
4244 rtx *pcond;
4245 {
4246 rtx src = SET_SRC (x);
4247
4248 *pcond = 0;
4249
4250 switch (GET_CODE (src))
4251 {
4252 case CALL:
4253 return;
4254
4255 case IF_THEN_ELSE:
4256 if (SET_DEST (x) == pc_rtx)
4257 /* X is a conditional branch. */
4258 return;
4259 else
4260 {
4261 int is_complemented = 0;
4262
4263 /* X is a conditional move. */
4264 rtx cond = XEXP (src, 0);
4265 if (GET_CODE (cond) == EQ)
4266 is_complemented = 1;
4267 cond = XEXP (cond, 0);
4268 if (GET_CODE (cond) != REG
4269 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4270 abort ();
4271 *pcond = cond;
4272 if (XEXP (src, 1) == SET_DEST (x)
4273 || XEXP (src, 2) == SET_DEST (x))
4274 {
4275 /* X is a conditional move that conditionally writes the
4276 destination. */
4277
4278 /* We need another complement in this case. */
4279 if (XEXP (src, 1) == SET_DEST (x))
4280 is_complemented = ! is_complemented;
4281
4282 *ppred = REGNO (cond);
4283 if (is_complemented)
4284 ++*ppred;
4285 }
4286
4287 /* ??? If this is a conditional write to the dest, then this
4288 instruction does not actually read one source. This probably
4289 doesn't matter, because that source is also the dest. */
4290 /* ??? Multiple writes to predicate registers are allowed
4291 if they are all AND type compares, or if they are all OR
4292 type compares. We do not generate such instructions
4293 currently. */
4294 }
4295 /* ... fall through ... */
4296
4297 default:
4298 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4299 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4300 /* Set pflags->is_fp to 1 so that we know we're dealing
4301 with a floating point comparison when processing the
4302 destination of the SET. */
4303 pflags->is_fp = 1;
4304
4305 /* Discover if this is a parallel comparison. We only handle
4306 and.orcm and or.andcm at present, since we must retain a
4307 strict inverse on the predicate pair. */
4308 else if (GET_CODE (src) == AND)
4309 pflags->is_and = 1;
4310 else if (GET_CODE (src) == IOR)
4311 pflags->is_or = 1;
4312
4313 break;
4314 }
4315 }
4316
4317 /* Subroutine of rtx_needs_barrier; this function determines whether the
4318 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4319 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4320 for this insn. */
4321
4322 static int
4323 set_src_needs_barrier (x, flags, pred, cond)
4324 rtx x;
4325 struct reg_flags flags;
4326 int pred;
4327 rtx cond;
4328 {
4329 int need_barrier = 0;
4330 rtx dst;
4331 rtx src = SET_SRC (x);
4332
4333 if (GET_CODE (src) == CALL)
4334 /* We don't need to worry about the result registers that
4335 get written by subroutine call. */
4336 return rtx_needs_barrier (src, flags, pred);
4337 else if (SET_DEST (x) == pc_rtx)
4338 {
4339 /* X is a conditional branch. */
4340 /* ??? This seems redundant, as the caller sets this bit for
4341 all JUMP_INSNs. */
4342 flags.is_branch = 1;
4343 return rtx_needs_barrier (src, flags, pred);
4344 }
4345
4346 need_barrier = rtx_needs_barrier (src, flags, pred);
4347
4348 /* This instruction unconditionally uses a predicate register. */
4349 if (cond)
4350 need_barrier |= rws_access_reg (cond, flags, 0);
4351
4352 dst = SET_DEST (x);
4353 if (GET_CODE (dst) == ZERO_EXTRACT)
4354 {
4355 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4356 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4357 dst = XEXP (dst, 0);
4358 }
4359 return need_barrier;
4360 }
4361
4362 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4363 Return 1 if this access creates a dependency with an earlier instruction
4364 in the same group. */
4365
4366 static int
4367 rtx_needs_barrier (x, flags, pred)
4368 rtx x;
4369 struct reg_flags flags;
4370 int pred;
4371 {
4372 int i, j;
4373 int is_complemented = 0;
4374 int need_barrier = 0;
4375 const char *format_ptr;
4376 struct reg_flags new_flags;
4377 rtx cond = 0;
4378
4379 if (! x)
4380 return 0;
4381
4382 new_flags = flags;
4383
4384 switch (GET_CODE (x))
4385 {
4386 case SET:
4387 update_set_flags (x, &new_flags, &pred, &cond);
4388 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4389 if (GET_CODE (SET_SRC (x)) != CALL)
4390 {
4391 new_flags.is_write = 1;
4392 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4393 }
4394 break;
4395
4396 case CALL:
4397 new_flags.is_write = 0;
4398 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4399
4400 /* Avoid multiple register writes, in case this is a pattern with
4401 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4402 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4403 {
4404 new_flags.is_write = 1;
4405 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4406 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4407 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4408 }
4409 break;
4410
4411 case COND_EXEC:
4412 /* X is a predicated instruction. */
4413
4414 cond = COND_EXEC_TEST (x);
4415 if (pred)
4416 abort ();
4417 need_barrier = rtx_needs_barrier (cond, flags, 0);
4418
4419 if (GET_CODE (cond) == EQ)
4420 is_complemented = 1;
4421 cond = XEXP (cond, 0);
4422 if (GET_CODE (cond) != REG
4423 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4424 abort ();
4425 pred = REGNO (cond);
4426 if (is_complemented)
4427 ++pred;
4428
4429 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4430 return need_barrier;
4431
4432 case CLOBBER:
4433 case USE:
4434 /* Clobber & use are for earlier compiler-phases only. */
4435 break;
4436
4437 case ASM_OPERANDS:
4438 case ASM_INPUT:
4439 /* We always emit stop bits for traditional asms. We emit stop bits
4440 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4441 if (GET_CODE (x) != ASM_OPERANDS
4442 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4443 {
4444 /* Avoid writing the register multiple times if we have multiple
4445 asm outputs. This avoids an abort in rws_access_reg. */
4446 if (! rws_insn[REG_VOLATILE].write_count)
4447 {
4448 new_flags.is_write = 1;
4449 rws_access_regno (REG_VOLATILE, new_flags, pred);
4450 }
4451 return 1;
4452 }
4453
4454 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4455 We cannot just fall through here, since then we would be confused
4456 by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate
4457 traditional asms, unlike their normal usage. */
4458
4459 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4460 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4461 need_barrier = 1;
4462 break;
4463
4464 case PARALLEL:
4465 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4466 {
4467 rtx pat = XVECEXP (x, 0, i);
4468 if (GET_CODE (pat) == SET)
4469 {
4470 update_set_flags (pat, &new_flags, &pred, &cond);
4471 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4472 }
4473 else if (GET_CODE (pat) == USE
4474 || GET_CODE (pat) == CALL
4475 || GET_CODE (pat) == ASM_OPERANDS)
4476 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4477 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4478 abort ();
4479 }
4480 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4481 {
4482 rtx pat = XVECEXP (x, 0, i);
4483 if (GET_CODE (pat) == SET)
4484 {
4485 if (GET_CODE (SET_SRC (pat)) != CALL)
4486 {
4487 new_flags.is_write = 1;
4488 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4489 pred);
4490 }
4491 }
4492 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4493 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4494 }
4495 break;
4496
4497 case SUBREG:
4498 x = SUBREG_REG (x);
4499 /* FALLTHRU */
4500 case REG:
4501 if (REGNO (x) == AR_UNAT_REGNUM)
4502 {
4503 for (i = 0; i < 64; ++i)
4504 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4505 }
4506 else
4507 need_barrier = rws_access_reg (x, flags, pred);
4508 break;
4509
4510 case MEM:
4511 /* Find the regs used in memory address computation. */
4512 new_flags.is_write = 0;
4513 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4514 break;
4515
4516 case CONST_INT: case CONST_DOUBLE:
4517 case SYMBOL_REF: case LABEL_REF: case CONST:
4518 break;
4519
4520 /* Operators with side-effects. */
4521 case POST_INC: case POST_DEC:
4522 if (GET_CODE (XEXP (x, 0)) != REG)
4523 abort ();
4524
4525 new_flags.is_write = 0;
4526 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4527 new_flags.is_write = 1;
4528 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4529 break;
4530
4531 case POST_MODIFY:
4532 if (GET_CODE (XEXP (x, 0)) != REG)
4533 abort ();
4534
4535 new_flags.is_write = 0;
4536 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4537 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4538 new_flags.is_write = 1;
4539 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4540 break;
4541
4542 /* Handle common unary and binary ops for efficiency. */
4543 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4544 case MOD: case UDIV: case UMOD: case AND: case IOR:
4545 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4546 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4547 case NE: case EQ: case GE: case GT: case LE:
4548 case LT: case GEU: case GTU: case LEU: case LTU:
4549 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4550 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4551 break;
4552
4553 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4554 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4555 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4556 case SQRT: case FFS:
4557 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4558 break;
4559
4560 case UNSPEC:
4561 switch (XINT (x, 1))
4562 {
4563 case 1: /* st8.spill */
4564 case 2: /* ld8.fill */
4565 {
4566 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4567 HOST_WIDE_INT bit = (offset >> 3) & 63;
4568
4569 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4570 new_flags.is_write = (XINT (x, 1) == 1);
4571 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4572 new_flags, pred);
4573 break;
4574 }
4575
4576 case 3: /* stf.spill */
4577 case 4: /* ldf.spill */
4578 case 8: /* popcnt */
4579 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4580 break;
4581
4582 case 7: /* pred_rel_mutex */
4583 case 9: /* pic call */
4584 case 12: /* mf */
4585 case 19: /* fetchadd_acq */
4586 case 20: /* mov = ar.bsp */
4587 case 21: /* flushrs */
4588 case 22: /* bundle selector */
4589 case 23: /* cycle display */
4590 break;
4591
4592 case 24: /* addp4 */
4593 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4594 break;
4595
4596 case 5: /* recip_approx */
4597 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4598 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4599 break;
4600
4601 case 13: /* cmpxchg_acq */
4602 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4603 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4604 break;
4605
4606 default:
4607 abort ();
4608 }
4609 break;
4610
4611 case UNSPEC_VOLATILE:
4612 switch (XINT (x, 1))
4613 {
4614 case 0: /* alloc */
4615 /* Alloc must always be the first instruction of a group.
4616 We force this by always returning true. */
4617 /* ??? We might get better scheduling if we explicitly check for
4618 input/local/output register dependencies, and modify the
4619 scheduler so that alloc is always reordered to the start of
4620 the current group. We could then eliminate all of the
4621 first_instruction code. */
4622 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4623
4624 new_flags.is_write = 1;
4625 rws_access_regno (REG_AR_CFM, new_flags, pred);
4626 return 1;
4627
4628 case 1: /* blockage */
4629 case 2: /* insn group barrier */
4630 return 0;
4631
4632 case 5: /* set_bsp */
4633 need_barrier = 1;
4634 break;
4635
4636 case 7: /* pred.rel.mutex */
4637 case 8: /* safe_across_calls all */
4638 case 9: /* safe_across_calls normal */
4639 return 0;
4640
4641 default:
4642 abort ();
4643 }
4644 break;
4645
4646 case RETURN:
4647 new_flags.is_write = 0;
4648 need_barrier = rws_access_regno (REG_RP, flags, pred);
4649 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4650
4651 new_flags.is_write = 1;
4652 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4653 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4654 break;
4655
4656 default:
4657 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4658 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4659 switch (format_ptr[i])
4660 {
4661 case '0': /* unused field */
4662 case 'i': /* integer */
4663 case 'n': /* note */
4664 case 'w': /* wide integer */
4665 case 's': /* pointer to string */
4666 case 'S': /* optional pointer to string */
4667 break;
4668
4669 case 'e':
4670 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4671 need_barrier = 1;
4672 break;
4673
4674 case 'E':
4675 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4676 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4677 need_barrier = 1;
4678 break;
4679
4680 default:
4681 abort ();
4682 }
4683 break;
4684 }
4685 return need_barrier;
4686 }
4687
4688 /* Clear out the state for group_barrier_needed_p at the start of a
4689 sequence of insns. */
4690
4691 static void
4692 init_insn_group_barriers ()
4693 {
4694 memset (rws_sum, 0, sizeof (rws_sum));
4695 first_instruction = 1;
4696 }
4697
4698 /* Given the current state, recorded by previous calls to this function,
4699 determine whether a group barrier (a stop bit) is necessary before INSN.
4700 Return nonzero if so. */
4701
4702 static int
4703 group_barrier_needed_p (insn)
4704 rtx insn;
4705 {
4706 rtx pat;
4707 int need_barrier = 0;
4708 struct reg_flags flags;
4709
4710 memset (&flags, 0, sizeof (flags));
4711 switch (GET_CODE (insn))
4712 {
4713 case NOTE:
4714 break;
4715
4716 case BARRIER:
4717 /* A barrier doesn't imply an instruction group boundary. */
4718 break;
4719
4720 case CODE_LABEL:
4721 memset (rws_insn, 0, sizeof (rws_insn));
4722 return 1;
4723
4724 case CALL_INSN:
4725 flags.is_branch = 1;
4726 flags.is_sibcall = SIBLING_CALL_P (insn);
4727 memset (rws_insn, 0, sizeof (rws_insn));
4728
4729 /* Don't bundle a call following another call. */
4730 if ((pat = prev_active_insn (insn))
4731 && GET_CODE (pat) == CALL_INSN)
4732 {
4733 need_barrier = 1;
4734 break;
4735 }
4736
4737 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4738 break;
4739
4740 case JUMP_INSN:
4741 flags.is_branch = 1;
4742
4743 /* Don't bundle a jump following a call. */
4744 if ((pat = prev_active_insn (insn))
4745 && GET_CODE (pat) == CALL_INSN)
4746 {
4747 need_barrier = 1;
4748 break;
4749 }
4750 /* FALLTHRU */
4751
4752 case INSN:
4753 if (GET_CODE (PATTERN (insn)) == USE
4754 || GET_CODE (PATTERN (insn)) == CLOBBER)
4755 /* Don't care about USE and CLOBBER "insns"---those are used to
4756 indicate to the optimizer that it shouldn't get rid of
4757 certain operations. */
4758 break;
4759
4760 pat = PATTERN (insn);
4761
4762 /* Ug. Hack hacks hacked elsewhere. */
4763 switch (recog_memoized (insn))
4764 {
4765 /* We play dependency tricks with the epilogue in order
4766 to get proper schedules. Undo this for dv analysis. */
4767 case CODE_FOR_epilogue_deallocate_stack:
4768 pat = XVECEXP (pat, 0, 0);
4769 break;
4770
4771 /* The pattern we use for br.cloop confuses the code above.
4772 The second element of the vector is representative. */
4773 case CODE_FOR_doloop_end_internal:
4774 pat = XVECEXP (pat, 0, 1);
4775 break;
4776
4777 /* Doesn't generate code. */
4778 case CODE_FOR_pred_rel_mutex:
4779 case CODE_FOR_prologue_use:
4780 return 0;
4781
4782 default:
4783 break;
4784 }
4785
4786 memset (rws_insn, 0, sizeof (rws_insn));
4787 need_barrier = rtx_needs_barrier (pat, flags, 0);
4788
4789 /* Check to see if the previous instruction was a volatile
4790 asm. */
4791 if (! need_barrier)
4792 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4793 break;
4794
4795 default:
4796 abort ();
4797 }
4798
4799 if (first_instruction)
4800 {
4801 need_barrier = 0;
4802 first_instruction = 0;
4803 }
4804
4805 return need_barrier;
4806 }
4807
4808 /* Like group_barrier_needed_p, but do not clobber the current state. */
4809
4810 static int
4811 safe_group_barrier_needed_p (insn)
4812 rtx insn;
4813 {
4814 struct reg_write_state rws_saved[NUM_REGS];
4815 int saved_first_instruction;
4816 int t;
4817
4818 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4819 saved_first_instruction = first_instruction;
4820
4821 t = group_barrier_needed_p (insn);
4822
4823 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4824 first_instruction = saved_first_instruction;
4825
4826 return t;
4827 }
4828
4829 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
4830 as necessary to eliminate dependencies. This function assumes that
4831 a final instruction scheduling pass has been run which has already
4832 inserted most of the necessary stop bits. This function only inserts
4833 new ones at basic block boundaries, since these are invisible to the
4834 scheduler. */
4835
4836 static void
4837 emit_insn_group_barriers (dump, insns)
4838 FILE *dump;
4839 rtx insns;
4840 {
4841 rtx insn;
4842 rtx last_label = 0;
4843 int insns_since_last_label = 0;
4844
4845 init_insn_group_barriers ();
4846
4847 for (insn = insns; insn; insn = NEXT_INSN (insn))
4848 {
4849 if (GET_CODE (insn) == CODE_LABEL)
4850 {
4851 if (insns_since_last_label)
4852 last_label = insn;
4853 insns_since_last_label = 0;
4854 }
4855 else if (GET_CODE (insn) == NOTE
4856 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4857 {
4858 if (insns_since_last_label)
4859 last_label = insn;
4860 insns_since_last_label = 0;
4861 }
4862 else if (GET_CODE (insn) == INSN
4863 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4864 && XINT (PATTERN (insn), 1) == 2)
4865 {
4866 init_insn_group_barriers ();
4867 last_label = 0;
4868 }
4869 else if (INSN_P (insn))
4870 {
4871 insns_since_last_label = 1;
4872
4873 if (group_barrier_needed_p (insn))
4874 {
4875 if (last_label)
4876 {
4877 if (dump)
4878 fprintf (dump, "Emitting stop before label %d\n",
4879 INSN_UID (last_label));
4880 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4881 insn = last_label;
4882
4883 init_insn_group_barriers ();
4884 last_label = 0;
4885 }
4886 }
4887 }
4888 }
4889 }
4890
4891 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4892 This function has to emit all necessary group barriers. */
4893
4894 static void
4895 emit_all_insn_group_barriers (dump, insns)
4896 FILE *dump ATTRIBUTE_UNUSED;
4897 rtx insns;
4898 {
4899 rtx insn;
4900
4901 init_insn_group_barriers ();
4902
4903 for (insn = insns; insn; insn = NEXT_INSN (insn))
4904 {
4905 if (GET_CODE (insn) == BARRIER)
4906 {
4907 rtx last = prev_active_insn (insn);
4908
4909 if (! last)
4910 continue;
4911 if (GET_CODE (last) == JUMP_INSN
4912 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
4913 last = prev_active_insn (last);
4914 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
4915 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
4916
4917 init_insn_group_barriers ();
4918 }
4919 else if (INSN_P (insn))
4920 {
4921 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
4922 init_insn_group_barriers ();
4923 else if (group_barrier_needed_p (insn))
4924 {
4925 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4926 init_insn_group_barriers ();
4927 group_barrier_needed_p (insn);
4928 }
4929 }
4930 }
4931 }
4932 \f
4933 static int errata_find_address_regs PARAMS ((rtx *, void *));
4934 static void errata_emit_nops PARAMS ((rtx));
4935 static void fixup_errata PARAMS ((void));
4936
4937 /* This structure is used to track some details about the previous insn
4938 groups so we can determine if it may be necessary to insert NOPs to
4939 work around hardware errata. */
4940 static struct group
4941 {
4942 HARD_REG_SET p_reg_set;
4943 HARD_REG_SET gr_reg_conditionally_set;
4944 } last_group[2];
4945
4946 /* Index into the last_group array. */
4947 static int group_idx;
4948
4949 /* Called through for_each_rtx; determines if a hard register that was
4950 conditionally set in the previous group is used as an address register.
4951 It ensures that for_each_rtx returns 1 in that case. */
4952 static int
4953 errata_find_address_regs (xp, data)
4954 rtx *xp;
4955 void *data ATTRIBUTE_UNUSED;
4956 {
4957 rtx x = *xp;
4958 if (GET_CODE (x) != MEM)
4959 return 0;
4960 x = XEXP (x, 0);
4961 if (GET_CODE (x) == POST_MODIFY)
4962 x = XEXP (x, 0);
4963 if (GET_CODE (x) == REG)
4964 {
4965 struct group *prev_group = last_group + (group_idx ^ 1);
4966 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4967 REGNO (x)))
4968 return 1;
4969 return -1;
4970 }
4971 return 0;
4972 }
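/* Note on the return values above: for_each_rtx stops the traversal and
   returns nonzero as soon as the callback returns a positive value, while a
   negative return value only tells it not to descend into the sub-rtxes of
   the current expression.  */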
4973
4974 /* Called for each insn; this function keeps track of the state in
4975 last_group and emits additional NOPs if necessary to work around
4976 an Itanium A/B step erratum. */
4977 static void
4978 errata_emit_nops (insn)
4979 rtx insn;
4980 {
4981 struct group *this_group = last_group + group_idx;
4982 struct group *prev_group = last_group + (group_idx ^ 1);
4983 rtx pat = PATTERN (insn);
4984 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4985 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4986 enum attr_type type;
4987 rtx set = real_pat;
4988
4989 if (GET_CODE (real_pat) == USE
4990 || GET_CODE (real_pat) == CLOBBER
4991 || GET_CODE (real_pat) == ASM_INPUT
4992 || GET_CODE (real_pat) == ADDR_VEC
4993 || GET_CODE (real_pat) == ADDR_DIFF_VEC
4994 || asm_noperands (PATTERN (insn)) >= 0)
4995 return;
4996
4997 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4998 parts of it. */
4999
5000 if (GET_CODE (set) == PARALLEL)
5001 {
5002 int i;
5003 set = XVECEXP (real_pat, 0, 0);
5004 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5005 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5006 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5007 {
5008 set = 0;
5009 break;
5010 }
5011 }
5012
5013 if (set && GET_CODE (set) != SET)
5014 set = 0;
5015
5016 type = get_attr_type (insn);
5017
5018 if (type == TYPE_F
5019 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5020 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5021
5022 if ((type == TYPE_M || type == TYPE_A) && cond && set
5023 && REG_P (SET_DEST (set))
5024 && GET_CODE (SET_SRC (set)) != PLUS
5025 && GET_CODE (SET_SRC (set)) != MINUS
5026 && (GET_CODE (SET_SRC (set)) != ASHIFT
5027 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5028 && (GET_CODE (SET_SRC (set)) != MEM
5029 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5030 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5031 {
5032 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5033 || ! REG_P (XEXP (cond, 0)))
5034 abort ();
5035
5036 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5037 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5038 }
5039 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5040 {
5041 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5042 emit_insn_before (gen_nop (), insn);
5043 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5044 group_idx = 0;
5045 memset (last_group, 0, sizeof last_group);
5046 }
5047 }
5048
5049 /* Emit extra nops if they are required to work around hardware errata. */
5050
5051 static void
5052 fixup_errata ()
5053 {
5054 rtx insn;
5055
5056 if (! TARGET_B_STEP)
5057 return;
5058
5059 group_idx = 0;
5060 memset (last_group, 0, sizeof last_group);
5061
5062 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5063 {
5064 if (!INSN_P (insn))
5065 continue;
5066
5067 if (ia64_safe_type (insn) == TYPE_S)
5068 {
5069 group_idx ^= 1;
5070 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5071 }
5072 else
5073 errata_emit_nops (insn);
5074 }
5075 }
5076 \f
5077 /* Instruction scheduling support. */
5078 /* Describe one bundle. */
5079
5080 struct bundle
5081 {
5082 /* Zero if there's no possibility of a stop in this bundle other than
5083 at the end, otherwise the position of the optional stop bit. */
5084 int possible_stop;
5085 /* The types of the three slots. */
5086 enum attr_type t[3];
5087 /* The pseudo op to be emitted into the assembler output. */
5088 const char *name;
5089 };
5090
5091 #define NR_BUNDLES 10
5092
5093 /* A list of all available bundles. */
5094
5095 static const struct bundle bundle[NR_BUNDLES] =
5096 {
5097 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5098 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5099 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5100 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5101 #if NR_BUNDLES == 10
5102 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5103 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5104 #endif
5105 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5106 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5107 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5108 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5109 it matches an L type insn. Otherwise we'll try to generate L type
5110 nops. */
5111 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5112 };
5113
5114 /* Describe a packet of instructions. Packets consist of two bundles that
5115 are visible to the hardware in one scheduling window. */
5116
5117 struct ia64_packet
5118 {
5119 const struct bundle *t1, *t2;
5120 /* Precomputed value of the first split issue in this packet if a cycle
5121 starts at its beginning. */
5122 int first_split;
5123 /* For convenience, the insn types are replicated here so we don't have
5124 to go through T1 and T2 all the time. */
5125 enum attr_type t[6];
5126 };
5127
5128 /* An array containing all possible packets. */
5129 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5130 static struct ia64_packet packets[NR_PACKETS];
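/* With NR_BUNDLES == 10 this gives 100 possible packets. The packets array
   is presumably filled in elsewhere in this file by pairing every bundle
   with every other bundle and precomputing first_split for each pair.  */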
5131
5132 /* Map attr_type to a string with the name. */
5133
5134 static const char *const type_names[] =
5135 {
5136 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5137 };
5138
5139 /* Nonzero if we should insert stop bits into the schedule. */
5140 int ia64_final_schedule = 0;
5141
5142 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5143 static rtx ia64_single_set PARAMS ((rtx));
5144 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5145 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5146 static void maybe_rotate PARAMS ((FILE *));
5147 static void finish_last_head PARAMS ((FILE *, int));
5148 static void rotate_one_bundle PARAMS ((FILE *));
5149 static void rotate_two_bundles PARAMS ((FILE *));
5150 static void nop_cycles_until PARAMS ((int, FILE *));
5151 static void cycle_end_fill_slots PARAMS ((FILE *));
5152 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5153 static int get_split PARAMS ((const struct ia64_packet *, int));
5154 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5155 const struct ia64_packet *, int));
5156 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5157 rtx *, enum attr_type *, int));
5158 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5159 static void dump_current_packet PARAMS ((FILE *));
5160 static void schedule_stop PARAMS ((FILE *));
5161 static rtx gen_nop_type PARAMS ((enum attr_type));
5162 static void ia64_emit_nops PARAMS ((void));
5163
5164 /* Map a bundle number to its pseudo-op. */
5165
5166 const char *
5167 get_bundle_name (b)
5168 int b;
5169 {
5170 return bundle[b].name;
5171 }
5172
5173 /* Compute the slot which will cause a split issue in packet P if the
5174 current cycle begins at slot BEGIN. */
5175
5176 static int
5177 itanium_split_issue (p, begin)
5178 const struct ia64_packet *p;
5179 int begin;
5180 {
5181 int type_count[TYPE_S];
5182 int i;
5183 int split = 6;
5184
5185 if (begin < 3)
5186 {
5187 /* Always split before and after MMF. */
5188 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5189 return 3;
5190 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5191 return 3;
5192 /* Always split after MBB and BBB. */
5193 if (p->t[1] == TYPE_B)
5194 return 3;
5195 /* Split after first bundle in MIB BBB combination. */
5196 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5197 return 3;
5198 }
5199
5200 memset (type_count, 0, sizeof type_count);
5201 for (i = begin; i < split; i++)
5202 {
5203 enum attr_type t0 = p->t[i];
5204 /* An MLX bundle reserves the same units as an MFI bundle. */
5205 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5206 : t0 == TYPE_X ? TYPE_I
5207 : t0);
5208
5209 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5210 2 integer per cycle. */
5211 int max = (t == TYPE_B ? 3 : 2);
5212 if (type_count[t] == max)
5213 return i;
5214
5215 type_count[t]++;
5216 }
5217 return split;
5218 }
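/* For example, with two .mii bundles in the issue window and a cycle
   starting at slot 0, the function above returns 4: the two M slots and the
   first two I slots fit in one cycle, but the I-type instruction in slot 4
   would need a third integer unit, so issue splits there.  */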
5219
5220 /* Return the maximum number of instructions a cpu can issue. */
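/* Six corresponds to the two-bundle, three-slots-per-bundle issue window
   that the packet machinery below models.  */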
5221
5222 static int
5223 ia64_issue_rate ()
5224 {
5225 return 6;
5226 }
5227
5228 /* Helper function - like single_set, but look inside COND_EXEC. */
5229
5230 static rtx
5231 ia64_single_set (insn)
5232 rtx insn;
5233 {
5234 rtx x = PATTERN (insn), ret;
5235 if (GET_CODE (x) == COND_EXEC)
5236 x = COND_EXEC_CODE (x);
5237 if (GET_CODE (x) == SET)
5238 return x;
5239 ret = single_set_2 (insn, x);
5240 if (ret == NULL && GET_CODE (x) == PARALLEL)
5241 {
5242 /* Special case here prologue_allocate_stack and
5243 epilogue_deallocate_stack. Although it is not a classical
5244 single set, the second set is there just to protect it
5245 from moving past FP-relative stack accesses. */
5246 if (XVECLEN (x, 0) == 2
5247 && GET_CODE (XVECEXP (x, 0, 0)) == SET
5248 && GET_CODE (XVECEXP (x, 0, 1)) == SET
5249 && GET_CODE (SET_DEST (XVECEXP (x, 0, 1))) == REG
5250 && SET_DEST (XVECEXP (x, 0, 1)) == SET_SRC (XVECEXP (x, 0, 1))
5251 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5252 ret = XVECEXP (x, 0, 0);
5253 }
5254 return ret;
5255 }
5256
5257 /* Adjust the cost of a scheduling dependency. Return the new cost of
5258 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
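/* A returned cost of 0 means the dependence imposes no extra latency and the
   two insns may issue in the same cycle; larger values give the dependence
   latency in cycles.  */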
5259
5260 static int
5261 ia64_adjust_cost (insn, link, dep_insn, cost)
5262 rtx insn, link, dep_insn;
5263 int cost;
5264 {
5265 enum attr_type dep_type;
5266 enum attr_itanium_class dep_class;
5267 enum attr_itanium_class insn_class;
5268 rtx dep_set, set, src, addr;
5269
5270 if (GET_CODE (PATTERN (insn)) == CLOBBER
5271 || GET_CODE (PATTERN (insn)) == USE
5272 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5273 || GET_CODE (PATTERN (dep_insn)) == USE
5274 /* @@@ Not accurate for indirect calls. */
5275 || GET_CODE (insn) == CALL_INSN
5276 || ia64_safe_type (insn) == TYPE_S)
5277 return 0;
5278
5279 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5280 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5281 return 0;
5282
5283 dep_type = ia64_safe_type (dep_insn);
5284 dep_class = ia64_safe_itanium_class (dep_insn);
5285 insn_class = ia64_safe_itanium_class (insn);
5286
5287 /* Compares that feed a conditional branch can execute in the same
5288 cycle. */
5289 dep_set = ia64_single_set (dep_insn);
5290 set = ia64_single_set (insn);
5291
5292 if (dep_type != TYPE_F
5293 && dep_set
5294 && GET_CODE (SET_DEST (dep_set)) == REG
5295 && PR_REG (REGNO (SET_DEST (dep_set)))
5296 && GET_CODE (insn) == JUMP_INSN)
5297 return 0;
5298
5299 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5300 {
5301 /* ??? Can't find any information in the documentation about whether
5302 a sequence
5303 st [rx] = ra
5304 ld rb = [ry]
5305 splits issue. Assume it doesn't. */
5306 return 0;
5307 }
5308
5309 src = set ? SET_SRC (set) : 0;
5310 addr = 0;
5311 if (set)
5312 {
5313 if (GET_CODE (SET_DEST (set)) == MEM)
5314 addr = XEXP (SET_DEST (set), 0);
5315 else if (GET_CODE (SET_DEST (set)) == SUBREG
5316 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5317 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5318 else
5319 {
5320 addr = src;
5321 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5322 addr = XVECEXP (addr, 0, 0);
5323 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5324 addr = XEXP (addr, 0);
5325 if (GET_CODE (addr) == MEM)
5326 addr = XEXP (addr, 0);
5327 else
5328 addr = 0;
5329 }
5330 }
5331
5332 if (addr && GET_CODE (addr) == POST_MODIFY)
5333 addr = XEXP (addr, 0);
5334
5335 set = ia64_single_set (dep_insn);
5336
5337 if ((dep_class == ITANIUM_CLASS_IALU
5338 || dep_class == ITANIUM_CLASS_ILOG
5339 || dep_class == ITANIUM_CLASS_LD)
5340 && (insn_class == ITANIUM_CLASS_LD
5341 || insn_class == ITANIUM_CLASS_ST))
5342 {
5343 if (! addr || ! set)
5344 abort ();
5345 /* This isn't completely correct - an IALU that feeds an address has
5346 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5347 otherwise. Unfortunately there's no good way to describe this. */
5348 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5349 return cost + 1;
5350 }
5351 if ((dep_class == ITANIUM_CLASS_IALU
5352 || dep_class == ITANIUM_CLASS_ILOG
5353 || dep_class == ITANIUM_CLASS_LD)
5354 && (insn_class == ITANIUM_CLASS_MMMUL
5355 || insn_class == ITANIUM_CLASS_MMSHF
5356 || insn_class == ITANIUM_CLASS_MMSHFI))
5357 return 3;
5358 if (dep_class == ITANIUM_CLASS_FMAC
5359 && (insn_class == ITANIUM_CLASS_FMISC
5360 || insn_class == ITANIUM_CLASS_FCVTFX
5361 || insn_class == ITANIUM_CLASS_XMPY))
5362 return 7;
5363 if ((dep_class == ITANIUM_CLASS_FMAC
5364 || dep_class == ITANIUM_CLASS_FMISC
5365 || dep_class == ITANIUM_CLASS_FCVTFX
5366 || dep_class == ITANIUM_CLASS_XMPY)
5367 && insn_class == ITANIUM_CLASS_STF)
5368 return 8;
5369 if ((dep_class == ITANIUM_CLASS_MMMUL
5370 || dep_class == ITANIUM_CLASS_MMSHF
5371 || dep_class == ITANIUM_CLASS_MMSHFI)
5372 && (insn_class == ITANIUM_CLASS_LD
5373 || insn_class == ITANIUM_CLASS_ST
5374 || insn_class == ITANIUM_CLASS_IALU
5375 || insn_class == ITANIUM_CLASS_ILOG
5376 || insn_class == ITANIUM_CLASS_ISHF))
5377 return 4;
5378
5379 return cost;
5380 }
5381
5382 /* Describe the current state of the Itanium pipeline. */
5383 static struct
5384 {
5385 /* The first slot that is used in the current cycle. */
5386 int first_slot;
5387 /* The next slot to fill. */
5388 int cur;
5389 /* The packet we have selected for the current issue window. */
5390 const struct ia64_packet *packet;
5391 /* The position of the split issue that occurs due to issue width
5392 limitations (6 if there's no split issue). */
5393 int split;
5394 /* Record data about the insns scheduled so far in the same issue
5395 window. The elements up to but not including FIRST_SLOT belong
5396 to the previous cycle, the ones starting with FIRST_SLOT belong
5397 to the current cycle. */
5398 enum attr_type types[6];
5399 rtx insns[6];
5400 int stopbit[6];
5401 /* Nonzero if we decided to schedule a stop bit. */
5402 int last_was_stop;
5403 } sched_data;
5404
5405 /* Temporary arrays; they have enough elements to hold all insns that
5406 can be ready at the same time while scheduling the current block.
5407 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5408 static rtx *sched_ready;
5409 static enum attr_type *sched_types;
5410
5411 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5412 of packet P. */
5413
5414 static int
5415 insn_matches_slot (p, itype, slot, insn)
5416 const struct ia64_packet *p;
5417 enum attr_type itype;
5418 int slot;
5419 rtx insn;
5420 {
5421 enum attr_itanium_requires_unit0 u0;
5422 enum attr_type stype = p->t[slot];
5423
5424 if (insn)
5425 {
5426 u0 = ia64_safe_itanium_requires_unit0 (insn);
5427 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5428 {
5429 int i;
5430 for (i = sched_data.first_slot; i < slot; i++)
5431 if (p->t[i] == stype
5432 || (stype == TYPE_F && p->t[i] == TYPE_L)
5433 || (stype == TYPE_I && p->t[i] == TYPE_X))
5434 return 0;
5435 }
5436 if (GET_CODE (insn) == CALL_INSN)
5437 {
5438 /* Reject calls in multiway branch packets. We want to limit
5439 the number of multiway branches we generate (since the branch
5440 predictor is limited), and this seems to work fairly well.
5441 (If we didn't do this, we'd have to add another test here to
5442 force calls into the third slot of the bundle.) */
5443 if (slot < 3)
5444 {
5445 if (p->t[1] == TYPE_B)
5446 return 0;
5447 }
5448 else
5449 {
5450 if (p->t[4] == TYPE_B)
5451 return 0;
5452 }
5453 }
5454 }
5455
5456 if (itype == stype)
5457 return 1;
5458 if (itype == TYPE_A)
5459 return stype == TYPE_M || stype == TYPE_I;
5460 return 0;
5461 }
5462
5463 /* Like emit_insn_before, but skip cycle_display insns. This makes the
5464 assembly output a bit prettier. */
5465
5466 static void
5467 ia64_emit_insn_before (insn, before)
5468 rtx insn, before;
5469 {
5470 rtx prev = PREV_INSN (before);
5471 if (prev && GET_CODE (prev) == INSN
5472 && GET_CODE (PATTERN (prev)) == UNSPEC
5473 && XINT (PATTERN (prev), 1) == 23)
5474 before = prev;
5475 emit_insn_before (insn, before);
5476 }
5477
5478 #if 0
5479 /* Generate a nop insn of the given type. Note we never generate L type
5480 nops. */
5481
5482 static rtx
5483 gen_nop_type (t)
5484 enum attr_type t;
5485 {
5486 switch (t)
5487 {
5488 case TYPE_M:
5489 return gen_nop_m ();
5490 case TYPE_I:
5491 return gen_nop_i ();
5492 case TYPE_B:
5493 return gen_nop_b ();
5494 case TYPE_F:
5495 return gen_nop_f ();
5496 case TYPE_X:
5497 return gen_nop_x ();
5498 default:
5499 abort ();
5500 }
5501 }
5502 #endif
5503
5504 /* When rotating a bundle out of the issue window, insert a bundle selector
5505 insn in front of it. DUMP is the scheduling dump file or NULL. START
5506 is either 0 or 3, depending on whether we want to emit a bundle selector
5507 for the first bundle or the second bundle in the current issue window.
5508
5509 The selector insns are emitted this late because the selected packet can
5510 be changed until parts of it get rotated out. */
5511
5512 static void
5513 finish_last_head (dump, start)
5514 FILE *dump;
5515 int start;
5516 {
5517 const struct ia64_packet *p = sched_data.packet;
5518 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5519 int bundle_type = b - bundle;
5520 rtx insn;
5521 int i;
5522
5523 if (! ia64_final_schedule)
5524 return;
5525
5526 for (i = start; sched_data.insns[i] == 0; i++)
5527 if (i == start + 3)
5528 abort ();
5529 insn = sched_data.insns[i];
5530
5531 if (dump)
5532 fprintf (dump, "// Emitting template before %d: %s\n",
5533 INSN_UID (insn), b->name);
5534
5535 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5536 }
5537
5538 /* We can't schedule more insns this cycle. Fix up the scheduling state
5539 and advance FIRST_SLOT and CUR.
5540 We have to distribute the insns that are currently found between
5541 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5542 far, they are stored successively in the fields starting at FIRST_SLOT;
5543 now they must be moved to the correct slots.
5544 DUMP is the current scheduling dump file, or NULL. */
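/* For example, if the selected packet is MMI and the pending insns are an
M-type followed by an I-type, the I insn cannot go into slot 1 (an M
slot): slot 1 is recorded with the packet's type and no insn (a nop is
emitted for it later), and the I insn is placed in slot 2. */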
5545
5546 static void
5547 cycle_end_fill_slots (dump)
5548 FILE *dump;
5549 {
5550 const struct ia64_packet *packet = sched_data.packet;
5551 int slot, i;
5552 enum attr_type tmp_types[6];
5553 rtx tmp_insns[6];
5554
5555 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5556 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5557
5558 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5559 {
5560 enum attr_type t = tmp_types[i];
5561 if (t != ia64_safe_type (tmp_insns[i]))
5562 abort ();
5563 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5564 {
5565 if (slot > sched_data.split)
5566 abort ();
5567 if (dump)
5568 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5569 type_names[t]);
5570 sched_data.types[slot] = packet->t[slot];
5571 sched_data.insns[slot] = 0;
5572 sched_data.stopbit[slot] = 0;
5573
5574 /* ??? TYPE_L instructions always fill up two slots, but we don't
5575 support TYPE_L nops. */
5576 if (packet->t[slot] == TYPE_L)
5577 abort ();
5578
5579 slot++;
5580 }
5581 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5582 actual slot type later. */
5583 sched_data.types[slot] = packet->t[slot];
5584 sched_data.insns[slot] = tmp_insns[i];
5585 sched_data.stopbit[slot] = 0;
5586 slot++;
5587 /* TYPE_L instructions always fill up two slots. */
5588 if (t == TYPE_L)
5589 slot++;
5590 }
5591
5592 /* This isn't right - there's no need to pad out until the forced split;
5593 the CPU will automatically split if an insn isn't ready. */
5594 #if 0
5595 while (slot < sched_data.split)
5596 {
5597 sched_data.types[slot] = packet->t[slot];
5598 sched_data.insns[slot] = 0;
5599 sched_data.stopbit[slot] = 0;
5600 slot++;
5601 }
5602 #endif
5603
5604 sched_data.first_slot = sched_data.cur = slot;
5605 }
5606
5607 /* Bundle rotations, as described in the Itanium optimization manual.
5608 We can rotate either one or both bundles out of the issue window.
5609 DUMP is the current scheduling dump file, or NULL. */
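/* Rotating one bundle emits the bundle selector for the bundle leaving the
window and shifts the contents of slots 3..5 down into slots 0..2;
rotating two bundles empties the issue window completely. */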
5610
5611 static void
5612 rotate_one_bundle (dump)
5613 FILE *dump;
5614 {
5615 if (dump)
5616 fprintf (dump, "// Rotating one bundle.\n");
5617
5618 finish_last_head (dump, 0);
5619 if (sched_data.cur > 3)
5620 {
5621 sched_data.cur -= 3;
5622 sched_data.first_slot -= 3;
5623 memmove (sched_data.types,
5624 sched_data.types + 3,
5625 sched_data.cur * sizeof *sched_data.types);
5626 memmove (sched_data.stopbit,
5627 sched_data.stopbit + 3,
5628 sched_data.cur * sizeof *sched_data.stopbit);
5629 memmove (sched_data.insns,
5630 sched_data.insns + 3,
5631 sched_data.cur * sizeof *sched_data.insns);
5632 }
5633 else
5634 {
5635 sched_data.cur = 0;
5636 sched_data.first_slot = 0;
5637 }
5638 }
5639
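/* Rotate both bundles out of the issue window. DUMP is the current
scheduling dump file, or NULL. */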
5640 static void
5641 rotate_two_bundles (dump)
5642 FILE *dump;
5643 {
5644 if (dump)
5645 fprintf (dump, "// Rotating two bundles.\n");
5646
5647 if (sched_data.cur == 0)
5648 return;
5649
5650 finish_last_head (dump, 0);
5651 if (sched_data.cur > 3)
5652 finish_last_head (dump, 3);
5653 sched_data.cur = 0;
5654 sched_data.first_slot = 0;
5655 }
5656
5657 /* We're beginning a new block. Initialize data structures as necessary. */
5658
5659 static void
5660 ia64_sched_init (dump, sched_verbose, max_ready)
5661 FILE *dump ATTRIBUTE_UNUSED;
5662 int sched_verbose ATTRIBUTE_UNUSED;
5663 int max_ready;
5664 {
5665 static int initialized = 0;
5666
5667 if (! initialized)
5668 {
5669 int b1, b2, i;
5670
5671 initialized = 1;
5672
5673 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5674 {
5675 const struct bundle *t1 = bundle + b1;
5676 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5677 {
5678 const struct bundle *t2 = bundle + b2;
5679
5680 packets[i].t1 = t1;
5681 packets[i].t2 = t2;
5682 }
5683 }
5684 for (i = 0; i < NR_PACKETS; i++)
5685 {
5686 int j;
5687 for (j = 0; j < 3; j++)
5688 packets[i].t[j] = packets[i].t1->t[j];
5689 for (j = 0; j < 3; j++)
5690 packets[i].t[j + 3] = packets[i].t2->t[j];
5691 packets[i].first_split = itanium_split_issue (packets + i, 0);
5692 }
5693
5694 }
5695
5696 init_insn_group_barriers ();
5697
5698 memset (&sched_data, 0, sizeof sched_data);
5699 sched_types = (enum attr_type *) xmalloc (max_ready
5700 * sizeof (enum attr_type));
5701 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5702 }
5703
5704 /* See if the packet P can match the insns we have already scheduled. Return
5705 nonzero if so. In *PSLOT, we store the first slot that is available for
5706 more instructions if we choose this packet.
5707 SPLIT holds the last slot we can use; there is a split issue after it, so
5708 scheduling beyond it would cause us to use more than one cycle. */
5709
5710 static int
5711 packet_matches_p (p, split, pslot)
5712 const struct ia64_packet *p;
5713 int split;
5714 int *pslot;
5715 {
5716 int filled = sched_data.cur;
5717 int first = sched_data.first_slot;
5718 int i, slot;
5719
5720 /* First, check if the first of the two bundles must be a specific one (due
5721 to stop bits). */
5722 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5723 return 0;
5724 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5725 return 0;
5726
5727 for (i = 0; i < first; i++)
5728 if (! insn_matches_slot (p, sched_data.types[i], i,
5729 sched_data.insns[i]))
5730 return 0;
5731 for (i = slot = first; i < filled; i++)
5732 {
5733 while (slot < split)
5734 {
5735 if (insn_matches_slot (p, sched_data.types[i], slot,
5736 sched_data.insns[i]))
5737 break;
5738 slot++;
5739 }
5740 if (slot == split)
5741 return 0;
5742 slot++;
5743 }
5744
5745 if (pslot)
5746 *pslot = slot;
5747 return 1;
5748 }
5749
5750 /* A frontend for itanium_split_issue. For a packet P and a slot
5751 number FIRST that describes the start of the current clock cycle,
5752 return the slot number of the first split issue. This function
5753 uses the cached number found in P if possible. */
5754
5755 static int
5756 get_split (p, first)
5757 const struct ia64_packet *p;
5758 int first;
5759 {
5760 if (first == 0)
5761 return p->first_split;
5762 return itanium_split_issue (p, first);
5763 }
5764
5765 /* Given N_READY insns in the array READY, whose types are found in the
5766 corresponding array TYPES, return the index of the insn that is best
5767 suited to be scheduled in slot SLOT of packet P, or -1 if none fits. */
5768
5769 static int
5770 find_best_insn (ready, types, n_ready, p, slot)
5771 rtx *ready;
5772 enum attr_type *types;
5773 int n_ready;
5774 const struct ia64_packet *p;
5775 int slot;
5776 {
5777 int best = -1;
5778 int best_pri = 0;
5779 while (n_ready-- > 0)
5780 {
5781 rtx insn = ready[n_ready];
5782 if (! insn)
5783 continue;
5784 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5785 break;
5786 /* If we have equally good insns, one of which has a stricter
5787 slot requirement, prefer the one with the stricter requirement. */
5788 if (best >= 0 && types[n_ready] == TYPE_A)
5789 continue;
5790 if (insn_matches_slot (p, types[n_ready], slot, insn))
5791 {
5792 best = n_ready;
5793 best_pri = INSN_PRIORITY (ready[best]);
5794
5795 /* If there's no way we could get a stricter requirement, stop
5796 looking now. */
5797 if (types[n_ready] != TYPE_A
5798 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5799 break;
5800 break;
5801 }
5802 }
5803 return best;
5804 }
5805
5806 /* Select the best packet to use given the current scheduler state and the
5807 current ready list.
5808 READY is an array holding N_READY ready insns; TYPES is a corresponding
5809 array that holds their types. Store the best packet in *PPACKET and the
5810 number of insns that can be scheduled in the current cycle in *PBEST. */
5811
5812 static void
5813 find_best_packet (pbest, ppacket, ready, types, n_ready)
5814 int *pbest;
5815 const struct ia64_packet **ppacket;
5816 rtx *ready;
5817 enum attr_type *types;
5818 int n_ready;
5819 {
5820 int first = sched_data.first_slot;
5821 int best = 0;
5822 int lowest_end = 6;
5823 const struct ia64_packet *best_packet = NULL;
5824 int i;
5825
5826 for (i = 0; i < NR_PACKETS; i++)
5827 {
5828 const struct ia64_packet *p = packets + i;
5829 int slot;
5830 int split = get_split (p, first);
5831 int win = 0;
5832 int first_slot, last_slot;
5833 int b_nops = 0;
5834
5835 if (! packet_matches_p (p, split, &first_slot))
5836 continue;
5837
5838 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5839
5840 win = 0;
5841 last_slot = 6;
5842 for (slot = first_slot; slot < split; slot++)
5843 {
5844 int insn_nr;
5845
5846 /* Disallow a degenerate case where the first bundle doesn't
5847 contain anything but NOPs! */
5848 if (first_slot == 0 && win == 0 && slot == 3)
5849 {
5850 win = -1;
5851 break;
5852 }
5853
5854 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5855 if (insn_nr >= 0)
5856 {
5857 sched_ready[insn_nr] = 0;
5858 last_slot = slot;
5859 win++;
5860 }
5861 else if (p->t[slot] == TYPE_B)
5862 b_nops++;
5863 }
5864 /* We must disallow MBB/BBB packets if any of their B slots would be
5865 filled with nops. */
5866 if (last_slot < 3)
5867 {
5868 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5869 win = -1;
5870 }
5871 else
5872 {
5873 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5874 win = -1;
5875 }
5876
5877 if (win > best
5878 || (win == best && last_slot < lowest_end))
5879 {
5880 best = win;
5881 lowest_end = last_slot;
5882 best_packet = p;
5883 }
5884 }
5885 *pbest = best;
5886 *ppacket = best_packet;
5887 }
5888
5889 /* Reorder the ready list so that the insns that can be issued in this cycle
5890 are found in the correct order at the end of the list.
5891 DUMP is the scheduling dump file, or NULL. READY points to the start,
5892 E_READY to the end of the ready list. MAY_FAIL determines what should be
5893 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5894 otherwise we return 0.
5895 Return 1 if any insns can be scheduled in this cycle. */
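/* The main scheduler issues insns from the back of the ready list, which
is why the insns selected for this cycle are moved to its end here. */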
5896
5897 static int
5898 itanium_reorder (dump, ready, e_ready, may_fail)
5899 FILE *dump;
5900 rtx *ready;
5901 rtx *e_ready;
5902 int may_fail;
5903 {
5904 const struct ia64_packet *best_packet;
5905 int n_ready = e_ready - ready;
5906 int first = sched_data.first_slot;
5907 int i, best, best_split, filled;
5908
5909 for (i = 0; i < n_ready; i++)
5910 sched_types[i] = ia64_safe_type (ready[i]);
5911
5912 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5913
5914 if (best == 0)
5915 {
5916 if (may_fail)
5917 return 0;
5918 abort ();
5919 }
5920
5921 if (dump)
5922 {
5923 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5924 best_packet->t1->name,
5925 best_packet->t2 ? best_packet->t2->name : NULL, best);
5926 }
5927
5928 best_split = itanium_split_issue (best_packet, first);
5929 packet_matches_p (best_packet, best_split, &filled);
5930
5931 for (i = filled; i < best_split; i++)
5932 {
5933 int insn_nr;
5934
5935 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5936 if (insn_nr >= 0)
5937 {
5938 rtx insn = ready[insn_nr];
5939 memmove (ready + insn_nr, ready + insn_nr + 1,
5940 (n_ready - insn_nr - 1) * sizeof (rtx));
5941 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5942 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5943 ready[--n_ready] = insn;
5944 }
5945 }
5946
5947 sched_data.packet = best_packet;
5948 sched_data.split = best_split;
5949 return 1;
5950 }
5951
5952 /* Dump information about the current scheduling state to file DUMP. */
5953
5954 static void
5955 dump_current_packet (dump)
5956 FILE *dump;
5957 {
5958 int i;
5959 fprintf (dump, "// %d slots filled:", sched_data.cur);
5960 for (i = 0; i < sched_data.first_slot; i++)
5961 {
5962 rtx insn = sched_data.insns[i];
5963 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5964 if (insn)
5965 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5966 if (sched_data.stopbit[i])
5967 fprintf (dump, " ;;");
5968 }
5969 fprintf (dump, " :::");
5970 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5971 {
5972 rtx insn = sched_data.insns[i];
5973 enum attr_type t = ia64_safe_type (insn);
5974 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5975 }
5976 fprintf (dump, "\n");
5977 }
5978
5979 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5980 NULL. */
5981
5982 static void
5983 schedule_stop (dump)
5984 FILE *dump;
5985 {
5986 const struct ia64_packet *best = sched_data.packet;
5987 int i;
5988 int best_stop = 6;
5989
5990 if (dump)
5991 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5992
5993 if (sched_data.cur == 0)
5994 {
5995 if (dump)
5996 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5997
5998 rotate_two_bundles (NULL);
5999 return;
6000 }
6001
6002 for (i = -1; i < NR_PACKETS; i++)
6003 {
6004 /* This is a slight hack to give the current packet the first chance.
6005 This is done to avoid e.g. switching from MIB to MBB bundles. */
6006 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6007 int split = get_split (p, sched_data.first_slot);
6008 const struct bundle *compare;
6009 int next, stoppos;
6010
6011 if (! packet_matches_p (p, split, &next))
6012 continue;
6013
6014 compare = next > 3 ? p->t2 : p->t1;
6015
6016 stoppos = 3;
6017 if (compare->possible_stop)
6018 stoppos = compare->possible_stop;
6019 if (next > 3)
6020 stoppos += 3;
6021
6022 if (stoppos < next || stoppos >= best_stop)
6023 {
6024 if (compare->possible_stop == 0)
6025 continue;
6026 stoppos = (next > 3 ? 6 : 3);
6027 }
6028 if (stoppos < next || stoppos >= best_stop)
6029 continue;
6030
6031 if (dump)
6032 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
6033 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6034 stoppos);
6035
6036 best_stop = stoppos;
6037 best = p;
6038 }
6039
6040 sched_data.packet = best;
6041 cycle_end_fill_slots (dump);
6042 while (sched_data.cur < best_stop)
6043 {
6044 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6045 sched_data.insns[sched_data.cur] = 0;
6046 sched_data.stopbit[sched_data.cur] = 0;
6047 sched_data.cur++;
6048 }
6049 sched_data.stopbit[sched_data.cur - 1] = 1;
6050 sched_data.first_slot = best_stop;
6051
6052 if (dump)
6053 dump_current_packet (dump);
6054 }
6055
6056 /* If necessary, perform one or two rotations on the scheduling state.
6057 This should only be called if we are starting a new cycle. */
6058
6059 static void
6060 maybe_rotate (dump)
6061 FILE *dump;
6062 {
6063 if (sched_data.cur == 6)
6064 rotate_two_bundles (dump);
6065 else if (sched_data.cur >= 3)
6066 rotate_one_bundle (dump);
6067 sched_data.first_slot = sched_data.cur;
6068 }
6069
6070 /* The clock cycle when ia64_sched_reorder was last called. */
6071 static int prev_cycle;
6072
6073 /* The first slot used in the previous cycle. This is the saved
6074 value of sched_data.first_slot. */
6075 static int prev_first;
6076
6077 /* The last insn that has been scheduled. At the start of a new cycle
6078 we know that we can emit new insns after it; the main scheduling code
6079 has already emitted a cycle_display insn after it and is using that
6080 as its current last insn. */
6081 static rtx last_issued;
6082
6083 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6084 pad out the delay between MM (shifts, etc.) and integer operations. */
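/* The trailing loop below pads each remaining stall cycle with an MII
bundle of nops; a stop bit placed inside the bundle lets a single bundle
account for two stall cycles when possible. */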
6085
6086 static void
6087 nop_cycles_until (clock_var, dump)
6088 int clock_var;
6089 FILE *dump;
6090 {
6091 int prev_clock = prev_cycle;
6092 int cycles_left = clock_var - prev_clock;
6093
6094 /* Finish the previous cycle; pad it out with NOPs. */
6095 if (sched_data.cur == 3)
6096 {
6097 rtx t = gen_insn_group_barrier (GEN_INT (3));
6098 last_issued = emit_insn_after (t, last_issued);
6099 maybe_rotate (dump);
6100 }
6101 else if (sched_data.cur > 0)
6102 {
6103 int need_stop = 0;
6104 int split = itanium_split_issue (sched_data.packet, prev_first);
6105
6106 if (sched_data.cur < 3 && split > 3)
6107 {
6108 split = 3;
6109 need_stop = 1;
6110 }
6111
6112 if (split > sched_data.cur)
6113 {
6114 int i;
6115 for (i = sched_data.cur; i < split; i++)
6116 {
6117 rtx t;
6118
6119 t = gen_nop_type (sched_data.packet->t[i]);
6120 last_issued = emit_insn_after (t, last_issued);
6121 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
6122 sched_data.insns[i] = last_issued;
6123 sched_data.stopbit[i] = 0;
6124 }
6125 sched_data.cur = split;
6126 }
6127
6128 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6129 && cycles_left > 1)
6130 {
6131 int i;
6132 for (i = sched_data.cur; i < 6; i++)
6133 {
6134 rtx t;
6135
6136 t = gen_nop_type (sched_data.packet->t[i]);
6137 last_issued = emit_insn_after (t, last_issued);
6138 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
6139 sched_data.insns[i] = last_issued;
6140 sched_data.stopbit[i] = 0;
6141 }
6142 sched_data.cur = 6;
6143 cycles_left--;
6144 need_stop = 1;
6145 }
6146
6147 if (need_stop || sched_data.cur == 6)
6148 {
6149 rtx t = gen_insn_group_barrier (GEN_INT (3));
6150 last_issued = emit_insn_after (t, last_issued);
6151 }
6152 maybe_rotate (dump);
6153 }
6154
6155 cycles_left--;
6156 while (cycles_left > 0)
6157 {
6158 rtx t = gen_bundle_selector (GEN_INT (0));
6159 last_issued = emit_insn_after (t, last_issued);
6160 t = gen_nop_type (TYPE_M);
6161 last_issued = emit_insn_after (t, last_issued);
6162 t = gen_nop_type (TYPE_I);
6163 last_issued = emit_insn_after (t, last_issued);
6164 if (cycles_left > 1)
6165 {
6166 t = gen_insn_group_barrier (GEN_INT (2));
6167 last_issued = emit_insn_after (t, last_issued);
6168 cycles_left--;
6169 }
6170 t = gen_nop_type (TYPE_I);
6171 last_issued = emit_insn_after (t, last_issued);
6172 t = gen_insn_group_barrier (GEN_INT (3));
6173 last_issued = emit_insn_after (t, last_issued);
6174 cycles_left--;
6175 }
6176 }
6177
6178 /* We are about to begin issuing insns for this clock cycle.
6179 Override the default sort algorithm to better slot instructions. */
6180
6181 static int
6182 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6183 reorder_type, clock_var)
6184 FILE *dump ATTRIBUTE_UNUSED;
6185 int sched_verbose ATTRIBUTE_UNUSED;
6186 rtx *ready;
6187 int *pn_ready;
6188 int reorder_type, clock_var;
6189 {
6190 int n_asms;
6191 int n_ready = *pn_ready;
6192 rtx *e_ready = ready + n_ready;
6193 rtx *insnp;
6194
6195 if (sched_verbose)
6196 {
6197 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6198 dump_current_packet (dump);
6199 }
6200
6201 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6202 {
6203 for (insnp = ready; insnp < e_ready; insnp++)
6204 {
6205 rtx insn = *insnp;
6206 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6207 if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
6208 || t == ITANIUM_CLASS_ILOG
6209 || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
6210 {
6211 rtx link;
6212 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6213 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
6214 && REG_NOTE_KIND (link) != REG_DEP_ANTI)
6215 {
6216 rtx other = XEXP (link, 0);
6217 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6218 if (t0 == ITANIUM_CLASS_MMSHF
6219 || t0 == ITANIUM_CLASS_MMMUL)
6220 {
6221 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6222 goto out;
6223 }
6224 }
6225 }
6226 }
6227 }
6228 out:
6229
6230 prev_first = sched_data.first_slot;
6231 prev_cycle = clock_var;
6232
6233 if (reorder_type == 0)
6234 maybe_rotate (sched_verbose ? dump : NULL);
6235
6236 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6237 n_asms = 0;
6238 for (insnp = ready; insnp < e_ready; insnp++)
6239 if (insnp < e_ready)
6240 {
6241 rtx insn = *insnp;
6242 enum attr_type t = ia64_safe_type (insn);
6243 if (t == TYPE_UNKNOWN)
6244 {
6245 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6246 || asm_noperands (PATTERN (insn)) >= 0)
6247 {
6248 rtx lowest = ready[n_asms];
6249 ready[n_asms] = insn;
6250 *insnp = lowest;
6251 n_asms++;
6252 }
6253 else
6254 {
6255 rtx highest = ready[n_ready - 1];
6256 ready[n_ready - 1] = insn;
6257 *insnp = highest;
6258 if (ia64_final_schedule && group_barrier_needed_p (insn))
6259 {
6260 schedule_stop (sched_verbose ? dump : NULL);
6261 sched_data.last_was_stop = 1;
6262 maybe_rotate (sched_verbose ? dump : NULL);
6263 }
6264
6265 return 1;
6266 }
6267 }
6268 }
6269 if (n_asms < n_ready)
6270 {
6271 /* Some normal insns to process. Skip the asms. */
6272 ready += n_asms;
6273 n_ready -= n_asms;
6274 }
6275 else if (n_ready > 0)
6276 {
6277 /* Only asm insns left. */
6278 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6279 {
6280 schedule_stop (sched_verbose ? dump : NULL);
6281 sched_data.last_was_stop = 1;
6282 maybe_rotate (sched_verbose ? dump : NULL);
6283 }
6284 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6285 return 1;
6286 }
6287
6288 if (ia64_final_schedule)
6289 {
6290 int nr_need_stop = 0;
6291
6292 for (insnp = ready; insnp < e_ready; insnp++)
6293 if (safe_group_barrier_needed_p (*insnp))
6294 nr_need_stop++;
6295
6296 /* Schedule a stop bit if
6297 - all insns require a stop bit, or
6298 - we are starting a new cycle and _any_ insn requires a stop bit.
6299 The reason for the latter is that if our schedule is accurate, then
6300 the additional stop won't decrease performance at this point (since
6301 there's a split issue at this point anyway), but it gives us more
6302 freedom when scheduling the currently ready insns. */
6303 if ((reorder_type == 0 && nr_need_stop)
6304 || (reorder_type == 1 && n_ready == nr_need_stop))
6305 {
6306 schedule_stop (sched_verbose ? dump : NULL);
6307 sched_data.last_was_stop = 1;
6308 maybe_rotate (sched_verbose ? dump : NULL);
6309 if (reorder_type == 1)
6310 return 0;
6311 }
6312 else
6313 {
6314 int deleted = 0;
6315 insnp = e_ready;
6316 /* Move down everything that needs a stop bit, preserving relative
6317 order. */
6318 while (insnp-- > ready + deleted)
6319 while (insnp >= ready + deleted)
6320 {
6321 rtx insn = *insnp;
6322 if (! safe_group_barrier_needed_p (insn))
6323 break;
6324 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6325 *ready = insn;
6326 deleted++;
6327 }
6328 n_ready -= deleted;
6329 ready += deleted;
6330 if (deleted != nr_need_stop)
6331 abort ();
6332 }
6333 }
6334
6335 return itanium_reorder (sched_verbose ? dump : NULL,
6336 ready, e_ready, reorder_type == 1);
6337 }
6338
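/* Entry point for the scheduler's reorder hook. This simply calls
ia64_internal_sched_reorder with reorder_type 0, i.e. at the start of a
new clock cycle. */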
6339 static int
6340 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6341 FILE *dump;
6342 int sched_verbose;
6343 rtx *ready;
6344 int *pn_ready;
6345 int clock_var;
6346 {
6347 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6348 pn_ready, 0, clock_var);
6349 }
6350
6351 /* Like ia64_sched_reorder, but called after issuing each insn.
6352 Override the default sort algorithm to better slot instructions. */
6353
6354 static int
6355 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6356 FILE *dump ATTRIBUTE_UNUSED;
6357 int sched_verbose ATTRIBUTE_UNUSED;
6358 rtx *ready;
6359 int *pn_ready;
6360 int clock_var;
6361 {
6362 if (sched_data.last_was_stop)
6363 return 0;
6364
6365 /* Detect one special case and try to optimize it.
6366 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6367 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
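/* In that situation, the insn left over in slot 0 is given a bundle of its
own: the stop bit is moved to the end of that bundle, its remaining slots
become nops (hence MFB), and the insns that were in slots 1..3 are moved
up to slots 3..5 so that they start the second bundle of the window. */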
6368 if (sched_data.first_slot == 1
6369 && sched_data.stopbit[0]
6370 && ((sched_data.cur == 4
6371 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6372 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6373 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6374 || (sched_data.cur == 3
6375 && (sched_data.types[1] == TYPE_M
6376 || sched_data.types[1] == TYPE_A)
6377 && (sched_data.types[2] != TYPE_M
6378 && sched_data.types[2] != TYPE_I
6379 && sched_data.types[2] != TYPE_A))))
6380
6381 {
6382 int i, best;
6383 rtx stop = sched_data.insns[1];
6384
6385 /* Search backward for the stop bit that must be there. */
6386 while (1)
6387 {
6388 int insn_code;
6389
6390 stop = PREV_INSN (stop);
6391 if (GET_CODE (stop) != INSN)
6392 abort ();
6393 insn_code = recog_memoized (stop);
6394
6395 /* Ignore cycle displays and .pred.rel.mutex. */
6396 if (insn_code == CODE_FOR_cycle_display
6397 || insn_code == CODE_FOR_pred_rel_mutex
6398 || insn_code == CODE_FOR_prologue_use)
6399 continue;
6400
6401 if (insn_code == CODE_FOR_insn_group_barrier)
6402 break;
6403 abort ();
6404 }
6405
6406 /* Adjust the stop bit's slot selector. */
6407 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6408 abort ();
6409 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6410
6411 sched_data.stopbit[0] = 0;
6412 sched_data.stopbit[2] = 1;
6413
6414 sched_data.types[5] = sched_data.types[3];
6415 sched_data.types[4] = sched_data.types[2];
6416 sched_data.types[3] = sched_data.types[1];
6417 sched_data.insns[5] = sched_data.insns[3];
6418 sched_data.insns[4] = sched_data.insns[2];
6419 sched_data.insns[3] = sched_data.insns[1];
6420 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6421 sched_data.cur += 2;
6422 sched_data.first_slot = 3;
6423 for (i = 0; i < NR_PACKETS; i++)
6424 {
6425 const struct ia64_packet *p = packets + i;
6426 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6427 {
6428 sched_data.packet = p;
6429 break;
6430 }
6431 }
6432 rotate_one_bundle (sched_verbose ? dump : NULL);
6433
6434 best = 6;
6435 for (i = 0; i < NR_PACKETS; i++)
6436 {
6437 const struct ia64_packet *p = packets + i;
6438 int split = get_split (p, sched_data.first_slot);
6439 int next;
6440
6441 /* Disallow multiway branches here. */
6442 if (p->t[1] == TYPE_B)
6443 continue;
6444
6445 if (packet_matches_p (p, split, &next) && next < best)
6446 {
6447 best = next;
6448 sched_data.packet = p;
6449 sched_data.split = split;
6450 }
6451 }
6452 if (best == 6)
6453 abort ();
6454 }
6455
6456 if (*pn_ready > 0)
6457 {
6458 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6459 ready, pn_ready, 1,
6460 clock_var);
6461 if (more)
6462 return more;
6463 /* Did we schedule a stop? If so, finish this cycle. */
6464 if (sched_data.cur == sched_data.first_slot)
6465 return 0;
6466 }
6467
6468 if (sched_verbose)
6469 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6470
6471 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6472 if (sched_verbose)
6473 dump_current_packet (dump);
6474 return 0;
6475 }
6476
6477 /* We are about to issue INSN. Return the number of insns left on the
6478 ready queue that can be issued this cycle. */
6479
6480 static int
6481 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6482 FILE *dump;
6483 int sched_verbose;
6484 rtx insn;
6485 int can_issue_more ATTRIBUTE_UNUSED;
6486 {
6487 enum attr_type t = ia64_safe_type (insn);
6488
6489 last_issued = insn;
6490
6491 if (sched_data.last_was_stop)
6492 {
6493 int t = sched_data.first_slot;
6494 if (t == 0)
6495 t = 3;
6496 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6497 init_insn_group_barriers ();
6498 sched_data.last_was_stop = 0;
6499 }
6500
6501 if (t == TYPE_UNKNOWN)
6502 {
6503 if (sched_verbose)
6504 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6505 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6506 || asm_noperands (PATTERN (insn)) >= 0)
6507 {
6508 /* This must be some kind of asm. Clear the scheduling state. */
6509 rotate_two_bundles (sched_verbose ? dump : NULL);
6510 if (ia64_final_schedule)
6511 group_barrier_needed_p (insn);
6512 }
6513 return 1;
6514 }
6515
6516 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6517 important state info. Don't delete this test. */
6518 if (ia64_final_schedule
6519 && group_barrier_needed_p (insn))
6520 abort ();
6521
6522 sched_data.stopbit[sched_data.cur] = 0;
6523 sched_data.insns[sched_data.cur] = insn;
6524 sched_data.types[sched_data.cur] = t;
6525
6526 sched_data.cur++;
6527 if (sched_verbose)
6528 fprintf (dump, "// Scheduling insn %d of type %s\n",
6529 INSN_UID (insn), type_names[t]);
6530
6531 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6532 {
6533 schedule_stop (sched_verbose ? dump : NULL);
6534 sched_data.last_was_stop = 1;
6535 }
6536
6537 return 1;
6538 }
6539
6540 /* Free data allocated by ia64_sched_init. */
6541
6542 static void
6543 ia64_sched_finish (dump, sched_verbose)
6544 FILE *dump;
6545 int sched_verbose;
6546 {
6547 if (sched_verbose)
6548 fprintf (dump, "// Finishing schedule.\n");
6549 rotate_two_bundles (NULL);
6550 free (sched_types);
6551 free (sched_ready);
6552 }
6553
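/* When doing the final schedule, emit a cycle_display marker insn after
LAST recording clock cycle CLOCK and return it; otherwise return LAST
unchanged. (These markers are the insns skipped by ia64_emit_insn_before
above.) */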
6554 static rtx
6555 ia64_cycle_display (clock, last)
6556 int clock;
6557 rtx last;
6558 {
6559 if (ia64_final_schedule)
6560 return emit_insn_after (gen_cycle_display (GEN_INT (clock)), last);
6561 else
6562 return last;
6563 }
6564 \f
6565 /* Emit pseudo-ops for the assembler to describe predicate relations.
6566 At present this assumes that we only consider predicate pairs to
6567 be mutex, and that the assembler can deduce proper values from
6568 straight-line code. */
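/* Concretely, for every even/odd predicate pair live at the start of a
block that begins with a code label, a pred_rel_mutex insn is emitted
after the label; it expands to an assembler annotation declaring the
predicate pair mutually exclusive, which keeps the assembler's dependency
checker from complaining about insns predicated on complementary
predicates within one instruction group. */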
6569
6570 static void
6571 emit_predicate_relation_info ()
6572 {
6573 int i;
6574
6575 for (i = n_basic_blocks - 1; i >= 0; --i)
6576 {
6577 basic_block bb = BASIC_BLOCK (i);
6578 int r;
6579 rtx head = bb->head;
6580
6581 /* We only need such notes at code labels. */
6582 if (GET_CODE (head) != CODE_LABEL)
6583 continue;
6584 if (GET_CODE (NEXT_INSN (head)) == NOTE
6585 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6586 head = NEXT_INSN (head);
6587
6588 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6589 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6590 {
6591 rtx p = gen_rtx_REG (BImode, r);
6592 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6593 if (head == bb->end)
6594 bb->end = n;
6595 head = n;
6596 }
6597 }
6598
6599 /* Look for conditional calls that do not return, and protect predicate
6600 relations around them. Otherwise the assembler will assume the call
6601 returns, and complain about uses of call-clobbered predicates after
6602 the call. */
6603 for (i = n_basic_blocks - 1; i >= 0; --i)
6604 {
6605 basic_block bb = BASIC_BLOCK (i);
6606 rtx insn = bb->head;
6607
6608 while (1)
6609 {
6610 if (GET_CODE (insn) == CALL_INSN
6611 && GET_CODE (PATTERN (insn)) == COND_EXEC
6612 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6613 {
6614 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6615 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6616 if (bb->head == insn)
6617 bb->head = b;
6618 if (bb->end == insn)
6619 bb->end = a;
6620 }
6621
6622 if (insn == bb->end)
6623 break;
6624 insn = NEXT_INSN (insn);
6625 }
6626 }
6627 }
6628
6629 /* Generate a NOP instruction of type T. We will never generate L type
6630 nops. */
6631
6632 static rtx
6633 gen_nop_type (t)
6634 enum attr_type t;
6635 {
6636 switch (t)
6637 {
6638 case TYPE_M:
6639 return gen_nop_m ();
6640 case TYPE_I:
6641 return gen_nop_i ();
6642 case TYPE_B:
6643 return gen_nop_b ();
6644 case TYPE_F:
6645 return gen_nop_f ();
6646 case TYPE_X:
6647 return gen_nop_x ();
6648 default:
6649 abort ();
6650 }
6651 }
6652
6653 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6654 here than while scheduling. */
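/* The walk below tracks the current bundle template via the bundle
selector and insn group barrier markers left by the scheduler, and
inserts nops of the type each unfilled slot requires so that every
bundle is complete when assembly is output. */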
6655
6656 static void
6657 ia64_emit_nops ()
6658 {
6659 rtx insn;
6660 const struct bundle *b = 0;
6661 int bundle_pos = 0;
6662
6663 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6664 {
6665 rtx pat;
6666 enum attr_type t;
6667 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6668 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6669 continue;
6670 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6671 || GET_CODE (insn) == CODE_LABEL)
6672 {
6673 if (b)
6674 while (bundle_pos < 3)
6675 {
6676 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6677 bundle_pos++;
6678 }
6679 if (GET_CODE (insn) != CODE_LABEL)
6680 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6681 else
6682 b = 0;
6683 bundle_pos = 0;
6684 continue;
6685 }
6686 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6687 {
6688 int t = INTVAL (XVECEXP (pat, 0, 0));
6689 if (b)
6690 while (bundle_pos < t)
6691 {
6692 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6693 bundle_pos++;
6694 }
6695 continue;
6696 }
6697
6698 if (bundle_pos == 3)
6699 b = 0;
6700
6701 if (b && INSN_P (insn))
6702 {
6703 t = ia64_safe_type (insn);
6704 if (asm_noperands (PATTERN (insn)) >= 0
6705 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6706 {
6707 while (bundle_pos < 3)
6708 {
6709 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6710 bundle_pos++;
6711 }
6712 continue;
6713 }
6714
6715 if (t == TYPE_UNKNOWN)
6716 continue;
6717 while (bundle_pos < 3)
6718 {
6719 if (t == b->t[bundle_pos]
6720 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6721 || b->t[bundle_pos] == TYPE_I)))
6722 break;
6723
6724 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6725 bundle_pos++;
6726 }
6727 if (bundle_pos < 3)
6728 bundle_pos++;
6729 }
6730 }
6731 }
6732
6733 /* Perform machine dependent operations on the rtl chain INSNS. */
6734
6735 void
6736 ia64_reorg (insns)
6737 rtx insns;
6738 {
6739 /* If optimizing, we'll have split before scheduling. */
6740 if (optimize == 0)
6741 split_all_insns_noflow ();
6742
6743 /* Make sure the CFG and global_live_at_start are correct
6744 for emit_predicate_relation_info. */
6745 find_basic_blocks (insns, max_reg_num (), NULL);
6746 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6747
6748 if (ia64_flag_schedule_insns2)
6749 {
6750 timevar_push (TV_SCHED2);
6751 ia64_final_schedule = 1;
6752 schedule_ebbs (rtl_dump_file);
6753 ia64_final_schedule = 0;
6754 timevar_pop (TV_SCHED2);
6755
6756 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6757 place as they were during scheduling. */
6758 emit_insn_group_barriers (rtl_dump_file, insns);
6759 ia64_emit_nops ();
6760 }
6761 else
6762 emit_all_insn_group_barriers (rtl_dump_file, insns);
6763
6764 /* A call must not be the last instruction in a function; otherwise the
6765 return address would not lie within the function and unwinding would
6766 not work properly. Note that IA-64 differs from dwarf2 on this point. */
6767 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6768 {
6769 rtx insn;
6770 int saw_stop = 0;
6771
6772 insn = get_last_insn ();
6773 if (! INSN_P (insn))
6774 insn = prev_active_insn (insn);
6775 if (GET_CODE (insn) == INSN
6776 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6777 && XINT (PATTERN (insn), 1) == 2)
6778 {
6779 saw_stop = 1;
6780 insn = prev_active_insn (insn);
6781 }
6782 if (GET_CODE (insn) == CALL_INSN)
6783 {
6784 if (! saw_stop)
6785 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6786 emit_insn (gen_break_f ());
6787 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6788 }
6789 }
6790
6791 fixup_errata ();
6792 emit_predicate_relation_info ();
6793 }
6794 \f
6795 /* Return true if REGNO is used by the epilogue. */
6796
6797 int
6798 ia64_epilogue_uses (regno)
6799 int regno;
6800 {
6801 switch (regno)
6802 {
6803 case R_GR (1):
6804 /* When a function makes a call through a function descriptor, we
6805 will write a (potentially) new value to "gp". After returning
6806 from such a call, we need to make sure the function restores the
6807 original gp-value, even if the function itself does not use the
6808 gp anymore. */
6809 return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));
6810
6811 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
6812 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
6813 /* For functions defined with the syscall_linkage attribute, all
6814 input registers are marked as live at all function exits. This
6815 prevents the register allocator from using the input registers,
6816 which in turn makes it possible to restart a system call after
6817 an interrupt without having to save/restore the input registers.
6818 This also prevents kernel data from leaking to application code. */
6819 return lookup_attribute ("syscall_linkage",
6820 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
6821
6822 case R_BR (0):
6823 /* Conditional return patterns can't represent the use of `b0' as
6824 the return address, so we force the value live this way. */
6825 return 1;
6826
6827 case AR_PFS_REGNUM:
6828 /* Likewise for ar.pfs, which is used by br.ret. */
6829 return 1;
6830
6831 default:
6832 return 0;
6833 }
6834 }
6835
6836 /* Table of valid machine attributes. */
6837 const struct attribute_spec ia64_attribute_table[] =
6838 {
6839 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6840 { "syscall_linkage", 0, 0, false, true, true, NULL },
6841 { NULL, 0, 0, false, false, false, NULL }
6842 };
6843 \f
6844 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6845
6846 We add @ to the name if this goes in small data/bss. We can only put
6847 a variable in small data/bss if it is defined in this module or a module
6848 that we are statically linked with. We can't check the second condition,
6849 but TREE_STATIC gives us the first one. */
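/* For example, a small static variable "foo" has its symbol rewritten to
"@foo" below; the '@' is only an internal marker (stripped again before
the name reaches the assembly output) telling the address generation code
that a gp-relative access is valid. */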
6850
6851 /* ??? If we had IPA, we could check the second condition. We could support
6852 programmer added section attributes if the variable is not defined in this
6853 module. */
6854
6855 /* ??? See the v850 port for a cleaner way to do this. */
6856
6857 /* ??? We could also support own long data here. Generating movl/add/ld8
6858 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6859 code faster because there is one less load. This also includes incomplete
6860 types which can't go in sdata/sbss. */
6861
6862 /* ??? See select_section. We must put short own readonly variables in
6863 sdata/sbss instead of the more natural rodata, because we can't perform
6864 the DECL_READONLY_SECTION test here. */
6865
6866 extern struct obstack * saveable_obstack;
6867
6868 void
6869 ia64_encode_section_info (decl)
6870 tree decl;
6871 {
6872 const char *symbol_str;
6873
6874 if (TREE_CODE (decl) == FUNCTION_DECL)
6875 {
6876 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6877 return;
6878 }
6879
6880 /* Careful not to prod global register variables. */
6881 if (TREE_CODE (decl) != VAR_DECL
6882 || GET_CODE (DECL_RTL (decl)) != MEM
6883 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6884 return;
6885
6886 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6887
6888 /* We assume that -fpic is used only to create a shared library (dso).
6889 With -fpic, no global data can ever be sdata.
6890 Without -fpic, global common uninitialized data can never be sdata, since
6891 it can unify with a real definition in a dso. */
6892 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6893 to access them. The linker may then be able to do linker relaxation to
6894 optimize references to them. Currently sdata implies use of gprel. */
6895 /* We need the DECL_EXTERNAL check for C++. static class data members get
6896 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6897 statically allocated, but the space is allocated somewhere else. Such
6898 decls can not be own data. */
6899 if (! TARGET_NO_SDATA
6900 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6901 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6902 && ! (TREE_PUBLIC (decl)
6903 && (flag_pic
6904 || (DECL_COMMON (decl)
6905 && (DECL_INITIAL (decl) == 0
6906 || DECL_INITIAL (decl) == error_mark_node))))
6907 /* Either the variable must be declared without a section attribute,
6908 or the section must be sdata or sbss. */
6909 && (DECL_SECTION_NAME (decl) == 0
6910 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6911 ".sdata")
6912 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6913 ".sbss")))
6914 {
6915 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
6916
6917 /* If the variable has already been defined in the output file, then it
6918 is too late to put it in sdata if it wasn't put there in the first
6919 place. The test is here rather than above, because if it is already
6920 in sdata, then it can stay there. */
6921
6922 if (TREE_ASM_WRITTEN (decl))
6923 ;
6924
6925 /* If this is an incomplete type with size 0, then we can't put it in
6926 sdata because it might be too big when completed. */
6927 else if (size > 0
6928 && size <= (HOST_WIDE_INT) ia64_section_threshold
6929 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
6930 {
6931 size_t len = strlen (symbol_str);
6932 char *newstr = alloca (len + 1);
6933 const char *string;
6934
6935 *newstr = SDATA_NAME_FLAG_CHAR;
6936 memcpy (newstr + 1, symbol_str, len + 1);
6937
6938 string = ggc_alloc_string (newstr, len + 1);
6939 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
6940 }
6941 }
6942 /* This decl is marked as being in small data/bss but it shouldn't
6943 be; one likely explanation for this is that the decl has been
6944 moved into a different section from the one it was in when
6945 ENCODE_SECTION_INFO was first called. Remove the '@'. */
6946 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6947 {
6948 XSTR (XEXP (DECL_RTL (decl), 0), 0)
6949 = ggc_strdup (symbol_str + 1);
6950 }
6951 }
6952 \f
6953 /* Output assembly directives for prologue regions. */
6954
6955 /* The current basic block number. */
6956
6957 static int block_num;
6958
6959 /* True if we need a copy_state command at the start of the next block. */
6960
6961 static int need_copy_state;
6962
6963 /* Emit unwind directives for the start of an epilogue. */
6964
6965 static void
6966 process_epilogue ()
6967 {
6968 /* If this isn't the last block of the function, then we need to label the
6969 current state, and copy it back in at the start of the next block. */
6970
6971 if (block_num != n_basic_blocks - 1)
6972 {
6973 fprintf (asm_out_file, "\t.label_state 1\n");
6974 need_copy_state = 1;
6975 }
6976
6977 fprintf (asm_out_file, "\t.restore sp\n");
6978 }
6979
6980 /* Process a SET pattern PAT, looking for the specific forms that require
6981 emitting an assembly directive for unwinding. */
6982
6983 static int
6984 process_set (asm_out_file, pat)
6985 FILE *asm_out_file;
6986 rtx pat;
6987 {
6988 rtx src = SET_SRC (pat);
6989 rtx dest = SET_DEST (pat);
6990 int src_regno, dest_regno;
6991
6992 /* Look for the ALLOC insn. */
6993 if (GET_CODE (src) == UNSPEC_VOLATILE
6994 && XINT (src, 1) == 0
6995 && GET_CODE (dest) == REG)
6996 {
6997 dest_regno = REGNO (dest);
6998
6999 /* If this isn't the final destination for ar.pfs, the alloc
7000 shouldn't have been marked frame related. */
7001 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7002 abort ();
7003
7004 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7005 ia64_dbx_register_number (dest_regno));
7006 return 1;
7007 }
7008
7009 /* Look for SP = .... */
7010 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7011 {
7012 if (GET_CODE (src) == PLUS)
7013 {
7014 rtx op0 = XEXP (src, 0);
7015 rtx op1 = XEXP (src, 1);
7016 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7017 {
7018 if (INTVAL (op1) < 0)
7019 {
7020 fputs ("\t.fframe ", asm_out_file);
7021 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
7022 -INTVAL (op1));
7023 fputc ('\n', asm_out_file);
7024 }
7025 else
7026 process_epilogue ();
7027 }
7028 else
7029 abort ();
7030 }
7031 else if (GET_CODE (src) == REG
7032 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7033 process_epilogue ();
7034 else
7035 abort ();
7036
7037 return 1;
7038 }
7039
7040 /* Register move we need to look at. */
7041 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7042 {
7043 src_regno = REGNO (src);
7044 dest_regno = REGNO (dest);
7045
7046 switch (src_regno)
7047 {
7048 case BR_REG (0):
7049 /* Saving return address pointer. */
7050 if (dest_regno != current_frame_info.reg_save_b0)
7051 abort ();
7052 fprintf (asm_out_file, "\t.save rp, r%d\n",
7053 ia64_dbx_register_number (dest_regno));
7054 return 1;
7055
7056 case PR_REG (0):
7057 if (dest_regno != current_frame_info.reg_save_pr)
7058 abort ();
7059 fprintf (asm_out_file, "\t.save pr, r%d\n",
7060 ia64_dbx_register_number (dest_regno));
7061 return 1;
7062
7063 case AR_UNAT_REGNUM:
7064 if (dest_regno != current_frame_info.reg_save_ar_unat)
7065 abort ();
7066 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7067 ia64_dbx_register_number (dest_regno));
7068 return 1;
7069
7070 case AR_LC_REGNUM:
7071 if (dest_regno != current_frame_info.reg_save_ar_lc)
7072 abort ();
7073 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7074 ia64_dbx_register_number (dest_regno));
7075 return 1;
7076
7077 case STACK_POINTER_REGNUM:
7078 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7079 || ! frame_pointer_needed)
7080 abort ();
7081 fprintf (asm_out_file, "\t.vframe r%d\n",
7082 ia64_dbx_register_number (dest_regno));
7083 return 1;
7084
7085 default:
7086 /* Everything else should indicate being stored to memory. */
7087 abort ();
7088 }
7089 }
7090
7091 /* Memory store we need to look at. */
7092 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7093 {
7094 long off;
7095 rtx base;
7096 const char *saveop;
7097
7098 if (GET_CODE (XEXP (dest, 0)) == REG)
7099 {
7100 base = XEXP (dest, 0);
7101 off = 0;
7102 }
7103 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7104 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7105 {
7106 base = XEXP (XEXP (dest, 0), 0);
7107 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7108 }
7109 else
7110 abort ();
7111
7112 if (base == hard_frame_pointer_rtx)
7113 {
7114 saveop = ".savepsp";
7115 off = - off;
7116 }
7117 else if (base == stack_pointer_rtx)
7118 saveop = ".savesp";
7119 else
7120 abort ();
7121
7122 src_regno = REGNO (src);
7123 switch (src_regno)
7124 {
7125 case BR_REG (0):
7126 if (current_frame_info.reg_save_b0 != 0)
7127 abort ();
7128 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7129 return 1;
7130
7131 case PR_REG (0):
7132 if (current_frame_info.reg_save_pr != 0)
7133 abort ();
7134 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7135 return 1;
7136
7137 case AR_LC_REGNUM:
7138 if (current_frame_info.reg_save_ar_lc != 0)
7139 abort ();
7140 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7141 return 1;
7142
7143 case AR_PFS_REGNUM:
7144 if (current_frame_info.reg_save_ar_pfs != 0)
7145 abort ();
7146 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7147 return 1;
7148
7149 case AR_UNAT_REGNUM:
7150 if (current_frame_info.reg_save_ar_unat != 0)
7151 abort ();
7152 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7153 return 1;
7154
7155 case GR_REG (4):
7156 case GR_REG (5):
7157 case GR_REG (6):
7158 case GR_REG (7):
7159 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7160 1 << (src_regno - GR_REG (4)));
7161 return 1;
7162
7163 case BR_REG (1):
7164 case BR_REG (2):
7165 case BR_REG (3):
7166 case BR_REG (4):
7167 case BR_REG (5):
7168 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7169 1 << (src_regno - BR_REG (1)));
7170 return 1;
7171
7172 case FR_REG (2):
7173 case FR_REG (3):
7174 case FR_REG (4):
7175 case FR_REG (5):
7176 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7177 1 << (src_regno - FR_REG (2)));
7178 return 1;
7179
7180 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7181 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7182 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7183 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7184 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7185 1 << (src_regno - FR_REG (12)));
7186 return 1;
7187
7188 default:
7189 return 0;
7190 }
7191 }
7192
7193 return 0;
7194 }
7195
7196
7197 /* This function looks at a single insn and emits any directives
7198 required to unwind this insn. */
7199 void
7200 process_for_unwind_directive (asm_out_file, insn)
7201 FILE *asm_out_file;
7202 rtx insn;
7203 {
7204 if (flag_unwind_tables
7205 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7206 {
7207 rtx pat;
7208
7209 if (GET_CODE (insn) == NOTE
7210 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7211 {
7212 block_num = NOTE_BASIC_BLOCK (insn)->index;
7213
7214 /* Restore unwind state from immediately before the epilogue. */
7215 if (need_copy_state)
7216 {
7217 fprintf (asm_out_file, "\t.body\n");
7218 fprintf (asm_out_file, "\t.copy_state 1\n");
7219 need_copy_state = 0;
7220 }
7221 }
7222
7223 if (! RTX_FRAME_RELATED_P (insn))
7224 return;
7225
7226 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7227 if (pat)
7228 pat = XEXP (pat, 0);
7229 else
7230 pat = PATTERN (insn);
7231
7232 switch (GET_CODE (pat))
7233 {
7234 case SET:
7235 process_set (asm_out_file, pat);
7236 break;
7237
7238 case PARALLEL:
7239 {
7240 int par_index;
7241 int limit = XVECLEN (pat, 0);
7242 for (par_index = 0; par_index < limit; par_index++)
7243 {
7244 rtx x = XVECEXP (pat, 0, par_index);
7245 if (GET_CODE (x) == SET)
7246 process_set (asm_out_file, x);
7247 }
7248 break;
7249 }
7250
7251 default:
7252 abort ();
7253 }
7254 }
7255 }
7256
7257 \f
7258 void
7259 ia64_init_builtins ()
7260 {
7261 tree psi_type_node = build_pointer_type (integer_type_node);
7262 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7263 tree endlink = void_list_node;
7264
7265 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7266 tree si_ftype_psi_si_si
7267 = build_function_type (integer_type_node,
7268 tree_cons (NULL_TREE, psi_type_node,
7269 tree_cons (NULL_TREE, integer_type_node,
7270 tree_cons (NULL_TREE,
7271 integer_type_node,
7272 endlink))));
7273
7274 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7275 tree di_ftype_pdi_di_di
7276 = build_function_type (long_integer_type_node,
7277 tree_cons (NULL_TREE, pdi_type_node,
7278 tree_cons (NULL_TREE,
7279 long_integer_type_node,
7280 tree_cons (NULL_TREE,
7281 long_integer_type_node,
7282 endlink))));
7283 /* __sync_synchronize */
7284 tree void_ftype_void
7285 = build_function_type (void_type_node, endlink);
7286
7287 /* __sync_lock_test_and_set_si */
7288 tree si_ftype_psi_si
7289 = build_function_type (integer_type_node,
7290 tree_cons (NULL_TREE, psi_type_node,
7291 tree_cons (NULL_TREE, integer_type_node, endlink)));
7292
7293 /* __sync_lock_test_and_set_di */
7294 tree di_ftype_pdi_di
7295 = build_function_type (long_integer_type_node,
7296 tree_cons (NULL_TREE, pdi_type_node,
7297 tree_cons (NULL_TREE, long_integer_type_node,
7298 endlink)));
7299
7300 /* __sync_lock_release_si */
7301 tree void_ftype_psi
7302 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
7303 endlink));
7304
7305 /* __sync_lock_release_di */
7306 tree void_ftype_pdi
7307 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
7308 endlink));
7309
7310 #define def_builtin(name, type, code) \
7311 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
7312
7313 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7314 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7315 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7316 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7317 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7318 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7319 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7320 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7321
7322 def_builtin ("__sync_synchronize", void_ftype_void,
7323 IA64_BUILTIN_SYNCHRONIZE);
7324
7325 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7326 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7327 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7328 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7329 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7330 IA64_BUILTIN_LOCK_RELEASE_SI);
7331 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7332 IA64_BUILTIN_LOCK_RELEASE_DI);
7333
7334 def_builtin ("__builtin_ia64_bsp",
7335 build_function_type (ptr_type_node, endlink),
7336 IA64_BUILTIN_BSP);
7337
7338 def_builtin ("__builtin_ia64_flushrs",
7339 build_function_type (void_type_node, endlink),
7340 IA64_BUILTIN_FLUSHRS);
7341
7342 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7343 IA64_BUILTIN_FETCH_AND_ADD_SI);
7344 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7345 IA64_BUILTIN_FETCH_AND_SUB_SI);
7346 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7347 IA64_BUILTIN_FETCH_AND_OR_SI);
7348 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7349 IA64_BUILTIN_FETCH_AND_AND_SI);
7350 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7351 IA64_BUILTIN_FETCH_AND_XOR_SI);
7352 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7353 IA64_BUILTIN_FETCH_AND_NAND_SI);
7354
7355 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7356 IA64_BUILTIN_ADD_AND_FETCH_SI);
7357 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7358 IA64_BUILTIN_SUB_AND_FETCH_SI);
7359 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7360 IA64_BUILTIN_OR_AND_FETCH_SI);
7361 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7362 IA64_BUILTIN_AND_AND_FETCH_SI);
7363 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7364 IA64_BUILTIN_XOR_AND_FETCH_SI);
7365 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7366 IA64_BUILTIN_NAND_AND_FETCH_SI);
7367
7368 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7369 IA64_BUILTIN_FETCH_AND_ADD_DI);
7370 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7371 IA64_BUILTIN_FETCH_AND_SUB_DI);
7372 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7373 IA64_BUILTIN_FETCH_AND_OR_DI);
7374 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7375 IA64_BUILTIN_FETCH_AND_AND_DI);
7376 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7377 IA64_BUILTIN_FETCH_AND_XOR_DI);
7378 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7379 IA64_BUILTIN_FETCH_AND_NAND_DI);
7380
7381 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7382 IA64_BUILTIN_ADD_AND_FETCH_DI);
7383 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7384 IA64_BUILTIN_SUB_AND_FETCH_DI);
7385 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7386 IA64_BUILTIN_OR_AND_FETCH_DI);
7387 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7388 IA64_BUILTIN_AND_AND_FETCH_DI);
7389 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7390 IA64_BUILTIN_XOR_AND_FETCH_DI);
7391 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7392 IA64_BUILTIN_NAND_AND_FETCH_DI);
7393
7394 #undef def_builtin
7395 }
7396
7397 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7398
7399 mf
7400 tmp = [ptr];
7401 do {
7402 ret = tmp;
7403 ar.ccv = tmp;
7404 tmp <op>= value;
7405 cmpxchgsz.acq tmp = [ptr], tmp
7406 } while (tmp != ret)
7407 */
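/* When the addend is one of the constants the hardware fetchadd insn
accepts (-16, -8, -4, -1, 1, 4, 8, 16), the compare-and-swap loop is
skipped and a single fetchadd.acq is emitted instead; see the special
case below. */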
7408
7409 static rtx
7410 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7411 optab binoptab;
7412 enum machine_mode mode;
7413 tree arglist;
7414 rtx target;
7415 {
7416 rtx ret, label, tmp, ccv, insn, mem, value;
7417 tree arg0, arg1;
7418
7419 arg0 = TREE_VALUE (arglist);
7420 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7421 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7422 value = expand_expr (arg1, NULL_RTX, mode, 0);
7423
7424 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7425 MEM_VOLATILE_P (mem) = 1;
7426
7427 if (target && register_operand (target, mode))
7428 ret = target;
7429 else
7430 ret = gen_reg_rtx (mode);
7431
7432 emit_insn (gen_mf ());
7433
7434 /* Special case for fetchadd instructions. */
7435 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7436 {
7437 if (mode == SImode)
7438 insn = gen_fetchadd_acq_si (ret, mem, value);
7439 else
7440 insn = gen_fetchadd_acq_di (ret, mem, value);
7441 emit_insn (insn);
7442 return ret;
7443 }
7444
7445 tmp = gen_reg_rtx (mode);
7446 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7447 emit_move_insn (tmp, mem);
7448
7449 label = gen_label_rtx ();
7450 emit_label (label);
7451 emit_move_insn (ret, tmp);
7452 emit_move_insn (ccv, tmp);
7453
7454 /* Perform the specific operation. Special case NAND by noticing
7455 one_cmpl_optab instead. */
7456 if (binoptab == one_cmpl_optab)
7457 {
7458 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7459 binoptab = and_optab;
7460 }
7461 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7462
7463 if (mode == SImode)
7464 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7465 else
7466 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7467 emit_insn (insn);
7468
7469 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7470
7471 return ret;
7472 }
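
/* A minimal usage sketch of the expansion above, using the names
   registered earlier in this file:

     int counter;
     int old = __sync_fetch_and_add_si (&counter, 8);

   takes the fetchadd fast path, since 8 is an increment fetchadd4.acq can
   encode, and returns the value COUNTER held before the add; an increment
   such as 5 instead falls back to the mf + cmpxchg4.acq retry loop shown
   in the comment before the function.  */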
7473
7474 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7475
7476 mf
7477 tmp = [ptr];
7478 do {
7479 old = tmp;
7480 ar.ccv = tmp;
7481 ret = tmp + value;
7482 cmpxchgsz.acq tmp = [ptr], ret
7483 } while (tmp != old)
7484 */
7485
7486 static rtx
7487 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7488 optab binoptab;
7489 enum machine_mode mode;
7490 tree arglist;
7491 rtx target;
7492 {
7493 rtx old, label, tmp, ret, ccv, insn, mem, value;
7494 tree arg0, arg1;
7495
7496 arg0 = TREE_VALUE (arglist);
7497 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7498 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7499 value = expand_expr (arg1, NULL_RTX, mode, 0);
7500
7501 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7502 MEM_VOLATILE_P (mem) = 1;
7503
7504 if (target && ! register_operand (target, mode))
7505 target = NULL_RTX;
7506
7507 emit_insn (gen_mf ());
7508 tmp = gen_reg_rtx (mode);
7509 old = gen_reg_rtx (mode);
7510 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7511
7512 emit_move_insn (tmp, mem);
7513
7514 label = gen_label_rtx ();
7515 emit_label (label);
7516 emit_move_insn (old, tmp);
7517 emit_move_insn (ccv, tmp);
7518
7519       /* Perform the specific operation.  NAND is special-cased: it arrives
7520 	 here as one_cmpl_optab and expands as a complement followed by an AND.  */
7521 if (binoptab == one_cmpl_optab)
7522 {
7523 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7524 binoptab = and_optab;
7525 }
7526 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7527
7528 if (mode == SImode)
7529 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7530 else
7531 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7532 emit_insn (insn);
7533
7534 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7535
7536 return ret;
7537 }
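
/* Usage sketch: the op_and_fetch family differs from fetch_and_op only in
   which value is returned.  Assuming the __sync_* names registered above,

     int v = 40;
     int a = __sync_fetch_and_add_si (&v, 2);
     int b = __sync_add_and_fetch_si (&v, 2);

   leaves a == 40 (the old contents), b == 44 (the new contents), and
   v == 44.  */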
7538
7539 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7540
7541 ar.ccv = oldval
7542 mf
7543 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7544 return ret
7545
7546 For bool_ it's the same except return ret == oldval.
7547 */
7548
7549 static rtx
7550 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7551 enum machine_mode mode;
7552 int boolp;
7553 tree arglist;
7554 rtx target;
7555 {
7556 tree arg0, arg1, arg2;
7557 rtx mem, old, new, ccv, tmp, insn;
7558
7559 arg0 = TREE_VALUE (arglist);
7560 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7561 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7562 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7563 old = expand_expr (arg1, NULL_RTX, mode, 0);
7564 new = expand_expr (arg2, NULL_RTX, mode, 0);
7565
7566 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7567 MEM_VOLATILE_P (mem) = 1;
7568
7569 if (! register_operand (old, mode))
7570 old = copy_to_mode_reg (mode, old);
7571 if (! register_operand (new, mode))
7572 new = copy_to_mode_reg (mode, new);
7573
7574 if (! boolp && target && register_operand (target, mode))
7575 tmp = target;
7576 else
7577 tmp = gen_reg_rtx (mode);
7578
7579 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7580 emit_move_insn (ccv, old);
7581 emit_insn (gen_mf ());
7582 if (mode == SImode)
7583 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7584 else
7585 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7586 emit_insn (insn);
7587
7588 if (boolp)
7589 {
7590 if (! target)
7591 target = gen_reg_rtx (mode);
7592 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7593 }
7594 else
7595 return tmp;
7596 }
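
/* Usage sketch, assuming the __sync_*_compare_and_swap_si spellings used
   when the builtins are registered:

     int word;
     int was = __sync_val_compare_and_swap_si (&word, 0, 1);
     int won = __sync_bool_compare_and_swap_si (&word, 0, 1);

   The val_ form returns the previous contents of *PTR; the bool_ form
   returns nonzero iff the previous contents equalled OLDVAL, which is the
   emit_store_flag_force comparison above.  */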
7597
7598 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7599
7600 static rtx
7601 ia64_expand_lock_test_and_set (mode, arglist, target)
7602 enum machine_mode mode;
7603 tree arglist;
7604 rtx target;
7605 {
7606 tree arg0, arg1;
7607 rtx mem, new, ret, insn;
7608
7609 arg0 = TREE_VALUE (arglist);
7610 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7611 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7612 new = expand_expr (arg1, NULL_RTX, mode, 0);
7613
7614 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7615 MEM_VOLATILE_P (mem) = 1;
7616 if (! register_operand (new, mode))
7617 new = copy_to_mode_reg (mode, new);
7618
7619 if (target && register_operand (target, mode))
7620 ret = target;
7621 else
7622 ret = gen_reg_rtx (mode);
7623
7624 if (mode == SImode)
7625 insn = gen_xchgsi (ret, mem, new);
7626 else
7627 insn = gen_xchgdi (ret, mem, new);
7628 emit_insn (insn);
7629
7630 return ret;
7631 }
7632
7633 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7634
7635 static rtx
7636 ia64_expand_lock_release (mode, arglist, target)
7637 enum machine_mode mode;
7638 tree arglist;
7639 rtx target ATTRIBUTE_UNUSED;
7640 {
7641 tree arg0;
7642 rtx mem;
7643
7644 arg0 = TREE_VALUE (arglist);
7645 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7646
7647 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7648 MEM_VOLATILE_P (mem) = 1;
7649
7650 emit_move_insn (mem, const0_rtx);
7651
7652 return const0_rtx;
7653 }
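
/* Together the two builtins above give a simple spinlock.  A sketch,
   assuming the __sync_lock_* spellings used at registration:

     static int lock;

     void acquire (void)
     {
       while (__sync_lock_test_and_set_si (&lock, 1))
         continue;
     }

     void release (void)
     {
       __sync_lock_release_si (&lock);
     }

   lock_test_and_set returns the previous contents of the lock word, so
   the loop spins until it observes a zero; lock_release then stores zero
   back with the release semantics noted in the comment above.  */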
7654
7655 rtx
7656 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
7657 tree exp;
7658 rtx target;
7659 rtx subtarget ATTRIBUTE_UNUSED;
7660 enum machine_mode mode ATTRIBUTE_UNUSED;
7661 int ignore ATTRIBUTE_UNUSED;
7662 {
7663 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7664 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7665 tree arglist = TREE_OPERAND (exp, 1);
7666
7667 switch (fcode)
7668 {
7669 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7670 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7671 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7672 case IA64_BUILTIN_LOCK_RELEASE_SI:
7673 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7674 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7675 case IA64_BUILTIN_FETCH_AND_OR_SI:
7676 case IA64_BUILTIN_FETCH_AND_AND_SI:
7677 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7678 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7679 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7680 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7681 case IA64_BUILTIN_OR_AND_FETCH_SI:
7682 case IA64_BUILTIN_AND_AND_FETCH_SI:
7683 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7684 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7685 mode = SImode;
7686 break;
7687
7688 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7689 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7690 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7691 case IA64_BUILTIN_LOCK_RELEASE_DI:
7692 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7693 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7694 case IA64_BUILTIN_FETCH_AND_OR_DI:
7695 case IA64_BUILTIN_FETCH_AND_AND_DI:
7696 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7697 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7698 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7699 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7700 case IA64_BUILTIN_OR_AND_FETCH_DI:
7701 case IA64_BUILTIN_AND_AND_FETCH_DI:
7702 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7703 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7704 mode = DImode;
7705 break;
7706
7707 default:
7708 break;
7709 }
7710
7711 switch (fcode)
7712 {
7713 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7714 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7715 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
7716
7717 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7718 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7719 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
7720
7721 case IA64_BUILTIN_SYNCHRONIZE:
7722 emit_insn (gen_mf ());
7723 return const0_rtx;
7724
7725 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7726 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7727 return ia64_expand_lock_test_and_set (mode, arglist, target);
7728
7729 case IA64_BUILTIN_LOCK_RELEASE_SI:
7730 case IA64_BUILTIN_LOCK_RELEASE_DI:
7731 return ia64_expand_lock_release (mode, arglist, target);
7732
7733 case IA64_BUILTIN_BSP:
7734 if (! target || ! register_operand (target, DImode))
7735 target = gen_reg_rtx (DImode);
7736 emit_insn (gen_bsp_value (target));
7737 return target;
7738
7739 case IA64_BUILTIN_FLUSHRS:
7740 emit_insn (gen_flushrs ());
7741 return const0_rtx;
7742
7743 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7744 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7745 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
7746
7747 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7748 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7749 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
7750
7751 case IA64_BUILTIN_FETCH_AND_OR_SI:
7752 case IA64_BUILTIN_FETCH_AND_OR_DI:
7753 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
7754
7755 case IA64_BUILTIN_FETCH_AND_AND_SI:
7756 case IA64_BUILTIN_FETCH_AND_AND_DI:
7757 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
7758
7759 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7760 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7761 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
7762
7763 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7764 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7765 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
7766
7767 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7768 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7769 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
7770
7771 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7772 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7773 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
7774
7775 case IA64_BUILTIN_OR_AND_FETCH_SI:
7776 case IA64_BUILTIN_OR_AND_FETCH_DI:
7777 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
7778
7779 case IA64_BUILTIN_AND_AND_FETCH_SI:
7780 case IA64_BUILTIN_AND_AND_FETCH_DI:
7781 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
7782
7783 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7784 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7785 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
7786
7787 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7788 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7789 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
7790
7791 default:
7792 break;
7793 }
7794
7795 return NULL_RTX;
7796 }
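
/* Besides the __sync_* family, the dispatcher above also handles
   __sync_synchronize (a bare mf) and the register-stack builtins behind
   IA64_BUILTIN_BSP and IA64_BUILTIN_FLUSHRS; for example

     void *backing_store = __builtin_ia64_bsp ();

   reads ar.bsp into a DImode register (a sketch; the user-level spellings
   are whatever was registered with def_builtin earlier in the file).  */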
7797
7798 /* On HP-UX, IA64 aggregate parameters are passed in the most
7799    significant bits of the stack slot.  */
7800
7801 enum direction
7802 ia64_hpux_function_arg_padding (mode, type)
7803 enum machine_mode mode;
7804 tree type;
7805 {
7806   /* Exception to the normal case for structures, unions, etc.  */
7807
7808 if (type && AGGREGATE_TYPE_P (type)
7809 && int_size_in_bytes (type) < UNITS_PER_WORD)
7810 return upward;
7811
7812   /* Otherwise this is the standard FUNCTION_ARG_PADDING, but with
7813      BYTES_BIG_ENDIAN hardwired to true, since HP-UX runs big-endian.  */
7814
7815   return ((mode == BLKmode
7816 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7817 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
7818 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
7819 ? downward : upward);
7820 }
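
/* Worked examples of the rules above, assuming the IA64 PARM_BOUNDARY of
   64 bits and UNITS_PER_WORD of 8: a struct of three chars is an
   aggregate smaller than a word, so the early return pads it upward,
   leaving it in the most significant bits of the big-endian slot; a plain
   SImode int is 32 bits, smaller than PARM_BOUNDARY, and pads downward;
   a DImode long fills the slot and pads upward.  */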