1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-attr.h"
42 #include "basic-block.h"
44 #include "sched-int.h"
46 #include "target-def.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  Nonzero while a label (as opposed to a
   label reference) is being emitted.  */
int ia64_asm_output_label = 0;
/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  Both operands are recorded here so
   that the actual comparison can be emitted later, once the branch or
   set-condition-code pattern knows which condition it needs.  */
struct rtx_def *ia64_compare_op0;
struct rtx_def *ia64_compare_op1;
/* Register names for ia64_expand_prologue.  Maps stacked-register index
   0..95 to the corresponding raw general-register name r32..r127.  */
static const char * const ia64_reg_numbers[96] =
{ "r32",  "r33",  "r34",  "r35",  "r36",  "r37",  "r38",  "r39",
  "r40",  "r41",  "r42",  "r43",  "r44",  "r45",  "r46",  "r47",
  "r48",  "r49",  "r50",  "r51",  "r52",  "r53",  "r54",  "r55",
  "r56",  "r57",  "r58",  "r59",  "r60",  "r61",  "r62",  "r63",
  "r64",  "r65",  "r66",  "r67",  "r68",  "r69",  "r70",  "r71",
  "r72",  "r73",  "r74",  "r75",  "r76",  "r77",  "r78",  "r79",
  "r80",  "r81",  "r82",  "r83",  "r84",  "r85",  "r86",  "r87",
  "r88",  "r89",  "r90",  "r91",  "r92",  "r93",  "r94",  "r95",
  "r96",  "r97",  "r98",  "r99",  "r100", "r101", "r102", "r103",
  "r104", "r105", "r106", "r107", "r108", "r109", "r110", "r111",
  "r112", "r113", "r114", "r115", "r116", "r117", "r118", "r119",
  "r120", "r121", "r122", "r123", "r124", "r125", "r126", "r127" };
/* Names of the eight register-stack input registers, in0..in7.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
/* Names of the eighty register-stack local registers, loc0..loc79.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0",  "loc1",  "loc2",  "loc3",  "loc4",  "loc5",  "loc6",  "loc7",
  "loc8",  "loc9",  "loc10", "loc11", "loc12", "loc13", "loc14", "loc15",
  "loc16", "loc17", "loc18", "loc19", "loc20", "loc21", "loc22", "loc23",
  "loc24", "loc25", "loc26", "loc27", "loc28", "loc29", "loc30", "loc31",
  "loc32", "loc33", "loc34", "loc35", "loc36", "loc37", "loc38", "loc39",
  "loc40", "loc41", "loc42", "loc43", "loc44", "loc45", "loc46", "loc47",
  "loc48", "loc49", "loc50", "loc51", "loc52", "loc53", "loc54", "loc55",
  "loc56", "loc57", "loc58", "loc59", "loc60", "loc61", "loc62", "loc63",
  "loc64", "loc65", "loc66", "loc67", "loc68", "loc69", "loc70", "loc71",
  "loc72", "loc73", "loc74", "loc75", "loc76", "loc77", "loc78", "loc79" };
/* Names of the eight register-stack output registers, out0..out7.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* String used with the -mfixed-range= option.  NULL unless the option
   was given; parsed later by fix_range.  */
const char *ia64_fixed_range_string;
/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;
/* Variables which are this size or smaller are put in the sdata/sbss
   sections (comment tail reconstructed -- original text truncated in
   this copy; verify against upstream).  */
unsigned int ia64_section_threshold;
105 static int find_gr_spill
PARAMS ((int));
106 static int next_scratch_gr_reg
PARAMS ((void));
107 static void mark_reg_gr_used_mask
PARAMS ((rtx
, void *));
108 static void ia64_compute_frame_size
PARAMS ((HOST_WIDE_INT
));
109 static void setup_spill_pointers
PARAMS ((int, rtx
, HOST_WIDE_INT
));
110 static void finish_spill_pointers
PARAMS ((void));
111 static rtx spill_restore_mem
PARAMS ((rtx
, HOST_WIDE_INT
));
112 static void do_spill
PARAMS ((rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
, rtx
));
113 static void do_restore
PARAMS ((rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
));
114 static rtx gen_movdi_x
PARAMS ((rtx
, rtx
, rtx
));
115 static rtx gen_fr_spill_x
PARAMS ((rtx
, rtx
, rtx
));
116 static rtx gen_fr_restore_x
PARAMS ((rtx
, rtx
, rtx
));
118 static enum machine_mode hfa_element_mode
PARAMS ((tree
, int));
119 static void fix_range
PARAMS ((const char *));
120 static void ia64_add_gc_roots
PARAMS ((void));
121 static void ia64_init_machine_status
PARAMS ((struct function
*));
122 static void ia64_mark_machine_status
PARAMS ((struct function
*));
123 static void ia64_free_machine_status
PARAMS ((struct function
*));
124 static void emit_insn_group_barriers
PARAMS ((FILE *, rtx
));
125 static void emit_all_insn_group_barriers
PARAMS ((FILE *, rtx
));
126 static void emit_predicate_relation_info
PARAMS ((void));
127 static void process_epilogue
PARAMS ((void));
128 static int process_set
PARAMS ((FILE *, rtx
));
130 static rtx ia64_expand_fetch_and_op
PARAMS ((optab
, enum machine_mode
,
132 static rtx ia64_expand_op_and_fetch
PARAMS ((optab
, enum machine_mode
,
134 static rtx ia64_expand_compare_and_swap
PARAMS ((enum machine_mode
, int,
136 static rtx ia64_expand_lock_test_and_set
PARAMS ((enum machine_mode
,
138 static rtx ia64_expand_lock_release
PARAMS ((enum machine_mode
, tree
, rtx
));
139 static int ia64_valid_type_attribute
PARAMS((tree
, tree
, tree
, tree
));
140 static void ia64_output_function_prologue
PARAMS ((FILE *, HOST_WIDE_INT
));
141 static void ia64_output_function_epilogue
PARAMS ((FILE *, HOST_WIDE_INT
));
142 static void ia64_output_function_end_prologue
PARAMS ((FILE *));
/* Initialize the GCC target structure: route the generic target hooks
   to their ia64-specific implementations before TARGET_INITIALIZER is
   expanded below.  */
#undef TARGET_VALID_TYPE_ATTRIBUTE
#define TARGET_VALID_TYPE_ATTRIBUTE ia64_valid_type_attribute

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
161 struct gcc_target targetm
= TARGET_INITIALIZER
;
163 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
166 call_operand (op
, mode
)
168 enum machine_mode mode
;
170 if (mode
!= GET_MODE (op
))
173 return (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == REG
174 || (GET_CODE (op
) == SUBREG
&& GET_CODE (XEXP (op
, 0)) == REG
));
177 /* Return 1 if OP refers to a symbol in the sdata section. */
180 sdata_symbolic_operand (op
, mode
)
182 enum machine_mode mode ATTRIBUTE_UNUSED
;
184 switch (GET_CODE (op
))
187 if (GET_CODE (XEXP (op
, 0)) != PLUS
188 || GET_CODE (XEXP (XEXP (op
, 0), 0)) != SYMBOL_REF
)
190 op
= XEXP (XEXP (op
, 0), 0);
194 if (CONSTANT_POOL_ADDRESS_P (op
))
195 return GET_MODE_SIZE (get_pool_mode (op
)) <= ia64_section_threshold
;
197 return XSTR (op
, 0)[0] == SDATA_NAME_FLAG_CHAR
;
206 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
209 got_symbolic_operand (op
, mode
)
211 enum machine_mode mode ATTRIBUTE_UNUSED
;
213 switch (GET_CODE (op
))
217 if (GET_CODE (op
) != PLUS
)
219 if (GET_CODE (XEXP (op
, 0)) != SYMBOL_REF
)
222 if (GET_CODE (op
) != CONST_INT
)
227 /* Ok if we're not using GOT entries at all. */
228 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
231 /* "Ok" while emitting rtl, since otherwise we won't be provided
232 with the entire offset during emission, which makes it very
233 hard to split the offset into high and low parts. */
234 if (rtx_equal_function_value_matters
)
237 /* Force the low 14 bits of the constant to zero so that we do not
238 use up so many GOT entries. */
239 return (INTVAL (op
) & 0x3fff) == 0;
251 /* Return 1 if OP refers to a symbol. */
254 symbolic_operand (op
, mode
)
256 enum machine_mode mode ATTRIBUTE_UNUSED
;
258 switch (GET_CODE (op
))
271 /* Return 1 if OP refers to a function. */
274 function_operand (op
, mode
)
276 enum machine_mode mode ATTRIBUTE_UNUSED
;
278 if (GET_CODE (op
) == SYMBOL_REF
&& SYMBOL_REF_FLAG (op
))
284 /* Return 1 if OP is setjmp or a similar function. */
286 /* ??? This is an unsatisfying solution. Should rethink. */
289 setjmp_operand (op
, mode
)
291 enum machine_mode mode ATTRIBUTE_UNUSED
;
296 if (GET_CODE (op
) != SYMBOL_REF
)
301 /* The following code is borrowed from special_function_p in calls.c. */
303 /* Disregard prefix _, __ or __x. */
306 if (name
[1] == '_' && name
[2] == 'x')
308 else if (name
[1] == '_')
318 && (! strcmp (name
, "setjmp")
319 || ! strcmp (name
, "setjmp_syscall")))
321 && ! strcmp (name
, "sigsetjmp"))
323 && ! strcmp (name
, "savectx")));
325 else if ((name
[0] == 'q' && name
[1] == 's'
326 && ! strcmp (name
, "qsetjmp"))
327 || (name
[0] == 'v' && name
[1] == 'f'
328 && ! strcmp (name
, "vfork")))
334 /* Return 1 if OP is a general operand, but when pic exclude symbolic
337 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
338 from PREDICATE_CODES. */
341 move_operand (op
, mode
)
343 enum machine_mode mode
;
345 if (! TARGET_NO_PIC
&& symbolic_operand (op
, mode
))
348 return general_operand (op
, mode
);
351 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
354 gr_register_operand (op
, mode
)
356 enum machine_mode mode
;
358 if (! register_operand (op
, mode
))
360 if (GET_CODE (op
) == SUBREG
)
361 op
= SUBREG_REG (op
);
362 if (GET_CODE (op
) == REG
)
364 unsigned int regno
= REGNO (op
);
365 if (regno
< FIRST_PSEUDO_REGISTER
)
366 return GENERAL_REGNO_P (regno
);
371 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
374 fr_register_operand (op
, mode
)
376 enum machine_mode mode
;
378 if (! register_operand (op
, mode
))
380 if (GET_CODE (op
) == SUBREG
)
381 op
= SUBREG_REG (op
);
382 if (GET_CODE (op
) == REG
)
384 unsigned int regno
= REGNO (op
);
385 if (regno
< FIRST_PSEUDO_REGISTER
)
386 return FR_REGNO_P (regno
);
391 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
394 grfr_register_operand (op
, mode
)
396 enum machine_mode mode
;
398 if (! register_operand (op
, mode
))
400 if (GET_CODE (op
) == SUBREG
)
401 op
= SUBREG_REG (op
);
402 if (GET_CODE (op
) == REG
)
404 unsigned int regno
= REGNO (op
);
405 if (regno
< FIRST_PSEUDO_REGISTER
)
406 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
411 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
414 gr_nonimmediate_operand (op
, mode
)
416 enum machine_mode mode
;
418 if (! nonimmediate_operand (op
, mode
))
420 if (GET_CODE (op
) == SUBREG
)
421 op
= SUBREG_REG (op
);
422 if (GET_CODE (op
) == REG
)
424 unsigned int regno
= REGNO (op
);
425 if (regno
< FIRST_PSEUDO_REGISTER
)
426 return GENERAL_REGNO_P (regno
);
431 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
434 fr_nonimmediate_operand (op
, mode
)
436 enum machine_mode mode
;
438 if (! nonimmediate_operand (op
, mode
))
440 if (GET_CODE (op
) == SUBREG
)
441 op
= SUBREG_REG (op
);
442 if (GET_CODE (op
) == REG
)
444 unsigned int regno
= REGNO (op
);
445 if (regno
< FIRST_PSEUDO_REGISTER
)
446 return FR_REGNO_P (regno
);
451 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
454 grfr_nonimmediate_operand (op
, mode
)
456 enum machine_mode mode
;
458 if (! nonimmediate_operand (op
, mode
))
460 if (GET_CODE (op
) == SUBREG
)
461 op
= SUBREG_REG (op
);
462 if (GET_CODE (op
) == REG
)
464 unsigned int regno
= REGNO (op
);
465 if (regno
< FIRST_PSEUDO_REGISTER
)
466 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
471 /* Return 1 if OP is a GR register operand, or zero. */
474 gr_reg_or_0_operand (op
, mode
)
476 enum machine_mode mode
;
478 return (op
== const0_rtx
|| gr_register_operand (op
, mode
));
481 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
484 gr_reg_or_5bit_operand (op
, mode
)
486 enum machine_mode mode
;
488 return ((GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 32)
489 || GET_CODE (op
) == CONSTANT_P_RTX
490 || gr_register_operand (op
, mode
));
493 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
496 gr_reg_or_6bit_operand (op
, mode
)
498 enum machine_mode mode
;
500 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
501 || GET_CODE (op
) == CONSTANT_P_RTX
502 || gr_register_operand (op
, mode
));
505 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
508 gr_reg_or_8bit_operand (op
, mode
)
510 enum machine_mode mode
;
512 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
513 || GET_CODE (op
) == CONSTANT_P_RTX
514 || gr_register_operand (op
, mode
));
517 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
520 grfr_reg_or_8bit_operand (op
, mode
)
522 enum machine_mode mode
;
524 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
525 || GET_CODE (op
) == CONSTANT_P_RTX
526 || grfr_register_operand (op
, mode
));
529 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
533 gr_reg_or_8bit_adjusted_operand (op
, mode
)
535 enum machine_mode mode
;
537 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_L (INTVAL (op
)))
538 || GET_CODE (op
) == CONSTANT_P_RTX
539 || gr_register_operand (op
, mode
));
542 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
543 immediate and an 8 bit adjusted immediate operand. This is necessary
544 because when we emit a compare, we don't know what the condition will be,
545 so we need the union of the immediates accepted by GT and LT. */
548 gr_reg_or_8bit_and_adjusted_operand (op
, mode
)
550 enum machine_mode mode
;
552 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
))
553 && CONST_OK_FOR_L (INTVAL (op
)))
554 || GET_CODE (op
) == CONSTANT_P_RTX
555 || gr_register_operand (op
, mode
));
558 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
561 gr_reg_or_14bit_operand (op
, mode
)
563 enum machine_mode mode
;
565 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_I (INTVAL (op
)))
566 || GET_CODE (op
) == CONSTANT_P_RTX
567 || gr_register_operand (op
, mode
));
570 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
573 gr_reg_or_22bit_operand (op
, mode
)
575 enum machine_mode mode
;
577 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_J (INTVAL (op
)))
578 || GET_CODE (op
) == CONSTANT_P_RTX
579 || gr_register_operand (op
, mode
));
582 /* Return 1 if OP is a 6 bit immediate operand. */
585 shift_count_operand (op
, mode
)
587 enum machine_mode mode ATTRIBUTE_UNUSED
;
589 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
590 || GET_CODE (op
) == CONSTANT_P_RTX
);
593 /* Return 1 if OP is a 5 bit immediate operand. */
596 shift_32bit_count_operand (op
, mode
)
598 enum machine_mode mode ATTRIBUTE_UNUSED
;
600 return ((GET_CODE (op
) == CONST_INT
601 && (INTVAL (op
) >= 0 && INTVAL (op
) < 32))
602 || GET_CODE (op
) == CONSTANT_P_RTX
);
605 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
608 shladd_operand (op
, mode
)
610 enum machine_mode mode ATTRIBUTE_UNUSED
;
612 return (GET_CODE (op
) == CONST_INT
613 && (INTVAL (op
) == 2 || INTVAL (op
) == 4
614 || INTVAL (op
) == 8 || INTVAL (op
) == 16));
617 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
620 fetchadd_operand (op
, mode
)
622 enum machine_mode mode ATTRIBUTE_UNUSED
;
624 return (GET_CODE (op
) == CONST_INT
625 && (INTVAL (op
) == -16 || INTVAL (op
) == -8 ||
626 INTVAL (op
) == -4 || INTVAL (op
) == -1 ||
627 INTVAL (op
) == 1 || INTVAL (op
) == 4 ||
628 INTVAL (op
) == 8 || INTVAL (op
) == 16));
631 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
634 fr_reg_or_fp01_operand (op
, mode
)
636 enum machine_mode mode
;
638 return ((GET_CODE (op
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (op
))
639 || fr_register_operand (op
, mode
));
642 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
643 POST_MODIFY with a REG as displacement. */
646 destination_operand (op
, mode
)
648 enum machine_mode mode
;
650 if (! nonimmediate_operand (op
, mode
))
652 if (GET_CODE (op
) == MEM
653 && GET_CODE (XEXP (op
, 0)) == POST_MODIFY
654 && GET_CODE (XEXP (XEXP (XEXP (op
, 0), 1), 1)) == REG
)
659 /* Like memory_operand, but don't allow post-increments. */
662 not_postinc_memory_operand (op
, mode
)
664 enum machine_mode mode
;
666 return (memory_operand (op
, mode
)
667 && GET_RTX_CLASS (GET_CODE (XEXP (op
, 0))) != 'a');
670 /* Return 1 if this is a comparison operator, which accepts an normal 8-bit
671 signed immediate operand. */
674 normal_comparison_operator (op
, mode
)
676 enum machine_mode mode
;
678 enum rtx_code code
= GET_CODE (op
);
679 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
680 && (code
== EQ
|| code
== NE
681 || code
== GT
|| code
== LE
|| code
== GTU
|| code
== LEU
));
684 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
685 signed immediate operand. */
688 adjusted_comparison_operator (op
, mode
)
690 enum machine_mode mode
;
692 enum rtx_code code
= GET_CODE (op
);
693 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
694 && (code
== LT
|| code
== GE
|| code
== LTU
|| code
== GEU
));
697 /* Return 1 if this is a signed inequality operator. */
700 signed_inequality_operator (op
, mode
)
702 enum machine_mode mode
;
704 enum rtx_code code
= GET_CODE (op
);
705 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
706 && (code
== GE
|| code
== GT
707 || code
== LE
|| code
== LT
));
710 /* Return 1 if this operator is valid for predication. */
713 predicate_operator (op
, mode
)
715 enum machine_mode mode
;
717 enum rtx_code code
= GET_CODE (op
);
718 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
719 && (code
== EQ
|| code
== NE
));
722 /* Return 1 if this operator can be used in a conditional operation. */
725 condop_operator (op
, mode
)
727 enum machine_mode mode
;
729 enum rtx_code code
= GET_CODE (op
);
730 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
731 && (code
== PLUS
|| code
== MINUS
|| code
== AND
732 || code
== IOR
|| code
== XOR
));
735 /* Return 1 if this is the ar.lc register. */
738 ar_lc_reg_operand (op
, mode
)
740 enum machine_mode mode
;
742 return (GET_MODE (op
) == DImode
743 && (mode
== DImode
|| mode
== VOIDmode
)
744 && GET_CODE (op
) == REG
745 && REGNO (op
) == AR_LC_REGNUM
);
748 /* Return 1 if this is the ar.ccv register. */
751 ar_ccv_reg_operand (op
, mode
)
753 enum machine_mode mode
;
755 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
756 && GET_CODE (op
) == REG
757 && REGNO (op
) == AR_CCV_REGNUM
);
760 /* Like general_operand, but don't allow (mem (addressof)). */
763 general_tfmode_operand (op
, mode
)
765 enum machine_mode mode
;
767 if (! general_operand (op
, mode
))
769 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
777 destination_tfmode_operand (op
, mode
)
779 enum machine_mode mode
;
781 if (! destination_operand (op
, mode
))
783 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
791 tfreg_or_fp01_operand (op
, mode
)
793 enum machine_mode mode
;
795 if (GET_CODE (op
) == SUBREG
)
797 return fr_reg_or_fp01_operand (op
, mode
);
800 /* Return 1 if the operands of a move are ok. */
803 ia64_move_ok (dst
, src
)
806 /* If we're under init_recog_no_volatile, we'll not be able to use
807 memory_operand. So check the code directly and don't worry about
808 the validity of the underlying address, which should have been
809 checked elsewhere anyway. */
810 if (GET_CODE (dst
) != MEM
)
812 if (GET_CODE (src
) == MEM
)
814 if (register_operand (src
, VOIDmode
))
817 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
818 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
819 return src
== const0_rtx
;
821 return GET_CODE (src
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (src
);
824 /* Check if OP is a mask suitible for use with SHIFT in a dep.z instruction.
825 Return the length of the field, or <= 0 on failure. */
828 ia64_depz_field_mask (rop
, rshift
)
831 unsigned HOST_WIDE_INT op
= INTVAL (rop
);
832 unsigned HOST_WIDE_INT shift
= INTVAL (rshift
);
834 /* Get rid of the zero bits we're shifting in. */
837 /* We must now have a solid block of 1's at bit 0. */
838 return exact_log2 (op
+ 1);
841 /* Expand a symbolic constant load. */
842 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
845 ia64_expand_load_address (dest
, src
, scratch
)
846 rtx dest
, src
, scratch
;
850 /* The destination could be a MEM during initial rtl generation,
851 which isn't a valid destination for the PIC load address patterns. */
852 if (! register_operand (dest
, DImode
))
853 temp
= gen_reg_rtx (DImode
);
858 emit_insn (gen_load_gprel64 (temp
, src
));
859 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FLAG (src
))
860 emit_insn (gen_load_fptr (temp
, src
));
861 else if (sdata_symbolic_operand (src
, DImode
))
862 emit_insn (gen_load_gprel (temp
, src
));
863 else if (GET_CODE (src
) == CONST
864 && GET_CODE (XEXP (src
, 0)) == PLUS
865 && GET_CODE (XEXP (XEXP (src
, 0), 1)) == CONST_INT
866 && (INTVAL (XEXP (XEXP (src
, 0), 1)) & 0x1fff) != 0)
868 rtx subtarget
= no_new_pseudos
? temp
: gen_reg_rtx (DImode
);
869 rtx sym
= XEXP (XEXP (src
, 0), 0);
870 HOST_WIDE_INT ofs
, hi
, lo
;
872 /* Split the offset into a sign extended 14-bit low part
873 and a complementary high part. */
874 ofs
= INTVAL (XEXP (XEXP (src
, 0), 1));
875 lo
= ((ofs
& 0x3fff) ^ 0x2000) - 0x2000;
879 scratch
= no_new_pseudos
? subtarget
: gen_reg_rtx (DImode
);
881 emit_insn (gen_load_symptr (subtarget
, plus_constant (sym
, hi
),
883 emit_insn (gen_adddi3 (temp
, subtarget
, GEN_INT (lo
)));
889 scratch
= no_new_pseudos
? temp
: gen_reg_rtx (DImode
);
891 insn
= emit_insn (gen_load_symptr (temp
, src
, scratch
));
892 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_EQUAL
, src
, REG_NOTES (insn
));
896 emit_move_insn (dest
, temp
);
900 ia64_gp_save_reg (setjmp_p
)
903 rtx save
= cfun
->machine
->ia64_gp_save
;
907 /* We can't save GP in a pseudo if we are calling setjmp, because
908 pseudos won't be restored by longjmp. For now, we save it in r4. */
909 /* ??? It would be more efficient to save this directly into a stack
910 slot. Unfortunately, the stack slot address gets cse'd across
911 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
914 /* ??? Get the barf bag, Virginia. We've got to replace this thing
915 in place, since this rtx is used in exception handling receivers.
916 Moreover, we must get this rtx out of regno_reg_rtx or reload
917 will do the wrong thing. */
918 unsigned int old_regno
= REGNO (save
);
919 if (setjmp_p
&& old_regno
!= GR_REG (4))
921 REGNO (save
) = GR_REG (4);
922 regno_reg_rtx
[old_regno
] = gen_rtx_raw_REG (DImode
, old_regno
);
928 save
= gen_rtx_REG (DImode
, GR_REG (4));
930 save
= gen_rtx_REG (DImode
, LOC_REG (0));
932 save
= gen_reg_rtx (DImode
);
933 cfun
->machine
->ia64_gp_save
= save
;
939 /* Split a post-reload TImode reference into two DImode components. */
942 ia64_split_timode (out
, in
, scratch
)
946 switch (GET_CODE (in
))
949 out
[0] = gen_rtx_REG (DImode
, REGNO (in
));
950 out
[1] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
955 rtx base
= XEXP (in
, 0);
957 switch (GET_CODE (base
))
960 out
[0] = adjust_address (in
, DImode
, 0);
963 base
= XEXP (base
, 0);
964 out
[0] = adjust_address (in
, DImode
, 0);
967 /* Since we're changing the mode, we need to change to POST_MODIFY
968 as well to preserve the size of the increment. Either that or
969 do the update in two steps, but we've already got this scratch
970 register handy so let's use it. */
972 base
= XEXP (base
, 0);
974 = change_address (in
, DImode
,
976 (Pmode
, base
, plus_constant (base
, 16)));
979 base
= XEXP (base
, 0);
981 = change_address (in
, DImode
,
983 (Pmode
, base
, plus_constant (base
, -16)));
989 if (scratch
== NULL_RTX
)
991 out
[1] = change_address (in
, DImode
, scratch
);
992 return gen_adddi3 (scratch
, base
, GEN_INT (8));
997 split_double (in
, &out
[0], &out
[1]);
1005 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1006 through memory plus an extra GR scratch register. Except that you can
1007 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1008 SECONDARY_RELOAD_CLASS, but not both.
1010 We got into problems in the first place by allowing a construct like
1011 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1012 This solution attempts to prevent this situation from ocurring. When
1013 we see something like the above, we spill the inner register to memory. */
1016 spill_tfmode_operand (in
, force
)
1020 if (GET_CODE (in
) == SUBREG
1021 && GET_MODE (SUBREG_REG (in
)) == TImode
1022 && GET_CODE (SUBREG_REG (in
)) == REG
)
1024 rtx mem
= gen_mem_addressof (SUBREG_REG (in
), NULL_TREE
);
1025 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
1027 else if (force
&& GET_CODE (in
) == REG
)
1029 rtx mem
= gen_mem_addressof (in
, NULL_TREE
);
1030 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
1032 else if (GET_CODE (in
) == MEM
1033 && GET_CODE (XEXP (in
, 0)) == ADDRESSOF
)
1034 return change_address (in
, TFmode
, copy_to_reg (XEXP (in
, 0)));
1039 /* Emit comparison instruction if necessary, returning the expression
1040 that holds the compare result in the proper mode. */
1043 ia64_expand_compare (code
, mode
)
1045 enum machine_mode mode
;
1047 rtx op0
= ia64_compare_op0
, op1
= ia64_compare_op1
;
1050 /* If we have a BImode input, then we already have a compare result, and
1051 do not need to emit another comparison. */
1052 if (GET_MODE (op0
) == BImode
)
1054 if ((code
== NE
|| code
== EQ
) && op1
== const0_rtx
)
1061 cmp
= gen_reg_rtx (BImode
);
1062 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1063 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
)));
1067 return gen_rtx_fmt_ee (code
, mode
, cmp
, const0_rtx
);
1070 /* Emit the appropriate sequence for a call. */
1073 ia64_expand_call (retval
, addr
, nextarg
, sibcall_p
)
1079 rtx insn
, b0
, gp_save
, narg_rtx
;
1082 addr
= XEXP (addr
, 0);
1083 b0
= gen_rtx_REG (DImode
, R_BR (0));
1087 else if (IN_REGNO_P (REGNO (nextarg
)))
1088 narg
= REGNO (nextarg
) - IN_REG (0);
1090 narg
= REGNO (nextarg
) - OUT_REG (0);
1091 narg_rtx
= GEN_INT (narg
);
1093 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
1096 insn
= gen_sibcall_nopic (addr
, narg_rtx
, b0
);
1098 insn
= gen_call_nopic (addr
, narg_rtx
, b0
);
1100 insn
= gen_call_value_nopic (retval
, addr
, narg_rtx
, b0
);
1101 emit_call_insn (insn
);
1108 gp_save
= ia64_gp_save_reg (setjmp_operand (addr
, VOIDmode
));
1110 /* If this is an indirect call, then we have the address of a descriptor. */
1111 if (! symbolic_operand (addr
, VOIDmode
))
1116 emit_move_insn (gp_save
, pic_offset_table_rtx
);
1118 dest
= force_reg (DImode
, gen_rtx_MEM (DImode
, addr
));
1119 emit_move_insn (pic_offset_table_rtx
,
1120 gen_rtx_MEM (DImode
, plus_constant (addr
, 8)));
1123 insn
= gen_sibcall_pic (dest
, narg_rtx
, b0
);
1125 insn
= gen_call_pic (dest
, narg_rtx
, b0
);
1127 insn
= gen_call_value_pic (retval
, dest
, narg_rtx
, b0
);
1128 emit_call_insn (insn
);
1131 emit_move_insn (pic_offset_table_rtx
, gp_save
);
1133 else if (TARGET_CONST_GP
)
1136 insn
= gen_sibcall_nopic (addr
, narg_rtx
, b0
);
1138 insn
= gen_call_nopic (addr
, narg_rtx
, b0
);
1140 insn
= gen_call_value_nopic (retval
, addr
, narg_rtx
, b0
);
1141 emit_call_insn (insn
);
1146 emit_call_insn (gen_sibcall_pic (addr
, narg_rtx
, b0
));
1149 emit_move_insn (gp_save
, pic_offset_table_rtx
);
1152 insn
= gen_call_pic (addr
, narg_rtx
, b0
);
1154 insn
= gen_call_value_pic (retval
, addr
, narg_rtx
, b0
);
1155 emit_call_insn (insn
);
1157 emit_move_insn (pic_offset_table_rtx
, gp_save
);
1162 /* Begin the assembly file. */
1165 emit_safe_across_calls (f
)
1168 unsigned int rs
, re
;
1175 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
1179 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
1183 fputs ("\t.pred.safe_across_calls ", f
);
1189 fprintf (f
, "p%u", rs
);
1191 fprintf (f
, "p%u-p%u", rs
, re
- 1);
1199 /* Structure to be filled in by ia64_compute_frame_size with register
1200 save masks and offsets for the current function. */
1202 struct ia64_frame_info
1204 HOST_WIDE_INT total_size
; /* size of the stack frame, not including
1205 the caller's scratch area. */
1206 HOST_WIDE_INT spill_cfa_off
; /* top of the reg spill area from the cfa. */
1207 HOST_WIDE_INT spill_size
; /* size of the gr/br/fr spill area. */
1208 HOST_WIDE_INT extra_spill_size
; /* size of spill area for others. */
1209 HARD_REG_SET mask
; /* mask of saved registers. */
1210 unsigned int gr_used_mask
; /* mask of registers in use as gr spill
1211 registers or long-term scratches. */
1212 int n_spilled
; /* number of spilled registers. */
1213 int reg_fp
; /* register for fp. */
1214 int reg_save_b0
; /* save register for b0. */
1215 int reg_save_pr
; /* save register for prs. */
1216 int reg_save_ar_pfs
; /* save register for ar.pfs. */
1217 int reg_save_ar_unat
; /* save register for ar.unat. */
1218 int reg_save_ar_lc
; /* save register for ar.lc. */
1219 int n_input_regs
; /* number of input registers used. */
1220 int n_local_regs
; /* number of local registers used. */
1221 int n_output_regs
; /* number of output registers used. */
1222 int n_rotate_regs
; /* number of rotating registers used. */
1224 char need_regstk
; /* true if a .regstk directive needed. */
1225 char initialized
; /* true if the data is finalized. */
1228 /* Current frame information calculated by ia64_compute_frame_size. */
1229 static struct ia64_frame_info current_frame_info
;
1231 /* Helper function for ia64_compute_frame_size: find an appropriate general
1232 register to spill some special register to. SPECIAL_SPILL_MASK contains
1233 bits in GR0 to GR31 that have already been allocated by this routine.
1234 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1237 find_gr_spill (try_locals
)
1242 /* If this is a leaf function, first try an otherwise unused
1243 call-clobbered register. */
1244 if (current_function_is_leaf
)
1246 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1247 if (! regs_ever_live
[regno
]
1248 && call_used_regs
[regno
]
1249 && ! fixed_regs
[regno
]
1250 && ! global_regs
[regno
]
1251 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1253 current_frame_info
.gr_used_mask
|= 1 << regno
;
1260 regno
= current_frame_info
.n_local_regs
;
1261 /* If there is a frame pointer, then we can't use loc79, because
1262 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1263 reg_name switching code in ia64_expand_prologue. */
1264 if (regno
< (80 - frame_pointer_needed
))
1266 current_frame_info
.n_local_regs
= regno
+ 1;
1267 return LOC_REG (0) + regno
;
1271 /* Failed to find a general register to spill to. Must use stack. */
1275 /* In order to make for nice schedules, we try to allocate every temporary
1276 to a different register. We must of course stay away from call-saved,
1277 fixed, and global registers. We must also stay away from registers
1278 allocated in current_frame_info.gr_used_mask, since those include regs
1279 used all through the prologue.
1281 Any register allocated here must be used immediately. The idea is to
1282 aid scheduling, not to solve data flow problems. */
1284 static int last_scratch_gr_reg
;
1287 next_scratch_gr_reg ()
1291 for (i
= 0; i
< 32; ++i
)
1293 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
1294 if (call_used_regs
[regno
]
1295 && ! fixed_regs
[regno
]
1296 && ! global_regs
[regno
]
1297 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1299 last_scratch_gr_reg
= regno
;
1304 /* There must be _something_ available. */
1308 /* Helper function for ia64_compute_frame_size, called through
1309 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1312 mark_reg_gr_used_mask (reg
, data
)
1314 void *data ATTRIBUTE_UNUSED
;
1316 unsigned int regno
= REGNO (reg
);
1318 current_frame_info
.gr_used_mask
|= 1 << regno
;
1321 /* Returns the number of bytes offset between the frame pointer and the stack
1322 pointer for the current function. SIZE is the number of bytes of space
1323 needed for local variables. */
1326 ia64_compute_frame_size (size
)
1329 HOST_WIDE_INT total_size
;
1330 HOST_WIDE_INT spill_size
= 0;
1331 HOST_WIDE_INT extra_spill_size
= 0;
1332 HOST_WIDE_INT pretend_args_size
;
1335 int spilled_gr_p
= 0;
1336 int spilled_fr_p
= 0;
1340 if (current_frame_info
.initialized
)
1343 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
1344 CLEAR_HARD_REG_SET (mask
);
1346 /* Don't allocate scratches to the return register. */
1347 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
1349 /* Don't allocate scratches to the EH scratch registers. */
1350 if (cfun
->machine
->ia64_eh_epilogue_sp
)
1351 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
1352 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
1353 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
1355 /* Find the size of the register stack frame. We have only 80 local
1356 registers, because we reserve 8 for the inputs and 8 for the
1359 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1360 since we'll be adjusting that down later. */
1361 regno
= LOC_REG (78) + ! frame_pointer_needed
;
1362 for (; regno
>= LOC_REG (0); regno
--)
1363 if (regs_ever_live
[regno
])
1365 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
1367 /* For functions marked with the syscall_linkage attribute, we must mark
1368 all eight input registers as in use, so that locals aren't visible to
1371 if (cfun
->machine
->n_varargs
> 0
1372 || lookup_attribute ("syscall_linkage",
1373 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
1374 current_frame_info
.n_input_regs
= 8;
1377 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
1378 if (regs_ever_live
[regno
])
1380 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
1383 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
1384 if (regs_ever_live
[regno
])
1386 i
= regno
- OUT_REG (0) + 1;
1388 /* When -p profiling, we need one output register for the mcount argument.
1389 Likwise for -a profiling for the bb_init_func argument. For -ax
1390 profiling, we need two output registers for the two bb_init_trace_func
1392 if (profile_flag
|| profile_block_flag
== 1)
1394 else if (profile_block_flag
== 2)
1396 current_frame_info
.n_output_regs
= i
;
1398 /* ??? No rotating register support yet. */
1399 current_frame_info
.n_rotate_regs
= 0;
1401 /* Discover which registers need spilling, and how much room that
1402 will take. Begin with floating point and general registers,
1403 which will always wind up on the stack. */
1405 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
1406 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1408 SET_HARD_REG_BIT (mask
, regno
);
1414 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1415 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1417 SET_HARD_REG_BIT (mask
, regno
);
1423 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
1424 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1426 SET_HARD_REG_BIT (mask
, regno
);
1431 /* Now come all special registers that might get saved in other
1432 general registers. */
1434 if (frame_pointer_needed
)
1436 current_frame_info
.reg_fp
= find_gr_spill (1);
1437 /* If we did not get a register, then we take LOC79. This is guaranteed
1438 to be free, even if regs_ever_live is already set, because this is
1439 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1440 as we don't count loc79 above. */
1441 if (current_frame_info
.reg_fp
== 0)
1443 current_frame_info
.reg_fp
= LOC_REG (79);
1444 current_frame_info
.n_local_regs
++;
1448 if (! current_function_is_leaf
)
1450 /* Emit a save of BR0 if we call other functions. Do this even
1451 if this function doesn't return, as EH depends on this to be
1452 able to unwind the stack. */
1453 SET_HARD_REG_BIT (mask
, BR_REG (0));
1455 current_frame_info
.reg_save_b0
= find_gr_spill (1);
1456 if (current_frame_info
.reg_save_b0
== 0)
1462 /* Similarly for ar.pfs. */
1463 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
1464 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
1465 if (current_frame_info
.reg_save_ar_pfs
== 0)
1467 extra_spill_size
+= 8;
1473 if (regs_ever_live
[BR_REG (0)] && ! call_used_regs
[BR_REG (0)])
1475 SET_HARD_REG_BIT (mask
, BR_REG (0));
1481 /* Unwind descriptor hackery: things are most efficient if we allocate
1482 consecutive GR save registers for RP, PFS, FP in that order. However,
1483 it is absolutely critical that FP get the only hard register that's
1484 guaranteed to be free, so we allocated it first. If all three did
1485 happen to be allocated hard regs, and are consecutive, rearrange them
1486 into the preferred order now. */
1487 if (current_frame_info
.reg_fp
!= 0
1488 && current_frame_info
.reg_save_b0
== current_frame_info
.reg_fp
+ 1
1489 && current_frame_info
.reg_save_ar_pfs
== current_frame_info
.reg_fp
+ 2)
1491 current_frame_info
.reg_save_b0
= current_frame_info
.reg_fp
;
1492 current_frame_info
.reg_save_ar_pfs
= current_frame_info
.reg_fp
+ 1;
1493 current_frame_info
.reg_fp
= current_frame_info
.reg_fp
+ 2;
1496 /* See if we need to store the predicate register block. */
1497 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1498 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1500 if (regno
<= PR_REG (63))
1502 SET_HARD_REG_BIT (mask
, PR_REG (0));
1503 current_frame_info
.reg_save_pr
= find_gr_spill (1);
1504 if (current_frame_info
.reg_save_pr
== 0)
1506 extra_spill_size
+= 8;
1510 /* ??? Mark them all as used so that register renaming and such
1511 are free to use them. */
1512 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1513 regs_ever_live
[regno
] = 1;
1516 /* If we're forced to use st8.spill, we're forced to save and restore
1518 if (spilled_gr_p
|| cfun
->machine
->n_varargs
)
1520 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
1521 current_frame_info
.reg_save_ar_unat
= find_gr_spill (spill_size
== 0);
1522 if (current_frame_info
.reg_save_ar_unat
== 0)
1524 extra_spill_size
+= 8;
1529 if (regs_ever_live
[AR_LC_REGNUM
])
1531 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
1532 current_frame_info
.reg_save_ar_lc
= find_gr_spill (spill_size
== 0);
1533 if (current_frame_info
.reg_save_ar_lc
== 0)
1535 extra_spill_size
+= 8;
1540 /* If we have an odd number of words of pretend arguments written to
1541 the stack, then the FR save area will be unaligned. We round the
1542 size of this area up to keep things 16 byte aligned. */
1544 pretend_args_size
= IA64_STACK_ALIGN (current_function_pretend_args_size
);
1546 pretend_args_size
= current_function_pretend_args_size
;
1548 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
1549 + current_function_outgoing_args_size
);
1550 total_size
= IA64_STACK_ALIGN (total_size
);
1552 /* We always use the 16-byte scratch area provided by the caller, but
1553 if we are a leaf function, there's no one to which we need to provide
1555 if (current_function_is_leaf
)
1556 total_size
= MAX (0, total_size
- 16);
1558 current_frame_info
.total_size
= total_size
;
1559 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
1560 current_frame_info
.spill_size
= spill_size
;
1561 current_frame_info
.extra_spill_size
= extra_spill_size
;
1562 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
1563 current_frame_info
.n_spilled
= n_spilled
;
1564 current_frame_info
.initialized
= reload_completed
;
1567 /* Compute the initial difference between the specified pair of registers. */
1570 ia64_initial_elimination_offset (from
, to
)
1573 HOST_WIDE_INT offset
;
1575 ia64_compute_frame_size (get_frame_size ());
1578 case FRAME_POINTER_REGNUM
:
1579 if (to
== HARD_FRAME_POINTER_REGNUM
)
1581 if (current_function_is_leaf
)
1582 offset
= -current_frame_info
.total_size
;
1584 offset
= -(current_frame_info
.total_size
1585 - current_function_outgoing_args_size
- 16);
1587 else if (to
== STACK_POINTER_REGNUM
)
1589 if (current_function_is_leaf
)
1592 offset
= 16 + current_function_outgoing_args_size
;
1598 case ARG_POINTER_REGNUM
:
1599 /* Arguments start above the 16 byte save area, unless stdarg
1600 in which case we store through the 16 byte save area. */
1601 if (to
== HARD_FRAME_POINTER_REGNUM
)
1602 offset
= 16 - current_function_pretend_args_size
;
1603 else if (to
== STACK_POINTER_REGNUM
)
1604 offset
= (current_frame_info
.total_size
1605 + 16 - current_function_pretend_args_size
);
1610 case RETURN_ADDRESS_POINTER_REGNUM
:
1621 /* If there are more than a trivial number of register spills, we use
1622 two interleaved iterators so that we can get two memory references
1625 In order to simplify things in the prologue and epilogue expanders,
1626 we use helper functions to fix up the memory references after the
1627 fact with the appropriate offsets to a POST_MODIFY memory mode.
1628 The following data structure tracks the state of the two iterators
1629 while insns are being emitted. */
1631 struct spill_fill_data
1633 rtx init_after
; /* point at which to emit intializations */
1634 rtx init_reg
[2]; /* initial base register */
1635 rtx iter_reg
[2]; /* the iterator registers */
1636 rtx
*prev_addr
[2]; /* address of last memory use */
1637 rtx prev_insn
[2]; /* the insn corresponding to prev_addr */
1638 HOST_WIDE_INT prev_off
[2]; /* last offset */
1639 int n_iter
; /* number of iterators in use */
1640 int next_iter
; /* next iterator to use */
1641 unsigned int save_gr_used_mask
;
1644 static struct spill_fill_data spill_fill_data
;
1647 setup_spill_pointers (n_spills
, init_reg
, cfa_off
)
1650 HOST_WIDE_INT cfa_off
;
1654 spill_fill_data
.init_after
= get_last_insn ();
1655 spill_fill_data
.init_reg
[0] = init_reg
;
1656 spill_fill_data
.init_reg
[1] = init_reg
;
1657 spill_fill_data
.prev_addr
[0] = NULL
;
1658 spill_fill_data
.prev_addr
[1] = NULL
;
1659 spill_fill_data
.prev_insn
[0] = NULL
;
1660 spill_fill_data
.prev_insn
[1] = NULL
;
1661 spill_fill_data
.prev_off
[0] = cfa_off
;
1662 spill_fill_data
.prev_off
[1] = cfa_off
;
1663 spill_fill_data
.next_iter
= 0;
1664 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
1666 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
1667 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
1669 int regno
= next_scratch_gr_reg ();
1670 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
1671 current_frame_info
.gr_used_mask
|= 1 << regno
;
1676 finish_spill_pointers ()
1678 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
1682 spill_restore_mem (reg
, cfa_off
)
1684 HOST_WIDE_INT cfa_off
;
1686 int iter
= spill_fill_data
.next_iter
;
1687 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
1688 rtx disp_rtx
= GEN_INT (disp
);
1691 if (spill_fill_data
.prev_addr
[iter
])
1693 if (CONST_OK_FOR_N (disp
))
1695 *spill_fill_data
.prev_addr
[iter
]
1696 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
1697 gen_rtx_PLUS (DImode
,
1698 spill_fill_data
.iter_reg
[iter
],
1700 REG_NOTES (spill_fill_data
.prev_insn
[iter
])
1701 = gen_rtx_EXPR_LIST (REG_INC
, spill_fill_data
.iter_reg
[iter
],
1702 REG_NOTES (spill_fill_data
.prev_insn
[iter
]));
1706 /* ??? Could use register post_modify for loads. */
1707 if (! CONST_OK_FOR_I (disp
))
1709 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
1710 emit_move_insn (tmp
, disp_rtx
);
1713 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
1714 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
1717 /* Micro-optimization: if we've created a frame pointer, it's at
1718 CFA 0, which may allow the real iterator to be initialized lower,
1719 slightly increasing parallelism. Also, if there are few saves
1720 it may eliminate the iterator entirely. */
1722 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
1723 && frame_pointer_needed
)
1725 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
1726 set_mem_alias_set (mem
, get_varargs_alias_set ());
1734 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
1735 spill_fill_data
.init_reg
[iter
]);
1740 if (! CONST_OK_FOR_I (disp
))
1742 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
1743 emit_move_insn (tmp
, disp_rtx
);
1747 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
1748 spill_fill_data
.init_reg
[iter
],
1751 seq
= gen_sequence ();
1755 /* Careful for being the first insn in a sequence. */
1756 if (spill_fill_data
.init_after
)
1757 spill_fill_data
.init_after
1758 = emit_insn_after (seq
, spill_fill_data
.init_after
);
1761 rtx first
= get_insns ();
1763 spill_fill_data
.init_after
1764 = emit_insn_before (seq
, first
);
1766 spill_fill_data
.init_after
= emit_insn (seq
);
1770 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
1772 /* ??? Not all of the spills are for varargs, but some of them are.
1773 The rest of the spills belong in an alias set of their own. But
1774 it doesn't actually hurt to include them here. */
1775 set_mem_alias_set (mem
, get_varargs_alias_set ());
1777 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
1778 spill_fill_data
.prev_off
[iter
] = cfa_off
;
1780 if (++iter
>= spill_fill_data
.n_iter
)
1782 spill_fill_data
.next_iter
= iter
;
1788 do_spill (move_fn
, reg
, cfa_off
, frame_reg
)
1789 rtx (*move_fn
) PARAMS ((rtx
, rtx
, rtx
));
1791 HOST_WIDE_INT cfa_off
;
1793 int iter
= spill_fill_data
.next_iter
;
1796 mem
= spill_restore_mem (reg
, cfa_off
);
1797 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
1798 spill_fill_data
.prev_insn
[iter
] = insn
;
1805 RTX_FRAME_RELATED_P (insn
) = 1;
1807 /* Don't even pretend that the unwind code can intuit its way
1808 through a pair of interleaved post_modify iterators. Just
1809 provide the correct answer. */
1811 if (frame_pointer_needed
)
1813 base
= hard_frame_pointer_rtx
;
1818 base
= stack_pointer_rtx
;
1819 off
= current_frame_info
.total_size
- cfa_off
;
1823 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1824 gen_rtx_SET (VOIDmode
,
1825 gen_rtx_MEM (GET_MODE (reg
),
1826 plus_constant (base
, off
)),
1833 do_restore (move_fn
, reg
, cfa_off
)
1834 rtx (*move_fn
) PARAMS ((rtx
, rtx
, rtx
));
1836 HOST_WIDE_INT cfa_off
;
1838 int iter
= spill_fill_data
.next_iter
;
1841 insn
= emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
1842 GEN_INT (cfa_off
)));
1843 spill_fill_data
.prev_insn
[iter
] = insn
;
1846 /* Wrapper functions that discards the CONST_INT spill offset. These
1847 exist so that we can give gr_spill/gr_fill the offset they need and
1848 use a consistant function interface. */
1851 gen_movdi_x (dest
, src
, offset
)
1853 rtx offset ATTRIBUTE_UNUSED
;
1855 return gen_movdi (dest
, src
);
1859 gen_fr_spill_x (dest
, src
, offset
)
1861 rtx offset ATTRIBUTE_UNUSED
;
1863 return gen_fr_spill (dest
, src
);
1867 gen_fr_restore_x (dest
, src
, offset
)
1869 rtx offset ATTRIBUTE_UNUSED
;
1871 return gen_fr_restore (dest
, src
);
1874 /* Called after register allocation to add any instructions needed for the
1875 prologue. Using a prologue insn is favored compared to putting all of the
1876 instructions in output_function_prologue(), since it allows the scheduler
1877 to intermix instructions with the saves of the caller saved registers. In
1878 some cases, it might be necessary to emit a barrier instruction as the last
1879 insn to prevent such scheduling.
1881 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1882 so that the debug info generation code can handle them properly.
1884 The register save area is layed out like so:
1886 [ varargs spill area ]
1887 [ fr register spill area ]
1888 [ br register spill area ]
1889 [ ar register spill area ]
1890 [ pr register spill area ]
1891 [ gr register spill area ] */
1893 /* ??? Get inefficient code when the frame size is larger than can fit in an
1894 adds instruction. */
1897 ia64_expand_prologue ()
1899 rtx insn
, ar_pfs_save_reg
, ar_unat_save_reg
;
1900 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
1903 ia64_compute_frame_size (get_frame_size ());
1904 last_scratch_gr_reg
= 15;
1906 /* If there is no epilogue, then we don't need some prologue insns.
1907 We need to avoid emitting the dead prologue insns, because flow
1908 will complain about them. */
1913 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
1914 if ((e
->flags
& EDGE_FAKE
) == 0
1915 && (e
->flags
& EDGE_FALLTHRU
) != 0)
1917 epilogue_p
= (e
!= NULL
);
1922 /* Set the local, input, and output register names. We need to do this
1923 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1924 half. If we use in/loc/out register names, then we get assembler errors
1925 in crtn.S because there is no alloc insn or regstk directive in there. */
1926 if (! TARGET_REG_NAMES
)
1928 int inputs
= current_frame_info
.n_input_regs
;
1929 int locals
= current_frame_info
.n_local_regs
;
1930 int outputs
= current_frame_info
.n_output_regs
;
1932 for (i
= 0; i
< inputs
; i
++)
1933 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
1934 for (i
= 0; i
< locals
; i
++)
1935 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
1936 for (i
= 0; i
< outputs
; i
++)
1937 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
1940 /* Set the frame pointer register name. The regnum is logically loc79,
1941 but of course we'll not have allocated that many locals. Rather than
1942 worrying about renumbering the existing rtxs, we adjust the name. */
1943 /* ??? This code means that we can never use one local register when
1944 there is a frame pointer. loc79 gets wasted in this case, as it is
1945 renamed to a register that will never be used. See also the try_locals
1946 code in find_gr_spill. */
1947 if (current_frame_info
.reg_fp
)
1949 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
1950 reg_names
[HARD_FRAME_POINTER_REGNUM
]
1951 = reg_names
[current_frame_info
.reg_fp
];
1952 reg_names
[current_frame_info
.reg_fp
] = tmp
;
1955 /* Fix up the return address placeholder. */
1956 /* ??? We can fail if __builtin_return_address is used, and we didn't
1957 allocate a register in which to save b0. I can't think of a way to
1958 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
1959 then be sure that I got the right one. Further, reload doesn't seem
1960 to care if an eliminable register isn't used, and "eliminates" it
1962 if (regs_ever_live
[RETURN_ADDRESS_POINTER_REGNUM
]
1963 && current_frame_info
.reg_save_b0
!= 0)
1964 XINT (return_address_pointer_rtx
, 0) = current_frame_info
.reg_save_b0
;
1966 /* We don't need an alloc instruction if we've used no outputs or locals. */
1967 if (current_frame_info
.n_local_regs
== 0
1968 && current_frame_info
.n_output_regs
== 0
1969 && current_frame_info
.n_input_regs
<= current_function_args_info
.words
)
1971 /* If there is no alloc, but there are input registers used, then we
1972 need a .regstk directive. */
1973 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
1974 ar_pfs_save_reg
= NULL_RTX
;
1978 current_frame_info
.need_regstk
= 0;
1980 if (current_frame_info
.reg_save_ar_pfs
)
1981 regno
= current_frame_info
.reg_save_ar_pfs
;
1983 regno
= next_scratch_gr_reg ();
1984 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
1986 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
1987 GEN_INT (current_frame_info
.n_input_regs
),
1988 GEN_INT (current_frame_info
.n_local_regs
),
1989 GEN_INT (current_frame_info
.n_output_regs
),
1990 GEN_INT (current_frame_info
.n_rotate_regs
)));
1991 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_pfs
!= 0);
1994 /* Set up frame pointer, stack pointer, and spill iterators. */
1996 n_varargs
= cfun
->machine
->n_varargs
;
1997 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
1998 stack_pointer_rtx
, 0);
2000 if (frame_pointer_needed
)
2002 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
2003 RTX_FRAME_RELATED_P (insn
) = 1;
2006 if (current_frame_info
.total_size
!= 0)
2008 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
2011 if (CONST_OK_FOR_I (- current_frame_info
.total_size
))
2012 offset
= frame_size_rtx
;
2015 regno
= next_scratch_gr_reg ();
2016 offset
= gen_rtx_REG (DImode
, regno
);
2017 emit_move_insn (offset
, frame_size_rtx
);
2020 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
2021 stack_pointer_rtx
, offset
));
2023 if (! frame_pointer_needed
)
2025 RTX_FRAME_RELATED_P (insn
) = 1;
2026 if (GET_CODE (offset
) != CONST_INT
)
2029 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2030 gen_rtx_SET (VOIDmode
,
2032 gen_rtx_PLUS (DImode
,
2039 /* ??? At this point we must generate a magic insn that appears to
2040 modify the stack pointer, the frame pointer, and all spill
2041 iterators. This would allow the most scheduling freedom. For
2042 now, just hard stop. */
2043 emit_insn (gen_blockage ());
2046 /* Must copy out ar.unat before doing any integer spills. */
2047 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2049 if (current_frame_info
.reg_save_ar_unat
)
2051 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2054 alt_regno
= next_scratch_gr_reg ();
2055 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2056 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2059 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2060 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
2061 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_unat
!= 0);
2063 /* Even if we're not going to generate an epilogue, we still
2064 need to save the register so that EH works. */
2065 if (! epilogue_p
&& current_frame_info
.reg_save_ar_unat
)
2066 emit_insn (gen_rtx_USE (VOIDmode
, ar_unat_save_reg
));
2069 ar_unat_save_reg
= NULL_RTX
;
2071 /* Spill all varargs registers. Do this before spilling any GR registers,
2072 since we want the UNAT bits for the GR registers to override the UNAT
2073 bits from varargs, which we don't care about. */
2076 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
2078 reg
= gen_rtx_REG (DImode
, regno
);
2079 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
2082 /* Locate the bottom of the register save area. */
2083 cfa_off
= (current_frame_info
.spill_cfa_off
2084 + current_frame_info
.spill_size
2085 + current_frame_info
.extra_spill_size
);
2087 /* Save the predicate register block either in a register or in memory. */
2088 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2090 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2091 if (current_frame_info
.reg_save_pr
!= 0)
2093 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2094 insn
= emit_move_insn (alt_reg
, reg
);
2096 /* ??? Denote pr spill/fill by a DImode move that modifies all
2097 64 hard registers. */
2098 RTX_FRAME_RELATED_P (insn
) = 1;
2100 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2101 gen_rtx_SET (VOIDmode
, alt_reg
, reg
),
2104 /* Even if we're not going to generate an epilogue, we still
2105 need to save the register so that EH works. */
2107 emit_insn (gen_rtx_USE (VOIDmode
, alt_reg
));
2111 alt_regno
= next_scratch_gr_reg ();
2112 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2113 insn
= emit_move_insn (alt_reg
, reg
);
2114 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2119 /* Handle AR regs in numerical order. All of them get special handling. */
2120 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
2121 && current_frame_info
.reg_save_ar_unat
== 0)
2123 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2124 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
2128 /* The alloc insn already copied ar.pfs into a general register. The
2129 only thing we have to do now is copy that register to a stack slot
2130 if we'd not allocated a local register for the job. */
2131 if (current_frame_info
.reg_save_ar_pfs
== 0
2132 && ! current_function_is_leaf
)
2134 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2135 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
2139 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2141 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2142 if (current_frame_info
.reg_save_ar_lc
!= 0)
2144 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2145 insn
= emit_move_insn (alt_reg
, reg
);
2146 RTX_FRAME_RELATED_P (insn
) = 1;
2148 /* Even if we're not going to generate an epilogue, we still
2149 need to save the register so that EH works. */
2151 emit_insn (gen_rtx_USE (VOIDmode
, alt_reg
));
2155 alt_regno
= next_scratch_gr_reg ();
2156 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2157 emit_move_insn (alt_reg
, reg
);
2158 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2163 /* We should now be at the base of the gr/br/fr spill area. */
2164 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2165 + current_frame_info
.spill_size
))
2168 /* Spill all general registers. */
2169 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2170 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2172 reg
= gen_rtx_REG (DImode
, regno
);
2173 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
2177 /* Handle BR0 specially -- it may be getting stored permanently in
2178 some GR register. */
2179 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2181 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2182 if (current_frame_info
.reg_save_b0
!= 0)
2184 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2185 insn
= emit_move_insn (alt_reg
, reg
);
2186 RTX_FRAME_RELATED_P (insn
) = 1;
2188 /* Even if we're not going to generate an epilogue, we still
2189 need to save the register so that EH works. */
2191 emit_insn (gen_rtx_USE (VOIDmode
, alt_reg
));
2195 alt_regno
= next_scratch_gr_reg ();
2196 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2197 emit_move_insn (alt_reg
, reg
);
2198 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2203 /* Spill the rest of the BR registers. */
2204 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2205 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2207 alt_regno
= next_scratch_gr_reg ();
2208 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2209 reg
= gen_rtx_REG (DImode
, regno
);
2210 emit_move_insn (alt_reg
, reg
);
2211 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2215 /* Align the frame and spill all FR registers. */
2216 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2217 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2221 reg
= gen_rtx_REG (TFmode
, regno
);
2222 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
2226 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2229 finish_spill_pointers ();
2232 /* Called after register allocation to add any instructions needed for the
2233 epilogue. Using a epilogue insn is favored compared to putting all of the
2234 instructions in output_function_prologue(), since it allows the scheduler
2235 to intermix instructions with the saves of the caller saved registers. In
2236 some cases, it might be necessary to emit a barrier instruction as the last
2237 insn to prevent such scheduling. */
2240 ia64_expand_epilogue (sibcall_p
)
2243 rtx insn
, reg
, alt_reg
, ar_unat_save_reg
;
2244 int regno
, alt_regno
, cfa_off
;
2246 ia64_compute_frame_size (get_frame_size ());
2248 /* If there is a frame pointer, then we use it instead of the stack
2249 pointer, so that the stack pointer does not need to be valid when
2250 the epilogue starts. See EXIT_IGNORE_STACK. */
2251 if (frame_pointer_needed
)
2252 setup_spill_pointers (current_frame_info
.n_spilled
,
2253 hard_frame_pointer_rtx
, 0);
2255 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
2256 current_frame_info
.total_size
);
2258 if (current_frame_info
.total_size
!= 0)
2260 /* ??? At this point we must generate a magic insn that appears to
2261 modify the spill iterators and the frame pointer. This would
2262 allow the most scheduling freedom. For now, just hard stop. */
2263 emit_insn (gen_blockage ());
2266 /* Locate the bottom of the register save area. */
2267 cfa_off
= (current_frame_info
.spill_cfa_off
2268 + current_frame_info
.spill_size
2269 + current_frame_info
.extra_spill_size
);
2271 /* Restore the predicate registers. */
2272 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2274 if (current_frame_info
.reg_save_pr
!= 0)
2275 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2278 alt_regno
= next_scratch_gr_reg ();
2279 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2280 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2283 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2284 emit_move_insn (reg
, alt_reg
);
2287 /* Restore the application registers. */
2289 /* Load the saved unat from the stack, but do not restore it until
2290 after the GRs have been restored. */
2291 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2293 if (current_frame_info
.reg_save_ar_unat
!= 0)
2295 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2298 alt_regno
= next_scratch_gr_reg ();
2299 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2300 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2301 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
2306 ar_unat_save_reg
= NULL_RTX
;
2308 if (current_frame_info
.reg_save_ar_pfs
!= 0)
2310 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_pfs
);
2311 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2312 emit_move_insn (reg
, alt_reg
);
2314 else if (! current_function_is_leaf
)
2316 alt_regno
= next_scratch_gr_reg ();
2317 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2318 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2320 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2321 emit_move_insn (reg
, alt_reg
);
2324 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2326 if (current_frame_info
.reg_save_ar_lc
!= 0)
2327 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2330 alt_regno
= next_scratch_gr_reg ();
2331 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2332 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2335 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2336 emit_move_insn (reg
, alt_reg
);
2339 /* We should now be at the base of the gr/br/fr spill area. */
2340 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2341 + current_frame_info
.spill_size
))
2344 /* Restore all general registers. */
2345 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2346 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2348 reg
= gen_rtx_REG (DImode
, regno
);
2349 do_restore (gen_gr_restore
, reg
, cfa_off
);
2353 /* Restore the branch registers. Handle B0 specially, as it may
2354 have gotten stored in some GR register. */
2355 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2357 if (current_frame_info
.reg_save_b0
!= 0)
2358 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2361 alt_regno
= next_scratch_gr_reg ();
2362 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2363 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2366 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2367 emit_move_insn (reg
, alt_reg
);
2370 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2371 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2373 alt_regno
= next_scratch_gr_reg ();
2374 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2375 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2377 reg
= gen_rtx_REG (DImode
, regno
);
2378 emit_move_insn (reg
, alt_reg
);
2381 /* Restore floating point registers. */
2382 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2383 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2387 reg
= gen_rtx_REG (TFmode
, regno
);
2388 do_restore (gen_fr_restore_x
, reg
, cfa_off
);
2392 /* Restore ar.unat for real. */
2393 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2395 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2396 emit_move_insn (reg
, ar_unat_save_reg
);
2399 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2402 finish_spill_pointers ();
2404 if (current_frame_info
.total_size
|| cfun
->machine
->ia64_eh_epilogue_sp
)
2406 /* ??? At this point we must generate a magic insn that appears to
2407 modify the spill iterators, the stack pointer, and the frame
2408 pointer. This would allow the most scheduling freedom. For now,
2410 emit_insn (gen_blockage ());
2413 if (cfun
->machine
->ia64_eh_epilogue_sp
)
2414 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
2415 else if (frame_pointer_needed
)
2417 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
2418 RTX_FRAME_RELATED_P (insn
) = 1;
2420 else if (current_frame_info
.total_size
)
2422 rtx offset
, frame_size_rtx
;
2424 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
2425 if (CONST_OK_FOR_I (current_frame_info
.total_size
))
2426 offset
= frame_size_rtx
;
2429 regno
= next_scratch_gr_reg ();
2430 offset
= gen_rtx_REG (DImode
, regno
);
2431 emit_move_insn (offset
, frame_size_rtx
);
2434 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
2437 RTX_FRAME_RELATED_P (insn
) = 1;
2438 if (GET_CODE (offset
) != CONST_INT
)
2441 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2442 gen_rtx_SET (VOIDmode
,
2444 gen_rtx_PLUS (DImode
,
2451 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
2452 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
2455 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
2458 int fp
= GR_REG (2);
2459 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
2460 first available call clobbered register. If there was a frame_pointer
2461 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2462 so we have to make sure we're using the string "r2" when emitting
2463 the register name for the assmbler. */
2464 if (current_frame_info
.reg_fp
&& current_frame_info
.reg_fp
== GR_REG (2))
2465 fp
= HARD_FRAME_POINTER_REGNUM
;
2467 /* We must emit an alloc to force the input registers to become output
2468 registers. Otherwise, if the callee tries to pass its parameters
2469 through to another call without an intervening alloc, then these
2471 /* ??? We don't need to preserve all input registers. We only need to
2472 preserve those input registers used as arguments to the sibling call.
2473 It is unclear how to compute that number here. */
2474 if (current_frame_info
.n_input_regs
!= 0)
2475 emit_insn (gen_alloc (gen_rtx_REG (DImode
, fp
),
2476 GEN_INT (0), GEN_INT (0),
2477 GEN_INT (current_frame_info
.n_input_regs
),
2482 /* Return 1 if br.ret can do all the work required to return from a
2486 ia64_direct_return ()
2488 if (reload_completed
&& ! frame_pointer_needed
)
2490 ia64_compute_frame_size (get_frame_size ());
2492 return (current_frame_info
.total_size
== 0
2493 && current_frame_info
.n_spilled
== 0
2494 && current_frame_info
.reg_save_b0
== 0
2495 && current_frame_info
.reg_save_pr
== 0
2496 && current_frame_info
.reg_save_ar_pfs
== 0
2497 && current_frame_info
.reg_save_ar_unat
== 0
2498 && current_frame_info
.reg_save_ar_lc
== 0);
2504 ia64_hard_regno_rename_ok (from
, to
)
2508 /* Don't clobber any of the registers we reserved for the prologue. */
2509 if (to
== current_frame_info
.reg_fp
2510 || to
== current_frame_info
.reg_save_b0
2511 || to
== current_frame_info
.reg_save_pr
2512 || to
== current_frame_info
.reg_save_ar_pfs
2513 || to
== current_frame_info
.reg_save_ar_unat
2514 || to
== current_frame_info
.reg_save_ar_lc
)
2517 if (from
== current_frame_info
.reg_fp
2518 || from
== current_frame_info
.reg_save_b0
2519 || from
== current_frame_info
.reg_save_pr
2520 || from
== current_frame_info
.reg_save_ar_pfs
2521 || from
== current_frame_info
.reg_save_ar_unat
2522 || from
== current_frame_info
.reg_save_ar_lc
)
2525 /* Don't use output registers outside the register frame. */
2526 if (OUT_REGNO_P (to
) && to
>= OUT_REG (current_frame_info
.n_output_regs
))
2529 /* Retain even/oddness on predicate register pairs. */
2530 if (PR_REGNO_P (from
) && PR_REGNO_P (to
))
2531 return (from
& 1) == (to
& 1);
2533 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2534 if (from
== GR_REG (4) && current_function_calls_setjmp
)
2540 /* Emit the function prologue. */
2543 ia64_output_function_prologue (file
, size
)
2545 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
2547 int mask
, grsave
, grsave_prev
;
2549 if (current_frame_info
.need_regstk
)
2550 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
2551 current_frame_info
.n_input_regs
,
2552 current_frame_info
.n_local_regs
,
2553 current_frame_info
.n_output_regs
,
2554 current_frame_info
.n_rotate_regs
);
2556 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
2559 /* Emit the .prologue directive. */
2562 grsave
= grsave_prev
= 0;
2563 if (current_frame_info
.reg_save_b0
!= 0)
2566 grsave
= grsave_prev
= current_frame_info
.reg_save_b0
;
2568 if (current_frame_info
.reg_save_ar_pfs
!= 0
2569 && (grsave_prev
== 0
2570 || current_frame_info
.reg_save_ar_pfs
== grsave_prev
+ 1))
2573 if (grsave_prev
== 0)
2574 grsave
= current_frame_info
.reg_save_ar_pfs
;
2575 grsave_prev
= current_frame_info
.reg_save_ar_pfs
;
2577 if (current_frame_info
.reg_fp
!= 0
2578 && (grsave_prev
== 0
2579 || current_frame_info
.reg_fp
== grsave_prev
+ 1))
2582 if (grsave_prev
== 0)
2583 grsave
= HARD_FRAME_POINTER_REGNUM
;
2584 grsave_prev
= current_frame_info
.reg_fp
;
2586 if (current_frame_info
.reg_save_pr
!= 0
2587 && (grsave_prev
== 0
2588 || current_frame_info
.reg_save_pr
== grsave_prev
+ 1))
2591 if (grsave_prev
== 0)
2592 grsave
= current_frame_info
.reg_save_pr
;
2596 fprintf (file
, "\t.prologue %d, %d\n", mask
,
2597 ia64_dbx_register_number (grsave
));
2599 fputs ("\t.prologue\n", file
);
2601 /* Emit a .spill directive, if necessary, to relocate the base of
2602 the register spill area. */
2603 if (current_frame_info
.spill_cfa_off
!= -16)
2604 fprintf (file
, "\t.spill %ld\n",
2605 (long) (current_frame_info
.spill_cfa_off
2606 + current_frame_info
.spill_size
));
2609 /* Emit the .body directive at the scheduled end of the prologue. */
2612 ia64_output_function_end_prologue (file
)
2615 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
2618 fputs ("\t.body\n", file
);
2621 /* Emit the function epilogue. */
2624 ia64_output_function_epilogue (file
, size
)
2625 FILE *file ATTRIBUTE_UNUSED
;
2626 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
2630 /* Reset from the function's potential modifications. */
2631 XINT (return_address_pointer_rtx
, 0) = RETURN_ADDRESS_POINTER_REGNUM
;
2633 if (current_frame_info
.reg_fp
)
2635 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2636 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2637 = reg_names
[current_frame_info
.reg_fp
];
2638 reg_names
[current_frame_info
.reg_fp
] = tmp
;
2640 if (! TARGET_REG_NAMES
)
2642 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
2643 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
2644 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
2645 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
2646 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
2647 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
2650 current_frame_info
.initialized
= 0;
2654 ia64_dbx_register_number (regno
)
2657 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2658 from its home at loc79 to something inside the register frame. We
2659 must perform the same renumbering here for the debug info. */
2660 if (current_frame_info
.reg_fp
)
2662 if (regno
== HARD_FRAME_POINTER_REGNUM
)
2663 regno
= current_frame_info
.reg_fp
;
2664 else if (regno
== current_frame_info
.reg_fp
)
2665 regno
= HARD_FRAME_POINTER_REGNUM
;
2668 if (IN_REGNO_P (regno
))
2669 return 32 + regno
- IN_REG (0);
2670 else if (LOC_REGNO_P (regno
))
2671 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
2672 else if (OUT_REGNO_P (regno
))
2673 return (32 + current_frame_info
.n_input_regs
2674 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
2680 ia64_initialize_trampoline (addr
, fnaddr
, static_chain
)
2681 rtx addr
, fnaddr
, static_chain
;
2683 rtx addr_reg
, eight
= GEN_INT (8);
2685 /* Load up our iterator. */
2686 addr_reg
= gen_reg_rtx (Pmode
);
2687 emit_move_insn (addr_reg
, addr
);
2689 /* The first two words are the fake descriptor:
2690 __ia64_trampoline, ADDR+16. */
2691 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
2692 gen_rtx_SYMBOL_REF (Pmode
, "__ia64_trampoline"));
2693 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2695 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
2696 copy_to_reg (plus_constant (addr
, 16)));
2697 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2699 /* The third word is the target descriptor. */
2700 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), fnaddr
);
2701 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2703 /* The fourth word is the static chain. */
2704 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), static_chain
);
2707 /* Do any needed setup for a variadic function. CUM has not been updated
2708 for the last named argument which has type TYPE and mode MODE.
2710 We generate the actual spill instructions during prologue generation. */
2713 ia64_setup_incoming_varargs (cum
, int_mode
, type
, pretend_size
, second_time
)
2714 CUMULATIVE_ARGS cum
;
2718 int second_time ATTRIBUTE_UNUSED
;
2720 /* If this is a stdarg function, then skip the current argument. */
2721 if (! current_function_varargs
)
2722 ia64_function_arg_advance (&cum
, int_mode
, type
, 1);
2724 if (cum
.words
< MAX_ARGUMENT_SLOTS
)
2726 int n
= MAX_ARGUMENT_SLOTS
- cum
.words
;
2727 *pretend_size
= n
* UNITS_PER_WORD
;
2728 cfun
->machine
->n_varargs
= n
;
2732 /* Check whether TYPE is a homogeneous floating point aggregate. If
2733 it is, return the mode of the floating point type that appears
2734 in all leafs. If it is not, return VOIDmode.
2736 An aggregate is a homogeneous floating point aggregate is if all
2737 fields/elements in it have the same floating point type (e.g,
2738 SFmode). 128-bit quad-precision floats are excluded. */
2740 static enum machine_mode
2741 hfa_element_mode (type
, nested
)
2745 enum machine_mode element_mode
= VOIDmode
;
2746 enum machine_mode mode
;
2747 enum tree_code code
= TREE_CODE (type
);
2748 int know_element_mode
= 0;
2753 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
2754 case BOOLEAN_TYPE
: case CHAR_TYPE
: case POINTER_TYPE
:
2755 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
2756 case FILE_TYPE
: case SET_TYPE
: case LANG_TYPE
:
2760 /* Fortran complex types are supposed to be HFAs, so we need to handle
2761 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2764 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
)
2765 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type
))
2766 * BITS_PER_UNIT
, MODE_FLOAT
, 0);
2771 /* ??? Should exclude 128-bit long double here. */
2772 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2773 mode if this is contained within an aggregate. */
2775 return TYPE_MODE (type
);
2780 return TYPE_MODE (TREE_TYPE (type
));
2784 case QUAL_UNION_TYPE
:
2785 for (t
= TYPE_FIELDS (type
); t
; t
= TREE_CHAIN (t
))
2787 if (TREE_CODE (t
) != FIELD_DECL
)
2790 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
2791 if (know_element_mode
)
2793 if (mode
!= element_mode
)
2796 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
2800 know_element_mode
= 1;
2801 element_mode
= mode
;
2804 return element_mode
;
2807 /* If we reach here, we probably have some front-end specific type
2808 that the backend doesn't know about. This can happen via the
2809 aggregate_value_p call in init_function_start. All we can do is
2810 ignore unknown tree types. */
2817 /* Return rtx for register where argument is passed, or zero if it is passed
2820 /* ??? 128-bit quad-precision floats are always passed in general
2824 ia64_function_arg (cum
, mode
, type
, named
, incoming
)
2825 CUMULATIVE_ARGS
*cum
;
2826 enum machine_mode mode
;
2831 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
2832 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
2833 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
2836 enum machine_mode hfa_mode
= VOIDmode
;
2838 /* Integer and float arguments larger than 8 bytes start at the next even
2839 boundary. Aggregates larger than 8 bytes start at the next even boundary
2840 if the aggregate has 16 byte alignment. Net effect is that types with
2841 alignment greater than 8 start at the next even boundary. */
2842 /* ??? The ABI does not specify how to handle aggregates with alignment from
2843 9 to 15 bytes, or greater than 16. We handle them all as if they had
2844 16 byte alignment. Such aggregates can occur only if gcc extensions are
2846 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
2848 && (cum
->words
& 1))
2851 /* If all argument slots are used, then it must go on the stack. */
2852 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
2855 /* Check for and handle homogeneous FP aggregates. */
2857 hfa_mode
= hfa_element_mode (type
, 0);
2859 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2860 and unprototyped hfas are passed specially. */
2861 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
2865 int fp_regs
= cum
->fp_regs
;
2866 int int_regs
= cum
->words
+ offset
;
2867 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
2871 /* If prototyped, pass it in FR regs then GR regs.
2872 If not prototyped, pass it in both FR and GR regs.
2874 If this is an SFmode aggregate, then it is possible to run out of
2875 FR regs while GR regs are still left. In that case, we pass the
2876 remaining part in the GR regs. */
2878 /* Fill the FP regs. We do this always. We stop if we reach the end
2879 of the argument, the last FP register, or the last argument slot. */
2881 byte_size
= ((mode
== BLKmode
)
2882 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
2883 args_byte_size
= int_regs
* UNITS_PER_WORD
;
2885 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
2886 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
2888 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
2889 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
2893 args_byte_size
+= hfa_size
;
2897 /* If no prototype, then the whole thing must go in GR regs. */
2898 if (! cum
->prototype
)
2900 /* If this is an SFmode aggregate, then we might have some left over
2901 that needs to go in GR regs. */
2902 else if (byte_size
!= offset
)
2903 int_regs
+= offset
/ UNITS_PER_WORD
;
2905 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2907 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
2909 enum machine_mode gr_mode
= DImode
;
2911 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2912 then this goes in a GR reg left adjusted/little endian, right
2913 adjusted/big endian. */
2914 /* ??? Currently this is handled wrong, because 4-byte hunks are
2915 always right adjusted/little endian. */
2918 /* If we have an even 4 byte hunk because the aggregate is a
2919 multiple of 4 bytes in size, then this goes in a GR reg right
2920 adjusted/little endian. */
2921 else if (byte_size
- offset
== 4)
2923 /* Complex floats need to have float mode. */
2924 if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
2927 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
2928 gen_rtx_REG (gr_mode
, (basereg
2931 offset
+= GET_MODE_SIZE (gr_mode
);
2932 int_regs
+= GET_MODE_SIZE (gr_mode
) <= UNITS_PER_WORD
2933 ? 1 : GET_MODE_SIZE (gr_mode
) / UNITS_PER_WORD
;
2936 /* If we ended up using just one location, just return that one loc. */
2938 return XEXP (loc
[0], 0);
2940 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
2943 /* Integral and aggregates go in general registers. If we have run out of
2944 FR registers, then FP values must also go in general registers. This can
2945 happen when we have a SFmode HFA. */
2946 else if (((mode
== TFmode
) && ! INTEL_EXTENDED_IEEE_FORMAT
)
2947 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
2948 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
2950 /* If there is a prototype, then FP values go in a FR register when
2951 named, and in a GR registeer when unnamed. */
2952 else if (cum
->prototype
)
2955 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
2957 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
2959 /* If there is no prototype, then FP values go in both FR and GR
2963 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
2964 gen_rtx_REG (mode
, (FR_ARG_FIRST
2967 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
2969 (basereg
+ cum
->words
2973 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
2977 /* Return number of words, at the beginning of the argument, that must be
2978 put in registers. 0 is the argument is entirely in registers or entirely
2982 ia64_function_arg_partial_nregs (cum
, mode
, type
, named
)
2983 CUMULATIVE_ARGS
*cum
;
2984 enum machine_mode mode
;
2986 int named ATTRIBUTE_UNUSED
;
2988 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
2989 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
2993 /* Arguments with alignment larger than 8 bytes start at the next even
2995 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
2997 && (cum
->words
& 1))
3000 /* If all argument slots are used, then it must go on the stack. */
3001 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
3004 /* It doesn't matter whether the argument goes in FR or GR regs. If
3005 it fits within the 8 argument slots, then it goes entirely in
3006 registers. If it extends past the last argument slot, then the rest
3007 goes on the stack. */
3009 if (words
+ cum
->words
+ offset
<= MAX_ARGUMENT_SLOTS
)
3012 return MAX_ARGUMENT_SLOTS
- cum
->words
- offset
;
3015 /* Update CUM to point after this argument. This is patterned after
3016 ia64_function_arg. */
3019 ia64_function_arg_advance (cum
, mode
, type
, named
)
3020 CUMULATIVE_ARGS
*cum
;
3021 enum machine_mode mode
;
3025 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
3026 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
3029 enum machine_mode hfa_mode
= VOIDmode
;
3031 /* If all arg slots are already full, then there is nothing to do. */
3032 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
3035 /* Arguments with alignment larger than 8 bytes start at the next even
3037 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3039 && (cum
->words
& 1))
3042 cum
->words
+= words
+ offset
;
3044 /* Check for and handle homogeneous FP aggregates. */
3046 hfa_mode
= hfa_element_mode (type
, 0);
3048 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3049 and unprototyped hfas are passed specially. */
3050 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
3052 int fp_regs
= cum
->fp_regs
;
3053 /* This is the original value of cum->words + offset. */
3054 int int_regs
= cum
->words
- words
;
3055 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3059 /* If prototyped, pass it in FR regs then GR regs.
3060 If not prototyped, pass it in both FR and GR regs.
3062 If this is an SFmode aggregate, then it is possible to run out of
3063 FR regs while GR regs are still left. In that case, we pass the
3064 remaining part in the GR regs. */
3066 /* Fill the FP regs. We do this always. We stop if we reach the end
3067 of the argument, the last FP register, or the last argument slot. */
3069 byte_size
= ((mode
== BLKmode
)
3070 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3071 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3073 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3074 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
3077 args_byte_size
+= hfa_size
;
3081 cum
->fp_regs
= fp_regs
;
3084 /* Integral and aggregates go in general registers. If we have run out of
3085 FR registers, then FP values must also go in general registers. This can
3086 happen when we have a SFmode HFA. */
3087 else if (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
3090 /* If there is a prototype, then FP values go in a FR register when
3091 named, and in a GR registeer when unnamed. */
3092 else if (cum
->prototype
)
3097 /* ??? Complex types should not reach here. */
3098 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3100 /* If there is no prototype, then FP values go in both FR and GR
3103 /* ??? Complex types should not reach here. */
3104 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3109 /* Implement va_start. */
3112 ia64_va_start (stdarg_p
, valist
, nextarg
)
3120 arg_words
= current_function_args_info
.words
;
3125 ofs
= (arg_words
>= MAX_ARGUMENT_SLOTS
? -UNITS_PER_WORD
: 0);
3127 nextarg
= plus_constant (nextarg
, ofs
);
3128 std_expand_builtin_va_start (1, valist
, nextarg
);
3131 /* Implement va_arg. */
3134 ia64_va_arg (valist
, type
)
3139 /* Arguments with alignment larger than 8 bytes start at the next even
3141 if (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3143 t
= build (PLUS_EXPR
, TREE_TYPE (valist
), valist
,
3144 build_int_2 (2 * UNITS_PER_WORD
- 1, 0));
3145 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
3146 build_int_2 (-2 * UNITS_PER_WORD
, -1));
3147 t
= build (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
3148 TREE_SIDE_EFFECTS (t
) = 1;
3149 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3152 return std_expand_builtin_va_arg (valist
, type
);
3155 /* Return 1 if function return value returned in memory. Return 0 if it is
3159 ia64_return_in_memory (valtype
)
3162 enum machine_mode mode
;
3163 enum machine_mode hfa_mode
;
3166 mode
= TYPE_MODE (valtype
);
3167 byte_size
= ((mode
== BLKmode
)
3168 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
3170 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3172 hfa_mode
= hfa_element_mode (valtype
, 0);
3173 if (hfa_mode
!= VOIDmode
)
3175 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3177 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
3183 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
3189 /* Return rtx for register that holds the function return value. */
3192 ia64_function_value (valtype
, func
)
3194 tree func ATTRIBUTE_UNUSED
;
3196 enum machine_mode mode
;
3197 enum machine_mode hfa_mode
;
3199 mode
= TYPE_MODE (valtype
);
3200 hfa_mode
= hfa_element_mode (valtype
, 0);
3202 if (hfa_mode
!= VOIDmode
)
3210 hfa_size
= GET_MODE_SIZE (hfa_mode
);
3211 byte_size
= ((mode
== BLKmode
)
3212 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
3214 for (i
= 0; offset
< byte_size
; i
++)
3216 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3217 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
3223 return XEXP (loc
[0], 0);
3225 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3227 else if (FLOAT_TYPE_P (valtype
) &&
3228 ((mode
!= TFmode
) || INTEL_EXTENDED_IEEE_FORMAT
))
3229 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
3231 return gen_rtx_REG (mode
, GR_RET_FIRST
);
3234 /* Print a memory address as an operand to reference that memory location. */
3236 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3237 also call this from ia64_print_operand for memory addresses. */
3240 ia64_print_operand_address (stream
, address
)
3241 FILE * stream ATTRIBUTE_UNUSED
;
3242 rtx address ATTRIBUTE_UNUSED
;
3246 /* Print an operand to a assembler instruction.
3247 C Swap and print a comparison operator.
3248 D Print an FP comparison operator.
3249 E Print 32 - constant, for SImode shifts as extract.
3250 e Print 64 - constant, for DImode rotates.
3251 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3252 a floating point register emitted normally.
3253 I Invert a predicate register by adding 1.
3254 J Select the proper predicate register for a condition.
3255 j Select the inverse predicate register for a condition.
3256 O Append .acq for volatile load.
3257 P Postincrement of a MEM.
3258 Q Append .rel for volatile store.
3259 S Shift amount for shladd instruction.
3260 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3261 for Intel assembler.
3262 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3263 for Intel assembler.
3264 r Print register name, or constant 0 as r0. HP compatibility for
3267 ia64_print_operand (file
, x
, code
)
3277 /* Handled below. */
3282 enum rtx_code c
= swap_condition (GET_CODE (x
));
3283 fputs (GET_RTX_NAME (c
), file
);
3288 switch (GET_CODE (x
))
3300 str
= GET_RTX_NAME (GET_CODE (x
));
3307 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
3311 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
3315 if (x
== CONST0_RTX (GET_MODE (x
)))
3316 str
= reg_names
[FR_REG (0)];
3317 else if (x
== CONST1_RTX (GET_MODE (x
)))
3318 str
= reg_names
[FR_REG (1)];
3319 else if (GET_CODE (x
) == REG
)
3320 str
= reg_names
[REGNO (x
)];
3327 fputs (reg_names
[REGNO (x
) + 1], file
);
3333 unsigned int regno
= REGNO (XEXP (x
, 0));
3334 if (GET_CODE (x
) == EQ
)
3338 fputs (reg_names
[regno
], file
);
3343 if (MEM_VOLATILE_P (x
))
3344 fputs(".acq", file
);
3349 HOST_WIDE_INT value
;
3351 switch (GET_CODE (XEXP (x
, 0)))
3357 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
3358 if (GET_CODE (x
) == CONST_INT
)
3360 else if (GET_CODE (x
) == REG
)
3362 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
3370 value
= GET_MODE_SIZE (GET_MODE (x
));
3374 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
3380 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, value
);
3385 if (MEM_VOLATILE_P (x
))
3386 fputs(".rel", file
);
3390 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
3394 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3396 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
3402 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3404 const char *prefix
= "0x";
3405 if (INTVAL (x
) & 0x80000000)
3407 fprintf (file
, "0xffffffff");
3410 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
3416 /* If this operand is the constant zero, write it as register zero.
3417 Any register, zero, or CONST_INT value is OK here. */
3418 if (GET_CODE (x
) == REG
)
3419 fputs (reg_names
[REGNO (x
)], file
);
3420 else if (x
== CONST0_RTX (GET_MODE (x
)))
3422 else if (GET_CODE (x
) == CONST_INT
)
3423 output_addr_const (file
, x
);
3425 output_operand_lossage ("invalid %%r value");
3432 /* For conditional branches, returns or calls, substitute
3433 sptk, dptk, dpnt, or spnt for %s. */
3434 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
3437 int pred_val
= INTVAL (XEXP (x
, 0));
3439 /* Guess top and bottom 10% statically predicted. */
3440 if (pred_val
< REG_BR_PROB_BASE
/ 50)
3442 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
3444 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98)
3449 else if (GET_CODE (current_output_insn
) == CALL_INSN
)
3454 fputs (which
, file
);
3459 x
= current_insn_predicate
;
3462 unsigned int regno
= REGNO (XEXP (x
, 0));
3463 if (GET_CODE (x
) == EQ
)
3465 fprintf (file
, "(%s) ", reg_names
[regno
]);
3470 output_operand_lossage ("ia64_print_operand: unknown code");
3474 switch (GET_CODE (x
))
3476 /* This happens for the spill/restore instructions. */
3481 /* ... fall through ... */
3484 fputs (reg_names
[REGNO (x
)], file
);
3489 rtx addr
= XEXP (x
, 0);
3490 if (GET_RTX_CLASS (GET_CODE (addr
)) == 'a')
3491 addr
= XEXP (addr
, 0);
3492 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
3497 output_addr_const (file
, x
);
3504 /* Calulate the cost of moving data from a register in class FROM to
3508 ia64_register_move_cost (from
, to
)
3509 enum reg_class from
, to
;
3511 int from_hard
, to_hard
;
3516 from_hard
= (from
== BR_REGS
|| from
== AR_M_REGS
|| from
== AR_I_REGS
);
3517 to_hard
= (to
== BR_REGS
|| to
== AR_M_REGS
|| to
== AR_I_REGS
);
3518 from_gr
= (from
== GENERAL_REGS
);
3519 to_gr
= (to
== GENERAL_REGS
);
3520 from_fr
= (from
== FR_REGS
);
3521 to_fr
= (to
== FR_REGS
);
3522 from_pr
= (from
== PR_REGS
);
3523 to_pr
= (to
== PR_REGS
);
3525 if (from_hard
&& to_hard
)
3527 else if ((from_hard
&& !to_gr
) || (!from_gr
&& to_hard
))
3530 /* Moving between PR registers takes two insns. */
3531 else if (from_pr
&& to_pr
)
3533 /* Moving between PR and anything but GR is impossible. */
3534 else if ((from_pr
&& !to_gr
) || (!from_gr
&& to_pr
))
3537 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3538 secondary memory reloads for TFmode moves. Unfortunately, we don't
3539 have the mode here, so we can't check that. */
3540 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3541 to avoid spectacularly poor register class preferencing for TFmode. */
3542 else if (from_fr
!= to_fr
)
3548 /* This function returns the register class required for a secondary
3549 register when copying between one of the registers in CLASS, and X,
3550 using MODE. A return value of NO_REGS means that no secondary register
3554 ia64_secondary_reload_class (class, mode
, x
)
3555 enum reg_class
class;
3556 enum machine_mode mode ATTRIBUTE_UNUSED
;
3561 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
3562 regno
= true_regnum (x
);
3567 /* ??? This is required because of a bad gcse/cse/global interaction.
3568 We end up with two pseudos with overlapping lifetimes both of which
3569 are equiv to the same constant, and both which need to be in BR_REGS.
3570 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3571 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3572 This seems to be a cse bug. cse_basic_block_end changes depending
3573 on the path length, which means the qty_first_reg check in
3574 make_regs_eqv can give different answers at different times. */
3575 /* ??? At some point I'll probably need a reload_indi pattern to handle
3577 if (BR_REGNO_P (regno
))
3580 /* This is needed if a pseudo used as a call_operand gets spilled to a
3582 if (GET_CODE (x
) == MEM
)
3587 /* This can happen when a paradoxical subreg is an operand to the
3589 /* ??? This shouldn't be necessary after instruction scheduling is
3590 enabled, because paradoxical subregs are not accepted by
3591 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3592 stop the paradoxical subreg stupidity in the *_operand functions
3594 if (GET_CODE (x
) == MEM
3595 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
3596 || GET_MODE (x
) == QImode
))
3599 /* This can happen because of the ior/and/etc patterns that accept FP
3600 registers as operands. If the third operand is a constant, then it
3601 needs to be reloaded into a FP register. */
3602 if (GET_CODE (x
) == CONST_INT
)
3605 /* This can happen because of register elimination in a muldi3 insn.
3606 E.g. `26107 * (unsigned long)&u'. */
3607 if (GET_CODE (x
) == PLUS
)
3612 /* ??? This happens if we cse/gcse a BImode value across a call,
3613 and the function has a nonlocal goto. This is because global
3614 does not allocate call crossing pseudos to hard registers when
3615 current_function_has_nonlocal_goto is true. This is relatively
3616 common for C++ programs that use exceptions. To reproduce,
3617 return NO_REGS and compile libstdc++. */
3618 if (GET_CODE (x
) == MEM
)
3621 /* This can happen when we take a BImode subreg of a DImode value,
3622 and that DImode value winds up in some non-GR register. */
3623 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
) && ! PR_REGNO_P (regno
))
3628 /* Since we have no offsettable memory addresses, we need a temporary
3629 to hold the address of the second word. */
3642 /* Emit text to declare externally defined variables and functions, because
3643 the Intel assembler does not support undefined externals. */
3646 ia64_asm_output_external (file
, decl
, name
)
3651 int save_referenced
;
3653 /* GNU as does not need anything here. */
3657 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3658 the linker when we do this, so we need to be careful not to do this for
3659 builtin functions which have no library equivalent. Unfortunately, we
3660 can't tell here whether or not a function will actually be called by
3661 expand_expr, so we pull in library functions even if we may not need
3663 if (! strcmp (name
, "__builtin_next_arg")
3664 || ! strcmp (name
, "alloca")
3665 || ! strcmp (name
, "__builtin_constant_p")
3666 || ! strcmp (name
, "__builtin_args_info"))
3669 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3671 save_referenced
= TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
));
3672 if (TREE_CODE (decl
) == FUNCTION_DECL
)
3674 fprintf (file
, "%s", TYPE_ASM_OP
);
3675 assemble_name (file
, name
);
3677 fprintf (file
, TYPE_OPERAND_FMT
, "function");
3680 ASM_GLOBALIZE_LABEL (file
, name
);
3681 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
)) = save_referenced
;
3684 /* Parse the -mfixed-range= option string. */
3687 fix_range (const_str
)
3688 const char *const_str
;
3691 char *str
, *dash
, *comma
;
3693 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
3694 REG2 are either register names or register numbers. The effect
3695 of this option is to mark the registers in the range from REG1 to
3696 REG2 as ``fixed'' so they won't be used by the compiler. This is
3697 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3699 i
= strlen (const_str
);
3700 str
= (char *) alloca (i
+ 1);
3701 memcpy (str
, const_str
, i
+ 1);
3705 dash
= strchr (str
, '-');
3708 warning ("value of -mfixed-range must have form REG1-REG2");
3713 comma
= strchr (dash
+ 1, ',');
3717 first
= decode_reg_name (str
);
3720 warning ("unknown register name: %s", str
);
3724 last
= decode_reg_name (dash
+ 1);
3727 warning ("unknown register name: %s", dash
+ 1);
3735 warning ("%s-%s is an empty range", str
, dash
+ 1);
3739 for (i
= first
; i
<= last
; ++i
)
3740 fixed_regs
[i
] = call_used_regs
[i
] = 1;
3750 /* Called to register all of our global variables with the garbage
3754 ia64_add_gc_roots ()
3756 ggc_add_rtx_root (&ia64_compare_op0
, 1);
3757 ggc_add_rtx_root (&ia64_compare_op1
, 1);
3761 ia64_init_machine_status (p
)
3765 (struct machine_function
*) xcalloc (1, sizeof (struct machine_function
));
3769 ia64_mark_machine_status (p
)
3772 struct machine_function
*machine
= p
->machine
;
3776 ggc_mark_rtx (machine
->ia64_eh_epilogue_sp
);
3777 ggc_mark_rtx (machine
->ia64_eh_epilogue_bsp
);
3778 ggc_mark_rtx (machine
->ia64_gp_save
);
3783 ia64_free_machine_status (p
)
3790 /* Handle TARGET_OPTIONS switches. */
3793 ia64_override_options ()
3795 if (TARGET_AUTO_PIC
)
3796 target_flags
|= MASK_CONST_GP
;
3798 if (TARGET_INLINE_DIV_LAT
&& TARGET_INLINE_DIV_THR
)
3800 warning ("cannot optimize division for both latency and throughput");
3801 target_flags
&= ~MASK_INLINE_DIV_THR
;
3804 if (ia64_fixed_range_string
)
3805 fix_range (ia64_fixed_range_string
);
3807 ia64_flag_schedule_insns2
= flag_schedule_insns_after_reload
;
3808 flag_schedule_insns_after_reload
= 0;
3810 ia64_section_threshold
= g_switch_set
? g_switch_value
: IA64_DEFAULT_GVALUE
;
3812 init_machine_status
= ia64_init_machine_status
;
3813 mark_machine_status
= ia64_mark_machine_status
;
3814 free_machine_status
= ia64_free_machine_status
;
3816 ia64_add_gc_roots ();
3819 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0
PARAMS((rtx
));
3820 static enum attr_itanium_class ia64_safe_itanium_class
PARAMS((rtx
));
3821 static enum attr_type ia64_safe_type
PARAMS((rtx
));
3823 static enum attr_itanium_requires_unit0
3824 ia64_safe_itanium_requires_unit0 (insn
)
3827 if (recog_memoized (insn
) >= 0)
3828 return get_attr_itanium_requires_unit0 (insn
);
3830 return ITANIUM_REQUIRES_UNIT0_NO
;
3833 static enum attr_itanium_class
3834 ia64_safe_itanium_class (insn
)
3837 if (recog_memoized (insn
) >= 0)
3838 return get_attr_itanium_class (insn
);
3840 return ITANIUM_CLASS_UNKNOWN
;
3843 static enum attr_type
3844 ia64_safe_type (insn
)
3847 if (recog_memoized (insn
) >= 0)
3848 return get_attr_type (insn
);
3850 return TYPE_UNKNOWN
;
3853 /* The following collection of routines emit instruction group stop bits as
3854 necessary to avoid dependencies. */
3856 /* Need to track some additional registers as far as serialization is
3857 concerned so we can properly handle br.call and br.ret. We could
3858 make these registers visible to gcc, but since these registers are
3859 never explicitly used in gcc generated code, it seems wasteful to
3860 do so (plus it would make the call and return patterns needlessly
3862 #define REG_GP (GR_REG (1))
3863 #define REG_RP (BR_REG (0))
3864 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
3865 /* This is used for volatile asms which may require a stop bit immediately
3866 before and after them. */
3867 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
3868 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3869 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
3871 /* For each register, we keep track of how it has been written in the
3872 current instruction group.
3874 If a register is written unconditionally (no qualifying predicate),
3875 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3877 If a register is written if its qualifying predicate P is true, we
3878 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3879 may be written again by the complement of P (P^1) and when this happens,
3880 WRITE_COUNT gets set to 2.
3882 The result of this is that whenever an insn attempts to write a register
3883 whose WRITE_COUNT is two, we need to issue a insn group barrier first.
3885 If a predicate register is written by a floating-point insn, we set
3886 WRITTEN_BY_FP to true.
3888 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3889 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
/* Per-register write-tracking state for the current instruction group.
   See the block comment above for the meaning of each field.  */
struct reg_write_state
{
  unsigned int write_count : 2;		/* 0 = unwritten, 1 = predicated
					   write, 2 = unconditional.  */
  unsigned int first_pred : 16;		/* Qualifying predicate of the
					   first (predicated) write.  */
  unsigned int written_by_fp : 1;	/* Written by an FP insn.  */
  unsigned int written_by_and : 1;	/* Written by an and.orcm.  */
  unsigned int written_by_or : 1;	/* Written by an or.andcm.  */
};
3900 /* Cumulative info for the current instruction group. */
3901 struct reg_write_state rws_sum
[NUM_REGS
];
3902 /* Info for the current instruction. This gets copied to rws_sum after a
3903 stop bit is emitted. */
3904 struct reg_write_state rws_insn
[NUM_REGS
];
3906 /* Indicates whether this is the first instruction after a stop bit,
3907 in which case we don't need another stop bit. Without this, we hit
3908 the abort in ia64_variable_issue when scheduling an alloc. */
3909 static int first_instruction
;
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};
3923 static void rws_update
PARAMS ((struct reg_write_state
*, int,
3924 struct reg_flags
, int));
3925 static int rws_access_regno
PARAMS ((int, struct reg_flags
, int));
3926 static int rws_access_reg
PARAMS ((rtx
, struct reg_flags
, int));
3927 static void update_set_flags
PARAMS ((rtx
, struct reg_flags
*, int *, rtx
*));
3928 static int set_src_needs_barrier
PARAMS ((rtx
, struct reg_flags
, int, rtx
));
3929 static int rtx_needs_barrier
PARAMS ((rtx
, struct reg_flags
, int));
3930 static void init_insn_group_barriers
PARAMS ((void));
3931 static int group_barrier_needed_p
PARAMS ((rtx
));
3932 static int safe_group_barrier_needed_p
PARAMS ((rtx
));
3934 /* Update *RWS for REGNO, which is being written by the current instruction,
3935 with predicate PRED, and associated register flags in FLAGS. */
3938 rws_update (rws
, regno
, flags
, pred
)
3939 struct reg_write_state
*rws
;
3941 struct reg_flags flags
;
3944 rws
[regno
].write_count
+= pred
? 1 : 2;
3945 rws
[regno
].written_by_fp
|= flags
.is_fp
;
3946 /* ??? Not tracking and/or across differing predicates. */
3947 rws
[regno
].written_by_and
= flags
.is_and
;
3948 rws
[regno
].written_by_or
= flags
.is_or
;
3949 rws
[regno
].first_pred
= pred
;
3952 /* Handle an access to register REGNO of type FLAGS using predicate register
3953 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
3954 a dependency with an earlier instruction in the same group. */
3957 rws_access_regno (regno
, flags
, pred
)
3959 struct reg_flags flags
;
3962 int need_barrier
= 0;
3964 if (regno
>= NUM_REGS
)
3967 if (! PR_REGNO_P (regno
))
3968 flags
.is_and
= flags
.is_or
= 0;
3974 /* One insn writes same reg multiple times? */
3975 if (rws_insn
[regno
].write_count
> 0)
3978 /* Update info for current instruction. */
3979 rws_update (rws_insn
, regno
, flags
, pred
);
3980 write_count
= rws_sum
[regno
].write_count
;
3982 switch (write_count
)
3985 /* The register has not been written yet. */
3986 rws_update (rws_sum
, regno
, flags
, pred
);
3990 /* The register has been written via a predicate. If this is
3991 not a complementary predicate, then we need a barrier. */
3992 /* ??? This assumes that P and P+1 are always complementary
3993 predicates for P even. */
3994 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
3996 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
3998 else if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
4000 rws_update (rws_sum
, regno
, flags
, pred
);
4004 /* The register has been unconditionally written already. We
4006 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4008 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4012 rws_sum
[regno
].written_by_and
= flags
.is_and
;
4013 rws_sum
[regno
].written_by_or
= flags
.is_or
;
4022 if (flags
.is_branch
)
4024 /* Branches have several RAW exceptions that allow to avoid
4027 if (REGNO_REG_CLASS (regno
) == BR_REGS
|| regno
== AR_PFS_REGNUM
)
4028 /* RAW dependencies on branch regs are permissible as long
4029 as the writer is a non-branch instruction. Since we
4030 never generate code that uses a branch register written
4031 by a branch instruction, handling this case is
4035 if (REGNO_REG_CLASS (regno
) == PR_REGS
4036 && ! rws_sum
[regno
].written_by_fp
)
4037 /* The predicates of a branch are available within the
4038 same insn group as long as the predicate was written by
4039 something other than a floating-point instruction. */
4043 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4045 if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4048 switch (rws_sum
[regno
].write_count
)
4051 /* The register has not been written yet. */
4055 /* The register has been written via a predicate. If this is
4056 not a complementary predicate, then we need a barrier. */
4057 /* ??? This assumes that P and P+1 are always complementary
4058 predicates for P even. */
4059 if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
4064 /* The register has been unconditionally written already. We
4074 return need_barrier
;
4078 rws_access_reg (reg
, flags
, pred
)
4080 struct reg_flags flags
;
4083 int regno
= REGNO (reg
);
4084 int n
= HARD_REGNO_NREGS (REGNO (reg
), GET_MODE (reg
));
4087 return rws_access_regno (regno
, flags
, pred
);
4090 int need_barrier
= 0;
4092 need_barrier
|= rws_access_regno (regno
+ n
, flags
, pred
);
4093 return need_barrier
;
4097 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4098 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4101 update_set_flags (x
, pflags
, ppred
, pcond
)
4103 struct reg_flags
*pflags
;
4107 rtx src
= SET_SRC (x
);
4111 switch (GET_CODE (src
))
4117 if (SET_DEST (x
) == pc_rtx
)
4118 /* X is a conditional branch. */
4122 int is_complemented
= 0;
4124 /* X is a conditional move. */
4125 rtx cond
= XEXP (src
, 0);
4126 if (GET_CODE (cond
) == EQ
)
4127 is_complemented
= 1;
4128 cond
= XEXP (cond
, 0);
4129 if (GET_CODE (cond
) != REG
4130 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4133 if (XEXP (src
, 1) == SET_DEST (x
)
4134 || XEXP (src
, 2) == SET_DEST (x
))
4136 /* X is a conditional move that conditionally writes the
4139 /* We need another complement in this case. */
4140 if (XEXP (src
, 1) == SET_DEST (x
))
4141 is_complemented
= ! is_complemented
;
4143 *ppred
= REGNO (cond
);
4144 if (is_complemented
)
4148 /* ??? If this is a conditional write to the dest, then this
4149 instruction does not actually read one source. This probably
4150 doesn't matter, because that source is also the dest. */
4151 /* ??? Multiple writes to predicate registers are allowed
4152 if they are all AND type compares, or if they are all OR
4153 type compares. We do not generate such instructions
4156 /* ... fall through ... */
4159 if (GET_RTX_CLASS (GET_CODE (src
)) == '<'
4160 && GET_MODE_CLASS (GET_MODE (XEXP (src
, 0))) == MODE_FLOAT
)
4161 /* Set pflags->is_fp to 1 so that we know we're dealing
4162 with a floating point comparison when processing the
4163 destination of the SET. */
4166 /* Discover if this is a parallel comparison. We only handle
4167 and.orcm and or.andcm at present, since we must retain a
4168 strict inverse on the predicate pair. */
4169 else if (GET_CODE (src
) == AND
)
4171 else if (GET_CODE (src
) == IOR
)
4178 /* Subroutine of rtx_needs_barrier; this function determines whether the
4179 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4180 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4184 set_src_needs_barrier (x
, flags
, pred
, cond
)
4186 struct reg_flags flags
;
4190 int need_barrier
= 0;
4192 rtx src
= SET_SRC (x
);
4194 if (GET_CODE (src
) == CALL
)
4195 /* We don't need to worry about the result registers that
4196 get written by subroutine call. */
4197 return rtx_needs_barrier (src
, flags
, pred
);
4198 else if (SET_DEST (x
) == pc_rtx
)
4200 /* X is a conditional branch. */
4201 /* ??? This seems redundant, as the caller sets this bit for
4203 flags
.is_branch
= 1;
4204 return rtx_needs_barrier (src
, flags
, pred
);
4207 need_barrier
= rtx_needs_barrier (src
, flags
, pred
);
4209 /* This instruction unconditionally uses a predicate register. */
4211 need_barrier
|= rws_access_reg (cond
, flags
, 0);
4214 if (GET_CODE (dst
) == ZERO_EXTRACT
)
4216 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 1), flags
, pred
);
4217 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 2), flags
, pred
);
4218 dst
= XEXP (dst
, 0);
4220 return need_barrier
;
4223 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4224 Return 1 is this access creates a dependency with an earlier instruction
4225 in the same group. */
4228 rtx_needs_barrier (x
, flags
, pred
)
4230 struct reg_flags flags
;
4234 int is_complemented
= 0;
4235 int need_barrier
= 0;
4236 const char *format_ptr
;
4237 struct reg_flags new_flags
;
4245 switch (GET_CODE (x
))
4248 update_set_flags (x
, &new_flags
, &pred
, &cond
);
4249 need_barrier
= set_src_needs_barrier (x
, new_flags
, pred
, cond
);
4250 if (GET_CODE (SET_SRC (x
)) != CALL
)
4252 new_flags
.is_write
= 1;
4253 need_barrier
|= rtx_needs_barrier (SET_DEST (x
), new_flags
, pred
);
4258 new_flags
.is_write
= 0;
4259 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4261 /* Avoid multiple register writes, in case this is a pattern with
4262 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4263 if (! flags
.is_sibcall
&& ! rws_insn
[REG_AR_CFM
].write_count
)
4265 new_flags
.is_write
= 1;
4266 need_barrier
|= rws_access_regno (REG_RP
, new_flags
, pred
);
4267 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, new_flags
, pred
);
4268 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4273 /* X is a predicated instruction. */
4275 cond
= COND_EXEC_TEST (x
);
4278 need_barrier
= rtx_needs_barrier (cond
, flags
, 0);
4280 if (GET_CODE (cond
) == EQ
)
4281 is_complemented
= 1;
4282 cond
= XEXP (cond
, 0);
4283 if (GET_CODE (cond
) != REG
4284 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4286 pred
= REGNO (cond
);
4287 if (is_complemented
)
4290 need_barrier
|= rtx_needs_barrier (COND_EXEC_CODE (x
), flags
, pred
);
4291 return need_barrier
;
4295 /* Clobber & use are for earlier compiler-phases only. */
4300 /* We always emit stop bits for traditional asms. We emit stop bits
4301 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4302 if (GET_CODE (x
) != ASM_OPERANDS
4303 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
4305 /* Avoid writing the register multiple times if we have multiple
4306 asm outputs. This avoids an abort in rws_access_reg. */
4307 if (! rws_insn
[REG_VOLATILE
].write_count
)
4309 new_flags
.is_write
= 1;
4310 rws_access_regno (REG_VOLATILE
, new_flags
, pred
);
4315 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4316 We can not just fall through here since then we would be confused
4317 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4318 traditional asms unlike their normal usage. */
4320 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
4321 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
4326 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
4328 rtx pat
= XVECEXP (x
, 0, i
);
4329 if (GET_CODE (pat
) == SET
)
4331 update_set_flags (pat
, &new_flags
, &pred
, &cond
);
4332 need_barrier
|= set_src_needs_barrier (pat
, new_flags
, pred
, cond
);
4334 else if (GET_CODE (pat
) == USE
4335 || GET_CODE (pat
) == CALL
4336 || GET_CODE (pat
) == ASM_OPERANDS
)
4337 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
4338 else if (GET_CODE (pat
) != CLOBBER
&& GET_CODE (pat
) != RETURN
)
4341 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
4343 rtx pat
= XVECEXP (x
, 0, i
);
4344 if (GET_CODE (pat
) == SET
)
4346 if (GET_CODE (SET_SRC (pat
)) != CALL
)
4348 new_flags
.is_write
= 1;
4349 need_barrier
|= rtx_needs_barrier (SET_DEST (pat
), new_flags
,
4353 else if (GET_CODE (pat
) == CLOBBER
|| GET_CODE (pat
) == RETURN
)
4354 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
4362 if (REGNO (x
) == AR_UNAT_REGNUM
)
4364 for (i
= 0; i
< 64; ++i
)
4365 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ i
, flags
, pred
);
4368 need_barrier
= rws_access_reg (x
, flags
, pred
);
4372 /* Find the regs used in memory address computation. */
4373 new_flags
.is_write
= 0;
4374 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
4377 case CONST_INT
: case CONST_DOUBLE
:
4378 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
4381 /* Operators with side-effects. */
4382 case POST_INC
: case POST_DEC
:
4383 if (GET_CODE (XEXP (x
, 0)) != REG
)
4386 new_flags
.is_write
= 0;
4387 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4388 new_flags
.is_write
= 1;
4389 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4393 if (GET_CODE (XEXP (x
, 0)) != REG
)
4396 new_flags
.is_write
= 0;
4397 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4398 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
4399 new_flags
.is_write
= 1;
4400 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4403 /* Handle common unary and binary ops for efficiency. */
4404 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
4405 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
4406 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
4407 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
4408 case NE
: case EQ
: case GE
: case GT
: case LE
:
4409 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
4410 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
4411 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
4414 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
4415 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
4416 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
4417 case SQRT
: case FFS
:
4418 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
4422 switch (XINT (x
, 1))
4424 case 1: /* st8.spill */
4425 case 2: /* ld8.fill */
4427 HOST_WIDE_INT offset
= INTVAL (XVECEXP (x
, 0, 1));
4428 HOST_WIDE_INT bit
= (offset
>> 3) & 63;
4430 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4431 new_flags
.is_write
= (XINT (x
, 1) == 1);
4432 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ bit
,
4437 case 3: /* stf.spill */
4438 case 4: /* ldf.spill */
4439 case 8: /* popcnt */
4440 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4443 case 7: /* pred_rel_mutex */
4444 case 9: /* pic call */
4446 case 19: /* fetchadd_acq */
4447 case 20: /* mov = ar.bsp */
4448 case 21: /* flushrs */
4449 case 22: /* bundle selector */
4450 case 23: /* cycle display */
4453 case 24: /* addp4 */
4454 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4457 case 5: /* recip_approx */
4458 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4459 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
4462 case 13: /* cmpxchg_acq */
4463 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
4464 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 2), flags
, pred
);
4472 case UNSPEC_VOLATILE
:
4473 switch (XINT (x
, 1))
4476 /* Alloc must always be the first instruction of a group.
4477 We force this by always returning true. */
4478 /* ??? We might get better scheduling if we explicitly check for
4479 input/local/output register dependencies, and modify the
4480 scheduler so that alloc is always reordered to the start of
4481 the current group. We could then eliminate all of the
4482 first_instruction code. */
4483 rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
4485 new_flags
.is_write
= 1;
4486 rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4489 case 1: /* blockage */
4490 case 2: /* insn group barrier */
4493 case 5: /* set_bsp */
4497 case 7: /* pred.rel.mutex */
4498 case 8: /* safe_across_calls all */
4499 case 9: /* safe_across_calls normal */
4508 new_flags
.is_write
= 0;
4509 need_barrier
= rws_access_regno (REG_RP
, flags
, pred
);
4510 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
4512 new_flags
.is_write
= 1;
4513 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4514 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4518 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
4519 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4520 switch (format_ptr
[i
])
4522 case '0': /* unused field */
4523 case 'i': /* integer */
4524 case 'n': /* note */
4525 case 'w': /* wide integer */
4526 case 's': /* pointer to string */
4527 case 'S': /* optional pointer to string */
4531 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
4536 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
4537 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
4546 return need_barrier
;
4549 /* Clear out the state for group_barrier_needed_p at the start of a
4550 sequence of insns. */
4553 init_insn_group_barriers ()
4555 memset (rws_sum
, 0, sizeof (rws_sum
));
4556 first_instruction
= 1;
4559 /* Given the current state, recorded by previous calls to this function,
4560 determine whether a group barrier (a stop bit) is necessary before INSN.
4561 Return nonzero if so. */
4564 group_barrier_needed_p (insn
)
4568 int need_barrier
= 0;
4569 struct reg_flags flags
;
4571 memset (&flags
, 0, sizeof (flags
));
4572 switch (GET_CODE (insn
))
4578 /* A barrier doesn't imply an instruction group boundary. */
4582 memset (rws_insn
, 0, sizeof (rws_insn
));
4586 flags
.is_branch
= 1;
4587 flags
.is_sibcall
= SIBLING_CALL_P (insn
);
4588 memset (rws_insn
, 0, sizeof (rws_insn
));
4590 /* Don't bundle a call following another call. */
4591 if ((pat
= prev_active_insn (insn
))
4592 && GET_CODE (pat
) == CALL_INSN
)
4598 need_barrier
= rtx_needs_barrier (PATTERN (insn
), flags
, 0);
4602 flags
.is_branch
= 1;
4604 /* Don't bundle a jump following a call. */
4605 if ((pat
= prev_active_insn (insn
))
4606 && GET_CODE (pat
) == CALL_INSN
)
4614 if (GET_CODE (PATTERN (insn
)) == USE
4615 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
4616 /* Don't care about USE and CLOBBER "insns"---those are used to
4617 indicate to the optimizer that it shouldn't get rid of
4618 certain operations. */
4621 pat
= PATTERN (insn
);
4623 /* Ug. Hack hacks hacked elsewhere. */
4624 switch (recog_memoized (insn
))
4626 /* We play dependency tricks with the epilogue in order
4627 to get proper schedules. Undo this for dv analysis. */
4628 case CODE_FOR_epilogue_deallocate_stack
:
4629 pat
= XVECEXP (pat
, 0, 0);
4632 /* The pattern we use for br.cloop confuses the code above.
4633 The second element of the vector is representative. */
4634 case CODE_FOR_doloop_end_internal
:
4635 pat
= XVECEXP (pat
, 0, 1);
4638 /* Doesn't generate code. */
4639 case CODE_FOR_pred_rel_mutex
:
4646 memset (rws_insn
, 0, sizeof (rws_insn
));
4647 need_barrier
= rtx_needs_barrier (pat
, flags
, 0);
4649 /* Check to see if the previous instruction was a volatile
4652 need_barrier
= rws_access_regno (REG_VOLATILE
, flags
, 0);
4659 if (first_instruction
)
4662 first_instruction
= 0;
4665 return need_barrier
;
4668 /* Like group_barrier_needed_p, but do not clobber the current state. */
4671 safe_group_barrier_needed_p (insn
)
4674 struct reg_write_state rws_saved
[NUM_REGS
];
4675 int saved_first_instruction
;
4678 memcpy (rws_saved
, rws_sum
, NUM_REGS
* sizeof *rws_saved
);
4679 saved_first_instruction
= first_instruction
;
4681 t
= group_barrier_needed_p (insn
);
4683 memcpy (rws_sum
, rws_saved
, NUM_REGS
* sizeof *rws_saved
);
4684 first_instruction
= saved_first_instruction
;
4689 /* INSNS is an chain of instructions. Scan the chain, and insert stop bits
4690 as necessary to eliminate dependendencies. This function assumes that
4691 a final instruction scheduling pass has been run which has already
4692 inserted most of the necessary stop bits. This function only inserts
4693 new ones at basic block boundaries, since these are invisible to the
4697 emit_insn_group_barriers (dump
, insns
)
4703 int insns_since_last_label
= 0;
4705 init_insn_group_barriers ();
4707 for (insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
4709 if (GET_CODE (insn
) == CODE_LABEL
)
4711 if (insns_since_last_label
)
4713 insns_since_last_label
= 0;
4715 else if (GET_CODE (insn
) == NOTE
4716 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
4718 if (insns_since_last_label
)
4720 insns_since_last_label
= 0;
4722 else if (GET_CODE (insn
) == INSN
4723 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
4724 && XINT (PATTERN (insn
), 1) == 2)
4726 init_insn_group_barriers ();
4729 else if (INSN_P (insn
))
4731 insns_since_last_label
= 1;
4733 if (group_barrier_needed_p (insn
))
4738 fprintf (dump
, "Emitting stop before label %d\n",
4739 INSN_UID (last_label
));
4740 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label
);
4743 init_insn_group_barriers ();
4751 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4752 This function has to emit all necessary group barriers. */
4755 emit_all_insn_group_barriers (dump
, insns
)
4756 FILE *dump ATTRIBUTE_UNUSED
;
4761 init_insn_group_barriers ();
4763 for (insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
4765 if (GET_CODE (insn
) == INSN
4766 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
4767 && XINT (PATTERN (insn
), 1) == 2)
4768 init_insn_group_barriers ();
4769 else if (INSN_P (insn
))
4771 if (group_barrier_needed_p (insn
))
4773 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
4774 init_insn_group_barriers ();
4775 group_barrier_needed_p (insn
);
4781 static int errata_find_address_regs
PARAMS ((rtx
*, void *));
4782 static void errata_emit_nops
PARAMS ((rtx
));
4783 static void fixup_errata
PARAMS ((void));
4785 /* This structure is used to track some details about the previous insns
4786 groups so we can determine if it may be necessary to insert NOPs to
4787 workaround hardware errata. */
4790 HARD_REG_SET p_reg_set
;
4791 HARD_REG_SET gr_reg_conditionally_set
;
4794 /* Index into the last_group array. */
4795 static int group_idx
;
4797 /* Called through for_each_rtx; determines if a hard register that was
4798 conditionally set in the previous group is used as an address register.
4799 It ensures that for_each_rtx returns 1 in that case. */
4801 errata_find_address_regs (xp
, data
)
4803 void *data ATTRIBUTE_UNUSED
;
4806 if (GET_CODE (x
) != MEM
)
4809 if (GET_CODE (x
) == POST_MODIFY
)
4811 if (GET_CODE (x
) == REG
)
4813 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
4814 if (TEST_HARD_REG_BIT (prev_group
->gr_reg_conditionally_set
,
4822 /* Called for each insn; this function keeps track of the state in
4823 last_group and emits additional NOPs if necessary to work around
4824 an Itanium A/B step erratum. */
4826 errata_emit_nops (insn
)
4829 struct group
*this_group
= last_group
+ group_idx
;
4830 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
4831 rtx pat
= PATTERN (insn
);
4832 rtx cond
= GET_CODE (pat
) == COND_EXEC
? COND_EXEC_TEST (pat
) : 0;
4833 rtx real_pat
= cond
? COND_EXEC_CODE (pat
) : pat
;
4834 enum attr_type type
;
4837 if (GET_CODE (real_pat
) == USE
4838 || GET_CODE (real_pat
) == CLOBBER
4839 || GET_CODE (real_pat
) == ASM_INPUT
4840 || GET_CODE (real_pat
) == ADDR_VEC
4841 || GET_CODE (real_pat
) == ADDR_DIFF_VEC
4842 || asm_noperands (PATTERN (insn
)) >= 0)
4845 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4848 if (GET_CODE (set
) == PARALLEL
)
4851 set
= XVECEXP (real_pat
, 0, 0);
4852 for (i
= 1; i
< XVECLEN (real_pat
, 0); i
++)
4853 if (GET_CODE (XVECEXP (real_pat
, 0, i
)) != USE
4854 && GET_CODE (XVECEXP (real_pat
, 0, i
)) != CLOBBER
)
4861 if (set
&& GET_CODE (set
) != SET
)
4864 type
= get_attr_type (insn
);
4867 && set
&& REG_P (SET_DEST (set
)) && PR_REGNO_P (REGNO (SET_DEST (set
))))
4868 SET_HARD_REG_BIT (this_group
->p_reg_set
, REGNO (SET_DEST (set
)));
4870 if ((type
== TYPE_M
|| type
== TYPE_A
) && cond
&& set
4871 && REG_P (SET_DEST (set
))
4872 && GET_CODE (SET_SRC (set
)) != PLUS
4873 && GET_CODE (SET_SRC (set
)) != MINUS
4874 && (GET_CODE (SET_SRC (set
)) != ASHIFT
4875 || !shladd_operand (XEXP (SET_SRC (set
), 1), VOIDmode
))
4876 && (GET_CODE (SET_SRC (set
)) != MEM
4877 || GET_CODE (XEXP (SET_SRC (set
), 0)) != POST_MODIFY
)
4878 && GENERAL_REGNO_P (REGNO (SET_DEST (set
))))
4880 if (GET_RTX_CLASS (GET_CODE (cond
)) != '<'
4881 || ! REG_P (XEXP (cond
, 0)))
4884 if (TEST_HARD_REG_BIT (prev_group
->p_reg_set
, REGNO (XEXP (cond
, 0))))
4885 SET_HARD_REG_BIT (this_group
->gr_reg_conditionally_set
, REGNO (SET_DEST (set
)));
4887 if (for_each_rtx (&real_pat
, errata_find_address_regs
, NULL
))
4889 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
4890 emit_insn_before (gen_nop (), insn
);
4891 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
4893 memset (last_group
, 0, sizeof last_group
);
4897 /* Emit extra nops if they are required to work around hardware errata. */
4904 if (! TARGET_B_STEP
)
4908 memset (last_group
, 0, sizeof last_group
);
4910 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
4915 if (ia64_safe_type (insn
) == TYPE_S
)
4918 memset (last_group
+ group_idx
, 0, sizeof last_group
[group_idx
]);
4921 errata_emit_nops (insn
);
4925 /* Instruction scheduling support. */
4926 /* Describe one bundle. */
4930 /* Zero if there's no possibility of a stop in this bundle other than
4931 at the end, otherwise the position of the optional stop bit. */
4933 /* The types of the three slots. */
4934 enum attr_type t
[3];
4935 /* The pseudo op to be emitted into the assembler output. */
4939 #define NR_BUNDLES 10
4941 /* A list of all available bundles. */
4943 static const struct bundle bundle
[NR_BUNDLES
] =
4945 { 2, { TYPE_M
, TYPE_I
, TYPE_I
}, ".mii" },
4946 { 1, { TYPE_M
, TYPE_M
, TYPE_I
}, ".mmi" },
4947 { 0, { TYPE_M
, TYPE_F
, TYPE_I
}, ".mfi" },
4948 { 0, { TYPE_M
, TYPE_M
, TYPE_F
}, ".mmf" },
4949 #if NR_BUNDLES == 10
4950 { 0, { TYPE_B
, TYPE_B
, TYPE_B
}, ".bbb" },
4951 { 0, { TYPE_M
, TYPE_B
, TYPE_B
}, ".mbb" },
4953 { 0, { TYPE_M
, TYPE_I
, TYPE_B
}, ".mib" },
4954 { 0, { TYPE_M
, TYPE_M
, TYPE_B
}, ".mmb" },
4955 { 0, { TYPE_M
, TYPE_F
, TYPE_B
}, ".mfb" },
4956 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
4957 it matches an L type insn. Otherwise we'll try to generate L type
4959 { 0, { TYPE_M
, TYPE_L
, TYPE_X
}, ".mlx" }
4962 /* Describe a packet of instructions. Packets consist of two bundles that
4963 are visible to the hardware in one scheduling window. */
4967 const struct bundle
*t1
, *t2
;
4968 /* Precomputed value of the first split issue in this packet if a cycle
4969 starts at its beginning. */
4971 /* For convenience, the insn types are replicated here so we don't have
4972 to go through T1 and T2 all the time. */
4973 enum attr_type t
[6];
4976 /* An array containing all possible packets. */
4977 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
4978 static struct ia64_packet packets
[NR_PACKETS
];
4980 /* Map attr_type to a string with the name. */
4982 static const char *type_names
[] =
4984 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
4987 /* Nonzero if we should insert stop bits into the schedule. */
4988 int ia64_final_schedule
= 0;
4990 static int itanium_split_issue
PARAMS ((const struct ia64_packet
*, int));
4991 static rtx ia64_single_set
PARAMS ((rtx
));
4992 static int insn_matches_slot
PARAMS ((const struct ia64_packet
*, enum attr_type
, int, rtx
));
4993 static void ia64_emit_insn_before
PARAMS ((rtx
, rtx
));
4994 static void maybe_rotate
PARAMS ((FILE *));
4995 static void finish_last_head
PARAMS ((FILE *, int));
4996 static void rotate_one_bundle
PARAMS ((FILE *));
4997 static void rotate_two_bundles
PARAMS ((FILE *));
4998 static void nop_cycles_until
PARAMS ((int, FILE *));
4999 static void cycle_end_fill_slots
PARAMS ((FILE *));
5000 static int packet_matches_p
PARAMS ((const struct ia64_packet
*, int, int *));
5001 static int get_split
PARAMS ((const struct ia64_packet
*, int));
5002 static int find_best_insn
PARAMS ((rtx
*, enum attr_type
*, int,
5003 const struct ia64_packet
*, int));
5004 static void find_best_packet
PARAMS ((int *, const struct ia64_packet
**,
5005 rtx
*, enum attr_type
*, int));
5006 static int itanium_reorder
PARAMS ((FILE *, rtx
*, rtx
*, int));
5007 static void dump_current_packet
PARAMS ((FILE *));
5008 static void schedule_stop
PARAMS ((FILE *));
5009 static rtx gen_nop_type
PARAMS ((enum attr_type
));
5010 static void ia64_emit_nops
PARAMS ((void));
5012 /* Map a bundle number to its pseudo-op. */
5018 return bundle
[b
].name
;
5021 /* Compute the slot which will cause a split issue in packet P if the
5022 current cycle begins at slot BEGIN. */
5025 itanium_split_issue (p
, begin
)
5026 const struct ia64_packet
*p
;
5029 int type_count
[TYPE_S
];
5035 /* Always split before and after MMF. */
5036 if (p
->t
[0] == TYPE_M
&& p
->t
[1] == TYPE_M
&& p
->t
[2] == TYPE_F
)
5038 if (p
->t
[3] == TYPE_M
&& p
->t
[4] == TYPE_M
&& p
->t
[5] == TYPE_F
)
5040 /* Always split after MBB and BBB. */
5041 if (p
->t
[1] == TYPE_B
)
5043 /* Split after first bundle in MIB BBB combination. */
5044 if (p
->t
[2] == TYPE_B
&& p
->t
[3] == TYPE_B
)
5048 memset (type_count
, 0, sizeof type_count
);
5049 for (i
= begin
; i
< split
; i
++)
5051 enum attr_type t0
= p
->t
[i
];
5052 /* An MLX bundle reserves the same units as an MFI bundle. */
5053 enum attr_type t
= (t0
== TYPE_L
? TYPE_F
5054 : t0
== TYPE_X
? TYPE_I
5056 int max
= (t
== TYPE_B
? 3 : t
== TYPE_F
? 1 : 2);
5057 if (type_count
[t
] == max
)
5064 /* Return the maximum number of instructions a cpu can issue. */
5072 /* Helper function - like single_set, but look inside COND_EXEC. */
5075 ia64_single_set (insn
)
5078 rtx x
= PATTERN (insn
);
5079 if (GET_CODE (x
) == COND_EXEC
)
5080 x
= COND_EXEC_CODE (x
);
5081 if (GET_CODE (x
) == SET
)
5083 return single_set_2 (insn
, x
);
5086 /* Adjust the cost of a scheduling dependency. Return the new cost of
5087 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5090 ia64_adjust_cost (insn
, link
, dep_insn
, cost
)
5091 rtx insn
, link
, dep_insn
;
5094 enum attr_type dep_type
;
5095 enum attr_itanium_class dep_class
;
5096 enum attr_itanium_class insn_class
;
5097 rtx dep_set
, set
, src
, addr
;
5099 if (GET_CODE (PATTERN (insn
)) == CLOBBER
5100 || GET_CODE (PATTERN (insn
)) == USE
5101 || GET_CODE (PATTERN (dep_insn
)) == CLOBBER
5102 || GET_CODE (PATTERN (dep_insn
)) == USE
5103 /* @@@ Not accurate for indirect calls. */
5104 || GET_CODE (insn
) == CALL_INSN
5105 || ia64_safe_type (insn
) == TYPE_S
)
5108 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
5109 || REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
5112 dep_type
= ia64_safe_type (dep_insn
);
5113 dep_class
= ia64_safe_itanium_class (dep_insn
);
5114 insn_class
= ia64_safe_itanium_class (insn
);
5116 /* Compares that feed a conditional branch can execute in the same
5118 dep_set
= ia64_single_set (dep_insn
);
5119 set
= ia64_single_set (insn
);
5121 if (dep_type
!= TYPE_F
5123 && GET_CODE (SET_DEST (dep_set
)) == REG
5124 && PR_REG (REGNO (SET_DEST (dep_set
)))
5125 && GET_CODE (insn
) == JUMP_INSN
)
5128 if (dep_set
&& GET_CODE (SET_DEST (dep_set
)) == MEM
)
5130 /* ??? Can't find any information in the documenation about whether
5134 splits issue. Assume it doesn't. */
5138 src
= set
? SET_SRC (set
) : 0;
5140 if (set
&& GET_CODE (SET_DEST (set
)) == MEM
)
5141 addr
= XEXP (SET_DEST (set
), 0);
5142 else if (set
&& GET_CODE (src
) == MEM
)
5143 addr
= XEXP (src
, 0);
5144 else if (set
&& GET_CODE (src
) == ZERO_EXTEND
5145 && GET_CODE (XEXP (src
, 0)) == MEM
)
5146 addr
= XEXP (XEXP (src
, 0), 0);
5147 else if (set
&& GET_CODE (src
) == UNSPEC
5148 && XVECLEN (XEXP (src
, 0), 0) > 0
5149 && GET_CODE (XVECEXP (src
, 0, 0)) == MEM
)
5150 addr
= XEXP (XVECEXP (src
, 0, 0), 0);
5151 if (addr
&& GET_CODE (addr
) == POST_MODIFY
)
5152 addr
= XEXP (addr
, 0);
5154 set
= ia64_single_set (dep_insn
);
5156 if ((dep_class
== ITANIUM_CLASS_IALU
5157 || dep_class
== ITANIUM_CLASS_ILOG
5158 || dep_class
== ITANIUM_CLASS_LD
)
5159 && (insn_class
== ITANIUM_CLASS_LD
5160 || insn_class
== ITANIUM_CLASS_ST
))
5162 if (! addr
|| ! set
)
5164 /* This isn't completely correct - an IALU that feeds an address has
5165 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5166 otherwise. Unfortunately there's no good way to describe this. */
5167 if (reg_overlap_mentioned_p (SET_DEST (set
), addr
))
5170 if ((dep_class
== ITANIUM_CLASS_IALU
5171 || dep_class
== ITANIUM_CLASS_ILOG
5172 || dep_class
== ITANIUM_CLASS_LD
)
5173 && (insn_class
== ITANIUM_CLASS_MMMUL
5174 || insn_class
== ITANIUM_CLASS_MMSHF
5175 || insn_class
== ITANIUM_CLASS_MMSHFI
))
5177 if (dep_class
== ITANIUM_CLASS_FMAC
5178 && (insn_class
== ITANIUM_CLASS_FMISC
5179 || insn_class
== ITANIUM_CLASS_FCVTFX
5180 || insn_class
== ITANIUM_CLASS_XMPY
))
5182 if ((dep_class
== ITANIUM_CLASS_FMAC
5183 || dep_class
== ITANIUM_CLASS_FMISC
5184 || dep_class
== ITANIUM_CLASS_FCVTFX
5185 || dep_class
== ITANIUM_CLASS_XMPY
)
5186 && insn_class
== ITANIUM_CLASS_STF
)
5188 if ((dep_class
== ITANIUM_CLASS_MMMUL
5189 || dep_class
== ITANIUM_CLASS_MMSHF
5190 || dep_class
== ITANIUM_CLASS_MMSHFI
)
5191 && (insn_class
== ITANIUM_CLASS_LD
5192 || insn_class
== ITANIUM_CLASS_ST
5193 || insn_class
== ITANIUM_CLASS_IALU
5194 || insn_class
== ITANIUM_CLASS_ILOG
5195 || insn_class
== ITANIUM_CLASS_ISHF
))
5201 /* Describe the current state of the Itanium pipeline. */
5204 /* The first slot that is used in the current cycle. */
5206 /* The next slot to fill. */
5208 /* The packet we have selected for the current issue window. */
5209 const struct ia64_packet
*packet
;
5210 /* The position of the split issue that occurs due to issue width
5211 limitations (6 if there's no split issue). */
5213 /* Record data about the insns scheduled so far in the same issue
5214 window. The elements up to but not including FIRST_SLOT belong
5215 to the previous cycle, the ones starting with FIRST_SLOT belong
5216 to the current cycle. */
5217 enum attr_type types
[6];
5220 /* Nonzero if we decided to schedule a stop bit. */
5224 /* Temporary arrays; they have enough elements to hold all insns that
5225 can be ready at the same time while scheduling of the current block.
5226 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5227 static rtx
*sched_ready
;
5228 static enum attr_type
*sched_types
;
5230 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5234 insn_matches_slot (p
, itype
, slot
, insn
)
5235 const struct ia64_packet
*p
;
5236 enum attr_type itype
;
5240 enum attr_itanium_requires_unit0 u0
;
5241 enum attr_type stype
= p
->t
[slot
];
5245 u0
= ia64_safe_itanium_requires_unit0 (insn
);
5246 if (u0
== ITANIUM_REQUIRES_UNIT0_YES
)
5249 for (i
= sched_data
.first_slot
; i
< slot
; i
++)
5250 if (p
->t
[i
] == stype
)
5253 if (GET_CODE (insn
) == CALL_INSN
)
5255 /* Reject calls in multiway branch packets. We want to limit
5256 the number of multiway branches we generate (since the branch
5257 predictor is limited), and this seems to work fairly well.
5258 (If we didn't do this, we'd have to add another test here to
5259 force calls into the third slot of the bundle.) */
5262 if (p
->t
[1] == TYPE_B
)
5267 if (p
->t
[4] == TYPE_B
)
5275 if (itype
== TYPE_A
)
5276 return stype
== TYPE_M
|| stype
== TYPE_I
;
5280 /* Like emit_insn_before, but skip cycle_display insns. This makes the
5281 assembly output a bit prettier. */
5284 ia64_emit_insn_before (insn
, before
)
5287 rtx prev
= PREV_INSN (before
);
5288 if (prev
&& GET_CODE (prev
) == INSN
5289 && GET_CODE (PATTERN (prev
)) == UNSPEC
5290 && XINT (PATTERN (prev
), 1) == 23)
5292 emit_insn_before (insn
, before
);
5296 /* Generate a nop insn of the given type. Note we never generate L type
5306 return gen_nop_m ();
5308 return gen_nop_i ();
5310 return gen_nop_b ();
5312 return gen_nop_f ();
5314 return gen_nop_x ();
5321 /* When rotating a bundle out of the issue window, insert a bundle selector
5322 insn in front of it. DUMP is the scheduling dump file or NULL. START
5323 is either 0 or 3, depending on whether we want to emit a bundle selector
5324 for the first bundle or the second bundle in the current issue window.
5326 The selector insns are emitted this late because the selected packet can
5327 be changed until parts of it get rotated out. */
5330 finish_last_head (dump
, start
)
5334 const struct ia64_packet
*p
= sched_data
.packet
;
5335 const struct bundle
*b
= start
== 0 ? p
->t1
: p
->t2
;
5336 int bundle_type
= b
- bundle
;
5340 if (! ia64_final_schedule
)
5343 for (i
= start
; sched_data
.insns
[i
] == 0; i
++)
5346 insn
= sched_data
.insns
[i
];
5349 fprintf (dump
, "// Emitting template before %d: %s\n",
5350 INSN_UID (insn
), b
->name
);
5352 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type
)), insn
);
5355 /* We can't schedule more insns this cycle. Fix up the scheduling state
5356 and advance FIRST_SLOT and CUR.
5357 We have to distribute the insns that are currently found between
5358 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5359 far, they are stored successively in the fields starting at FIRST_SLOT;
5360 now they must be moved to the correct slots.
5361 DUMP is the current scheduling dump file, or NULL. */
5364 cycle_end_fill_slots (dump
)
5367 const struct ia64_packet
*packet
= sched_data
.packet
;
5369 enum attr_type tmp_types
[6];
5372 memcpy (tmp_types
, sched_data
.types
, 6 * sizeof (enum attr_type
));
5373 memcpy (tmp_insns
, sched_data
.insns
, 6 * sizeof (rtx
));
5375 for (i
= slot
= sched_data
.first_slot
; i
< sched_data
.cur
; i
++)
5377 enum attr_type t
= tmp_types
[i
];
5378 if (t
!= ia64_safe_type (tmp_insns
[i
]))
5380 while (! insn_matches_slot (packet
, t
, slot
, tmp_insns
[i
]))
5382 if (slot
> sched_data
.split
)
5385 fprintf (dump
, "// Packet needs %s, have %s\n", type_names
[packet
->t
[slot
]],
5387 sched_data
.types
[slot
] = packet
->t
[slot
];
5388 sched_data
.insns
[slot
] = 0;
5389 sched_data
.stopbit
[slot
] = 0;
5392 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5393 actual slot type later. */
5394 sched_data
.types
[slot
] = packet
->t
[slot
];
5395 sched_data
.insns
[slot
] = tmp_insns
[i
];
5396 sched_data
.stopbit
[slot
] = 0;
5400 /* This isn't right - there's no need to pad out until the forced split;
5401 the CPU will automatically split if an insn isn't ready. */
5403 while (slot
< sched_data
.split
)
5405 sched_data
.types
[slot
] = packet
->t
[slot
];
5406 sched_data
.insns
[slot
] = 0;
5407 sched_data
.stopbit
[slot
] = 0;
5412 sched_data
.first_slot
= sched_data
.cur
= slot
;
5415 /* Bundle rotations, as described in the Itanium optimization manual.
5416 We can rotate either one or both bundles out of the issue window.
5417 DUMP is the current scheduling dump file, or NULL. */
5420 rotate_one_bundle (dump
)
5424 fprintf (dump
, "// Rotating one bundle.\n");
5426 finish_last_head (dump
, 0);
5427 if (sched_data
.cur
> 3)
5429 sched_data
.cur
-= 3;
5430 sched_data
.first_slot
-= 3;
5431 memmove (sched_data
.types
,
5432 sched_data
.types
+ 3,
5433 sched_data
.cur
* sizeof *sched_data
.types
);
5434 memmove (sched_data
.stopbit
,
5435 sched_data
.stopbit
+ 3,
5436 sched_data
.cur
* sizeof *sched_data
.stopbit
);
5437 memmove (sched_data
.insns
,
5438 sched_data
.insns
+ 3,
5439 sched_data
.cur
* sizeof *sched_data
.insns
);
5444 sched_data
.first_slot
= 0;
5449 rotate_two_bundles (dump
)
5453 fprintf (dump
, "// Rotating two bundles.\n");
5455 if (sched_data
.cur
== 0)
5458 finish_last_head (dump
, 0);
5459 if (sched_data
.cur
> 3)
5460 finish_last_head (dump
, 3);
5462 sched_data
.first_slot
= 0;
5465 /* We're beginning a new block. Initialize data structures as necessary. */
5468 ia64_sched_init (dump
, sched_verbose
, max_ready
)
5469 FILE *dump ATTRIBUTE_UNUSED
;
5470 int sched_verbose ATTRIBUTE_UNUSED
;
5473 static int initialized
= 0;
5481 for (i
= b1
= 0; b1
< NR_BUNDLES
; b1
++)
5483 const struct bundle
*t1
= bundle
+ b1
;
5484 for (b2
= 0; b2
< NR_BUNDLES
; b2
++, i
++)
5486 const struct bundle
*t2
= bundle
+ b2
;
5492 for (i
= 0; i
< NR_PACKETS
; i
++)
5495 for (j
= 0; j
< 3; j
++)
5496 packets
[i
].t
[j
] = packets
[i
].t1
->t
[j
];
5497 for (j
= 0; j
< 3; j
++)
5498 packets
[i
].t
[j
+ 3] = packets
[i
].t2
->t
[j
];
5499 packets
[i
].first_split
= itanium_split_issue (packets
+ i
, 0);
5504 init_insn_group_barriers ();
5506 memset (&sched_data
, 0, sizeof sched_data
);
5507 sched_types
= (enum attr_type
*) xmalloc (max_ready
5508 * sizeof (enum attr_type
));
5509 sched_ready
= (rtx
*) xmalloc (max_ready
* sizeof (rtx
));
5512 /* See if the packet P can match the insns we have already scheduled. Return
5513 nonzero if so. In *PSLOT, we store the first slot that is available for
5514 more instructions if we choose this packet.
5515 SPLIT holds the last slot we can use, there's a split issue after it so
5516 scheduling beyond it would cause us to use more than one cycle. */
5519 packet_matches_p (p
, split
, pslot
)
5520 const struct ia64_packet
*p
;
5524 int filled
= sched_data
.cur
;
5525 int first
= sched_data
.first_slot
;
5528 /* First, check if the first of the two bundles must be a specific one (due
5530 if (first
> 0 && sched_data
.stopbit
[0] && p
->t1
->possible_stop
!= 1)
5532 if (first
> 1 && sched_data
.stopbit
[1] && p
->t1
->possible_stop
!= 2)
5535 for (i
= 0; i
< first
; i
++)
5536 if (! insn_matches_slot (p
, sched_data
.types
[i
], i
,
5537 sched_data
.insns
[i
]))
5539 for (i
= slot
= first
; i
< filled
; i
++)
5541 while (slot
< split
)
5543 if (insn_matches_slot (p
, sched_data
.types
[i
], slot
,
5544 sched_data
.insns
[i
]))
5558 /* A frontend for itanium_split_issue. For a packet P and a slot
5559 number FIRST that describes the start of the current clock cycle,
5560 return the slot number of the first split issue. This function
5561 uses the cached number found in P if possible. */
5564 get_split (p
, first
)
5565 const struct ia64_packet
*p
;
5569 return p
->first_split
;
5570 return itanium_split_issue (p
, first
);
5573 /* Given N_READY insns in the array READY, whose types are found in the
5574 corresponding array TYPES, return the insn that is best suited to be
5575 scheduled in slot SLOT of packet P. */
5578 find_best_insn (ready
, types
, n_ready
, p
, slot
)
5580 enum attr_type
*types
;
5582 const struct ia64_packet
*p
;
5587 while (n_ready
-- > 0)
5589 rtx insn
= ready
[n_ready
];
5592 if (best
>= 0 && INSN_PRIORITY (ready
[n_ready
]) < best_pri
)
5594 /* If we have equally good insns, one of which has a stricter
5595 slot requirement, prefer the one with the stricter requirement. */
5596 if (best
>= 0 && types
[n_ready
] == TYPE_A
)
5598 if (insn_matches_slot (p
, types
[n_ready
], slot
, insn
))
5601 best_pri
= INSN_PRIORITY (ready
[best
]);
5603 /* If there's no way we could get a stricter requirement, stop
5605 if (types
[n_ready
] != TYPE_A
5606 && ia64_safe_itanium_requires_unit0 (ready
[n_ready
]))
5614 /* Select the best packet to use given the current scheduler state and the
5616 READY is an array holding N_READY ready insns; TYPES is a corresponding
5617 array that holds their types. Store the best packet in *PPACKET and the
5618 number of insns that can be scheduled in the current cycle in *PBEST. */
5621 find_best_packet (pbest
, ppacket
, ready
, types
, n_ready
)
5623 const struct ia64_packet
**ppacket
;
5625 enum attr_type
*types
;
5628 int first
= sched_data
.first_slot
;
5631 const struct ia64_packet
*best_packet
= NULL
;
5634 for (i
= 0; i
< NR_PACKETS
; i
++)
5636 const struct ia64_packet
*p
= packets
+ i
;
5638 int split
= get_split (p
, first
);
5640 int first_slot
, last_slot
;
5643 if (! packet_matches_p (p
, split
, &first_slot
))
5646 memcpy (sched_ready
, ready
, n_ready
* sizeof (rtx
));
5650 for (slot
= first_slot
; slot
< split
; slot
++)
5654 /* Disallow a degenerate case where the first bundle doesn't
5655 contain anything but NOPs! */
5656 if (first_slot
== 0 && win
== 0 && slot
== 3)
5662 insn_nr
= find_best_insn (sched_ready
, types
, n_ready
, p
, slot
);
5665 sched_ready
[insn_nr
] = 0;
5669 else if (p
->t
[slot
] == TYPE_B
)
5672 /* We must disallow MBB/BBB packets if any of their B slots would be
5673 filled with nops. */
5676 if (p
->t
[1] == TYPE_B
&& (b_nops
|| last_slot
< 2))
5681 if (p
->t
[4] == TYPE_B
&& (b_nops
|| last_slot
< 5))
5686 || (win
== best
&& last_slot
< lowest_end
))
5689 lowest_end
= last_slot
;
5694 *ppacket
= best_packet
;
5697 /* Reorder the ready list so that the insns that can be issued in this cycle
5698 are found in the correct order at the end of the list.
5699 DUMP is the scheduling dump file, or NULL. READY points to the start,
5700 E_READY to the end of the ready list. MAY_FAIL determines what should be
5701 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5702 otherwise we return 0.
5703 Return 1 if any insns can be scheduled in this cycle. */
5706 itanium_reorder (dump
, ready
, e_ready
, may_fail
)
5712 const struct ia64_packet
*best_packet
;
5713 int n_ready
= e_ready
- ready
;
5714 int first
= sched_data
.first_slot
;
5715 int i
, best
, best_split
, filled
;
5717 for (i
= 0; i
< n_ready
; i
++)
5718 sched_types
[i
] = ia64_safe_type (ready
[i
]);
5720 find_best_packet (&best
, &best_packet
, ready
, sched_types
, n_ready
);
5731 fprintf (dump
, "// Selected bundles: %s %s (%d insns)\n",
5732 best_packet
->t1
->name
,
5733 best_packet
->t2
? best_packet
->t2
->name
: NULL
, best
);
5736 best_split
= itanium_split_issue (best_packet
, first
);
5737 packet_matches_p (best_packet
, best_split
, &filled
);
5739 for (i
= filled
; i
< best_split
; i
++)
5743 insn_nr
= find_best_insn (ready
, sched_types
, n_ready
, best_packet
, i
);
5746 rtx insn
= ready
[insn_nr
];
5747 memmove (ready
+ insn_nr
, ready
+ insn_nr
+ 1,
5748 (n_ready
- insn_nr
- 1) * sizeof (rtx
));
5749 memmove (sched_types
+ insn_nr
, sched_types
+ insn_nr
+ 1,
5750 (n_ready
- insn_nr
- 1) * sizeof (enum attr_type
));
5751 ready
[--n_ready
] = insn
;
5755 sched_data
.packet
= best_packet
;
5756 sched_data
.split
= best_split
;
5760 /* Dump information about the current scheduling state to file DUMP. */
5763 dump_current_packet (dump
)
5767 fprintf (dump
, "// %d slots filled:", sched_data
.cur
);
5768 for (i
= 0; i
< sched_data
.first_slot
; i
++)
5770 rtx insn
= sched_data
.insns
[i
];
5771 fprintf (dump
, " %s", type_names
[sched_data
.types
[i
]]);
5773 fprintf (dump
, "/%s", type_names
[ia64_safe_type (insn
)]);
5774 if (sched_data
.stopbit
[i
])
5775 fprintf (dump
, " ;;");
5777 fprintf (dump
, " :::");
5778 for (i
= sched_data
.first_slot
; i
< sched_data
.cur
; i
++)
5780 rtx insn
= sched_data
.insns
[i
];
5781 enum attr_type t
= ia64_safe_type (insn
);
5782 fprintf (dump
, " (%d) %s", INSN_UID (insn
), type_names
[t
]);
5784 fprintf (dump
, "\n");
5787 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5791 schedule_stop (dump
)
5794 const struct ia64_packet
*best
= sched_data
.packet
;
5799 fprintf (dump
, "// Stop bit, cur = %d.\n", sched_data
.cur
);
5801 if (sched_data
.cur
== 0)
5804 fprintf (dump
, "// At start of bundle, so nothing to do.\n");
5806 rotate_two_bundles (NULL
);
5810 for (i
= -1; i
< NR_PACKETS
; i
++)
5812 /* This is a slight hack to give the current packet the first chance.
5813 This is done to avoid e.g. switching from MIB to MBB bundles. */
5814 const struct ia64_packet
*p
= (i
>= 0 ? packets
+ i
: sched_data
.packet
);
5815 int split
= get_split (p
, sched_data
.first_slot
);
5816 const struct bundle
*compare
;
5819 if (! packet_matches_p (p
, split
, &next
))
5822 compare
= next
> 3 ? p
->t2
: p
->t1
;
5825 if (compare
->possible_stop
)
5826 stoppos
= compare
->possible_stop
;
5830 if (stoppos
< next
|| stoppos
>= best_stop
)
5832 if (compare
->possible_stop
== 0)
5834 stoppos
= (next
> 3 ? 6 : 3);
5836 if (stoppos
< next
|| stoppos
>= best_stop
)
5840 fprintf (dump
, "// switching from %s %s to %s %s (stop at %d)\n",
5841 best
->t1
->name
, best
->t2
->name
, p
->t1
->name
, p
->t2
->name
,
5844 best_stop
= stoppos
;
5848 sched_data
.packet
= best
;
5849 cycle_end_fill_slots (dump
);
5850 while (sched_data
.cur
< best_stop
)
5852 sched_data
.types
[sched_data
.cur
] = best
->t
[sched_data
.cur
];
5853 sched_data
.insns
[sched_data
.cur
] = 0;
5854 sched_data
.stopbit
[sched_data
.cur
] = 0;
5857 sched_data
.stopbit
[sched_data
.cur
- 1] = 1;
5858 sched_data
.first_slot
= best_stop
;
5861 dump_current_packet (dump
);
5864 /* If necessary, perform one or two rotations on the scheduling state.
5865 This should only be called if we are starting a new cycle. */
5871 if (sched_data
.cur
== 6)
5872 rotate_two_bundles (dump
);
5873 else if (sched_data
.cur
>= 3)
5874 rotate_one_bundle (dump
);
5875 sched_data
.first_slot
= sched_data
.cur
;
5878 /* The clock cycle when ia64_sched_reorder was last called. */
5879 static int prev_cycle
;
5881 /* The first insn scheduled in the previous cycle. This is the saved
5882 value of sched_data.first_slot. */
5883 static int prev_first
;
5885 /* The last insn that has been scheduled. At the start of a new cycle
5886 we know that we can emit new insns after it; the main scheduling code
5887 has already emitted a cycle_display insn after it and is using that
5888 as its current last insn. */
5889 static rtx last_issued
;
5891 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
5892 pad out the delay between MM (shifts, etc.) and integer operations. */
5895 nop_cycles_until (clock_var
, dump
)
5899 int prev_clock
= prev_cycle
;
5900 int cycles_left
= clock_var
- prev_clock
;
5902 /* Finish the previous cycle; pad it out with NOPs. */
5903 if (sched_data
.cur
== 3)
5905 rtx t
= gen_insn_group_barrier (GEN_INT (3));
5906 last_issued
= emit_insn_after (t
, last_issued
);
5907 maybe_rotate (dump
);
5909 else if (sched_data
.cur
> 0)
5912 int split
= itanium_split_issue (sched_data
.packet
, prev_first
);
5914 if (sched_data
.cur
< 3 && split
> 3)
5920 if (split
> sched_data
.cur
)
5923 for (i
= sched_data
.cur
; i
< split
; i
++)
5927 t
= gen_nop_type (sched_data
.packet
->t
[i
]);
5928 last_issued
= emit_insn_after (t
, last_issued
);
5929 sched_data
.types
[i
] = sched_data
.packet
->t
[sched_data
.cur
];
5930 sched_data
.insns
[i
] = last_issued
;
5931 sched_data
.stopbit
[i
] = 0;
5933 sched_data
.cur
= split
;
5936 if (! need_stop
&& sched_data
.cur
> 0 && sched_data
.cur
< 6
5940 for (i
= sched_data
.cur
; i
< 6; i
++)
5944 t
= gen_nop_type (sched_data
.packet
->t
[i
]);
5945 last_issued
= emit_insn_after (t
, last_issued
);
5946 sched_data
.types
[i
] = sched_data
.packet
->t
[sched_data
.cur
];
5947 sched_data
.insns
[i
] = last_issued
;
5948 sched_data
.stopbit
[i
] = 0;
5955 if (need_stop
|| sched_data
.cur
== 6)
5957 rtx t
= gen_insn_group_barrier (GEN_INT (3));
5958 last_issued
= emit_insn_after (t
, last_issued
);
5960 maybe_rotate (dump
);
5964 while (cycles_left
> 0)
5966 rtx t
= gen_bundle_selector (GEN_INT (0));
5967 last_issued
= emit_insn_after (t
, last_issued
);
5968 t
= gen_nop_type (TYPE_M
);
5969 last_issued
= emit_insn_after (t
, last_issued
);
5970 t
= gen_nop_type (TYPE_I
);
5971 last_issued
= emit_insn_after (t
, last_issued
);
5972 if (cycles_left
> 1)
5974 t
= gen_insn_group_barrier (GEN_INT (2));
5975 last_issued
= emit_insn_after (t
, last_issued
);
5978 t
= gen_nop_type (TYPE_I
);
5979 last_issued
= emit_insn_after (t
, last_issued
);
5980 t
= gen_insn_group_barrier (GEN_INT (3));
5981 last_issued
= emit_insn_after (t
, last_issued
);
5986 /* We are about to being issuing insns for this clock cycle.
5987 Override the default sort algorithm to better slot instructions. */
5990 ia64_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
5991 reorder_type
, clock_var
)
5992 FILE *dump ATTRIBUTE_UNUSED
;
5993 int sched_verbose ATTRIBUTE_UNUSED
;
5996 int reorder_type
, clock_var
;
5999 int n_ready
= *pn_ready
;
6000 rtx
*e_ready
= ready
+ n_ready
;
6005 fprintf (dump
, "// ia64_sched_reorder (type %d):\n", reorder_type
);
6006 dump_current_packet (dump
);
6009 if (reorder_type
== 0 && clock_var
> 0 && ia64_final_schedule
)
6011 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6014 enum attr_itanium_class t
= ia64_safe_itanium_class (insn
);
6015 if (t
== ITANIUM_CLASS_IALU
|| t
== ITANIUM_CLASS_ISHF
6016 || t
== ITANIUM_CLASS_ILOG
6017 || t
== ITANIUM_CLASS_LD
|| t
== ITANIUM_CLASS_ST
)
6020 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
6021 if (REG_NOTE_KIND (link
) != REG_DEP_OUTPUT
6022 && REG_NOTE_KIND (link
) != REG_DEP_ANTI
)
6024 rtx other
= XEXP (link
, 0);
6025 enum attr_itanium_class t0
= ia64_safe_itanium_class (other
);
6026 if (t0
== ITANIUM_CLASS_MMSHF
6027 || t0
== ITANIUM_CLASS_MMMUL
)
6029 nop_cycles_until (clock_var
, sched_verbose
? dump
: NULL
);
6038 prev_first
= sched_data
.first_slot
;
6039 prev_cycle
= clock_var
;
6041 if (reorder_type
== 0)
6042 maybe_rotate (sched_verbose
? dump
: NULL
);
6044 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6046 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6047 if (insnp
< e_ready
)
6050 enum attr_type t
= ia64_safe_type (insn
);
6051 if (t
== TYPE_UNKNOWN
)
6053 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6054 || asm_noperands (PATTERN (insn
)) >= 0)
6056 rtx lowest
= ready
[0];
6063 rtx highest
= ready
[n_ready
- 1];
6064 ready
[n_ready
- 1] = insn
;
6066 if (ia64_final_schedule
&& group_barrier_needed_p (insn
))
6068 schedule_stop (sched_verbose
? dump
: NULL
);
6069 sched_data
.last_was_stop
= 1;
6070 maybe_rotate (sched_verbose
? dump
: NULL
);
6077 if (n_asms
< n_ready
)
6079 /* Some normal insns to process. Skip the asms. */
6083 else if (n_ready
> 0)
6085 /* Only asm insns left. */
6086 cycle_end_fill_slots (sched_verbose
? dump
: NULL
);
6090 if (ia64_final_schedule
)
6092 int nr_need_stop
= 0;
6094 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6095 if (safe_group_barrier_needed_p (*insnp
))
6098 /* Schedule a stop bit if
6099 - all insns require a stop bit, or
6100 - we are starting a new cycle and _any_ insns require a stop bit.
6101 The reason for the latter is that if our schedule is accurate, then
6102 the additional stop won't decrease performance at this point (since
6103 there's a split issue at this point anyway), but it gives us more
6104 freedom when scheduling the currently ready insns. */
6105 if ((reorder_type
== 0 && nr_need_stop
)
6106 || (reorder_type
== 1 && n_ready
== nr_need_stop
))
6108 schedule_stop (sched_verbose
? dump
: NULL
);
6109 sched_data
.last_was_stop
= 1;
6110 maybe_rotate (sched_verbose
? dump
: NULL
);
6111 if (reorder_type
== 1)
6118 /* Move down everything that needs a stop bit, preserving relative
6120 while (insnp
-- > ready
+ deleted
)
6121 while (insnp
>= ready
+ deleted
)
6124 if (! safe_group_barrier_needed_p (insn
))
6126 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
6132 if (deleted
!= nr_need_stop
)
6137 return itanium_reorder (sched_verbose
? dump
: NULL
,
6138 ready
, e_ready
, reorder_type
== 1);
6141 /* Like ia64_sched_reorder, but called after issuing each insn.
6142 Override the default sort algorithm to better slot instructions. */
6145 ia64_sched_reorder2 (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
6146 FILE *dump ATTRIBUTE_UNUSED
;
6147 int sched_verbose ATTRIBUTE_UNUSED
;
6152 if (sched_data
.last_was_stop
)
6155 /* Detect one special case and try to optimize it.
6156 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6157 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6158 if (sched_data
.first_slot
== 1
6159 && sched_data
.stopbit
[0]
6160 && ((sched_data
.cur
== 4
6161 && (sched_data
.types
[1] == TYPE_M
|| sched_data
.types
[1] == TYPE_A
)
6162 && (sched_data
.types
[2] == TYPE_I
|| sched_data
.types
[2] == TYPE_A
)
6163 && (sched_data
.types
[3] != TYPE_M
&& sched_data
.types
[3] != TYPE_A
))
6164 || (sched_data
.cur
== 3
6165 && (sched_data
.types
[1] == TYPE_M
|| sched_data
.types
[1] == TYPE_A
)
6166 && (sched_data
.types
[2] != TYPE_M
&& sched_data
.types
[2] != TYPE_I
6167 && sched_data
.types
[2] != TYPE_A
))))
6171 rtx stop
= PREV_INSN (sched_data
.insns
[1]);
6174 sched_data
.stopbit
[0] = 0;
6175 sched_data
.stopbit
[2] = 1;
6176 if (GET_CODE (stop
) != INSN
)
6179 pat
= PATTERN (stop
);
6180 /* Ignore cycle displays. */
6181 if (GET_CODE (pat
) == UNSPEC
&& XINT (pat
, 1) == 23)
6182 stop
= PREV_INSN (stop
);
6183 pat
= PATTERN (stop
);
6184 if (GET_CODE (pat
) != UNSPEC_VOLATILE
6185 || XINT (pat
, 1) != 2
6186 || INTVAL (XVECEXP (pat
, 0, 0)) != 1)
6188 XVECEXP (pat
, 0, 0) = GEN_INT (3);
6190 sched_data
.types
[5] = sched_data
.types
[3];
6191 sched_data
.types
[4] = sched_data
.types
[2];
6192 sched_data
.types
[3] = sched_data
.types
[1];
6193 sched_data
.insns
[5] = sched_data
.insns
[3];
6194 sched_data
.insns
[4] = sched_data
.insns
[2];
6195 sched_data
.insns
[3] = sched_data
.insns
[1];
6196 sched_data
.stopbit
[5] = sched_data
.stopbit
[4] = sched_data
.stopbit
[3] = 0;
6197 sched_data
.cur
+= 2;
6198 sched_data
.first_slot
= 3;
6199 for (i
= 0; i
< NR_PACKETS
; i
++)
6201 const struct ia64_packet
*p
= packets
+ i
;
6202 if (p
->t
[0] == TYPE_M
&& p
->t
[1] == TYPE_F
&& p
->t
[2] == TYPE_B
)
6204 sched_data
.packet
= p
;
6208 rotate_one_bundle (sched_verbose
? dump
: NULL
);
6211 for (i
= 0; i
< NR_PACKETS
; i
++)
6213 const struct ia64_packet
*p
= packets
+ i
;
6214 int split
= get_split (p
, sched_data
.first_slot
);
6217 /* Disallow multiway branches here. */
6218 if (p
->t
[1] == TYPE_B
)
6221 if (packet_matches_p (p
, split
, &next
) && next
< best
)
6224 sched_data
.packet
= p
;
6225 sched_data
.split
= split
;
6234 int more
= ia64_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
, 1,
6238 /* Did we schedule a stop? If so, finish this cycle. */
6239 if (sched_data
.cur
== sched_data
.first_slot
)
6244 fprintf (dump
, "// Can't issue more this cycle; updating type array.\n");
6246 cycle_end_fill_slots (sched_verbose
? dump
: NULL
);
6248 dump_current_packet (dump
);
6252 /* We are about to issue INSN. Return the number of insns left on the
6253 ready queue that can be issued this cycle. */
6256 ia64_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
6260 int can_issue_more ATTRIBUTE_UNUSED
;
6262 enum attr_type t
= ia64_safe_type (insn
);
6266 if (sched_data
.last_was_stop
)
6268 int t
= sched_data
.first_slot
;
6271 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t
)), insn
);
6272 init_insn_group_barriers ();
6273 sched_data
.last_was_stop
= 0;
6276 if (t
== TYPE_UNKNOWN
)
6279 fprintf (dump
, "// Ignoring type %s\n", type_names
[t
]);
6280 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6281 || asm_noperands (PATTERN (insn
)) >= 0)
6283 /* This must be some kind of asm. Clear the scheduling state. */
6284 rotate_two_bundles (sched_verbose
? dump
: NULL
);
6285 if (ia64_final_schedule
)
6286 group_barrier_needed_p (insn
);
6291 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6292 important state info. Don't delete this test. */
6293 if (ia64_final_schedule
6294 && group_barrier_needed_p (insn
))
6297 sched_data
.stopbit
[sched_data
.cur
] = 0;
6298 sched_data
.insns
[sched_data
.cur
] = insn
;
6299 sched_data
.types
[sched_data
.cur
] = t
;
6303 fprintf (dump
, "// Scheduling insn %d of type %s\n",
6304 INSN_UID (insn
), type_names
[t
]);
6306 if (GET_CODE (insn
) == CALL_INSN
&& ia64_final_schedule
)
6308 schedule_stop (sched_verbose
? dump
: NULL
);
6309 sched_data
.last_was_stop
= 1;
6315 /* Free data allocated by ia64_sched_init. */
6318 ia64_sched_finish (dump
, sched_verbose
)
6323 fprintf (dump
, "// Finishing schedule.\n");
6324 rotate_two_bundles (NULL
);
6329 /* Emit pseudo-ops for the assembler to describe predicate relations.
6330 At present this assumes that we only consider predicate pairs to
6331 be mutex, and that the assembler can deduce proper values from
6332 straight-line code. */
6335 emit_predicate_relation_info ()
6339 for (i
= n_basic_blocks
- 1; i
>= 0; --i
)
6341 basic_block bb
= BASIC_BLOCK (i
);
6343 rtx head
= bb
->head
;
6345 /* We only need such notes at code labels. */
6346 if (GET_CODE (head
) != CODE_LABEL
)
6348 if (GET_CODE (NEXT_INSN (head
)) == NOTE
6349 && NOTE_LINE_NUMBER (NEXT_INSN (head
)) == NOTE_INSN_BASIC_BLOCK
)
6350 head
= NEXT_INSN (head
);
6352 for (r
= PR_REG (0); r
< PR_REG (64); r
+= 2)
6353 if (REGNO_REG_SET_P (bb
->global_live_at_start
, r
))
6355 rtx p
= gen_rtx_REG (BImode
, r
);
6356 rtx n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
6357 if (head
== bb
->end
)
6363 /* Look for conditional calls that do not return, and protect predicate
6364 relations around them. Otherwise the assembler will assume the call
6365 returns, and complain about uses of call-clobbered predicates after
6367 for (i
= n_basic_blocks
- 1; i
>= 0; --i
)
6369 basic_block bb
= BASIC_BLOCK (i
);
6370 rtx insn
= bb
->head
;
6374 if (GET_CODE (insn
) == CALL_INSN
6375 && GET_CODE (PATTERN (insn
)) == COND_EXEC
6376 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
6378 rtx b
= emit_insn_before (gen_safe_across_calls_all (), insn
);
6379 rtx a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
6380 if (bb
->head
== insn
)
6382 if (bb
->end
== insn
)
6386 if (insn
== bb
->end
)
6388 insn
= NEXT_INSN (insn
);
6393 /* Generate a NOP instruction of type T. We will never generate L type
6403 return gen_nop_m ();
6405 return gen_nop_i ();
6407 return gen_nop_b ();
6409 return gen_nop_f ();
6411 return gen_nop_x ();
6417 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6418 here than while scheduling. */
6424 const struct bundle
*b
= 0;
6427 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6431 pat
= INSN_P (insn
) ? PATTERN (insn
) : const0_rtx
;
6432 if (GET_CODE (pat
) == USE
|| GET_CODE (pat
) == CLOBBER
)
6434 if ((GET_CODE (pat
) == UNSPEC
&& XINT (pat
, 1) == 22)
6435 || GET_CODE (insn
) == CODE_LABEL
)
6438 while (bundle_pos
< 3)
6440 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6443 if (GET_CODE (insn
) != CODE_LABEL
)
6444 b
= bundle
+ INTVAL (XVECEXP (pat
, 0, 0));
6450 else if (GET_CODE (pat
) == UNSPEC_VOLATILE
&& XINT (pat
, 1) == 2)
6452 int t
= INTVAL (XVECEXP (pat
, 0, 0));
6454 while (bundle_pos
< t
)
6456 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6462 if (bundle_pos
== 3)
6465 if (b
&& INSN_P (insn
))
6467 t
= ia64_safe_type (insn
);
6468 if (asm_noperands (PATTERN (insn
)) >= 0
6469 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)
6471 while (bundle_pos
< 3)
6473 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6479 if (t
== TYPE_UNKNOWN
)
6481 while (bundle_pos
< 3)
6483 if (t
== b
->t
[bundle_pos
]
6484 || (t
== TYPE_A
&& (b
->t
[bundle_pos
] == TYPE_M
6485 || b
->t
[bundle_pos
] == TYPE_I
)))
6488 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6497 /* Perform machine dependent operations on the rtl chain INSNS. */
6503 /* If optimizing, we'll have split before scheduling. */
6505 split_all_insns_noflow ();
6507 /* Make sure the CFG and global_live_at_start are correct
6508 for emit_predicate_relation_info. */
6509 find_basic_blocks (insns
, max_reg_num (), NULL
);
6510 life_analysis (insns
, NULL
, PROP_DEATH_NOTES
);
6512 if (ia64_flag_schedule_insns2
)
6514 ia64_final_schedule
= 1;
6515 schedule_ebbs (rtl_dump_file
);
6516 ia64_final_schedule
= 0;
6518 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6519 place as they were during scheduling. */
6520 emit_insn_group_barriers (rtl_dump_file
, insns
);
6524 emit_all_insn_group_barriers (rtl_dump_file
, insns
);
6526 /* A call must not be the last instruction in a function, so that the
6527 return address is still within the function, so that unwinding works
6528 properly. Note that IA-64 differs from dwarf2 on this point. */
6529 if (flag_unwind_tables
|| (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
6534 insn
= get_last_insn ();
6535 if (! INSN_P (insn
))
6536 insn
= prev_active_insn (insn
);
6537 if (GET_CODE (insn
) == INSN
6538 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
6539 && XINT (PATTERN (insn
), 1) == 2)
6542 insn
= prev_active_insn (insn
);
6544 if (GET_CODE (insn
) == CALL_INSN
)
6547 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6548 emit_insn (gen_break_f ());
6549 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6554 emit_predicate_relation_info ();
6557 /* Return true if REGNO is used by the epilogue. */
6560 ia64_epilogue_uses (regno
)
6563 /* When a function makes a call through a function descriptor, we
6564 will write a (potentially) new value to "gp". After returning
6565 from such a call, we need to make sure the function restores the
6566 original gp-value, even if the function itself does not use the
6568 if (regno
== R_GR (1)
6570 && !(TARGET_AUTO_PIC
|| TARGET_NO_PIC
))
6573 /* For functions defined with the syscall_linkage attribute, all input
6574 registers are marked as live at all function exits. This prevents the
6575 register allocator from using the input registers, which in turn makes it
6576 possible to restart a system call after an interrupt without having to
6577 save/restore the input registers. This also prevents kernel data from
6578 leaking to application code. */
6580 if (IN_REGNO_P (regno
)
6581 && lookup_attribute ("syscall_linkage",
6582 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
6585 /* Conditional return patterns can't represent the use of `b0' as
6586 the return address, so we force the value live this way. */
6587 if (regno
== R_BR (0))
6590 if (regs_ever_live
[AR_LC_REGNUM
] && regno
== AR_LC_REGNUM
)
6592 if (! current_function_is_leaf
&& regno
== AR_PFS_REGNUM
)
6594 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
6595 && regno
== AR_UNAT_REGNUM
)
6601 /* Return true if IDENTIFIER is a valid attribute for TYPE. */
6604 ia64_valid_type_attribute (type
, attributes
, identifier
, args
)
6606 tree attributes ATTRIBUTE_UNUSED
;
6610 /* We only support an attribute for function calls. */
6612 if (TREE_CODE (type
) != FUNCTION_TYPE
6613 && TREE_CODE (type
) != METHOD_TYPE
)
6616 /* The "syscall_linkage" attribute says the callee is a system call entry
6617 point. This affects ia64_epilogue_uses. */
6619 if (is_attribute_p ("syscall_linkage", identifier
))
6620 return args
== NULL_TREE
;
6625 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6627 We add @ to the name if this goes in small data/bss. We can only put
6628 a variable in small data/bss if it is defined in this module or a module
6629 that we are statically linked with. We can't check the second condition,
6630 but TREE_STATIC gives us the first one. */
6632 /* ??? If we had IPA, we could check the second condition. We could support
6633 programmer added section attributes if the variable is not defined in this
6636 /* ??? See the v850 port for a cleaner way to do this. */
6638 /* ??? We could also support own long data here. Generating movl/add/ld8
6639 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6640 code faster because there is one less load. This also includes incomplete
6641 types which can't go in sdata/sbss. */
6643 /* ??? See select_section. We must put short own readonly variables in
6644 sdata/sbss instead of the more natural rodata, because we can't perform
6645 the DECL_READONLY_SECTION test here. */
6647 extern struct obstack
* saveable_obstack
;
6650 ia64_encode_section_info (decl
)
6653 const char *symbol_str
;
6655 if (TREE_CODE (decl
) == FUNCTION_DECL
)
6657 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl
), 0)) = 1;
6661 /* Careful not to prod global register variables. */
6662 if (TREE_CODE (decl
) != VAR_DECL
6663 || GET_CODE (DECL_RTL (decl
)) != MEM
6664 || GET_CODE (XEXP (DECL_RTL (decl
), 0)) != SYMBOL_REF
)
6667 symbol_str
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
6669 /* We assume that -fpic is used only to create a shared library (dso).
6670 With -fpic, no global data can ever be sdata.
6671 Without -fpic, global common uninitialized data can never be sdata, since
6672 it can unify with a real definition in a dso. */
6673 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6674 to access them. The linker may then be able to do linker relaxation to
6675 optimize references to them. Currently sdata implies use of gprel. */
6676 /* We need the DECL_EXTERNAL check for C++. static class data members get
6677 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6678 statically allocated, but the space is allocated somewhere else. Such
6679 decls can not be own data. */
6680 if (! TARGET_NO_SDATA
6681 && TREE_STATIC (decl
) && ! DECL_EXTERNAL (decl
)
6682 && ! (DECL_ONE_ONLY (decl
) || DECL_WEAK (decl
))
6683 && ! (TREE_PUBLIC (decl
)
6685 || (DECL_COMMON (decl
)
6686 && (DECL_INITIAL (decl
) == 0
6687 || DECL_INITIAL (decl
) == error_mark_node
))))
6688 /* Either the variable must be declared without a section attribute,
6689 or the section must be sdata or sbss. */
6690 && (DECL_SECTION_NAME (decl
) == 0
6691 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl
)),
6693 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl
)),
6696 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
6698 /* If the variable has already been defined in the output file, then it
6699 is too late to put it in sdata if it wasn't put there in the first
6700 place. The test is here rather than above, because if it is already
6701 in sdata, then it can stay there. */
6703 if (TREE_ASM_WRITTEN (decl
))
6706 /* If this is an incomplete type with size 0, then we can't put it in
6707 sdata because it might be too big when completed. */
6709 && size
<= (HOST_WIDE_INT
) ia64_section_threshold
6710 && symbol_str
[0] != SDATA_NAME_FLAG_CHAR
)
6712 size_t len
= strlen (symbol_str
);
6713 char *newstr
= alloca (len
+ 1);
6716 *newstr
= SDATA_NAME_FLAG_CHAR
;
6717 memcpy (newstr
+ 1, symbol_str
, len
+ 1);
6719 string
= ggc_alloc_string (newstr
, len
+ 1);
6720 XSTR (XEXP (DECL_RTL (decl
), 0), 0) = string
;
6723 /* This decl is marked as being in small data/bss but it shouldn't
6724 be; one likely explanation for this is that the decl has been
6725 moved into a different section from the one it was in when
6726 ENCODE_SECTION_INFO was first called. Remove the '@'.*/
6727 else if (symbol_str
[0] == SDATA_NAME_FLAG_CHAR
)
6729 XSTR (XEXP (DECL_RTL (decl
), 0), 0)
6730 = ggc_strdup (symbol_str
+ 1);
6734 /* Output assembly directives for prologue regions. */
6736 /* The current basic block number. */
6738 static int block_num
;
6740 /* True if we need a copy_state command at the start of the next block. */
6742 static int need_copy_state
;
6744 /* The function emits unwind directives for the start of an epilogue. */
6749 /* If this isn't the last block of the function, then we need to label the
6750 current state, and copy it back in at the start of the next block. */
6752 if (block_num
!= n_basic_blocks
- 1)
6754 fprintf (asm_out_file
, "\t.label_state 1\n");
6755 need_copy_state
= 1;
6758 fprintf (asm_out_file
, "\t.restore sp\n");
6761 /* This function processes a SET pattern looking for specific patterns
6762 which result in emitting an assembly directive required for unwinding. */
6765 process_set (asm_out_file
, pat
)
6769 rtx src
= SET_SRC (pat
);
6770 rtx dest
= SET_DEST (pat
);
6771 int src_regno
, dest_regno
;
6773 /* Look for the ALLOC insn. */
6774 if (GET_CODE (src
) == UNSPEC_VOLATILE
6775 && XINT (src
, 1) == 0
6776 && GET_CODE (dest
) == REG
)
6778 dest_regno
= REGNO (dest
);
6780 /* If this isn't the final destination for ar.pfs, the alloc
6781 shouldn't have been marked frame related. */
6782 if (dest_regno
!= current_frame_info
.reg_save_ar_pfs
)
6785 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
6786 ia64_dbx_register_number (dest_regno
));
6790 /* Look for SP = .... */
6791 if (GET_CODE (dest
) == REG
&& REGNO (dest
) == STACK_POINTER_REGNUM
)
6793 if (GET_CODE (src
) == PLUS
)
6795 rtx op0
= XEXP (src
, 0);
6796 rtx op1
= XEXP (src
, 1);
6797 if (op0
== dest
&& GET_CODE (op1
) == CONST_INT
)
6799 if (INTVAL (op1
) < 0)
6801 fputs ("\t.fframe ", asm_out_file
);
6802 fprintf (asm_out_file
, HOST_WIDE_INT_PRINT_DEC
,
6804 fputc ('\n', asm_out_file
);
6807 process_epilogue ();
6812 else if (GET_CODE (src
) == REG
6813 && REGNO (src
) == HARD_FRAME_POINTER_REGNUM
)
6814 process_epilogue ();
6821 /* Register move we need to look at. */
6822 if (GET_CODE (dest
) == REG
&& GET_CODE (src
) == REG
)
6824 src_regno
= REGNO (src
);
6825 dest_regno
= REGNO (dest
);
6830 /* Saving return address pointer. */
6831 if (dest_regno
!= current_frame_info
.reg_save_b0
)
6833 fprintf (asm_out_file
, "\t.save rp, r%d\n",
6834 ia64_dbx_register_number (dest_regno
));
6838 if (dest_regno
!= current_frame_info
.reg_save_pr
)
6840 fprintf (asm_out_file
, "\t.save pr, r%d\n",
6841 ia64_dbx_register_number (dest_regno
));
6844 case AR_UNAT_REGNUM
:
6845 if (dest_regno
!= current_frame_info
.reg_save_ar_unat
)
6847 fprintf (asm_out_file
, "\t.save ar.unat, r%d\n",
6848 ia64_dbx_register_number (dest_regno
));
6852 if (dest_regno
!= current_frame_info
.reg_save_ar_lc
)
6854 fprintf (asm_out_file
, "\t.save ar.lc, r%d\n",
6855 ia64_dbx_register_number (dest_regno
));
6858 case STACK_POINTER_REGNUM
:
6859 if (dest_regno
!= HARD_FRAME_POINTER_REGNUM
6860 || ! frame_pointer_needed
)
6862 fprintf (asm_out_file
, "\t.vframe r%d\n",
6863 ia64_dbx_register_number (dest_regno
));
6867 /* Everything else should indicate being stored to memory. */
6872 /* Memory store we need to look at. */
6873 if (GET_CODE (dest
) == MEM
&& GET_CODE (src
) == REG
)
6879 if (GET_CODE (XEXP (dest
, 0)) == REG
)
6881 base
= XEXP (dest
, 0);
6884 else if (GET_CODE (XEXP (dest
, 0)) == PLUS
6885 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
)
6887 base
= XEXP (XEXP (dest
, 0), 0);
6888 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
6893 if (base
== hard_frame_pointer_rtx
)
6895 saveop
= ".savepsp";
6898 else if (base
== stack_pointer_rtx
)
6903 src_regno
= REGNO (src
);
6907 if (current_frame_info
.reg_save_b0
!= 0)
6909 fprintf (asm_out_file
, "\t%s rp, %ld\n", saveop
, off
);
6913 if (current_frame_info
.reg_save_pr
!= 0)
6915 fprintf (asm_out_file
, "\t%s pr, %ld\n", saveop
, off
);
6919 if (current_frame_info
.reg_save_ar_lc
!= 0)
6921 fprintf (asm_out_file
, "\t%s ar.lc, %ld\n", saveop
, off
);
6925 if (current_frame_info
.reg_save_ar_pfs
!= 0)
6927 fprintf (asm_out_file
, "\t%s ar.pfs, %ld\n", saveop
, off
);
6930 case AR_UNAT_REGNUM
:
6931 if (current_frame_info
.reg_save_ar_unat
!= 0)
6933 fprintf (asm_out_file
, "\t%s ar.unat, %ld\n", saveop
, off
);
6940 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
6941 1 << (src_regno
- GR_REG (4)));
6949 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
6950 1 << (src_regno
- BR_REG (1)));
6957 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
6958 1 << (src_regno
- FR_REG (2)));
6961 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
6962 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
6963 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
6964 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
6965 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
6966 1 << (src_regno
- FR_REG (12)));
6978 /* This function looks at a single insn and emits any directives
6979 required to unwind this insn. */
6981 process_for_unwind_directive (asm_out_file
, insn
)
6985 if (flag_unwind_tables
6986 || (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
6990 if (GET_CODE (insn
) == NOTE
6991 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
6993 block_num
= NOTE_BASIC_BLOCK (insn
)->index
;
6995 /* Restore unwind state from immediately before the epilogue. */
6996 if (need_copy_state
)
6998 fprintf (asm_out_file
, "\t.body\n");
6999 fprintf (asm_out_file
, "\t.copy_state 1\n");
7000 need_copy_state
= 0;
7004 if (! RTX_FRAME_RELATED_P (insn
))
7007 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
7009 pat
= XEXP (pat
, 0);
7011 pat
= PATTERN (insn
);
7013 switch (GET_CODE (pat
))
7016 process_set (asm_out_file
, pat
);
7022 int limit
= XVECLEN (pat
, 0);
7023 for (par_index
= 0; par_index
< limit
; par_index
++)
7025 rtx x
= XVECEXP (pat
, 0, par_index
);
7026 if (GET_CODE (x
) == SET
)
7027 process_set (asm_out_file
, x
);
7040 ia64_init_builtins ()
7042 tree psi_type_node
= build_pointer_type (integer_type_node
);
7043 tree pdi_type_node
= build_pointer_type (long_integer_type_node
);
7044 tree endlink
= void_list_node
;
7046 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7047 tree si_ftype_psi_si_si
7048 = build_function_type (integer_type_node
,
7049 tree_cons (NULL_TREE
, psi_type_node
,
7050 tree_cons (NULL_TREE
, integer_type_node
,
7051 tree_cons (NULL_TREE
,
7055 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7056 tree di_ftype_pdi_di_di
7057 = build_function_type (long_integer_type_node
,
7058 tree_cons (NULL_TREE
, pdi_type_node
,
7059 tree_cons (NULL_TREE
,
7060 long_integer_type_node
,
7061 tree_cons (NULL_TREE
,
7062 long_integer_type_node
,
7064 /* __sync_synchronize */
7065 tree void_ftype_void
7066 = build_function_type (void_type_node
, endlink
);
7068 /* __sync_lock_test_and_set_si */
7069 tree si_ftype_psi_si
7070 = build_function_type (integer_type_node
,
7071 tree_cons (NULL_TREE
, psi_type_node
,
7072 tree_cons (NULL_TREE
, integer_type_node
, endlink
)));
7074 /* __sync_lock_test_and_set_di */
7075 tree di_ftype_pdi_di
7076 = build_function_type (long_integer_type_node
,
7077 tree_cons (NULL_TREE
, pdi_type_node
,
7078 tree_cons (NULL_TREE
, long_integer_type_node
,
7081 /* __sync_lock_release_si */
7083 = build_function_type (void_type_node
, tree_cons (NULL_TREE
, psi_type_node
,
7086 /* __sync_lock_release_di */
7088 = build_function_type (void_type_node
, tree_cons (NULL_TREE
, pdi_type_node
,
7091 #define def_builtin(name, type, code) \
7092 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
7094 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si
,
7095 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
);
7096 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di
,
7097 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
);
7098 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si
,
7099 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
);
7100 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di
,
7101 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
);
7103 def_builtin ("__sync_synchronize", void_ftype_void
,
7104 IA64_BUILTIN_SYNCHRONIZE
);
7106 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si
,
7107 IA64_BUILTIN_LOCK_TEST_AND_SET_SI
);
7108 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di
,
7109 IA64_BUILTIN_LOCK_TEST_AND_SET_DI
);
7110 def_builtin ("__sync_lock_release_si", void_ftype_psi
,
7111 IA64_BUILTIN_LOCK_RELEASE_SI
);
7112 def_builtin ("__sync_lock_release_di", void_ftype_pdi
,
7113 IA64_BUILTIN_LOCK_RELEASE_DI
);
7115 def_builtin ("__builtin_ia64_bsp",
7116 build_function_type (ptr_type_node
, endlink
),
7119 def_builtin ("__builtin_ia64_flushrs",
7120 build_function_type (void_type_node
, endlink
),
7121 IA64_BUILTIN_FLUSHRS
);
7123 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si
,
7124 IA64_BUILTIN_FETCH_AND_ADD_SI
);
7125 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si
,
7126 IA64_BUILTIN_FETCH_AND_SUB_SI
);
7127 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si
,
7128 IA64_BUILTIN_FETCH_AND_OR_SI
);
7129 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si
,
7130 IA64_BUILTIN_FETCH_AND_AND_SI
);
7131 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si
,
7132 IA64_BUILTIN_FETCH_AND_XOR_SI
);
7133 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si
,
7134 IA64_BUILTIN_FETCH_AND_NAND_SI
);
7136 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si
,
7137 IA64_BUILTIN_ADD_AND_FETCH_SI
);
7138 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si
,
7139 IA64_BUILTIN_SUB_AND_FETCH_SI
);
7140 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si
,
7141 IA64_BUILTIN_OR_AND_FETCH_SI
);
7142 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si
,
7143 IA64_BUILTIN_AND_AND_FETCH_SI
);
7144 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si
,
7145 IA64_BUILTIN_XOR_AND_FETCH_SI
);
7146 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si
,
7147 IA64_BUILTIN_NAND_AND_FETCH_SI
);
7149 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di
,
7150 IA64_BUILTIN_FETCH_AND_ADD_DI
);
7151 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di
,
7152 IA64_BUILTIN_FETCH_AND_SUB_DI
);
7153 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di
,
7154 IA64_BUILTIN_FETCH_AND_OR_DI
);
7155 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di
,
7156 IA64_BUILTIN_FETCH_AND_AND_DI
);
7157 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di
,
7158 IA64_BUILTIN_FETCH_AND_XOR_DI
);
7159 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di
,
7160 IA64_BUILTIN_FETCH_AND_NAND_DI
);
7162 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di
,
7163 IA64_BUILTIN_ADD_AND_FETCH_DI
);
7164 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di
,
7165 IA64_BUILTIN_SUB_AND_FETCH_DI
);
7166 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di
,
7167 IA64_BUILTIN_OR_AND_FETCH_DI
);
7168 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di
,
7169 IA64_BUILTIN_AND_AND_FETCH_DI
);
7170 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di
,
7171 IA64_BUILTIN_XOR_AND_FETCH_DI
);
7172 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di
,
7173 IA64_BUILTIN_NAND_AND_FETCH_DI
);
7178 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7186 cmpxchgsz.acq tmp = [ptr], tmp
7187 } while (tmp != ret)
7191 ia64_expand_fetch_and_op (binoptab
, mode
, arglist
, target
)
7193 enum machine_mode mode
;
7197 rtx ret
, label
, tmp
, ccv
, insn
, mem
, value
;
7200 arg0
= TREE_VALUE (arglist
);
7201 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7202 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7203 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7205 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7206 MEM_VOLATILE_P (mem
) = 1;
7208 if (target
&& register_operand (target
, mode
))
7211 ret
= gen_reg_rtx (mode
);
7213 emit_insn (gen_mf ());
7215 /* Special case for fetchadd instructions. */
7216 if (binoptab
== add_optab
&& fetchadd_operand (value
, VOIDmode
))
7219 insn
= gen_fetchadd_acq_si (ret
, mem
, value
);
7221 insn
= gen_fetchadd_acq_di (ret
, mem
, value
);
7226 tmp
= gen_reg_rtx (mode
);
7227 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7228 emit_move_insn (tmp
, mem
);
7230 label
= gen_label_rtx ();
7232 emit_move_insn (ret
, tmp
);
7233 emit_move_insn (ccv
, tmp
);
7235 /* Perform the specific operation. Special case NAND by noticing
7236 one_cmpl_optab instead. */
7237 if (binoptab
== one_cmpl_optab
)
7239 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
7240 binoptab
= and_optab
;
7242 tmp
= expand_binop (mode
, binoptab
, tmp
, value
, tmp
, 1, OPTAB_WIDEN
);
7245 insn
= gen_cmpxchg_acq_si (tmp
, mem
, tmp
, ccv
);
7247 insn
= gen_cmpxchg_acq_di (tmp
, mem
, tmp
, ccv
);
7250 emit_cmp_and_jump_insns (tmp
, ret
, NE
, 0, mode
, 1, 0, label
);
7255 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7263 cmpxchgsz.acq tmp = [ptr], ret
7264 } while (tmp != old)
7268 ia64_expand_op_and_fetch (binoptab
, mode
, arglist
, target
)
7270 enum machine_mode mode
;
7274 rtx old
, label
, tmp
, ret
, ccv
, insn
, mem
, value
;
7277 arg0
= TREE_VALUE (arglist
);
7278 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7279 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7280 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7282 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7283 MEM_VOLATILE_P (mem
) = 1;
7285 if (target
&& ! register_operand (target
, mode
))
7288 emit_insn (gen_mf ());
7289 tmp
= gen_reg_rtx (mode
);
7290 old
= gen_reg_rtx (mode
);
7291 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7293 emit_move_insn (tmp
, mem
);
7295 label
= gen_label_rtx ();
7297 emit_move_insn (old
, tmp
);
7298 emit_move_insn (ccv
, tmp
);
7300 /* Perform the specific operation. Special case NAND by noticing
7301 one_cmpl_optab instead. */
7302 if (binoptab
== one_cmpl_optab
)
7304 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
7305 binoptab
= and_optab
;
7307 ret
= expand_binop (mode
, binoptab
, tmp
, value
, target
, 1, OPTAB_WIDEN
);
7310 insn
= gen_cmpxchg_acq_si (tmp
, mem
, ret
, ccv
);
7312 insn
= gen_cmpxchg_acq_di (tmp
, mem
, ret
, ccv
);
7315 emit_cmp_and_jump_insns (tmp
, old
, NE
, 0, mode
, 1, 0, label
);
7320 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7324 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7327 For bool_ it's the same except return ret == oldval.
7331 ia64_expand_compare_and_swap (mode
, boolp
, arglist
, target
)
7332 enum machine_mode mode
;
7337 tree arg0
, arg1
, arg2
;
7338 rtx mem
, old
, new, ccv
, tmp
, insn
;
7340 arg0
= TREE_VALUE (arglist
);
7341 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7342 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
7343 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7344 old
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7345 new = expand_expr (arg2
, NULL_RTX
, mode
, 0);
7347 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7348 MEM_VOLATILE_P (mem
) = 1;
7350 if (! register_operand (old
, mode
))
7351 old
= copy_to_mode_reg (mode
, old
);
7352 if (! register_operand (new, mode
))
7353 new = copy_to_mode_reg (mode
, new);
7355 if (! boolp
&& target
&& register_operand (target
, mode
))
7358 tmp
= gen_reg_rtx (mode
);
7360 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7361 emit_move_insn (ccv
, old
);
7362 emit_insn (gen_mf ());
7364 insn
= gen_cmpxchg_acq_si (tmp
, mem
, new, ccv
);
7366 insn
= gen_cmpxchg_acq_di (tmp
, mem
, new, ccv
);
7372 target
= gen_reg_rtx (mode
);
7373 return emit_store_flag_force (target
, EQ
, tmp
, old
, mode
, 1, 1);
7379 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7382 ia64_expand_lock_test_and_set (mode
, arglist
, target
)
7383 enum machine_mode mode
;
7388 rtx mem
, new, ret
, insn
;
7390 arg0
= TREE_VALUE (arglist
);
7391 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7392 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7393 new = expand_expr (arg1
, NULL_RTX
, mode
, 0);
7395 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7396 MEM_VOLATILE_P (mem
) = 1;
7397 if (! register_operand (new, mode
))
7398 new = copy_to_mode_reg (mode
, new);
7400 if (target
&& register_operand (target
, mode
))
7403 ret
= gen_reg_rtx (mode
);
7406 insn
= gen_xchgsi (ret
, mem
, new);
7408 insn
= gen_xchgdi (ret
, mem
, new);
7414 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7417 ia64_expand_lock_release (mode
, arglist
, target
)
7418 enum machine_mode mode
;
7420 rtx target ATTRIBUTE_UNUSED
;
7425 arg0
= TREE_VALUE (arglist
);
7426 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7428 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7429 MEM_VOLATILE_P (mem
) = 1;
7431 emit_move_insn (mem
, const0_rtx
);
7437 ia64_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
7440 rtx subtarget ATTRIBUTE_UNUSED
;
7441 enum machine_mode mode ATTRIBUTE_UNUSED
;
7442 int ignore ATTRIBUTE_UNUSED
;
7444 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
7445 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
7446 tree arglist
= TREE_OPERAND (exp
, 1);
7450 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
7451 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
7452 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
7453 case IA64_BUILTIN_LOCK_RELEASE_SI
:
7454 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
7455 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
7456 case IA64_BUILTIN_FETCH_AND_OR_SI
:
7457 case IA64_BUILTIN_FETCH_AND_AND_SI
:
7458 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
7459 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
7460 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
7461 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
7462 case IA64_BUILTIN_OR_AND_FETCH_SI
:
7463 case IA64_BUILTIN_AND_AND_FETCH_SI
:
7464 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
7465 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
7469 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
7470 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
7471 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
7472 case IA64_BUILTIN_LOCK_RELEASE_DI
:
7473 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
7474 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
7475 case IA64_BUILTIN_FETCH_AND_OR_DI
:
7476 case IA64_BUILTIN_FETCH_AND_AND_DI
:
7477 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
7478 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
7479 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
7480 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
7481 case IA64_BUILTIN_OR_AND_FETCH_DI
:
7482 case IA64_BUILTIN_AND_AND_FETCH_DI
:
7483 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
7484 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
7494 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
7495 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
7496 return ia64_expand_compare_and_swap (mode
, 1, arglist
, target
);
7498 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
7499 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
7500 return ia64_expand_compare_and_swap (mode
, 0, arglist
, target
);
7502 case IA64_BUILTIN_SYNCHRONIZE
:
7503 emit_insn (gen_mf ());
7506 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
7507 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
7508 return ia64_expand_lock_test_and_set (mode
, arglist
, target
);
7510 case IA64_BUILTIN_LOCK_RELEASE_SI
:
7511 case IA64_BUILTIN_LOCK_RELEASE_DI
:
7512 return ia64_expand_lock_release (mode
, arglist
, target
);
7514 case IA64_BUILTIN_BSP
:
7515 if (! target
|| ! register_operand (target
, DImode
))
7516 target
= gen_reg_rtx (DImode
);
7517 emit_insn (gen_bsp_value (target
));
7520 case IA64_BUILTIN_FLUSHRS
:
7521 emit_insn (gen_flushrs ());
7524 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
7525 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
7526 return ia64_expand_fetch_and_op (add_optab
, mode
, arglist
, target
);
7528 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
7529 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
7530 return ia64_expand_fetch_and_op (sub_optab
, mode
, arglist
, target
);
7532 case IA64_BUILTIN_FETCH_AND_OR_SI
:
7533 case IA64_BUILTIN_FETCH_AND_OR_DI
:
7534 return ia64_expand_fetch_and_op (ior_optab
, mode
, arglist
, target
);
7536 case IA64_BUILTIN_FETCH_AND_AND_SI
:
7537 case IA64_BUILTIN_FETCH_AND_AND_DI
:
7538 return ia64_expand_fetch_and_op (and_optab
, mode
, arglist
, target
);
7540 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
7541 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
7542 return ia64_expand_fetch_and_op (xor_optab
, mode
, arglist
, target
);
7544 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
7545 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
7546 return ia64_expand_fetch_and_op (one_cmpl_optab
, mode
, arglist
, target
);
7548 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
7549 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
7550 return ia64_expand_op_and_fetch (add_optab
, mode
, arglist
, target
);
7552 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
7553 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
7554 return ia64_expand_op_and_fetch (sub_optab
, mode
, arglist
, target
);
7556 case IA64_BUILTIN_OR_AND_FETCH_SI
:
7557 case IA64_BUILTIN_OR_AND_FETCH_DI
:
7558 return ia64_expand_op_and_fetch (ior_optab
, mode
, arglist
, target
);
7560 case IA64_BUILTIN_AND_AND_FETCH_SI
:
7561 case IA64_BUILTIN_AND_AND_FETCH_DI
:
7562 return ia64_expand_op_and_fetch (and_optab
, mode
, arglist
, target
);
7564 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
7565 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
7566 return ia64_expand_op_and_fetch (xor_optab
, mode
, arglist
, target
);
7568 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
7569 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
7570 return ia64_expand_op_and_fetch (one_cmpl_optab
, mode
, arglist
, target
);