/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "obstack.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
\f
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((rtx));
static void emit_predicate_relation_info PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
						 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
						  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));

\f
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}
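
/* Illustrative sketch (editorial, not part of GCC): the prefix stripping
   above, borrowed from special_function_p.  "__xsetjmp", "__setjmp" and
   "_setjmp" all reduce to "setjmp" before the name compares run.  The
   demo below is disabled with #if 0 so it does not affect the build.  */
#if 0
#include <stdio.h>

static const char *
demo_strip_prefix (const char *name)
{
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;			/* drop "__x" */
      else if (name[1] == '_')
	name += 2;			/* drop "__" */
      else
	name += 1;			/* drop "_" */
    }
  return name;
}

int
main (void)
{
  printf ("%s\n", demo_strip_prefix ("__xsetjmp"));	/* prints: setjmp */
  printf ("%s\n", demo_strip_prefix ("_setjmp"));	/* prints: setjmp */
  return 0;
}
#endif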

/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we must only accept immediates valid for both the GT and LT forms.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
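
/* Illustrative sketch (editorial, not part of GCC): assuming the usual
   ia64.h definitions, CONST_OK_FOR_K accepts the 8-bit signed range
   -128..127 and CONST_OK_FOR_L the adjusted range -127..128, so requiring
   both leaves -127..127 -- values that survive rewriting the comparison
   between GT and LT forms.  The macros below are hypothetical re-statements
   of those constraints, and the block is #if 0'd out of the build.  */
#if 0
#include <stdio.h>

#define DEMO_OK_FOR_K(x) ((unsigned long)(x) + 0x80 < 0x100)	/* -128..127 */
#define DEMO_OK_FOR_L(x) ((unsigned long)(x) + 0x7f < 0x100)	/* -127..128 */

int
main (void)
{
  long v;
  for (v = -130; v <= 130; v++)
    if (DEMO_OK_FOR_K (v) != DEMO_OK_FOR_L (v))
      printf ("%ld accepted by only one of K/L\n", v);	/* -128 and 128 */
  return 0;
}
#endif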

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8
	      || INTVAL (op) == -4 || INTVAL (op) == -1
	      || INTVAL (op) == 1 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator that accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator that accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}
\f
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, the source must be a constant: 0 for integral modes,
     or 0.0 or 1.0 for floating-point modes.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
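
/* Illustrative sketch (editorial, not part of GCC): how the check above
   behaves on sample values.  The helper is a hypothetical stand-in that
   spells out the same shift-and-log2 logic; #if 0'd out of the build.  */
#if 0
#include <stdio.h>

static int
demo_depz_field_mask (unsigned long op, unsigned long shift)
{
  int len = 0;
  op >>= shift;			/* discard the zero bits shifted in */
  while (op & 1)		/* count the solid block of 1's at bit 0 */
    len++, op >>= 1;
  return op == 0 ? len : -1;	/* fail if the block wasn't solid */
}

int
main (void)
{
  printf ("%d\n", demo_depz_field_mask (0xff0, 4));	/* 8: deposit 8 bits */
  printf ("%d\n", demo_depz_field_mask (0xf0f, 4));	/* -1: not a solid run */
  return 0;
}
#endif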

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src)
     rtx dest, src;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi)));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    emit_insn (gen_load_symptr (temp, src));

  if (temp != dest)
    emit_move_insn (dest, temp);
}
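
/* Illustrative sketch (editorial, not part of GCC): the 14-bit offset
   split used above, shown on a concrete value.  LO is the offset
   sign-extended to 14 bits and HI the complement, so HI + LO == OFS and
   LO always fits an "adds" immediate.  #if 0'd out of the build.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long ofs = 0x12345;
  long lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;	/* sign-extended low part */
  long hi = ofs - lo;				/* complementary high part */
  printf ("ofs=%#lx  hi=%#lx  lo=%ld\n", ofs, hi, lo);	/* hi + lo == ofs */
  return 0;
}
#endif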

rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
	 pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
	 slot.  Unfortunately, the stack slot address gets cse'd across
	 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
	 place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
	 in place, since this rtx is used in exception handling receivers.
	 Moreover, we must get this rtx out of regno_reg_rtx or reload
	 will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
	{
	  REGNO (save) = GR_REG (4);
	  regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
	}
    }
  else
    {
      if (setjmp_p)
	save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
	save = gen_rtx_REG (DImode, LOC_REG (0));
      else
	save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}

/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	  case REG:
	    out[0] = change_address (in, DImode, NULL_RTX);
	    break;
	  case POST_MODIFY:
	    base = XEXP (base, 0);
	    out[0] = change_address (in, DImode, NULL_RTX);
	    break;

	  /* Since we're changing the mode, we need to change to POST_MODIFY
	     as well to preserve the size of the increment.  Either that or
	     do the update in two steps, but we've already got this scratch
	     register handy so let's use it.  */
	  case POST_INC:
	    base = XEXP (base, 0);
	    out[0] = change_address (in, DImode,
				     gen_rtx_POST_MODIFY (Pmode, base,
							  plus_constant (base, 16)));
	    break;
	  case POST_DEC:
	    base = XEXP (base, 0);
	    out[0] = change_address (in, DImode,
				     gen_rtx_POST_MODIFY (Pmode, base,
							  plus_constant (base, -16)));
	    break;
	  default:
	    abort ();
	  }

	if (scratch == NULL_RTX)
	  abort ();
	out[1] = change_address (in, DImode, scratch);
	return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
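
/* Illustrative sketch (editorial, not part of GCC): splitting a 128-bit
   value into two 64-bit words, as the MEM case above does by reusing the
   base address for word 0 and base+8 (via the scratch register) for
   word 1.  Little-endian layout assumed, as on ia64; #if 0'd out.  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  unsigned char ti[16];
  unsigned long long lo, hi;
  int i;

  for (i = 0; i < 16; i++)	/* a recognizable 128-bit pattern */
    ti[i] = i;
  memcpy (&lo, ti, 8);		/* word 0: the MEM at the base address */
  memcpy (&hi, ti + 8, 8);	/* word 1: the MEM at base + 8 */
  printf ("%016llx %016llx\n", hi, lo);
  return 0;
}
#endif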

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    {
      return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
    }
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, gp_save, narg_rtx;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  if (sibcall_p)
    gp_save = NULL_RTX;
  else
    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (! symbolic_operand (addr, VOIDmode))
    {
      rtx dest;

      if (! sibcall_p)
	emit_move_insn (gp_save, pic_offset_table_rtx);

      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
		      gen_rtx_MEM (DImode, plus_constant (addr, 8)));

      if (sibcall_p)
	insn = gen_sibcall_pic (dest, narg_rtx, b0);
      else if (! retval)
	insn = gen_call_pic (dest, narg_rtx, b0);
      else
	insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
      emit_call_insn (insn);

      if (! sibcall_p)
	emit_move_insn (pic_offset_table_rtx, gp_save);
    }
  else if (TARGET_CONST_GP)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0));
      else
	{
	  emit_move_insn (gp_save, pic_offset_table_rtx);

	  if (! retval)
	    insn = gen_call_pic (addr, narg_rtx, b0);
	  else
	    insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
	  emit_call_insn (insn);

	  emit_move_insn (pic_offset_table_rtx, gp_save);
	}
    }
}
\f
/* Begin the assembly file.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", f);
	  out_state = 1;
	}
      else
	fputc (',', f);
      if (re == rs + 1)
	fprintf (f, "p%u", rs);
      else
	fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}
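
/* Illustrative sketch (editorial, not part of GCC): the run-length scan
   above, applied to a made-up call_used[] pattern.  Runs of call-saved
   predicates come out as "pN" or "pN-pM" in one comma-separated
   .pred.safe_across_calls directive.  #if 0'd out of the build.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* 1 = call-used (clobbered), 0 = call-saved; indices are p0..p15 here.  */
  int call_used[16] = { 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1 };
  unsigned int rs = 1, re;
  int out_state = 0;

  while (1)
    {
      while (rs < 16 && call_used[rs])
	rs++;
      if (rs >= 16)
	break;
      for (re = rs + 1; re < 16 && ! call_used[re]; re++)
	continue;
      fputs (out_state ? "," : "\t.pred.safe_across_calls ", stdout);
      out_state = 1;
      if (re == rs + 1)
	printf ("p%u", rs);
      else
	printf ("p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    putchar ('\n');		/* prints: p1-p3,p6,p10-p13 */
  return 0;
}
#endif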


/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  The GR_USED_MASK field of
   current_frame_info records the bits in GR0 to GR31 that have already
   been allocated by this routine.  TRY_LOCALS is true if we should attempt
   to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      if (regno < 80)
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}
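
/* Illustrative sketch (editorial, not part of GCC): the round-robin scan
   above.  Starting one past the last register handed out and wrapping at
   32 spreads consecutive temporaries across distinct registers.  The
   usability test here is a made-up stand-in; #if 0'd out of the build.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int last = 15, i, n;
  for (n = 0; n < 4; n++)	/* hand out four scratch regs */
    {
      for (i = 0; i < 32; i++)
	{
	  int regno = (last + i + 1) & 31;
	  if (regno >= 14)	/* pretend r14..r31 are usable scratches */
	    {
	      last = regno;
	      printf ("r%d\n", regno);	/* r16, r17, r18, r19 */
	      break;
	    }
	}
    }
  return 0;
}
#endif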

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    current_frame_info.gr_used_mask |= 1 << regno;
}

/* Compute the register save masks, offsets, and total frame size for the
   current function, filling in current_frame_info.  SIZE is the number of
   bytes of space needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  if (cfun->machine->n_varargs > 0)
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (regs_ever_live[regno])
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling, for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (profile_flag || profile_block_flag == 1)
    i = MAX (i, 1);
  else if (profile_block_flag == 2)
    i = MAX (i, 2);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* We should have gotten at least LOC79, since that's what
	 HARD_FRAME_POINTER_REGNUM is.  */
      if (current_frame_info.reg_fp == 0)
	abort ();
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
	{
	  spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || cfun->machine->n_varargs)
    {
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
		+ current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}
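
/* Illustrative sketch (editorial, not part of GCC): the total_size
   arithmetic above for a hypothetical frame, assuming IA64_STACK_ALIGN
   rounds up to a 16-byte multiple.  All numbers are made up; #if 0'd.  */
#if 0
#include <stdio.h>

#define DEMO_STACK_ALIGN(x) (((x) + 15) & ~15)	/* assumed 16-byte align */

int
main (void)
{
  long spill_size = 24;		/* e.g. two BRs and one GR spilled */
  long extra_spill_size = 8;	/* e.g. ar.unat with no GR home */
  long locals = 40;		/* get_frame_size () */
  long pretend = 0, outgoing = 16;
  long total = DEMO_STACK_ALIGN (spill_size + extra_spill_size
				 + locals + pretend + outgoing);
  printf ("total_size = %ld\n", total);	/* 88, rounded up to 96 */
  return 0;
}
#endif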

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = -current_frame_info.total_size;
	  else
	    offset = -(current_frame_info.total_size
		       - current_function_outgoing_args_size - 16);
	}
      else if (to == STACK_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = 0;
	  else
	    offset = 16 + current_function_outgoing_args_size;
	}
      else
	abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
	offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
	offset = (current_frame_info.total_size
		  + 16 - current_function_pretend_args_size);
      else
	abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}

/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
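
/* Illustrative sketch (editorial, not part of GCC): how the two
   interleaved iterators alternate, so consecutive spills go through
   different base registers and can issue in the same insn group.  Each
   access records its offset; the next access through the same iterator
   becomes a post-increment by the offset difference.  #if 0'd out.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long prev_off[2] = { 0, 0 };	/* both iterators start at the CFA base */
  int next_iter = 0, i;

  for (i = 0; i < 6; i++)
    {
      long cfa_off = -8 * (i + 1);		/* next 8-byte slot */
      long disp = prev_off[next_iter] - cfa_off;	/* post-modify amount */
      printf ("spill %d: iter %d, post-increment %ld\n", i, next_iter, disp);
      prev_off[next_iter] = cfa_off;
      next_iter ^= 1;		/* alternate between the two iterators */
    }
  return 0;
}
#endif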

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
	*spill_fill_data.prev_addr[iter]
	  = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				 gen_rtx_PLUS (DImode,
					       spill_fill_data.iter_reg[iter],
					       disp_rtx));
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
      return mem;
    }
  else
    {
      rtx seq;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = gen_sequence ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	spill_fill_data.init_after
	  = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx first = get_insns ();
	  if (first)
	    spill_fill_data.init_after
	      = emit_insn_before (seq, first);
	  else
	    spill_fill_data.init_after = emit_insn (seq);
	}
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  MEM_ALIAS_SET (mem) = get_varargs_alias_set ();

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}

static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	{
	  base = hard_frame_pointer_rtx;
	  off = - cfa_off;
	}
      else
	{
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;
	}

      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			     gen_rtx_SET (VOIDmode,
					  gen_rtx_MEM (GET_MODE (reg),
						       plus_constant (base, off)),
					  frame_reg),
			     REG_NOTES (insn));
    }
}

static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
			 GEN_INT (cfa_off)));
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}

/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16
	[ varargs spill area ]
	[ fr register spill area ]
	[ br register spill area ]
	[ ar register spill area ]
	[ pr register spill area ]
	[ gr register spill area ]  */
1807
1808 /* ??? Get inefficient code when the frame size is larger than can fit in an
1809 adds instruction. */
1810
1811 void
1812 ia64_expand_prologue ()
1813 {
1814 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1815 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1816 rtx reg, alt_reg;
1817
1818 ia64_compute_frame_size (get_frame_size ());
1819 last_scratch_gr_reg = 15;
1820
1821 /* If there is no epilogue, then we don't need some prologue insns.
1822 We need to avoid emitting the dead prologue insns, because flow
1823 will complain about them. */
1824 if (optimize)
1825 {
1826 edge e;
1827
1828 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1829 if ((e->flags & EDGE_FAKE) == 0
1830 && (e->flags & EDGE_FALLTHRU) != 0)
1831 break;
1832 epilogue_p = (e != NULL);
1833 }
1834 else
1835 epilogue_p = 1;
1836
1837 /* Set the local, input, and output register names. We need to do this
1838 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1839 half. If we use in/loc/out register names, then we get assembler errors
1840 in crtn.S because there is no alloc insn or regstk directive in there. */
1841 if (! TARGET_REG_NAMES)
1842 {
1843 int inputs = current_frame_info.n_input_regs;
1844 int locals = current_frame_info.n_local_regs;
1845 int outputs = current_frame_info.n_output_regs;
1846
1847 for (i = 0; i < inputs; i++)
1848 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
1849 for (i = 0; i < locals; i++)
1850 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
1851 for (i = 0; i < outputs; i++)
1852 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
1853 }
1854
1855 /* Set the frame pointer register name. The regnum is logically loc79,
1856 but of course we'll not have allocated that many locals. Rather than
1857 worrying about renumbering the existing rtxs, we adjust the name. */
1858 if (current_frame_info.reg_fp)
1859 {
1860 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
1861 reg_names[HARD_FRAME_POINTER_REGNUM]
1862 = reg_names[current_frame_info.reg_fp];
1863 reg_names[current_frame_info.reg_fp] = tmp;
1864 }
1865
1866 /* Fix up the return address placeholder. */
1867 /* ??? We can fail if __builtin_return_address is used, and we didn't
1868 allocate a register in which to save b0. I can't think of a way to
1869 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
1870 then be sure that I got the right one. Further, reload doesn't seem
1871 to care if an eliminable register isn't used, and "eliminates" it
1872 anyway. */
1873 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
1874 && current_frame_info.reg_save_b0 != 0)
1875 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
1876
1877 /* We don't need an alloc instruction if we've used no outputs or locals. */
1878 if (current_frame_info.n_local_regs == 0
1879 && current_frame_info.n_output_regs == 0
1880 && current_frame_info.n_input_regs <= current_function_args_info.words)
1881 {
1882 /* If there is no alloc, but there are input registers used, then we
1883 need a .regstk directive. */
1884 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
1885 ar_pfs_save_reg = NULL_RTX;
1886 }
1887 else
1888 {
1889 current_frame_info.need_regstk = 0;
1890
1891 if (current_frame_info.reg_save_ar_pfs)
1892 regno = current_frame_info.reg_save_ar_pfs;
1893 else
1894 regno = next_scratch_gr_reg ();
1895 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
1896
1897 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
1898 GEN_INT (current_frame_info.n_input_regs),
1899 GEN_INT (current_frame_info.n_local_regs),
1900 GEN_INT (current_frame_info.n_output_regs),
1901 GEN_INT (current_frame_info.n_rotate_regs)));
1902 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
1903 }
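/* The insn emitted above corresponds to assembly of roughly this shape
   (operand values are hypothetical: 2 inputs, 3 locals, 1 output, no
   rotating registers, ar.pfs saved in r34):

       alloc r34 = ar.pfs, 2, 3, 1, 0
*/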
1904
1905 /* Set up frame pointer, stack pointer, and spill iterators. */
1906
1907 n_varargs = cfun->machine->n_varargs;
1908 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
1909 stack_pointer_rtx, 0);
1910
1911 if (frame_pointer_needed)
1912 {
1913 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1914 RTX_FRAME_RELATED_P (insn) = 1;
1915 }
1916
1917 if (current_frame_info.total_size != 0)
1918 {
1919 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
1920 rtx offset;
1921
1922 if (CONST_OK_FOR_I (- current_frame_info.total_size))
1923 offset = frame_size_rtx;
1924 else
1925 {
1926 regno = next_scratch_gr_reg ();
1927 offset = gen_rtx_REG (DImode, regno);
1928 emit_move_insn (offset, frame_size_rtx);
1929 }
1930
1931 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
1932 stack_pointer_rtx, offset));
1933
1934 if (! frame_pointer_needed)
1935 {
1936 RTX_FRAME_RELATED_P (insn) = 1;
1937 if (GET_CODE (offset) != CONST_INT)
1938 {
1939 REG_NOTES (insn)
1940 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1941 gen_rtx_SET (VOIDmode,
1942 stack_pointer_rtx,
1943 gen_rtx_PLUS (DImode,
1944 stack_pointer_rtx,
1945 frame_size_rtx)),
1946 REG_NOTES (insn));
1947 }
1948 }
1949
1950 /* ??? At this point we must generate a magic insn that appears to
1951 modify the stack pointer, the frame pointer, and all spill
1952 iterators. This would allow the most scheduling freedom. For
1953 now, just hard stop. */
1954 emit_insn (gen_blockage ());
1955 }
1956
1957 /* Must copy out ar.unat before doing any integer spills. */
1958 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
1959 {
1960 if (current_frame_info.reg_save_ar_unat)
1961 ar_unat_save_reg
1962 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
1963 else
1964 {
1965 alt_regno = next_scratch_gr_reg ();
1966 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
1967 current_frame_info.gr_used_mask |= 1 << alt_regno;
1968 }
1969
1970 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
1971 insn = emit_move_insn (ar_unat_save_reg, reg);
1972 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
1973
1974 /* Even if we're not going to generate an epilogue, we still
1975 need to save the register so that EH works. */
1976 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
1977 emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
1978 }
1979 else
1980 ar_unat_save_reg = NULL_RTX;
1981
1982 /* Spill all varargs registers. Do this before spilling any GR registers,
1983 since we want the UNAT bits for the GR registers to override the UNAT
1984 bits from varargs, which we don't care about. */
1985
1986 cfa_off = -16;
1987 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
1988 {
1989 reg = gen_rtx_REG (DImode, regno);
1990 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
1991 }
1992
1993 /* Locate the bottom of the register save area. */
1994 cfa_off = (current_frame_info.spill_cfa_off
1995 + current_frame_info.spill_size
1996 + current_frame_info.extra_spill_size);
1997
1998 /* Save the predicate register block either in a register or in memory. */
1999 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2000 {
2001 reg = gen_rtx_REG (DImode, PR_REG (0));
2002 if (current_frame_info.reg_save_pr != 0)
2003 {
2004 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2005 insn = emit_move_insn (alt_reg, reg);
2006
2007 /* ??? Denote pr spill/fill by a DImode move that modifies all
2008 64 hard registers. */
2009 RTX_FRAME_RELATED_P (insn) = 1;
2010 REG_NOTES (insn)
2011 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2012 gen_rtx_SET (VOIDmode, alt_reg, reg),
2013 REG_NOTES (insn));
2014
2015 /* Even if we're not going to generate an epilogue, we still
2016 need to save the register so that EH works. */
2017 if (! epilogue_p)
2018 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2019 }
2020 else
2021 {
2022 alt_regno = next_scratch_gr_reg ();
2023 alt_reg = gen_rtx_REG (DImode, alt_regno);
2024 insn = emit_move_insn (alt_reg, reg);
2025 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2026 cfa_off -= 8;
2027 }
2028 }
2029
2030 /* Handle AR regs in numerical order. All of them get special handling. */
2031 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2032 && current_frame_info.reg_save_ar_unat == 0)
2033 {
2034 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2035 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2036 cfa_off -= 8;
2037 }
2038
2039 /* The alloc insn already copied ar.pfs into a general register. The
2040 only thing we have to do now is copy that register to a stack slot
2041 if we'd not allocated a local register for the job. */
2042 if (current_frame_info.reg_save_ar_pfs == 0
2043 && ! current_function_is_leaf)
2044 {
2045 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2046 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2047 cfa_off -= 8;
2048 }
2049
2050 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2051 {
2052 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2053 if (current_frame_info.reg_save_ar_lc != 0)
2054 {
2055 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2056 insn = emit_move_insn (alt_reg, reg);
2057 RTX_FRAME_RELATED_P (insn) = 1;
2058
2059 /* Even if we're not going to generate an epilogue, we still
2060 need to save the register so that EH works. */
2061 if (! epilogue_p)
2062 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2063 }
2064 else
2065 {
2066 alt_regno = next_scratch_gr_reg ();
2067 alt_reg = gen_rtx_REG (DImode, alt_regno);
2068 emit_move_insn (alt_reg, reg);
2069 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2070 cfa_off -= 8;
2071 }
2072 }
2073
2074 /* We should now be at the base of the gr/br/fr spill area. */
2075 if (cfa_off != (current_frame_info.spill_cfa_off
2076 + current_frame_info.spill_size))
2077 abort ();
2078
2079 /* Spill all general registers. */
2080 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2081 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2082 {
2083 reg = gen_rtx_REG (DImode, regno);
2084 do_spill (gen_gr_spill, reg, cfa_off, reg);
2085 cfa_off -= 8;
2086 }
2087
2088 /* Handle BR0 specially -- it may be getting stored permanently in
2089 some GR register. */
2090 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2091 {
2092 reg = gen_rtx_REG (DImode, BR_REG (0));
2093 if (current_frame_info.reg_save_b0 != 0)
2094 {
2095 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2096 insn = emit_move_insn (alt_reg, reg);
2097 RTX_FRAME_RELATED_P (insn) = 1;
2098
2099 /* Even if we're not going to generate an epilogue, we still
2100 need to save the register so that EH works. */
2101 if (! epilogue_p)
2102 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2103 }
2104 else
2105 {
2106 alt_regno = next_scratch_gr_reg ();
2107 alt_reg = gen_rtx_REG (DImode, alt_regno);
2108 emit_move_insn (alt_reg, reg);
2109 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2110 cfa_off -= 8;
2111 }
2112 }
2113
2114 /* Spill the rest of the BR registers. */
2115 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2116 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2117 {
2118 alt_regno = next_scratch_gr_reg ();
2119 alt_reg = gen_rtx_REG (DImode, alt_regno);
2120 reg = gen_rtx_REG (DImode, regno);
2121 emit_move_insn (alt_reg, reg);
2122 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2123 cfa_off -= 8;
2124 }
2125
2126 /* Align the frame and spill all FR registers. */
2127 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2128 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2129 {
2130 if (cfa_off & 15)
2131 abort ();
2132 reg = gen_rtx_REG (TFmode, regno);
2133 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2134 cfa_off -= 16;
2135 }
2136
2137 if (cfa_off != current_frame_info.spill_cfa_off)
2138 abort ();
2139
2140 finish_spill_pointers ();
2141 }
2142
2143 /* Called after register allocation to add any instructions needed for the
2144 epilogue. Using an epilogue insn is favored compared to putting all of the
2145 instructions in the FUNCTION_EPILOGUE macro, since it allows the scheduler
2146 to intermix instructions with the saves of the caller saved registers. In
2147 some cases, it might be necessary to emit a barrier instruction as the last
2148 insn to prevent such scheduling. */
2149
2150 void
2151 ia64_expand_epilogue (sibcall_p)
2152 int sibcall_p;
2153 {
2154 rtx insn, reg, alt_reg, ar_unat_save_reg;
2155 int regno, alt_regno, cfa_off;
2156
2157 ia64_compute_frame_size (get_frame_size ());
2158
2159 /* If there is a frame pointer, then we use it instead of the stack
2160 pointer, so that the stack pointer does not need to be valid when
2161 the epilogue starts. See EXIT_IGNORE_STACK. */
2162 if (frame_pointer_needed)
2163 setup_spill_pointers (current_frame_info.n_spilled,
2164 hard_frame_pointer_rtx, 0);
2165 else
2166 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2167 current_frame_info.total_size);
2168
2169 if (current_frame_info.total_size != 0)
2170 {
2171 /* ??? At this point we must generate a magic insn that appears to
2172 modify the spill iterators and the frame pointer. This would
2173 allow the most scheduling freedom. For now, just hard stop. */
2174 emit_insn (gen_blockage ());
2175 }
2176
2177 /* Locate the bottom of the register save area. */
2178 cfa_off = (current_frame_info.spill_cfa_off
2179 + current_frame_info.spill_size
2180 + current_frame_info.extra_spill_size);
2181
2182 /* Restore the predicate registers. */
2183 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2184 {
2185 if (current_frame_info.reg_save_pr != 0)
2186 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2187 else
2188 {
2189 alt_regno = next_scratch_gr_reg ();
2190 alt_reg = gen_rtx_REG (DImode, alt_regno);
2191 do_restore (gen_movdi_x, alt_reg, cfa_off);
2192 cfa_off -= 8;
2193 }
2194 reg = gen_rtx_REG (DImode, PR_REG (0));
2195 emit_move_insn (reg, alt_reg);
2196 }
2197
2198 /* Restore the application registers. */
2199
2200 /* Load the saved unat from the stack, but do not restore it until
2201 after the GRs have been restored. */
2202 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2203 {
2204 if (current_frame_info.reg_save_ar_unat != 0)
2205 ar_unat_save_reg
2206 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2207 else
2208 {
2209 alt_regno = next_scratch_gr_reg ();
2210 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2211 current_frame_info.gr_used_mask |= 1 << alt_regno;
2212 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2213 cfa_off -= 8;
2214 }
2215 }
2216 else
2217 ar_unat_save_reg = NULL_RTX;
2218
2219 if (current_frame_info.reg_save_ar_pfs != 0)
2220 {
2221 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2222 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2223 emit_move_insn (reg, alt_reg);
2224 }
2225 else if (! current_function_is_leaf)
2226 {
2227 alt_regno = next_scratch_gr_reg ();
2228 alt_reg = gen_rtx_REG (DImode, alt_regno);
2229 do_restore (gen_movdi_x, alt_reg, cfa_off);
2230 cfa_off -= 8;
2231 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2232 emit_move_insn (reg, alt_reg);
2233 }
2234
2235 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2236 {
2237 if (current_frame_info.reg_save_ar_lc != 0)
2238 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2239 else
2240 {
2241 alt_regno = next_scratch_gr_reg ();
2242 alt_reg = gen_rtx_REG (DImode, alt_regno);
2243 do_restore (gen_movdi_x, alt_reg, cfa_off);
2244 cfa_off -= 8;
2245 }
2246 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2247 emit_move_insn (reg, alt_reg);
2248 }
2249
2250 /* We should now be at the base of the gr/br/fr spill area. */
2251 if (cfa_off != (current_frame_info.spill_cfa_off
2252 + current_frame_info.spill_size))
2253 abort ();
2254
2255 /* Restore all general registers. */
2256 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2257 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2258 {
2259 reg = gen_rtx_REG (DImode, regno);
2260 do_restore (gen_gr_restore, reg, cfa_off);
2261 cfa_off -= 8;
2262 }
2263
2264 /* Restore the branch registers. Handle B0 specially, as it may
2265 have gotten stored in some GR register. */
2266 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2267 {
2268 if (current_frame_info.reg_save_b0 != 0)
2269 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2270 else
2271 {
2272 alt_regno = next_scratch_gr_reg ();
2273 alt_reg = gen_rtx_REG (DImode, alt_regno);
2274 do_restore (gen_movdi_x, alt_reg, cfa_off);
2275 cfa_off -= 8;
2276 }
2277 reg = gen_rtx_REG (DImode, BR_REG (0));
2278 emit_move_insn (reg, alt_reg);
2279 }
2280
2281 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2282 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2283 {
2284 alt_regno = next_scratch_gr_reg ();
2285 alt_reg = gen_rtx_REG (DImode, alt_regno);
2286 do_restore (gen_movdi_x, alt_reg, cfa_off);
2287 cfa_off -= 8;
2288 reg = gen_rtx_REG (DImode, regno);
2289 emit_move_insn (reg, alt_reg);
2290 }
2291
2292 /* Restore floating point registers. */
2293 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2294 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2295 {
2296 if (cfa_off & 15)
2297 abort ();
2298 reg = gen_rtx_REG (TFmode, regno);
2299 do_restore (gen_fr_restore_x, reg, cfa_off);
2300 cfa_off -= 16;
2301 }
2302
2303 /* Restore ar.unat for real. */
2304 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2305 {
2306 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2307 emit_move_insn (reg, ar_unat_save_reg);
2308 }
2309
2310 if (cfa_off != current_frame_info.spill_cfa_off)
2311 abort ();
2312
2313 finish_spill_pointers ();
2314
2315 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2316 {
2317 /* ??? At this point we must generate a magic insn that appears to
2318 modify the spill iterators, the stack pointer, and the frame
2319 pointer. This would allow the most scheduling freedom. For now,
2320 just hard stop. */
2321 emit_insn (gen_blockage ());
2322 }
2323
2324 if (cfun->machine->ia64_eh_epilogue_sp)
2325 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2326 else if (frame_pointer_needed)
2327 {
2328 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2329 RTX_FRAME_RELATED_P (insn) = 1;
2330 }
2331 else if (current_frame_info.total_size)
2332 {
2333 rtx offset, frame_size_rtx;
2334
2335 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2336 if (CONST_OK_FOR_I (current_frame_info.total_size))
2337 offset = frame_size_rtx;
2338 else
2339 {
2340 regno = next_scratch_gr_reg ();
2341 offset = gen_rtx_REG (DImode, regno);
2342 emit_move_insn (offset, frame_size_rtx);
2343 }
2344
2345 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2346 offset));
2347
2348 RTX_FRAME_RELATED_P (insn) = 1;
2349 if (GET_CODE (offset) != CONST_INT)
2350 {
2351 REG_NOTES (insn)
2352 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2353 gen_rtx_SET (VOIDmode,
2354 stack_pointer_rtx,
2355 gen_rtx_PLUS (DImode,
2356 stack_pointer_rtx,
2357 frame_size_rtx)),
2358 REG_NOTES (insn));
2359 }
2360 }
2361
2362 if (cfun->machine->ia64_eh_epilogue_bsp)
2363 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2364
2365 if (! sibcall_p)
2366 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2367 }
2368
2369 /* Return 1 if br.ret can do all the work required to return from a
2370 function. */
2371
2372 int
2373 ia64_direct_return ()
2374 {
2375 if (reload_completed && ! frame_pointer_needed)
2376 {
2377 ia64_compute_frame_size (get_frame_size ());
2378
2379 return (current_frame_info.total_size == 0
2380 && current_frame_info.n_spilled == 0
2381 && current_frame_info.reg_save_b0 == 0
2382 && current_frame_info.reg_save_pr == 0
2383 && current_frame_info.reg_save_ar_pfs == 0
2384 && current_frame_info.reg_save_ar_unat == 0
2385 && current_frame_info.reg_save_ar_lc == 0);
2386 }
2387 return 0;
2388 }
2389
2390 /* Emit the function prologue. */
2391
2392 void
2393 ia64_function_prologue (file, size)
2394 FILE *file;
2395 int size ATTRIBUTE_UNUSED;
2396 {
2397 int mask, grsave, grsave_prev;
2398
2399 if (current_frame_info.need_regstk)
2400 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2401 current_frame_info.n_input_regs,
2402 current_frame_info.n_local_regs,
2403 current_frame_info.n_output_regs,
2404 current_frame_info.n_rotate_regs);
2405
2406 if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
2407 return;
2408
2409 /* Emit the .prologue directive. */
2410
2411 mask = 0;
2412 grsave = grsave_prev = 0;
2413 if (current_frame_info.reg_save_b0 != 0)
2414 {
2415 mask |= 8;
2416 grsave = grsave_prev = current_frame_info.reg_save_b0;
2417 }
2418 if (current_frame_info.reg_save_ar_pfs != 0
2419 && (grsave_prev == 0
2420 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2421 {
2422 mask |= 4;
2423 if (grsave_prev == 0)
2424 grsave = current_frame_info.reg_save_ar_pfs;
2425 grsave_prev = current_frame_info.reg_save_ar_pfs;
2426 }
2427 if (current_frame_info.reg_fp != 0
2428 && (grsave_prev == 0
2429 || current_frame_info.reg_fp == grsave_prev + 1))
2430 {
2431 mask |= 2;
2432 if (grsave_prev == 0)
2433 grsave = HARD_FRAME_POINTER_REGNUM;
2434 grsave_prev = current_frame_info.reg_fp;
2435 }
2436 if (current_frame_info.reg_save_pr != 0
2437 && (grsave_prev == 0
2438 || current_frame_info.reg_save_pr == grsave_prev + 1))
2439 {
2440 mask |= 1;
2441 if (grsave_prev == 0)
2442 grsave = current_frame_info.reg_save_pr;
2443 }
2444
2445 if (mask)
2446 fprintf (file, "\t.prologue %d, %d\n", mask,
2447 ia64_dbx_register_number (grsave));
2448 else
2449 fputs ("\t.prologue\n", file);
2450
2451 /* Emit a .spill directive, if necessary, to relocate the base of
2452 the register spill area. */
2453 if (current_frame_info.spill_cfa_off != -16)
2454 fprintf (file, "\t.spill %ld\n",
2455 (long) (current_frame_info.spill_cfa_off
2456 + current_frame_info.spill_size));
2457 }
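/* Illustrative output (hypothetical values): for a frame with two inputs,
   three locals, one output, b0 saved in one GR and ar.pfs in the next, the
   code above might print

       .regstk 2, 3, 1, 0
       .prologue 12, 34

   where the mask 12 == 8|4 records that b0 and ar.pfs live in consecutive
   GRs starting at the register named by the second operand. */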
2458
2459 /* Emit the .body directive at the scheduled end of the prologue. */
2460
2461 void
2462 ia64_output_end_prologue (file)
2463 FILE *file;
2464 {
2465 if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
2466 return;
2467
2468 fputs ("\t.body\n", file);
2469 }
2470
2471 /* Emit the function epilogue. */
2472
2473 void
2474 ia64_function_epilogue (file, size)
2475 FILE *file ATTRIBUTE_UNUSED;
2476 int size ATTRIBUTE_UNUSED;
2477 {
2478 /* Reset from the function's potential modifications. */
2479 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2480
2481 if (current_frame_info.reg_fp)
2482 {
2483 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2484 reg_names[HARD_FRAME_POINTER_REGNUM]
2485 = reg_names[current_frame_info.reg_fp];
2486 reg_names[current_frame_info.reg_fp] = tmp;
2487 }
2488 if (! TARGET_REG_NAMES)
2489 {
2490 int i;
2491
2492 for (i = 0; i < current_frame_info.n_input_regs; i++)
2493 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2494 for (i = 0; i < current_frame_info.n_local_regs; i++)
2495 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2496 for (i = 0; i < current_frame_info.n_output_regs; i++)
2497 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2498 }
2499 current_frame_info.initialized = 0;
2500 }
2501
2502 int
2503 ia64_dbx_register_number (regno)
2504 int regno;
2505 {
2506 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2507 from its home at loc79 to something inside the register frame. We
2508 must perform the same renumbering here for the debug info. */
2509 if (current_frame_info.reg_fp)
2510 {
2511 if (regno == HARD_FRAME_POINTER_REGNUM)
2512 regno = current_frame_info.reg_fp;
2513 else if (regno == current_frame_info.reg_fp)
2514 regno = HARD_FRAME_POINTER_REGNUM;
2515 }
2516
2517 if (IN_REGNO_P (regno))
2518 return 32 + regno - IN_REG (0);
2519 else if (LOC_REGNO_P (regno))
2520 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2521 else if (OUT_REGNO_P (regno))
2522 return (32 + current_frame_info.n_input_regs
2523 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2524 else
2525 return regno;
2526 }
2527
2528 void
2529 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2530 rtx addr, fnaddr, static_chain;
2531 {
2532 rtx addr_reg, eight = GEN_INT (8);
2533
2534 /* Load up our iterator. */
2535 addr_reg = gen_reg_rtx (Pmode);
2536 emit_move_insn (addr_reg, addr);
2537
2538 /* The first two words are the fake descriptor:
2539 __ia64_trampoline, ADDR+16. */
2540 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2541 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2542 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2543
2544 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2545 copy_to_reg (plus_constant (addr, 16)));
2546 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2547
2548 /* The third word is the target descriptor. */
2549 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2550 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2551
2552 /* The fourth word is the static chain. */
2553 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2554 }
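/* The resulting trampoline layout, per the stores above (offsets from ADDR):

       ADDR+ 0: __ia64_trampoline   \ fake function descriptor
       ADDR+ 8: ADDR+16             /
       ADDR+16: FNADDR              target descriptor word
       ADDR+24: STATIC_CHAIN

   Presumably __ia64_trampoline picks up the last two words to install the
   static chain and branch to the real target. */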
2555 \f
2556 /* Do any needed setup for a variadic function. CUM has not been updated
2557 for the last named argument which has type TYPE and mode MODE.
2558
2559 We generate the actual spill instructions during prologue generation. */
2560
2561 void
2562 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2563 CUMULATIVE_ARGS cum;
2564 int int_mode;
2565 tree type;
2566 int * pretend_size;
2567 int second_time ATTRIBUTE_UNUSED;
2568 {
2569 /* If this is a stdarg function, then skip the current argument. */
2570 if (! current_function_varargs)
2571 ia64_function_arg_advance (&cum, int_mode, type, 1);
2572
2573 if (cum.words < MAX_ARGUMENT_SLOTS)
2574 {
2575 int n = MAX_ARGUMENT_SLOTS - cum.words;
2576 *pretend_size = n * UNITS_PER_WORD;
2577 cfun->machine->n_varargs = n;
2578 }
2579 }
2580
2581 /* Check whether TYPE is a homogeneous floating point aggregate. If
2582 it is, return the mode of the floating point type that appears
2583 in all leaves. If it is not, return VOIDmode.
2584
2585 An aggregate is a homogeneous floating point aggregate if all
2586 fields/elements in it have the same floating point type (e.g.,
2587 SFmode). 128-bit quad-precision floats are excluded. */
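/* Illustrative examples of the definition above (assumed, not from the
   original sources):

       struct a { float x, y, z; };       HFA, element mode SFmode
       struct b { double d[4]; };         HFA, element mode DFmode
       struct c { float x; double y; };   mixed element modes, not an HFA
       struct d { int i; float f; };      contains an integer leaf, not an HFA
*/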
2588
2589 static enum machine_mode
2590 hfa_element_mode (type, nested)
2591 tree type;
2592 int nested;
2593 {
2594 enum machine_mode element_mode = VOIDmode;
2595 enum machine_mode mode;
2596 enum tree_code code = TREE_CODE (type);
2597 int know_element_mode = 0;
2598 tree t;
2599
2600 switch (code)
2601 {
2602 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2603 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2604 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2605 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2606 case FUNCTION_TYPE:
2607 return VOIDmode;
2608
2609 /* Fortran complex types are supposed to be HFAs, so we need to handle
2610 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2611 types though. */
2612 case COMPLEX_TYPE:
2613 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2614 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2615 * BITS_PER_UNIT, MODE_FLOAT, 0);
2616 else
2617 return VOIDmode;
2618
2619 case REAL_TYPE:
2620 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2621 mode if this is contained within an aggregate. */
2622 if (nested)
2623 return TYPE_MODE (type);
2624 else
2625 return VOIDmode;
2626
2627 case ARRAY_TYPE:
2628 return TYPE_MODE (TREE_TYPE (type));
2629
2630 case RECORD_TYPE:
2631 case UNION_TYPE:
2632 case QUAL_UNION_TYPE:
2633 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2634 {
2635 if (TREE_CODE (t) != FIELD_DECL)
2636 continue;
2637
2638 mode = hfa_element_mode (TREE_TYPE (t), 1);
2639 if (know_element_mode)
2640 {
2641 if (mode != element_mode)
2642 return VOIDmode;
2643 }
2644 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2645 return VOIDmode;
2646 else
2647 {
2648 know_element_mode = 1;
2649 element_mode = mode;
2650 }
2651 }
2652 return element_mode;
2653
2654 default:
2655 /* If we reach here, we probably have some front-end specific type
2656 that the backend doesn't know about. This can happen via the
2657 aggregate_value_p call in init_function_start. All we can do is
2658 ignore unknown tree types. */
2659 return VOIDmode;
2660 }
2661
2662 return VOIDmode;
2663 }
2664
2665 /* Return rtx for register where argument is passed, or zero if it is passed
2666 on the stack. */
2667
2668 /* ??? 128-bit quad-precision floats are always passed in general
2669 registers. */
2670
2671 rtx
2672 ia64_function_arg (cum, mode, type, named, incoming)
2673 CUMULATIVE_ARGS *cum;
2674 enum machine_mode mode;
2675 tree type;
2676 int named;
2677 int incoming;
2678 {
2679 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2680 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2681 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2682 / UNITS_PER_WORD);
2683 int offset = 0;
2684 enum machine_mode hfa_mode = VOIDmode;
2685
2686 /* Integer and float arguments larger than 8 bytes start at the next even
2687 boundary. Aggregates larger than 8 bytes start at the next even boundary
2688 if the aggregate has 16 byte alignment. Net effect is that types with
2689 alignment greater than 8 start at the next even boundary. */
2690 /* ??? The ABI does not specify how to handle aggregates with alignment from
2691 9 to 15 bytes, or greater than 16. We handle them all as if they had
2692 16 byte alignment. Such aggregates can occur only if gcc extensions are
2693 used. */
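/* For example (illustrative): if one 8-byte argument has already been passed
   (cum->words == 1) and the next argument is a 16-byte-aligned aggregate,
   slot 1 is skipped (offset == 1) and the aggregate starts in even slot 2. */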
2694 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2695 : (words > 1))
2696 && (cum->words & 1))
2697 offset = 1;
2698
2699 /* If all argument slots are used, then it must go on the stack. */
2700 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2701 return 0;
2702
2703 /* Check for and handle homogeneous FP aggregates. */
2704 if (type)
2705 hfa_mode = hfa_element_mode (type, 0);
2706
2707 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2708 and unprototyped hfas are passed specially. */
2709 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2710 {
2711 rtx loc[16];
2712 int i = 0;
2713 int fp_regs = cum->fp_regs;
2714 int int_regs = cum->words + offset;
2715 int hfa_size = GET_MODE_SIZE (hfa_mode);
2716 int byte_size;
2717 int args_byte_size;
2718
2719 /* If prototyped, pass it in FR regs then GR regs.
2720 If not prototyped, pass it in both FR and GR regs.
2721
2722 If this is an SFmode aggregate, then it is possible to run out of
2723 FR regs while GR regs are still left. In that case, we pass the
2724 remaining part in the GR regs. */
2725
2726 /* Fill the FP regs. We do this always. We stop if we reach the end
2727 of the argument, the last FP register, or the last argument slot. */
2728
2729 byte_size = ((mode == BLKmode)
2730 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2731 args_byte_size = int_regs * UNITS_PER_WORD;
2732 offset = 0;
2733 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2734 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2735 {
2736 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2737 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2738 + fp_regs)),
2739 GEN_INT (offset));
2740 offset += hfa_size;
2741 args_byte_size += hfa_size;
2742 fp_regs++;
2743 }
2744
2745 /* If no prototype, then the whole thing must go in GR regs. */
2746 if (! cum->prototype)
2747 offset = 0;
2748 /* If this is an SFmode aggregate, then we might have some left over
2749 that needs to go in GR regs. */
2750 else if (byte_size != offset)
2751 int_regs += offset / UNITS_PER_WORD;
2752
2753 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2754
2755 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
2756 {
2757 enum machine_mode gr_mode = DImode;
2758
2759 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2760 then this goes in a GR reg left adjusted/little endian, right
2761 adjusted/big endian. */
2762 /* ??? Currently this is handled wrong, because 4-byte hunks are
2763 always right adjusted/little endian. */
2764 if (offset & 0x4)
2765 gr_mode = SImode;
2766 /* If we have an even 4 byte hunk because the aggregate is a
2767 multiple of 4 bytes in size, then this goes in a GR reg right
2768 adjusted/little endian. */
2769 else if (byte_size - offset == 4)
2770 gr_mode = SImode;
2771
2772 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2773 gen_rtx_REG (gr_mode, (basereg
2774 + int_regs)),
2775 GEN_INT (offset));
2776 offset += GET_MODE_SIZE (gr_mode);
2777 int_regs++;
2778 }
2779
2780 /* If we ended up using just one location, just return that one loc. */
2781 if (i == 1)
2782 return XEXP (loc[0], 0);
2783 else
2784 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
2785 }
2786
2787 /* Integral and aggregates go in general registers. If we have run out of
2788 FR registers, then FP values must also go in general registers. This can
2789 happen when we have a SFmode HFA. */
2790 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
2791 return gen_rtx_REG (mode, basereg + cum->words + offset);
2792
2793 /* If there is a prototype, then FP values go in a FR register when
2794 named, and in a GR register when unnamed.
2795 else if (cum->prototype)
2796 {
2797 if (! named)
2798 return gen_rtx_REG (mode, basereg + cum->words + offset);
2799 else
2800 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
2801 }
2802 /* If there is no prototype, then FP values go in both FR and GR
2803 registers. */
2804 else
2805 {
2806 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
2807 gen_rtx_REG (mode, (FR_ARG_FIRST
2808 + cum->fp_regs)),
2809 const0_rtx);
2810 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2811 gen_rtx_REG (mode,
2812 (basereg + cum->words
2813 + offset)),
2814 const0_rtx);
2815
2816 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
2817 }
2818 }
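/* Illustrative case (register names assumed): the first double argument of
   an unprototyped call falls into the final else above and is passed in a
   PARALLEL naming both the first FP argument register (f8, if FR_ARG_FIRST
   is f8) and the first outgoing GR, so the callee can fetch it from either
   place. */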
2819
2820 /* Return number of words, at the beginning of the argument, that must be
2821 put in registers. 0 if the argument is entirely in registers or entirely
2822 in memory. */
2823
2824 int
2825 ia64_function_arg_partial_nregs (cum, mode, type, named)
2826 CUMULATIVE_ARGS *cum;
2827 enum machine_mode mode;
2828 tree type;
2829 int named ATTRIBUTE_UNUSED;
2830 {
2831 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2832 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2833 / UNITS_PER_WORD);
2834 int offset = 0;
2835
2836 /* Arguments with alignment larger than 8 bytes start at the next even
2837 boundary. */
2838 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2839 : (words > 1))
2840 && (cum->words & 1))
2841 offset = 1;
2842
2843 /* If all argument slots are used, then it must go on the stack. */
2844 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2845 return 0;
2846
2847 /* It doesn't matter whether the argument goes in FR or GR regs. If
2848 it fits within the 8 argument slots, then it goes entirely in
2849 registers. If it extends past the last argument slot, then the rest
2850 goes on the stack. */
2851
2852 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
2853 return 0;
2854
2855 return MAX_ARGUMENT_SLOTS - cum->words - offset;
2856 }
2857
2858 /* Update CUM to point after this argument. This is patterned after
2859 ia64_function_arg. */
2860
2861 void
2862 ia64_function_arg_advance (cum, mode, type, named)
2863 CUMULATIVE_ARGS *cum;
2864 enum machine_mode mode;
2865 tree type;
2866 int named;
2867 {
2868 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2869 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2870 / UNITS_PER_WORD);
2871 int offset = 0;
2872 enum machine_mode hfa_mode = VOIDmode;
2873
2874 /* If all arg slots are already full, then there is nothing to do. */
2875 if (cum->words >= MAX_ARGUMENT_SLOTS)
2876 return;
2877
2878 /* Arguments with alignment larger than 8 bytes start at the next even
2879 boundary. */
2880 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2881 : (words > 1))
2882 && (cum->words & 1))
2883 offset = 1;
2884
2885 cum->words += words + offset;
2886
2887 /* Check for and handle homogeneous FP aggregates. */
2888 if (type)
2889 hfa_mode = hfa_element_mode (type, 0);
2890
2891 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2892 and unprototyped hfas are passed specially. */
2893 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2894 {
2895 int fp_regs = cum->fp_regs;
2896 /* This is the original value of cum->words + offset. */
2897 int int_regs = cum->words - words;
2898 int hfa_size = GET_MODE_SIZE (hfa_mode);
2899 int byte_size;
2900 int args_byte_size;
2901
2902 /* If prototyped, pass it in FR regs then GR regs.
2903 If not prototyped, pass it in both FR and GR regs.
2904
2905 If this is an SFmode aggregate, then it is possible to run out of
2906 FR regs while GR regs are still left. In that case, we pass the
2907 remaining part in the GR regs. */
2908
2909 /* Fill the FP regs. We do this always. We stop if we reach the end
2910 of the argument, the last FP register, or the last argument slot. */
2911
2912 byte_size = ((mode == BLKmode)
2913 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2914 args_byte_size = int_regs * UNITS_PER_WORD;
2915 offset = 0;
2916 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2917 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
2918 {
2919 offset += hfa_size;
2920 args_byte_size += hfa_size;
2921 fp_regs++;
2922 }
2923
2924 cum->fp_regs = fp_regs;
2925 }
2926
2927 /* Integral and aggregates go in general registers. If we have run out of
2928 FR registers, then FP values must also go in general registers. This can
2929 happen when we have a SFmode HFA. */
2930 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
2931 return;
2932
2933 /* If there is a prototype, then FP values go in a FR register when
2934 named, and in a GR register when unnamed.
2935 else if (cum->prototype)
2936 {
2937 if (! named)
2938 return;
2939 else
2940 /* ??? Complex types should not reach here. */
2941 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
2942 }
2943 /* If there is no prototype, then FP values go in both FR and GR
2944 registers. */
2945 else
2946 /* ??? Complex types should not reach here. */
2947 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
2948
2949 return;
2950 }
2951 \f
2952 /* Implement va_start. */
2953
2954 void
2955 ia64_va_start (stdarg_p, valist, nextarg)
2956 int stdarg_p;
2957 tree valist;
2958 rtx nextarg;
2959 {
2960 int arg_words;
2961 int ofs;
2962
2963 arg_words = current_function_args_info.words;
2964
2965 if (stdarg_p)
2966 ofs = 0;
2967 else
2968 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
2969
2970 nextarg = plus_constant (nextarg, ofs);
2971 std_expand_builtin_va_start (1, valist, nextarg);
2972 }
2973
2974 /* Implement va_arg. */
2975
2976 rtx
2977 ia64_va_arg (valist, type)
2978 tree valist, type;
2979 {
2980 tree t;
2981
2982 /* Arguments with alignment larger than 8 bytes start at the next even
2983 boundary. */
2984 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2985 {
2986 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
2987 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
2988 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
2989 build_int_2 (-2 * UNITS_PER_WORD, -1));
2990 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
2991 TREE_SIDE_EFFECTS (t) = 1;
2992 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2993 }
2994
2995 return std_expand_builtin_va_arg (valist, type);
2996 }
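/* A minimal sketch of the alignment computed above (illustrative), assuming
   UNITS_PER_WORD == 8: the two builds round valist up to a 16-byte boundary,
   e.g. for valist == 40,

       (40 + 15) & -16 == 48

   after which the standard va_arg expansion continues from the aligned
   pointer. */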
2997 \f
2998 /* Return 1 if the function return value is returned in memory. Return 0
2999 if it is in a register. */
3000
3001 int
3002 ia64_return_in_memory (valtype)
3003 tree valtype;
3004 {
3005 enum machine_mode mode;
3006 enum machine_mode hfa_mode;
3007 int byte_size;
3008
3009 mode = TYPE_MODE (valtype);
3010 byte_size = ((mode == BLKmode)
3011 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3012
3013 /* HFAs with up to 8 elements are returned in the FP argument registers. */
3014
3015 hfa_mode = hfa_element_mode (valtype, 0);
3016 if (hfa_mode != VOIDmode)
3017 {
3018 int hfa_size = GET_MODE_SIZE (hfa_mode);
3019
3020 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3021 return 1;
3022 else
3023 return 0;
3024 }
3025
3026 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3027 return 1;
3028 else
3029 return 0;
3030 }
3031
3032 /* Return rtx for register that holds the function return value. */
3033
3034 rtx
3035 ia64_function_value (valtype, func)
3036 tree valtype;
3037 tree func ATTRIBUTE_UNUSED;
3038 {
3039 enum machine_mode mode;
3040 enum machine_mode hfa_mode;
3041
3042 mode = TYPE_MODE (valtype);
3043 hfa_mode = hfa_element_mode (valtype, 0);
3044
3045 if (hfa_mode != VOIDmode)
3046 {
3047 rtx loc[8];
3048 int i;
3049 int hfa_size;
3050 int byte_size;
3051 int offset;
3052
3053 hfa_size = GET_MODE_SIZE (hfa_mode);
3054 byte_size = ((mode == BLKmode)
3055 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3056 offset = 0;
3057 for (i = 0; offset < byte_size; i++)
3058 {
3059 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3060 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3061 GEN_INT (offset));
3062 offset += hfa_size;
3063 }
3064
3065 if (i == 1)
3066 return XEXP (loc[0], 0);
3067 else
3068 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3069 }
3070 else if (FLOAT_TYPE_P (valtype))
3071 return gen_rtx_REG (mode, FR_ARG_FIRST);
3072 else
3073 return gen_rtx_REG (mode, GR_RET_FIRST);
3074 }
3075
3076 /* Print a memory address as an operand to reference that memory location. */
3077
3078 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3079 also call this from ia64_print_operand for memory addresses. */
3080
3081 void
3082 ia64_print_operand_address (stream, address)
3083 FILE * stream ATTRIBUTE_UNUSED;
3084 rtx address ATTRIBUTE_UNUSED;
3085 {
3086 }
3087
3088 /* Print an operand to an assembler instruction.
3089 B Workarounds for hardware bugs.
3090 C Swap and print a comparison operator.
3091 D Print an FP comparison operator.
3092 E Print 32 - constant, for SImode shifts as extract.
3093 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3094 a floating point register emitted normally.
3095 I Invert a predicate register by adding 1.
3096 J Select the proper predicate register for a condition.
3097 j Select the inverse predicate register for a condition.
3098 O Append .acq for volatile load.
3099 P Postincrement of a MEM.
3100 Q Append .rel for volatile store.
3101 S Shift amount for shladd instruction.
3102 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3103 for Intel assembler.
3104 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3105 for Intel assembler.
3106 r Print register name, or constant 0 as r0. HP compatibility for
3107 Linux kernel. */
3108 void
3109 ia64_print_operand (file, x, code)
3110 FILE * file;
3111 rtx x;
3112 int code;
3113 {
3114 const char *str;
3115
3116 switch (code)
3117 {
3118 case 0:
3119 /* Handled below. */
3120 break;
3121
3122 case 'B':
3123 if (TARGET_A_STEP)
3124 fputs (" ;; nop 0 ;; nop 0 ;;", file);
3125 return;
3126
3127 case 'C':
3128 {
3129 enum rtx_code c = swap_condition (GET_CODE (x));
3130 fputs (GET_RTX_NAME (c), file);
3131 return;
3132 }
3133
3134 case 'D':
3135 switch (GET_CODE (x))
3136 {
3137 case NE:
3138 str = "neq";
3139 break;
3140 case UNORDERED:
3141 str = "unord";
3142 break;
3143 case ORDERED:
3144 str = "ord";
3145 break;
3146 default:
3147 str = GET_RTX_NAME (GET_CODE (x));
3148 break;
3149 }
3150 fputs (str, file);
3151 return;
3152
3153 case 'E':
3154 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3155 return;
3156
3157 case 'F':
3158 if (x == CONST0_RTX (GET_MODE (x)))
3159 str = reg_names [FR_REG (0)];
3160 else if (x == CONST1_RTX (GET_MODE (x)))
3161 str = reg_names [FR_REG (1)];
3162 else if (GET_CODE (x) == REG)
3163 str = reg_names [REGNO (x)];
3164 else
3165 abort ();
3166 fputs (str, file);
3167 return;
3168
3169 case 'I':
3170 fputs (reg_names [REGNO (x) + 1], file);
3171 return;
3172
3173 case 'J':
3174 case 'j':
3175 {
3176 unsigned int regno = REGNO (XEXP (x, 0));
3177 if (GET_CODE (x) == EQ)
3178 regno += 1;
3179 if (code == 'j')
3180 regno ^= 1;
3181 fputs (reg_names [regno], file);
3182 }
3183 return;
3184
3185 case 'O':
3186 if (MEM_VOLATILE_P (x))
3187 fputs(".acq", file);
3188 return;
3189
3190 case 'P':
3191 {
3192 HOST_WIDE_INT value;
3193
3194 switch (GET_CODE (XEXP (x, 0)))
3195 {
3196 default:
3197 return;
3198
3199 case POST_MODIFY:
3200 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3201 if (GET_CODE (x) == CONST_INT)
3202 value = INTVAL (x);
3203 else if (GET_CODE (x) == REG)
3204 {
3205 fprintf (file, ", %s", reg_names[REGNO (x)]);
3206 return;
3207 }
3208 else
3209 abort ();
3210 break;
3211
3212 case POST_INC:
3213 value = GET_MODE_SIZE (GET_MODE (x));
3214 break;
3215
3216 case POST_DEC:
3217 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3218 break;
3219 }
3220
3221 putc (',', file);
3222 putc (' ', file);
3223 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3224 return;
3225 }
3226
3227 case 'Q':
3228 if (MEM_VOLATILE_P (x))
3229 fputs(".rel", file);
3230 return;
3231
3232 case 'S':
3233 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3234 return;
3235
3236 case 'T':
3237 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3238 {
3239 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3240 return;
3241 }
3242 break;
3243
3244 case 'U':
3245 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3246 {
3247 const char *prefix = "0x";
3248 if (INTVAL (x) & 0x80000000)
3249 {
3250 fprintf (file, "0xffffffff");
3251 prefix = "";
3252 }
3253 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3254 return;
3255 }
3256 break;
3257
3258 case 'r':
3259 /* If this operand is the constant zero, write it as zero. */
3260 if (GET_CODE (x) == REG)
3261 fputs (reg_names[REGNO (x)], file);
3262 else if (x == CONST0_RTX (GET_MODE (x)))
3263 fputs ("r0", file);
3264 else
3265 output_operand_lossage ("invalid %%r value");
3266 return;
3267
3268 case '+':
3269 {
3270 const char *which;
3271
3272 /* For conditional branches, returns or calls, substitute
3273 sptk, dptk, dpnt, or spnt for %s. */
3274 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3275 if (x)
3276 {
3277 int pred_val = INTVAL (XEXP (x, 0));
3278
3279 /* Guess top and bottom 10% statically predicted. */
3280 if (pred_val < REG_BR_PROB_BASE / 10)
3281 which = ".spnt";
3282 else if (pred_val < REG_BR_PROB_BASE / 2)
3283 which = ".dpnt";
3284 else if (pred_val < REG_BR_PROB_BASE * 9 / 10)
3285 which = ".dptk";
3286 else
3287 which = ".sptk";
3288 }
3289 else if (GET_CODE (current_output_insn) == CALL_INSN)
3290 which = ".sptk";
3291 else
3292 which = ".dptk";
3293
3294 fputs (which, file);
3295 return;
3296 }
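/* Illustrative thresholds, assuming REG_BR_PROB_BASE == 10000: a REG_BR_PROB
   value of 300 selects ".spnt", 3000 ".dpnt", 7000 ".dptk", and 9800
   ".sptk"; calls with no probability note default to ".sptk". */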
3297
3298 case ',':
3299 x = current_insn_predicate;
3300 if (x)
3301 {
3302 unsigned int regno = REGNO (XEXP (x, 0));
3303 if (GET_CODE (x) == EQ)
3304 regno += 1;
3305 fprintf (file, "(%s) ", reg_names [regno]);
3306 }
3307 return;
3308
3309 default:
3310 output_operand_lossage ("ia64_print_operand: unknown code");
3311 return;
3312 }
3313
3314 switch (GET_CODE (x))
3315 {
3316 /* This happens for the spill/restore instructions. */
3317 case POST_INC:
3318 case POST_DEC:
3319 case POST_MODIFY:
3320 x = XEXP (x, 0);
3321 /* ... fall through ... */
3322
3323 case REG:
3324 fputs (reg_names [REGNO (x)], file);
3325 break;
3326
3327 case MEM:
3328 {
3329 rtx addr = XEXP (x, 0);
3330 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3331 addr = XEXP (addr, 0);
3332 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3333 break;
3334 }
3335
3336 default:
3337 output_addr_const (file, x);
3338 break;
3339 }
3340
3341 return;
3342 }
3343 \f
3344 /* Calculate the cost of moving data from a register in class FROM to
3345 one in class TO. */
3346
3347 int
3348 ia64_register_move_cost (from, to)
3349 enum reg_class from, to;
3350 {
3351 int from_hard, to_hard;
3352 int from_gr, to_gr;
3353 int from_fr, to_fr;
3354 int from_pr, to_pr;
3355
3356 from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
3357 to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
3358 from_gr = (from == GENERAL_REGS);
3359 to_gr = (to == GENERAL_REGS);
3360 from_fr = (from == FR_REGS);
3361 to_fr = (to == FR_REGS);
3362 from_pr = (from == PR_REGS);
3363 to_pr = (to == PR_REGS);
3364
3365 if (from_hard && to_hard)
3366 return 8;
3367 else if ((from_hard && !to_gr) || (!from_gr && to_hard))
3368 return 6;
3369
3370 /* Moving between PR registers takes two insns. */
3371 else if (from_pr && to_pr)
3372 return 3;
3373 /* Moving between PR and anything but GR is impossible. */
3374 else if ((from_pr && !to_gr) || (!from_gr && to_pr))
3375 return 6;
3376
3377 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3378 secondary memory reloads for TFmode moves. Unfortunately, we don't
3379 have the mode here, so we can't check that. */
3380 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3381 to avoid spectacularly poor register class preferencing for TFmode. */
3382 else if (from_fr != to_fr)
3383 return 5;
3384
3385 return 2;
3386 }
3387
3388 /* This function returns the register class required for a secondary
3389 register when copying between one of the registers in CLASS, and X,
3390 using MODE. A return value of NO_REGS means that no secondary register
3391 is required. */
3392
3393 enum reg_class
3394 ia64_secondary_reload_class (class, mode, x)
3395 enum reg_class class;
3396 enum machine_mode mode ATTRIBUTE_UNUSED;
3397 rtx x;
3398 {
3399 int regno = -1;
3400
3401 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3402 regno = true_regnum (x);
3403
3404 switch (class)
3405 {
3406 case BR_REGS:
3407 /* ??? This is required because of a bad gcse/cse/global interaction.
3408 We end up with two pseudos with overlapping lifetimes, both of which
3409 are equiv to the same constant and both of which need to be in BR_REGS.
3410 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3411 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3412 This seems to be a cse bug. cse_basic_block_end changes depending
3413 on the path length, which means the qty_first_reg check in
3414 make_regs_eqv can give different answers at different times. */
3415 /* ??? At some point I'll probably need a reload_indi pattern to handle
3416 this. */
3417 if (BR_REGNO_P (regno))
3418 return GR_REGS;
3419
3420 /* This is needed if a pseudo used as a call_operand gets spilled to a
3421 stack slot. */
3422 if (GET_CODE (x) == MEM)
3423 return GR_REGS;
3424 break;
3425
3426 case FR_REGS:
3427 /* This can happen when a paradoxical subreg is an operand to the
3428 muldi3 pattern. */
3429 /* ??? This shouldn't be necessary after instruction scheduling is
3430 enabled, because paradoxical subregs are not accepted by
3431 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3432 stop the paradoxical subreg stupidity in the *_operand functions
3433 in recog.c. */
3434 if (GET_CODE (x) == MEM
3435 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3436 || GET_MODE (x) == QImode))
3437 return GR_REGS;
3438
3439 /* This can happen because of the ior/and/etc patterns that accept FP
3440 registers as operands. If the third operand is a constant, then it
3441 needs to be reloaded into a FP register. */
3442 if (GET_CODE (x) == CONST_INT)
3443 return GR_REGS;
3444
3445 /* This can happen because of register elimination in a muldi3 insn.
3446 E.g. `26107 * (unsigned long)&u'. */
3447 if (GET_CODE (x) == PLUS)
3448 return GR_REGS;
3449 break;
3450
3451 case PR_REGS:
3452 /* ??? This happens if we cse/gcse a BImode value across a call,
3453 and the function has a nonlocal goto. This is because global
3454 does not allocate call crossing pseudos to hard registers when
3455 current_function_has_nonlocal_goto is true. This is relatively
3456 common for C++ programs that use exceptions. To reproduce,
3457 return NO_REGS and compile libstdc++. */
3458 if (GET_CODE (x) == MEM)
3459 return GR_REGS;
3460
3461 /* This can happen when we take a BImode subreg of a DImode value,
3462 and that DImode value winds up in some non-GR register. */
3463 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3464 return GR_REGS;
3465 break;
3466
3467 case GR_REGS:
3468 /* Since we have no offsettable memory addresses, we need a temporary
3469 to hold the address of the second word. */
3470 if (mode == TImode)
3471 return GR_REGS;
3472 break;
3473
3474 default:
3475 break;
3476 }
3477
3478 return NO_REGS;
3479 }
3480
3481 \f
3482 /* Emit text to declare externally defined variables and functions, because
3483 the Intel assembler does not support undefined externals. */
3484
3485 void
3486 ia64_asm_output_external (file, decl, name)
3487 FILE *file;
3488 tree decl;
3489 const char *name;
3490 {
3491 int save_referenced;
3492
3493 /* GNU as does not need anything here. */
3494 if (TARGET_GNU_AS)
3495 return;
3496
3497 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3498 the linker when we do this, so we need to be careful not to do this for
3499 builtin functions which have no library equivalent. Unfortunately, we
3500 can't tell here whether or not a function will actually be called by
3501 expand_expr, so we pull in library functions even if we may not need
3502 them later. */
3503 if (! strcmp (name, "__builtin_next_arg")
3504 || ! strcmp (name, "alloca")
3505 || ! strcmp (name, "__builtin_constant_p")
3506 || ! strcmp (name, "__builtin_args_info"))
3507 return;
3508
3509 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3510 restore it. */
3511 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3512 if (TREE_CODE (decl) == FUNCTION_DECL)
3513 {
3514 fprintf (file, "%s", TYPE_ASM_OP);
3515 assemble_name (file, name);
3516 putc (',', file);
3517 fprintf (file, TYPE_OPERAND_FMT, "function");
3518 putc ('\n', file);
3519 }
3520 ASM_GLOBALIZE_LABEL (file, name);
3521 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3522 }
3523 \f
3524 /* Parse the -mfixed-range= option string. */
3525
3526 static void
3527 fix_range (const_str)
3528 const char *const_str;
3529 {
3530 int i, first, last;
3531 char *str, *dash, *comma;
3532
3533 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3534 REG2 are either register names or register numbers. The effect
3535 of this option is to mark the registers in the range from REG1 to
3536 REG2 as ``fixed'' so they won't be used by the compiler. This is
3537 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3538
3539 i = strlen (const_str);
3540 str = (char *) alloca (i + 1);
3541 memcpy (str, const_str, i + 1);
3542
3543 while (1)
3544 {
3545 dash = strchr (str, '-');
3546 if (!dash)
3547 {
3548 warning ("value of -mfixed-range must have form REG1-REG2");
3549 return;
3550 }
3551 *dash = '\0';
3552
3553 comma = strchr (dash + 1, ',');
3554 if (comma)
3555 *comma = '\0';
3556
3557 first = decode_reg_name (str);
3558 if (first < 0)
3559 {
3560 warning ("unknown register name: %s", str);
3561 return;
3562 }
3563
3564 last = decode_reg_name (dash + 1);
3565 if (last < 0)
3566 {
3567 warning ("unknown register name: %s", dash + 1);
3568 return;
3569 }
3570
3571 *dash = '-';
3572
3573 if (first > last)
3574 {
3575 warning ("%s-%s is an empty range", str, dash + 1);
3576 return;
3577 }
3578
3579 for (i = first; i <= last; ++i)
3580 fixed_regs[i] = call_used_regs[i] = 1;
3581
3582 if (!comma)
3583 break;
3584
3585 *comma = ',';
3586 str = comma + 1;
3587 }
3588 }
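/* Usage example (illustrative): -mfixed-range=f32-f127 keeps the compiler
   off the upper FP registers, and several ranges may be chained, as in
   -mfixed-range=f32-f63,f64-f127. */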
3589
3590 /* Called to register all of our global variables with the garbage
3591 collector. */
3592
3593 static void
3594 ia64_add_gc_roots ()
3595 {
3596 ggc_add_rtx_root (&ia64_compare_op0, 1);
3597 ggc_add_rtx_root (&ia64_compare_op1, 1);
3598 }
3599
3600 static void
3601 ia64_init_machine_status (p)
3602 struct function *p;
3603 {
3604 p->machine =
3605 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3606 }
3607
3608 static void
3609 ia64_mark_machine_status (p)
3610 struct function *p;
3611 {
3612 ggc_mark_rtx (p->machine->ia64_eh_epilogue_sp);
3613 ggc_mark_rtx (p->machine->ia64_eh_epilogue_bsp);
3614 ggc_mark_rtx (p->machine->ia64_gp_save);
3615 }
3616
3617
3618 /* Handle TARGET_OPTIONS switches. */
3619
3620 void
3621 ia64_override_options ()
3622 {
3623 if (TARGET_AUTO_PIC)
3624 target_flags |= MASK_CONST_GP;
3625
3626 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3627 {
3628 warning ("cannot optimize division for both latency and throughput");
3629 target_flags &= ~MASK_INLINE_DIV_THR;
3630 }
3631
3632 if (ia64_fixed_range_string)
3633 fix_range (ia64_fixed_range_string);
3634
3635 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3636
3637 init_machine_status = ia64_init_machine_status;
3638 mark_machine_status = ia64_mark_machine_status;
3639
3640 ia64_add_gc_roots ();
3641 }
3642 \f
3643 /* The following collection of routines emit instruction group stop bits as
3644 necessary to avoid dependencies. */
3645
3646 /* Need to track some additional registers as far as serialization is
3647 concerned so we can properly handle br.call and br.ret. We could
3648 make these registers visible to gcc, but since these registers are
3649 never explicitly used in gcc generated code, it seems wasteful to
3650 do so (plus it would make the call and return patterns needlessly
3651 complex). */
3652 #define REG_GP (GR_REG (1))
3653 #define REG_RP (BR_REG (0))
3654 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
3655 /* This is used for volatile asms which may require a stop bit immediately
3656 before and after them. */
3657 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
3658 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3659 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
3660
3661 /* For each register, we keep track of how it has been written in the
3662 current instruction group.
3663
3664 If a register is written unconditionally (no qualifying predicate),
3665 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3666
3667 If a register is written if its qualifying predicate P is true, we
3668 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3669 may be written again by the complement of P (P^1) and when this happens,
3670 WRITE_COUNT gets set to 2.
3671
3672 The result of this is that whenever an insn attempts to write a register
3673 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
3674
3675 If a predicate register is written by a floating-point insn, we set
3676 WRITTEN_BY_FP to true.
3677
3678 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3679 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
3680
3681 struct reg_write_state
3682 {
3683 unsigned int write_count : 2;
3684 unsigned int first_pred : 16;
3685 unsigned int written_by_fp : 1;
3686 unsigned int written_by_and : 1;
3687 unsigned int written_by_or : 1;
3688 };
3689
3690 /* Cumulative info for the current instruction group. */
3691 struct reg_write_state rws_sum[NUM_REGS];
3692 /* Info for the current instruction. This gets copied to rws_sum after a
3693 stop bit is emitted. */
3694 struct reg_write_state rws_insn[NUM_REGS];
3695
3696 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
3697 RTL for one instruction. */
3698 struct reg_flags
3699 {
3700 unsigned int is_write : 1; /* Is register being written? */
3701 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
3702 unsigned int is_branch : 1; /* Is register used as part of a branch? */
3703 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
3704 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
3705 unsigned int is_sibcall : 1; /* Is this a sibling call? */
3706 };
3707
3708 static void rws_update PARAMS ((struct reg_write_state *, int,
3709 struct reg_flags, int));
3710 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
3711 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
3712 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
3713
3714 /* Update *RWS for REGNO, which is being written by the current instruction,
3715 with predicate PRED, and associated register flags in FLAGS. */
3716
3717 static void
3718 rws_update (rws, regno, flags, pred)
3719 struct reg_write_state *rws;
3720 int regno;
3721 struct reg_flags flags;
3722 int pred;
3723 {
3724 rws[regno].write_count += pred ? 1 : 2;
3725 rws[regno].written_by_fp |= flags.is_fp;
3726 /* ??? Not tracking and/or across differing predicates. */
3727 rws[regno].written_by_and = flags.is_and;
3728 rws[regno].written_by_or = flags.is_or;
3729 rws[regno].first_pred = pred;
3730 }
3731
3732 /* Handle an access to register REGNO of type FLAGS using predicate register
3733 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
3734 a dependency with an earlier instruction in the same group. */
3735
3736 static int
3737 rws_access_regno (regno, flags, pred)
3738 int regno;
3739 struct reg_flags flags;
3740 int pred;
3741 {
3742 int need_barrier = 0;
3743
3744 if (regno >= NUM_REGS)
3745 abort ();
3746
3747 if (! PR_REGNO_P (regno))
3748 flags.is_and = flags.is_or = 0;
3749
3750 if (flags.is_write)
3751 {
3752 int write_count;
3753
3754 /* One insn writes same reg multiple times? */
3755 if (rws_insn[regno].write_count > 0)
3756 abort ();
3757
3758 /* Update info for current instruction. */
3759 rws_update (rws_insn, regno, flags, pred);
3760 write_count = rws_sum[regno].write_count;
3761
3762 switch (write_count)
3763 {
3764 case 0:
3765 /* The register has not been written yet. */
3766 rws_update (rws_sum, regno, flags, pred);
3767 break;
3768
3769 case 1:
3770 /* The register has been written via a predicate. If this is
3771 not a complementary predicate, then we need a barrier. */
3772 /* ??? This assumes that P and P+1 are always complementary
3773 predicates when P is even. */
3774 if (flags.is_and && rws_sum[regno].written_by_and)
3775 ;
3776 else if (flags.is_or && rws_sum[regno].written_by_or)
3777 ;
3778 else if ((rws_sum[regno].first_pred ^ 1) != pred)
3779 need_barrier = 1;
3780 rws_update (rws_sum, regno, flags, pred);
3781 break;
3782
3783 case 2:
3784 /* The register has been unconditionally written already. We
3785 need a barrier. */
3786 if (flags.is_and && rws_sum[regno].written_by_and)
3787 ;
3788 else if (flags.is_or && rws_sum[regno].written_by_or)
3789 ;
3790 else
3791 need_barrier = 1;
3792 rws_sum[regno].written_by_and = flags.is_and;
3793 rws_sum[regno].written_by_or = flags.is_or;
3794 break;
3795
3796 default:
3797 abort ();
3798 }
3799 }
3800 else
3801 {
3802 if (flags.is_branch)
3803 {
3804 /* Branches have several RAW exceptions that allow us to avoid
3805 barriers. */
3806
3807 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
3808 /* RAW dependencies on branch regs are permissible as long
3809 as the writer is a non-branch instruction. Since we
3810 never generate code that uses a branch register written
3811 by a branch instruction, handling this case is
3812 easy. */
3813 return 0;
3814
3815 if (REGNO_REG_CLASS (regno) == PR_REGS
3816 && ! rws_sum[regno].written_by_fp)
3817 /* The predicates of a branch are available within the
3818 same insn group as long as the predicate was written by
3819 something other than a floating-point instruction. */
3820 return 0;
3821 }
3822
3823 if (flags.is_and && rws_sum[regno].written_by_and)
3824 return 0;
3825 if (flags.is_or && rws_sum[regno].written_by_or)
3826 return 0;
3827
3828 switch (rws_sum[regno].write_count)
3829 {
3830 case 0:
3831 /* The register has not been written yet. */
3832 break;
3833
3834 case 1:
3835 /* The register has been written via a predicate. If this is
3836 not a complementary predicate, then we need a barrier. */
3837 /* ??? This assumes that P and P+1 are always complementary
3838 predicates when P is even. */
3839 if ((rws_sum[regno].first_pred ^ 1) != pred)
3840 need_barrier = 1;
3841 break;
3842
3843 case 2:
3844 /* The register has been unconditionally written already. We
3845 need a barrier. */
3846 need_barrier = 1;
3847 break;
3848
3849 default:
3850 abort ();
3851 }
3852 }
3853
3854 return need_barrier;
3855 }
3856
3857 static int
3858 rws_access_reg (reg, flags, pred)
3859 rtx reg;
3860 struct reg_flags flags;
3861 int pred;
3862 {
3863 int regno = REGNO (reg);
3864 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
3865
3866 if (n == 1)
3867 return rws_access_regno (regno, flags, pred);
3868 else
3869 {
3870 int need_barrier = 0;
3871 while (--n >= 0)
3872 need_barrier |= rws_access_regno (regno + n, flags, pred);
3873 return need_barrier;
3874 }
3875 }
3876
3877 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
3878 Return 1 if this access creates a dependency with an earlier instruction
3879 in the same group. */
3880
3881 static int
3882 rtx_needs_barrier (x, flags, pred)
3883 rtx x;
3884 struct reg_flags flags;
3885 int pred;
3886 {
3887 int i, j;
3888 int is_complemented = 0;
3889 int need_barrier = 0;
3890 const char *format_ptr;
3891 struct reg_flags new_flags;
3892 rtx src, dst;
3893 rtx cond = 0;
3894
3895 if (! x)
3896 return 0;
3897
3898 new_flags = flags;
3899
3900 switch (GET_CODE (x))
3901 {
3902 case SET:
3903 src = SET_SRC (x);
3904 switch (GET_CODE (src))
3905 {
3906 case CALL:
3907 /* We don't need to worry about the result registers that
3908 get written by a subroutine call. */
3909 need_barrier = rtx_needs_barrier (src, flags, pred);
3910 return need_barrier;
3911
3912 case IF_THEN_ELSE:
3913 if (SET_DEST (x) == pc_rtx)
3914 {
3915 /* X is a conditional branch. */
3916 /* ??? This seems redundant, as the caller sets this bit for
3917 all JUMP_INSNs. */
3918 new_flags.is_branch = 1;
3919 need_barrier = rtx_needs_barrier (src, new_flags, pred);
3920 return need_barrier;
3921 }
3922 else
3923 {
3924 /* X is a conditional move. */
3925 cond = XEXP (src, 0);
3926 if (GET_CODE (cond) == EQ)
3927 is_complemented = 1;
3928 cond = XEXP (cond, 0);
3929 if (GET_CODE (cond) != REG
3930 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
3931 abort ();
3932
3933 if (XEXP (src, 1) == SET_DEST (x)
3934 || XEXP (src, 2) == SET_DEST (x))
3935 {
3936 /* X is a conditional move that conditionally writes the
3937 destination. */
3938
3939 /* We need another complement in this case. */
3940 if (XEXP (src, 1) == SET_DEST (x))
3941 is_complemented = ! is_complemented;
3942
3943 pred = REGNO (cond);
3944 if (is_complemented)
3945 ++pred;
3946 }
3947
3948 /* ??? If this is a conditional write to the dest, then this
3949 instruction does not actually read one source. This probably
3950 doesn't matter, because that source is also the dest. */
3951 /* ??? Multiple writes to predicate registers are allowed
3952 if they are all AND type compares, or if they are all OR
3953 type compares. We do not generate such instructions
3954 currently. */
3955 }
3956 /* ... fall through ... */
3957
3958 default:
3959 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
3960 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
3961 /* Set new_flags.is_fp to 1 so that we know we're dealing
3962 with a floating point comparison when processing the
3963 destination of the SET. */
3964 new_flags.is_fp = 1;
3965
3966 /* Discover if this is a parallel comparison. We only handle
3967 and.orcm and or.andcm at present, since we must retain a
3968 strict inverse on the predicate pair. */
3969 else if (GET_CODE (src) == AND)
3970 new_flags.is_and = flags.is_and = 1;
3971 else if (GET_CODE (src) == IOR)
3972 new_flags.is_or = flags.is_or = 1;
3973
3974 break;
3975 }
3976 need_barrier = rtx_needs_barrier (src, flags, pred);
3977
3978 /* This instruction unconditionally uses a predicate register. */
3979 if (cond)
3980 need_barrier |= rws_access_reg (cond, flags, 0);
3981
3982 dst = SET_DEST (x);
3983 if (GET_CODE (dst) == ZERO_EXTRACT)
3984 {
3985 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
3986 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
3987 dst = XEXP (dst, 0);
3988 }
3989 new_flags.is_write = 1;
3990 need_barrier |= rtx_needs_barrier (dst, new_flags, pred);
3991 break;
3992
3993 case CALL:
3994 new_flags.is_write = 0;
3995 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
3996
3997 /* Avoid multiple register writes, in case this is a pattern with
3998 multiple CALL rtx. This avoids an abort in rws_access_reg. */
3999 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4000 {
4001 new_flags.is_write = 1;
4002 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4003 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4004 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4005 }
4006 break;
4007
4008 case COND_EXEC:
4009 /* X is a predicated instruction. */
4010
4011 cond = COND_EXEC_TEST (x);
4012 if (pred)
4013 abort ();
4014 need_barrier = rtx_needs_barrier (cond, flags, 0);
4015
4016 if (GET_CODE (cond) == EQ)
4017 is_complemented = 1;
4018 cond = XEXP (cond, 0);
4019 if (GET_CODE (cond) != REG
4020 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4021 abort ();
4022 pred = REGNO (cond);
4023 if (is_complemented)
4024 ++pred;
4025
4026 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4027 return need_barrier;
4028
4029 case CLOBBER:
4030 case USE:
4031 /* Clobber & use are for earlier compiler phases only. */
4032 break;
4033
4034 case ASM_OPERANDS:
4035 case ASM_INPUT:
4036 /* We always emit stop bits for traditional asms. We emit stop bits
4037 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
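/* For example, assuming TARGET_VOL_ASM_STOP corresponds to the
   -mvolatile-asm-stop option, a volatile extended asm such as

	asm volatile ("mf" ::: "memory");

   is treated as a write to REG_VOLATILE, which forces a stop bit both
   before and after it.  */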
4038 if (GET_CODE (x) != ASM_OPERANDS
4039 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4040 {
4041 /* Avoid writing the register multiple times if we have multiple
4042 asm outputs. This avoids an abort in rws_access_reg. */
4043 if (! rws_insn[REG_VOLATILE].write_count)
4044 {
4045 new_flags.is_write = 1;
4046 rws_access_regno (REG_VOLATILE, new_flags, pred);
4047 }
4048 return 1;
4049 }
4050
4051 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4052 We cannot just fall through here, since we would then be confused
4053 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
4054 a traditional asm, unlike its normal usage. */
4055
4056 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4057 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4058 need_barrier = 1;
4059 break;
4060
4061 case PARALLEL:
4062 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4063 if (rtx_needs_barrier (XVECEXP (x, 0, i), flags, pred))
4064 need_barrier = 1;
4065 break;
4066
4067 case SUBREG:
4068 x = SUBREG_REG (x);
4069 /* FALLTHRU */
4070 case REG:
4071 if (REGNO (x) == AR_UNAT_REGNUM)
4072 {
4073 for (i = 0; i < 64; ++i)
4074 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4075 }
4076 else
4077 need_barrier = rws_access_reg (x, flags, pred);
4078 break;
4079
4080 case MEM:
4081 /* Find the regs used in memory address computation. */
4082 new_flags.is_write = 0;
4083 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4084 break;
4085
4086 case CONST_INT: case CONST_DOUBLE:
4087 case SYMBOL_REF: case LABEL_REF: case CONST:
4088 break;
4089
4090 /* Operators with side-effects. */
4091 case POST_INC: case POST_DEC:
4092 if (GET_CODE (XEXP (x, 0)) != REG)
4093 abort ();
4094
4095 new_flags.is_write = 0;
4096 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4097 new_flags.is_write = 1;
4098 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4099 break;
4100
4101 case POST_MODIFY:
4102 if (GET_CODE (XEXP (x, 0)) != REG)
4103 abort ();
4104
4105 new_flags.is_write = 0;
4106 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4107 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4108 new_flags.is_write = 1;
4109 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4110 break;
4111
4112 /* Handle common unary and binary ops for efficiency. */
4113 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4114 case MOD: case UDIV: case UMOD: case AND: case IOR:
4115 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4116 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4117 case NE: case EQ: case GE: case GT: case LE:
4118 case LT: case GEU: case GTU: case LEU: case LTU:
4119 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4120 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4121 break;
4122
4123 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4124 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4125 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4126 case SQRT: case FFS:
4127 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4128 break;
4129
4130 case UNSPEC:
4131 switch (XINT (x, 1))
4132 {
4133 case 1: /* st8.spill */
4134 case 2: /* ld8.fill */
4135 {
4136 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4137 HOST_WIDE_INT bit = (offset >> 3) & 63;
4138
4139 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4140 new_flags.is_write = (XINT (x, 1) == 1);
4141 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4142 new_flags, pred);
4143 break;
4144 }
4145
4146 case 3: /* stf.spill */
4147 case 4: /* ldf.spill */
4148 case 8: /* popcnt */
4149 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4150 break;
4151
4152 case 7: /* pred_rel_mutex */
4153 case 9: /* pic call */
4154 case 12: /* mf */
4155 case 19: /* fetchadd_acq */
4156 case 20: /* mov = ar.bsp */
4157 case 21: /* flushrs */
4158 break;
4159
4160 case 5: /* recip_approx */
4161 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4162 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4163 break;
4164
4165 case 13: /* cmpxchg_acq */
4166 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4167 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4168 break;
4169
4170 default:
4171 abort ();
4172 }
4173 break;
4174
4175 case UNSPEC_VOLATILE:
4176 switch (XINT (x, 1))
4177 {
4178 case 0: /* alloc */
4179 /* Alloc must always be the first instruction. Currently, we
4180 only emit it at the function start, so we don't need to worry
4181 about emitting a stop bit before it. */
4182 need_barrier = rws_access_regno (AR_PFS_REGNUM, flags, pred);
4183
4184 new_flags.is_write = 1;
4185 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4186 return need_barrier;
4187
4188 case 1: /* blockage */
4189 case 2: /* insn group barrier */
4190 return 0;
4191
4192 case 5: /* set_bsp */
4193 need_barrier = 1;
4194 break;
4195
4196 case 7: /* pred.rel.mutex */
4197 case 8: /* safe_across_calls all */
4198 case 9: /* safe_across_calls normal */
4199 return 0;
4200
4201 default:
4202 abort ();
4203 }
4204 break;
4205
4206 case RETURN:
4207 new_flags.is_write = 0;
4208 need_barrier = rws_access_regno (REG_RP, flags, pred);
4209 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4210
4211 new_flags.is_write = 1;
4212 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4213 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4214 break;
4215
4216 default:
4217 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4218 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4219 switch (format_ptr[i])
4220 {
4221 case '0': /* unused field */
4222 case 'i': /* integer */
4223 case 'n': /* note */
4224 case 'w': /* wide integer */
4225 case 's': /* pointer to string */
4226 case 'S': /* optional pointer to string */
4227 break;
4228
4229 case 'e':
4230 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4231 need_barrier = 1;
4232 break;
4233
4234 case 'E':
4235 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4236 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4237 need_barrier = 1;
4238 break;
4239
4240 default:
4241 abort ();
4242 }
4243 break;
4244 }
4245 return need_barrier;
4246 }
4247
4248 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
4249 as necessary to eliminate dependency violations. */
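
/* For example (illustrative), if an instruction group would contain

	add r14 = r15, r16
	ld8 r17 = [r14]

   the load reads r14 in the same group that wrote it, so a stop bit is
   inserted between the two and the assembler instead sees

	add r14 = r15, r16
	;;
	ld8 r17 = [r14]  */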
4250
4251 static void
4252 emit_insn_group_barriers (insns)
4253 rtx insns;
4254 {
4255 rtx insn, prev_insn;
4256
4257 memset (rws_sum, 0, sizeof (rws_sum));
4258
4259 prev_insn = 0;
4260 for (insn = insns; insn; insn = NEXT_INSN (insn))
4261 {
4262 int need_barrier = 0;
4263 struct reg_flags flags;
4264
4265 memset (&flags, 0, sizeof (flags));
4266 switch (GET_CODE (insn))
4267 {
4268 case NOTE:
4269 /* For very small loops we can wind up with extra stop bits
4270 inside the loop, because we did not put a stop bit after the
4271 assignment to ar.lc before the loop label. */
4272 /* ??? Ideally we'd do this for any register used in the first
4273 insn group that's been written recently. */
4274 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
4275 {
4276 need_barrier = rws_access_regno (AR_LC_REGNUM, flags, 0);
4277 if (need_barrier)
4278 {
4279 emit_insn_after (gen_insn_group_barrier (), insn);
4280 memset (rws_sum, 0, sizeof (rws_sum));
4281 prev_insn = NULL_RTX;
4282 }
4283 }
4284 break;
4285
4286 case CALL_INSN:
4287 flags.is_branch = 1;
4288 flags.is_sibcall = SIBLING_CALL_P (insn);
4289 memset (rws_insn, 0, sizeof (rws_insn));
4290 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4291
4292 if (need_barrier)
4293 {
4294 /* PREV_INSN null can happen if the very first insn is a
4295 volatile asm. */
4296 if (prev_insn)
4297 emit_insn_after (gen_insn_group_barrier (), prev_insn);
4298 memcpy (rws_sum, rws_insn, sizeof (rws_sum));
4299 }
4300
4301 /* A call must end a group, otherwise the assembler might pack
4302 it in with a following branch and then the function return
4303 goes to the wrong place. Do this unconditionally for
4304 unconditional calls, simply because it (1) looks nicer and
4305 (2) keeps the data structures more accurate for the insns
4306 following the call. */
4307
4308 need_barrier = 1;
4309 if (GET_CODE (PATTERN (insn)) == COND_EXEC)
4310 {
4311 rtx next_insn = insn;
4312 do
4313 next_insn = next_nonnote_insn (next_insn);
4314 while (next_insn
4315 && GET_CODE (next_insn) == INSN
4316 && (GET_CODE (PATTERN (next_insn)) == USE
4317 || GET_CODE (PATTERN (next_insn)) == CLOBBER));
4318 if (next_insn && GET_CODE (next_insn) != JUMP_INSN)
4319 need_barrier = 0;
4320 }
4321 if (need_barrier)
4322 {
4323 emit_insn_after (gen_insn_group_barrier (), insn);
4324 memset (rws_sum, 0, sizeof (rws_sum));
4325 prev_insn = NULL_RTX;
4326 }
4327 else
4328 prev_insn = insn;
4329 break;
4330
4331 case JUMP_INSN:
4332 flags.is_branch = 1;
4333 /* FALLTHRU */
4334
4335 case INSN:
4336 if (GET_CODE (PATTERN (insn)) == USE)
4337 /* Don't care about USE "insns"---those are used to
4338 indicate to the optimizer that it shouldn't get rid of
4339 certain operations. */
4340 break;
4341 else
4342 {
4343 rtx pat = PATTERN (insn);
4344
4345 /* Ug. Hack hacks hacked elsewhere. */
4346 switch (recog_memoized (insn))
4347 {
4348 /* We play dependency tricks with the epilogue in order
4349 to get proper schedules. Undo this for dv analysis. */
4350 case CODE_FOR_epilogue_deallocate_stack:
4351 pat = XVECEXP (pat, 0, 0);
4352 break;
4353
4354 /* The pattern we use for br.cloop confuses the code above.
4355 The second element of the vector is representative. */
4356 case CODE_FOR_doloop_end_internal:
4357 pat = XVECEXP (pat, 0, 1);
4358 break;
4359
4360 /* Doesn't generate code. */
4361 case CODE_FOR_pred_rel_mutex:
4362 continue;
4363
4364 default:
4365 break;
4366 }
4367
4368 memset (rws_insn, 0, sizeof (rws_insn));
4369 need_barrier |= rtx_needs_barrier (pat, flags, 0);
4370
4371 /* Check to see if the previous instruction was a volatile
4372 asm. */
4373 if (! need_barrier)
4374 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4375
4376 if (need_barrier)
4377 {
4378 /* PREV_INSN null can happen if the very first insn is a
4379 volatile asm. */
4380 if (prev_insn)
4381 emit_insn_after (gen_insn_group_barrier (), prev_insn);
4382 memcpy (rws_sum, rws_insn, sizeof (rws_sum));
4383 }
4384 prev_insn = insn;
4385 }
4386 break;
4387
4388 case BARRIER:
4389 /* A barrier doesn't imply an instruction group boundary. */
4390 break;
4391
4392 case CODE_LABEL:
4393 /* Leave prev_insn alone so the barrier gets generated in front
4394 of the label, if one is needed. */
4395 break;
4396
4397 default:
4398 abort ();
4399 }
4400 }
4401 }
4402
4403 /* Emit pseudo-ops for the assembler to describe predicate relations.
4404 At present this assumes that we only consider predicate pairs to
4405 be mutex, and that the assembler can deduce proper values from
4406 straight-line code. */
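
/* For example (illustrative), for source code like

	if (x == 0) a (); else b ();

   the compare writes a complementary predicate pair, say p6/p7.  If
   such a pair is live at a code label, the code below emits a
   pred_rel_mutex pseudo-insn for it, which becomes a .pred.rel.mutex
   style annotation telling the assembler that the two predicates are
   mutually exclusive.  */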
4407
4408 static void
4409 emit_predicate_relation_info ()
4410 {
4411 int i;
4412
4413 for (i = n_basic_blocks - 1; i >= 0; --i)
4414 {
4415 basic_block bb = BASIC_BLOCK (i);
4416 int r;
4417 rtx head = bb->head;
4418
4419 /* We only need such notes at code labels. */
4420 if (GET_CODE (head) != CODE_LABEL)
4421 continue;
4422 if (GET_CODE (NEXT_INSN (head)) == NOTE
4423 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
4424 head = NEXT_INSN (head);
4425
4426 for (r = PR_REG (0); r < PR_REG (64); r += 2)
4427 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
4428 {
4429 rtx p = gen_rtx_REG (BImode, r);
4430 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
4431 if (head == bb->end)
4432 bb->end = n;
4433 head = n;
4434 }
4435 }
4436
4437 /* Look for conditional calls that do not return, and protect predicate
4438 relations around them. Otherwise the assembler will assume the call
4439 returns, and complain about uses of call-clobbered predicates after
4440 the call. */
4441 for (i = n_basic_blocks - 1; i >= 0; --i)
4442 {
4443 basic_block bb = BASIC_BLOCK (i);
4444 rtx insn = bb->head;
4445
4446 while (1)
4447 {
4448 if (GET_CODE (insn) == CALL_INSN
4449 && GET_CODE (PATTERN (insn)) == COND_EXEC
4450 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
4451 {
4452 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
4453 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
4454 if (bb->head == insn)
4455 bb->head = b;
4456 if (bb->end == insn)
4457 bb->end = a;
4458 }
4459
4460 if (insn == bb->end)
4461 break;
4462 insn = NEXT_INSN (insn);
4463 }
4464 }
4465 }
4466
4467 /* Perform machine dependent operations on the rtl chain INSNS. */
4468
4469 void
4470 ia64_reorg (insns)
4471 rtx insns;
4472 {
4473 /* If optimizing, we'll have split before scheduling. */
4474 if (optimize == 0)
4475 split_all_insns (0);
4476
4477 /* Make sure the CFG and global_live_at_start are correct
4478 for emit_predicate_relation_info. */
4479 find_basic_blocks (insns, max_reg_num (), NULL);
4480 life_analysis (insns, NULL, 0);
4481
4482 emit_insn_group_barriers (insns);
4483 emit_predicate_relation_info ();
4484 }
4485 \f
4486 /* Return true if REGNO is used by the epilogue. */
4487
4488 int
4489 ia64_epilogue_uses (regno)
4490 int regno;
4491 {
4492 /* When a function makes a call through a function descriptor, we
4493 will write a (potentially) new value to "gp". After returning
4494 from such a call, we need to make sure the function restores the
4495 original gp-value, even if the function itself does not use the
4496 gp anymore. */
4497 if (regno == R_GR (1)
4498 && TARGET_CONST_GP
4499 && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
4500 return 1;
4501
4502 /* For functions defined with the syscall_linkage attribute, all input
4503 registers are marked as live at all function exits. This prevents the
4504 register allocator from using the input registers, which in turn makes it
4505 possible to restart a system call after an interrupt without having to
4506 save/restore the input registers. */
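
/* A hypothetical declaration using the attribute (the name sys_entry
   is made up for illustration):

	extern long sys_entry (long, long)
	  __attribute__ ((syscall_linkage));

   Compiling the definition of such a function keeps its argument
   input registers live at every exit.  */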
4507
4508 if (IN_REGNO_P (regno)
4509 && (regno < IN_REG (current_function_args_info.words))
4510 && lookup_attribute ("syscall_linkage",
4511 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4512 return 1;
4513
4514 /* Conditional return patterns can't represent the use of `b0' as
4515 the return address, so we force the value live this way. */
4516 if (regno == R_BR (0))
4517 return 1;
4518
4519 if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
4520 return 1;
4521 if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
4522 return 1;
4523 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
4524 && regno == AR_UNAT_REGNUM)
4525 return 1;
4526
4527 return 0;
4528 }
4529
4530 /* Return true if IDENTIFIER is a valid attribute for TYPE. */
4531
4532 int
4533 ia64_valid_type_attribute (type, attributes, identifier, args)
4534 tree type;
4535 tree attributes ATTRIBUTE_UNUSED;
4536 tree identifier;
4537 tree args;
4538 {
4539 /* We only support attributes on function and method types. */
4540
4541 if (TREE_CODE (type) != FUNCTION_TYPE
4542 && TREE_CODE (type) != METHOD_TYPE)
4543 return 0;
4544
4545 /* The "syscall_linkage" attribute says the callee is a system call entry
4546 point. This affects ia64_epilogue_uses. */
4547
4548 if (is_attribute_p ("syscall_linkage", identifier))
4549 return args == NULL_TREE;
4550
4551 return 0;
4552 }
4553 \f
4554 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
4555
4556 We add @ to the name if this goes in small data/bss. We can only put
4557 a variable in small data/bss if it is defined in this module or a module
4558 that we are statically linked with. We can't check the second condition,
4559 but TREE_STATIC gives us the first one. */
4560
4561 /* ??? If we had IPA, we could check the second condition. We could support
4562 programmer added section attributes if the variable is not defined in this
4563 module. */
4564
4565 /* ??? See the v850 port for a cleaner way to do this. */
4566
4567 /* ??? We could also support our own long data here, generating movl/add/ld8
4568 instead of addl,ld8/ld8. This makes the code bigger, but should make the
4569 code faster, because there is one less load. This would also cover incomplete
4570 types, which can't go in sdata/sbss. */
4571
4572 /* ??? See select_section. We must put our own short read-only variables in
4573 sdata/sbss instead of the more natural rodata, because we can't perform
4574 the DECL_READONLY_SECTION test here. */
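
/* For example (illustrative), once a small variable `foo' has been
   renamed `@foo', a reference to it can use a single gp-relative
   address computation:

	addl r14 = @gprel(foo), gp
	;;
	ld4 r15 = [r14]

   whereas an ordinary global must first load its address from the
   linkage table via @ltoff(foo) before the final load.  */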
4575
4576 extern struct obstack * saveable_obstack;
4577
4578 void
4579 ia64_encode_section_info (decl)
4580 tree decl;
4581 {
4582 const char *symbol_str;
4583
4584 if (TREE_CODE (decl) == FUNCTION_DECL)
4585 {
4586 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
4587 return;
4588 }
4589
4590 /* Careful not to prod global register variables. */
4591 if (TREE_CODE (decl) != VAR_DECL
4592 || GET_CODE (DECL_RTL (decl)) != MEM
4593 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
4594 return;
4595
4596 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
4597
4598 /* We assume that -fpic is used only to create a shared library (dso).
4599 With -fpic, no global data can ever be sdata.
4600 Without -fpic, global common uninitialized data can never be sdata, since
4601 it can unify with a real definition in a dso. */
4602 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
4603 to access them. The linker may then be able to do linker relaxation to
4604 optimize references to them. Currently sdata implies use of gprel. */
4605 if (! TARGET_NO_SDATA
4606 && TREE_STATIC (decl)
4607 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
4608 && ! (TREE_PUBLIC (decl)
4609 && (flag_pic
4610 || (DECL_COMMON (decl)
4611 && (DECL_INITIAL (decl) == 0
4612 || DECL_INITIAL (decl) == error_mark_node))))
4613 /* Either the variable must be declared without a section attribute,
4614 or the section must be sdata or sbss. */
4615 && (DECL_SECTION_NAME (decl) == 0
4616 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
4617 ".sdata")
4618 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
4619 ".sbss")))
4620 {
4621 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
4622
4623 /* If the variable has already been defined in the output file, then it
4624 is too late to put it in sdata if it wasn't put there in the first
4625 place. The test is here rather than above, because if it is already
4626 in sdata, then it can stay there. */
4627
4628 if (TREE_ASM_WRITTEN (decl))
4629 ;
4630
4631 /* If this is an incomplete type with size 0, then we can't put it in
4632 sdata because it might be too big when completed. */
4633 else if (size > 0
4634 && size <= (HOST_WIDE_INT) ia64_section_threshold
4635 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
4636 {
4637 size_t len = strlen (symbol_str);
4638 char *newstr;
4639
4640 if (ggc_p)
4641 newstr = ggc_alloc_string (NULL, len + 1);
4642 else
4643 newstr = obstack_alloc (saveable_obstack, len + 2);
4644
4645 *newstr = SDATA_NAME_FLAG_CHAR;
4646 memcpy (newstr + 1, symbol_str, len + 1);
4647
4648 XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
4649 }
4650 }
4651 /* This decl is marked as being in small data/bss but it shouldn't
4652 be; one likely explanation for this is that the decl has been
4653 moved into a different section from the one it was in when
4654 ENCODE_SECTION_INFO was first called. Remove the '@'. */
4655 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
4656 {
4657 if (ggc_p)
4658 XSTR (XEXP (DECL_RTL (decl), 0), 0)
4659 = ggc_alloc_string (symbol_str + 1, -1);
4660 else
4661 XSTR (XEXP (DECL_RTL (decl), 0), 0) = symbol_str + 1;
4662 }
4663 }
4664 \f
4665 /* Output assembly directives for prologue regions. */
4666
4667 /* This function processes a SET pattern looking for specific patterns
4668 which result in emitting an assembly directive required for unwinding. */
4669
4670 static int
4671 process_set (asm_out_file, pat)
4672 FILE *asm_out_file;
4673 rtx pat;
4674 {
4675 rtx src = SET_SRC (pat);
4676 rtx dest = SET_DEST (pat);
4677 int src_regno, dest_regno;
4678
4679 /* Look for the ALLOC insn. */
4680 if (GET_CODE (src) == UNSPEC_VOLATILE
4681 && XINT (src, 1) == 0
4682 && GET_CODE (dest) == REG)
4683 {
4684 dest_regno = REGNO (dest);
4685
4686 /* If this isn't the final destination for ar.pfs, the alloc
4687 shouldn't have been marked frame related. */
4688 if (dest_regno != current_frame_info.reg_save_ar_pfs)
4689 abort ();
4690
4691 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
4692 ia64_dbx_register_number (dest_regno));
4693 return 1;
4694 }
4695
4696 /* Look for SP = .... */
4697 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
4698 {
4699 if (GET_CODE (src) == PLUS)
4700 {
4701 rtx op0 = XEXP (src, 0);
4702 rtx op1 = XEXP (src, 1);
4703 if (op0 == dest && GET_CODE (op1) == CONST_INT)
4704 {
4705 if (INTVAL (op1) < 0)
4706 {
4707 fputs ("\t.fframe ", asm_out_file);
4708 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
4709 -INTVAL (op1));
4710 fputc ('\n', asm_out_file);
4711 }
4712 else
4713 fprintf (asm_out_file, "\t.restore sp\n");
4714 }
4715 else
4716 abort ();
4717 }
4718 else if (GET_CODE (src) == REG
4719 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
4720 fprintf (asm_out_file, "\t.restore sp\n");
4721 else
4722 abort ();
4723
4724 return 1;
4725 }
4726
4727 /* Register move we need to look at. */
4728 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
4729 {
4730 src_regno = REGNO (src);
4731 dest_regno = REGNO (dest);
4732
4733 switch (src_regno)
4734 {
4735 case BR_REG (0):
4736 /* Saving return address pointer. */
4737 if (dest_regno != current_frame_info.reg_save_b0)
4738 abort ();
4739 fprintf (asm_out_file, "\t.save rp, r%d\n",
4740 ia64_dbx_register_number (dest_regno));
4741 return 1;
4742
4743 case PR_REG (0):
4744 if (dest_regno != current_frame_info.reg_save_pr)
4745 abort ();
4746 fprintf (asm_out_file, "\t.save pr, r%d\n",
4747 ia64_dbx_register_number (dest_regno));
4748 return 1;
4749
4750 case AR_UNAT_REGNUM:
4751 if (dest_regno != current_frame_info.reg_save_ar_unat)
4752 abort ();
4753 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
4754 ia64_dbx_register_number (dest_regno));
4755 return 1;
4756
4757 case AR_LC_REGNUM:
4758 if (dest_regno != current_frame_info.reg_save_ar_lc)
4759 abort ();
4760 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
4761 ia64_dbx_register_number (dest_regno));
4762 return 1;
4763
4764 case STACK_POINTER_REGNUM:
4765 if (dest_regno != HARD_FRAME_POINTER_REGNUM
4766 || ! frame_pointer_needed)
4767 abort ();
4768 fprintf (asm_out_file, "\t.vframe r%d\n",
4769 ia64_dbx_register_number (dest_regno));
4770 return 1;
4771
4772 default:
4773 /* Everything else should indicate being stored to memory. */
4774 abort ();
4775 }
4776 }
4777
4778 /* Memory store we need to look at. */
4779 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
4780 {
4781 long off;
4782 rtx base;
4783 const char *saveop;
4784
4785 if (GET_CODE (XEXP (dest, 0)) == REG)
4786 {
4787 base = XEXP (dest, 0);
4788 off = 0;
4789 }
4790 else if (GET_CODE (XEXP (dest, 0)) == PLUS
4791 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
4792 {
4793 base = XEXP (XEXP (dest, 0), 0);
4794 off = INTVAL (XEXP (XEXP (dest, 0), 1));
4795 }
4796 else
4797 abort ();
4798
4799 if (base == hard_frame_pointer_rtx)
4800 {
4801 saveop = ".savepsp";
4802 off = - off;
4803 }
4804 else if (base == stack_pointer_rtx)
4805 saveop = ".savesp";
4806 else
4807 abort ();
4808
4809 src_regno = REGNO (src);
4810 switch (src_regno)
4811 {
4812 case BR_REG (0):
4813 if (current_frame_info.reg_save_b0 != 0)
4814 abort ();
4815 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
4816 return 1;
4817
4818 case PR_REG (0):
4819 if (current_frame_info.reg_save_pr != 0)
4820 abort ();
4821 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
4822 return 1;
4823
4824 case AR_LC_REGNUM:
4825 if (current_frame_info.reg_save_ar_lc != 0)
4826 abort ();
4827 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
4828 return 1;
4829
4830 case AR_PFS_REGNUM:
4831 if (current_frame_info.reg_save_ar_pfs != 0)
4832 abort ();
4833 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
4834 return 1;
4835
4836 case AR_UNAT_REGNUM:
4837 if (current_frame_info.reg_save_ar_unat != 0)
4838 abort ();
4839 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
4840 return 1;
4841
4842 case GR_REG (4):
4843 case GR_REG (5):
4844 case GR_REG (6):
4845 case GR_REG (7):
4846 fprintf (asm_out_file, "\t.save.g 0x%x\n",
4847 1 << (src_regno - GR_REG (4)));
4848 return 1;
4849
4850 case BR_REG (1):
4851 case BR_REG (2):
4852 case BR_REG (3):
4853 case BR_REG (4):
4854 case BR_REG (5):
4855 fprintf (asm_out_file, "\t.save.b 0x%x\n",
4856 1 << (src_regno - BR_REG (1)));
4857 return 1;
4858
4859 case FR_REG (2):
4860 case FR_REG (3):
4861 case FR_REG (4):
4862 case FR_REG (5):
4863 fprintf (asm_out_file, "\t.save.f 0x%x\n",
4864 1 << (src_regno - FR_REG (2)));
4865 return 1;
4866
4867 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
4868 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
4869 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
4870 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
4871 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
4872 1 << (src_regno - FR_REG (12)));
4873 return 1;
4874
4875 default:
4876 return 0;
4877 }
4878 }
4879
4880 return 0;
4881 }
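
/* For a typical prologue, the directives emitted by process_set might
   look like this (the register numbers are illustrative):

	.fframe 16
	.save ar.pfs, r34
	.save rp, r33
	.save ar.lc, r35  */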
4882
4883
4884 /* This function looks at a single insn and emits any directives
4885 required to unwind this insn. */
4886 void
4887 process_for_unwind_directive (asm_out_file, insn)
4888 FILE *asm_out_file;
4889 rtx insn;
4890 {
4891 if ((flag_unwind_tables
4892 || (flag_exceptions && !exceptions_via_longjmp))
4893 && RTX_FRAME_RELATED_P (insn))
4894 {
4895 rtx pat;
4896
4897 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
4898 if (pat)
4899 pat = XEXP (pat, 0);
4900 else
4901 pat = PATTERN (insn);
4902
4903 switch (GET_CODE (pat))
4904 {
4905 case SET:
4906 process_set (asm_out_file, pat);
4907 break;
4908
4909 case PARALLEL:
4910 {
4911 int par_index;
4912 int limit = XVECLEN (pat, 0);
4913 for (par_index = 0; par_index < limit; par_index++)
4914 {
4915 rtx x = XVECEXP (pat, 0, par_index);
4916 if (GET_CODE (x) == SET)
4917 process_set (asm_out_file, x);
4918 }
4919 break;
4920 }
4921
4922 default:
4923 abort ();
4924 }
4925 }
4926 }
4927
4928 \f
4929 void
4930 ia64_init_builtins ()
4931 {
4932 tree psi_type_node = build_pointer_type (integer_type_node);
4933 tree pdi_type_node = build_pointer_type (long_integer_type_node);
4934 tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE);
4935
4936 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
4937 tree si_ftype_psi_si_si
4938 = build_function_type (integer_type_node,
4939 tree_cons (NULL_TREE, psi_type_node,
4940 tree_cons (NULL_TREE, integer_type_node,
4941 tree_cons (NULL_TREE,
4942 integer_type_node,
4943 endlink))));
4944
4945 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
4946 tree di_ftype_pdi_di_di
4947 = build_function_type (long_integer_type_node,
4948 tree_cons (NULL_TREE, pdi_type_node,
4949 tree_cons (NULL_TREE,
4950 long_integer_type_node,
4951 tree_cons (NULL_TREE,
4952 long_integer_type_node,
4953 endlink))));
4954 /* __sync_synchronize */
4955 tree void_ftype_void
4956 = build_function_type (void_type_node, endlink);
4957
4958 /* __sync_lock_test_and_set_si */
4959 tree si_ftype_psi_si
4960 = build_function_type (integer_type_node,
4961 tree_cons (NULL_TREE, psi_type_node,
4962 tree_cons (NULL_TREE, integer_type_node, endlink)));
4963
4964 /* __sync_lock_test_and_set_di */
4965 tree di_ftype_pdi_di
4966 = build_function_type (long_integer_type_node,
4967 tree_cons (NULL_TREE, pdi_type_node,
4968 tree_cons (NULL_TREE, long_integer_type_node,
4969 endlink)));
4970
4971 /* __sync_lock_release_si */
4972 tree void_ftype_psi
4973 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
4974 endlink));
4975
4976 /* __sync_lock_release_di */
4977 tree void_ftype_pdi
4978 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
4979 endlink));
4980
4981 #define def_builtin(name, type, code) \
4982 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR)
4983
4984 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
4985 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
4986 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
4987 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
4988 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
4989 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
4990 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
4991 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
4992
4993 def_builtin ("__sync_synchronize", void_ftype_void,
4994 IA64_BUILTIN_SYNCHRONIZE);
4995
4996 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
4997 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
4998 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
4999 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
5000 def_builtin ("__sync_lock_release_si", void_ftype_psi,
5001 IA64_BUILTIN_LOCK_RELEASE_SI);
5002 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
5003 IA64_BUILTIN_LOCK_RELEASE_DI);
5004
5005 def_builtin ("__builtin_ia64_bsp",
5006 build_function_type (ptr_type_node, endlink),
5007 IA64_BUILTIN_BSP);
5008
5009 def_builtin ("__builtin_ia64_flushrs",
5010 build_function_type (void_type_node, endlink),
5011 IA64_BUILTIN_FLUSHRS);
5012
5013 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
5014 IA64_BUILTIN_FETCH_AND_ADD_SI);
5015 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
5016 IA64_BUILTIN_FETCH_AND_SUB_SI);
5017 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
5018 IA64_BUILTIN_FETCH_AND_OR_SI);
5019 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
5020 IA64_BUILTIN_FETCH_AND_AND_SI);
5021 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
5022 IA64_BUILTIN_FETCH_AND_XOR_SI);
5023 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
5024 IA64_BUILTIN_FETCH_AND_NAND_SI);
5025
5026 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
5027 IA64_BUILTIN_ADD_AND_FETCH_SI);
5028 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
5029 IA64_BUILTIN_SUB_AND_FETCH_SI);
5030 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
5031 IA64_BUILTIN_OR_AND_FETCH_SI);
5032 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
5033 IA64_BUILTIN_AND_AND_FETCH_SI);
5034 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
5035 IA64_BUILTIN_XOR_AND_FETCH_SI);
5036 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
5037 IA64_BUILTIN_NAND_AND_FETCH_SI);
5038
5039 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
5040 IA64_BUILTIN_FETCH_AND_ADD_DI);
5041 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
5042 IA64_BUILTIN_FETCH_AND_SUB_DI);
5043 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
5044 IA64_BUILTIN_FETCH_AND_OR_DI);
5045 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
5046 IA64_BUILTIN_FETCH_AND_AND_DI);
5047 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
5048 IA64_BUILTIN_FETCH_AND_XOR_DI);
5049 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
5050 IA64_BUILTIN_FETCH_AND_NAND_DI);
5051
5052 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
5053 IA64_BUILTIN_ADD_AND_FETCH_DI);
5054 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
5055 IA64_BUILTIN_SUB_AND_FETCH_DI);
5056 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
5057 IA64_BUILTIN_OR_AND_FETCH_DI);
5058 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
5059 IA64_BUILTIN_AND_AND_FETCH_DI);
5060 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
5061 IA64_BUILTIN_XOR_AND_FETCH_DI);
5062 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
5063 IA64_BUILTIN_NAND_AND_FETCH_DI);
5064
5065 #undef def_builtin
5066 }
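
/* Example use of the synchronization builtins defined above (an
   illustrative spin lock, not part of this file's interface):

	static int lock;

	void
	acquire_lock (void)
	{
	  while (__sync_lock_test_and_set_si (&lock, 1))
	    continue;
	}

	void
	release_lock (void)
	{
	  __sync_lock_release_si (&lock);
	}
*/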
5067
5068 /* Expand fetch_and_op intrinsics. The basic code sequence is:
5069
5070 mf
5071 tmp = [ptr];
5072 do {
5073 ret = tmp;
5074 ar.ccv = tmp;
5075 tmp <op>= value;
5076 cmpxchgsz.acq tmp = [ptr], tmp
5077 } while (tmp != ret)
5078 */
5079
5080 static rtx
5081 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
5082 optab binoptab;
5083 enum machine_mode mode;
5084 tree arglist;
5085 rtx target;
5086 {
5087 rtx ret, label, tmp, ccv, insn, mem, value;
5088 tree arg0, arg1;
5089
5090 arg0 = TREE_VALUE (arglist);
5091 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
5092 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
5093 value = expand_expr (arg1, NULL_RTX, mode, 0);
5094
5095 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
5096 MEM_VOLATILE_P (mem) = 1;
5097
5098 if (target && register_operand (target, mode))
5099 ret = target;
5100 else
5101 ret = gen_reg_rtx (mode);
5102
5103 emit_insn (gen_mf ());
5104
5105 /* Special case for fetchadd instructions. */
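  /* The architecture provides fetchadd only for a small set of
     immediate increments (-16, -8, -4, -1, 1, 4, 8, 16);
     fetchadd_operand is assumed to accept exactly those values.  */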
5106 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
5107 {
5108 if (mode == SImode)
5109 insn = gen_fetchadd_acq_si (ret, mem, value);
5110 else
5111 insn = gen_fetchadd_acq_di (ret, mem, value);
5112 emit_insn (insn);
5113 return ret;
5114 }
5115
5116 tmp = gen_reg_rtx (mode);
5117 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
5118 emit_move_insn (tmp, mem);
5119
5120 label = gen_label_rtx ();
5121 emit_label (label);
5122 emit_move_insn (ret, tmp);
5123 emit_move_insn (ccv, tmp);
5124
5125 /* Perform the specific operation. NAND is special-cased: it arrives
5126 here as one_cmpl_optab, and is expanded as ~tmp & value. */
5127 if (binoptab == one_cmpl_optab)
5128 {
5129 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
5130 binoptab = and_optab;
5131 }
5132 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
5133
5134 if (mode == SImode)
5135 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
5136 else
5137 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
5138 emit_insn (insn);
5139
5140 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);
5141
5142 return ret;
5143 }
5144
5145 /* Expand op_and_fetch intrinsics. The basic code sequence is:
5146
5147 mf
5148 tmp = [ptr];
5149 do {
5150 old = tmp;
5151 ar.ccv = tmp;
5152 ret = tmp + value;
5153 cmpxchgsz.acq tmp = [ptr], ret
5154 } while (tmp != old)
5155 */
5156
5157 static rtx
5158 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
5159 optab binoptab;
5160 enum machine_mode mode;
5161 tree arglist;
5162 rtx target;
5163 {
5164 rtx old, label, tmp, ret, ccv, insn, mem, value;
5165 tree arg0, arg1;
5166
5167 arg0 = TREE_VALUE (arglist);
5168 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
5169 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
5170 value = expand_expr (arg1, NULL_RTX, mode, 0);
5171
5172 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
5173 MEM_VOLATILE_P (mem) = 1;
5174
5175 if (target && ! register_operand (target, mode))
5176 target = NULL_RTX;
5177
5178 emit_insn (gen_mf ());
5179 tmp = gen_reg_rtx (mode);
5180 old = gen_reg_rtx (mode);
5181 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
5182
5183 emit_move_insn (tmp, mem);
5184
5185 label = gen_label_rtx ();
5186 emit_label (label);
5187 emit_move_insn (old, tmp);
5188 emit_move_insn (ccv, tmp);
5189
5190 /* Perform the specific operation. NAND is special-cased: it arrives
5191 here as one_cmpl_optab, and is expanded as ~tmp & value. */
5192 if (binoptab == one_cmpl_optab)
5193 {
5194 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
5195 binoptab = and_optab;
5196 }
5197 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
5198
5199 if (mode == SImode)
5200 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
5201 else
5202 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
5203 emit_insn (insn);
5204
5205 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);
5206
5207 return ret;
5208 }
5209
5210 /* Expand val_ and bool_compare_and_swap. For val_ we want:
5211
5212 ar.ccv = oldval
5213 mf
5214 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
5215 return ret
5216
5217 For bool_ it's the same except return ret == oldval.
5218 */
5219
5220 static rtx
5221 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
5222 enum machine_mode mode;
5223 int boolp;
5224 tree arglist;
5225 rtx target;
5226 {
5227 tree arg0, arg1, arg2;
5228 rtx mem, old, new, ccv, tmp, insn;
5229
5230 arg0 = TREE_VALUE (arglist);
5231 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
5232 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
5233 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
5234 old = expand_expr (arg1, NULL_RTX, mode, 0);
5235 new = expand_expr (arg2, NULL_RTX, mode, 0);
5236
5237 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
5238 MEM_VOLATILE_P (mem) = 1;
5239
5240 if (! register_operand (old, mode))
5241 old = copy_to_mode_reg (mode, old);
5242 if (! register_operand (new, mode))
5243 new = copy_to_mode_reg (mode, new);
5244
5245 if (! boolp && target && register_operand (target, mode))
5246 tmp = target;
5247 else
5248 tmp = gen_reg_rtx (mode);
5249
5250 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
5251 emit_move_insn (ccv, old);
5252 emit_insn (gen_mf ());
5253 if (mode == SImode)
5254 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
5255 else
5256 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
5257 emit_insn (insn);
5258
5259 if (boolp)
5260 {
5261 if (! target)
5262 target = gen_reg_rtx (mode);
5263 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
5264 }
5265 else
5266 return tmp;
5267 }
5268
5269 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
5270
5271 static rtx
5272 ia64_expand_lock_test_and_set (mode, arglist, target)
5273 enum machine_mode mode;
5274 tree arglist;
5275 rtx target;
5276 {
5277 tree arg0, arg1;
5278 rtx mem, new, ret, insn;
5279
5280 arg0 = TREE_VALUE (arglist);
5281 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
5282 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
5283 new = expand_expr (arg1, NULL_RTX, mode, 0);
5284
5285 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
5286 MEM_VOLATILE_P (mem) = 1;
5287 if (! register_operand (new, mode))
5288 new = copy_to_mode_reg (mode, new);
5289
5290 if (target && register_operand (target, mode))
5291 ret = target;
5292 else
5293 ret = gen_reg_rtx (mode);
5294
5295 if (mode == SImode)
5296 insn = gen_xchgsi (ret, mem, new);
5297 else
5298 insn = gen_xchgdi (ret, mem, new);
5299 emit_insn (insn);
5300
5301 return ret;
5302 }
5303
5304 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
5305
5306 static rtx
5307 ia64_expand_lock_release (mode, arglist, target)
5308 enum machine_mode mode;
5309 tree arglist;
5310 rtx target ATTRIBUTE_UNUSED;
5311 {
5312 tree arg0;
5313 rtx mem;
5314
5315 arg0 = TREE_VALUE (arglist);
5316 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
5317
5318 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
5319 MEM_VOLATILE_P (mem) = 1;
5320
5321 emit_move_insn (mem, const0_rtx);
5322
5323 return const0_rtx;
5324 }
5325
5326 rtx
5327 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
5328 tree exp;
5329 rtx target;
5330 rtx subtarget ATTRIBUTE_UNUSED;
5331 enum machine_mode mode ATTRIBUTE_UNUSED;
5332 int ignore ATTRIBUTE_UNUSED;
5333 {
5334 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
5335 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
5336 tree arglist = TREE_OPERAND (exp, 1);
5337
5338 switch (fcode)
5339 {
5340 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
5341 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
5342 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
5343 case IA64_BUILTIN_LOCK_RELEASE_SI:
5344 case IA64_BUILTIN_FETCH_AND_ADD_SI:
5345 case IA64_BUILTIN_FETCH_AND_SUB_SI:
5346 case IA64_BUILTIN_FETCH_AND_OR_SI:
5347 case IA64_BUILTIN_FETCH_AND_AND_SI:
5348 case IA64_BUILTIN_FETCH_AND_XOR_SI:
5349 case IA64_BUILTIN_FETCH_AND_NAND_SI:
5350 case IA64_BUILTIN_ADD_AND_FETCH_SI:
5351 case IA64_BUILTIN_SUB_AND_FETCH_SI:
5352 case IA64_BUILTIN_OR_AND_FETCH_SI:
5353 case IA64_BUILTIN_AND_AND_FETCH_SI:
5354 case IA64_BUILTIN_XOR_AND_FETCH_SI:
5355 case IA64_BUILTIN_NAND_AND_FETCH_SI:
5356 mode = SImode;
5357 break;
5358
5359 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
5360 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
5361 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
5362 case IA64_BUILTIN_LOCK_RELEASE_DI:
5363 case IA64_BUILTIN_FETCH_AND_ADD_DI:
5364 case IA64_BUILTIN_FETCH_AND_SUB_DI:
5365 case IA64_BUILTIN_FETCH_AND_OR_DI:
5366 case IA64_BUILTIN_FETCH_AND_AND_DI:
5367 case IA64_BUILTIN_FETCH_AND_XOR_DI:
5368 case IA64_BUILTIN_FETCH_AND_NAND_DI:
5369 case IA64_BUILTIN_ADD_AND_FETCH_DI:
5370 case IA64_BUILTIN_SUB_AND_FETCH_DI:
5371 case IA64_BUILTIN_OR_AND_FETCH_DI:
5372 case IA64_BUILTIN_AND_AND_FETCH_DI:
5373 case IA64_BUILTIN_XOR_AND_FETCH_DI:
5374 case IA64_BUILTIN_NAND_AND_FETCH_DI:
5375 mode = DImode;
5376 break;
5377
5378 default:
5379 break;
5380 }
5381
5382 switch (fcode)
5383 {
5384 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
5385 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
5386 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
5387
5388 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
5389 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
5390 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
5391
5392 case IA64_BUILTIN_SYNCHRONIZE:
5393 emit_insn (gen_mf ());
5394 return const0_rtx;
5395
5396 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
5397 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
5398 return ia64_expand_lock_test_and_set (mode, arglist, target);
5399
5400 case IA64_BUILTIN_LOCK_RELEASE_SI:
5401 case IA64_BUILTIN_LOCK_RELEASE_DI:
5402 return ia64_expand_lock_release (mode, arglist, target);
5403
5404 case IA64_BUILTIN_BSP:
5405 if (! target || ! register_operand (target, DImode))
5406 target = gen_reg_rtx (DImode);
5407 emit_insn (gen_bsp_value (target));
5408 return target;
5409
5410 case IA64_BUILTIN_FLUSHRS:
5411 emit_insn (gen_flushrs ());
5412 return const0_rtx;
5413
5414 case IA64_BUILTIN_FETCH_AND_ADD_SI:
5415 case IA64_BUILTIN_FETCH_AND_ADD_DI:
5416 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
5417
5418 case IA64_BUILTIN_FETCH_AND_SUB_SI:
5419 case IA64_BUILTIN_FETCH_AND_SUB_DI:
5420 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
5421
5422 case IA64_BUILTIN_FETCH_AND_OR_SI:
5423 case IA64_BUILTIN_FETCH_AND_OR_DI:
5424 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
5425
5426 case IA64_BUILTIN_FETCH_AND_AND_SI:
5427 case IA64_BUILTIN_FETCH_AND_AND_DI:
5428 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
5429
5430 case IA64_BUILTIN_FETCH_AND_XOR_SI:
5431 case IA64_BUILTIN_FETCH_AND_XOR_DI:
5432 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
5433
5434 case IA64_BUILTIN_FETCH_AND_NAND_SI:
5435 case IA64_BUILTIN_FETCH_AND_NAND_DI:
5436 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
5437
5438 case IA64_BUILTIN_ADD_AND_FETCH_SI:
5439 case IA64_BUILTIN_ADD_AND_FETCH_DI:
5440 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
5441
5442 case IA64_BUILTIN_SUB_AND_FETCH_SI:
5443 case IA64_BUILTIN_SUB_AND_FETCH_DI:
5444 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
5445
5446 case IA64_BUILTIN_OR_AND_FETCH_SI:
5447 case IA64_BUILTIN_OR_AND_FETCH_DI:
5448 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
5449
5450 case IA64_BUILTIN_AND_AND_FETCH_SI:
5451 case IA64_BUILTIN_AND_AND_FETCH_DI:
5452 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
5453
5454 case IA64_BUILTIN_XOR_AND_FETCH_SI:
5455 case IA64_BUILTIN_XOR_AND_FETCH_DI:
5456 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
5457
5458 case IA64_BUILTIN_NAND_AND_FETCH_SI:
5459 case IA64_BUILTIN_NAND_AND_FETCH_DI:
5460 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
5461
5462 default:
5463 break;
5464 }
5465
5466 return NULL_RTX;
5467 }