/* Subroutines for insn-output.c for Sun SPARC.
   Copyright (C) 1987, 88, 89, 92-98, 1999 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "tree.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "expr.h"
#include "recog.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"

/* 1 if the caller has placed an "unimp" insn immediately after the call.
   This is used in v8 code when calling a function that returns a structure.
   v9 doesn't have this.  Be careful to have this test be the same as that
   used on the call.  */

#define SKIP_CALLERS_UNIMP_P \
(!TARGET_ARCH64 && current_function_returns_struct \
 && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))) \
 && (TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl))) \
     == INTEGER_CST))
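
/* For illustration (not from this file): under the v8 ABI the caller of a
   function returning a structure follows the call's delay slot with
   "unimp SIZE", where SIZE is the size in bytes of the returned struct:

	call	foo
	 nop
	unimp	8		! sizeof (struct two_words)

   and the callee skips it by returning to %i7+12 instead of the usual
   %i7+8, which is the situation SKIP_CALLERS_UNIMP_P detects.  */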

/* Global variables for machine-dependent things.  */

/* Size of frame.  Need to know this to emit return insns from leaf procedures.
   ACTUAL_FSIZE is set by compute_frame_size() which is called during the
   reload pass.  This is important as the value is later used in insn
   scheduling (to see what can go in a delay slot).
   APPARENT_FSIZE is the size of the stack less the register save area and less
   the outgoing argument area.  It is used when saving call preserved regs.  */
static int apparent_fsize;
static int actual_fsize;

/* Number of live general or floating point registers that need to be saved
   (as 4-byte quantities).  This is only done if TARGET_EPILOGUE.  */
static int num_gfregs;

/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */

rtx sparc_compare_op0, sparc_compare_op1;

/* We may need an epilogue if we spill too many registers.
   If this is non-zero, then we branch here for the epilogue.  */
static rtx leaf_label;

#ifdef LEAF_REGISTERS

/* Vector to say how input registers are mapped to output
   registers.  FRAME_POINTER_REGNUM cannot be remapped by
   this function to eliminate it.  You must use -fomit-frame-pointer
   to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100};

#endif
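
/* Reading the vector above: the index is the register number the function
   body was compiled with, the entry is the register a leaf function really
   uses.  For instance entries 24-29 map %i0-%i5 onto %o0-%o5 (8-13): with
   no save/restore the incoming arguments still live in the caller's %o
   registers.  Likewise 31 maps %i7 onto %o7 (15), and -1 marks registers
   a leaf function must not use at all.  */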

/* Name of where we pretend to think the frame pointer points.
   Normally, this is "%fp", but if we are in a leaf procedure,
   this is "%sp+something".  We record "something" separately as it may be
   too big for reg+constant addressing.  */

static const char *frame_base_name;
static int frame_base_offset;

static rtx pic_setup_code	PROTO((void));
static void sparc_init_modes	PROTO((void));
static int save_regs		PROTO((FILE *, int, int, const char *,
				       int, int, int));
static int restore_regs		PROTO((FILE *, int, int, const char *, int, int));
static void build_big_number	PROTO((FILE *, int, const char *));
static int function_arg_slotno	PROTO((const CUMULATIVE_ARGS *,
				       enum machine_mode, tree, int, int,
				       int *, int *));

static int supersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
static int hypersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
static int ultrasparc_adjust_cost PROTO((rtx, rtx, rtx, int));

static void sparc_output_addr_vec PROTO((rtx));
static void sparc_output_addr_diff_vec PROTO((rtx));
static void sparc_output_deferred_case_vectors PROTO((void));
static void sparc_add_gc_roots PROTO ((void));
static void mark_ultrasparc_pipeline_state PROTO ((void *));
static int check_return_regs PROTO ((rtx));
static int epilogue_renumber PROTO ((rtx *, int));
static int ultra_cmove_results_ready_p PROTO ((rtx));
static int ultra_fpmode_conflict_exists PROTO ((enum machine_mode));
static rtx *ultra_find_type PROTO ((int, rtx *, int));
static void ultra_build_types_avail PROTO ((rtx *, int));
static void ultra_flush_pipeline PROTO ((void));
static void ultra_rescan_pipeline_state PROTO ((rtx *, int));
static int set_extends PROTO ((rtx, rtx));
\f
/* Option handling.  */

/* Code model option as passed by user.  */
const char *sparc_cmodel_string;
/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

struct sparc_cpu_select sparc_select[] =
{
  /* switch	name,		tune	arch */
  { (char *)0,	"default",	1,	1 },
  { (char *)0,	"-mcpu=",	1,	1 },
  { (char *)0,	"-mtune=",	1,	0 },
  { 0, 0, 0, 0 }
};

/* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
enum processor_type sparc_cpu;

/* Validate and override various options, and do some machine dependent
   initialization.  */

void
sparc_override_options ()
{
  static struct code_model {
    const char *name;
    int value;
  } cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { 0, 0 }
  };
  struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    int cpu;
    const char *name;
  } cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, "cypress" },
    { TARGET_CPU_sparclet, "tsc701" },
    { TARGET_CPU_sparclite, "f930" },
    { TARGET_CPU_v8, "v8" },
    { TARGET_CPU_hypersparc, "hypersparc" },
    { TARGET_CPU_sparclite86x, "sparclite86x" },
    { TARGET_CPU_supersparc, "supersparc" },
    { TARGET_CPU_v9, "v9" },
    { TARGET_CPU_ultrasparc, "ultrasparc" },
    { 0, 0 }
  };
  struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  */
  static struct cpu_table {
    const char *name;
    enum processor_type processor;
    int disable;
    int enable;
  } cpu_table[] = {
    { "v7",		PROCESSOR_V7, MASK_ISA, 0 },
    { "cypress",	PROCESSOR_CYPRESS, MASK_ISA, 0 },
    { "v8",		PROCESSOR_V8, MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc",	PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
    { "sparclite",	PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
       The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
    { "f930",		PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "f934",		PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "hypersparc",	PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
    { "sparclite86x",	PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
      MASK_SPARCLITE },
    { "sparclet",	PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701",		PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
    { "v9",		PROCESSOR_V9, MASK_ISA, MASK_V9 },
    /* TI ultrasparc I, II, IIi */
    { "ultrasparc",	PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9
    /* Although insns using %y are deprecated, it is a clear win on current
       ultrasparcs.  */
						    |MASK_DEPRECATED_V8_INSNS },
    { 0, 0, 0, 0 }
  };
  struct cpu_table *cpu;
  struct sparc_cpu_select *sel;
  int fpu;

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    {
      error ("%s is not supported by this configuration",
	     DEFAULT_ARCH32_P ? "-m64" : "-m32");
    }
#endif

  /* At the moment we don't allow different pointer size and architecture.  */
  if (! TARGET_64BIT != ! TARGET_PTR64)
    {
      error ("-mptr%d not allowed on -m%d",
	     TARGET_PTR64 ? 64 : 32, TARGET_64BIT ? 64 : 32);
      if (TARGET_64BIT)
	target_flags |= MASK_PTR64;
      else
	target_flags &= ~MASK_PTR64;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  fpu = TARGET_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  for (def = &cpu_default[0]; def->name; ++def)
    if (def->cpu == TARGET_CPU_DEFAULT)
      break;
  if (! def->name)
    abort ();
  sparc_select[0].string = def->name;

  for (sel = &sparc_select[0]; sel->name; ++sel)
    {
      if (sel->string)
	{
	  for (cpu = &cpu_table[0]; cpu->name; ++cpu)
	    if (! strcmp (sel->string, cpu->name))
	      {
		if (sel->set_tune_p)
		  sparc_cpu = cpu->processor;

		if (sel->set_arch_p)
		  {
		    target_flags &= ~cpu->disable;
		    target_flags |= cpu->enable;
		  }
		break;
	      }

	  if (! cpu->name)
	    error ("bad value (%s) for %s switch", sel->string, sel->name);
	}
    }

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  Clear MASK_FPU_SET to avoid confusing
     the reverse mapping from switch values to names.  */
  if (TARGET_FPU_SET)
    {
      target_flags = (target_flags & ~MASK_FPU) | fpu;
      target_flags &= ~MASK_FPU_SET;
    }

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, and makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Don't allow -mvis if FPU is disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~MASK_VIS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0 && sparc_cpu == PROCESSOR_ULTRASPARC)
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  if ((profile_flag || profile_block_flag)
      && sparc_cmodel != CM_32 && sparc_cmodel != CM_MEDLOW)
    {
      error ("profiling does not support code models other than medlow");
    }

  /* Register global variables with the garbage collector.  */
  sparc_add_gc_roots ();
}
\f
/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (code)
     enum rtx_code code;
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}
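
/* These six codes are exactly the ones the v9 branch-on-register-contents
   instructions can test directly: brz (EQ), brnz (NE), brgez (GE),
   brlz (LT), brlez (LE) and brgz (GT) all compare a register against
   zero, so the unsigned codes (GEU, LTU, ...) have no counterpart.  */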

\f
/* Operand constraints.  */

/* Return non-zero only if OP is a register of mode MODE,
   or const0_rtx.  Don't allow const0_rtx if TARGET_LIVE_G0 because
   %g0 may contain anything.  */

int
reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;
  if (TARGET_LIVE_G0)
    return 0;
  if (op == const0_rtx)
    return 1;
  if (GET_MODE (op) == VOIDmode && GET_CODE (op) == CONST_DOUBLE
      && CONST_DOUBLE_HIGH (op) == 0
      && CONST_DOUBLE_LOW (op) == 0)
    return 1;
  if (GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT
      && GET_CODE (op) == CONST_DOUBLE
      && fp_zero_operand (op))
    return 1;
  return 0;
}

/* Nonzero if OP is a floating point value with value 0.0.  */

int
fp_zero_operand (op)
     rtx op;
{
  REAL_VALUE_TYPE r;

  REAL_VALUE_FROM_CONST_DOUBLE (r, op);
  return (REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r));
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (op)
     rtx op;
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      if (REAL_VALUES_EQUAL (r, dconst0)
	  && ! REAL_VALUE_MINUS_ZERO (r))
	return 0;
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      if (SPARC_SETHI_P (i))
	return 1;
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (op)
     rtx op;
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      if (REAL_VALUES_EQUAL (r, dconst0)
	  && ! REAL_VALUE_MINUS_ZERO (r))
	return 0;
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      if (SPARC_SIMM13_P (i))
	return 1;
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (op)
     rtx op;
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      if (REAL_VALUES_EQUAL (r, dconst0)
	  && ! REAL_VALUE_MINUS_ZERO (r))
	return 0;
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      if (! SPARC_SETHI_P (i)
	  && ! SPARC_SIMM13_P (i))
	return 1;
    }

  return 0;
}

/* Nonzero if OP is an integer register.  */

int
intreg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (register_operand (op, SImode)
	  || (TARGET_ARCH64 && register_operand (op, DImode)));
}

/* Nonzero if OP is a floating point condition code register.  */

int
fcc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* This can happen when recog is called from combine.  Op may be a MEM.
     Fail instead of calling abort in this case.  */
  if (GET_CODE (op) != REG)
    return 0;

  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (mode == VOIDmode
      && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
    return 0;

#if 0	/* ??? ==> 1 when %fcc0-3 are pseudos first.  See gen_compare_reg().  */
  if (reg_renumber == 0)
    return REGNO (op) >= FIRST_PSEUDO_REGISTER;
  return REGNO_OK_FOR_CCFP_P (REGNO (op));
#else
  return (unsigned) REGNO (op) - SPARC_FIRST_V9_FCC_REG < 4;
#endif
}

/* Nonzero if OP is an integer or floating point condition code register.  */

int
icc_or_fcc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == REG && REGNO (op) == SPARC_ICC_REG)
    {
      if (mode != VOIDmode && mode != GET_MODE (op))
	return 0;
      if (mode == VOIDmode
	  && GET_MODE (op) != CCmode && GET_MODE (op) != CCXmode)
	return 0;
      return 1;
    }

  return fcc_reg_operand (op, mode);
}

/* Nonzero if OP can appear as the dest of a RESTORE insn.  */
int
restore_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == REG && GET_MODE (op) == mode
	  && (REGNO (op) < 8 || (REGNO (op) >= 24 && REGNO (op) < 32)));
}

/* A call insn on SPARC can take a PC-relative constant address, or any
   regular memory address.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) != MEM)
    abort ();
  op = XEXP (op, 0);
  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
}

int
call_operand_address (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
}

/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode omode = GET_MODE (op);

  if (omode != mode && omode != VOIDmode && mode != VOIDmode)
    return 0;

  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);

    default:
      return 0;
    }
}

/* Return truth value of statement that OP is a symbolic memory
   operand of mode MODE.  */

int
symbolic_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);
  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}

/* Return truth value of statement that OP is a LABEL_REF of mode MODE.  */

int
label_ref_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) != LABEL_REF)
    return 0;
  if (GET_MODE (op) != mode)
    return 0;
  return 1;
}

/* Return 1 if the operand is an argument used in generating pic references
   in either the medium/low or medium/anywhere code models of sparc64.  */

int
sp64_medium_pic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Check for (const (minus (symbol_ref:GOT)
		       (const (minus (label) (pc))))).  */
  if (GET_CODE (op) != CONST)
    return 0;
  op = XEXP (op, 0);
  if (GET_CODE (op) != MINUS)
    return 0;
  if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
    return 0;
  /* ??? Ensure symbol is GOT.  */
  if (GET_CODE (XEXP (op, 1)) != CONST)
    return 0;
  if (GET_CODE (XEXP (XEXP (op, 1), 0)) != MINUS)
    return 0;
  return 1;
}

/* Return 1 if the operand is a data segment reference.  This includes
   the readonly data segment, or in other words anything but the text segment.
   This is needed in the medium/anywhere code model on v9.  These values
   are accessed with EMBMEDANY_BASE_REG.  */

int
data_segment_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF :
      return ! SYMBOL_REF_FLAG (op);
    case PLUS :
      /* Assume canonical format of symbol + constant.
	 Fall through.  */
    case CONST :
      return data_segment_operand (XEXP (op, 0), VOIDmode);
    default :
      return 0;
    }
}

/* Return 1 if the operand is a text segment reference.
   This is needed in the medium/anywhere code model on v9.  */

int
text_segment_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case LABEL_REF :
      return 1;
    case SYMBOL_REF :
      return SYMBOL_REF_FLAG (op);
    case PLUS :
      /* Assume canonical format of symbol + constant.
	 Fall through.  */
    case CONST :
      return text_segment_operand (XEXP (op, 0), VOIDmode);
    default :
      return 0;
    }
}

/* Return 1 if the operand is either a register or a memory operand that is
   not symbolic.  */

int
reg_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

int
splittable_symbolic_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  if (! symbolic_operand (XEXP (op, 0), Pmode))
    return 0;
  return 1;
}

int
splittable_immediate_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  if (! immediate_operand (XEXP (op, 0), Pmode))
    return 0;
  return 1;
}

/* Return truth value of whether OP is EQ or NE.  */

int
eq_or_neq (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
}

/* Return 1 if this is a comparison operator, but not an EQ, NE, GEU,
   or LTU for non-floating-point.  We handle those specially.  */

int
normal_comp_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);

  if (GET_RTX_CLASS (code) != '<')
    return 0;

  if (GET_MODE (XEXP (op, 0)) == CCFPmode
      || GET_MODE (XEXP (op, 0)) == CCFPEmode)
    return 1;

  return (code != NE && code != EQ && code != GEU && code != LTU);
}

/* Return 1 if this is a comparison operator.  This allows the use of
   MATCH_OPERATOR to recognize all the branch insns.  */

int
noov_compare_op (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);

  if (GET_RTX_CLASS (code) != '<')
    return 0;

  if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode)
    /* These are the only branches which work with CC_NOOVmode.  */
    return (code == EQ || code == NE || code == GE || code == LT);
  return 1;
}

/* Nonzero if OP is a comparison operator suitable for use in v9
   conditional move or branch on register contents instructions.  */

int
v9_regcmp_op (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);

  if (GET_RTX_CLASS (code) != '<')
    return 0;

  return v9_regcmp_p (code);
}

/* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation.  */

int
extend_op (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == SIGN_EXTEND || GET_CODE (op) == ZERO_EXTEND;
}

/* Return nonzero if OP is an operator of mode MODE which can set
   the condition codes explicitly.  We do not include PLUS and MINUS
   because these require CC_NOOVmode, which we handle explicitly.  */

int
cc_arithop (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == AND
      || GET_CODE (op) == IOR
      || GET_CODE (op) == XOR)
    return 1;

  return 0;
}

/* Return nonzero if OP is an operator of mode MODE which can bitwise
   complement its second operand and set the condition codes explicitly.  */

int
cc_arithopn (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* XOR is not here because combine canonicalizes (xor (not ...) ...)
     and (xor ... (not ...)) to (not (xor ...)).  */
  return (GET_CODE (op) == AND
	  || GET_CODE (op) == IOR);
}
\f
/* Return true if OP is a register, or is a CONST_INT that can fit in a
   signed 13 bit immediate field.  This is an acceptable SImode operand for
   most 3 address instructions.  */

int
arith_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  int val;
  if (register_operand (op, mode))
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  val = INTVAL (op) & 0xffffffff;
  return SPARC_SIMM13_P (val);
}
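
/* For reference, SPARC_SIMM13_P accepts the signed 13 bit range
   -4096 ... 4095, the immediate range of add/or style instructions;
   4096 itself just misses, which is why arith_4096_operand below is
   special-cased for add/sub.  */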

/* Return true if OP is a constant 4096.  */

int
arith_4096_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  int val;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  val = INTVAL (op) & 0xffffffff;
  return val == 4096;
}

/* Return true if OP is suitable as second operand for add/sub.  */

int
arith_add_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return arith_operand (op, mode) || arith_4096_operand (op, mode);
}

/* Return true if OP is a CONST_INT or a CONST_DOUBLE which can fit in the
   immediate field of OR and XOR instructions.  Used for 64-bit
   constant formation patterns.  */
int
const64_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && SPARC_SIMM13_P (INTVAL (op)))
#if HOST_BITS_PER_WIDE_INT != 64
	  || (GET_CODE (op) == CONST_DOUBLE
	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
	      && (CONST_DOUBLE_HIGH (op) ==
		  ((CONST_DOUBLE_LOW (op) & 0x80000000) != 0 ?
		   (HOST_WIDE_INT)0xffffffff : 0)))
#endif
	  );
}

/* The same, but only for sethi instructions.  */
int
const64_high_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) & 0xfffffc00) != 0
	   && SPARC_SETHI_P (INTVAL (op))
#if HOST_BITS_PER_WIDE_INT != 64
	   /* Must be positive on non-64bit host else the
	      optimizer is fooled into thinking that sethi
	      sign extends, even though it does not.  */
	   && INTVAL (op) >= 0
#endif
	   )
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0
	      && (CONST_DOUBLE_LOW (op) & 0xfffffc00) != 0
	      && SPARC_SETHI_P (CONST_DOUBLE_LOW (op))));
}

/* Return true if OP is a register, or is a CONST_INT that can fit in a
   signed 11 bit immediate field.  This is an acceptable SImode operand for
   the movcc instructions.  */

int
arith11_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM11_P (INTVAL (op))));
}

/* Return true if OP is a register, or is a CONST_INT that can fit in a
   signed 10 bit immediate field.  This is an acceptable SImode operand for
   the movrcc instructions.  */

int
arith10_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM10_P (INTVAL (op))));
}

/* Return true if OP is a register, is a CONST_INT that fits in a 13 bit
   immediate field, or is a CONST_DOUBLE whose both parts fit in a 13 bit
   immediate field.
   v9: Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
   can fit in a 13 bit immediate field.  This is an acceptable DImode operand
   for most 3 address instructions.  */

int
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && SMALL_INT (op))
	  || (! TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_HIGH (op) + 0x1000) < 0x2000)
	  || (TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
	      && ((CONST_DOUBLE_HIGH (op) == -1
		   && (CONST_DOUBLE_LOW (op) & 0x1000) == 0x1000)
		  || (CONST_DOUBLE_HIGH (op) == 0
		      && (CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}

/* Return true if OP is a constant 4096 for DImode on ARCH64.  */

int
arith_double_4096_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (TARGET_ARCH64
	  && ((GET_CODE (op) == CONST_INT && INTVAL (op) == 4096)
	      || (GET_CODE (op) == CONST_DOUBLE
		  && CONST_DOUBLE_LOW (op) == 4096
		  && CONST_DOUBLE_HIGH (op) == 0)));
}

/* Return true if OP is suitable as second operand for add/sub in DImode.  */

int
arith_double_add_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return arith_double_operand (op, mode) || arith_double_4096_operand (op, mode);
}

/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
   can fit in an 11 bit immediate field.  This is an acceptable DImode
   operand for the movcc instructions.  */
/* ??? Replace with arith11_operand?  */

int
arith11_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x400) < 0x800
	      && ((CONST_DOUBLE_HIGH (op) == -1
		   && (CONST_DOUBLE_LOW (op) & 0x400) == 0x400)
		  || (CONST_DOUBLE_HIGH (op) == 0
		      && (CONST_DOUBLE_LOW (op) & 0x400) == 0)))
	  || (GET_CODE (op) == CONST_INT
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x400) < 0x800));
}

/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
   can fit in a 10 bit immediate field.  This is an acceptable DImode
   operand for the movrcc instructions.  */
/* ??? Replace with arith10_operand?  */

int
arith10_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned) (CONST_DOUBLE_LOW (op) + 0x200) < 0x400
	      && ((CONST_DOUBLE_HIGH (op) == -1
		   && (CONST_DOUBLE_LOW (op) & 0x200) == 0x200)
		  || (CONST_DOUBLE_HIGH (op) == 0
		      && (CONST_DOUBLE_LOW (op) & 0x200) == 0)))
	  || (GET_CODE (op) == CONST_INT
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x200) < 0x400));
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in most three-address insns,
   which have a 13 bit immediate field.  */

int
small_int (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && SMALL_INT (op));
}

int
small_int_or_double (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0
	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))));
}

/* Recognize operand values for the umul instruction.  That instruction sign
   extends immediate values just like all other sparc instructions, but
   interprets the extended result as an unsigned number.  */

int
uns_small_int (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
	  && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
	      || (INTVAL (op) >= 0xFFFFF000
		  && INTVAL (op) < 0x100000000)));
#else
  return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0
	      && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000));
#endif
}
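
/* Concretely: the 13 bit immediate -4096 (0x1000, sign bit on) sign
   extends to 0xFFFFF000, which umul then treats as the unsigned number
   4294963200; hence the two accepted windows 0 ... 0xFFF and
   0xFFFFF000 ... 0xFFFFFFFF above.  */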

int
uns_arith_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || uns_small_int (op, mode);
}

/* Return truth value of statement that OP is a call-clobbered register.  */
int
clobbered_register (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == REG && call_used_regs[REGNO (op)]);
}

/* Return 1 if OP is const0_rtx, used for TARGET_LIVE_G0 insns.  */

int
zero_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const0_rtx;
}

/* Return 1 if OP is a valid operand for the source of a move insn.  */

int
input_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* If both modes are non-void they must be the same.  */
  if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
    return 0;

  /* Only a tiny bit of handling for CONSTANT_P_RTX is necessary.  */
  if (GET_CODE (op) == CONST && GET_CODE (XEXP (op, 0)) == CONSTANT_P_RTX)
    return 1;

  /* Allow any one instruction integer constant, and all CONST_INT
     variants when we are working in DImode and !arch64.  */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && ((GET_CODE (op) == CONST_INT
	   && ((SPARC_SETHI_P (INTVAL (op))
		&& (! TARGET_ARCH64
		    || (INTVAL (op) >= 0)
		    || mode == SImode
		    || mode == HImode
		    || mode == QImode))
	       || SPARC_SIMM13_P (INTVAL (op))
	       || (mode == DImode
		   && ! TARGET_ARCH64)))
	  || (TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && ((CONST_DOUBLE_HIGH (op) == 0
		   && SPARC_SETHI_P (CONST_DOUBLE_LOW (op)))
		  ||
#if HOST_BITS_PER_WIDE_INT == 64
		  (CONST_DOUBLE_HIGH (op) == 0
		   && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op)))
#else
		  (SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
		   && (((CONST_DOUBLE_LOW (op) & 0x80000000) == 0
			&& CONST_DOUBLE_HIGH (op) == 0)
		       || (CONST_DOUBLE_HIGH (op) == -1)))
#endif
		  ))))
    return 1;

  /* If !arch64 and this is a DImode const, allow it so that
     the splits can be generated.  */
  if (! TARGET_ARCH64
      && mode == DImode
      && GET_CODE (op) == CONST_DOUBLE)
    return 1;

  if (register_operand (op, mode))
    return 1;

  if (GET_MODE_CLASS (mode) == MODE_FLOAT
      && GET_CODE (op) == CONST_DOUBLE)
    return 1;

  /* If this is a SUBREG, look inside so that we handle
     paradoxical ones.  */
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  /* Check for valid MEM forms.  */
  if (GET_CODE (op) == MEM)
    {
      rtx inside = XEXP (op, 0);

      if (GET_CODE (inside) == LO_SUM)
	{
	  /* We can't allow these because all of the splits
	     (eventually as they trickle down into DFmode
	     splits) require offsettable memory references.  */
	  if (! TARGET_V9
	      && GET_MODE (op) == TFmode)
	    return 0;

	  return (register_operand (XEXP (inside, 0), Pmode)
		  && CONSTANT_P (XEXP (inside, 1)));
	}
      return memory_address_p (mode, inside);
    }

  return 0;
}

\f
/* We know it can't be done in one insn when we get here;
   the movsi expander guarantees this.  */
void
sparc_emit_set_const32 (op0, op1)
     rtx op0;
     rtx op1;
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp;

  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT value = INTVAL (op1);

      if (SPARC_SETHI_P (value)
	  || SPARC_SIMM13_P (value))
	abort ();
    }

  /* Full 2-insn decomposition is needed.  */
  if (reload_in_progress || reload_completed)
    temp = op0;
  else
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      if (TARGET_ARCH64
	  && HOST_BITS_PER_WIDE_INT != 64
	  && (INTVAL (op1) & 0x80000000) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode,
				  temp,
				  gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx,
							INTVAL (op1) & 0xfffffc00, 0)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode,
				  temp,
				  GEN_INT (INTVAL (op1) & 0xfffffc00)));
	}
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode,
					   temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode,
			      temp,
			      gen_rtx_HIGH (mode,
					    op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_LO_SUM (mode,
					      temp,
					      op1)));
    }
}
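
/* A sketch of what the CONST_INT arm above produces: for 0xdeadbeef,
   which is neither a simm13 nor a sethi value, we emit in effect

	sethi	%hi(0xdeadbeef), %tmp	! %tmp = 0xdeadbc00 (bits 31-10)
	or	%tmp, 0x2ef, %reg	! 0xdeadbeef & 0x3ff == 0x2ef

   except that the intermediate is a plain move of 0xdeadbc00, so CSE
   can reuse it for other constants sharing the same high bits.  */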

\f
/* Sparc-v9 code-model support.  */
void
sparc_emit_set_symbolic_const64 (op0, op1, temp1)
     rtx op0;
     rtx op1;
     rtx temp1;
{
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4GB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp
	 or	%temp, %lo(symbol), %reg  */
      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      emit_insn (gen_seth44 (op0, op1));
      emit_insn (gen_setm44 (op0, op0, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp1,
			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp1, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 or	%temp2, %lo(symbol), %temp4
	 sllx	%temp3, 32, %temp5
	 or	%temp4, %temp5, %reg  */

      /* Getting this right wrt. reloading is really tricky.
	 We _MUST_ have a separate temporary at this point,
	 so we barf immediately instead of generating
	 incorrect code.  */
      if (temp1 == op0)
	abort ();

      emit_insn (gen_sethh (op0, op1));
      emit_insn (gen_setlm (temp1, op1));
      emit_insn (gen_sethm (op0, op0, op1));
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_PLUS (DImode, op0, temp1)));
      emit_insn (gen_setlo (op0, op0, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			or	%temp1, %lo(symbol), %temp2
			add	%temp2, EMBMEDANY_BASE_REG, %reg

	 Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			or	%temp2, %lo(symbol), %temp4
			sllx	%temp3, 32, %temp5
			or	%temp4, %temp5, %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (op0, temp1));
	  emit_insn (gen_embmedany_losum (op0, op0, op1));
	}
      else
	{
	  /* Getting this right wrt. reloading is really tricky.
	     We _MUST_ have a separate temporary at this point,
	     so we barf immediately instead of generating
	     incorrect code.  */
	  if (temp1 == op0)
	    abort ();

	  emit_insn (gen_embmedany_textuhi (op0, op1));
	  emit_insn (gen_embmedany_texthi  (temp1, op1));
	  emit_insn (gen_embmedany_textulo (op0, op0, op1));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, op0, temp1)));
	  emit_insn (gen_embmedany_textlo  (op0, op0, op1));
	}
      break;

    default:
      abort ();
    }
}

/* These avoid problems when cross compiling.  If we do not
   go through all this hair then the optimizer will see
   invalid REG_EQUAL notes or in some cases none at all.  */
static void sparc_emit_set_safe_HIGH64 PROTO ((rtx, HOST_WIDE_INT));
static rtx gen_safe_SET64 PROTO ((rtx, HOST_WIDE_INT));
static rtx gen_safe_OR64 PROTO ((rtx, HOST_WIDE_INT));
static rtx gen_safe_XOR64 PROTO ((rtx, HOST_WIDE_INT));

#if HOST_BITS_PER_WIDE_INT == 64
#define GEN_HIGHINT64(__x)		GEN_INT ((__x) & 0xfffffc00)
#define GEN_INT64(__x)			GEN_INT (__x)
#else
#define GEN_HIGHINT64(__x) \
	gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
			      (__x) & 0xfffffc00, 0)
#define GEN_INT64(__x) \
	gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
			      (__x) & 0xffffffff, \
			      ((__x) & 0x80000000 \
			       ? 0xffffffff : 0))
#endif
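
/* Example of the difference being papered over: on a 32 bit host,
   GEN_INT64 (0x80000000) builds a CONST_DOUBLE with low == 0x80000000
   and high == 0xffffffff, the same sign extended value a 64 bit host
   gets from a plain GEN_INT; otherwise the REG_EQUAL notes seen by the
   optimizer would differ between hosts.  */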

/* The optimizer is not to assume anything about exactly
   which bits are set for a HIGH, they are unspecified.
   Unfortunately this leads to many missed optimizations
   during CSE.  We mask out the non-HIGH bits, and match
   a plain movdi, to alleviate this problem.  */
static void
sparc_emit_set_safe_HIGH64 (dest, val)
     rtx dest;
     HOST_WIDE_INT val;
{
  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_HIGHINT64 (val)));
}

static rtx
gen_safe_SET64 (dest, val)
     rtx dest;
     HOST_WIDE_INT val;
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT64 (val));
}

static rtx
gen_safe_OR64 (src, val)
     rtx src;
     HOST_WIDE_INT val;
{
  return gen_rtx_IOR (DImode, src, GEN_INT64 (val));
}

static rtx
gen_safe_XOR64 (src, val)
     rtx src;
     HOST_WIDE_INT val;
{
  return gen_rtx_XOR (DImode, src, GEN_INT64 (val));
}

/* Worker routines for 64-bit constant formation on arch64.
   One of the key things to be doing in these emissions is
   to create as many temp REGs as possible.  This makes it
   possible for half-built constants to be used later when
   such values are similar to something required later on.
   Without doing this, the optimizer cannot see such
   opportunities.  */

static void sparc_emit_set_const64_quick1
	PROTO((rtx, rtx, unsigned HOST_WIDE_INT, int));

static void
sparc_emit_set_const64_quick1 (op0, temp, low_bits, is_neg)
     rtx op0;
     rtx temp;
     unsigned HOST_WIDE_INT low_bits;
     int is_neg;
{
  unsigned HOST_WIDE_INT high_bits;

  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  sparc_emit_set_safe_HIGH64 (temp, high_bits);
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | (low_bits & 0x3ff)))));
	}
    }
}

static void sparc_emit_set_const64_quick2
	PROTO((rtx, rtx, unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT, int));

static void
sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_immediate, shift_count)
     rtx op0;
     rtx temp;
     unsigned HOST_WIDE_INT high_bits;
     unsigned HOST_WIDE_INT low_immediate;
     int shift_count;
{
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      sparc_emit_set_safe_HIGH64 (temp, high_bits);
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (VOIDmode, op0,
			  gen_rtx_ASHIFT (DImode, temp2,
					  GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (VOIDmode, op0,
			    gen_safe_OR64 (op0, low_immediate)));
}
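
/* Example (hypothetical constant): 0x0000012300000abc reaches quick2
   with high_bits == 0x123, low_immediate == 0xabc, shift_count == 32,
   and since 0x123 fits in a simm13 this emits

	mov	0x123, %tmp
	sllx	%tmp, 32, %reg
	or	%reg, 0xabc, %reg

   falling back to a sethi/or pair for the first step when high_bits
   is wider than 13 bits.  */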

static void sparc_emit_set_const64_longway
	PROTO((rtx, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));

/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.  */
static void
sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits)
     rtx op0;
     rtx temp;
     unsigned HOST_WIDE_INT high_bits;
     unsigned HOST_WIDE_INT low_bits;
{
  rtx sub_temp;

  if (reload_in_progress || reload_completed)
    sub_temp = op0;
  else
    sub_temp = gen_reg_rtx (DImode);

  if ((high_bits & 0xfffffc00) != 0)
    {
      sparc_emit_set_safe_HIGH64 (temp, high_bits);
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (!reload_in_progress && !reload_completed)
    {
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (32))));

      sparc_emit_set_safe_HIGH64 (temp2, low_bits);
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}

/* Analyze a 64-bit constant for certain properties.  */
static void analyze_64bit_constant
	PROTO((unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT,
	       int *, int *, int *));

static void
analyze_64bit_constant (high_bits, low_bits, hbsp, lbsp, abbasp)
     unsigned HOST_WIDE_INT high_bits, low_bits;
     int *hbsp, *lbsp, *abbasp;
{
  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
  int i;

  lowest_bit_set = highest_bit_set = -1;
  i = 0;
  do
    {
      if ((lowest_bit_set == -1)
	  && ((low_bits >> i) & 1))
	lowest_bit_set = i;
      if ((highest_bit_set == -1)
	  && ((high_bits >> (32 - i - 1)) & 1))
	highest_bit_set = (64 - i - 1);
    }
  while (++i < 32
	 && ((highest_bit_set == -1)
	     || (lowest_bit_set == -1)));
  if (i == 32)
    {
      i = 0;
      do
	{
	  if ((lowest_bit_set == -1)
	      && ((high_bits >> i) & 1))
	    lowest_bit_set = i + 32;
	  if ((highest_bit_set == -1)
	      && ((low_bits >> (32 - i - 1)) & 1))
	    highest_bit_set = 32 - i - 1;
	}
      while (++i < 32
	     && ((highest_bit_set == -1)
		 || (lowest_bit_set == -1)));
    }
  /* If there are no bits set this should have gone out
     as one instruction!  */
  if (lowest_bit_set == -1
      || highest_bit_set == -1)
    abort ();
  all_bits_between_are_set = 1;
  for (i = lowest_bit_set; i <= highest_bit_set; i++)
    {
      if (i < 32)
	{
	  if ((low_bits & (1 << i)) != 0)
	    continue;
	}
      else
	{
	  if ((high_bits & (1 << (i - 32))) != 0)
	    continue;
	}
      all_bits_between_are_set = 0;
      break;
    }
  *hbsp = highest_bit_set;
  *lbsp = lowest_bit_set;
  *abbasp = all_bits_between_are_set;
}
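
/* Worked example: for high_bits == 0, low_bits == 0x0003fc00 the scan
   above finds lowest_bit_set == 10 and highest_bit_set == 17, and since
   bits 10 ... 17 are contiguous, all_bits_between_are_set == 1.  */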

static int const64_is_2insns
	PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));

static int
const64_is_2insns (high_bits, low_bits)
     unsigned HOST_WIDE_INT high_bits, low_bits;
{
  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;

  if (high_bits == 0
      || high_bits == 0xffffffff)
    return 1;

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  if ((highest_bit_set == 63
       || lowest_bit_set == 0)
      && all_bits_between_are_set != 0)
    return 1;

  if ((highest_bit_set - lowest_bit_set) < 21)
    return 1;

  return 0;
}
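
/* For instance high_bits == 0xff, low_bits == 0 (the constant
   0xff00000000) spans only bits 32 ... 39, well under 21 apart, so this
   predicts the two insn sequence

	mov	0xff, %reg
	sllx	%reg, 32, %reg  */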

static unsigned HOST_WIDE_INT create_simple_focus_bits
	PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
	       int, int));

static unsigned HOST_WIDE_INT
create_simple_focus_bits (high_bits, low_bits, lowest_bit_set, shift)
     unsigned HOST_WIDE_INT high_bits, low_bits;
     int lowest_bit_set, shift;
{
  HOST_WIDE_INT hi, lo;

  if (lowest_bit_set < 32)
    {
      lo = (low_bits >> lowest_bit_set) << shift;
      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
    }
  else
    {
      lo = 0;
      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
    }
  if (hi & lo)
    abort ();
  return (hi | lo);
}
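
/* Continuing the example above: with high_bits == 0, low_bits ==
   0x0003fc00, lowest_bit_set == 10 and shift == 0 this returns 0xff,
   the interesting bits slid down to bit zero; with shift == 10 it
   returns 0x3fc00, positioned where a sethi can reach them.  */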
1801
1802 /* Here we are sure to be arch64 and this is an integer constant
1803 being loaded into a register. Emit the most efficient
1804 insn sequence possible. Detection of all the 1-insn cases
1805 has been done already. */
1806 void
1807 sparc_emit_set_const64 (op0, op1)
1808 rtx op0;
1809 rtx op1;
1810 {
1811 unsigned HOST_WIDE_INT high_bits, low_bits;
1812 int lowest_bit_set, highest_bit_set;
1813 int all_bits_between_are_set;
1814 rtx temp;
1815
1816 /* Sanity check that we know what we are working with. */
1817 if (! TARGET_ARCH64
1818 || GET_CODE (op0) != REG
1819 || (REGNO (op0) >= SPARC_FIRST_FP_REG
1820 && REGNO (op0) <= SPARC_LAST_V9_FP_REG))
1821 abort ();
1822
1823 if (reload_in_progress || reload_completed)
1824 temp = op0;
1825 else
1826 temp = gen_reg_rtx (DImode);
1827
1828 if (GET_CODE (op1) != CONST_DOUBLE
1829 && GET_CODE (op1) != CONST_INT)
1830 {
1831 sparc_emit_set_symbolic_const64 (op0, op1, temp);
1832 return;
1833 }
1834
1835 if (GET_CODE (op1) == CONST_DOUBLE)
1836 {
1837 #if HOST_BITS_PER_WIDE_INT == 64
1838 high_bits = (CONST_DOUBLE_LOW (op1) >> 32) & 0xffffffff;
1839 low_bits = CONST_DOUBLE_LOW (op1) & 0xffffffff;
1840 #else
1841 high_bits = CONST_DOUBLE_HIGH (op1);
1842 low_bits = CONST_DOUBLE_LOW (op1);
1843 #endif
1844 }
1845 else
1846 {
1847 #if HOST_BITS_PER_WIDE_INT == 64
1848 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1849 low_bits = (INTVAL (op1) & 0xffffffff);
1850 #else
1851 high_bits = ((INTVAL (op1) < 0) ?
1852 0xffffffff :
1853 0x00000000);
1854 low_bits = INTVAL (op1);
1855 #endif
1856 }
1857
1858 /* low_bits bits 0 --> 31
1859 high_bits bits 32 --> 63 */
1860
1861 analyze_64bit_constant (high_bits, low_bits,
1862 &highest_bit_set, &lowest_bit_set,
1863 &all_bits_between_are_set);
1864
1865 /* First try for a 2-insn sequence. */
1866
1867 /* These situations are preferred because the optimizer can
1868 * do more things with them:
1869 * 1) mov -1, %reg
1870 * sllx %reg, shift, %reg
1871 * 2) mov -1, %reg
1872 * srlx %reg, shift, %reg
1873 * 3) mov some_small_const, %reg
1874 * sllx %reg, shift, %reg
1875 */
1876 if (((highest_bit_set == 63
1877 || lowest_bit_set == 0)
1878 && all_bits_between_are_set != 0)
1879 || ((highest_bit_set - lowest_bit_set) < 12))
1880 {
1881 HOST_WIDE_INT the_const = -1;
1882 int shift = lowest_bit_set;
1883
1884 if ((highest_bit_set != 63
1885 && lowest_bit_set != 0)
1886 || all_bits_between_are_set == 0)
1887 {
1888 the_const =
1889 create_simple_focus_bits (high_bits, low_bits,
1890 lowest_bit_set, 0);
1891 }
1892 else if (lowest_bit_set == 0)
1893 shift = -(63 - highest_bit_set);
1894
1895 if (! SPARC_SIMM13_P (the_const))
1896 abort ();
1897
1898 emit_insn (gen_safe_SET64 (temp, the_const));
1899 if (shift > 0)
1900 emit_insn (gen_rtx_SET (VOIDmode,
1901 op0,
1902 gen_rtx_ASHIFT (DImode,
1903 temp,
1904 GEN_INT (shift))));
1905 else if (shift < 0)
1906 emit_insn (gen_rtx_SET (VOIDmode,
1907 op0,
1908 gen_rtx_LSHIFTRT (DImode,
1909 temp,
1910 GEN_INT (-shift))));
1911 else
1912 abort ();
1913 return;
1914 }
1915
1916 /* Now a range of 22 or less bits set somewhere.
1917 * 1) sethi %hi(focus_bits), %reg
1918 * sllx %reg, shift, %reg
1919 * 2) sethi %hi(focus_bits), %reg
1920 * srlx %reg, shift, %reg
1921 */
1922 if ((highest_bit_set - lowest_bit_set) < 21)
1923 {
1924 unsigned HOST_WIDE_INT focus_bits =
1925 create_simple_focus_bits (high_bits, low_bits,
1926 lowest_bit_set, 10);
1927
1928 if (! SPARC_SETHI_P (focus_bits))
1929 abort ();
1930
1931 sparc_emit_set_safe_HIGH64 (temp, focus_bits);
1932
1933 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
1934 if (lowest_bit_set < 10)
1935 emit_insn (gen_rtx_SET (VOIDmode,
1936 op0,
1937 gen_rtx_LSHIFTRT (DImode, temp,
1938 GEN_INT (10 - lowest_bit_set))));
1939 else if (lowest_bit_set > 10)
1940 emit_insn (gen_rtx_SET (VOIDmode,
1941 op0,
1942 gen_rtx_ASHIFT (DImode, temp,
1943 GEN_INT (lowest_bit_set - 10))));
1944 else
1945 abort ();
1946 return;
1947 }
1948
1949 /* 1) sethi %hi(low_bits), %reg
1950 * or %reg, %lo(low_bits), %reg
1951 * 2) sethi %hi(~low_bits), %reg
1952 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1953 */
1954 if (high_bits == 0
1955 || high_bits == 0xffffffff)
1956 {
1957 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1958 (high_bits == 0xffffffff));
1959 return;
1960 }
1961
1962 /* Now, try 3-insn sequences. */
1963
1964 /* 1) sethi %hi(high_bits), %reg
1965 * or %reg, %lo(high_bits), %reg
1966 * sllx %reg, 32, %reg
1967 */
1968 if (low_bits == 0)
1969 {
1970 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1971 return;
1972 }
1973
1974 /* We may be able to do something quick
1975 when the constant is negated, so try that. */
1976 if (const64_is_2insns ((~high_bits) & 0xffffffff,
1977 (~low_bits) & 0xfffffc00))
1978 {
1979 /* NOTE: The trailing bits get XOR'd so we need the
1980 non-negated bits, not the negated ones. */
1981 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1982
1983 if ((((~high_bits) & 0xffffffff) == 0
1984 && ((~low_bits) & 0x80000000) == 0)
1985 || (((~high_bits) & 0xffffffff) == 0xffffffff
1986 && ((~low_bits) & 0x80000000) != 0))
1987 {
1988 int fast_int = (~low_bits & 0xffffffff);
1989
1990 if ((SPARC_SETHI_P (fast_int)
1991 && (~high_bits & 0xffffffff) == 0)
1992 || SPARC_SIMM13_P (fast_int))
1993 emit_insn (gen_safe_SET64 (temp, fast_int));
1994 else
1995 sparc_emit_set_const64 (temp, GEN_INT64 (fast_int));
1996 }
1997 else
1998 {
1999 rtx negated_const;
2000 #if HOST_BITS_PER_WIDE_INT == 64
2001 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2002 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2003 #else
2004 negated_const = gen_rtx_CONST_DOUBLE (DImode, const0_rtx,
2005 (~low_bits) & 0xfffffc00,
2006 (~high_bits) & 0xffffffff);
2007 #endif
2008 sparc_emit_set_const64 (temp, negated_const);
2009 }
2010
2011 /* If we are XOR'ing with -1, then we should emit a one's complement
2012 instead. This way the combiner will notice logical operations
2013 such as ANDN later on and substitute. */
2014 if (trailing_bits == 0x3ff)
2015 {
2016 emit_insn (gen_rtx_SET (VOIDmode, op0,
2017 gen_rtx_NOT (DImode, temp)));
2018 }
2019 else
2020 {
2021 emit_insn (gen_rtx_SET (VOIDmode,
2022 op0,
2023 gen_safe_XOR64 (temp,
2024 (-0x400 | trailing_bits))));
2025 }
2026 return;
2027 }
2028
2029 /* 1) sethi %hi(xxx), %reg
2030 * or %reg, %lo(xxx), %reg
2031 * sllx %reg, yyy, %reg
2032 *
2033 * ??? This is just a generalized version of the low_bits==0
2034 * thing above, FIXME...
2035 */
2036 if ((highest_bit_set - lowest_bit_set) < 32)
2037 {
2038 unsigned HOST_WIDE_INT focus_bits =
2039 create_simple_focus_bits (high_bits, low_bits,
2040 lowest_bit_set, 0);
2041
2042 /* We can't get here in this state. */
2043 if (highest_bit_set < 32
2044 || lowest_bit_set >= 32)
2045 abort ();
2046
2047 /* So what we know is that the set bits straddle the
2048 middle of the 64-bit word. */
2049 sparc_emit_set_const64_quick2 (op0, temp,
2050 focus_bits, 0,
2051 lowest_bit_set);
2052 return;
2053 }
2054
2055 /* 1) sethi %hi(high_bits), %reg
2056 * or %reg, %lo(high_bits), %reg
2057 * sllx %reg, 32, %reg
2058 * or %reg, low_bits, %reg
2059 */
2060 if (SPARC_SIMM13_P (low_bits)
2061 && ((int) low_bits > 0))
2062 {
2063 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2064 return;
2065 }
2066
2067 /* The easiest way when all else fails, is full decomposition. */
2068 #if 0
2069 printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
2070 high_bits, low_bits, ~high_bits, ~low_bits);
2071 #endif
2072 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2073 }
2074
2075 /* X and Y are two things to compare using CODE. Emit the compare insn and
2076 return the rtx for the cc reg in the proper mode. */
2077
2078 rtx
2079 gen_compare_reg (code, x, y)
2080 enum rtx_code code;
2081 rtx x, y;
2082 {
2083 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
2084 rtx cc_reg;
2085
2086 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2087 fcc regs (cse can't tell they're really call clobbered regs and will
2088 remove a duplicate comparison even if there is an intervening function
2089 call - it will then try to reload the cc reg via an int reg which is why
2090 we need the movcc patterns). It is possible to provide the movcc
2091 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2092 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2093 to tell cse that CCFPE mode registers (even pseudos) are call
2094 clobbered. */
2095
2096 /* ??? This is an experiment. Rather than making changes to cse which may
2097 or may not be easy/clean, we do our own cse. This is possible because
2098 we will generate hard registers. Cse knows they're call clobbered (it
2099 doesn't know the same thing about pseudos). If we guess wrong, no big
2100 deal, but if we win, great! */
2101
2102 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2103 #if 1 /* experiment */
2104 {
2105 int reg;
2106 /* We cycle through the registers to ensure they're all exercised. */
2107 static int next_fcc_reg = 0;
2108 /* Previous x,y for each fcc reg. */
2109 static rtx prev_args[4][2];
2110
2111 /* Scan prev_args for x,y. */
2112 for (reg = 0; reg < 4; reg++)
2113 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2114 break;
2115 if (reg == 4)
2116 {
2117 reg = next_fcc_reg;
2118 prev_args[reg][0] = x;
2119 prev_args[reg][1] = y;
2120 next_fcc_reg = (next_fcc_reg + 1) & 3;
2121 }
2122 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2123 }
2124 #else
2125 cc_reg = gen_reg_rtx (mode);
2126 #endif /* ! experiment */
2127 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2128 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2129 else
2130 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2131
2132 emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
2133 gen_rtx_COMPARE (mode, x, y)));
2134
2135 return cc_reg;
2136 }
2137
2138 /* This function is used for v9 only.
2139 CODE is the code for an Scc's comparison.
2140 OPERANDS[0] is the target of the Scc insn.
2141 OPERANDS[1] is the value we compare against const0_rtx (which hasn't
2142 been generated yet).
2143
2144 This function is needed to turn
2145
2146 (set (reg:SI 110)
2147 (gt (reg:CCX 100 %icc)
2148 (const_int 0)))
2149 into
2150 (set (reg:SI 110)
2151 (gt:DI (reg:CCX 100 %icc)
2152 (const_int 0)))
2153
2154 IE: The instruction recognizer needs to see the mode of the comparison to
2155 find the right instruction. We could use "gt:DI" right in the
2156 define_expand, but leaving it out allows us to handle DI, SI, etc.
2157
2158 We refer to the global sparc compare operands sparc_compare_op0 and
2159 sparc_compare_op1. */
2160
2161 int
2162 gen_v9_scc (compare_code, operands)
2163 enum rtx_code compare_code;
2164 register rtx *operands;
2165 {
2166 rtx temp, op0, op1;
2167
2168 if (! TARGET_ARCH64
2169 && (GET_MODE (sparc_compare_op0) == DImode
2170 || GET_MODE (operands[0]) == DImode))
2171 return 0;
2172
2173 /* Handle the case where operands[0] == sparc_compare_op0.
2174 We "early clobber" the result. */
2175 if (REGNO (operands[0]) == REGNO (sparc_compare_op0))
2176 {
2177 op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0));
2178 emit_move_insn (op0, sparc_compare_op0);
2179 }
2180 else
2181 op0 = sparc_compare_op0;
2182 /* For consistency in the following. */
2183 op1 = sparc_compare_op1;
2184
2185 /* Try to use the movrCC insns. */
2186 if (TARGET_ARCH64
2187 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
2188 && op1 == const0_rtx
2189 && v9_regcmp_p (compare_code))
2190 {
2191 /* Special case for op0 != 0. This can be done with one instruction if
2192 operands[0] == sparc_compare_op0. We don't assume they are equal
2193 now though. */
2194
2195 if (compare_code == NE
2196 && GET_MODE (operands[0]) == DImode
2197 && GET_MODE (op0) == DImode)
2198 {
2199 emit_insn (gen_rtx_SET (VOIDmode, operands[0], op0));
2200 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2201 gen_rtx_IF_THEN_ELSE (DImode,
2202 gen_rtx_fmt_ee (compare_code, DImode,
2203 op0, const0_rtx),
2204 const1_rtx,
2205 operands[0])));
2206 return 1;
2207 }
2208
2209 emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
2210 if (GET_MODE (op0) != DImode)
2211 {
2212 temp = gen_reg_rtx (DImode);
2213 convert_move (temp, op0, 0);
2214 }
2215 else
2216 temp = op0;
2217 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2218 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
2219 gen_rtx_fmt_ee (compare_code, DImode,
2220 temp, const0_rtx),
2221 const1_rtx,
2222 operands[0])));
2223 return 1;
2224 }
2225 else
2226 {
2227 operands[1] = gen_compare_reg (compare_code, op0, op1);
2228
2229 switch (GET_MODE (operands[1]))
2230 {
2231 case CCmode :
2232 case CCXmode :
2233 case CCFPEmode :
2234 case CCFPmode :
2235 break;
2236 default :
2237 abort ();
2238 }
2239 emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
2240 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2241 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
2242 gen_rtx_fmt_ee (compare_code,
2243 GET_MODE (operands[1]),
2244 operands[1], const0_rtx),
2245 const1_rtx, operands[0])));
2246 return 1;
2247 }
2248 }
2249
2250 /* Emit a conditional jump insn for the v9 architecture using comparison code
2251 CODE and jump target LABEL.
2252 This function exists to take advantage of the v9 brxx insns. */
2253
2254 void
2255 emit_v9_brxx_insn (code, op0, label)
2256 enum rtx_code code;
2257 rtx op0, label;
2258 {
2259 emit_jump_insn (gen_rtx_SET (VOIDmode,
2260 pc_rtx,
2261 gen_rtx_IF_THEN_ELSE (VOIDmode,
2262 gen_rtx_fmt_ee (code, GET_MODE (op0),
2263 op0, const0_rtx),
2264 gen_rtx_LABEL_REF (VOIDmode, label),
2265 pc_rtx)));
2266 }
2267 \f
2268 /* Return nonzero if a return peephole that merges the return with the
2269 setting of an output register is ok. */
2270 int
2271 leaf_return_peephole_ok ()
2272 {
2273 return (actual_fsize == 0);
2274 }
2275
2276 /* Return nonzero if TRIAL can go into the function epilogue's
2277 delay slot. SLOT is the slot we are trying to fill. */
2278
2279 int
2280 eligible_for_epilogue_delay (trial, slot)
2281 rtx trial;
2282 int slot;
2283 {
2284 rtx pat, src;
2285
2286 if (slot >= 1)
2287 return 0;
2288
2289 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2290 return 0;
2291
2292 if (get_attr_length (trial) != 1)
2293 return 0;
2294
2295 /* If %g0 is live, there are lots of things we can't handle.
2296 Rather than trying to find them all now, let's punt and only
2297 optimize things as necessary. */
2298 if (TARGET_LIVE_G0)
2299 return 0;
2300
2301 /* If there are any call-saved registers, we should scan TRIAL to make
2302 sure it does not reference them. For now just make it easy. */
2303 if (num_gfregs)
2304 return 0;
2305
2306 /* In the case of a true leaf function, anything can go into the delay slot.
2307 A delay slot only exists however if the frame size is zero, otherwise
2308 we will put an insn to adjust the stack after the return. */
2309 if (current_function_uses_only_leaf_regs)
2310 {
2311 if (leaf_return_peephole_ok ())
2312 return ((get_attr_in_uncond_branch_delay (trial)
2313 == IN_BRANCH_DELAY_TRUE));
2314 return 0;
2315 }
2316
2317 /* If only trivial `restore' insns work, nothing can go in the
2318 delay slot. */
2319 else if (TARGET_BROKEN_SAVERESTORE)
2320 return 0;
2321
2322 pat = PATTERN (trial);
2323
2324 /* Otherwise, only operations which can be done in tandem with
2325 a `restore' or `return' insn can go into the delay slot. */
2326 if (GET_CODE (SET_DEST (pat)) != REG
2327 || REGNO (SET_DEST (pat)) >= 32
2328 || REGNO (SET_DEST (pat)) < 24)
2329 return 0;
2330
2331 /* The set of insns matched here must agree precisely with the set of
2332 patterns paired with a RETURN in sparc.md. */
2333
2334 src = SET_SRC (pat);
2335
2336 /* This matches "*return_[qhs]i" or even "*return_di" on TARGET_ARCH64. */
2337 if (arith_operand (src, GET_MODE (src)))
2338 {
2339 if (TARGET_ARCH64)
2340 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2341 else
2342 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
2343 }
2344
2345 /* This matches "*return_di". */
2346 else if (arith_double_operand (src, GET_MODE (src)))
2347 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2348
2349 /* This matches "*return_sf_no_fpu". */
2350 else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
2351 && register_operand (src, SFmode))
2352 return 1;
2353
2354 /* If we have the `return' instruction, anything that does not use
2355 local or output registers and can go into a delay slot wins. */
2356 else if (TARGET_V9 && ! epilogue_renumber (&pat, 1)
2357 && (get_attr_in_uncond_branch_delay (trial) == IN_BRANCH_DELAY_TRUE))
2358 return 1;
2359
2360 /* This matches "*return_addsi". */
2361 else if (GET_CODE (src) == PLUS
2362 && arith_operand (XEXP (src, 0), SImode)
2363 && arith_operand (XEXP (src, 1), SImode)
2364 && (register_operand (XEXP (src, 0), SImode)
2365 || register_operand (XEXP (src, 1), SImode)))
2366 return 1;
2367
2368 /* This matches "*return_adddi". */
2369 else if (GET_CODE (src) == PLUS
2370 && arith_double_operand (XEXP (src, 0), DImode)
2371 && arith_double_operand (XEXP (src, 1), DImode)
2372 && (register_operand (XEXP (src, 0), DImode)
2373 || register_operand (XEXP (src, 1), DImode)))
2374 return 1;
2375
2376 /* This can match "*return_losum_[sd]i".
2377 Catch only some cases, so that return_losum* don't have
2378 to be too big. */
2379 else if (GET_CODE (src) == LO_SUM
2380 && ! TARGET_CM_MEDMID
2381 && ((register_operand (XEXP (src, 0), SImode)
2382 && immediate_operand (XEXP (src, 1), SImode))
2383 || (TARGET_ARCH64
2384 && register_operand (XEXP (src, 0), DImode)
2385 && immediate_operand (XEXP (src, 1), DImode))))
2386 return 1;
2387
2388 /* sll{,x} reg,1,reg2 is add reg,reg,reg2 as well. */
2389 else if (GET_CODE (src) == ASHIFT
2390 && (register_operand (XEXP (src, 0), SImode)
2391 || register_operand (XEXP (src, 0), DImode))
2392 && XEXP (src, 1) == const1_rtx)
2393 return 1;
2394
2395 return 0;
2396 }
2397
2398 static int
2399 check_return_regs (x)
2400 rtx x;
2401 {
2402 switch (GET_CODE (x))
2403 {
2404 case REG:
2405 return IN_OR_GLOBAL_P (x);
2406
2407 case CONST_INT:
2408 case CONST_DOUBLE:
2409 case CONST:
2410 case SYMBOL_REF:
2411 case LABEL_REF:
2412 return 1;
2413
2414 case SET:
2415 case IOR:
2416 case AND:
2417 case XOR:
2418 case PLUS:
2419 case MINUS:
2420 if (check_return_regs (XEXP (x, 1)) == 0)
2421 return 0;
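/* Fall through to check XEXP (x, 0) as well. */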
2422 case NOT:
2423 case NEG:
2424 case MEM:
2425 return check_return_regs (XEXP (x, 0));
2426
2427 default:
2428 return 0;
2429 }
2430
2431 }
2432
2433 /* Return 1 if TRIAL references only in and global registers. */
2434 int
2435 eligible_for_return_delay (trial)
2436 rtx trial;
2437 {
2438 if (GET_CODE (PATTERN (trial)) != SET)
2439 return 0;
2440
2441 return check_return_regs (PATTERN (trial));
2442 }
2443
2444 int
2445 short_branch (uid1, uid2)
2446 int uid1, uid2;
2447 {
2448 unsigned int delta = insn_addresses[uid1] - insn_addresses[uid2];
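/* DELTA is unsigned, so this single compare is equivalent to
   -1024 <= (signed) delta < 1024. */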
2449 if (delta + 1024 < 2048)
2450 return 1;
2451 /* warning ("long branch, distance %d", delta); */
2452 return 0;
2453 }
2454
2455 /* Return non-zero if REG is not used after INSN.
2456 We assume REG is a reload reg, and therefore does
2457 not live past labels or calls or jumps. */
2458 int
2459 reg_unused_after (reg, insn)
2460 rtx reg;
2461 rtx insn;
2462 {
2463 enum rtx_code code, prev_code = UNKNOWN;
2464
2465 while ((insn = NEXT_INSN (insn)))
2466 {
2467 if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2468 return 1;
2469
2470 code = GET_CODE (insn);
2471 if (code == CODE_LABEL)
2472 return 1;
2473
2474 if (GET_RTX_CLASS (code) == 'i')
2475 {
2476 rtx set = single_set (insn);
2477 int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2478 if (set && in_src)
2479 return 0;
2480 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2481 return 1;
2482 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2483 return 0;
2484 }
2485 prev_code = code;
2486 }
2487 return 1;
2488 }
2489 \f
2490 /* The table we use to reference PIC data. */
2491 static rtx global_offset_table;
2492
2493 /* The function we use to get at it. */
2494 static rtx get_pc_symbol;
2495 static char get_pc_symbol_name[256];
2496
2497 /* Ensure that we are not using patterns that are not OK with PIC. */
2498
2499 int
2500 check_pic (i)
2501 int i;
2502 {
2503 switch (flag_pic)
2504 {
2505 case 1:
2506 if (GET_CODE (recog_data.operand[i]) == SYMBOL_REF
2507 || (GET_CODE (recog_data.operand[i]) == CONST
2508 && ! (GET_CODE (XEXP (recog_data.operand[i], 0)) == MINUS
2509 && (XEXP (XEXP (recog_data.operand[i], 0), 0)
2510 == global_offset_table)
2511 && (GET_CODE (XEXP (XEXP (recog_data.operand[i], 0), 1))
2512 == CONST))))
2513 abort ();
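/* Fall through. */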
2514 case 2:
2515 default:
2516 return 1;
2517 }
2518 }
2519
2520 /* Return true if X is an address which needs a temporary register when
2521 reloaded while generating PIC code. */
2522
2523 int
2524 pic_address_needs_scratch (x)
2525 rtx x;
2526 {
2527 /* An address which is a symbolic operand plus a non-SMALL_INT constant needs a temp reg. */
2528 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2529 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2530 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2531 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2532 return 1;
2533
2534 return 0;
2535 }
2536
2537 /* Legitimize PIC addresses. If the address is already position-independent,
2538 we return ORIG. Newly generated position-independent addresses go into a
2539 reg.  This is REG if nonzero, otherwise we allocate register(s) as
2540 necessary. */
2541
2542 rtx
2543 legitimize_pic_address (orig, mode, reg)
2544 rtx orig;
2545 enum machine_mode mode ATTRIBUTE_UNUSED;
2546 rtx reg;
2547 {
2548 if (GET_CODE (orig) == SYMBOL_REF)
2549 {
2550 rtx pic_ref, address;
2551 rtx insn;
2552
2553 if (reg == 0)
2554 {
2555 if (reload_in_progress || reload_completed)
2556 abort ();
2557 else
2558 reg = gen_reg_rtx (Pmode);
2559 }
2560
2561 if (flag_pic == 2)
2562 {
2563 /* If not during reload, allocate another temp reg here for loading
2564 in the address, so that these instructions can be optimized
2565 properly. */
2566 rtx temp_reg = ((reload_in_progress || reload_completed)
2567 ? reg : gen_reg_rtx (Pmode));
2568
2569 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
2570 won't get confused into thinking that these two instructions
2571 are loading in the true address of the symbol. If in the
2572 future a PIC rtx exists, that should be used instead. */
2573 if (Pmode == SImode)
2574 {
2575 emit_insn (gen_movsi_high_pic (temp_reg, orig));
2576 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
2577 }
2578 else
2579 {
2580 emit_insn (gen_movdi_high_pic (temp_reg, orig));
2581 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
2582 }
2583 address = temp_reg;
2584 }
2585 else
2586 address = orig;
2587
2588 pic_ref = gen_rtx_MEM (Pmode,
2589 gen_rtx_PLUS (Pmode,
2590 pic_offset_table_rtx, address));
2591 current_function_uses_pic_offset_table = 1;
2592 RTX_UNCHANGING_P (pic_ref) = 1;
2593 insn = emit_move_insn (reg, pic_ref);
2594 /* Put a REG_EQUAL note on this insn, so that it can be optimized
2595 by loop. */
2596 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
2597 REG_NOTES (insn));
2598 return reg;
2599 }
2600 else if (GET_CODE (orig) == CONST)
2601 {
2602 rtx base, offset;
2603
2604 if (GET_CODE (XEXP (orig, 0)) == PLUS
2605 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
2606 return orig;
2607
2608 if (reg == 0)
2609 {
2610 if (reload_in_progress || reload_completed)
2611 abort ();
2612 else
2613 reg = gen_reg_rtx (Pmode);
2614 }
2615
2616 if (GET_CODE (XEXP (orig, 0)) == PLUS)
2617 {
2618 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
2619 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
2620 base == reg ? 0 : reg);
2621 }
2622 else
2623 abort ();
2624
2625 if (GET_CODE (offset) == CONST_INT)
2626 {
2627 if (SMALL_INT (offset))
2628 return plus_constant_for_output (base, INTVAL (offset));
2629 else if (! reload_in_progress && ! reload_completed)
2630 offset = force_reg (Pmode, offset);
2631 else
2632 /* If we reach here, then something is seriously wrong. */
2633 abort ();
2634 }
2635 return gen_rtx_PLUS (Pmode, base, offset);
2636 }
2637 else if (GET_CODE (orig) == LABEL_REF)
2638 /* ??? Why do we do this? */
2639 /* Now movsi_pic_label_ref uses it, but we ought to be checking that
2640 the register is live instead, in case it is eliminated. */
2641 current_function_uses_pic_offset_table = 1;
2642
2643 return orig;
2644 }
2645
2646 /* Return the RTX for insns to set the PIC register. */
2647
2648 static rtx
2649 pic_setup_code ()
2650 {
2651 rtx seq;
2652
2653 start_sequence ();
2654 emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
2655 get_pc_symbol));
2656 seq = gen_sequence ();
2657 end_sequence ();
2658
2659 return seq;
2660 }
2661
2662 /* Emit special PIC prologues and epilogues. */
2663
2664 void
2665 finalize_pic ()
2666 {
2667 /* Labels to get the PC in the prologue of this function. */
2668 int orig_flag_pic = flag_pic;
2669 rtx insn;
2670
2671 if (current_function_uses_pic_offset_table == 0)
2672 return;
2673
2674 if (! flag_pic)
2675 abort ();
2676
2677 /* If we haven't emitted the special get_pc helper function, do so now. */
2678 if (get_pc_symbol_name[0] == 0)
2679 {
2680 int align;
2681
2682 ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
2683 text_section ();
2684
2685 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
2686 if (align > 0)
2687 ASM_OUTPUT_ALIGN (asm_out_file, align);
2688 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
2689 fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file);
2690 }
2691
2692 /* Initialize every time through, since we can't easily
2693 know this to be permanent. */
2694 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2695 get_pc_symbol = gen_rtx_SYMBOL_REF (Pmode, get_pc_symbol_name);
2696 flag_pic = 0;
2697
2698 emit_insn_after (pic_setup_code (), get_insns ());
2699
2700 /* Insert the code in each nonlocal goto receiver.
2701 If you make changes here or to the nonlocal_goto_receiver
2702 pattern, make sure the unspec_volatile numbers still
2703 match. */
2704 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2705 if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
2706 && XINT (PATTERN (insn), 1) == 5)
2707 emit_insn_after (pic_setup_code (), insn);
2708
2709 flag_pic = orig_flag_pic;
2710
2711 /* Need to emit this whether or not we obey regdecls,
2712 since setjmp/longjmp can cause life info to screw up.
2713 ??? In the case where we don't obey regdecls, this is not sufficient
2714 since we may not fall out the bottom. */
2715 emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
2716 }
2717 \f
2718 /* Return 1 if RTX is a MEM which is known to be aligned to at
2719 least a DESIRED byte boundary. */
2720
2721 int
2722 mem_min_alignment (mem, desired)
2723 rtx mem;
2724 int desired;
2725 {
2726 rtx addr, base, offset;
2727
2728 /* If it's not a MEM we can't accept it. */
2729 if (GET_CODE (mem) != MEM)
2730 return 0;
2731
2732 addr = XEXP (mem, 0);
2733 base = offset = NULL_RTX;
2734 if (GET_CODE (addr) == PLUS)
2735 {
2736 if (GET_CODE (XEXP (addr, 0)) == REG)
2737 {
2738 base = XEXP (addr, 0);
2739
2740 /* What we are saying here is that if the base
2741 REG is aligned properly, the compiler will make
2742 sure any REG based index upon it will be so
2743 as well. */
2744 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
2745 offset = XEXP (addr, 1);
2746 else
2747 offset = const0_rtx;
2748 }
2749 }
2750 else if (GET_CODE (addr) == REG)
2751 {
2752 base = addr;
2753 offset = const0_rtx;
2754 }
2755
2756 if (base != NULL_RTX)
2757 {
2758 int regno = REGNO (base);
2759
2760 if (regno != FRAME_POINTER_REGNUM
2761 && regno != STACK_POINTER_REGNUM)
2762 {
2763 /* Check if the compiler has recorded some information
2764 about the alignment of the base REG. If reload has
2765 completed, we already matched with proper alignments. */
2766 if (((current_function != 0
2767 && REGNO_POINTER_ALIGN (regno) >= desired)
2768 || reload_completed)
2769 && ((INTVAL (offset) & (desired - 1)) == 0))
2770 return 1;
2771 }
2772 else
2773 {
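/* For the frame and stack pointers, the known alignment only
   holds once the stack bias is subtracted out. */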
2774 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
2775 return 1;
2776 }
2777 }
2778 else if (! TARGET_UNALIGNED_DOUBLES
2779 || CONSTANT_P (addr)
2780 || GET_CODE (addr) == LO_SUM)
2781 {
2782 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
2783 is true, in which case we can only assume that an access is aligned if
2784 it is to a constant address, or the address involves a LO_SUM. */
2785 return 1;
2786 }
2787
2788 /* An obviously unaligned address. */
2789 return 0;
2790 }
2791
2792 \f
2793 /* Vectors to keep interesting information about registers where it can easily
2794 be got.  We used to use the actual mode value as the bit number, but there
2795 are more than 32 modes now. Instead we use two tables: one indexed by
2796 hard register number, and one indexed by mode. */
2797
2798 /* The purpose of sparc_mode_class is to shrink the range of modes so that
2799 they all fit (as bit numbers) in a 32 bit word (again). Each real mode is
2800 mapped into one sparc_mode_class mode. */
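/* For example, QImode, HImode and SImode all map to S_MODE, and
   DFmode maps to DF_MODE; see sparc_init_modes below. */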
2801
2802 enum sparc_mode_class {
2803 S_MODE, D_MODE, T_MODE, O_MODE,
2804 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
2805 CC_MODE, CCFP_MODE
2806 };
2807
2808 /* Modes for single-word and smaller quantities. */
2809 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
2810
2811 /* Modes for double-word and smaller quantities. */
2812 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
2813
2814 /* Modes for quad-word and smaller quantities. */
2815 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
2816
2817 /* Modes for 8-word and smaller quantities. */
2818 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
2819
2820 /* Modes for single-float quantities. We must allow any single word or
2821 smaller quantity. This is because the fix/float conversion instructions
2822 take integer inputs/outputs from the float registers. */
2823 #define SF_MODES (S_MODES)
2824
2825 /* Modes for double-float and smaller quantities. */
2826 #define DF_MODES (S_MODES | D_MODES)
2827
2828 /* Modes for double-float only quantities. */
2829 #define DF_MODES_NO_S (D_MODES)
2830
2831 /* Modes for quad-float only quantities. */
2832 #define TF_ONLY_MODES (1 << (int) TF_MODE)
2833
2834 /* Modes for quad-float and smaller quantities. */
2835 #define TF_MODES (DF_MODES | TF_ONLY_MODES)
2836
2837 /* Modes for quad-float and double-float quantities. */
2838 #define TF_MODES_NO_S (DF_MODES_NO_S | TF_ONLY_MODES)
2839
2840 /* Modes for quad-float pair only quantities. */
2841 #define OF_ONLY_MODES (1 << (int) OF_MODE)
2842
2843 /* Modes for quad-float pairs and smaller quantities. */
2844 #define OF_MODES (TF_MODES | OF_ONLY_MODES)
2845
2846 #define OF_MODES_NO_S (TF_MODES_NO_S | OF_ONLY_MODES)
2847
2848 /* Modes for condition codes. */
2849 #define CC_MODES (1 << (int) CC_MODE)
2850 #define CCFP_MODES (1 << (int) CCFP_MODE)
2851
2852 /* Value is 1 if register/mode pair is acceptable on sparc.
2853 The funny mixture of D and T modes is because integer operations
2854 do not specially operate on tetra quantities, so non-quad-aligned
2855 registers can hold quadword quantities (except %o4 and %i4 because
2856 they cross fixed registers). */
2857
2858 /* This points to either the 32 bit or the 64 bit version. */
2859 int *hard_regno_mode_classes;
2860
2861 static int hard_32bit_mode_classes[] = {
2862 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
2863 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
2864 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
2865 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
2866
2867 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
2868 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
2869 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
2870 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
2871
2872 /* FP regs f32 to f63. Only the even numbered registers actually exist,
2873 and none can hold SFmode/SImode values. */
2874 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
2875 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
2876 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
2877 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
2878
2879 /* %fcc[0123] */
2880 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
2881
2882 /* %icc */
2883 CC_MODES
2884 };
2885
2886 static int hard_64bit_mode_classes[] = {
2887 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
2888 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
2889 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
2890 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
2891
2892 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
2893 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
2894 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
2895 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
2896
2897 /* FP regs f32 to f63. Only the even numbered registers actually exist,
2898 and none can hold SFmode/SImode values. */
2899 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
2900 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
2901 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
2902 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
2903
2904 /* %fcc[0123] */
2905 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
2906
2907 /* %icc */
2908 CC_MODES
2909 };
2910
2911 int sparc_mode_class [NUM_MACHINE_MODES];
2912
2913 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
2914
2915 static void
2916 sparc_init_modes ()
2917 {
2918 int i;
2919
2920 for (i = 0; i < NUM_MACHINE_MODES; i++)
2921 {
2922 switch (GET_MODE_CLASS (i))
2923 {
2924 case MODE_INT:
2925 case MODE_PARTIAL_INT:
2926 case MODE_COMPLEX_INT:
2927 if (GET_MODE_SIZE (i) <= 4)
2928 sparc_mode_class[i] = 1 << (int) S_MODE;
2929 else if (GET_MODE_SIZE (i) == 8)
2930 sparc_mode_class[i] = 1 << (int) D_MODE;
2931 else if (GET_MODE_SIZE (i) == 16)
2932 sparc_mode_class[i] = 1 << (int) T_MODE;
2933 else if (GET_MODE_SIZE (i) == 32)
2934 sparc_mode_class[i] = 1 << (int) O_MODE;
2935 else
2936 sparc_mode_class[i] = 0;
2937 break;
2938 case MODE_FLOAT:
2939 case MODE_COMPLEX_FLOAT:
2940 if (GET_MODE_SIZE (i) <= 4)
2941 sparc_mode_class[i] = 1 << (int) SF_MODE;
2942 else if (GET_MODE_SIZE (i) == 8)
2943 sparc_mode_class[i] = 1 << (int) DF_MODE;
2944 else if (GET_MODE_SIZE (i) == 16)
2945 sparc_mode_class[i] = 1 << (int) TF_MODE;
2946 else if (GET_MODE_SIZE (i) == 32)
2947 sparc_mode_class[i] = 1 << (int) OF_MODE;
2948 else
2949 sparc_mode_class[i] = 0;
2950 break;
2951 case MODE_CC:
2952 default:
2953 /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
2954 we must explicitly check for them here. */
2955 if (i == (int) CCFPmode || i == (int) CCFPEmode)
2956 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
2957 else if (i == (int) CCmode || i == (int) CC_NOOVmode
2958 || i == (int) CCXmode || i == (int) CCX_NOOVmode)
2959 sparc_mode_class[i] = 1 << (int) CC_MODE;
2960 else
2961 sparc_mode_class[i] = 0;
2962 break;
2963 }
2964 }
2965
2966 if (TARGET_ARCH64)
2967 hard_regno_mode_classes = hard_64bit_mode_classes;
2968 else
2969 hard_regno_mode_classes = hard_32bit_mode_classes;
2970
2971 /* Initialize the array used by REGNO_REG_CLASS. */
2972 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
2973 {
2974 if (i < 16 && TARGET_V8PLUS)
2975 sparc_regno_reg_class[i] = I64_REGS;
2976 else if (i < 32)
2977 sparc_regno_reg_class[i] = GENERAL_REGS;
2978 else if (i < 64)
2979 sparc_regno_reg_class[i] = FP_REGS;
2980 else if (i < 96)
2981 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
2982 else if (i < 100)
2983 sparc_regno_reg_class[i] = FPCC_REGS;
2984 else
2985 sparc_regno_reg_class[i] = NO_REGS;
2986 }
2987 }
2988 \f
2989 /* Save non call used registers from LOW to HIGH at BASE+OFFSET.
2990 N_REGS is the number of 4-byte regs saved thus far. This applies even to
2991 v9 int regs as it simplifies the code. */
2992
2993 static int
2994 save_regs (file, low, high, base, offset, n_regs, real_offset)
2995 FILE *file;
2996 int low, high;
2997 const char *base;
2998 int offset;
2999 int n_regs;
3000 int real_offset;
3001 {
3002 int i;
3003
3004 if (TARGET_ARCH64 && high <= 32)
3005 {
3006 for (i = low; i < high; i++)
3007 {
3008 if (regs_ever_live[i] && ! call_used_regs[i])
3009 {
3010 fprintf (file, "\tstx\t%s, [%s+%d]\n",
3011 reg_names[i], base, offset + 4 * n_regs);
3012 if (dwarf2out_do_frame ())
3013 dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
3014 n_regs += 2;
3015 }
3016 }
3017 }
3018 else
3019 {
3020 for (i = low; i < high; i += 2)
3021 {
3022 if (regs_ever_live[i] && ! call_used_regs[i])
3023 {
3024 if (regs_ever_live[i+1] && ! call_used_regs[i+1])
3025 {
3026 fprintf (file, "\tstd\t%s, [%s+%d]\n",
3027 reg_names[i], base, offset + 4 * n_regs);
3028 if (dwarf2out_do_frame ())
3029 {
3030 char *l = dwarf2out_cfi_label ();
3031 dwarf2out_reg_save (l, i, real_offset + 4 * n_regs);
3032 dwarf2out_reg_save (l, i+1, real_offset + 4 * n_regs + 4);
3033 }
3034 n_regs += 2;
3035 }
3036 else
3037 {
3038 fprintf (file, "\tst\t%s, [%s+%d]\n",
3039 reg_names[i], base, offset + 4 * n_regs);
3040 if (dwarf2out_do_frame ())
3041 dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
3042 n_regs += 2;
3043 }
3044 }
3045 else
3046 {
3047 if (regs_ever_live[i+1] && ! call_used_regs[i+1])
3048 {
3049 fprintf (file, "\tst\t%s, [%s+%d]\n",
3050 reg_names[i+1], base, offset + 4 * n_regs + 4);
3051 if (dwarf2out_do_frame ())
3052 dwarf2out_reg_save ("", i + 1, real_offset + 4 * n_regs + 4);
3053 n_regs += 2;
3054 }
3055 }
3056 }
3057 }
3058 return n_regs;
3059 }
3060
3061 /* Restore non call used registers from LOW to HIGH at BASE+OFFSET.
3062
3063 N_REGS is the number of 4-byte regs saved thus far. This applies even to
3064 v9 int regs as it simplifies the code. */
3065
3066 static int
3067 restore_regs (file, low, high, base, offset, n_regs)
3068 FILE *file;
3069 int low, high;
3070 const char *base;
3071 int offset;
3072 int n_regs;
3073 {
3074 int i;
3075
3076 if (TARGET_ARCH64 && high <= 32)
3077 {
3078 for (i = low; i < high; i++)
3079 {
3080 if (regs_ever_live[i] && ! call_used_regs[i])
3081 fprintf (file, "\tldx\t[%s+%d], %s\n",
3082 base, offset + 4 * n_regs, reg_names[i]),
3083 n_regs += 2;
3084 }
3085 }
3086 else
3087 {
3088 for (i = low; i < high; i += 2)
3089 {
3090 if (regs_ever_live[i] && ! call_used_regs[i])
3091 if (regs_ever_live[i+1] && ! call_used_regs[i+1])
3092 fprintf (file, "\tldd\t[%s+%d], %s\n",
3093 base, offset + 4 * n_regs, reg_names[i]),
3094 n_regs += 2;
3095 else
3096 fprintf (file, "\tld\t[%s+%d],%s\n",
3097 base, offset + 4 * n_regs, reg_names[i]),
3098 n_regs += 2;
3099 else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
3100 fprintf (file, "\tld\t[%s+%d],%s\n",
3101 base, offset + 4 * n_regs + 4, reg_names[i+1]),
3102 n_regs += 2;
3103 }
3104 }
3105 return n_regs;
3106 }
3107
3108 /* Compute the frame size required by the function. This function is called
3109 during the reload pass and also by output_function_prologue(). */
3110
3111 int
3112 compute_frame_size (size, leaf_function)
3113 int size;
3114 int leaf_function;
3115 {
3116 int n_regs = 0, i;
3117 int outgoing_args_size = (current_function_outgoing_args_size
3118 + REG_PARM_STACK_SPACE (current_function_decl));
3119
3120 if (TARGET_EPILOGUE)
3121 {
3122 /* N_REGS is the number of 4-byte regs saved thus far. This applies
3123 even to v9 int regs to be consistent with save_regs/restore_regs. */
3124
3125 if (TARGET_ARCH64)
3126 {
3127 for (i = 0; i < 8; i++)
3128 if (regs_ever_live[i] && ! call_used_regs[i])
3129 n_regs += 2;
3130 }
3131 else
3132 {
3133 for (i = 0; i < 8; i += 2)
3134 if ((regs_ever_live[i] && ! call_used_regs[i])
3135 || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
3136 n_regs += 2;
3137 }
3138
3139 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
3140 if ((regs_ever_live[i] && ! call_used_regs[i])
3141 || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
3142 n_regs += 2;
3143 }
3144
3145 /* Set up values for use in `function_epilogue'. */
3146 num_gfregs = n_regs;
3147
3148 if (leaf_function && n_regs == 0
3149 && size == 0 && current_function_outgoing_args_size == 0)
3150 {
3151 actual_fsize = apparent_fsize = 0;
3152 }
3153 else
3154 {
3155 /* We subtract STARTING_FRAME_OFFSET, remember it's negative.
3156 The stack bias (if any) is taken out to undo its effects. */
3157 apparent_fsize = (size - STARTING_FRAME_OFFSET + SPARC_STACK_BIAS + 7) & -8;
3158 apparent_fsize += n_regs * 4;
3159 actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
3160 }
3161
3162 /* Make sure nothing can clobber our register windows.
3163 If a SAVE must be done, or there is a stack-local variable,
3164 the register window area must be allocated.
3165 ??? For v8 we apparently need an additional 8 bytes of reserved space. */
3166 if (leaf_function == 0 || size > 0)
3167 actual_fsize += (16 * UNITS_PER_WORD) + (TARGET_ARCH64 ? 0 : 8);
3168
3169 return SPARC_STACK_ALIGN (actual_fsize);
3170 }
3171
3172 /* Build a (32 bit) big number in a register. */
3173 /* ??? We may be able to use the set macro here too. */
3174
3175 static void
3176 build_big_number (file, num, reg)
3177 FILE *file;
3178 int num;
3179 const char *reg;
3180 {
3181 if (num >= 0 || ! TARGET_ARCH64)
3182 {
3183 fprintf (file, "\tsethi\t%%hi(%d), %s\n", num, reg);
3184 if ((num & 0x3ff) != 0)
3185 fprintf (file, "\tor\t%s, %%lo(%d), %s\n", reg, num, reg);
3186 }
3187 else /* num < 0 && TARGET_ARCH64 */
3188 {
3189 /* Sethi does not sign extend, so we must use a little trickery
3190 to use it for negative numbers. Invert the constant before
3191 loading it in, then use xor immediate to invert the loaded bits
3192 (along with the upper 32 bits) to the desired constant. This
3193 works because the sethi and immediate fields overlap. */
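/* Worked example (illustrative): for num = -0x1234, inv = 0x1233 and
   low = -0x234; sethi %hi(0x1233) loads 0x1000, and xor with the
   sign-extended -0x234 yields 0xffffffffffffedcc, i.e. -0x1234.  */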
3194 int asize = num;
3195 int inv = ~asize;
3196 int low = -0x400 + (asize & 0x3FF);
3197
3198 fprintf (file, "\tsethi\t%%hi(%d), %s\n\txor\t%s, %d, %s\n",
3199 inv, reg, reg, low, reg);
3200 }
3201 }
3202
3203 /* Output any necessary .register pseudo-ops. */
3204 void
3205 sparc_output_scratch_registers (file)
3206 FILE *file;
3207 {
3208 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
3209 int i;
3210
3211 if (TARGET_ARCH32)
3212 return;
3213
3214 /* Check if %g[2367] were used without
3215 .register being printed for them already. */
3216 for (i = 2; i < 8; i++)
3217 {
3218 if (regs_ever_live [i]
3219 && ! sparc_hard_reg_printed [i])
3220 {
3221 sparc_hard_reg_printed [i] = 1;
3222 fprintf (file, "\t.register\t%%g%d, #scratch\n", i);
3223 }
3224 if (i == 3) i = 5;
3225 }
3226 #endif
3227 }
3228
3229 /* Output code for the function prologue. */
3230
3231 void
3232 output_function_prologue (file, size, leaf_function)
3233 FILE *file;
3234 int size;
3235 int leaf_function;
3236 {
3237 sparc_output_scratch_registers (file);
3238
3239 /* Need to use actual_fsize, since we are also allocating
3240 space for our callee (and our own register save area). */
3241 actual_fsize = compute_frame_size (size, leaf_function);
3242
3243 if (leaf_function)
3244 {
3245 frame_base_name = "%sp";
3246 frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
3247 }
3248 else
3249 {
3250 frame_base_name = "%fp";
3251 frame_base_offset = SPARC_STACK_BIAS;
3252 }
3253
3254 /* This is only for the human reader. */
3255 fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
3256
3257 if (actual_fsize == 0)
3258 /* do nothing. */ ;
3259 else if (! leaf_function && ! TARGET_BROKEN_SAVERESTORE)
3260 {
3261 if (actual_fsize <= 4096)
3262 fprintf (file, "\tsave\t%%sp, -%d, %%sp\n", actual_fsize);
3263 else if (actual_fsize <= 8192)
3264 {
3265 fprintf (file, "\tsave\t%%sp, -4096, %%sp\n");
3266 fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
3267 }
3268 else
3269 {
3270 build_big_number (file, -actual_fsize, "%g1");
3271 fprintf (file, "\tsave\t%%sp, %%g1, %%sp\n");
3272 }
3273 }
3274 else if (! leaf_function && TARGET_BROKEN_SAVERESTORE)
3275 {
3276 /* We assume the environment will properly handle or otherwise avoid
3277 trouble associated with an interrupt occurring after the `save' or
3278 trap occurring during it. */
3279 fprintf (file, "\tsave\n");
3280
3281 if (actual_fsize <= 4096)
3282 fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize);
3283 else if (actual_fsize <= 8192)
3284 {
3285 fprintf (file, "\tadd\t%%fp, -4096, %%sp\n");
3286 fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize - 4096);
3287 }
3288 else
3289 {
3290 build_big_number (file, -actual_fsize, "%g1");
3291 fprintf (file, "\tadd\t%%fp, %%g1, %%sp\n");
3292 }
3293 }
3294 else /* leaf function */
3295 {
3296 if (actual_fsize <= 4096)
3297 fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize);
3298 else if (actual_fsize <= 8192)
3299 {
3300 fprintf (file, "\tadd\t%%sp, -4096, %%sp\n");
3301 fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
3302 }
3303 else
3304 {
3305 build_big_number (file, -actual_fsize, "%g1");
3306 fprintf (file, "\tadd\t%%sp, %%g1, %%sp\n");
3307 }
3308 }
3309
3310 if (dwarf2out_do_frame () && actual_fsize)
3311 {
3312 char *label = dwarf2out_cfi_label ();
3313
3314 /* The canonical frame address refers to the top of the frame. */
3315 dwarf2out_def_cfa (label, (leaf_function ? STACK_POINTER_REGNUM
3316 : FRAME_POINTER_REGNUM),
3317 frame_base_offset);
3318
3319 if (! leaf_function)
3320 {
3321 /* Note the register window save. This tells the unwinder that
3322 it needs to restore the window registers from the previous
3323 frame's window save area at 0(cfa). */
3324 dwarf2out_window_save (label);
3325
3326 /* The return address (-8) is now in %i7. */
3327 dwarf2out_return_reg (label, 31);
3328 }
3329 }
3330
3331 /* If doing anything with PIC, do it now. */
3332 if (! flag_pic)
3333 fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
3334
3335 /* Call saved registers are saved just above the outgoing argument area. */
3336 if (num_gfregs)
3337 {
3338 int offset, real_offset, n_regs;
3339 const char *base;
3340
3341 real_offset = -apparent_fsize;
3342 offset = -apparent_fsize + frame_base_offset;
3343 if (offset < -4096 || offset + num_gfregs * 4 > 4096)
3344 {
3345 /* ??? This might be optimized a little as %g1 might already have a
3346 value close enough that a single add insn will do. */
3347 /* ??? Although, all of this is probably only a temporary fix
3348 because if %g1 can hold a function result, then
3349 output_function_epilogue will lose (the result will get
3350 clobbered). */
3351 build_big_number (file, offset, "%g1");
3352 fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
3353 base = "%g1";
3354 offset = 0;
3355 }
3356 else
3357 {
3358 base = frame_base_name;
3359 }
3360
3361 n_regs = 0;
3362 if (TARGET_EPILOGUE && ! leaf_function)
3363 /* ??? Originally saved regs 0-15 here. */
3364 n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
3365 else if (leaf_function)
3366 /* ??? Originally saved regs 0-31 here. */
3367 n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
3368 if (TARGET_EPILOGUE)
3369 save_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs,
3370 real_offset);
3371 }
3372
3373 leaf_label = 0;
3374 if (leaf_function && actual_fsize != 0)
3375 {
3376 /* warning ("leaf procedure with frame size %d", actual_fsize); */
3377 if (! TARGET_EPILOGUE)
3378 leaf_label = gen_label_rtx ();
3379 }
3380 }
3381
3382 /* Output code for the function epilogue. */
3383
3384 void
3385 output_function_epilogue (file, size, leaf_function)
3386 FILE *file;
3387 int size ATTRIBUTE_UNUSED;
3388 int leaf_function;
3389 {
3390 const char *ret;
3391
3392 if (leaf_label)
3393 {
3394 emit_label_after (leaf_label, get_last_insn ());
3395 final_scan_insn (get_last_insn (), file, 0, 0, 1);
3396 }
3397
3398 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
3399 else if (profile_block_flag == 2)
3400 {
3401 FUNCTION_BLOCK_PROFILER_EXIT(file);
3402 }
3403 #endif
3404
3405 else if (current_function_epilogue_delay_list == 0)
3406 {
3407 /* If code does not drop into the epilogue, we need
3408 do nothing except output pending case vectors. */
3409 rtx insn = get_last_insn ();
3410 if (GET_CODE (insn) == NOTE)
3411 insn = prev_nonnote_insn (insn);
3412 if (insn && GET_CODE (insn) == BARRIER)
3413 goto output_vectors;
3414 }
3415
3416 /* Restore any call saved registers. */
3417 if (num_gfregs)
3418 {
3419 int offset, n_regs;
3420 const char *base;
3421
3422 offset = -apparent_fsize + frame_base_offset;
3423 if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
3424 {
3425 build_big_number (file, offset, "%g1");
3426 fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
3427 base = "%g1";
3428 offset = 0;
3429 }
3430 else
3431 {
3432 base = frame_base_name;
3433 }
3434
3435 n_regs = 0;
3436 if (TARGET_EPILOGUE && ! leaf_function)
3437 /* ??? Originally saved regs 0-15 here. */
3438 n_regs = restore_regs (file, 0, 8, base, offset, 0);
3439 else if (leaf_function)
3440 /* ??? Originally saved regs 0-31 here. */
3441 n_regs = restore_regs (file, 0, 8, base, offset, 0);
3442 if (TARGET_EPILOGUE)
3443 restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
3444 }
3445
3446 /* Work out how to skip the caller's unimp instruction if required. */
3447 if (leaf_function)
3448 ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%o7+12" : "retl");
3449 else
3450 ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%i7+12" : "ret");
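/* A normal return transfers to %o7+8 or %i7+8; the +12 steps over
   one extra 4-byte insn. */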
3451
3452 if (TARGET_EPILOGUE || leaf_label)
3453 {
3454 int old_target_epilogue = TARGET_EPILOGUE;
3455 target_flags &= ~old_target_epilogue;
3456
3457 if (! leaf_function)
3458 {
3459 /* If we wound up with things in our delay slot, flush them here. */
3460 if (current_function_epilogue_delay_list)
3461 {
3462 rtx delay = PATTERN (XEXP (current_function_epilogue_delay_list, 0));
3463
3464 if (TARGET_V9 && ! epilogue_renumber (&delay, 1))
3465 {
3466 epilogue_renumber (&delay, 0);
3467 fputs (SKIP_CALLERS_UNIMP_P
3468 ? "\treturn\t%i7+12\n"
3469 : "\treturn\t%i7+8\n", file);
3470 final_scan_insn (XEXP (current_function_epilogue_delay_list, 0), file, 1, 0, 0);
3471 }
3472 else
3473 {
3474 rtx insn = emit_jump_insn_after (gen_rtx_RETURN (VOIDmode),
3475 get_last_insn ());
3476 rtx src;
3477
3478 if (GET_CODE (delay) != SET)
3479 abort ();
3480
3481 src = SET_SRC (delay);
3482 if (GET_CODE (src) == ASHIFT)
3483 {
3484 if (XEXP (src, 1) != const1_rtx)
3485 abort ();
3486 SET_SRC (delay) = gen_rtx_PLUS (GET_MODE (src), XEXP (src, 0),
3487 XEXP (src, 0));
3488 }
3489
3490 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode,
3491 gen_rtvec (2, delay, PATTERN (insn)));
3492 final_scan_insn (insn, file, 1, 0, 1);
3493 }
3494 }
3495 else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
3496 fputs ("\treturn\t%i7+8\n\tnop\n", file);
3497 else
3498 fprintf (file, "\t%s\n\trestore\n", ret);
3499 }
3500 /* All of the following cases are for leaf functions. */
3501 else if (current_function_epilogue_delay_list)
3502 {
3503 /* eligible_for_epilogue_delay ensures that if this is a
3504 leaf function, then we will only have an insn in the delay slot
3505 if the frame size is zero, thus no adjust for the stack is
3506 needed here. */
3507 if (actual_fsize != 0)
3508 abort ();
3509 fprintf (file, "\t%s\n", ret);
3510 final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
3511 file, 1, 0, 1);
3512 }
3513 /* Output 'nop' instead of 'sub %sp,-0,%sp' when no frame, so as to
3514 avoid generating confusing assembly language output. */
3515 else if (actual_fsize == 0)
3516 fprintf (file, "\t%s\n\tnop\n", ret);
3517 else if (actual_fsize <= 4096)
3518 fprintf (file, "\t%s\n\tsub\t%%sp, -%d, %%sp\n", ret, actual_fsize);
3519 else if (actual_fsize <= 8192)
3520 fprintf (file, "\tsub\t%%sp, -4096, %%sp\n\t%s\n\tsub\t%%sp, -%d, %%sp\n",
3521 ret, actual_fsize - 4096);
3522 else if ((actual_fsize & 0x3ff) == 0)
3523 fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
3524 actual_fsize, ret);
3525 else
3526 fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
3527 actual_fsize, actual_fsize, ret);
3528 target_flags |= old_target_epilogue;
3529 }
3530
3531 output_vectors:
3532 sparc_output_deferred_case_vectors ();
3533 }
3534 \f
3535 /* Functions for handling argument passing.
3536
3537 For v8 the first six args are normally in registers and the rest are
3538 pushed. Any arg that starts within the first 6 words is at least
3539 partially passed in a register unless its data type forbids.
3540
3541 For v9, the argument registers are laid out as an array of 16 elements
3542 and arguments are added sequentially. The first 6 int args and up to the
3543 first 16 fp args (depending on size) are passed in regs.
3544
3545 Slot Stack Integral Float Float in structure Double Long Double
3546 ---- ----- -------- ----- ------------------ ------ -----------
3547 15 [SP+248] %f31 %f30,%f31 %d30
3548 14 [SP+240] %f29 %f28,%f29 %d28 %q28
3549 13 [SP+232] %f27 %f26,%f27 %d26
3550 12 [SP+224] %f25 %f24,%f25 %d24 %q24
3551 11 [SP+216] %f23 %f22,%f23 %d22
3552 10 [SP+208] %f21 %f20,%f21 %d20 %q20
3553 9 [SP+200] %f19 %f18,%f19 %d18
3554 8 [SP+192] %f17 %f16,%f17 %d16 %q16
3555 7 [SP+184] %f15 %f14,%f15 %d14
3556 6 [SP+176] %f13 %f12,%f13 %d12 %q12
3557 5 [SP+168] %o5 %f11 %f10,%f11 %d10
3558 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
3559 3 [SP+152] %o3 %f7 %f6,%f7 %d6
3560 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
3561 1 [SP+136] %o1 %f3 %f2,%f3 %d2
3562 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
3563
3564 Here SP = %sp if -mno-stack-bias, %sp+stack_bias otherwise.
3565
3566 Integral arguments are always passed as 64 bit quantities appropriately
3567 extended.
3568
3569 Passing of floating point values is handled as follows.
3570 If a prototype is in scope:
3571 If the value is in a named argument (i.e. not a stdarg function or a
3572 value not part of the `...') then the value is passed in the appropriate
3573 fp reg.
3574 If the value is part of the `...' and is passed in one of the first 6
3575 slots then the value is passed in the appropriate int reg.
3576 If the value is part of the `...' and is not passed in one of the first 6
3577 slots then the value is passed in memory.
3578 If a prototype is not in scope:
3579 If the value is one of the first 6 arguments the value is passed in the
3580 appropriate integer reg and the appropriate fp reg.
3581 If the value is not one of the first 6 arguments the value is passed in
3582 the appropriate fp reg and in memory.
3583 */
3584
3585 /* Maximum number of int regs for args. */
3586 #define SPARC_INT_ARG_MAX 6
3587 /* Maximum number of fp regs for args. */
3588 #define SPARC_FP_ARG_MAX 16
3589
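/* Number of words needed to hold SIZE bytes, rounding up. */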
3590 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
3591
3592 /* Handle the INIT_CUMULATIVE_ARGS macro.
3593 Initialize a variable CUM of type CUMULATIVE_ARGS
3594 for a call to a function whose data type is FNTYPE.
3595 For a library call, FNTYPE is 0. */
3596
3597 void
3598 init_cumulative_args (cum, fntype, libname, indirect)
3599 CUMULATIVE_ARGS *cum;
3600 tree fntype;
3601 rtx libname ATTRIBUTE_UNUSED;
3602 int indirect ATTRIBUTE_UNUSED;
3603 {
3604 cum->words = 0;
3605 cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
3606 cum->libcall_p = fntype == 0;
3607 }
3608
3609 /* Compute the slot number to pass an argument in.
3610 Returns the slot number or -1 if passing on the stack.
3611
3612 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3613 the preceding args and about the function being called.
3614 MODE is the argument's machine mode.
3615 TYPE is the data type of the argument (as a tree).
3616 This is null for libcalls where that information may
3617 not be available.
3618 NAMED is nonzero if this argument is a named parameter
3619 (otherwise it is an extra parameter matching an ellipsis).
3620 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
3621 *PREGNO records the register number to use if scalar type.
3622 *PPADDING records the amount of padding needed in words. */
3623
3624 static int
3625 function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
3626 const CUMULATIVE_ARGS *cum;
3627 enum machine_mode mode;
3628 tree type;
3629 int named;
3630 int incoming_p;
3631 int *pregno;
3632 int *ppadding;
3633 {
3634 int regbase = (incoming_p
3635 ? SPARC_INCOMING_INT_ARG_FIRST
3636 : SPARC_OUTGOING_INT_ARG_FIRST);
3637 int slotno = cum->words;
3638 int regno;
3639
3640 *ppadding = 0;
3641
3642 if (type != 0 && TREE_ADDRESSABLE (type))
3643 return -1;
3644 if (TARGET_ARCH32
3645 && type != 0 && mode == BLKmode
3646 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
3647 return -1;
3648
3649 switch (mode)
3650 {
3651 case VOIDmode :
3652 /* MODE is VOIDmode when generating the actual call.
3653 See emit_call_1. */
3654 return -1;
3655
3656 case QImode : case CQImode :
3657 case HImode : case CHImode :
3658 case SImode : case CSImode :
3659 case DImode : case CDImode :
3660 if (slotno >= SPARC_INT_ARG_MAX)
3661 return -1;
3662 regno = regbase + slotno;
3663 break;
3664
3665 case SFmode : case SCmode :
3666 case DFmode : case DCmode :
3667 case TFmode : case TCmode :
3668 if (TARGET_ARCH32)
3669 {
3670 if (slotno >= SPARC_INT_ARG_MAX)
3671 return -1;
3672 regno = regbase + slotno;
3673 }
3674 else
3675 {
3676 if ((mode == TFmode || mode == TCmode)
3677 && (slotno & 1) != 0)
3678 slotno++, *ppadding = 1;
3679 if (TARGET_FPU && named)
3680 {
3681 if (slotno >= SPARC_FP_ARG_MAX)
3682 return -1;
3683 regno = SPARC_FP_ARG_FIRST + slotno * 2;
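/* A single float goes in the odd register of the slot's pair
   (%f1, %f3, ...), matching the table above. */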
3684 if (mode == SFmode)
3685 regno++;
3686 }
3687 else
3688 {
3689 if (slotno >= SPARC_INT_ARG_MAX)
3690 return -1;
3691 regno = regbase + slotno;
3692 }
3693 }
3694 break;
3695
3696 case BLKmode :
3697 /* For sparc64, objects requiring 16 byte alignment get it. */
3698 if (TARGET_ARCH64)
3699 {
3700 if (type && TYPE_ALIGN (type) == 128 && (slotno & 1) != 0)
3701 slotno++, *ppadding = 1;
3702 }
3703
3704 if (TARGET_ARCH32
3705 || (type && TREE_CODE (type) == UNION_TYPE))
3706 {
3707 if (slotno >= SPARC_INT_ARG_MAX)
3708 return -1;
3709 regno = regbase + slotno;
3710 }
3711 else
3712 {
3713 tree field;
3714 int intregs_p = 0, fpregs_p = 0;
3715 /* The ABI obviously doesn't specify how packed
3716 structures are passed. These are defined to be passed
3717 in int regs if possible, otherwise memory. */
3718 int packed_p = 0;
3719
3720 /* First see what kinds of registers we need. */
3721 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3722 {
3723 if (TREE_CODE (field) == FIELD_DECL)
3724 {
3725 if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3726 && TARGET_FPU)
3727 fpregs_p = 1;
3728 else
3729 intregs_p = 1;
3730 if (DECL_PACKED (field))
3731 packed_p = 1;
3732 }
3733 }
3734 if (packed_p || !named)
3735 fpregs_p = 0, intregs_p = 1;
3736
3737 /* If all arg slots are filled, then must pass on stack. */
3738 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
3739 return -1;
3740 /* If there are only int args and all int arg slots are filled,
3741 then must pass on stack. */
3742 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
3743 return -1;
3744 /* Note that even if all int arg slots are filled, fp members may
3745 still be passed in regs if such regs are available.
3746 *PREGNO isn't set because there may be more than one, it's up
3747 to the caller to compute them. */
3748 return slotno;
3749 }
3750 break;
3751
3752 default :
3753 abort ();
3754 }
3755
3756 *pregno = regno;
3757 return slotno;
3758 }
3759
3760 /* Handle recursive register counting for structure field layout. */
3761
3762 struct function_arg_record_value_parms
3763 {
3764 rtx ret;
3765 int slotno, named, regbase;
3766 int nregs, intoffset;
3767 };
3768
3769 static void function_arg_record_value_3
3770 PROTO((int, struct function_arg_record_value_parms *));
3771 static void function_arg_record_value_2
3772 PROTO((tree, int, struct function_arg_record_value_parms *));
3773 static void function_arg_record_value_1
3774 PROTO((tree, int, struct function_arg_record_value_parms *));
3775 static rtx function_arg_record_value
3776 PROTO((tree, enum machine_mode, int, int, int));
3777
3778 static void
3779 function_arg_record_value_1 (type, startbitpos, parms)
3780 tree type;
3781 int startbitpos;
3782 struct function_arg_record_value_parms *parms;
3783 {
3784 tree field;
3785
3786 /* The ABI obviously doesn't specify how packed structures are
3787 passed. These are defined to be passed in int regs if possible,
3788 otherwise memory. */
3789 int packed_p = 0;
3790
3791 /* We need to compute how many registers are needed so we can
3792 allocate the PARALLEL but before we can do that we need to know
3793 whether there are any packed fields. If there are, int regs are
3794 used regardless of whether there are fp values present. */
3795 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3796 {
3797 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
3798 {
3799 packed_p = 1;
3800 break;
3801 }
3802 }
3803
3804 /* Compute how many registers we need. */
3805 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3806 {
3807 if (TREE_CODE (field) == FIELD_DECL)
3808 {
3809 int bitpos = startbitpos;
3810 if (DECL_FIELD_BITPOS (field))
3811 bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
3812 /* ??? FIXME: else assume zero offset. */
3813
3814 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
3815 {
3816 function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
3817 }
3818 else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3819 && TARGET_FPU
3820 && ! packed_p
3821 && parms->named)
3822 {
3823 if (parms->intoffset != -1)
3824 {
3825 int intslots, this_slotno;
3826
3827 intslots = (bitpos - parms->intoffset + BITS_PER_WORD - 1)
3828 / BITS_PER_WORD;
3829 this_slotno = parms->slotno + parms->intoffset
3830 / BITS_PER_WORD;
3831
3832 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3833 intslots = MAX (intslots, 0);
3834 parms->nregs += intslots;
3835 parms->intoffset = -1;
3836 }
3837
3838 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
3839 If it weren't true we wouldn't be here. */
3840 parms->nregs += 1;
3841 }
3842 else
3843 {
3844 if (parms->intoffset == -1)
3845 parms->intoffset = bitpos;
3846 }
3847 }
3848 }
3849 }
3850
3851 /* Handle recursive structure field register assignment. */
3852
3853 static void
3854 function_arg_record_value_3 (bitpos, parms)
3855 int bitpos;
3856 struct function_arg_record_value_parms *parms;
3857 {
3858 enum machine_mode mode;
3859 int regno, this_slotno, intslots, intoffset;
3860 rtx reg;
3861
3862 if (parms->intoffset == -1)
3863 return;
3864 intoffset = parms->intoffset;
3865 parms->intoffset = -1;
3866
3867 intslots = (bitpos - intoffset + BITS_PER_WORD - 1) / BITS_PER_WORD;
3868 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
3869
3870 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3871 if (intslots <= 0)
3872 return;
3873
3874 /* If this is the trailing part of a word, only load that much into
3875 the register. Otherwise load the whole register. Note that in
3876 the latter case we may pick up unwanted bits. It's not a problem
3877 at the moment but we may wish to revisit it. */
3878
3879 if (intoffset % BITS_PER_WORD != 0)
3880 {
3881 mode = mode_for_size (BITS_PER_WORD - intoffset%BITS_PER_WORD,
3882 MODE_INT, 0);
3883 }
3884 else
3885 mode = word_mode;
3886
3887 intoffset /= BITS_PER_UNIT;
3888 do
3889 {
3890 regno = parms->regbase + this_slotno;
3891 reg = gen_rtx_REG (mode, regno);
3892 XVECEXP (parms->ret, 0, parms->nregs)
3893 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
3894
3895 this_slotno += 1;
3896 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
3897 parms->nregs += 1;
3898 intslots -= 1;
3899 }
3900 while (intslots > 0);
3901 }
3902
3903 static void
3904 function_arg_record_value_2 (type, startbitpos, parms)
3905 tree type;
3906 int startbitpos;
3907 struct function_arg_record_value_parms *parms;
3908 {
3909 tree field;
3910 int packed_p = 0;
3911
3912 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3913 {
3914 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
3915 {
3916 packed_p = 1;
3917 break;
3918 }
3919 }
3920
3921 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3922 {
3923 if (TREE_CODE (field) == FIELD_DECL)
3924 {
3925 int bitpos = startbitpos;
3926 if (DECL_FIELD_BITPOS (field))
3927 bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
3928 /* ??? FIXME: else assume zero offset. */
3929
3930 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
3931 {
3932 function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
3933 }
3934 else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3935 && TARGET_FPU
3936 && ! packed_p
3937 && parms->named)
3938 {
3939 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
3940 rtx reg;
3941
3942 function_arg_record_value_3 (bitpos, parms);
3943
3944 reg = gen_rtx_REG (DECL_MODE (field),
3945 (SPARC_FP_ARG_FIRST + this_slotno * 2
3946 + (DECL_MODE (field) == SFmode
3947 && (bitpos & 32) != 0)));
3948 XVECEXP (parms->ret, 0, parms->nregs)
3949 = gen_rtx_EXPR_LIST (VOIDmode, reg,
3950 GEN_INT (bitpos / BITS_PER_UNIT));
3951 parms->nregs += 1;
3952 }
3953 else
3954 {
3955 if (parms->intoffset == -1)
3956 parms->intoffset = bitpos;
3957 }
3958 }
3959 }
3960 }
3961
3962 static rtx
3963 function_arg_record_value (type, mode, slotno, named, regbase)
3964 tree type;
3965 enum machine_mode mode;
3966 int slotno, named, regbase;
3967 {
3968 HOST_WIDE_INT typesize = int_size_in_bytes (type);
3969 struct function_arg_record_value_parms parms;
3970 int nregs;
3971
3972 parms.ret = NULL_RTX;
3973 parms.slotno = slotno;
3974 parms.named = named;
3975 parms.regbase = regbase;
3976
3977 /* Compute how many registers we need. */
3978 parms.nregs = 0;
3979 parms.intoffset = 0;
3980 function_arg_record_value_1 (type, 0, &parms);
3981
3982 if (parms.intoffset != -1)
3983 {
3984 int intslots, this_slotno;
3985
3986 intslots = (typesize*BITS_PER_UNIT - parms.intoffset + BITS_PER_WORD - 1)
3987 / BITS_PER_WORD;
3988 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
3989
3990 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3991 intslots = MAX (intslots, 0);
3992
3993 parms.nregs += intslots;
3994 }
3995 nregs = parms.nregs;
3996
3997 /* Allocate the vector and handle some annoying special cases. */
3998 if (nregs == 0)
3999 {
4000 /* ??? Empty structure has no value? Duh? */
4001 if (typesize <= 0)
4002 {
4003 /* Though there's nothing really to store, return a word register
4004 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
4005 leads to breakage due to the fact that there are zero bytes to
4006 load. */
4007 return gen_rtx_REG (mode, regbase);
4008 }
4009 else
4010 {
4011 /* ??? C++ has structures with no fields, and yet a size. Give up
4012 for now and pass everything back in integer registers. */
4013 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4014 }
4015 if (nregs + slotno > SPARC_INT_ARG_MAX)
4016 nregs = SPARC_INT_ARG_MAX - slotno;
4017 }
4018 if (nregs == 0)
4019 abort ();
4020
4021 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
4022
4023 /* Fill in the entries. */
4024 parms.nregs = 0;
4025 parms.intoffset = 0;
4026 function_arg_record_value_2 (type, 0, &parms);
4027 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
4028
4029 if (parms.nregs != nregs)
4030 abort ();
4031
4032 return parms.ret;
4033 }
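/* Worked example (illustrative): for a named outgoing argument of type
   struct { double d; int i; } starting in slot 0 under -m64 with
   TARGET_FPU, the counting pass finds one fp register plus one
   trailing int slot, so the result is roughly
     (parallel [(expr_list (reg:DF %f0) (const_int 0))
                (expr_list (reg:DI %o1) (const_int 8))])
   i.e. the double travels in %f0 and the int in %o1, at byte offset 8
   of the 16 byte record.  */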
4034
4035 /* Handle the FUNCTION_ARG macro.
4036 Determine where to put an argument to a function.
4037 Value is zero to push the argument on the stack,
4038 or a hard register in which to store the argument.
4039
4040 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4041 the preceding args and about the function being called.
4042 MODE is the argument's machine mode.
4043 TYPE is the data type of the argument (as a tree).
4044 This is null for libcalls where that information may
4045 not be available.
4046 NAMED is nonzero if this argument is a named parameter
4047 (otherwise it is an extra parameter matching an ellipsis).
4048 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG. */
4049
4050 rtx
4051 function_arg (cum, mode, type, named, incoming_p)
4052 const CUMULATIVE_ARGS *cum;
4053 enum machine_mode mode;
4054 tree type;
4055 int named;
4056 int incoming_p;
4057 {
4058 int regbase = (incoming_p
4059 ? SPARC_INCOMING_INT_ARG_FIRST
4060 : SPARC_OUTGOING_INT_ARG_FIRST);
4061 int slotno, regno, padding;
4062 rtx reg;
4063
4064 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
4065 &regno, &padding);
4066
4067 if (slotno == -1)
4068 return 0;
4069
4070 if (TARGET_ARCH32)
4071 {
4072 reg = gen_rtx_REG (mode, regno);
4073 return reg;
4074 }
4075
4076 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
4077 but also have the slot allocated for them.
4078 If no prototype is in scope fp values in register slots get passed
4079 in two places, either fp regs and int regs or fp regs and memory. */
4080 if ((GET_MODE_CLASS (mode) == MODE_FLOAT
4081 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4082 && SPARC_FP_REG_P (regno))
4083 {
4084 reg = gen_rtx_REG (mode, regno);
4085 if (cum->prototype_p || cum->libcall_p)
4086 {
4087 /* "* 2" because fp reg numbers are recorded in 4 byte
4088 quantities. */
4089 #if 0
4090 /* ??? This will cause the value to be passed in the fp reg and
4091 in the stack. When a prototype exists we want to pass the
4092 value in the reg but reserve space on the stack. That's an
4093 optimization, and is deferred [for a bit]. */
4094 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
4095 return gen_rtx_PARALLEL (mode,
4096 gen_rtvec (2,
4097 gen_rtx_EXPR_LIST (VOIDmode,
4098 NULL_RTX, const0_rtx),
4099 gen_rtx_EXPR_LIST (VOIDmode,
4100 reg, const0_rtx)));
4101 else
4102 #else
4103 /* ??? It seems that passing back a register even when past
4104 the area declared by REG_PARM_STACK_SPACE will allocate
4105 space appropriately, and will not copy the data onto the
4106 stack, exactly as we desire.
4107
4108 This is due to locate_and_pad_parm being called in
4109 expand_call whenever reg_parm_stack_space > 0, which,
4110 while beneficial to our example here, would seem to be
4111 in error from what had been intended. Ho hum... -- r~ */
4112 #endif
4113 return reg;
4114 }
4115 else
4116 {
4117 rtx v0, v1;
4118
4119 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
4120 {
4121 int intreg;
4122
4123 /* On incoming, we don't need to know that the value
4124 is passed in %f0 and %i0, and it confuses other parts
4125 causing needless spillage even in the simplest cases. */
4126 if (incoming_p)
4127 return reg;
4128
4129 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
4130 + (regno - SPARC_FP_ARG_FIRST) / 2);
4131
4132 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
4133 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
4134 const0_rtx);
4135 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
4136 }
4137 else
4138 {
4139 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
4140 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
4141 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
4142 }
4143 }
4144 }
4145 else if (type && TREE_CODE (type) == RECORD_TYPE)
4146 {
4147 /* Structures up to 16 bytes in size are passed in arg slots on the
4148 stack and are promoted to registers where possible. */
4149
4150 if (int_size_in_bytes (type) > 16)
4151 abort (); /* shouldn't get here */
4152
4153 return function_arg_record_value (type, mode, slotno, named, regbase);
4154 }
4155 else if (type && TREE_CODE (type) == UNION_TYPE)
4156 {
4157 enum machine_mode mode;
4158 int bytes = int_size_in_bytes (type);
4159
4160 if (bytes > 16)
4161 abort ();
4162
4163 mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
4164 reg = gen_rtx_REG (mode, regno);
4165 }
4166 else
4167 {
4168 /* Scalar or complex int. */
4169 reg = gen_rtx_REG (mode, regno);
4170 }
4171
4172 return reg;
4173 }
4174
4175 /* Handle the FUNCTION_ARG_PARTIAL_NREGS macro.
4176 For an arg passed partly in registers and partly in memory,
4177 this is the number of registers used.
4178 For args passed entirely in registers or entirely in memory, zero.
4179
4180 Any arg that starts in the first 6 regs but won't entirely fit in them
4181 needs partial registers on v8. On v9, structures with integer
4182 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
4183 values that begin in the last fp reg [where "last fp reg" varies with the
4184 mode] will be split between that reg and memory. */
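/* Example (illustrative): under -m32 a DImode argument whose first
   word falls in slot 5 needs 5 + 2 > 6 slots, so this returns
   6 - 5 = 1: one word goes in %o5 and the other on the stack.  */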
4185
4186 int
4187 function_arg_partial_nregs (cum, mode, type, named)
4188 const CUMULATIVE_ARGS *cum;
4189 enum machine_mode mode;
4190 tree type;
4191 int named;
4192 {
4193 int slotno, regno, padding;
4194
4195 /* We pass 0 for incoming_p here; it doesn't matter. */
4196 slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
4197
4198 if (slotno == -1)
4199 return 0;
4200
4201 if (TARGET_ARCH32)
4202 {
4203 if ((slotno + (mode == BLKmode
4204 ? ROUND_ADVANCE (int_size_in_bytes (type))
4205 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
4206 > NPARM_REGS (SImode))
4207 return NPARM_REGS (SImode) - slotno;
4208 return 0;
4209 }
4210 else
4211 {
4212 if (type && AGGREGATE_TYPE_P (type))
4213 {
4214 int size = int_size_in_bytes (type);
4215 int align = TYPE_ALIGN (type);
4216
4217 if (align == 16)
4218 slotno += slotno & 1;
4219 if (size > 8 && size <= 16
4220 && slotno == SPARC_INT_ARG_MAX - 1)
4221 return 1;
4222 }
4223 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
4224 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
4225 && ! TARGET_FPU))
4226 {
4227 if (GET_MODE_ALIGNMENT (mode) == 128)
4228 {
4229 slotno += slotno & 1;
4230 if (slotno == SPARC_INT_ARG_MAX - 2)
4231 return 1;
4232 }
4233 else
4234 {
4235 if (slotno == SPARC_INT_ARG_MAX - 1)
4236 return 1;
4237 }
4238 }
4239 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4240 {
4241 if (GET_MODE_ALIGNMENT (mode) == 128)
4242 slotno += slotno & 1;
4243 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
4244 > SPARC_FP_ARG_MAX)
4245 return 1;
4246 }
4247 return 0;
4248 }
4249 }
4250
4251 /* Handle the FUNCTION_ARG_PASS_BY_REFERENCE macro.
4252 !v9: The SPARC ABI stipulates passing struct arguments (of any size) and
4253 quad-precision floats by invisible reference.
4254 v9: Aggregates greater than 16 bytes are passed by reference.
4255 For Pascal, also pass arrays by reference. */
4256
4257 int
4258 function_arg_pass_by_reference (cum, mode, type, named)
4259 const CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
4260 enum machine_mode mode;
4261 tree type;
4262 int named ATTRIBUTE_UNUSED;
4263 {
4264 if (TARGET_ARCH32)
4265 {
4266 return ((type && AGGREGATE_TYPE_P (type))
4267 || mode == TFmode || mode == TCmode);
4268 }
4269 else
4270 {
4271 return ((type && TREE_CODE (type) == ARRAY_TYPE)
4272 /* Complex values count as aggregates, so this also takes care of TCmode. */
4273 || GET_MODE_SIZE (mode) > 16
4274 || (type && AGGREGATE_TYPE_P (type)
4275 && int_size_in_bytes (type) > 16));
4276 }
4277 }
4278
4279 /* Handle the FUNCTION_ARG_ADVANCE macro.
4280 Update the data in CUM to advance over an argument
4281 of mode MODE and data type TYPE.
4282 TYPE is null for libcalls where that information may not be available. */
4283
4284 void
4285 function_arg_advance (cum, mode, type, named)
4286 CUMULATIVE_ARGS *cum;
4287 enum machine_mode mode;
4288 tree type;
4289 int named;
4290 {
4291 int slotno, regno, padding;
4292
4293 /* We pass 0 for incoming_p here; it doesn't matter. */
4294 slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
4295
4296 /* If register required leading padding, add it. */
4297 if (slotno != -1)
4298 cum->words += padding;
4299
4300 if (TARGET_ARCH32)
4301 {
4302 cum->words += (mode != BLKmode
4303 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4304 : ROUND_ADVANCE (int_size_in_bytes (type)));
4305 }
4306 else
4307 {
4308 if (type && AGGREGATE_TYPE_P (type))
4309 {
4310 int size = int_size_in_bytes (type);
4311
4312 if (size <= 8)
4313 ++cum->words;
4314 else if (size <= 16)
4315 cum->words += 2;
4316 else /* passed by reference */
4317 ++cum->words;
4318 }
4319 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
4320 {
4321 cum->words += 2;
4322 }
4323 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4324 {
4325 cum->words += GET_MODE_SIZE (mode) / UNITS_PER_WORD;
4326 }
4327 else
4328 {
4329 cum->words += (mode != BLKmode
4330 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4331 : ROUND_ADVANCE (int_size_in_bytes (type)));
4332 }
4333 }
4334 }
4335
4336 /* Handle the FUNCTION_ARG_PADDING macro.
4337 For the 64 bit ABI structs are always stored left shifted in their
4338 argument slot. */
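/* For instance, a 3 byte BLKmode struct argument under -m64 pads
   `upward' (left justified in its slot), whereas a 3 byte scalar on
   this big-endian target pads `downward' (right justified), per the
   default rule below.  */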
4339
4340 enum direction
4341 function_arg_padding (mode, type)
4342 enum machine_mode mode;
4343 tree type;
4344 {
4345 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
4346 return upward;
4347
4348 /* This is the default definition. */
4349 return (! BYTES_BIG_ENDIAN
4350 ? upward
4351 : ((mode == BLKmode
4352 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
4353 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
4354 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
4355 ? downward : upward));
4356 }
4357
4358 /* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
4359 For v9, function return values are subject to the same rules as arguments,
4360 except that up to 32-bytes may be returned in registers. */
4361
4362 rtx
4363 function_value (type, mode, incoming_p)
4364 tree type;
4365 enum machine_mode mode;
4366 int incoming_p;
4367 {
4368 int regno;
4369 int regbase = (incoming_p
4370 ? SPARC_OUTGOING_INT_ARG_FIRST
4371 : SPARC_INCOMING_INT_ARG_FIRST);
4372
4373 if (TARGET_ARCH64 && type)
4374 {
4375 if (TREE_CODE (type) == RECORD_TYPE)
4376 {
4377 /* Structures up to 32 bytes in size are passed in registers,
4378 promoted to fp registers where possible. */
4379
4380 if (int_size_in_bytes (type) > 32)
4381 abort (); /* shouldn't get here */
4382
4383 return function_arg_record_value (type, mode, 0, 1, regbase);
4384 }
4385 else if (TREE_CODE (type) == UNION_TYPE)
4386 {
4387 int bytes = int_size_in_bytes (type);
4388
4389 if (bytes > 32)
4390 abort ();
4391
4392 mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
4393 }
4394 }
4395
4396 if (TARGET_ARCH64
4397 && GET_MODE_CLASS (mode) == MODE_INT
4398 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
4399 && type && TREE_CODE (type) != UNION_TYPE)
4400 mode = DImode;
4401
4402 if (incoming_p)
4403 regno = BASE_RETURN_VALUE_REG (mode);
4404 else
4405 regno = BASE_OUTGOING_VALUE_REG (mode);
4406
4407 return gen_rtx_REG (mode, regno);
4408 }
4409
4410 /* Do what is necessary for `va_start'. We look at the current function
4411 to determine if stdarg or varargs is used and return the address of
4412 the first unnamed parameter. */
4413
4414 rtx
4415 sparc_builtin_saveregs ()
4416 {
4417 int first_reg = current_function_args_info.words;
4418 rtx address;
4419 int regno;
4420
4421 for (regno = first_reg; regno < NPARM_REGS (word_mode); regno++)
4422 emit_move_insn (gen_rtx_MEM (word_mode,
4423 gen_rtx_PLUS (Pmode,
4424 frame_pointer_rtx,
4425 GEN_INT (STACK_POINTER_OFFSET
4426 + UNITS_PER_WORD * regno))),
4427 gen_rtx_REG (word_mode,
4428 BASE_INCOMING_ARG_REG (word_mode) + regno));
4429
4430 address = gen_rtx_PLUS (Pmode,
4431 frame_pointer_rtx,
4432 GEN_INT (STACK_POINTER_OFFSET
4433 + UNITS_PER_WORD * first_reg));
4434
4435 if (current_function_check_memory_usage
4436 && first_reg < NPARM_REGS (word_mode))
4437 emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
4438 address, ptr_mode,
4439 GEN_INT (UNITS_PER_WORD
4440 * (NPARM_REGS (word_mode) - first_reg)),
4441 TYPE_MODE (sizetype), GEN_INT (MEMORY_USE_RW),
4442 TYPE_MODE (integer_type_node));
4443
4444 return address;
4445 }
4446
4447 /* Implement `va_start' for varargs and stdarg. */
4448
4449 void
4450 sparc_va_start (stdarg_p, valist, nextarg)
4451 int stdarg_p ATTRIBUTE_UNUSED;
4452 tree valist;
4453 rtx nextarg;
4454 {
4455 nextarg = expand_builtin_saveregs ();
4456 std_expand_builtin_va_start (1, valist, nextarg);
4457 }
4458
4459 /* Implement `va_arg'. */
4460
4461 rtx
4462 sparc_va_arg (valist, type)
4463 tree valist, type;
4464 {
4465 HOST_WIDE_INT size, rsize, align;
4466 tree addr, incr;
4467 rtx addr_rtx;
4468 int indirect = 0;
4469
4470 /* Round up sizeof(type) to a word. */
4471 size = int_size_in_bytes (type);
4472 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
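  /* E.g. size 1 rounds up to rsize 4 under -m32 (UNITS_PER_WORD == 4),
     since (1 + 3) & -4 == 4, and to 8 under -m64.  */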
4473 align = 0;
4474
4475 if (TARGET_ARCH64)
4476 {
4477 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD)
4478 align = 2 * UNITS_PER_WORD;
4479
4480 if (AGGREGATE_TYPE_P (type))
4481 {
4482 if (size > 16)
4483 {
4484 indirect = 1;
4485 size = rsize = UNITS_PER_WORD;
4486 }
4487 else
4488 size = rsize;
4489 }
4490 }
4491 else
4492 {
4493 if (AGGREGATE_TYPE_P (type)
4494 || TYPE_MODE (type) == TFmode
4495 || TYPE_MODE (type) == TCmode)
4496 {
4497 indirect = 1;
4498 size = rsize = UNITS_PER_WORD;
4499 }
4500 else
4501 {
4502 /* ??? The old va-sparc.h implementation copied 8 byte objects
4503 to a temporary -- I don't see that that provided any more
4504 alignment than the stack slot did. */
4505 }
4506 }
4507
4508 incr = valist;
4509 if (align)
4510 {
4511 incr = fold (build (PLUS_EXPR, ptr_type_node, incr,
4512 build_int_2 (align - 1, 0)));
4513 incr = fold (build (BIT_AND_EXPR, ptr_type_node, incr,
4514 build_int_2 (-align, -1)));
4515 }
4516
4517 addr = incr = save_expr (incr);
4518 if (BYTES_BIG_ENDIAN && size < rsize)
4519 {
4520 addr = fold (build (PLUS_EXPR, ptr_type_node, incr,
4521 build_int_2 (rsize - size, 0)));
4522 }
4523 incr = fold (build (PLUS_EXPR, ptr_type_node, incr,
4524 build_int_2 (rsize, 0)));
4525
4526 incr = build (MODIFY_EXPR, ptr_type_node, valist, incr);
4527 TREE_SIDE_EFFECTS (incr) = 1;
4528 expand_expr (incr, const0_rtx, VOIDmode, EXPAND_NORMAL);
4529
4530 addr_rtx = expand_expr (addr, NULL, Pmode, EXPAND_NORMAL);
4531
4532 if (indirect)
4533 {
4534 addr_rtx = force_reg (Pmode, addr_rtx);
4535 addr_rtx = gen_rtx_MEM (Pmode, addr_rtx);
4536 MEM_ALIAS_SET (addr_rtx) = get_varargs_alias_set ();
4537 }
4538
4539 return addr_rtx;
4540 }
4541 \f
4542 /* Return the string to output a conditional branch to LABEL, which is
4543 the operand number of the label. OP is the conditional expression.
4544 XEXP (OP, 0) is assumed to be a condition code register (integer or
4545 floating point) and its mode specifies what kind of comparison we made.
4546
4547 REVERSED is non-zero if we should reverse the sense of the comparison.
4548
4549 ANNUL is non-zero if we should generate an annulling branch.
4550
4551 NOOP is non-zero if we have to follow this branch by a noop.
4552
4553 INSN, if set, is the insn. */
4554
4555 char *
4556 output_cbranch (op, label, reversed, annul, noop, insn)
4557 rtx op;
4558 int label;
4559 int reversed, annul, noop;
4560 rtx insn;
4561 {
4562 static char string[32];
4563 enum rtx_code code = GET_CODE (op);
4564 rtx cc_reg = XEXP (op, 0);
4565 enum machine_mode mode = GET_MODE (cc_reg);
4566 static char v8_labelno[] = "%lX";
4567 static char v9_icc_labelno[] = "%%icc, %lX";
4568 static char v9_xcc_labelno[] = "%%xcc, %lX";
4569 static char v9_fcc_labelno[] = "%%fccX, %lY";
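  /* The X (and Y) placeholders above are patched in place below.
     Illustrative example: an fbne on %fcc2 with the label in operand 0
     builds the template "fbne\t%%fcc2, %l0", which final assembly
     prints as something like "fbne %fcc2, .LL5" (label name made up).  */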
4570 char *labelno;
4571 int labeloff, spaces = 8;
4572
4573 /* ??? !v9: FP branches cannot be preceded by another floating point insn.
4574 Because there is currently no concept of pre-delay slots, we can fix
4575 this only by always emitting a nop before a floating point branch. */
4576
4577 if ((mode == CCFPmode || mode == CCFPEmode) && ! TARGET_V9)
4578 strcpy (string, "nop\n\t");
4579 else
4580 string[0] = '\0';
4581
4582 /* If not floating-point or if EQ or NE, we can just reverse the code. */
4583 if (reversed
4584 && ((mode != CCFPmode && mode != CCFPEmode) || code == EQ || code == NE))
4585 code = reverse_condition (code), reversed = 0;
4586
4587 /* Start by writing the branch condition. */
4588 switch (code)
4589 {
4590 case NE:
4591 if (mode == CCFPmode || mode == CCFPEmode)
4592 {
4593 strcat (string, "fbne");
4594 spaces -= 4;
4595 }
4596 else
4597 {
4598 strcpy (string, "bne");
4599 spaces -= 3;
4600 }
4601 break;
4602
4603 case EQ:
4604 if (mode == CCFPmode || mode == CCFPEmode)
4605 {
4606 strcat (string, "fbe");
4607 spaces -= 3;
4608 }
4609 else
4610 {
4611 strcpy (string, "be");
4612 spaces -= 2;
4613 }
4614 break;
4615
4616 case GE:
4617 if (mode == CCFPmode || mode == CCFPEmode)
4618 {
4619 if (reversed)
4620 strcat (string, "fbul");
4621 else
4622 strcat (string, "fbge");
4623 spaces -= 4;
4624 }
4625 else if (mode == CC_NOOVmode)
4626 {
4627 strcpy (string, "bpos");
4628 spaces -= 4;
4629 }
4630 else
4631 {
4632 strcpy (string, "bge");
4633 spaces -= 3;
4634 }
4635 break;
4636
4637 case GT:
4638 if (mode == CCFPmode || mode == CCFPEmode)
4639 {
4640 if (reversed)
4641 {
4642 strcat (string, "fbule");
4643 spaces -= 5;
4644 }
4645 else
4646 {
4647 strcat (string, "fbg");
4648 spaces -= 3;
4649 }
4650 }
4651 else
4652 {
4653 strcpy (string, "bg");
4654 spaces -= 2;
4655 }
4656 break;
4657
4658 case LE:
4659 if (mode == CCFPmode || mode == CCFPEmode)
4660 {
4661 if (reversed)
4662 strcat (string, "fbug");
4663 else
4664 strcat (string, "fble");
4665 spaces -= 4;
4666 }
4667 else
4668 {
4669 strcpy (string, "ble");
4670 spaces -= 3;
4671 }
4672 break;
4673
4674 case LT:
4675 if (mode == CCFPmode || mode == CCFPEmode)
4676 {
4677 if (reversed)
4678 {
4679 strcat (string, "fbuge");
4680 spaces -= 5;
4681 }
4682 else
4683 {
4684 strcat (string, "fbl");
4685 spaces -= 3;
4686 }
4687 }
4688 else if (mode == CC_NOOVmode)
4689 {
4690 strcpy (string, "bneg");
4691 spaces -= 4;
4692 }
4693 else
4694 {
4695 strcpy (string, "bl");
4696 spaces -= 2;
4697 }
4698 break;
4699
4700 case GEU:
4701 strcpy (string, "bgeu");
4702 spaces -= 4;
4703 break;
4704
4705 case GTU:
4706 strcpy (string, "bgu");
4707 spaces -= 3;
4708 break;
4709
4710 case LEU:
4711 strcpy (string, "bleu");
4712 spaces -= 4;
4713 break;
4714
4715 case LTU:
4716 strcpy (string, "blu");
4717 spaces -= 3;
4718 break;
4719
4720 default:
4721 abort ();
4722 }
4723
4724 /* Now add the annulling, the label, and a possible noop. */
4725 if (annul)
4726 {
4727 strcat (string, ",a");
4728 spaces -= 2;
4729 }
4730
4731 if (! TARGET_V9)
4732 {
4733 labeloff = 2;
4734 labelno = v8_labelno;
4735 }
4736 else
4737 {
4738 rtx note;
4739
4740 if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
4741 {
4742 strcat (string,
4743 INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
4744 spaces -= 3;
4745 }
4746
4747 labeloff = 9;
4748 if (mode == CCFPmode || mode == CCFPEmode)
4749 {
4750 labeloff = 10;
4751 labelno = v9_fcc_labelno;
4752 /* Set the char indicating the number of the fcc reg to use. */
4753 labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
4754 }
4755 else if (mode == CCXmode || mode == CCX_NOOVmode)
4756 labelno = v9_xcc_labelno;
4757 else
4758 labelno = v9_icc_labelno;
4759 }
4760 /* Set the char indicating the number of the operand containing the
4761 label_ref. */
4762 labelno[labeloff] = label + '0';
4763 if (spaces > 0)
4764 strcat (string, "\t");
4765 else
4766 strcat (string, " ");
4767 strcat (string, labelno);
4768
4769 if (noop)
4770 strcat (string, "\n\tnop");
4771
4772 return string;
4773 }
4774
4775 /* Emit a library call comparison between floating point X and Y.
4776 COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).
4777 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
4778 values as arguments instead of the TFmode registers themselves,
4779 that's why we cannot call emit_float_lib_cmp. */
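/* Sketch of the expansion (illustrative): for a TFmode `a > b' this
   copies any non-MEM operand to a stack temporary, emits roughly
     call _Qp_fgt   ! arguments: &a, &b
   and then compares the DImode libcall result, moved into a fresh
   pseudo, against zero.  */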
4780 void
4781 sparc_emit_float_lib_cmp (x, y, comparison)
4782 rtx x, y;
4783 enum rtx_code comparison;
4784 {
4785 const char *qpfunc;
4786 rtx slot0, slot1, result;
4787
4788 switch (comparison)
4789 {
4790 case EQ:
4791 qpfunc = "_Qp_feq";
4792 break;
4793
4794 case NE:
4795 qpfunc = "_Qp_fne";
4796 break;
4797
4798 case GT:
4799 qpfunc = "_Qp_fgt";
4800 break;
4801
4802 case GE:
4803 qpfunc = "_Qp_fge";
4804 break;
4805
4806 case LT:
4807 qpfunc = "_Qp_flt";
4808 break;
4809
4810 case LE:
4811 qpfunc = "_Qp_fle";
4812 break;
4813
4814 default:
4815 abort();
4816 break;
4817 }
4818
4819 if (GET_CODE (x) != MEM)
4820 {
4821 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
4822 emit_insn (gen_rtx_SET (VOIDmode, slot0, x));
4823 }
else
/* X is already in memory; use it directly. Without this, slot0
would be read uninitialized below. */
slot0 = x;
4824
4825 if (GET_CODE (y) != MEM)
4826 {
4827 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
4828 emit_insn (gen_rtx_SET (VOIDmode, slot1, y));
4829 }
else
/* Likewise for Y. */
slot1 = y;
4830
4831 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, qpfunc), 1,
4832 DImode, 2,
4833 XEXP (slot0, 0), Pmode,
4834 XEXP (slot1, 0), Pmode);
4835
4836 /* Immediately move the result of the libcall into a pseudo
4837 register so reload doesn't clobber the value if it needs
4838 the return register for a spill reg. */
4839 result = gen_reg_rtx (DImode);
4840 emit_move_insn (result, hard_libcall_value (DImode));
4841
4842 emit_cmp_insn (result, const0_rtx, comparison,
4843 NULL_RTX, DImode, 0, 0);
4844 }
4845
4846 /* Return the string to output a conditional branch to LABEL, testing
4847 register REG. LABEL is the operand number of the label; REG is the
4848 operand number of the reg. OP is the conditional expression. The mode
4849 of REG says what kind of comparison we made.
4850
4851 REVERSED is non-zero if we should reverse the sense of the comparison.
4852
4853 ANNUL is non-zero if we should generate an annulling branch.
4854
4855 NOOP is non-zero if we have to follow this branch by a noop. */
4856
4857 char *
4858 output_v9branch (op, reg, label, reversed, annul, noop, insn)
4859 rtx op;
4860 int reg, label;
4861 int reversed, annul, noop;
4862 rtx insn;
4863 {
4864 static char string[20];
4865 enum rtx_code code = GET_CODE (op);
4866 enum machine_mode mode = GET_MODE (XEXP (op, 0));
4867 static char labelno[] = "%X, %lX";
4868 rtx note;
4869 int spaces = 8;
4870
4871 /* These are register branches, not condition code branches, so we can always just reverse the code. */
4872 if (reversed)
4873 code = reverse_condition (code), reversed = 0;
4874
4875 /* Only 64 bit versions of these instructions exist. */
4876 if (mode != DImode)
4877 abort ();
4878
4879 /* Start by writing the branch condition. */
4880
4881 switch (code)
4882 {
4883 case NE:
4884 strcpy (string, "brnz");
4885 spaces -= 4;
4886 break;
4887
4888 case EQ:
4889 strcpy (string, "brz");
4890 spaces -= 3;
4891 break;
4892
4893 case GE:
4894 strcpy (string, "brgez");
4895 spaces -= 5;
4896 break;
4897
4898 case LT:
4899 strcpy (string, "brlz");
4900 spaces -= 4;
4901 break;
4902
4903 case LE:
4904 strcpy (string, "brlez");
4905 spaces -= 5;
4906 break;
4907
4908 case GT:
4909 strcpy (string, "brgz");
4910 spaces -= 4;
4911 break;
4912
4913 default:
4914 abort ();
4915 }
4916
4917 /* Now add the annulling, reg, label, and nop. */
4918 if (annul)
4919 {
4920 strcat (string, ",a");
4921 spaces -= 2;
4922 }
4923
4924 if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
4925 {
4926 strcat (string,
4927 INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
4928 spaces -= 3;
4929 }
4930
4931 labelno[1] = reg + '0';
4932 labelno[6] = label + '0';
4933 if (spaces > 0)
4934 strcat (string, "\t");
4935 else
4936 strcat (string, " ");
4937 strcat (string, labelno);
4938
4939 if (noop)
4940 strcat (string, "\n\tnop");
4941
4942 return string;
4943 }
4944
4945 /* Return 1 if any of the registers of the instruction are %l[0-7] or
4946 %o[0-7]. Such instructions cannot be used in the delay slot of a
4947 return insn on v9. If TEST is 0, also rename all %i[0-7] registers
4948 to their %o[0-7] counterparts. */
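/* E.g. with TEST == 0, (reg:DI %i0) (regno 24) is rewritten to
   (reg:DI %o0) (regno 8): the delay slot of the v9 `return' insn
   executes after the register window moves, so what the function
   knew as %i registers must be named as %o registers there.  */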
4949
4950 static int
4951 epilogue_renumber (where, test)
4952 register rtx *where;
4953 int test;
4954 {
4955 register const char *fmt;
4956 register int i;
4957 register enum rtx_code code;
4958
4959 if (*where == 0)
4960 return 0;
4961
4962 code = GET_CODE (*where);
4963
4964 switch (code)
4965 {
4966 case REG:
4967 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
4968 return 1;
4969 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
4970 *where = gen_rtx (REG, GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
/* Fall through: none of the remaining codes contain sub-rtxes. */
4971 case SCRATCH:
4972 case CC0:
4973 case PC:
4974 case CONST_INT:
4975 case CONST_DOUBLE:
4976 return 0;
4977
4978 default:
4979 break;
4980 }
4981
4982 fmt = GET_RTX_FORMAT (code);
4983
4984 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4985 {
4986 if (fmt[i] == 'E')
4987 {
4988 register int j;
4989 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
4990 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
4991 return 1;
4992 }
4993 else if (fmt[i] == 'e'
4994 && epilogue_renumber (&(XEXP (*where, i)), test))
4995 return 1;
4996 }
4997 return 0;
4998 }
4999
5000 /* Output assembler code to return from a function. */
5001
5002 const char *
5003 output_return (operands)
5004 rtx *operands;
5005 {
5006 rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0;
5007
5008 if (leaf_label)
5009 {
5010 operands[0] = leaf_label;
5011 return "b%* %l0%(";
5012 }
5013 else if (current_function_uses_only_leaf_regs)
5014 {
5015 /* No delay slot in a leaf function. */
5016 if (delay)
5017 abort ();
5018
5019 /* If we didn't allocate a frame pointer for the current function,
5020 the stack pointer might have been adjusted. Output code to
5021 restore it now. */
5022
5023 operands[0] = GEN_INT (actual_fsize);
5024
5025 /* Use sub of negated value in the first two cases instead of add to
5026 allow actual_fsize == 4096: the immediate is a signed 13 bit field,
so -4096 fits where +4096 does not. */
5027
5028 if (actual_fsize <= 4096)
5029 {
5030 if (SKIP_CALLERS_UNIMP_P)
5031 return "jmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
5032 else
5033 return "retl\n\tsub\t%%sp, -%0, %%sp";
5034 }
5035 else if (actual_fsize <= 8192)
5036 {
5037 operands[0] = GEN_INT (actual_fsize - 4096);
5038 if (SKIP_CALLERS_UNIMP_P)
5039 return "sub\t%%sp, -4096, %%sp\n\tjmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
5040 else
5041 return "sub\t%%sp, -4096, %%sp\n\tretl\n\tsub\t%%sp, -%0, %%sp";
5042 }
5043 else if (SKIP_CALLERS_UNIMP_P)
5044 {
5045 if ((actual_fsize & 0x3ff) != 0)
5046 return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
5047 else
5048 return "sethi\t%%hi(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
5049 }
5050 else
5051 {
5052 if ((actual_fsize & 0x3ff) != 0)
5053 return "sethi %%hi(%a0),%%g1\n\tor %%g1,%%lo(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
5054 else
5055 return "sethi %%hi(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
5056 }
5057 }
5058 else if (TARGET_V9)
5059 {
5060 if (delay)
5061 {
5062 epilogue_renumber (&SET_DEST (PATTERN (delay)), 0);
5063 epilogue_renumber (&SET_SRC (PATTERN (delay)), 0);
5064 }
5065 if (SKIP_CALLERS_UNIMP_P)
5066 return "return\t%%i7+12%#";
5067 else
5068 return "return\t%%i7+8%#";
5069 }
5070 else
5071 {
5072 if (delay)
5073 abort ();
5074 if (SKIP_CALLERS_UNIMP_P)
5075 return "jmp\t%%i7+12\n\trestore";
5076 else
5077 return "ret\n\trestore";
5078 }
5079 }
5080 \f
5081 /* Leaf functions and non-leaf functions have different needs. */
5082
5083 static int
5084 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
5085
5086 static int
5087 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
5088
5089 static int *reg_alloc_orders[] = {
5090 reg_leaf_alloc_order,
5091 reg_nonleaf_alloc_order};
5092
5093 void
5094 order_regs_for_local_alloc ()
5095 {
5096 static int last_order_nonleaf = 1;
5097
5098 if (regs_ever_live[15] != last_order_nonleaf)
5099 {
5100 last_order_nonleaf = !last_order_nonleaf;
5101 bcopy ((char *) reg_alloc_orders[last_order_nonleaf],
5102 (char *) reg_alloc_order, FIRST_PSEUDO_REGISTER * sizeof (int));
5103 }
5104 }
5105 \f
5106 /* Return 1 if REG and MEM are legitimate enough to allow the various
5107 mem<-->reg splits to be run. */
5108
5109 int
5110 sparc_splitdi_legitimate (reg, mem)
5111 rtx reg;
5112 rtx mem;
5113 {
5114 /* Punt if we are here by mistake. */
5115 if (! reload_completed)
5116 abort ();
5117
5118 /* We must have an offsettable memory reference. */
5119 if (! offsettable_memref_p (mem))
5120 return 0;
5121
5122 /* If we have legitimate args for ldd/std, we do not want
5123 the split to happen. */
5124 if ((REGNO (reg) % 2) == 0
5125 && mem_min_alignment (mem, 8))
5126 return 0;
5127
5128 /* Success. */
5129 return 1;
5130 }
5131
5132 /* Return 1 if x and y are some kind of REG and they refer to
5133 different hard registers. This test is guaranteed to be
5134 run after reload. */
5135
5136 int
5137 sparc_absnegfloat_split_legitimate (x, y)
5138 rtx x, y;
5139 {
5140 if (GET_CODE (x) == SUBREG)
5141 x = alter_subreg (x);
5142 if (GET_CODE (x) != REG)
5143 return 0;
5144 if (GET_CODE (y) == SUBREG)
5145 y = alter_subreg (y);
5146 if (GET_CODE (y) != REG)
5147 return 0;
5148 if (REGNO (x) == REGNO (y))
5149 return 0;
5150 return 1;
5151 }
5152
5153 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
5154 This makes them candidates for using ldd and std insns.
5155
5156 Note reg1 and reg2 *must* be hard registers. */
5157
5158 int
5159 registers_ok_for_ldd_peep (reg1, reg2)
5160 rtx reg1, reg2;
5161 {
5162 /* We might have been passed a SUBREG. */
5163 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
5164 return 0;
5165
5166 if (REGNO (reg1) % 2 != 0)
5167 return 0;
5168
5169 /* Integer ldd is deprecated in SPARC V9. */
5170 if (TARGET_V9 && REGNO (reg1) < 32)
5171 return 0;
5172
5173 return (REGNO (reg1) == REGNO (reg2) - 1);
5174 }
5175
5176 /* Return 1 if addr1 and addr2 are suitable for use in an ldd or
5177 std insn.
5178
5179 This can only happen when addr1 and addr2 are consecutive memory
5180 locations (addr1 + 4 == addr2). addr1 must also be aligned on a
5181 64 bit boundary (addr1 % 8 == 0).
5182
5183 We know %sp and %fp are kept aligned on a 64 bit boundary. Other
5184 registers are assumed to *never* be properly aligned and are
5185 rejected.
5186
5187 Knowing %sp and %fp are kept aligned on a 64 bit boundary, we
5188 need only check that the offset for addr1 % 8 == 0. */
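/* E.g. the pair [%sp+8] / [%sp+12] qualifies, while [%sp+12] /
   [%sp+16] does not (offset1 % 8 != 0), and [%sp+8] / [%fp+12] does
   not either (different base registers).  */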
5189
5190 int
5191 addrs_ok_for_ldd_peep (addr1, addr2)
5192 rtx addr1, addr2;
5193 {
5194 int reg1, offset1;
5195
5196 /* Extract a register number and offset (if used) from the first addr. */
5197 if (GET_CODE (addr1) == PLUS)
5198 {
5199 /* If not a REG, return zero. */
5200 if (GET_CODE (XEXP (addr1, 0)) != REG)
5201 return 0;
5202 else
5203 {
5204 reg1 = REGNO (XEXP (addr1, 0));
5205 /* The offset must be constant! */
5206 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
5207 return 0;
5208 offset1 = INTVAL (XEXP (addr1, 1));
5209 }
5210 }
5211 else if (GET_CODE (addr1) != REG)
5212 return 0;
5213 else
5214 {
5215 reg1 = REGNO (addr1);
5216 /* This was a simple (mem (reg)) expression. Offset is 0. */
5217 offset1 = 0;
5218 }
5219
5220 /* Make sure the second address has the form (plus (reg) (const_int)). */
5221 if (GET_CODE (addr2) != PLUS)
5222 return 0;
5223
5224 if (GET_CODE (XEXP (addr2, 0)) != REG
5225 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
5226 return 0;
5227
5228 /* Only %fp and %sp are allowed. Additionally both addresses must
5229 use the same register. */
5230 if (reg1 != FRAME_POINTER_REGNUM && reg1 != STACK_POINTER_REGNUM)
5231 return 0;
5232
5233 if (reg1 != REGNO (XEXP (addr2, 0)))
5234 return 0;
5235
5236 /* The first offset must be evenly divisible by 8 to ensure the
5237 address is 64 bit aligned. */
5238 if (offset1 % 8 != 0)
5239 return 0;
5240
5241 /* The offset for the second addr must be 4 more than the first addr. */
5242 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
5243 return 0;
5244
5245 /* All the tests passed. addr1 and addr2 are valid for ldd and std
5246 instructions. */
5247 return 1;
5248 }
5249
5250 /* Return 1 if reg is a pseudo, or is the first register in
5251 a hard register pair. This makes it a candidate for use in
5252 ldd and std insns. */
5253
5254 int
5255 register_ok_for_ldd (reg)
5256 rtx reg;
5257 {
5258 /* We might have been passed a SUBREG. */
5259 if (GET_CODE (reg) != REG)
5260 return 0;
5261
5262 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
5263 return (REGNO (reg) % 2 == 0);
5264 else
5265 return 1;
5266 }
5267 \f
5268 /* Print operand X (an rtx) in assembler syntax to file FILE.
5269 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5270 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5271
5272 void
5273 print_operand (file, x, code)
5274 FILE *file;
5275 rtx x;
5276 int code;
5277 {
5278 switch (code)
5279 {
5280 case '#':
5281 /* Output a 'nop' if there's nothing for the delay slot. */
5282 if (dbr_sequence_length () == 0)
5283 fputs ("\n\t nop", file);
5284 return;
5285 case '*':
5286 /* Output an annul flag if there's nothing for the delay slot and we
5287 are optimizing. This is always used with '(' below. */
5288 /* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
5289 this is a dbx bug. So, we only do this when optimizing. */
5290 /* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
5291 Always emit a nop in case the next instruction is a branch. */
5292 if (dbr_sequence_length () == 0
5293 && (optimize && (int)sparc_cpu < PROCESSOR_V9))
5294 fputs (",a", file);
5295 return;
5296 case '(':
5297 /* Output a 'nop' if there's nothing for the delay slot and we are
5298 not optimizing. This is always used with '*' above. */
5299 if (dbr_sequence_length () == 0
5300 && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
5301 fputs ("\n\t nop", file);
5302 return;
5303 case '_':
5304 /* Output the Embedded Medium/Anywhere code model base register. */
5305 fputs (EMBMEDANY_BASE_REG, file);
5306 return;
5307 case '@':
5308 /* Print out what we are using as the frame pointer. This might
5309 be %fp, or might be %sp+offset. */
5310 /* ??? What if offset is too big? Perhaps the caller knows it isn't? */
5311 fprintf (file, "%s+%d", frame_base_name, frame_base_offset);
5312 return;
5313 case 'Y':
5314 /* Adjust the operand to take into account a RESTORE operation. */
5315 if (GET_CODE (x) == CONST_INT)
5316 break;
5317 else if (GET_CODE (x) != REG)
5318 output_operand_lossage ("Invalid %%Y operand");
5319 else if (REGNO (x) < 8)
5320 fputs (reg_names[REGNO (x)], file);
5321 else if (REGNO (x) >= 24 && REGNO (x) < 32)
5322 fputs (reg_names[REGNO (x)-16], file);
5323 else
5324 output_operand_lossage ("Invalid %%Y operand");
5325 return;
5326 case 'L':
5327 /* Print out the low order register name of a register pair. */
5328 if (WORDS_BIG_ENDIAN)
5329 fputs (reg_names[REGNO (x)+1], file);
5330 else
5331 fputs (reg_names[REGNO (x)], file);
5332 return;
5333 case 'H':
5334 /* Print out the high order register name of a register pair. */
5335 if (WORDS_BIG_ENDIAN)
5336 fputs (reg_names[REGNO (x)], file);
5337 else
5338 fputs (reg_names[REGNO (x)+1], file);
5339 return;
5340 case 'R':
5341 /* Print out the second register name of a register pair or quad.
5342 I.e., R (%o0) => %o1. */
5343 fputs (reg_names[REGNO (x)+1], file);
5344 return;
5345 case 'S':
5346 /* Print out the third register name of a register quad.
5347 I.e., S (%o0) => %o2. */
5348 fputs (reg_names[REGNO (x)+2], file);
5349 return;
5350 case 'T':
5351 /* Print out the fourth register name of a register quad.
5352 I.e., T (%o0) => %o3. */
5353 fputs (reg_names[REGNO (x)+3], file);
5354 return;
5355 case 'x':
5356 /* Print a condition code register. */
5357 if (REGNO (x) == SPARC_ICC_REG)
5358 {
5359 /* We don't handle CC[X]_NOOVmode because they're not supposed
5360 to occur here. */
5361 if (GET_MODE (x) == CCmode)
5362 fputs ("%icc", file);
5363 else if (GET_MODE (x) == CCXmode)
5364 fputs ("%xcc", file);
5365 else
5366 abort ();
5367 }
5368 else
5369 /* %fccN register */
5370 fputs (reg_names[REGNO (x)], file);
5371 return;
5372 case 'm':
5373 /* Print the operand's address only. */
5374 output_address (XEXP (x, 0));
5375 return;
5376 case 'r':
5377 /* In this case we need a register. Use %g0 if the
5378 operand is const0_rtx. */
5379 if (x == const0_rtx
5380 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
5381 {
5382 fputs ("%g0", file);
5383 return;
5384 }
5385 else
5386 break;
5387
5388 case 'A':
5389 switch (GET_CODE (x))
5390 {
5391 case IOR: fputs ("or", file); break;
5392 case AND: fputs ("and", file); break;
5393 case XOR: fputs ("xor", file); break;
5394 default: output_operand_lossage ("Invalid %%A operand");
5395 }
5396 return;
5397
5398 case 'B':
5399 switch (GET_CODE (x))
5400 {
5401 case IOR: fputs ("orn", file); break;
5402 case AND: fputs ("andn", file); break;
5403 case XOR: fputs ("xnor", file); break;
5404 default: output_operand_lossage ("Invalid %%B operand");
5405 }
5406 return;
5407
5408 /* These are used by the conditional move instructions. */
5409 case 'c' :
5410 case 'C':
5411 {
5412 enum rtx_code rc = (code == 'c'
5413 ? reverse_condition (GET_CODE (x))
5414 : GET_CODE (x));
5415 switch (rc)
5416 {
5417 case NE: fputs ("ne", file); break;
5418 case EQ: fputs ("e", file); break;
5419 case GE: fputs ("ge", file); break;
5420 case GT: fputs ("g", file); break;
5421 case LE: fputs ("le", file); break;
5422 case LT: fputs ("l", file); break;
5423 case GEU: fputs ("geu", file); break;
5424 case GTU: fputs ("gu", file); break;
5425 case LEU: fputs ("leu", file); break;
5426 case LTU: fputs ("lu", file); break;
5427 default: output_operand_lossage (code == 'c'
5428 ? "Invalid %%c operand"
5429 : "Invalid %%C operand");
5430 }
5431 return;
5432 }
5433
5434 /* These are used by the movr instruction pattern. */
5435 case 'd':
5436 case 'D':
5437 {
5438 enum rtx_code rc = (code == 'd'
5439 ? reverse_condition (GET_CODE (x))
5440 : GET_CODE (x));
5441 switch (rc)
5442 {
5443 case NE: fputs ("ne", file); break;
5444 case EQ: fputs ("e", file); break;
5445 case GE: fputs ("gez", file); break;
5446 case LT: fputs ("lz", file); break;
5447 case LE: fputs ("lez", file); break;
5448 case GT: fputs ("gz", file); break;
5449 default: output_operand_lossage (code == 'd'
5450 ? "Invalid %%d operand"
5451 : "Invalid %%D operand");
5452 }
5453 return;
5454 }
5455
5456 case 'b':
5457 {
5458 /* Print a sign-extended character. */
5459 int i = INTVAL (x) & 0xff;
5460 if (i & 0x80)
5461 i |= 0xffffff00;
5462 fprintf (file, "%d", i);
5463 return;
5464 }
5465
5466 case 'f':
5467 /* Operand must be a MEM; write its address. */
5468 if (GET_CODE (x) != MEM)
5469 output_operand_lossage ("Invalid %%f operand");
5470 output_address (XEXP (x, 0));
5471 return;
5472
5473 case 0:
5474 /* Do nothing special. */
5475 break;
5476
5477 default:
5478 /* Undocumented flag. */
5479 output_operand_lossage ("invalid operand output code");
5480 }
5481
5482 if (GET_CODE (x) == REG)
5483 fputs (reg_names[REGNO (x)], file);
5484 else if (GET_CODE (x) == MEM)
5485 {
5486 fputc ('[', file);
5487 /* Poor Sun assembler doesn't understand absolute addressing. */
5488 if (CONSTANT_P (XEXP (x, 0))
5489 && ! TARGET_LIVE_G0)
5490 fputs ("%g0+", file);
5491 output_address (XEXP (x, 0));
5492 fputc (']', file);
5493 }
5494 else if (GET_CODE (x) == HIGH)
5495 {
5496 fputs ("%hi(", file);
5497 output_addr_const (file, XEXP (x, 0));
5498 fputc (')', file);
5499 }
5500 else if (GET_CODE (x) == LO_SUM)
5501 {
5502 print_operand (file, XEXP (x, 0), 0);
5503 if (TARGET_CM_MEDMID)
5504 fputs ("+%l44(", file);
5505 else
5506 fputs ("+%lo(", file);
5507 output_addr_const (file, XEXP (x, 1));
5508 fputc (')', file);
5509 }
5510 else if (GET_CODE (x) == CONST_DOUBLE
5511 && (GET_MODE (x) == VOIDmode
5512 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
5513 {
5514 if (CONST_DOUBLE_HIGH (x) == 0)
5515 fprintf (file, "%u", CONST_DOUBLE_LOW (x));
5516 else if (CONST_DOUBLE_HIGH (x) == -1
5517 && CONST_DOUBLE_LOW (x) < 0)
5518 fprintf (file, "%d", CONST_DOUBLE_LOW (x));
5519 else
5520 output_operand_lossage ("long long constant not a valid immediate operand");
5521 }
5522 else if (GET_CODE (x) == CONST_DOUBLE)
5523 output_operand_lossage ("floating point constant not a valid immediate operand");
5524 else { output_addr_const (file, x); }
5525 }
5526 \f
5527 /* This function outputs assembler code for VALUE to FILE, where VALUE is
5528 a 64 bit (DImode) value. */
5529
5530 /* ??? If there is a 64 bit counterpart to .word that the assembler
5531 understands, then using that would simplify this code greatly.
5532 /* ??? We only output .xword's for symbols and only then in environments
5533 where the assembler can handle them. */
5534
5535 void
5536 output_double_int (file, value)
5537 FILE *file;
5538 rtx value;
5539 {
5540 if (GET_CODE (value) == CONST_INT)
5541 {
5542 /* ??? This has endianness issues. */
5543 #if HOST_BITS_PER_WIDE_INT == 64
5544 HOST_WIDE_INT xword = INTVAL (value);
5545 HOST_WIDE_INT high, low;
5546
5547 high = (xword >> 32) & 0xffffffff;
5548 low = xword & 0xffffffff;
5549 ASM_OUTPUT_INT (file, GEN_INT (high));
5550 ASM_OUTPUT_INT (file, GEN_INT (low));
5551 #else
5552 if (INTVAL (value) < 0)
5553 ASM_OUTPUT_INT (file, constm1_rtx);
5554 else
5555 ASM_OUTPUT_INT (file, const0_rtx);
5556 ASM_OUTPUT_INT (file, value);
5557 #endif
5558 }
5559 else if (GET_CODE (value) == CONST_DOUBLE)
5560 {
5561 ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_HIGH (value)));
5562 ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_LOW (value)));
5563 }
5564 else if (GET_CODE (value) == SYMBOL_REF
5565 || GET_CODE (value) == CONST
5566 || GET_CODE (value) == PLUS
5567 || (TARGET_ARCH64 &&
5568 (GET_CODE (value) == LABEL_REF
5569 || GET_CODE (value) == CODE_LABEL
5570 || GET_CODE (value) == MINUS)))
5571 {
5572 if (! TARGET_V9)
5573 {
5574 ASM_OUTPUT_INT (file, const0_rtx);
5575 ASM_OUTPUT_INT (file, value);
5576 }
5577 else
5578 {
5579 fprintf (file, "\t%s\t", ASM_LONGLONG);
5580 output_addr_const (file, value);
5581 fprintf (file, "\n");
5582 }
5583 }
5584 else
5585 abort ();
5586 }
5587 \f
5588 /* Return the value of a code used in the .proc pseudo-op that says
5589 what kind of result this function returns. For non-C types, we pick
5590 the closest C type. */
5591
5592 #ifndef CHAR_TYPE_SIZE
5593 #define CHAR_TYPE_SIZE BITS_PER_UNIT
5594 #endif
5595
5596 #ifndef SHORT_TYPE_SIZE
5597 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
5598 #endif
5599
5600 #ifndef INT_TYPE_SIZE
5601 #define INT_TYPE_SIZE BITS_PER_WORD
5602 #endif
5603
5604 #ifndef LONG_TYPE_SIZE
5605 #define LONG_TYPE_SIZE BITS_PER_WORD
5606 #endif
5607
5608 #ifndef LONG_LONG_TYPE_SIZE
5609 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
5610 #endif
5611
5612 #ifndef FLOAT_TYPE_SIZE
5613 #define FLOAT_TYPE_SIZE BITS_PER_WORD
5614 #endif
5615
5616 #ifndef DOUBLE_TYPE_SIZE
5617 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
5618 #endif
5619
5620 #ifndef LONG_DOUBLE_TYPE_SIZE
5621 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
5622 #endif
5623
5624 unsigned long
5625 sparc_type_code (type)
5626 register tree type;
5627 {
5628 register unsigned long qualifiers = 0;
5629 register unsigned shift;
5630
5631 /* Only the first 30 bits of the qualifier are valid. We must refrain from
5632 setting more, since some assemblers will give an error for this. Also,
5633 we must be careful to avoid shifts of 32 bits or more to avoid getting
5634 unpredictable results. */
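/* Worked example (illustrative): for `unsigned char *', the first
   iteration ORs in 1 << 6 for the pointer, then the pointed-to
   unsigned char returns (1 << 6) | 12, i.e. 0x4c.  */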
5635
5636 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
5637 {
5638 switch (TREE_CODE (type))
5639 {
5640 case ERROR_MARK:
5641 return qualifiers;
5642
5643 case ARRAY_TYPE:
5644 qualifiers |= (3 << shift);
5645 break;
5646
5647 case FUNCTION_TYPE:
5648 case METHOD_TYPE:
5649 qualifiers |= (2 << shift);
5650 break;
5651
5652 case POINTER_TYPE:
5653 case REFERENCE_TYPE:
5654 case OFFSET_TYPE:
5655 qualifiers |= (1 << shift);
5656 break;
5657
5658 case RECORD_TYPE:
5659 return (qualifiers | 8);
5660
5661 case UNION_TYPE:
5662 case QUAL_UNION_TYPE:
5663 return (qualifiers | 9);
5664
5665 case ENUMERAL_TYPE:
5666 return (qualifiers | 10);
5667
5668 case VOID_TYPE:
5669 return (qualifiers | 16);
5670
5671 case INTEGER_TYPE:
5672 /* If this is a range type, consider it to be the underlying
5673 type. */
5674 if (TREE_TYPE (type) != 0)
5675 break;
5676
5677 /* Carefully distinguish all the standard types of C,
5678 without messing up if the language is not C. We do this by
5679 testing TYPE_PRECISION and TREE_UNSIGNED. The old code used to
5680 look at both the names and the above fields, but that's redundant.
5681 Any type whose size is between two C types will be considered
5682 to be the wider of the two types. Also, we do not have a
5683 special code to use for "long long", so anything wider than
5684 long is treated the same. Note that we can't distinguish
5685 between "int" and "long" in this code if they are the same
5686 size, but that's fine, since neither can the assembler. */
5687
5688 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
5689 return (qualifiers | (TREE_UNSIGNED (type) ? 12 : 2));
5690
5691 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
5692 return (qualifiers | (TREE_UNSIGNED (type) ? 13 : 3));
5693
5694 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
5695 return (qualifiers | (TREE_UNSIGNED (type) ? 14 : 4));
5696
5697 else
5698 return (qualifiers | (TREE_UNSIGNED (type) ? 15 : 5));
5699
5700 case REAL_TYPE:
5701 /* If this is a range type, consider it to be the underlying
5702 type. */
5703 if (TREE_TYPE (type) != 0)
5704 break;
5705
5706 /* Carefully distinguish all the standard types of C,
5707 without messing up if the language is not C. */
5708
5709 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
5710 return (qualifiers | 6);
5711
5712 else
5713 return (qualifiers | 7);
5714
5715 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
5716 /* ??? We need to distinguish between double and float complex types,
5717 but I don't know how yet because I can't reach this code from
5718 existing front-ends. */
5719 return (qualifiers | 7); /* Who knows? */
5720
5721 case CHAR_TYPE: /* GNU Pascal CHAR type. Not used in C. */
5722 case BOOLEAN_TYPE: /* GNU Fortran BOOLEAN type. */
5723 case FILE_TYPE: /* GNU Pascal FILE type. */
5724 case SET_TYPE: /* GNU Pascal SET type. */
5725 case LANG_TYPE: /* ? */
5726 return qualifiers;
5727
5728 default:
5729 abort (); /* Not a type! */
5730 }
5731 }
5732
5733 return qualifiers;
5734 }
5735 \f
5736 /* Nested function support. */
5737
5738 /* Emit RTL insns to initialize the variable parts of a trampoline.
5739 FNADDR is an RTX for the address of the function's pure code.
5740 CXT is an RTX for the static chain value for the function.
5741
5742 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
5743 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
5744 (to store insns). This is a bit excessive. Perhaps a different
5745 mechanism would be better here.
5746
5747 Emit enough FLUSH insns to synchronize the data and instruction caches. */
5748
5749 void
5750 sparc_initialize_trampoline (tramp, fnaddr, cxt)
5751 rtx tramp, fnaddr, cxt;
5752 {
5753 /* SPARC 32 bit trampoline:
5754
5755 sethi %hi(fn), %g1
5756 sethi %hi(static), %g2
5757 jmp %g1+%lo(fn)
5758 or %g2, %lo(static), %g2
5759
5760 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
5761 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
5762 */
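  /* For example, the first word below is assembled as
     (fnaddr >> 10) | 0x03000000, i.e. the 22 high-order address bits
     dropped into the imm22 field of a `sethi' with rd = %g1.  */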
5763 #ifdef TRANSFER_FROM_TRAMPOLINE
5764 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
5765 0, VOIDmode, 1, tramp, Pmode);
5766 #endif
5767
5768 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
5769 expand_binop (SImode, ior_optab,
5770 expand_shift (RSHIFT_EXPR, SImode, fnaddr,
5771 size_int (10), 0, 1),
5772 GEN_INT (0x03000000),
5773 NULL_RTX, 1, OPTAB_DIRECT));
5774
5775 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
5776 expand_binop (SImode, ior_optab,
5777 expand_shift (RSHIFT_EXPR, SImode, cxt,
5778 size_int (10), 0, 1),
5779 GEN_INT (0x05000000),
5780 NULL_RTX, 1, OPTAB_DIRECT));
5781
5782 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
5783 expand_binop (SImode, ior_optab,
5784 expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
5785 GEN_INT (0x81c06000),
5786 NULL_RTX, 1, OPTAB_DIRECT));
5787
5788 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
5789 expand_binop (SImode, ior_optab,
5790 expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
5791 GEN_INT (0x8410a000),
5792 NULL_RTX, 1, OPTAB_DIRECT));
5793
5794 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
5795 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
5796 aligned on a 16 byte boundary so one flush clears it all. */
5797 if (sparc_cpu != PROCESSOR_ULTRASPARC)
5798 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
5799 plus_constant (tramp, 8)))));
5800 }
5801
5802 /* The 64 bit version is simpler because it makes more sense to load the
5803 values as "immediate" data out of the trampoline. It's also easier since
5804 we can read the PC without clobbering a register. */
5805
5806 void
5807 sparc64_initialize_trampoline (tramp, fnaddr, cxt)
5808 rtx tramp, fnaddr, cxt;
5809 {
5810 #ifdef TRANSFER_FROM_TRAMPOLINE
5811 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
5812 0, VOIDmode, 1, tramp, Pmode);
5813 #endif
5814
5815 /*
5816 rd %pc, %g1
5817 ldx [%g1+24], %g5
5818 jmp %g5
5819 ldx [%g1+16], %g5
5820 +16 bytes data
5821 */
5822
5823 emit_move_insn (gen_rtx_MEM (SImode, tramp),
5824 GEN_INT (0x83414000));
5825 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
5826 GEN_INT (0xca586018));
5827 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
5828 GEN_INT (0x81c14000));
5829 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
5830 GEN_INT (0xca586010));
5831 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
5832 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
5833 emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, tramp))));
5834
5835 if (sparc_cpu != PROCESSOR_ULTRASPARC)
5836 emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
5837 }
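/* Illustration only (not compiled): the trampoline image written above,
   viewed as a hypothetical struct, assuming a 64-bit unsigned long on
   the target.  Offsets match the plus_constant calls.  */
#if 0
struct sparc64_tramp_image
{
  unsigned int insn[4];         /* rd %pc,%g1 / ldx [%g1+24],%g5
                                   / jmp %g5 / ldx [%g1+16],%g5 */
  unsigned long cxt;            /* static chain value, at offset 16 */
  unsigned long fnaddr;         /* function address, at offset 24 */
};
#endif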
5838 \f
5839 /* Subroutines to support a flat (single) register window calling
5840 convention. */
5841
5842 /* Single-register window sparc stack frames look like:
5843
5844 Before call After call
5845 +-----------------------+ +-----------------------+
5846 high | | | |
5847 mem | caller's temps. | | caller's temps. |
5848 | | | |
5849 +-----------------------+ +-----------------------+
5850 | | | |
5851 | arguments on stack. | | arguments on stack. |
5852 | | | |
5853 +-----------------------+FP+92->+-----------------------+
5854 | 6 words to save | | 6 words to save |
5855 | arguments passed | | arguments passed |
5856 | in registers, even | | in registers, even |
5857 | if not passed. | | if not passed. |
5858 SP+68->+-----------------------+FP+68->+-----------------------+
5859 | 1 word struct addr | | 1 word struct addr |
5860 +-----------------------+FP+64->+-----------------------+
5861 | | | |
5862 | 16 word reg save area | | 16 word reg save area |
5863 | | | |
5864 SP->+-----------------------+ FP->+-----------------------+
5865 | 4 word area for |
5866 | fp/alu reg moves |
5867 FP-16->+-----------------------+
5868 | |
5869 | local variables |
5870 | |
5871 +-----------------------+
5872 | |
5873 | fp register save |
5874 | |
5875 +-----------------------+
5876 | |
5877 | gp register save |
5878 | |
5879 +-----------------------+
5880 | |
5881 | alloca allocations |
5882 | |
5883 +-----------------------+
5884 | |
5885 | arguments on stack |
5886 | |
5887 SP+92->+-----------------------+
5888 | 6 words to save |
5889 | arguments passed |
5890 | in registers, even |
5891 low | if not passed. |
5892 memory SP+68->+-----------------------+
5893 | 1 word struct addr |
5894 SP+64->+-----------------------+
5895 | |
5896 | 16 word reg save area |
5897 | |
5898 SP->+-----------------------+ */
5899
5900 /* Structure to be filled in by sparc_flat_compute_frame_size with register
5901 save masks, and offsets for the current function. */
5902
5903 struct sparc_frame_info
5904 {
5905 unsigned long total_size; /* # bytes that the entire frame takes up. */
5906 unsigned long var_size; /* # bytes that variables take up. */
5907 unsigned long args_size; /* # bytes that outgoing arguments take up. */
5908 unsigned long extra_size; /* # bytes of extra gunk. */
5909 unsigned int gp_reg_size; /* # bytes needed to store gp regs. */
5910 unsigned int fp_reg_size; /* # bytes needed to store fp regs. */
5911 unsigned long gmask; /* Mask of saved gp registers. */
5912 unsigned long fmask; /* Mask of saved fp registers. */
5913 unsigned long reg_offset; /* Offset from new sp to store regs. */
5914 int initialized; /* Nonzero if frame size already calculated. */
5915 };
5916
5917 /* Current frame information calculated by sparc_flat_compute_frame_size. */
5918 struct sparc_frame_info current_frame_info;
5919
5920 /* Zero structure to initialize current_frame_info. */
5921 struct sparc_frame_info zero_frame_info;
5922
5923 /* Tell prologue and epilogue if register REGNO should be saved / restored. */
5924
5925 #define RETURN_ADDR_REGNUM 15
5926 #define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
5927 #define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
5928
5929 #define MUST_SAVE_REGISTER(regno) \
5930 ((regs_ever_live[regno] && !call_used_regs[regno]) \
5931 || (regno == FRAME_POINTER_REGNUM && frame_pointer_needed) \
5932 || (regno == RETURN_ADDR_REGNUM && regs_ever_live[RETURN_ADDR_REGNUM]))
5933
5934 /* Return the bytes needed to compute the frame pointer from the current
5935 stack pointer. */
5936
5937 unsigned long
5938 sparc_flat_compute_frame_size (size)
5939 int size; /* # of var. bytes allocated. */
5940 {
5941 int regno;
5942 unsigned long total_size; /* # bytes that the entire frame takes up. */
5943 unsigned long var_size; /* # bytes that variables take up. */
5944 unsigned long args_size; /* # bytes that outgoing arguments take up. */
5945 unsigned long extra_size; /* # extra bytes. */
5946 unsigned int gp_reg_size; /* # bytes needed to store gp regs. */
5947 unsigned int fp_reg_size; /* # bytes needed to store fp regs. */
5948 unsigned long gmask; /* Mask of saved gp registers. */
5949 unsigned long fmask; /* Mask of saved fp registers. */
5950 unsigned long reg_offset; /* Offset to register save area. */
5951 int need_aligned_p; /* 1 if need the save area 8 byte aligned. */
5952
5953 /* This is the size of the 16 word reg save area, 1 word struct addr
5954 area, and 4 word fp/alu register copy area. */
5955 extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
5956 var_size = size;
5957 gp_reg_size = 0;
5958 fp_reg_size = 0;
5959 gmask = 0;
5960 fmask = 0;
5961 reg_offset = 0;
5962 need_aligned_p = 0;
5963
5964 args_size = 0;
5965 if (!leaf_function_p ())
5966 {
5967 /* Also include the size needed for the 6 parameter registers. */
5968 args_size = current_function_outgoing_args_size + 24;
5969 }
5970 total_size = var_size + args_size;
5971
5972 /* Calculate space needed for gp registers. */
5973 for (regno = 1; regno <= 31; regno++)
5974 {
5975 if (MUST_SAVE_REGISTER (regno))
5976 {
5977 /* If we need to save two regs in a row, ensure there's room to bump
5978 up the address to align it to a doubleword boundary. */
5979 if ((regno & 0x1) == 0 && MUST_SAVE_REGISTER (regno+1))
5980 {
5981 if (gp_reg_size % 8 != 0)
5982 gp_reg_size += 4;
5983 gp_reg_size += 2 * UNITS_PER_WORD;
5984 gmask |= 3 << regno;
5985 regno++;
5986 need_aligned_p = 1;
5987 }
5988 else
5989 {
5990 gp_reg_size += UNITS_PER_WORD;
5991 gmask |= 1 << regno;
5992 }
5993 }
5994 }
5995
5996 /* Calculate space needed for fp registers. */
5997 for (regno = 32; regno <= 63; regno++)
5998 {
5999 if (regs_ever_live[regno] && !call_used_regs[regno])
6000 {
6001 fp_reg_size += UNITS_PER_WORD;
6002 fmask |= 1 << (regno - 32);
6003 }
6004 }
6005
6006 if (gmask || fmask)
6007 {
6008 int n;
6009 reg_offset = FIRST_PARM_OFFSET(0) + args_size;
6010 /* Ensure save area is 8 byte aligned if we need it. */
6011 n = reg_offset % 8;
6012 if (need_aligned_p && n != 0)
6013 {
6014 total_size += 8 - n;
6015 reg_offset += 8 - n;
6016 }
6017 total_size += gp_reg_size + fp_reg_size;
6018 }
6019
6020 /* If we must allocate a stack frame at all, we must also allocate
6021 room for register window spillage, so as to be binary compatible
6022 with libraries and operating systems that do not use -mflat. */
6023 if (total_size > 0)
6024 total_size += extra_size;
6025 else
6026 extra_size = 0;
6027
6028 total_size = SPARC_STACK_ALIGN (total_size);
6029
6030 /* Save other computed information. */
6031 current_frame_info.total_size = total_size;
6032 current_frame_info.var_size = var_size;
6033 current_frame_info.args_size = args_size;
6034 current_frame_info.extra_size = extra_size;
6035 current_frame_info.gp_reg_size = gp_reg_size;
6036 current_frame_info.fp_reg_size = fp_reg_size;
6037 current_frame_info.gmask = gmask;
6038 current_frame_info.fmask = fmask;
6039 current_frame_info.reg_offset = reg_offset;
6040 current_frame_info.initialized = reload_completed;
6041
6042 /* Ok, we're done. */
6043 return total_size;
6044 }
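/* Worked example (illustrative): extra_size is the 16 word reg save area
   (64 bytes), the 1 word struct addr (4 bytes) and the 4 word fp/alu copy
   area (16 bytes), i.e. 84 bytes. A non-leaf function with 40 bytes of
   locals, no outgoing args beyond the 6 parameter regs, and no regs to
   save would therefore get
        args_size  = 0 + 24
        total_size = 40 + 24 + 84 = 148
   before the final SPARC_STACK_ALIGN rounding.  */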
6045 \f
6046 /* Save/restore registers in GMASK and FMASK at register BASE_REG plus offset
6047 OFFSET.
6048
6049 BASE_REG must be 8 byte aligned. This allows us to test OFFSET for
6050 appropriate alignment and use DOUBLEWORD_OP when we can. We assume
6051 [BASE_REG+OFFSET] will always be a valid address.
6052
6053 WORD_OP is either "st" for save, "ld" for restore.
6054 DOUBLEWORD_OP is either "std" for save, "ldd" for restore. */
6055
6056 void
6057 sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
6058 doubleword_op, base_offset)
6059 FILE *file;
6060 const char *base_reg;
6061 unsigned int offset;
6062 unsigned long gmask;
6063 unsigned long fmask;
6064 const char *word_op;
6065 const char *doubleword_op;
6066 unsigned long base_offset;
6067 {
6068 int regno;
6069
6070 if (gmask == 0 && fmask == 0)
6071 return;
6072
6073 /* Save registers starting from high to low. We've already saved the
6074 previous frame pointer and previous return address for the debugger's
6075 sake. The debugger allows us to omit the nop in the epilogue if at least
6076 one register is reloaded in addition to the return address. */
6077
6078 if (gmask)
6079 {
6080 for (regno = 1; regno <= 31; regno++)
6081 {
6082 if ((gmask & (1L << regno)) != 0)
6083 {
6084 if ((regno & 0x1) == 0 && ((gmask & (1L << (regno+1))) != 0))
6085 {
6086 /* We can save two registers in a row. If we're not at a
6087 double word boundary, move to one.
6088 sparc_flat_compute_frame_size ensures there's room to do
6089 this. */
6090 if (offset % 8 != 0)
6091 offset += UNITS_PER_WORD;
6092
6093 if (word_op[0] == 's')
6094 {
6095 fprintf (file, "\t%s\t%s, [%s+%d]\n",
6096 doubleword_op, reg_names[regno],
6097 base_reg, offset);
6098 if (dwarf2out_do_frame ())
6099 {
6100 char *l = dwarf2out_cfi_label ();
6101 dwarf2out_reg_save (l, regno, offset + base_offset);
6102 dwarf2out_reg_save
6103 (l, regno+1, offset+base_offset + UNITS_PER_WORD);
6104 }
6105 }
6106 else
6107 fprintf (file, "\t%s\t[%s+%d], %s\n",
6108 doubleword_op, base_reg, offset,
6109 reg_names[regno]);
6110
6111 offset += 2 * UNITS_PER_WORD;
6112 regno++;
6113 }
6114 else
6115 {
6116 if (word_op[0] == 's')
6117 {
6118 fprintf (file, "\t%s\t%s, [%s+%d]\n",
6119 word_op, reg_names[regno],
6120 base_reg, offset);
6121 if (dwarf2out_do_frame ())
6122 dwarf2out_reg_save ("", regno, offset + base_offset);
6123 }
6124 else
6125 fprintf (file, "\t%s\t[%s+%d], %s\n",
6126 word_op, base_reg, offset, reg_names[regno]);
6127
6128 offset += UNITS_PER_WORD;
6129 }
6130 }
6131 }
6132 }
6133
6134 if (fmask)
6135 {
6136 for (regno = 32; regno <= 63; regno++)
6137 {
6138 if ((fmask & (1L << (regno - 32))) != 0)
6139 {
6140 if (word_op[0] == 's')
6141 {
6142 fprintf (file, "\t%s\t%s, [%s+%d]\n",
6143 word_op, reg_names[regno],
6144 base_reg, offset);
6145 if (dwarf2out_do_frame ())
6146 dwarf2out_reg_save ("", regno, offset + base_offset);
6147 }
6148 else
6149 fprintf (file, "\t%s\t[%s+%d], %s\n",
6150 word_op, base_reg, offset, reg_names[regno]);
6151
6152 offset += UNITS_PER_WORD;
6153 }
6154 }
6155 }
6156 }
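/* Illustrative output (register names and offsets invented): in the save
   direction (word_op == "st") an even/odd pair of live registers is
   emitted as one doubleword store,
        std     %l0, [%sp+104]
   while an isolated register uses the single word form,
        st      %l3, [%sp+112]
   The restore direction swaps the operand order and uses "ld"/"ldd".  */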
6157 \f
6158 /* Set up the stack and frame (if desired) for the function. */
6159
6160 void
6161 sparc_flat_output_function_prologue (file, size)
6162 FILE *file;
6163 int size;
6164 {
6165 const char *sp_str = reg_names[STACK_POINTER_REGNUM];
6166 unsigned long gmask = current_frame_info.gmask;
6167
6168 sparc_output_scratch_registers (file);
6169
6170 /* This is only for the human reader. */
6171 fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
6172 fprintf (file, "\t%s# vars= %ld, regs= %d/%d, args= %d, extra= %ld\n",
6173 ASM_COMMENT_START,
6174 current_frame_info.var_size,
6175 current_frame_info.gp_reg_size / 4,
6176 current_frame_info.fp_reg_size / 4,
6177 current_function_outgoing_args_size,
6178 current_frame_info.extra_size);
6179
6180 size = SPARC_STACK_ALIGN (size);
6181 size = (! current_frame_info.initialized
6182 ? sparc_flat_compute_frame_size (size)
6183 : current_frame_info.total_size);
6184
6185 /* These cases shouldn't happen. Catch them now. */
6186 if (size == 0 && (gmask || current_frame_info.fmask))
6187 abort ();
6188
6189 /* Allocate our stack frame by decrementing %sp.
6190 At present, the only algorithm gdb can use to determine if this is a
6191 flat frame is if we always set %i7 if we set %sp. This can be optimized
6192 in the future by putting in some sort of debugging information that says
6193 this is a `flat' function. However, there is still the case of debugging
6194 code without such debugging information (including cases where most fns
6195 have such info, but there is one that doesn't). So, always do this now
6196 so we don't get a lot of code out there that gdb can't handle.
6197 If the frame pointer isn't needed then that's ok - gdb won't be able to
6198 distinguish us from a non-flat function but there won't (and shouldn't)
6199 be any differences anyway. The return pc is saved (if necessary) right
6200 after %i7 so gdb won't have to look too far to find it. */
6201 if (size > 0)
6202 {
6203 unsigned int reg_offset = current_frame_info.reg_offset;
6204 const char *fp_str = reg_names[FRAME_POINTER_REGNUM];
6205 const char *t1_str = "%g1";
6206
6207 /* Things get a little tricky if local variables take up more than ~4096
6208 bytes and outgoing arguments take up more than ~4096 bytes. When that
6209 happens, the register save area can't be accessed from either end of
6210 the frame. Handle this by decrementing %sp to the start of the gp
6211 register save area, save the regs, update %i7, and then set %sp to its
6212 final value. Given that we only have one scratch register to play
6213 with it is the cheapest solution, and it helps gdb out as it won't
6214 slow down recognition of flat functions.
6215 Don't change the order of insns emitted here without checking with
6216 the gdb folk first. */
6217
6218 /* Is the entire register save area offsettable from %sp? */
6219 if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
6220 {
6221 if (size <= 4096)
6222 {
6223 fprintf (file, "\tadd\t%s, %d, %s\n",
6224 sp_str, -size, sp_str);
6225 if (gmask & FRAME_POINTER_MASK)
6226 {
6227 fprintf (file, "\tst\t%s, [%s+%d]\n",
6228 fp_str, sp_str, reg_offset);
6229 fprintf (file, "\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
6230 sp_str, -size, fp_str, ASM_COMMENT_START);
6231 reg_offset += 4;
6232 }
6233 }
6234 else
6235 {
6236 fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
6237 size, t1_str, sp_str, t1_str, sp_str);
6238 if (gmask & FRAME_POINTER_MASK)
6239 {
6240 fprintf (file, "\tst\t%s, [%s+%d]\n",
6241 fp_str, sp_str, reg_offset);
6242 fprintf (file, "\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
6243 sp_str, t1_str, fp_str, ASM_COMMENT_START);
6244 reg_offset += 4;
6245 }
6246 }
6247 if (dwarf2out_do_frame ())
6248 {
6249 char *l = dwarf2out_cfi_label ();
6250 if (gmask & FRAME_POINTER_MASK)
6251 {
6252 dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
6253 reg_offset - 4 - size);
6254 dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
6255 }
6256 else
6257 dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size);
6258 }
6259 if (gmask & RETURN_ADDR_MASK)
6260 {
6261 fprintf (file, "\tst\t%s, [%s+%d]\n",
6262 reg_names[RETURN_ADDR_REGNUM], sp_str, reg_offset);
6263 if (dwarf2out_do_frame ())
6264 dwarf2out_return_save ("", reg_offset - size);
6265 reg_offset += 4;
6266 }
6267 sparc_flat_save_restore (file, sp_str, reg_offset,
6268 gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
6269 current_frame_info.fmask,
6270 "st", "std", -size);
6271 }
6272 else
6273 {
6274 /* Subtract %sp in two steps, but make sure there is always a
6275 64 byte register save area, and %sp is properly aligned. */
6276 /* Amount to decrement %sp by, the first time. */
6277 unsigned int size1 = ((size - reg_offset + 64) + 15) & -16;
6278 /* Offset to register save area from %sp. */
6279 unsigned int offset = size1 - (size - reg_offset);
6280
6281 if (size1 <= 4096)
6282 {
6283 fprintf (file, "\tadd\t%s, %d, %s\n",
6284 sp_str, -size1, sp_str);
6285 if (gmask & FRAME_POINTER_MASK)
6286 {
6287 fprintf (file, "\tst\t%s, [%s+%d]\n\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
6288 fp_str, sp_str, offset, sp_str, -size1, fp_str,
6289 ASM_COMMENT_START);
6290 offset += 4;
6291 }
6292 }
6293 else
6294 {
6295 fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
6296 size1, t1_str, sp_str, t1_str, sp_str);
6297 if (gmask & FRAME_POINTER_MASK)
6298 {
6299 fprintf (file, "\tst\t%s, [%s+%d]\n\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
6300 fp_str, sp_str, offset, sp_str, t1_str, fp_str,
6301 ASM_COMMENT_START);
6302 offset += 4;
6303 }
6304 }
6305 if (dwarf2out_do_frame ())
6306 {
6307 char *l = dwarf2out_cfi_label ();
6308 if (gmask & FRAME_POINTER_MASK)
6309 {
6310 dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
6311 offset - 4 - size1);
6312 dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
6313 }
6314 else
6315 dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size1);
6316 }
6317 if (gmask & RETURN_ADDR_MASK)
6318 {
6319 fprintf (file, "\tst\t%s, [%s+%d]\n",
6320 reg_names[RETURN_ADDR_REGNUM], sp_str, offset);
6321 if (dwarf2out_do_frame ())
6322 /* offset - size1 == reg_offset - size
6323 if reg_offset were updated above like offset. */
6324 dwarf2out_return_save ("", offset - size1);
6325 offset += 4;
6326 }
6327 sparc_flat_save_restore (file, sp_str, offset,
6328 gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
6329 current_frame_info.fmask,
6330 "st", "std", -size1);
6331 fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
6332 size - size1, t1_str, sp_str, t1_str, sp_str);
6333 if (dwarf2out_do_frame ())
6334 if (! (gmask & FRAME_POINTER_MASK))
6335 dwarf2out_def_cfa ("", STACK_POINTER_REGNUM, size);
6336 }
6337 }
6338
6339 fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
6340 }
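/* Illustrative prologue (offsets invented) for a small frame, size 96,
   that saves %fp and the return address, as emitted by the code above:
        add     %sp, -96, %sp
        st      %fp, [%sp+68]
        sub     %sp, -96, %fp   !# set up frame pointer
        st      %o7, [%sp+72]
   followed by the sparc_flat_save_restore stores for any remaining
   registers.  */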
6341 \f
6342 /* Do any necessary cleanup after a function to restore stack, frame,
6343 and regs. */
6344
6345 void
6346 sparc_flat_output_function_epilogue (file, size)
6347 FILE *file;
6348 int size;
6349 {
6350 rtx epilogue_delay = current_function_epilogue_delay_list;
6351 int noepilogue = FALSE;
6352
6353 /* This is only for the human reader. */
6354 fprintf (file, "\t%s#EPILOGUE#\n", ASM_COMMENT_START);
6355
6356 /* The epilogue does not depend on any registers other than the stack
6357 registers, so we assume that if we have 1 pending nop, it can be
6358 ignored, and if we have 2 they must be filled (2 nops occur for integer
6359 multiply and divide). */
6360
6361 size = SPARC_STACK_ALIGN (size);
6362 size = (!current_frame_info.initialized
6363 ? sparc_flat_compute_frame_size (size)
6364 : current_frame_info.total_size);
6365
6366 if (size == 0 && epilogue_delay == 0)
6367 {
6368 rtx insn = get_last_insn ();
6369
6370 /* If the last insn was a BARRIER, we don't have to write any code
6371 because a jump (aka return) was put there. */
6372 if (GET_CODE (insn) == NOTE)
6373 insn = prev_nonnote_insn (insn);
6374 if (insn && GET_CODE (insn) == BARRIER)
6375 noepilogue = TRUE;
6376 }
6377
6378 if (!noepilogue)
6379 {
6380 unsigned int reg_offset = current_frame_info.reg_offset;
6381 unsigned int size1;
6382 const char *sp_str = reg_names[STACK_POINTER_REGNUM];
6383 const char *fp_str = reg_names[FRAME_POINTER_REGNUM];
6384 const char *t1_str = "%g1";
6385
6386 /* In the reload sequence, we don't need to fill the load delay
6387 slots for most of the loads; we also see if we can fill the final
6388 delay slot if it is not otherwise filled by the reload sequence. */
6389
6390 if (size > 4095)
6391 fprintf (file, "\tset\t%d, %s\n", size, t1_str);
6392
6393 if (frame_pointer_needed)
6394 {
6395 if (size > 4095)
6396 fprintf (file,"\tsub\t%s, %s, %s\t\t%s# sp not trusted here\n",
6397 fp_str, t1_str, sp_str, ASM_COMMENT_START);
6398 else
6399 fprintf (file,"\tsub\t%s, %d, %s\t\t%s# sp not trusted here\n",
6400 fp_str, size, sp_str, ASM_COMMENT_START);
6401 }
6402
6403 /* Is the entire register save area offsettable from %sp? */
6404 if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
6405 {
6406 size1 = 0;
6407 }
6408 else
6409 {
6410 /* Restore %sp in two steps, but make sure there is always a
6411 64 byte register save area, and %sp is properly aligned. */
6412 /* Amount to increment %sp by, the first time. */
6413 size1 = ((reg_offset - 64 - 16) + 15) & -16;
6414 /* Offset to register save area from %sp. */
6415 reg_offset = size1 - reg_offset;
6416
6417 fprintf (file, "\tset\t%d, %s\n\tadd\t%s, %s, %s\n",
6418 size1, t1_str, sp_str, t1_str, sp_str);
6419 }
6420
6421 /* We must restore the frame pointer and return address reg first
6422 because they are treated specially by the prologue output code. */
6423 if (current_frame_info.gmask & FRAME_POINTER_MASK)
6424 {
6425 fprintf (file, "\tld\t[%s+%d], %s\n",
6426 sp_str, reg_offset, fp_str);
6427 reg_offset += 4;
6428 }
6429 if (current_frame_info.gmask & RETURN_ADDR_MASK)
6430 {
6431 fprintf (file, "\tld\t[%s+%d], %s\n",
6432 sp_str, reg_offset, reg_names[RETURN_ADDR_REGNUM]);
6433 reg_offset += 4;
6434 }
6435
6436 /* Restore any remaining saved registers. */
6437 sparc_flat_save_restore (file, sp_str, reg_offset,
6438 current_frame_info.gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
6439 current_frame_info.fmask,
6440 "ld", "ldd", 0);
6441
6442 /* If we had to increment %sp in two steps, record it so the second
6443 restoration in the epilogue finishes up. */
6444 if (size1 > 0)
6445 {
6446 size -= size1;
6447 if (size > 4095)
6448 fprintf (file, "\tset\t%d, %s\n",
6449 size, t1_str);
6450 }
6451
6452 if (current_function_returns_struct)
6453 fprintf (file, "\tjmp\t%%o7+12\n");
6454 else
6455 fprintf (file, "\tretl\n");
6456
6457 /* If the only register saved is the return address, we need a
6458 nop, unless we have an instruction to put into the delay slot. Otherwise
6459 we don't since reloading multiple registers doesn't reference
6460 the register being loaded. */
6461
6462 if (epilogue_delay)
6463 {
6464 if (size)
6465 abort ();
6466 final_scan_insn (XEXP (epilogue_delay, 0), file, 1, -2, 1);
6467 }
6468
6469 else if (size > 4095)
6470 fprintf (file, "\tadd\t%s, %s, %s\n", sp_str, t1_str, sp_str);
6471
6472 else if (size > 0)
6473 fprintf (file, "\tadd\t%s, %d, %s\n", sp_str, size, sp_str);
6474
6475 else
6476 fprintf (file, "\tnop\n");
6477 }
6478
6479 /* Reset state info for each function. */
6480 current_frame_info = zero_frame_info;
6481
6482 sparc_output_deferred_case_vectors ();
6483 }
6484 \f
6485 /* Define the number of delay slots needed for the function epilogue.
6486
6487 On the sparc, we need a slot if either no stack has been allocated,
6488 or the only register saved is the return register. */
6489
6490 int
6491 sparc_flat_epilogue_delay_slots ()
6492 {
6493 if (!current_frame_info.initialized)
6494 (void) sparc_flat_compute_frame_size (get_frame_size ());
6495
6496 if (current_frame_info.total_size == 0)
6497 return 1;
6498
6499 return 0;
6500 }
6501
6502 /* Return true if TRIAL is a valid insn for the epilogue delay slot.
6503 Any single length instruction which doesn't reference the stack or frame
6504 pointer is OK. */
6505
6506 int
6507 sparc_flat_eligible_for_epilogue_delay (trial, slot)
6508 rtx trial;
6509 int slot ATTRIBUTE_UNUSED;
6510 {
6511 rtx pat = PATTERN (trial);
6512
6513 if (get_attr_length (trial) != 1)
6514 return 0;
6515
6516 /* If %g0 is live, there are lots of things we can't handle.
6517 Rather than trying to find them all now, let's punt and only
6518 optimize things as necessary. */
6519 if (TARGET_LIVE_G0)
6520 return 0;
6521
6522 if (! reg_mentioned_p (stack_pointer_rtx, pat)
6523 && ! reg_mentioned_p (frame_pointer_rtx, pat))
6524 return 1;
6525
6526 return 0;
6527 }
6528 \f
6529 /* Adjust the cost of a scheduling dependency. Return the new cost of
6530 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
6531
6532 static int
6533 supersparc_adjust_cost (insn, link, dep_insn, cost)
6534 rtx insn;
6535 rtx link;
6536 rtx dep_insn;
6537 int cost;
6538 {
6539 enum attr_type insn_type;
6540
6541 if (! recog_memoized (insn))
6542 return 0;
6543
6544 insn_type = get_attr_type (insn);
6545
6546 if (REG_NOTE_KIND (link) == 0)
6547 {
6548 /* Data dependency; DEP_INSN writes a register that INSN reads some
6549 cycles later. */
6550
6551 /* If a load, then the dependence must be on the memory address;
6552 add an extra "cycle". Note that the cost could be two cycles
6553 if the reg was written late in an instruction group; we cannot tell
6554 here. */
6555 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
6556 return cost + 3;
6557
6558 /* Get the delay only if the address of the store is the dependence. */
6559 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
6560 {
6561 rtx pat = PATTERN (insn);
6562 rtx dep_pat = PATTERN (dep_insn);
6563
6564 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6565 return cost; /* This should not happen! */
6566
6567 /* The dependency between the two instructions was on the data that
6568 is being stored. Assume that this implies that the address of the
6569 store is not dependent. */
6570 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6571 return cost;
6572
6573 return cost + 3; /* An approximation. */
6574 }
6575
6576 /* A shift instruction cannot receive its data from an instruction
6577 in the same cycle; add a one cycle penalty. */
6578 if (insn_type == TYPE_SHIFT)
6579 return cost + 3; /* Split before cascade into shift. */
6580 }
6581 else
6582 {
6583 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
6584 INSN writes some cycles later. */
6585
6586 /* These are only significant for the fpu unit; writing a fp reg before
6587 the fpu has finished with it stalls the processor. */
6588
6589 /* Reusing an integer register causes no problems. */
6590 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6591 return 0;
6592 }
6593
6594 return cost;
6595 }
6596
6597 static int
6598 hypersparc_adjust_cost (insn, link, dep_insn, cost)
6599 rtx insn;
6600 rtx link;
6601 rtx dep_insn;
6602 int cost;
6603 {
6604 enum attr_type insn_type, dep_type;
6605 rtx pat = PATTERN (insn);
6606 rtx dep_pat = PATTERN (dep_insn);
6607
6608 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
6609 return cost;
6610
6611 insn_type = get_attr_type (insn);
6612 dep_type = get_attr_type (dep_insn);
6613
6614 switch (REG_NOTE_KIND (link))
6615 {
6616 case 0:
6617 /* Data dependency; DEP_INSN writes a register that INSN reads some
6618 cycles later. */
6619
6620 switch (insn_type)
6621 {
6622 case TYPE_STORE:
6623 case TYPE_FPSTORE:
6624 /* Get the delay iff the address of the store is the dependence. */
6625 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6626 return cost;
6627
6628 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6629 return cost;
6630 return cost + 3;
6631
6632 case TYPE_LOAD:
6633 case TYPE_SLOAD:
6634 case TYPE_FPLOAD:
6635 /* If a load, then the dependence must be on the memory address. If
6636 the addresses aren't equal, then it might be a false dependency. */
6637 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
6638 {
6639 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
6640 || GET_CODE (SET_DEST (dep_pat)) != MEM
6641 || GET_CODE (SET_SRC (pat)) != MEM
6642 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
6643 XEXP (SET_SRC (pat), 0)))
6644 return cost + 2;
6645
6646 return cost + 8;
6647 }
6648 break;
6649
6650 case TYPE_BRANCH:
6651 /* Compare to branch latency is 0. There is no benefit from
6652 separating compare and branch. */
6653 if (dep_type == TYPE_COMPARE)
6654 return 0;
6655 /* Floating point compare to branch latency is less than
6656 compare to conditional move. */
6657 if (dep_type == TYPE_FPCMP)
6658 return cost - 1;
6659 break;
6660 default:
6661 break;
6662 }
6663 break;
6664
6665 case REG_DEP_ANTI:
6666 /* Anti-dependencies only penalize the fpu unit. */
6667 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6668 return 0;
6669 break;
6670
6671 default:
6672 break;
6673 }
6674
6675 return cost;
6676 }
6677
6678 static int
6679 ultrasparc_adjust_cost (insn, link, dep_insn, cost)
6680 rtx insn;
6681 rtx link;
6682 rtx dep_insn;
6683 int cost;
6684 {
6685 enum attr_type insn_type, dep_type;
6686 rtx pat = PATTERN (insn);
6687 rtx dep_pat = PATTERN (dep_insn);
6688
6689 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
6690 return cost;
6691
6692 insn_type = get_attr_type (insn);
6693 dep_type = get_attr_type (dep_insn);
6694
6695 /* Nothing issues in parallel with integer multiplies, so
6696 mark as zero cost since the scheduler cannot do anything
6697 about it. */
6698 if (insn_type == TYPE_IMUL)
6699 return 0;
6700
6701 #define SLOW_FP(dep_type) \
6702 (dep_type == TYPE_FPSQRTS || dep_type == TYPE_FPSQRTD || \
6703 dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
6704
6705 switch (REG_NOTE_KIND (link))
6706 {
6707 case 0:
6708 /* Data dependency; DEP_INSN writes a register that INSN reads some
6709 cycles later. */
6710
6711 if (dep_type == TYPE_CMOVE)
6712 {
6713 /* Instructions that read the result of conditional moves cannot
6714 be in the same group or the following group. */
6715 return cost + 1;
6716 }
6717
6718 switch (insn_type)
6719 {
6720 /* UltraSPARC can dual issue a store and an instruction setting
6721 the value stored, except for divide and square root. */
6722 case TYPE_FPSTORE:
6723 if (! SLOW_FP (dep_type))
6724 return 0;
6725 return cost;
6726
6727 case TYPE_STORE:
6728 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6729 return cost;
6730
6731 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6732 /* The dependency between the two instructions is on the data
6733 that is being stored. Assume that the address of the store
6734 is not also dependent. */
6735 return 0;
6736 return cost;
6737
6738 case TYPE_LOAD:
6739 case TYPE_SLOAD:
6740 case TYPE_FPLOAD:
6741 /* A load does not return data until at least 11 cycles after
6742 a store to the same location. 3 cycles are accounted for
6743 in the load latency; add the other 8 here. */
6744 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
6745 {
6746 /* If the addresses are not equal this may be a false
6747 dependency because pointer aliasing could not be
6748 determined. Add only 2 cycles in that case. 2 is
6749 an arbitrary compromise between 8, which would cause
6750 the scheduler to generate worse code elsewhere to
6751 compensate for a dependency which might not really
6752 exist, and 0. */
6753 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
6754 || GET_CODE (SET_SRC (pat)) != MEM
6755 || GET_CODE (SET_DEST (dep_pat)) != MEM
6756 || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
6757 XEXP (SET_DEST (dep_pat), 0)))
6758 return cost + 2;
6759
6760 return cost + 8;
6761 }
6762 return cost;
6763
6764 case TYPE_BRANCH:
6765 /* Compare to branch latency is 0. There is no benefit from
6766 separating compare and branch. */
6767 if (dep_type == TYPE_COMPARE)
6768 return 0;
6769 /* Floating point compare to branch latency is less than
6770 compare to conditional move. */
6771 if (dep_type == TYPE_FPCMP)
6772 return cost - 1;
6773 return cost;
6774
6775 case TYPE_FPCMOVE:
6776 /* FMOVR class instructions cannot issue in the same cycle
6777 or the cycle after an instruction which writes any
6778 integer register. Model this as cost 2 for dependent
6779 instructions. */
6780 if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
6781 || dep_type == TYPE_BINARY)
6782 && cost < 2)
6783 return 2;
6784 /* Otherwise check as for integer conditional moves. */
6785
6786 case TYPE_CMOVE:
6787 /* Conditional moves involving integer registers wait until
6788 3 cycles after loads return data. The interlock applies
6789 to all loads, not just dependent loads, but that is hard
6790 to model. */
6791 if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
6792 return cost + 3;
6793 return cost;
6794
6795 default:
6796 break;
6797 }
6798 break;
6799
6800 case REG_DEP_ANTI:
6801 /* Divide and square root lock destination registers for full latency. */
6802 if (! SLOW_FP (dep_type))
6803 return 0;
6804 break;
6805
6806 case REG_DEP_OUTPUT:
6807 /* IEU and FPU instructions that have the same destination
6808 register cannot be grouped together. */
6809 return cost + 1;
6810
6811 default:
6812 break;
6813 }
6814
6815 /* Other costs not accounted for:
6816 - Single precision floating point loads lock the other half of
6817 the even/odd register pair.
6818 - Several hazards associated with ldd/std are ignored because these
6819 instructions are rarely generated for V9.
6820 - The floating point pipeline cannot have both a single and double
6821 precision operation active at the same time. Format conversions
6822 and graphics instructions are given honorary double precision status.
6823 - call and jmpl are always the first instruction in a group. */
6824
6825 return cost;
6826
6827 #undef SLOW_FP
6828 }
6829
6830 int
6831 sparc_adjust_cost (insn, link, dep, cost)
6832 rtx insn;
6833 rtx link;
6834 rtx dep;
6835 int cost;
6836 {
6837 switch (sparc_cpu)
6838 {
6839 case PROCESSOR_SUPERSPARC:
6840 cost = supersparc_adjust_cost (insn, link, dep, cost);
6841 break;
6842 case PROCESSOR_HYPERSPARC:
6843 case PROCESSOR_SPARCLITE86X:
6844 cost = hypersparc_adjust_cost (insn, link, dep, cost);
6845 break;
6846 case PROCESSOR_ULTRASPARC:
6847 cost = ultrasparc_adjust_cost (insn, link, dep, cost);
6848 break;
6849 default:
6850 break;
6851 }
6852 return cost;
6853 }
6854
6855 /* This describes the state of the UltraSPARC pipeline during
6856 instruction scheduling. */
6857
6858 #define TMASK(__x) ((unsigned)1 << ((int)(__x)))
6859 #define UMASK(__x) ((unsigned)1 << ((int)(__x)))
6860
6861 enum ultra_code { NONE=0, /* no insn at all */
6862 IEU0, /* shifts and conditional moves */
6863 IEU1, /* condition code setting insns, calls+jumps */
6864 IEUN, /* all other single cycle ieu insns */
6865 LSU, /* loads and stores */
6866 CTI, /* branches */
6867 FPM, /* FPU pipeline 1, multiplies and divides */
6868 FPA, /* FPU pipeline 2, all other operations */
6869 SINGLE, /* single issue instructions */
6870 NUM_ULTRA_CODES };
6871
6872 static enum ultra_code ultra_code_from_mask PROTO ((int));
6873 static void ultra_schedule_insn PROTO ((rtx *, rtx *, int, enum ultra_code));
6874
6875 static const char *ultra_code_names[NUM_ULTRA_CODES] = {
6876 "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
6877 "FPM", "FPA", "SINGLE" };
6878
6879 struct ultrasparc_pipeline_state {
6880 /* The insns in this group. */
6881 rtx group[4];
6882
6883 /* The code for each insn. */
6884 enum ultra_code codes[4];
6885
6886 /* Which insns in this group have been committed by the
6887 scheduler. This is how we determine how many more
6888 can issue this cycle. */
6889 char commit[4];
6890
6891 /* How many insns in this group. */
6892 char group_size;
6893
6894 /* Mask of free slots still in this group. */
6895 char free_slot_mask;
6896
6897 /* The slotter uses the following to determine what other
6898 insn types can still make their way into this group. */
6899 char contents [NUM_ULTRA_CODES];
6900 char num_ieu_insns;
6901 };
6902
6903 #define ULTRA_NUM_HIST 8
6904 static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
6905 static int ultra_cur_hist;
6906 static int ultra_cycles_elapsed;
6907
6908 #define ultra_pipe (ultra_pipe_hist[ultra_cur_hist])
6909
6910 /* Given TYPE_MASK compute the ultra_code it has. */
6911 static enum ultra_code
6912 ultra_code_from_mask (type_mask)
6913 int type_mask;
6914 {
6915 if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
6916 return IEU0;
6917 else if (type_mask & (TMASK (TYPE_COMPARE) |
6918 TMASK (TYPE_CALL) |
6919 TMASK (TYPE_UNCOND_BRANCH)))
6920 return IEU1;
6921 else if (type_mask & (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
6922 TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY)))
6923 return IEUN;
6924 else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
6925 TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
6926 TMASK (TYPE_FPSTORE)))
6927 return LSU;
6928 else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
6929 TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRTS) |
6930 TMASK (TYPE_FPSQRTD)))
6931 return FPM;
6932 else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
6933 TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
6934 return FPA;
6935 else if (type_mask & TMASK (TYPE_BRANCH))
6936 return CTI;
6937
6938 return SINGLE;
6939 }
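/* Usage sketch: the mask comes from get_attr_type, so for example
   ultra_code_from_mask (TMASK (TYPE_LOAD)) yields LSU and
   ultra_code_from_mask (TMASK (TYPE_FPMUL)) yields FPM; any type not
   matched by an earlier arm falls out as SINGLE.  */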
6940
6941 /* Check INSN (a conditional move) and make sure that its
6942 results are available at this cycle. Return 1 if the
6943 results are in fact ready. */
6944 static int
6945 ultra_cmove_results_ready_p (insn)
6946 rtx insn;
6947 {
6948 struct ultrasparc_pipeline_state *up;
6949 int entry, slot;
6950
6951 /* If this got dispatched in the previous
6952 group, the results are not ready. */
6953 entry = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
6954 up = &ultra_pipe_hist[entry];
6955 slot = 4;
6956 while (--slot >= 0)
6957 if (up->group[slot] == insn)
6958 return 0;
6959
6960 return 1;
6961 }
6962
6963 /* Walk backwards in pipeline history looking for FPU
6964 operations which use a mode different than FPMODE and
6965 will create a stall if an insn using FPMODE were to be
6966 dispatched this cycle. */
6967 static int
6968 ultra_fpmode_conflict_exists (fpmode)
6969 enum machine_mode fpmode;
6970 {
6971 int hist_ent;
6972 int hist_lim;
6973
6974 hist_ent = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
6975 if (ultra_cycles_elapsed < 4)
6976 hist_lim = ultra_cycles_elapsed;
6977 else
6978 hist_lim = 4;
6979 while (hist_lim > 0)
6980 {
6981 struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
6982 int slot = 4;
6983
6984 while (--slot >= 0)
6985 {
6986 rtx insn = up->group[slot];
6987 enum machine_mode this_mode;
6988 rtx pat;
6989
6990 if (! insn
6991 || GET_CODE (insn) != INSN
6992 || (pat = PATTERN (insn)) == 0
6993 || GET_CODE (pat) != SET)
6994 continue;
6995
6996 this_mode = GET_MODE (SET_DEST (pat));
6997 if ((this_mode != SFmode
6998 && this_mode != DFmode)
6999 || this_mode == fpmode)
7000 continue;
7001
7002 /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
7003 we will get a stall. Loads and stores are independent
7004 of these rules. */
7005 if (GET_CODE (SET_SRC (pat)) != ABS
7006 && GET_CODE (SET_SRC (pat)) != NEG
7007 && ((TMASK (get_attr_type (insn)) &
7008 (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
7009 TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRTS) |
7010 TMASK (TYPE_FPSQRTD) |
7011 TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
7012 return 1;
7013 }
7014 hist_lim--;
7015 hist_ent = (hist_ent - 1) & (ULTRA_NUM_HIST - 1);
7016 }
7017
7018 /* No conflicts, safe to dispatch. */
7019 return 0;
7020 }
7021
7022 /* Find an instruction in LIST which has one of the
7023 type attributes enumerated in TYPE_MASK. START
7024 says where to begin the search.
7025
7026 NOTE: This scheme depends upon the fact that we
7027 have fewer than 32 distinct type attributes. */
7028
7029 static int ultra_types_avail;
7030
7031 static rtx *
7032 ultra_find_type (type_mask, list, start)
7033 int type_mask;
7034 rtx *list;
7035 int start;
7036 {
7037 int i;
7038
7039 /* Short circuit if no such insn exists in the ready
7040 list at the moment. */
7041 if ((type_mask & ultra_types_avail) == 0)
7042 return 0;
7043
7044 for (i = start; i >= 0; i--)
7045 {
7046 rtx insn = list[i];
7047
7048 if (recog_memoized (insn) >= 0
7049 && (TMASK (get_attr_type (insn)) & type_mask))
7050 {
7051 enum machine_mode fpmode = SFmode;
7052 rtx pat = 0;
7053 int slot;
7054 int check_depend = 0;
7055 int check_fpmode_conflict = 0;
7056
7057 if (GET_CODE (insn) == INSN
7058 && (pat = PATTERN (insn)) != 0
7059 && GET_CODE (pat) == SET
7060 && !(type_mask & (TMASK (TYPE_STORE) |
7061 TMASK (TYPE_FPSTORE))))
7062 {
7063 check_depend = 1;
7064 if (GET_MODE (SET_DEST (pat)) == SFmode
7065 || GET_MODE (SET_DEST (pat)) == DFmode)
7066 {
7067 fpmode = GET_MODE (SET_DEST (pat));
7068 check_fpmode_conflict = 1;
7069 }
7070 }
7071
7072 slot = 4;
7073 while (--slot >= 0)
7074 {
7075 rtx slot_insn = ultra_pipe.group[slot];
7076 rtx slot_pat;
7077
7078 /* Already issued, bad dependency, or FPU
7079 mode conflict. */
7080 if (slot_insn != 0
7081 && (slot_pat = PATTERN (slot_insn)) != 0
7082 && ((insn == slot_insn)
7083 || (check_depend == 1
7084 && GET_CODE (slot_insn) == INSN
7085 && GET_CODE (slot_pat) == SET
7086 && ((GET_CODE (SET_DEST (slot_pat)) == REG
7087 && GET_CODE (SET_SRC (pat)) == REG
7088 && REGNO (SET_DEST (slot_pat)) ==
7089 REGNO (SET_SRC (pat)))
7090 || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
7091 && GET_CODE (SET_SRC (pat)) == SUBREG
7092 && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
7093 REGNO (SUBREG_REG (SET_SRC (pat)))
7094 && SUBREG_WORD (SET_DEST (slot_pat)) ==
7095 SUBREG_WORD (SET_SRC (pat)))))
7096 || (check_fpmode_conflict == 1
7097 && GET_CODE (slot_insn) == INSN
7098 && GET_CODE (slot_pat) == SET
7099 && (GET_MODE (SET_DEST (slot_pat)) == SFmode
7100 || GET_MODE (SET_DEST (slot_pat)) == DFmode)
7101 && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
7102 goto next;
7103 }
7104
7105 /* Check for peculiar result availability and dispatch
7106 interference situations. */
7107 if (pat != 0
7108 && ultra_cycles_elapsed > 0)
7109 {
7110 rtx link;
7111
7112 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
7113 {
7114 rtx link_insn = XEXP (link, 0);
7115 if (GET_CODE (link_insn) == INSN
7116 && recog_memoized (link_insn) >= 0
7117 && (TMASK (get_attr_type (link_insn)) &
7118 (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
7119 && ! ultra_cmove_results_ready_p (link_insn))
7120 goto next;
7121 }
7122
7123 if (check_fpmode_conflict
7124 && ultra_fpmode_conflict_exists (fpmode))
7125 goto next;
7126 }
7127
7128 return &list[i];
7129 }
7130 next:
7131 ;
7132 }
7133 return 0;
7134 }
7135
7136 static void
7137 ultra_build_types_avail (ready, n_ready)
7138 rtx *ready;
7139 int n_ready;
7140 {
7141 int i = n_ready - 1;
7142
7143 ultra_types_avail = 0;
7144 while (i >= 0)
7145 {
7146 rtx insn = ready[i];
7147
7148 if (recog_memoized (insn) >= 0)
7149 ultra_types_avail |= TMASK (get_attr_type (insn));
7150
7151 i -= 1;
7152 }
7153 }
7154
7155 /* Place insn pointed to by IP into the pipeline.
7156 Make element THIS of READY be that insn if it
7157 is not already. TYPE indicates the pipeline class
7158 this insn falls into. */
7159 static void
7160 ultra_schedule_insn (ip, ready, this, type)
7161 rtx *ip;
7162 rtx *ready;
7163 int this;
7164 enum ultra_code type;
7165 {
7166 int pipe_slot;
7167 char mask = ultra_pipe.free_slot_mask;
7168 rtx temp;
7169
7170 /* Obtain free slot. */
7171 for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
7172 if ((mask & (1 << pipe_slot)) != 0)
7173 break;
7174 if (pipe_slot == 4)
7175 abort ();
7176
7177 /* In it goes, and it hasn't been committed yet. */
7178 ultra_pipe.group[pipe_slot] = *ip;
7179 ultra_pipe.codes[pipe_slot] = type;
7180 ultra_pipe.contents[type] = 1;
7181 if (UMASK (type) &
7182 (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
7183 ultra_pipe.num_ieu_insns += 1;
7184
7185 ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
7186 ultra_pipe.group_size += 1;
7187 ultra_pipe.commit[pipe_slot] = 0;
7188
7189 /* Update ready list. */
7190 temp = *ip;
7191 while (ip != &ready[this])
7192 {
7193 ip[0] = ip[1];
7194 ++ip;
7195 }
7196 *ip = temp;
7197 }
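/* Example of the ready list update above (contents invented): with
   READY = {A, B, C, D}, THIS == 3 and IP == &READY[1], the chosen insn
   B is rotated into slot THIS, leaving {A, C, D, B}, preserving the
   relative order of the others.  */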
7198
7199 /* Advance to the next pipeline group. */
7200 static void
7201 ultra_flush_pipeline ()
7202 {
7203 ultra_cur_hist = (ultra_cur_hist + 1) & (ULTRA_NUM_HIST - 1);
7204 ultra_cycles_elapsed += 1;
7205 bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
7206 ultra_pipe.free_slot_mask = 0xf;
7207 }
7208
7209 /* Init our data structures for this current block. */
7210 void
7211 ultrasparc_sched_init (dump, sched_verbose)
7212 FILE *dump ATTRIBUTE_UNUSED;
7213 int sched_verbose ATTRIBUTE_UNUSED;
7214 {
7215 bzero ((char *) ultra_pipe_hist, sizeof ultra_pipe_hist);
7216 ultra_cur_hist = 0;
7217 ultra_cycles_elapsed = 0;
7218 ultra_pipe.free_slot_mask = 0xf;
7219 }
7220
7221 /* INSN has been scheduled, update pipeline commit state
7222 and return how many instructions are still to be
7223 scheduled in this group. */
7224 int
7225 ultrasparc_variable_issue (insn)
7226 rtx insn;
7227 {
7228 struct ultrasparc_pipeline_state *up = &ultra_pipe;
7229 int i, left_to_fire;
7230
7231 left_to_fire = 0;
7232 for (i = 0; i < 4; i++)
7233 {
7234 if (up->group[i] == 0)
7235 continue;
7236
7237 if (up->group[i] == insn)
7238 {
7239 up->commit[i] = 1;
7240 }
7241 else if (! up->commit[i])
7242 left_to_fire++;
7243 }
7244
7245 return left_to_fire;
7246 }
7247
7248 /* In actual_hazard_this_instance, we may have yanked some
7249 instructions from the ready list due to conflict cost
7250 adjustments. If so, and such an insn was in our pipeline
7251 group, remove it and update state. */
7252 static void
7253 ultra_rescan_pipeline_state (ready, n_ready)
7254 rtx *ready;
7255 int n_ready;
7256 {
7257 struct ultrasparc_pipeline_state *up = &ultra_pipe;
7258 int i;
7259
7260 for (i = 0; i < 4; i++)
7261 {
7262 rtx insn = up->group[i];
7263 int j;
7264
7265 if (! insn)
7266 continue;
7267
7268 /* If it has been committed, then it was removed from
7269 the ready list because it was actually scheduled,
7270 and that is not the case we are searching for here. */
7271 if (up->commit[i] != 0)
7272 continue;
7273
7274 for (j = n_ready - 1; j >= 0; j--)
7275 if (ready[j] == insn)
7276 break;
7277
7278 /* If we didn't find it, toss it. */
7279 if (j < 0)
7280 {
7281 enum ultra_code ucode = up->codes[i];
7282
7283 up->group[i] = 0;
7284 up->codes[i] = NONE;
7285 up->contents[ucode] = 0;
7286 if (UMASK (ucode) &
7287 (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
7288 up->num_ieu_insns -= 1;
7289
7290 up->free_slot_mask |= (1 << i);
7291 up->group_size -= 1;
7292 up->commit[i] = 0;
7293 }
7294 }
7295 }
7296
7297 void
7298 ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
7299 FILE *dump;
7300 int sched_verbose;
7301 rtx *ready;
7302 int n_ready;
7303 {
7304 struct ultrasparc_pipeline_state *up = &ultra_pipe;
7305 int i, this_insn;
7306
7307 if (sched_verbose)
7308 {
7309 int n;
7310
7311 fprintf (dump, "\n;;\tUltraSPARC Looking at [");
7312 for (n = n_ready - 1; n >= 0; n--)
7313 {
7314 rtx insn = ready[n];
7315 enum ultra_code ucode;
7316
7317 if (recog_memoized (insn) < 0)
7318 continue;
7319 ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
7320 if (n != 0)
7321 fprintf (dump, "%s(%d) ",
7322 ultra_code_names[ucode],
7323 INSN_UID (insn));
7324 else
7325 fprintf (dump, "%s(%d)",
7326 ultra_code_names[ucode],
7327 INSN_UID (insn));
7328 }
7329 fprintf (dump, "]\n");
7330 }
7331
7332 this_insn = n_ready - 1;
7333
7334 /* Skip over junk we don't understand. */
7335 while ((this_insn >= 0)
7336 && recog_memoized (ready[this_insn]) < 0)
7337 this_insn--;
7338
7339 ultra_build_types_avail (ready, this_insn + 1);
7340
7341 while (this_insn >= 0) {
7342 int old_group_size = up->group_size;
7343
7344 if (up->group_size != 0)
7345 {
7346 int num_committed;
7347
7348 num_committed = (up->commit[0] + up->commit[1] +
7349 up->commit[2] + up->commit[3]);
7350 /* If nothing has been committed from our group, or all of
7351 them have been, clear out the (current cycle's) pipeline
7352 state and start afresh. */
7353 if (num_committed == 0
7354 || num_committed == up->group_size)
7355 {
7356 ultra_flush_pipeline ();
7357 up = &ultra_pipe;
7358 old_group_size = 0;
7359 }
7360 else
7361 {
7362 /* OK, some ready list insns got requeued and thus removed
7363 from the ready list. Account for this fact. */
7364 ultra_rescan_pipeline_state (ready, n_ready);
7365
7366 /* Something "changed", make this look like a newly
7367 formed group so the code at the end of the loop
7368 knows that progress was in fact made. */
7369 if (up->group_size != old_group_size)
7370 old_group_size = 0;
7371 }
7372 }
7373
7374 if (up->group_size == 0)
7375 {
7376 /* If the pipeline is (still) empty and we have any single
7377 group insns, get them out now as this is a good time. */
7378 rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_ADDRESS) |
7379 TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
7380 TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
7381 ready, this_insn);
7382 if (ip)
7383 {
7384 ultra_schedule_insn (ip, ready, this_insn, SINGLE);
7385 break;
7386 }
7387
7388 /* If we are not in the process of emptying out the pipe, try to
7389 obtain an instruction which must be the first in its group. */
7390 ip = ultra_find_type ((TMASK (TYPE_CALL) |
7391 TMASK (TYPE_CALL_NO_DELAY_SLOT) |
7392 TMASK (TYPE_UNCOND_BRANCH)),
7393 ready, this_insn);
7394 if (ip)
7395 {
7396 ultra_schedule_insn (ip, ready, this_insn, IEU1);
7397 this_insn--;
7398 }
7399 else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
7400 TMASK (TYPE_FPDIVD) |
7401 TMASK (TYPE_FPSQRTS) |
7402 TMASK (TYPE_FPSQRTD)),
7403 ready, this_insn)) != 0)
7404 {
7405 ultra_schedule_insn (ip, ready, this_insn, FPM);
7406 this_insn--;
7407 }
7408 }
7409
7410 /* Try to fill the integer pipeline. First, look for an IEU0 specific
7411 operation. We can't do more IEU operations if the first 3 slots are
7412 all full or we have dispatched two IEU insns already. */
7413 if ((up->free_slot_mask & 0x7) != 0
7414 && up->num_ieu_insns < 2
7415 && up->contents[IEU0] == 0
7416 && up->contents[IEUN] == 0)
7417 {
7418 rtx *ip = ultra_find_type (TMASK (TYPE_SHIFT), ready, this_insn);
7419 if (ip)
7420 {
7421 ultra_schedule_insn (ip, ready, this_insn, IEU0);
7422 this_insn--;
7423 }
7424 }
7425
7426 /* If we can, try to find an IEU1 specific or an unnamed
7427 IEU instruction. */
7428 if ((up->free_slot_mask & 0x7) != 0
7429 && up->num_ieu_insns < 2)
7430 {
7431 rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
7432 TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY) |
7433 (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
7434 ready, this_insn);
7435 if (ip)
7436 {
7437 rtx insn = *ip;
7438
7439 ultra_schedule_insn (ip, ready, this_insn,
7440 (!up->contents[IEU1]
7441 && get_attr_type (insn) == TYPE_COMPARE)
7442 ? IEU1 : IEUN);
7443 this_insn--;
7444 }
7445 }
7446
7447 /* If only one IEU insn has been found, try to find another unnamed
7448 IEU operation or an IEU1 specific one. */
7449 if ((up->free_slot_mask & 0x7) != 0
7450 && up->num_ieu_insns < 2)
7451 {
7452 rtx *ip;
7453 int tmask = (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
7454 TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY));
7455
7456 if (!up->contents[IEU1])
7457 tmask |= TMASK (TYPE_COMPARE);
7458 ip = ultra_find_type (tmask, ready, this_insn);
7459 if (ip)
7460 {
7461 rtx insn = *ip;
7462
7463 ultra_schedule_insn (ip, ready, this_insn,
7464 (!up->contents[IEU1]
7465 && get_attr_type (insn) == TYPE_COMPARE)
7466 ? IEU1 : IEUN);
7467 this_insn--;
7468 }
7469 }
7470
7471 /* Try for a load or store, but such an insn can only be issued
7472 if it is within one of the first 3 slots. */
7473 if ((up->free_slot_mask & 0x7) != 0
7474 && up->contents[LSU] == 0)
7475 {
7476 rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
7477 TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
7478 TMASK (TYPE_FPSTORE)), ready, this_insn);
7479 if (ip)
7480 {
7481 ultra_schedule_insn (ip, ready, this_insn, LSU);
7482 this_insn--;
7483 }
7484 }
7485
7486 /* Now find FPU operations, first FPM class. But not divisions or
7487 square-roots because those will break the group up. Unlike all
7488 the previous types, these can go in any slot. */
7489 if (up->free_slot_mask != 0
7490 && up->contents[FPM] == 0)
7491 {
7492 rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
7493 if (ip)
7494 {
7495 ultra_schedule_insn (ip, ready, this_insn, FPM);
7496 this_insn--;
7497 }
7498 }
7499
7500 /* Continue on with FPA class if we have not filled the group already. */
7501 if (up->free_slot_mask != 0
7502 && up->contents[FPA] == 0)
7503 {
7504 rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
7505 TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
7506 ready, this_insn);
7507 if (ip)
7508 {
7509 ultra_schedule_insn (ip, ready, this_insn, FPA);
7510 this_insn--;
7511 }
7512 }
7513
7514 /* Finally, maybe stick a branch in here. */
7515 if (up->free_slot_mask != 0
7516 && up->contents[CTI] == 0)
7517 {
7518 rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);
7519
7520 /* Try to slip in a branch only if it is one of the
7521 next 2 in the ready list. */
7522 if (ip && ((&ready[this_insn] - ip) < 2))
7523 {
7524 ultra_schedule_insn (ip, ready, this_insn, CTI);
7525 this_insn--;
7526 }
7527 }
7528
7529 up->group_size = 0;
7530 for (i = 0; i < 4; i++)
7531 if ((up->free_slot_mask & (1 << i)) == 0)
7532 up->group_size++;
7533
7534 /* See if we made any progress... */
7535 if (old_group_size != up->group_size)
7536 break;
7537
7538 /* Clean out the (current cycle's) pipeline state
7539 and try once more. If we placed no instructions
7540 into the pipeline at all, it means a real hard
7541 conflict exists with some earlier issued instruction
7542 so we must advance to the next cycle to clear it up. */
7543 if (up->group_size == 0)
7544 {
7545 ultra_flush_pipeline ();
7546 up = &ultra_pipe;
7547 }
7548 else
7549 {
7550 bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
7551 ultra_pipe.free_slot_mask = 0xf;
7552 }
7553 }
7554
7555 if (sched_verbose)
7556 {
7557 int n, gsize;
7558
7559 fprintf (dump, ";;\tUltraSPARC Launched [");
7560 gsize = up->group_size;
7561 for (n = 0; n < 4; n++)
7562 {
7563 rtx insn = up->group[n];
7564
7565 if (! insn)
7566 continue;
7567
7568 gsize -= 1;
7569 if (gsize != 0)
7570 fprintf (dump, "%s(%d) ",
7571 ultra_code_names[up->codes[n]],
7572 INSN_UID (insn));
7573 else
7574 fprintf (dump, "%s(%d)",
7575 ultra_code_names[up->codes[n]],
7576 INSN_UID (insn));
7577 }
7578 fprintf (dump, "]\n");
7579 }
7580 }
7581
7582 int
7583 sparc_issue_rate ()
7584 {
7585 switch (sparc_cpu)
7586 {
7587 default:
7588 return 1;
7589 case PROCESSOR_V9:
7590 /* Assume V9 processors are capable of at least dual-issue. */
7591 return 2;
7592 case PROCESSOR_SUPERSPARC:
7593 return 3;
7594 case PROCESSOR_HYPERSPARC:
7595 case PROCESSOR_SPARCLITE86X:
7596 return 2;
7597 case PROCESSOR_ULTRASPARC:
7598 return 4;
7599 }
7600 }
7601
7602 static int
7603 set_extends (x, insn)
7604 rtx x, insn;
7605 {
7606 register rtx pat = PATTERN (insn);
7607
7608 switch (GET_CODE (SET_SRC (pat)))
7609 {
7610 /* Load and some shift instructions zero extend. */
7611 case MEM:
7612 case ZERO_EXTEND:
7613 /* sethi clears the high bits. */
7614 case HIGH:
7615 /* LO_SUM is used with sethi. sethi cleared the high
7616 bits, and the values used with lo_sum are positive. */
7617 case LO_SUM:
7618 /* Store flag stores 0 or 1. */
7619 case LT: case LTU:
7620 case GT: case GTU:
7621 case LE: case LEU:
7622 case GE: case GEU:
7623 case EQ:
7624 case NE:
7625 return 1;
7626 case AND:
7627 {
7628 rtx op1 = XEXP (SET_SRC (pat), 1);
7629 if (GET_CODE (op1) == CONST_INT)
7630 return INTVAL (op1) >= 0;
7631 if (GET_CODE (XEXP (SET_SRC (pat), 0)) == REG
7632 && sparc_check_64 (XEXP (SET_SRC (pat), 0), insn) == 1)
7633 return 1;
7634 if (GET_CODE (op1) == REG
7635 && sparc_check_64 ((op1), insn) == 1)
7636 return 1;
7637 }
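      /* Fall through to the shift cases.  */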
7638 case ASHIFT:
7639 case LSHIFTRT:
7640 return GET_MODE (SET_SRC (pat)) == SImode;
7641 /* Positive integers leave the high bits zero. */
7642 case CONST_DOUBLE:
7643 return ! (CONST_DOUBLE_LOW (x) & 0x80000000);
7644 case CONST_INT:
7645 return ! (INTVAL (x) & 0x80000000);
7646 case ASHIFTRT:
7647 case SIGN_EXTEND:
7648 return - (GET_MODE (SET_SRC (pat)) == SImode);
7649 default:
7650 return 0;
7651 }
7652 }
7653
7654 /* We _ought_ to have only one kind per function, but... */
7655 static rtx sparc_addr_diff_list;
7656 static rtx sparc_addr_list;
7657
7658 void
7659 sparc_defer_case_vector (lab, vec, diff)
7660 rtx lab, vec;
7661 int diff;
7662 {
7663 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7664 if (diff)
7665 sparc_addr_diff_list
7666 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7667 else
7668 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7669 }
7670
7671 static void
7672 sparc_output_addr_vec (vec)
7673 rtx vec;
7674 {
7675 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7676 int idx, vlen = XVECLEN (body, 0);
7677
7678 #ifdef ASM_OUTPUT_ADDR_VEC_START
7679 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7680 #endif
7681
7682 #ifdef ASM_OUTPUT_CASE_LABEL
7683 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7684 NEXT_INSN (lab));
7685 #else
7686 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7687 #endif
7688
7689 for (idx = 0; idx < vlen; idx++)
7690 {
7691 ASM_OUTPUT_ADDR_VEC_ELT
7692 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
7693 }
7694
7695 #ifdef ASM_OUTPUT_ADDR_VEC_END
7696 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7697 #endif
7698 }
7699
static void
sparc_output_addr_diff_vec (vec)
     rtx vec;
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
	(asm_out_file,
	 body,
	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
	 CODE_LABEL_NUMBER (base));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}

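/* Output the case vectors deferred by sparc_defer_case_vector, aligned
   in the current function's code section, then empty both lists.  */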
static void
sparc_output_deferred_case_vectors ()
{
  rtx t;
  int align;

  if (sparc_addr_list == NULL_RTX
      && sparc_addr_diff_list == NULL_RTX)
    return;

  /* Align to cache line in the function's code section.  */
  function_section (current_function_decl);

  align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
  if (align > 0)
    ASM_OUTPUT_ALIGN (asm_out_file, align);

  for (t = sparc_addr_list; t ; t = XEXP (t, 1))
    sparc_output_addr_vec (XEXP (t, 0));
  for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
    sparc_output_addr_diff_vec (XEXP (t, 0));

  sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
}

/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
   unknown.  Return 1 if the high bits are zero, -1 if the register is
   sign extended.  */
int
sparc_check_64 (x, insn)
     rtx x, insn;
{
  /* If a register is set only once it is safe to ignore insns this
     code does not know how to handle.  The loop will either recognize
     the single set and return the correct value or fail to recognize
     it and return 0.  */
  int set_once = 0;

  if (GET_CODE (x) == REG
      && flag_expensive_optimizations
      && REG_N_SETS (REGNO (x)) == 1)
    set_once = 1;

  if (insn == 0)
    {
      if (set_once)
	insn = get_last_insn_anywhere ();
      else
	return 0;
    }

  while ((insn = PREV_INSN (insn)))
    {
      switch (GET_CODE (insn))
	{
	case JUMP_INSN:
	case NOTE:
	  break;
	case CODE_LABEL:
	case CALL_INSN:
	default:
	  if (! set_once)
	    return 0;
	  break;
	case INSN:
	  {
	    rtx pat = PATTERN (insn);
	    if (GET_CODE (pat) != SET)
	      return 0;
	    if (rtx_equal_p (x, SET_DEST (pat)))
	      return set_extends (x, insn);
	    if (reg_overlap_mentioned_p (SET_DEST (pat), x))
	      return 0;
	  }
	}
    }
  return 0;
}

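/* Output a 64-bit shift for v8plus.  OPERANDS[1] is the value to shift,
   OPERANDS[2] the shift count, OPERANDS[0] the destination register
   pair, and OPERANDS[3] a 64-bit scratch register.  The value of
   operand 1 is first assembled into a single 64-bit register, the shift
   named by OPCODE (e.g. "sllx") is applied, and the result is split
   into the high/low words of operand 0.  */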
char *
sparc_v8plus_shift (operands, insn, opcode)
     rtx *operands;
     rtx insn;
     const char *opcode;
{
  static char asm_code[60];

  if (GET_CODE (operands[3]) == SCRATCH)
    operands[3] = operands[0];
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      output_asm_insn ("mov %1,%3", operands);
    }
  else
    {
      output_asm_insn ("sllx %H1,32,%3", operands);
      if (sparc_check_64 (operands[1], insn) <= 0)
	output_asm_insn ("srl %L1,0,%L1", operands);
      output_asm_insn ("or %L1,%3,%3", operands);
    }

  strcpy (asm_code, opcode);
  if (which_alternative != 2)
    return strcat (asm_code, " %0,%2,%L0\n\tsrlx %L0,32,%H0");
  else
    return strcat (asm_code, " %3,%2,%3\n\tsrlx %3,32,%H0\n\tmov %3,%L0");
}


/* Return 1 if DEST and SRC reference only global and `in' registers.  */

int
sparc_return_peephole_ok (dest, src)
     rtx dest, src;
{
  if (! TARGET_V9)
    return 0;
  if (current_function_uses_only_leaf_regs)
    return 0;
  if (GET_CODE (src) != CONST_INT
      && (GET_CODE (src) != REG || ! IN_OR_GLOBAL_P (src)))
    return 0;
  return IN_OR_GLOBAL_P (dest);
}
\f
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.

   32 bit sparc uses %g2 as the STATIC_CHAIN_REGNUM which gets clobbered
   during profiling so we need to save/restore it around the call to mcount.
   We're guaranteed that a save has just been done, and we use the space
   allocated for intreg/fpreg value passing.  */

void
sparc_function_profiler (file, labelno)
     FILE *file;
     int labelno;
{
  char buf[32];
  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

  if (! TARGET_ARCH64)
    fputs ("\tst\t%g2,[%fp-4]\n", file);

  fputs ("\tsethi\t%hi(", file);
  assemble_name (file, buf);
  fputs ("),%o0\n", file);

  fputs ("\tcall\t", file);
  assemble_name (file, MCOUNT_FUNCTION);
  putc ('\n', file);

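  /* This "or" executes in the delay slot of the call above.  */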
  fputs ("\t or\t%o0,%lo(", file);
  assemble_name (file, buf);
  fputs ("),%o0\n", file);

  if (! TARGET_ARCH64)
    fputs ("\tld\t[%fp-4],%g2\n", file);
}
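
/* As an illustration, on 32-bit SPARC the sequence emitted above looks
   roughly like:

	st	%g2,[%fp-4]
	sethi	%hi(LP3),%o0
	call	mcount
	 or	%o0,%lo(LP3),%o0
	ld	[%fp-4],%g2

   where "LP3" stands for the internal label generated for label number 3
   and "mcount" for MCOUNT_FUNCTION; the exact spellings depend on
   ASM_GENERATE_INTERNAL_LABEL and the target headers.  */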


/* The following macro shall output assembler code to FILE
   to initialize basic-block profiling.

   If profile_block_flag == 2

	Output code to call the subroutine `__bb_init_trace_func'
	and pass two parameters to it.  The first parameter is
	the address of a block allocated in the object module.
	The second parameter is the number of the first basic block
	of the function.

	The name of the block is a local symbol made with this statement:

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

	Of course, since you are writing the definition of
	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
	can take a short cut in the definition of this macro and use the
	name that you know will result.

	The number of the first basic block of the function is
	passed to the macro in BLOCK_OR_LABEL.

	If described in a virtual assembler language the code to be
	output looks like:

		parameter1 <- LPBX0
		parameter2 <- BLOCK_OR_LABEL
		call __bb_init_trace_func

   else if profile_block_flag != 0

	Output code to call the subroutine `__bb_init_func'
	and pass one single parameter to it, which is the same
	as the first parameter to `__bb_init_trace_func'.

	The first word of this parameter is a flag which will be nonzero if
	the object module has already been initialized.  So test this word
	first, and do not call `__bb_init_func' if the flag is nonzero.
	Note: When profile_block_flag == 2 the test need not be done
	but `__bb_init_trace_func' *must* be called.

	BLOCK_OR_LABEL may be used to generate a label number as a
	branch destination in case `__bb_init_func' will not be called.

	If described in a virtual assembler language the code to be
	output looks like:

		cmp (LPBX0),0
		jne local_label
		parameter1 <- LPBX0
		call __bb_init_func
	    local_label:
*/

void
sparc_function_block_profiler (file, block_or_label)
     FILE *file;
     int block_or_label;
{
  char LPBX[32];
  ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);

  if (profile_block_flag == 2)
    {
      fputs ("\tsethi\t%hi(", file);
      assemble_name (file, LPBX);
      fputs ("),%o0\n", file);

      fprintf (file, "\tsethi\t%%hi(%d),%%o1\n", block_or_label);

      fputs ("\tor\t%o0,%lo(", file);
      assemble_name (file, LPBX);
      fputs ("),%o0\n", file);

      fprintf (file, "\tcall\t%s__bb_init_trace_func\n", user_label_prefix);

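      /* This "or" executes in the call's delay slot.  */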
      fprintf (file, "\t or\t%%o1,%%lo(%d),%%o1\n", block_or_label);
    }
  else if (profile_block_flag != 0)
    {
      char LPBY[32];
      ASM_GENERATE_INTERNAL_LABEL (LPBY, "LPBY", block_or_label);

      fputs ("\tsethi\t%hi(", file);
      assemble_name (file, LPBX);
      fputs ("),%o0\n", file);

      fputs ("\tld\t[%lo(", file);
      assemble_name (file, LPBX);
      fputs (")+%o0],%o1\n", file);

      fputs ("\ttst\t%o1\n", file);

      if (TARGET_V9)
	{
	  fputs ("\tbne,pn\t%icc,", file);
	  assemble_name (file, LPBY);
	  putc ('\n', file);
	}
      else
	{
	  fputs ("\tbne\t", file);
	  assemble_name (file, LPBY);
	  putc ('\n', file);
	}

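      /* This "or" sits in the delay slot of the branch above and
	 executes whether or not the branch is taken.  */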
      fputs ("\t or\t%o0,%lo(", file);
      assemble_name (file, LPBX);
      fputs ("),%o0\n", file);

      fprintf (file, "\tcall\t%s__bb_init_func\n\t nop\n", user_label_prefix);

      ASM_OUTPUT_INTERNAL_LABEL (file, "LPBY", block_or_label);
    }
}

/* The following macro shall output assembler code to FILE
   to increment a counter associated with basic block number BLOCKNO.

   If profile_block_flag == 2

	Output code to initialize the global structure `__bb' and
	call the function `__bb_trace_func' which will increment the
	counter.

	`__bb' consists of two words.  In the first word the number
	of the basic block has to be stored.  In the second word
	the address of a block allocated in the object module
	has to be stored.

	The basic block number is given by BLOCKNO.

	The address of the block is given by the label created with

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

	by FUNCTION_BLOCK_PROFILER.

	Of course, since you are writing the definition of
	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
	can take a short cut in the definition of this macro and use the
	name that you know will result.

	If described in a virtual assembler language the code to be
	output looks like:

		move BLOCKNO -> (__bb)
		move LPBX0 -> (__bb+4)
		call __bb_trace_func

	Note that function `__bb_trace_func' must not change the
	machine state, especially the flag register.  To guarantee
	this, you must output code to save and restore registers
	either in this macro or in the macros MACHINE_STATE_SAVE
	and MACHINE_STATE_RESTORE.  The last two macros will be
	used in the function `__bb_trace_func', so you must make
	sure that the function prologue does not change any
	register prior to saving it with MACHINE_STATE_SAVE.

   else if profile_block_flag != 0

	Output code to increment the counter directly.
	Basic blocks are numbered separately from zero within each
	compiled object module.  The count associated with block number
	BLOCKNO is at index BLOCKNO in an array of words; the name of
	this array is a local symbol made with this statement:

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);

	Of course, since you are writing the definition of
	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
	can take a short cut in the definition of this macro and use the
	name that you know will result.

	If described in a virtual assembler language, the code to be
	output looks like:

		inc (LPBX2+4*BLOCKNO)
*/

void
sparc_block_profiler (file, blockno)
     FILE *file;
     int blockno;
{
  char LPBX[32];
  int bbreg = TARGET_ARCH64 ? 4 : 2;

  if (profile_block_flag == 2)
    {
      ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);

      fprintf (file, "\tsethi\t%%hi(%s__bb),%%g1\n", user_label_prefix);
      fprintf (file, "\tsethi\t%%hi(%d),%%g%d\n", blockno, bbreg);
      fprintf (file, "\tor\t%%g1,%%lo(%s__bb),%%g1\n", user_label_prefix);
      fprintf (file, "\tor\t%%g%d,%%lo(%d),%%g%d\n", bbreg, blockno, bbreg);

      fprintf (file, "\tst\t%%g%d,[%%g1]\n", bbreg);

      fputs ("\tsethi\t%hi(", file);
      assemble_name (file, LPBX);
      fprintf (file, "),%%g%d\n", bbreg);

      fprintf (file, "\tor\t%%g%d,%%lo(", bbreg);
      assemble_name (file, LPBX);
      fprintf (file, "),%%g%d\n", bbreg);

      fprintf (file, "\tst\t%%g%d,[%%g1+4]\n", bbreg);
      fprintf (file, "\tmov\t%%o7,%%g%d\n", bbreg);

      fprintf (file, "\tcall\t%s__bb_trace_func\n\t nop\n", user_label_prefix);

      fprintf (file, "\tmov\t%%g%d,%%o7\n", bbreg);
    }
  else if (profile_block_flag != 0)
    {
      ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 2);

      fputs ("\tsethi\t%hi(", file);
      assemble_name (file, LPBX);
      fprintf (file, "+%d),%%g1\n", blockno*4);

      fputs ("\tld\t[%g1+%lo(", file);
      assemble_name (file, LPBX);
      if (TARGET_ARCH64 && USE_AS_OFFSETABLE_LO10)
	fprintf (file, ")+%d],%%g%d\n", blockno*4, bbreg);
      else
	fprintf (file, "+%d)],%%g%d\n", blockno*4, bbreg);

      fprintf (file, "\tadd\t%%g%d,1,%%g%d\n", bbreg, bbreg);

      fprintf (file, "\tst\t%%g%d,[%%g1+%%lo(", bbreg);
      assemble_name (file, LPBX);
      if (TARGET_ARCH64 && USE_AS_OFFSETABLE_LO10)
	fprintf (file, ")+%d]\n", blockno*4);
      else
	fprintf (file, "+%d)]\n", blockno*4);
    }
}


/* The following macro shall output assembler code to FILE
   to indicate a return from function during basic-block profiling.

   If profile_block_flag == 2:

	Output assembler code to call function `__bb_trace_ret'.

	Note that function `__bb_trace_ret' must not change the
	machine state, especially the flag register.  To guarantee
	this, you must output code to save and restore registers
	either in this macro or in the macros MACHINE_STATE_SAVE_RET
	and MACHINE_STATE_RESTORE_RET.  The last two macros will be
	used in the function `__bb_trace_ret', so you must make
	sure that the function prologue does not change any
	register prior to saving it with MACHINE_STATE_SAVE_RET.

   else if profile_block_flag != 0:

	The macro will not be used, so it need not distinguish
	these cases.  */

void
sparc_function_block_profiler_exit (file)
     FILE *file;
{
  if (profile_block_flag == 2)
    fprintf (file, "\tcall\t%s__bb_trace_ret\n\t nop\n", user_label_prefix);
  else
    abort ();
}

/* Mark ARG, which is really a struct ultrasparc_pipeline_state *, for
   GC.  */

static void
mark_ultrasparc_pipeline_state (arg)
     void *arg;
{
  struct ultrasparc_pipeline_state *ups;
  size_t i;

  ups = (struct ultrasparc_pipeline_state *) arg;
  for (i = 0; i < sizeof (ups->group) / sizeof (rtx); ++i)
    ggc_mark_rtx (ups->group[i]);
}

/* Called to register all of our global variables with the garbage
   collector.  */

static void
sparc_add_gc_roots ()
{
  ggc_add_rtx_root (&sparc_compare_op0, 1);
  ggc_add_rtx_root (&sparc_compare_op1, 1);
  ggc_add_rtx_root (&leaf_label, 1);
  ggc_add_rtx_root (&global_offset_table, 1);
  ggc_add_rtx_root (&get_pc_symbol, 1);
  ggc_add_rtx_root (&sparc_addr_diff_list, 1);
  ggc_add_rtx_root (&sparc_addr_list, 1);
  ggc_add_root (ultra_pipe_hist,
		sizeof (ultra_pipe_hist) / sizeof (ultra_pipe_hist[0]),
		sizeof (ultra_pipe_hist[0]),
		&mark_ultrasparc_pipeline_state);
}