re PR target/55673 (Reversed before/after handling in sparc_emit_membar_for_model)
[gcc.git] / gcc / config / sparc / sparc.c
/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   2011, 2012
   Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "gimple.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "df.h"
#include "opts.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
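
/* Editorial worked example (not in the original source): with the
   ultrasparc values below (int_mul = COSTS_N_INSNS (4) and
   int_mul_bit_factor = 2), a multiply whose rs1 operand has its highest
   set bit at position 11 is costed as COSTS_N_INSNS (4) + (11 - 3) / 2,
   i.e. the base latency plus 4 extra cost units for the late bits.  */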

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
   whether somebody branches between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
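
/* Editorial note, inferred from the table above: index 24 (%i0) maps to
   hard register 8 (%o0), so in a leaf function the in registers %i0-%i5
   are renamed to the out registers %o0-%o5 and %i7 to %o7, while %sp
   (14) keeps its place; registers mapped to -1 are not remapped and
   must not be used.  */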

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx, rtx, rtx, int);
static int hypersparc_adjust_cost (rtx, rtx, rtx, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx, rtx, rtx, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_vis_init_builtins (void);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static int sparc_vis_mul8x16 (int, int);
static void sparc_handle_vis_mul8x16 (tree *, int, tree, tree, tree);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static void sparc_reorg (void);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static int sparc_register_move_cost (enum machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (enum machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     enum machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 enum machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       enum machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (enum machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    enum machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   enum machine_mode,
					   secondary_reload_info *);
\f
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
      do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
\f
/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sparc_reorg

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

struct gcc_target targetm = TARGET_INITIALIZER;

static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf(stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the PROCESSOR_* enumeration.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    /* LEON */
    { "leon", MASK_ISA, MASK_V8|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags("Initial target_flags", target_flags);
      dump_target_flags("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use a 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;
}
\f
/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}
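
/* Editorial summary, inferred from the three predicates above: fp_mov_p
   accepts the SFmode constants whose bit pattern fits in a 13-bit signed
   immediate (one mov), fp_sethi_p those whose low 10 bits are clear (one
   sethi), and fp_high_losum_p the rest, which need the two-insn sethi/or
   sequence.  */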

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one; now mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partition into hot
     and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}

/* Expand a move instruction.  Return true if all work is done.  */

bool
sparc_expand_move (enum machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fixup TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands[1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if (GET_CODE (operands[1]) == LABEL_REF
	  && can_use_mov_pic_label_ref (operands[1]))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      gcc_assert (TARGET_ARCH64);
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or to load an FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
	 not storing directly into memory.  So fix this up to avoid
	 crashes in output_constant_pool.  */
      if (operands[1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear FP registers or set them to all-ones if TARGET_VIS,
	 and we can always do so for the other registers.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers.  */
	      || (mode == DFmode
		  && ! can_create_pseudo_p ())))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    case TImode:
      {
	rtx high, low;
	/* TImode isn't available in 32-bit mode.  */
	split_double (operands[1], &high, &low);
	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
			      high));
	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
			      low));
      }
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Load OP1, a 32-bit constant, into OP0, a register.
   We know it can't be done in one insn when we get
   here; the move expander guarantees this.  */

static void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp = op0;

  if (can_create_pseudo_p ())
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      gcc_assert (!small_int_operand (op1, mode)
		  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      GEN_INT (INTVAL (op1)
				       & ~(HOST_WIDE_INT)0x3ff)));

      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}
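
/* Editorial worked example for the CONST_INT path above: for
   op1 = 0x12345678, TEMP is first set to 0x12345678 & ~0x3ff
   = 0x12345400 (the sethi part), then OP0 = TEMP | 0x278 restores
   the low 10 bits, reconstructing the full constant.  */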
1532
1533 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1534 If TEMP is nonzero, we are forbidden to use any other scratch
1535 registers. Otherwise, we are allowed to generate them as needed.
1536
1537 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1538 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1539
1540 void
1541 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1542 {
1543 rtx temp1, temp2, temp3, temp4, temp5;
1544 rtx ti_temp = 0;
1545
1546 if (temp && GET_MODE (temp) == TImode)
1547 {
1548 ti_temp = temp;
1549 temp = gen_rtx_REG (DImode, REGNO (temp));
1550 }
1551
1552 /* SPARC-V9 code-model support. */
1553 switch (sparc_cmodel)
1554 {
1555 case CM_MEDLOW:
1556 /* The range spanned by all instructions in the object is less
1557 than 2^31 bytes (2GB) and the distance from any instruction
1558 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1559 than 2^31 bytes (2GB).
1560
1561 The executable must be in the low 4TB of the virtual address
1562 space.
1563
1564 sethi %hi(symbol), %temp1
1565 or %temp1, %lo(symbol), %reg */
1566 if (temp)
1567 temp1 = temp; /* op0 is allowed. */
1568 else
1569 temp1 = gen_reg_rtx (DImode);
1570
1571 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1572 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1573 break;
1574
1575 case CM_MEDMID:
1576 /* The range spanned by all instructions in the object is less
1577 than 2^31 bytes (2GB) and the distance from any instruction
1578 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1579 than 2^31 bytes (2GB).
1580
1581 The executable must be in the low 16TB of the virtual address
1582 space.
1583
1584 sethi %h44(symbol), %temp1
1585 or %temp1, %m44(symbol), %temp2
1586 sllx %temp2, 12, %temp3
1587 or %temp3, %l44(symbol), %reg */
1588 if (temp)
1589 {
1590 temp1 = op0;
1591 temp2 = op0;
1592 temp3 = temp; /* op0 is allowed. */
1593 }
1594 else
1595 {
1596 temp1 = gen_reg_rtx (DImode);
1597 temp2 = gen_reg_rtx (DImode);
1598 temp3 = gen_reg_rtx (DImode);
1599 }
1600
1601 emit_insn (gen_seth44 (temp1, op1));
1602 emit_insn (gen_setm44 (temp2, temp1, op1));
1603 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1604 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1605 emit_insn (gen_setl44 (op0, temp3, op1));
1606 break;
1607
1608 case CM_MEDANY:
1609 /* The range spanned by all instructions in the object is less
1610 than 2^31 bytes (2GB) and the distance from any instruction
1611 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1612 than 2^31 bytes (2GB).
1613
1614 The executable can be placed anywhere in the virtual address
1615 space.
1616
1617 sethi %hh(symbol), %temp1
1618 sethi %lm(symbol), %temp2
1619 or %temp1, %hm(symbol), %temp3
1620 sllx %temp3, 32, %temp4
1621 or %temp4, %temp2, %temp5
1622 or %temp5, %lo(symbol), %reg */
1623 if (temp)
1624 {
1625 /* It is possible that one of the registers we got for operands[2]
1626 might coincide with that of operands[0] (which is why we made
1627 it TImode). Pick the other one to use as our scratch. */
1628 if (rtx_equal_p (temp, op0))
1629 {
1630 gcc_assert (ti_temp);
1631 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1632 }
1633 temp1 = op0;
1634 temp2 = temp; /* op0 is _not_ allowed, see above. */
1635 temp3 = op0;
1636 temp4 = op0;
1637 temp5 = op0;
1638 }
1639 else
1640 {
1641 temp1 = gen_reg_rtx (DImode);
1642 temp2 = gen_reg_rtx (DImode);
1643 temp3 = gen_reg_rtx (DImode);
1644 temp4 = gen_reg_rtx (DImode);
1645 temp5 = gen_reg_rtx (DImode);
1646 }
1647
1648 emit_insn (gen_sethh (temp1, op1));
1649 emit_insn (gen_setlm (temp2, op1));
1650 emit_insn (gen_sethm (temp3, temp1, op1));
1651 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1652 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1653 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1654 gen_rtx_PLUS (DImode, temp4, temp2)));
1655 emit_insn (gen_setlo (op0, temp5, op1));
1656 break;
1657
1658 case CM_EMBMEDANY:
1659 /* Old old old backwards compatibility kruft here.
1660 Essentially it is MEDLOW with a fixed 64-bit
1661 virtual base added to all data segment addresses.
1662 Text-segment stuff is computed like MEDANY, we can't
1663 reuse the code above because the relocation knobs
1664 look different.
1665
1666 Data segment: sethi %hi(symbol), %temp1
1667 add %temp1, EMBMEDANY_BASE_REG, %temp2
1668 or %temp2, %lo(symbol), %reg */
1669 if (data_segment_operand (op1, GET_MODE (op1)))
1670 {
1671 if (temp)
1672 {
1673 temp1 = temp; /* op0 is allowed. */
1674 temp2 = op0;
1675 }
1676 else
1677 {
1678 temp1 = gen_reg_rtx (DImode);
1679 temp2 = gen_reg_rtx (DImode);
1680 }
1681
1682 emit_insn (gen_embmedany_sethi (temp1, op1));
1683 emit_insn (gen_embmedany_brsum (temp2, temp1));
1684 emit_insn (gen_embmedany_losum (op0, temp2, op1));
1685 }
1686
1687 /* Text segment: sethi %uhi(symbol), %temp1
1688 sethi %hi(symbol), %temp2
1689 or %temp1, %ulo(symbol), %temp3
1690 sllx %temp3, 32, %temp4
1691 or %temp4, %temp2, %temp5
1692 or %temp5, %lo(symbol), %reg */
1693 else
1694 {
1695 if (temp)
1696 {
1697 /* It is possible that one of the registers we got for operands[2]
1698 might coincide with that of operands[0] (which is why we made
1699 it TImode). Pick the other one to use as our scratch. */
1700 if (rtx_equal_p (temp, op0))
1701 {
1702 gcc_assert (ti_temp);
1703 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1704 }
1705 temp1 = op0;
1706 temp2 = temp; /* op0 is _not_ allowed, see above. */
1707 temp3 = op0;
1708 temp4 = op0;
1709 temp5 = op0;
1710 }
1711 else
1712 {
1713 temp1 = gen_reg_rtx (DImode);
1714 temp2 = gen_reg_rtx (DImode);
1715 temp3 = gen_reg_rtx (DImode);
1716 temp4 = gen_reg_rtx (DImode);
1717 temp5 = gen_reg_rtx (DImode);
1718 }
1719
1720 emit_insn (gen_embmedany_textuhi (temp1, op1));
1721 emit_insn (gen_embmedany_texthi (temp2, op1));
1722 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
1723 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1724 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1725 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1726 gen_rtx_PLUS (DImode, temp4, temp2)));
1727 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
1728 }
1729 break;
1730
1731 default:
1732 gcc_unreachable ();
1733 }
1734 }
1735
1736 #if HOST_BITS_PER_WIDE_INT == 32
1737 static void
1738 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
1739 {
1740 gcc_unreachable ();
1741 }
1742 #else
1743 /* These avoid problems when cross-compiling. If we do not
1744 go through all this hair, then the optimizer will see
1745 invalid REG_EQUAL notes or in some cases none at all. */
1746 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
1747 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
1748 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
1749 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
1750
1751 /* The optimizer must not assume anything about exactly
1752 which bits are set for a HIGH; they are unspecified.
1753 Unfortunately this leads to many missed optimizations
1754 during CSE. We mask out the non-HIGH bits so that the result
1755 matches a plain movdi, to alleviate this problem. */
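/* For instance (an illustrative value, not from the original source):
   a HIGH of 0x80001234 is emitted as the masked constant 0x80001000
   (0x80001234 & ~(HOST_WIDE_INT) 0x3ff), which a movdi can match.  */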
1756 static rtx
1757 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
1758 {
1759 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
1760 }
1761
1762 static rtx
1763 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
1764 {
1765 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
1766 }
1767
1768 static rtx
1769 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
1770 {
1771 return gen_rtx_IOR (DImode, src, GEN_INT (val));
1772 }
1773
1774 static rtx
1775 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
1776 {
1777 return gen_rtx_XOR (DImode, src, GEN_INT (val));
1778 }
1779
1780 /* Worker routines for 64-bit constant formation on arch64.
1781 One of the key things to do in these emissions is
1782 to create as many temporary REGs as possible. This makes
1783 it possible for half-built constants to be reused when
1784 similar values are required later on.
1785 Without doing this, the optimizer cannot see such
1786 opportunities. */
1787
1788 static void sparc_emit_set_const64_quick1 (rtx, rtx,
1789 unsigned HOST_WIDE_INT, int);
1790
1791 static void
1792 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
1793 unsigned HOST_WIDE_INT low_bits, int is_neg)
1794 {
1795 unsigned HOST_WIDE_INT high_bits;
1796
1797 if (is_neg)
1798 high_bits = (~low_bits) & 0xffffffff;
1799 else
1800 high_bits = low_bits;
1801
1802 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1803 if (!is_neg)
1804 {
1805 emit_insn (gen_rtx_SET (VOIDmode, op0,
1806 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1807 }
1808 else
1809 {
1810 /* If we are XOR'ing with -1, then we should emit a one's complement
1811 instead. This way the combiner will notice logical operations
1812 such as ANDN later on and substitute. */
1813 if ((low_bits & 0x3ff) == 0x3ff)
1814 {
1815 emit_insn (gen_rtx_SET (VOIDmode, op0,
1816 gen_rtx_NOT (DImode, temp)));
1817 }
1818 else
1819 {
1820 emit_insn (gen_rtx_SET (VOIDmode, op0,
1821 gen_safe_XOR64 (temp,
1822 (-(HOST_WIDE_INT)0x400
1823 | (low_bits & 0x3ff)))));
1824 }
1825 }
1826 }
1827
1828 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
1829 unsigned HOST_WIDE_INT, int);
1830
1831 static void
1832 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
1833 unsigned HOST_WIDE_INT high_bits,
1834 unsigned HOST_WIDE_INT low_immediate,
1835 int shift_count)
1836 {
1837 rtx temp2 = op0;
1838
1839 if ((high_bits & 0xfffffc00) != 0)
1840 {
1841 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1842 if ((high_bits & ~0xfffffc00) != 0)
1843 emit_insn (gen_rtx_SET (VOIDmode, op0,
1844 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1845 else
1846 temp2 = temp;
1847 }
1848 else
1849 {
1850 emit_insn (gen_safe_SET64 (temp, high_bits));
1851 temp2 = temp;
1852 }
1853
1854 /* Now shift it up into place. */
1855 emit_insn (gen_rtx_SET (VOIDmode, op0,
1856 gen_rtx_ASHIFT (DImode, temp2,
1857 GEN_INT (shift_count))));
1858
1859 /* If there is a low immediate part piece, finish up by
1860 putting that in as well. */
1861 if (low_immediate != 0)
1862 emit_insn (gen_rtx_SET (VOIDmode, op0,
1863 gen_safe_OR64 (op0, low_immediate)));
1864 }
1865
1866 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
1867 unsigned HOST_WIDE_INT);
1868
1869 /* Full 64-bit constant decomposition. Even though this is the
1870 'worst' case, we still optimize a few things away. */
1871 static void
1872 sparc_emit_set_const64_longway (rtx op0, rtx temp,
1873 unsigned HOST_WIDE_INT high_bits,
1874 unsigned HOST_WIDE_INT low_bits)
1875 {
1876 rtx sub_temp = op0;
1877
1878 if (can_create_pseudo_p ())
1879 sub_temp = gen_reg_rtx (DImode);
1880
1881 if ((high_bits & 0xfffffc00) != 0)
1882 {
1883 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1884 if ((high_bits & ~0xfffffc00) != 0)
1885 emit_insn (gen_rtx_SET (VOIDmode,
1886 sub_temp,
1887 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1888 else
1889 sub_temp = temp;
1890 }
1891 else
1892 {
1893 emit_insn (gen_safe_SET64 (temp, high_bits));
1894 sub_temp = temp;
1895 }
1896
1897 if (can_create_pseudo_p ())
1898 {
1899 rtx temp2 = gen_reg_rtx (DImode);
1900 rtx temp3 = gen_reg_rtx (DImode);
1901 rtx temp4 = gen_reg_rtx (DImode);
1902
1903 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1904 gen_rtx_ASHIFT (DImode, sub_temp,
1905 GEN_INT (32))));
1906
1907 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
1908 if ((low_bits & ~0xfffffc00) != 0)
1909 {
1910 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1911 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
1912 emit_insn (gen_rtx_SET (VOIDmode, op0,
1913 gen_rtx_PLUS (DImode, temp4, temp3)));
1914 }
1915 else
1916 {
1917 emit_insn (gen_rtx_SET (VOIDmode, op0,
1918 gen_rtx_PLUS (DImode, temp4, temp2)));
1919 }
1920 }
1921 else
1922 {
1923 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
1924 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
1925 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
1926 int to_shift = 12;
1927
1928 /* We are in the middle of reload, so this is really
1929 painful. However, we do still make an attempt to
1930 avoid emitting truly stupid code. */
1931 if (low1 != const0_rtx)
1932 {
1933 emit_insn (gen_rtx_SET (VOIDmode, op0,
1934 gen_rtx_ASHIFT (DImode, sub_temp,
1935 GEN_INT (to_shift))));
1936 emit_insn (gen_rtx_SET (VOIDmode, op0,
1937 gen_rtx_IOR (DImode, op0, low1)));
1938 sub_temp = op0;
1939 to_shift = 12;
1940 }
1941 else
1942 {
1943 to_shift += 12;
1944 }
1945 if (low2 != const0_rtx)
1946 {
1947 emit_insn (gen_rtx_SET (VOIDmode, op0,
1948 gen_rtx_ASHIFT (DImode, sub_temp,
1949 GEN_INT (to_shift))));
1950 emit_insn (gen_rtx_SET (VOIDmode, op0,
1951 gen_rtx_IOR (DImode, op0, low2)));
1952 sub_temp = op0;
1953 to_shift = 8;
1954 }
1955 else
1956 {
1957 to_shift += 8;
1958 }
1959 emit_insn (gen_rtx_SET (VOIDmode, op0,
1960 gen_rtx_ASHIFT (DImode, sub_temp,
1961 GEN_INT (to_shift))));
1962 if (low3 != const0_rtx)
1963 emit_insn (gen_rtx_SET (VOIDmode, op0,
1964 gen_rtx_IOR (DImode, op0, low3)));
1965 /* phew... */
1966 }
1967 }
1968
1969 /* Analyze a 64-bit constant for certain properties. */
1970 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
1971 unsigned HOST_WIDE_INT,
1972 int *, int *, int *);
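/* A worked illustration (hypothetical value): for the constant
   0x00000003ffff0000, high_bits is 0x00000003 and low_bits is
   0xffff0000, so the routine below computes lowest_bit_set == 16,
   highest_bit_set == 33 and all_bits_between_are_set == 1.  */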
1973
1974 static void
1975 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
1976 unsigned HOST_WIDE_INT low_bits,
1977 int *hbsp, int *lbsp, int *abbasp)
1978 {
1979 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1980 int i;
1981
1982 lowest_bit_set = highest_bit_set = -1;
1983 i = 0;
1984 do
1985 {
1986 if ((lowest_bit_set == -1)
1987 && ((low_bits >> i) & 1))
1988 lowest_bit_set = i;
1989 if ((highest_bit_set == -1)
1990 && ((high_bits >> (32 - i - 1)) & 1))
1991 highest_bit_set = (64 - i - 1);
1992 }
1993 while (++i < 32
1994 && ((highest_bit_set == -1)
1995 || (lowest_bit_set == -1)));
1996 if (i == 32)
1997 {
1998 i = 0;
1999 do
2000 {
2001 if ((lowest_bit_set == -1)
2002 && ((high_bits >> i) & 1))
2003 lowest_bit_set = i + 32;
2004 if ((highest_bit_set == -1)
2005 && ((low_bits >> (32 - i - 1)) & 1))
2006 highest_bit_set = 32 - i - 1;
2007 }
2008 while (++i < 32
2009 && ((highest_bit_set == -1)
2010 || (lowest_bit_set == -1)));
2011 }
2012 /* If there are no bits set this should have gone out
2013 as one instruction! */
2014 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2015 all_bits_between_are_set = 1;
2016 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2017 {
2018 if (i < 32)
2019 {
2020 if ((low_bits & (1 << i)) != 0)
2021 continue;
2022 }
2023 else
2024 {
2025 if ((high_bits & (1 << (i - 32))) != 0)
2026 continue;
2027 }
2028 all_bits_between_are_set = 0;
2029 break;
2030 }
2031 *hbsp = highest_bit_set;
2032 *lbsp = lowest_bit_set;
2033 *abbasp = all_bits_between_are_set;
2034 }
2035
2036 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2037
2038 static int
2039 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2040 unsigned HOST_WIDE_INT low_bits)
2041 {
2042 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2043
2044 if (high_bits == 0
2045 || high_bits == 0xffffffff)
2046 return 1;
2047
2048 analyze_64bit_constant (high_bits, low_bits,
2049 &highest_bit_set, &lowest_bit_set,
2050 &all_bits_between_are_set);
2051
2052 if ((highest_bit_set == 63
2053 || lowest_bit_set == 0)
2054 && all_bits_between_are_set != 0)
2055 return 1;
2056
2057 if ((highest_bit_set - lowest_bit_set) < 21)
2058 return 1;
2059
2060 return 0;
2061 }
2062
2063 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2064 unsigned HOST_WIDE_INT,
2065 int, int);
2066
2067 static unsigned HOST_WIDE_INT
2068 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2069 unsigned HOST_WIDE_INT low_bits,
2070 int lowest_bit_set, int shift)
2071 {
2072 HOST_WIDE_INT hi, lo;
2073
2074 if (lowest_bit_set < 32)
2075 {
2076 lo = (low_bits >> lowest_bit_set) << shift;
2077 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2078 }
2079 else
2080 {
2081 lo = 0;
2082 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2083 }
2084 gcc_assert (! (hi & lo));
2085 return (hi | lo);
2086 }
2087
2088 /* Here we are sure to be arch64 and this is an integer constant
2089 being loaded into a register. Emit the most efficient
2090 insn sequence possible. Detection of all the 1-insn cases
2091 has been done already. */
2092 static void
2093 sparc_emit_set_const64 (rtx op0, rtx op1)
2094 {
2095 unsigned HOST_WIDE_INT high_bits, low_bits;
2096 int lowest_bit_set, highest_bit_set;
2097 int all_bits_between_are_set;
2098 rtx temp = 0;
2099
2100 /* Sanity check that we know what we are working with. */
2101 gcc_assert (TARGET_ARCH64
2102 && (GET_CODE (op0) == SUBREG
2103 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2104
2105 if (! can_create_pseudo_p ())
2106 temp = op0;
2107
2108 if (GET_CODE (op1) != CONST_INT)
2109 {
2110 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2111 return;
2112 }
2113
2114 if (! temp)
2115 temp = gen_reg_rtx (DImode);
2116
2117 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2118 low_bits = (INTVAL (op1) & 0xffffffff);
2119
2120 /* low_bits bits 0 --> 31
2121 high_bits bits 32 --> 63 */
2122
2123 analyze_64bit_constant (high_bits, low_bits,
2124 &highest_bit_set, &lowest_bit_set,
2125 &all_bits_between_are_set);
2126
2127 /* First try for a 2-insn sequence. */
2128
2129 /* These situations are preferred because the optimizer can
2130 * do more things with them:
2131 * 1) mov -1, %reg
2132 * sllx %reg, shift, %reg
2133 * 2) mov -1, %reg
2134 * srlx %reg, shift, %reg
2135 * 3) mov some_small_const, %reg
2136 * sllx %reg, shift, %reg
2137 */
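/* Two worked illustrations (hypothetical values): 0xfff0000000000000
   has highest_bit_set == 63 with all intervening bits set, so it is
   emitted as "mov -1, %reg; sllx %reg, 52, %reg"; and 0x123000 spans
   fewer than 12 bits, so it becomes
   "mov 0x123, %reg; sllx %reg, 12, %reg".  */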
2138 if (((highest_bit_set == 63
2139 || lowest_bit_set == 0)
2140 && all_bits_between_are_set != 0)
2141 || ((highest_bit_set - lowest_bit_set) < 12))
2142 {
2143 HOST_WIDE_INT the_const = -1;
2144 int shift = lowest_bit_set;
2145
2146 if ((highest_bit_set != 63
2147 && lowest_bit_set != 0)
2148 || all_bits_between_are_set == 0)
2149 {
2150 the_const =
2151 create_simple_focus_bits (high_bits, low_bits,
2152 lowest_bit_set, 0);
2153 }
2154 else if (lowest_bit_set == 0)
2155 shift = -(63 - highest_bit_set);
2156
2157 gcc_assert (SPARC_SIMM13_P (the_const));
2158 gcc_assert (shift != 0);
2159
2160 emit_insn (gen_safe_SET64 (temp, the_const));
2161 if (shift > 0)
2162 emit_insn (gen_rtx_SET (VOIDmode,
2163 op0,
2164 gen_rtx_ASHIFT (DImode,
2165 temp,
2166 GEN_INT (shift))));
2167 else if (shift < 0)
2168 emit_insn (gen_rtx_SET (VOIDmode,
2169 op0,
2170 gen_rtx_LSHIFTRT (DImode,
2171 temp,
2172 GEN_INT (-shift))));
2173 return;
2174 }
2175
2176 /* Now handle a range of 22 or fewer bits set somewhere.
2177 * 1) sethi %hi(focus_bits), %reg
2178 * sllx %reg, shift, %reg
2179 * 2) sethi %hi(focus_bits), %reg
2180 * srlx %reg, shift, %reg
2181 */
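/* A worked illustration (hypothetical value): 0x7ffff000 has bits
   12..30 set, so focus_bits is 0x1ffffc00 and the emitted sequence
   is "sethi %hi(0x1ffffc00), %reg; sllx %reg, 2, %reg".  */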
2182 if ((highest_bit_set - lowest_bit_set) < 21)
2183 {
2184 unsigned HOST_WIDE_INT focus_bits =
2185 create_simple_focus_bits (high_bits, low_bits,
2186 lowest_bit_set, 10);
2187
2188 gcc_assert (SPARC_SETHI_P (focus_bits));
2189 gcc_assert (lowest_bit_set != 10);
2190
2191 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2192
2193 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2194 if (lowest_bit_set < 10)
2195 emit_insn (gen_rtx_SET (VOIDmode,
2196 op0,
2197 gen_rtx_LSHIFTRT (DImode, temp,
2198 GEN_INT (10 - lowest_bit_set))));
2199 else if (lowest_bit_set > 10)
2200 emit_insn (gen_rtx_SET (VOIDmode,
2201 op0,
2202 gen_rtx_ASHIFT (DImode, temp,
2203 GEN_INT (lowest_bit_set - 10))));
2204 return;
2205 }
2206
2207 /* 1) sethi %hi(low_bits), %reg
2208 * or %reg, %lo(low_bits), %reg
2209 * 2) sethi %hi(~low_bits), %reg
2210 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2211 */
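/* A worked illustration of form 2 (hypothetical value): for
   0xffffffff80001234 we have high_bits == 0xffffffff, so quick1
   emits "sethi %hi(0x7fffedcb), %reg" (loading 0x7fffec00) and then
   "xor %reg, -460, %reg", since -0x400 | 0x234 == -0x1cc == -460.  */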
2212 if (high_bits == 0
2213 || high_bits == 0xffffffff)
2214 {
2215 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2216 (high_bits == 0xffffffff));
2217 return;
2218 }
2219
2220 /* Now, try 3-insn sequences. */
2221
2222 /* 1) sethi %hi(high_bits), %reg
2223 * or %reg, %lo(high_bits), %reg
2224 * sllx %reg, 32, %reg
2225 */
2226 if (low_bits == 0)
2227 {
2228 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2229 return;
2230 }
2231
2232 /* We may be able to do something quick
2233 when the constant is negated, so try that. */
2234 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2235 (~low_bits) & 0xfffffc00))
2236 {
2237 /* NOTE: The trailing bits get XOR'd so we need the
2238 non-negated bits, not the negated ones. */
2239 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2240
2241 if ((((~high_bits) & 0xffffffff) == 0
2242 && ((~low_bits) & 0x80000000) == 0)
2243 || (((~high_bits) & 0xffffffff) == 0xffffffff
2244 && ((~low_bits) & 0x80000000) != 0))
2245 {
2246 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2247
2248 if ((SPARC_SETHI_P (fast_int)
2249 && (~high_bits & 0xffffffff) == 0)
2250 || SPARC_SIMM13_P (fast_int))
2251 emit_insn (gen_safe_SET64 (temp, fast_int));
2252 else
2253 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2254 }
2255 else
2256 {
2257 rtx negated_const;
2258 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2259 (((HOST_WIDE_INT) ((~high_bits) & 0xffffffff)) << 32));
2260 sparc_emit_set_const64 (temp, negated_const);
2261 }
2262
2263 /* If we are XOR'ing with -1, then we should emit a one's complement
2264 instead. This way the combiner will notice logical operations
2265 such as ANDN later on and substitute. */
2266 if (trailing_bits == 0x3ff)
2267 {
2268 emit_insn (gen_rtx_SET (VOIDmode, op0,
2269 gen_rtx_NOT (DImode, temp)));
2270 }
2271 else
2272 {
2273 emit_insn (gen_rtx_SET (VOIDmode,
2274 op0,
2275 gen_safe_XOR64 (temp,
2276 (-0x400 | trailing_bits))));
2277 }
2278 return;
2279 }
2280
2281 /* 1) sethi %hi(xxx), %reg
2282 * or %reg, %lo(xxx), %reg
2283 * sllx %reg, yyy, %reg
2284 *
2285 * ??? This is just a generalized version of the low_bits==0
2286 * thing above, FIXME...
2287 */
2288 if ((highest_bit_set - lowest_bit_set) < 32)
2289 {
2290 unsigned HOST_WIDE_INT focus_bits =
2291 create_simple_focus_bits (high_bits, low_bits,
2292 lowest_bit_set, 0);
2293
2294 /* We can't get here in any other state: the high_bits == 0 and low_bits == 0 cases were handled above. */
2295 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2296
2297 /* So what we know is that the set bits straddle the
2298 middle of the 64-bit word. */
2299 sparc_emit_set_const64_quick2 (op0, temp,
2300 focus_bits, 0,
2301 lowest_bit_set);
2302 return;
2303 }
2304
2305 /* 1) sethi %hi(high_bits), %reg
2306 * or %reg, %lo(high_bits), %reg
2307 * sllx %reg, 32, %reg
2308 * or %reg, low_bits, %reg
2309 */
2310 if (SPARC_SIMM13_P (low_bits)
2311 && ((int) low_bits > 0))
2312 {
2313 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2314 return;
2315 }
2316
2317 /* The easiest way when all else fails, is full decomposition. */
2318 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2319 }
2320 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2321
2322 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2323 return the mode to be used for the comparison. For floating-point,
2324 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2325 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2326 processing is needed. */
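/* Illustrative examples (not exhaustive): EQ against (plus:SI a b)
   yields CC_NOOVmode (CCX_NOOVmode for DImode on TARGET_ARCH64);
   LT on a float comparison yields CCFPEmode, while UNLT yields
   CCFPmode.  */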
2327
2328 enum machine_mode
2329 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2330 {
2331 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2332 {
2333 switch (op)
2334 {
2335 case EQ:
2336 case NE:
2337 case UNORDERED:
2338 case ORDERED:
2339 case UNLT:
2340 case UNLE:
2341 case UNGT:
2342 case UNGE:
2343 case UNEQ:
2344 case LTGT:
2345 return CCFPmode;
2346
2347 case LT:
2348 case LE:
2349 case GT:
2350 case GE:
2351 return CCFPEmode;
2352
2353 default:
2354 gcc_unreachable ();
2355 }
2356 }
2357 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2358 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2359 {
2360 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2361 return CCX_NOOVmode;
2362 else
2363 return CC_NOOVmode;
2364 }
2365 else
2366 {
2367 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2368 return CCXmode;
2369 else
2370 return CCmode;
2371 }
2372 }
2373
2374 /* Emit the compare insn and return the CC reg for a CODE comparison
2375 with operands X and Y. */
2376
2377 static rtx
2378 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2379 {
2380 enum machine_mode mode;
2381 rtx cc_reg;
2382
2383 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2384 return x;
2385
2386 mode = SELECT_CC_MODE (code, x, y);
2387
2388 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2389 fcc regs (cse can't tell they're really call clobbered regs and will
2390 remove a duplicate comparison even if there is an intervening function
2391 call - it will then try to reload the cc reg via an int reg which is why
2392 we need the movcc patterns). It is possible to provide the movcc
2393 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2394 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2395 to tell cse that CCFPE mode registers (even pseudos) are call
2396 clobbered. */
2397
2398 /* ??? This is an experiment. Rather than making changes to cse which may
2399 or may not be easy/clean, we do our own cse. This is possible because
2400 we will generate hard registers. Cse knows they're call clobbered (it
2401 doesn't know the same thing about pseudos). If we guess wrong, no big
2402 deal, but if we win, great! */
2403
2404 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2405 #if 1 /* experiment */
2406 {
2407 int reg;
2408 /* We cycle through the registers to ensure they're all exercised. */
2409 static int next_fcc_reg = 0;
2410 /* Previous x,y for each fcc reg. */
2411 static rtx prev_args[4][2];
2412
2413 /* Scan prev_args for x,y. */
2414 for (reg = 0; reg < 4; reg++)
2415 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2416 break;
2417 if (reg == 4)
2418 {
2419 reg = next_fcc_reg;
2420 prev_args[reg][0] = x;
2421 prev_args[reg][1] = y;
2422 next_fcc_reg = (next_fcc_reg + 1) & 3;
2423 }
2424 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2425 }
2426 #else
2427 cc_reg = gen_reg_rtx (mode);
2428 #endif /* ! experiment */
2429 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2430 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2431 else
2432 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2433
2434 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2435 will only result in an unrecognizable insn, so there is no point in asserting. */
2436 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2437
2438 return cc_reg;
2439 }
2440
2441
2442 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2443
2444 rtx
2445 gen_compare_reg (rtx cmp)
2446 {
2447 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2448 }
2449
2450 /* This function is used for v9 only.
2451 DEST is the target of the Scc insn.
2452 CODE is the code for an Scc's comparison.
2453 X and Y are the values we compare.
2454
2455 This function is needed to turn
2456
2457 (set (reg:SI 110)
2458 (gt (reg:CCX 100 %icc)
2459 (const_int 0)))
2460 into
2461 (set (reg:SI 110)
2462 (gt:DI (reg:CCX 100 %icc)
2463 (const_int 0)))
2464
2465 I.e., the instruction recognizer needs to see the mode of the comparison to
2466 find the right instruction. We could use "gt:DI" right in the
2467 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2468
2469 static int
2470 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2471 {
2472 if (! TARGET_ARCH64
2473 && (GET_MODE (x) == DImode
2474 || GET_MODE (dest) == DImode))
2475 return 0;
2476
2477 /* Try to use the movrCC insns. */
2478 if (TARGET_ARCH64
2479 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2480 && y == const0_rtx
2481 && v9_regcmp_p (compare_code))
2482 {
2483 rtx op0 = x;
2484 rtx temp;
2485
2486 /* Special case for op0 != 0. This can be done with one instruction if
2487 dest == x. */
2488
2489 if (compare_code == NE
2490 && GET_MODE (dest) == DImode
2491 && rtx_equal_p (op0, dest))
2492 {
2493 emit_insn (gen_rtx_SET (VOIDmode, dest,
2494 gen_rtx_IF_THEN_ELSE (DImode,
2495 gen_rtx_fmt_ee (compare_code, DImode,
2496 op0, const0_rtx),
2497 const1_rtx,
2498 dest)));
2499 return 1;
2500 }
2501
2502 if (reg_overlap_mentioned_p (dest, op0))
2503 {
2504 /* Handle the case where dest == x.
2505 We "early clobber" the result. */
2506 op0 = gen_reg_rtx (GET_MODE (x));
2507 emit_move_insn (op0, x);
2508 }
2509
2510 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2511 if (GET_MODE (op0) != DImode)
2512 {
2513 temp = gen_reg_rtx (DImode);
2514 convert_move (temp, op0, 0);
2515 }
2516 else
2517 temp = op0;
2518 emit_insn (gen_rtx_SET (VOIDmode, dest,
2519 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2520 gen_rtx_fmt_ee (compare_code, DImode,
2521 temp, const0_rtx),
2522 const1_rtx,
2523 dest)));
2524 return 1;
2525 }
2526 else
2527 {
2528 x = gen_compare_reg_1 (compare_code, x, y);
2529 y = const0_rtx;
2530
2531 gcc_assert (GET_MODE (x) != CC_NOOVmode
2532 && GET_MODE (x) != CCX_NOOVmode);
2533
2534 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2535 emit_insn (gen_rtx_SET (VOIDmode, dest,
2536 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2537 gen_rtx_fmt_ee (compare_code,
2538 GET_MODE (x), x, y),
2539 const1_rtx, dest)));
2540 return 1;
2541 }
2542 }
2543
2544
2545 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2546 without jumps using the addx/subx instructions. */
2547
2548 bool
2549 emit_scc_insn (rtx operands[])
2550 {
2551 rtx tem;
2552 rtx x;
2553 rtx y;
2554 enum rtx_code code;
2555
2556 /* The quad-word fp compare library routines all return nonzero to indicate
2557 true, which is different from the equivalent libgcc routines, so we must
2558 handle them specially here. */
2559 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2560 {
2561 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2562 GET_CODE (operands[1]));
2563 operands[2] = XEXP (operands[1], 0);
2564 operands[3] = XEXP (operands[1], 1);
2565 }
2566
2567 code = GET_CODE (operands[1]);
2568 x = operands[2];
2569 y = operands[3];
2570
2571 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2572 more applications). The exception to this is "reg != 0" which can
2573 be done in one instruction on v9 (so we do it). */
2574 if (code == EQ)
2575 {
2576 if (GET_MODE (x) == SImode)
2577 {
2578 rtx pat = gen_seqsi_special (operands[0], x, y);
2579 emit_insn (pat);
2580 return true;
2581 }
2582 else if (GET_MODE (x) == DImode)
2583 {
2584 rtx pat = gen_seqdi_special (operands[0], x, y);
2585 emit_insn (pat);
2586 return true;
2587 }
2588 }
2589
2590 if (code == NE)
2591 {
2592 if (GET_MODE (x) == SImode)
2593 {
2594 rtx pat = gen_snesi_special (operands[0], x, y);
2595 emit_insn (pat);
2596 return true;
2597 }
2598 else if (GET_MODE (x) == DImode)
2599 {
2600 rtx pat;
2601 if (TARGET_VIS3)
2602 pat = gen_snedi_special_vis3 (operands[0], x, y);
2603 else
2604 pat = gen_snedi_special (operands[0], x, y);
2605 emit_insn (pat);
2606 return true;
2607 }
2608 }
2609
2610 if (TARGET_V9
2611 && TARGET_ARCH64
2612 && GET_MODE (x) == DImode
2613 && !(TARGET_VIS3
2614 && (code == GTU || code == LTU))
2615 && gen_v9_scc (operands[0], code, x, y))
2616 return true;
2617
2618 /* We can do LTU and GEU using the addx/subx instructions too. And
2619 for GTU/LEU, if both operands are registers, swap them and fall
2620 back to the easy case. */
2621 if (code == GTU || code == LEU)
2622 {
2623 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2624 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2625 {
2626 tem = x;
2627 x = y;
2628 y = tem;
2629 code = swap_condition (code);
2630 }
2631 }
2632
2633 if (code == LTU
2634 || (!TARGET_VIS3 && code == GEU))
2635 {
2636 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2637 gen_rtx_fmt_ee (code, SImode,
2638 gen_compare_reg_1 (code, x, y),
2639 const0_rtx)));
2640 return true;
2641 }
2642
2643 /* All the possibilities for using addx/subx-based sequences have been
2644 exhausted; try for a 3-instruction sequence using v9 conditional
2645 moves. */
2646 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
2647 return true;
2648
2649 /* Nope, do branches. */
2650 return false;
2651 }
2652
2653 /* Emit a conditional jump insn for the v9 architecture using comparison code
2654 CODE and jump target LABEL.
2655 This function exists to take advantage of the v9 brxx insns. */
2656
2657 static void
2658 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2659 {
2660 emit_jump_insn (gen_rtx_SET (VOIDmode,
2661 pc_rtx,
2662 gen_rtx_IF_THEN_ELSE (VOIDmode,
2663 gen_rtx_fmt_ee (code, GET_MODE (op0),
2664 op0, const0_rtx),
2665 gen_rtx_LABEL_REF (VOIDmode, label),
2666 pc_rtx)));
2667 }
2668
2669 /* Emit a conditional jump insn for the UA2011 architecture using
2670 comparison code CODE and jump target LABEL. This function exists
2671 to take advantage of the UA2011 Compare and Branch insns. */
2672
2673 static void
2674 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
2675 {
2676 rtx if_then_else;
2677
2678 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
2679 gen_rtx_fmt_ee (code, GET_MODE (op0),
2680 op0, op1),
2681 gen_rtx_LABEL_REF (VOIDmode, label),
2682 pc_rtx);
2683
2684 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
2685 }
2686
2687 void
2688 emit_conditional_branch_insn (rtx operands[])
2689 {
2690 /* The quad-word fp compare library routines all return nonzero to indicate
2691 true, which is different from the equivalent libgcc routines, so we must
2692 handle them specially here. */
2693 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2694 {
2695 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2696 GET_CODE (operands[0]));
2697 operands[1] = XEXP (operands[0], 0);
2698 operands[2] = XEXP (operands[0], 1);
2699 }
2700
2701 /* If we can tell early on that the comparison is against a constant
2702 that won't fit in the 5-bit signed immediate field of a cbcond,
2703 use one of the other v9 conditional branch sequences. */
2704 if (TARGET_CBCOND
2705 && GET_CODE (operands[1]) == REG
2706 && (GET_MODE (operands[1]) == SImode
2707 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
2708 && (GET_CODE (operands[2]) != CONST_INT
2709 || SPARC_SIMM5_P (INTVAL (operands[2]))))
2710 {
2711 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
2712 return;
2713 }
2714
2715 if (TARGET_ARCH64 && operands[2] == const0_rtx
2716 && GET_CODE (operands[1]) == REG
2717 && GET_MODE (operands[1]) == DImode)
2718 {
2719 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2720 return;
2721 }
2722
2723 operands[1] = gen_compare_reg (operands[0]);
2724 operands[2] = const0_rtx;
2725 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2726 operands[1], operands[2]);
2727 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2728 operands[3]));
2729 }
2730
2731
2732 /* Generate a DFmode part of a hard TFmode register.
2733 REG is the TFmode hard register, LOW is 1 for the
2734 low 64 bits of the register and 0 otherwise.
2735 */
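/* For example (an illustration assuming a V9 FP register): for a
   TFmode value living in %f0..%f3, this returns %f0 for the high
   64 bits and %f2 for the low 64 bits, SPARC being big-endian.  */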
2736 rtx
2737 gen_df_reg (rtx reg, int low)
2738 {
2739 int regno = REGNO (reg);
2740
2741 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2742 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
2743 return gen_rtx_REG (DFmode, regno);
2744 }
2745 \f
2746 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
2747 Unlike normal calls, TFmode operands are passed by reference. It is
2748 assumed that no more than 3 operands are required. */
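/* For instance, a TFmode PLUS is expected to expand to the
   equivalent of "_Qp_add (&op0, &op1, &op2)", the result slot being
   passed by reference first (a sketch of the Sun quad-emulation
   convention assumed here).  */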
2749
2750 static void
2751 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
2752 {
2753 rtx ret_slot = NULL, arg[3], func_sym;
2754 int i;
2755
2756 /* We only expect to be called for conversions, unary, and binary ops. */
2757 gcc_assert (nargs == 2 || nargs == 3);
2758
2759 for (i = 0; i < nargs; ++i)
2760 {
2761 rtx this_arg = operands[i];
2762 rtx this_slot;
2763
2764 /* TFmode arguments and return values are passed by reference. */
2765 if (GET_MODE (this_arg) == TFmode)
2766 {
2767 int force_stack_temp;
2768
2769 force_stack_temp = 0;
2770 if (TARGET_BUGGY_QP_LIB && i == 0)
2771 force_stack_temp = 1;
2772
2773 if (GET_CODE (this_arg) == MEM
2774 && ! force_stack_temp)
2775 {
2776 tree expr = MEM_EXPR (this_arg);
2777 if (expr)
2778 mark_addressable (expr);
2779 this_arg = XEXP (this_arg, 0);
2780 }
2781 else if (CONSTANT_P (this_arg)
2782 && ! force_stack_temp)
2783 {
2784 this_slot = force_const_mem (TFmode, this_arg);
2785 this_arg = XEXP (this_slot, 0);
2786 }
2787 else
2788 {
2789 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
2790
2791 /* Operand 0 is the return value. We'll copy it out later. */
2792 if (i > 0)
2793 emit_move_insn (this_slot, this_arg);
2794 else
2795 ret_slot = this_slot;
2796
2797 this_arg = XEXP (this_slot, 0);
2798 }
2799 }
2800
2801 arg[i] = this_arg;
2802 }
2803
2804 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
2805
2806 if (GET_MODE (operands[0]) == TFmode)
2807 {
2808 if (nargs == 2)
2809 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
2810 arg[0], GET_MODE (arg[0]),
2811 arg[1], GET_MODE (arg[1]));
2812 else
2813 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
2814 arg[0], GET_MODE (arg[0]),
2815 arg[1], GET_MODE (arg[1]),
2816 arg[2], GET_MODE (arg[2]));
2817
2818 if (ret_slot)
2819 emit_move_insn (operands[0], ret_slot);
2820 }
2821 else
2822 {
2823 rtx ret;
2824
2825 gcc_assert (nargs == 2);
2826
2827 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
2828 GET_MODE (operands[0]), 1,
2829 arg[1], GET_MODE (arg[1]));
2830
2831 if (ret != operands[0])
2832 emit_move_insn (operands[0], ret);
2833 }
2834 }
2835
2836 /* Expand soft-float TFmode calls to SPARC ABI routines. */
2837
2838 static void
2839 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
2840 {
2841 const char *func;
2842
2843 switch (code)
2844 {
2845 case PLUS:
2846 func = "_Qp_add";
2847 break;
2848 case MINUS:
2849 func = "_Qp_sub";
2850 break;
2851 case MULT:
2852 func = "_Qp_mul";
2853 break;
2854 case DIV:
2855 func = "_Qp_div";
2856 break;
2857 default:
2858 gcc_unreachable ();
2859 }
2860
2861 emit_soft_tfmode_libcall (func, 3, operands);
2862 }
2863
2864 static void
2865 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
2866 {
2867 const char *func;
2868
2869 gcc_assert (code == SQRT);
2870 func = "_Qp_sqrt";
2871
2872 emit_soft_tfmode_libcall (func, 2, operands);
2873 }
2874
2875 static void
2876 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
2877 {
2878 const char *func;
2879
2880 switch (code)
2881 {
2882 case FLOAT_EXTEND:
2883 switch (GET_MODE (operands[1]))
2884 {
2885 case SFmode:
2886 func = "_Qp_stoq";
2887 break;
2888 case DFmode:
2889 func = "_Qp_dtoq";
2890 break;
2891 default:
2892 gcc_unreachable ();
2893 }
2894 break;
2895
2896 case FLOAT_TRUNCATE:
2897 switch (GET_MODE (operands[0]))
2898 {
2899 case SFmode:
2900 func = "_Qp_qtos";
2901 break;
2902 case DFmode:
2903 func = "_Qp_qtod";
2904 break;
2905 default:
2906 gcc_unreachable ();
2907 }
2908 break;
2909
2910 case FLOAT:
2911 switch (GET_MODE (operands[1]))
2912 {
2913 case SImode:
2914 func = "_Qp_itoq";
2915 if (TARGET_ARCH64)
2916 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
2917 break;
2918 case DImode:
2919 func = "_Qp_xtoq";
2920 break;
2921 default:
2922 gcc_unreachable ();
2923 }
2924 break;
2925
2926 case UNSIGNED_FLOAT:
2927 switch (GET_MODE (operands[1]))
2928 {
2929 case SImode:
2930 func = "_Qp_uitoq";
2931 if (TARGET_ARCH64)
2932 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
2933 break;
2934 case DImode:
2935 func = "_Qp_uxtoq";
2936 break;
2937 default:
2938 gcc_unreachable ();
2939 }
2940 break;
2941
2942 case FIX:
2943 switch (GET_MODE (operands[0]))
2944 {
2945 case SImode:
2946 func = "_Qp_qtoi";
2947 break;
2948 case DImode:
2949 func = "_Qp_qtox";
2950 break;
2951 default:
2952 gcc_unreachable ();
2953 }
2954 break;
2955
2956 case UNSIGNED_FIX:
2957 switch (GET_MODE (operands[0]))
2958 {
2959 case SImode:
2960 func = "_Qp_qtoui";
2961 break;
2962 case DImode:
2963 func = "_Qp_qtoux";
2964 break;
2965 default:
2966 gcc_unreachable ();
2967 }
2968 break;
2969
2970 default:
2971 gcc_unreachable ();
2972 }
2973
2974 emit_soft_tfmode_libcall (func, 2, operands);
2975 }
2976
2977 /* Expand a hard-float TFmode operation. All arguments must be in
2978 registers. */
2979
2980 static void
2981 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
2982 {
2983 rtx op, dest;
2984
2985 if (GET_RTX_CLASS (code) == RTX_UNARY)
2986 {
2987 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2988 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
2989 }
2990 else
2991 {
2992 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2993 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
2994 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2995 operands[1], operands[2]);
2996 }
2997
2998 if (register_operand (operands[0], VOIDmode))
2999 dest = operands[0];
3000 else
3001 dest = gen_reg_rtx (GET_MODE (operands[0]));
3002
3003 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3004
3005 if (dest != operands[0])
3006 emit_move_insn (operands[0], dest);
3007 }
3008
3009 void
3010 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3011 {
3012 if (TARGET_HARD_QUAD)
3013 emit_hard_tfmode_operation (code, operands);
3014 else
3015 emit_soft_tfmode_binop (code, operands);
3016 }
3017
3018 void
3019 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3020 {
3021 if (TARGET_HARD_QUAD)
3022 emit_hard_tfmode_operation (code, operands);
3023 else
3024 emit_soft_tfmode_unop (code, operands);
3025 }
3026
3027 void
3028 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3029 {
3030 if (TARGET_HARD_QUAD)
3031 emit_hard_tfmode_operation (code, operands);
3032 else
3033 emit_soft_tfmode_cvt (code, operands);
3034 }
3035 \f
3036 /* Return nonzero if a branch/jump/call instruction will have a
3037 nop emitted into its delay slot. */
3038
3039 int
3040 empty_delay_slot (rtx insn)
3041 {
3042 rtx seq;
3043
3044 /* If there is no previous instruction (which should not happen), return true. */
3045 if (PREV_INSN (insn) == NULL)
3046 return 1;
3047
3048 seq = NEXT_INSN (PREV_INSN (insn));
3049 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3050 return 0;
3051
3052 return 1;
3053 }
3054
3055 /* Return nonzero if we should emit a nop after a cbcond instruction.
3056 The cbcond instruction does not have a delay slot; however, there is
3057 a severe performance penalty if a control transfer appears right
3058 after a cbcond. Therefore we emit a nop when we detect this
3059 situation. */
3060
3061 int
3062 emit_cbcond_nop (rtx insn)
3063 {
3064 rtx next = next_active_insn (insn);
3065
3066 if (!next)
3067 return 1;
3068
3069 if (GET_CODE (next) == INSN
3070 && GET_CODE (PATTERN (next)) == SEQUENCE)
3071 next = XVECEXP (PATTERN (next), 0, 0);
3072 else if (GET_CODE (next) == CALL_INSN
3073 && GET_CODE (PATTERN (next)) == PARALLEL)
3074 {
3075 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3076
3077 if (GET_CODE (delay) == RETURN)
3078 {
3079 /* It's a sibling call. Do not emit the nop if we're going
3080 to emit something other than the jump itself as the first
3081 instruction of the sibcall sequence. */
3082 if (sparc_leaf_function_p || TARGET_FLAT)
3083 return 0;
3084 }
3085 }
3086
3087 if (NONJUMP_INSN_P (next))
3088 return 0;
3089
3090 return 1;
3091 }
3092
3093 /* Return nonzero if TRIAL can go into the call delay slot. */
3094
3095 int
3096 tls_call_delay (rtx trial)
3097 {
3098 rtx pat;
3099
3100 /* Binutils allows
3101 call __tls_get_addr, %tgd_call (foo)
3102 add %l7, %o0, %o0, %tgd_add (foo)
3103 while Sun as/ld does not. */
3104 if (TARGET_GNU_TLS || !TARGET_TLS)
3105 return 1;
3106
3107 pat = PATTERN (trial);
3108
3109 /* We must reject tgd_add{32|64}, i.e.
3110 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3111 and tldm_add{32|64}, i.e.
3112 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3113 for Sun as/ld. */
3114 if (GET_CODE (pat) == SET
3115 && GET_CODE (SET_SRC (pat)) == PLUS)
3116 {
3117 rtx unspec = XEXP (SET_SRC (pat), 1);
3118
3119 if (GET_CODE (unspec) == UNSPEC
3120 && (XINT (unspec, 1) == UNSPEC_TLSGD
3121 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3122 return 0;
3123 }
3124
3125 return 1;
3126 }
3127
3128 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3129 instruction. RETURN_P is true if the v9 variant 'return' is to be
3130 considered in the test too.
3131
3132 TRIAL must be a SET whose destination is a REG appropriate for the
3133 'restore' instruction or, if RETURN_P is true, for the 'return'
3134 instruction. */
3135
3136 static int
3137 eligible_for_restore_insn (rtx trial, bool return_p)
3138 {
3139 rtx pat = PATTERN (trial);
3140 rtx src = SET_SRC (pat);
3141 bool src_is_freg = false;
3142 rtx src_reg;
3143
3144 /* Since we now can do moves between float and integer registers when
3145 VIS3 is enabled, we have to catch this case. We can allow such
3146 moves when doing a 'return', however. */
3147 src_reg = src;
3148 if (GET_CODE (src_reg) == SUBREG)
3149 src_reg = SUBREG_REG (src_reg);
3150 if (GET_CODE (src_reg) == REG
3151 && SPARC_FP_REG_P (REGNO (src_reg)))
3152 src_is_freg = true;
3153
3154 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3155 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3156 && arith_operand (src, GET_MODE (src))
3157 && ! src_is_freg)
3158 {
3159 if (TARGET_ARCH64)
3160 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3161 else
3162 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3163 }
3164
3165 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3166 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3167 && arith_double_operand (src, GET_MODE (src))
3168 && ! src_is_freg)
3169 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3170
3171 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3172 else if (! TARGET_FPU && register_operand (src, SFmode))
3173 return 1;
3174
3175 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3176 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3177 return 1;
3178
3179 /* If we have the 'return' instruction, anything that does not use
3180 local or output registers and can go into a delay slot wins. */
3181 else if (return_p
3182 && TARGET_V9
3183 && !epilogue_renumber (&pat, 1)
3184 && get_attr_in_uncond_branch_delay (trial)
3185 == IN_UNCOND_BRANCH_DELAY_TRUE)
3186 return 1;
3187
3188 /* The 'restore src1,src2,dest' pattern for SImode. */
3189 else if (GET_CODE (src) == PLUS
3190 && register_operand (XEXP (src, 0), SImode)
3191 && arith_operand (XEXP (src, 1), SImode))
3192 return 1;
3193
3194 /* The 'restore src1,src2,dest' pattern for DImode. */
3195 else if (GET_CODE (src) == PLUS
3196 && register_operand (XEXP (src, 0), DImode)
3197 && arith_double_operand (XEXP (src, 1), DImode))
3198 return 1;
3199
3200 /* The 'restore src1,%lo(src2),dest' pattern. */
3201 else if (GET_CODE (src) == LO_SUM
3202 && ! TARGET_CM_MEDMID
3203 && ((register_operand (XEXP (src, 0), SImode)
3204 && immediate_operand (XEXP (src, 1), SImode))
3205 || (TARGET_ARCH64
3206 && register_operand (XEXP (src, 0), DImode)
3207 && immediate_operand (XEXP (src, 1), DImode))))
3208 return 1;
3209
3210 /* The 'restore src,src,dest' pattern, i.e. x << 1 expressed as x + x. */
3211 else if (GET_CODE (src) == ASHIFT
3212 && (register_operand (XEXP (src, 0), SImode)
3213 || register_operand (XEXP (src, 0), DImode))
3214 && XEXP (src, 1) == const1_rtx)
3215 return 1;
3216
3217 return 0;
3218 }
3219
3220 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3221
3222 int
3223 eligible_for_return_delay (rtx trial)
3224 {
3225 int regno;
3226 rtx pat;
3227
3228 if (GET_CODE (trial) != INSN)
3229 return 0;
3230
3231 if (get_attr_length (trial) != 1)
3232 return 0;
3233
3234 /* If the function uses __builtin_eh_return, the eh_return machinery
3235 occupies the delay slot. */
3236 if (crtl->calls_eh_return)
3237 return 0;
3238
3239 /* In the case of a leaf or flat function, anything can go into the slot. */
3240 if (sparc_leaf_function_p || TARGET_FLAT)
3241 return
3242 get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE;
3243
3244 pat = PATTERN (trial);
3245 if (GET_CODE (pat) == PARALLEL)
3246 {
3247 int i;
3248
3249 if (! TARGET_V9)
3250 return 0;
3251 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3252 {
3253 rtx expr = XVECEXP (pat, 0, i);
3254 if (GET_CODE (expr) != SET)
3255 return 0;
3256 if (GET_CODE (SET_DEST (expr)) != REG)
3257 return 0;
3258 regno = REGNO (SET_DEST (expr));
3259 if (regno >= 8 && regno < 24)
3260 return 0;
3261 }
3262 return !epilogue_renumber (&pat, 1)
3263 && (get_attr_in_uncond_branch_delay (trial)
3264 == IN_UNCOND_BRANCH_DELAY_TRUE);
3265 }
3266
3267 if (GET_CODE (pat) != SET)
3268 return 0;
3269
3270 if (GET_CODE (SET_DEST (pat)) != REG)
3271 return 0;
3272
3273 regno = REGNO (SET_DEST (pat));
3274
3275 /* Otherwise, only operations which can be done in tandem with
3276 a `restore' or `return' insn can go into the delay slot. */
3277 if (regno >= 8 && regno < 24)
3278 return 0;
3279
3280 /* If this instruction sets up a floating-point register and we have a return
3281 instruction, it can probably go in. But restore will not work
3282 with FP_REGS. */
3283 if (! SPARC_INT_REG_P (regno))
3284 return (TARGET_V9
3285 && !epilogue_renumber (&pat, 1)
3286 && get_attr_in_uncond_branch_delay (trial)
3287 == IN_UNCOND_BRANCH_DELAY_TRUE);
3288
3289 return eligible_for_restore_insn (trial, true);
3290 }
3291
3292 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3293
3294 int
3295 eligible_for_sibcall_delay (rtx trial)
3296 {
3297 rtx pat;
3298
3299 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
3300 return 0;
3301
3302 if (get_attr_length (trial) != 1)
3303 return 0;
3304
3305 pat = PATTERN (trial);
3306
3307 if (sparc_leaf_function_p || TARGET_FLAT)
3308 {
3309 /* If the tail call is done using the call instruction,
3310 we have to restore %o7 in the delay slot. */
3311 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3312 return 0;
3313
3314 /* %g1 is used to build the function address. */
3315 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3316 return 0;
3317
3318 return 1;
3319 }
3320
3321 /* Otherwise, only operations which can be done in tandem with
3322 a `restore' insn can go into the delay slot. */
3323 if (GET_CODE (SET_DEST (pat)) != REG
3324 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3325 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3326 return 0;
3327
3328 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3329 in most cases. */
3330 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3331 return 0;
3332
3333 return eligible_for_restore_insn (trial, false);
3334 }
3335 \f
3336 /* Determine if it's legal to put X into the constant pool. This
3337 is not possible if X contains the address of a symbol that is
3338 not constant (TLS) or not known at final link time (PIC). */
3339
3340 static bool
3341 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3342 {
3343 switch (GET_CODE (x))
3344 {
3345 case CONST_INT:
3346 case CONST_DOUBLE:
3347 case CONST_VECTOR:
3348 /* Accept all non-symbolic constants. */
3349 return false;
3350
3351 case LABEL_REF:
3352 /* Labels are OK iff we are non-PIC. */
3353 return flag_pic != 0;
3354
3355 case SYMBOL_REF:
3356 /* 'Naked' TLS symbol references are never OK,
3357 non-TLS symbols are OK iff we are non-PIC. */
3358 if (SYMBOL_REF_TLS_MODEL (x))
3359 return true;
3360 else
3361 return flag_pic != 0;
3362
3363 case CONST:
3364 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3365 case PLUS:
3366 case MINUS:
3367 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3368 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3369 case UNSPEC:
3370 return true;
3371 default:
3372 gcc_unreachable ();
3373 }
3374 }
3375 \f
3376 /* Global Offset Table support. */
3377 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3378 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3379
3380 /* Return the SYMBOL_REF for the Global Offset Table. */
3381
3382 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3383
3384 static rtx
3385 sparc_got (void)
3386 {
3387 if (!sparc_got_symbol)
3388 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3389
3390 return sparc_got_symbol;
3391 }
3392
3393 /* Ensure that we are not using patterns that are not OK with PIC. */
3394
3395 int
3396 check_pic (int i)
3397 {
3398 rtx op;
3399
3400 switch (flag_pic)
3401 {
3402 case 1:
3403 op = recog_data.operand[i];
3404 gcc_assert (GET_CODE (op) != SYMBOL_REF
3405 && (GET_CODE (op) != CONST
3406 || (GET_CODE (XEXP (op, 0)) == MINUS
3407 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3408 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
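/* Fall through.  */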
3409 case 2:
3410 default:
3411 return 1;
3412 }
3413 }
3414
3415 /* Return true if X is an address which needs a temporary register when
3416 reloaded while generating PIC code. */
3417
3418 int
3419 pic_address_needs_scratch (rtx x)
3420 {
3421 /* An address which is a symbolic operand plus a non-SMALL_INT needs a temp reg. */
3422 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3423 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3424 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3425 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3426 return 1;
3427
3428 return 0;
3429 }
3430
3431 /* Determine if a given RTX is a valid constant. We already know this
3432 satisfies CONSTANT_P. */
3433
3434 static bool
3435 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3436 {
3437 switch (GET_CODE (x))
3438 {
3439 case CONST:
3440 case SYMBOL_REF:
3441 if (sparc_tls_referenced_p (x))
3442 return false;
3443 break;
3444
3445 case CONST_DOUBLE:
3446 if (GET_MODE (x) == VOIDmode)
3447 return true;
3448
3449 /* Floating point constants are generally not ok.
3450 The only exception is 0.0 and all-ones in VIS. */
3451 if (TARGET_VIS
3452 && SCALAR_FLOAT_MODE_P (mode)
3453 && (const_zero_operand (x, mode)
3454 || const_all_ones_operand (x, mode)))
3455 return true;
3456
3457 return false;
3458
3459 case CONST_VECTOR:
3460 /* Vector constants are generally not ok.
3461 The only exception is 0 or -1 in VIS. */
3462 if (TARGET_VIS
3463 && (const_zero_operand (x, mode)
3464 || const_all_ones_operand (x, mode)))
3465 return true;
3466
3467 return false;
3468
3469 default:
3470 break;
3471 }
3472
3473 return true;
3474 }
3475
3476 /* Determine if a given RTX is a valid constant address. */
3477
3478 bool
3479 constant_address_p (rtx x)
3480 {
3481 switch (GET_CODE (x))
3482 {
3483 case LABEL_REF:
3484 case CONST_INT:
3485 case HIGH:
3486 return true;
3487
3488 case CONST:
3489 if (flag_pic && pic_address_needs_scratch (x))
3490 return false;
3491 return sparc_legitimate_constant_p (Pmode, x);
3492
3493 case SYMBOL_REF:
3494 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3495
3496 default:
3497 return false;
3498 }
3499 }
3500
3501 /* Nonzero if the constant value X is a legitimate general operand
3502 when generating PIC code. It is given that flag_pic is on and
3503 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3504
3505 bool
3506 legitimate_pic_operand_p (rtx x)
3507 {
3508 if (pic_address_needs_scratch (x))
3509 return false;
3510 if (sparc_tls_referenced_p (x))
3511 return false;
3512 return true;
3513 }
3514
3515 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3516 (CONST_INT_P (X) \
3517 && INTVAL (X) >= -0x1000 \
3518 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3519
3520 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3521 (CONST_INT_P (X) \
3522 && INTVAL (X) >= -0x1000 \
3523 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
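/* Both macros keep every byte of the access within the 13-bit signed
   immediate range [-0x1000, 0xfff]; e.g. for DFmode (8 bytes) a plain
   offset may be at most 0xff7.  The tighter OLO10 bound of 0xc00
   additionally leaves headroom for the up-to-10-bit %lo() part of a
   LO_SUM address (an explanatory note, not from the original).  */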
3524
3525 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3526
3527 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3528 ordinarily. This changes a bit when generating PIC. */
3529
3530 static bool
3531 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3532 {
3533 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3534
3535 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3536 rs1 = addr;
3537 else if (GET_CODE (addr) == PLUS)
3538 {
3539 rs1 = XEXP (addr, 0);
3540 rs2 = XEXP (addr, 1);
3541
3542 /* Canonicalize. REG comes first; if there are no regs,
3543 LO_SUM comes first. */
3544 if (!REG_P (rs1)
3545 && GET_CODE (rs1) != SUBREG
3546 && (REG_P (rs2)
3547 || GET_CODE (rs2) == SUBREG
3548 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3549 {
3550 rs1 = XEXP (addr, 1);
3551 rs2 = XEXP (addr, 0);
3552 }
3553
3554 if ((flag_pic == 1
3555 && rs1 == pic_offset_table_rtx
3556 && !REG_P (rs2)
3557 && GET_CODE (rs2) != SUBREG
3558 && GET_CODE (rs2) != LO_SUM
3559 && GET_CODE (rs2) != MEM
3560 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3561 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3562 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3563 || ((REG_P (rs1)
3564 || GET_CODE (rs1) == SUBREG)
3565 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3566 {
3567 imm1 = rs2;
3568 rs2 = NULL;
3569 }
3570 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3571 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3572 {
3573 /* We prohibit REG + REG for TFmode when there are no quad move insns
3574 and we consequently need to split. We do this because REG+REG
3575 is not an offsettable address. If we get the situation in reload
3576 where source and destination of a movtf pattern are both MEMs with
3577 REG+REG address, then only one of them gets converted to an
3578 offsettable address. */
3579 if (mode == TFmode
3580 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3581 return 0;
3582
3583 /* Likewise for TImode, but in all cases. */
3584 if (mode == TImode)
3585 return 0;
3586
3587 /* We prohibit REG + REG on ARCH32 if not optimizing, for
3588 DFmode/DImode, because then mem_min_alignment is likely to be zero
3589 after reload and the forced split would lack a matching splitter
3590 pattern. */
3591 if (TARGET_ARCH32 && !optimize
3592 && (mode == DFmode || mode == DImode))
3593 return 0;
3594 }
3595 else if (USE_AS_OFFSETABLE_LO10
3596 && GET_CODE (rs1) == LO_SUM
3597 && TARGET_ARCH64
3598 && ! TARGET_CM_MEDMID
3599 && RTX_OK_FOR_OLO10_P (rs2, mode))
3600 {
3601 rs2 = NULL;
3602 imm1 = XEXP (rs1, 1);
3603 rs1 = XEXP (rs1, 0);
3604 if (!CONSTANT_P (imm1)
3605 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3606 return 0;
3607 }
3608 }
3609 else if (GET_CODE (addr) == LO_SUM)
3610 {
3611 rs1 = XEXP (addr, 0);
3612 imm1 = XEXP (addr, 1);
3613
3614 if (!CONSTANT_P (imm1)
3615 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3616 return 0;
3617
3618 /* We can't allow TFmode in 32-bit mode, because an offset greater
3619 than the alignment (8) may cause the LO_SUM to overflow. */
3620 if (mode == TFmode && TARGET_ARCH32)
3621 return 0;
3622 }
3623 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3624 return 1;
3625 else
3626 return 0;
3627
3628 if (GET_CODE (rs1) == SUBREG)
3629 rs1 = SUBREG_REG (rs1);
3630 if (!REG_P (rs1))
3631 return 0;
3632
3633 if (rs2)
3634 {
3635 if (GET_CODE (rs2) == SUBREG)
3636 rs2 = SUBREG_REG (rs2);
3637 if (!REG_P (rs2))
3638 return 0;
3639 }
3640
3641 if (strict)
3642 {
3643 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3644 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3645 return 0;
3646 }
3647 else
3648 {
3649 if ((! SPARC_INT_REG_P (REGNO (rs1))
3650 && REGNO (rs1) != FRAME_POINTER_REGNUM
3651 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3652 || (rs2
3653 && (! SPARC_INT_REG_P (REGNO (rs2))
3654 && REGNO (rs2) != FRAME_POINTER_REGNUM
3655 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3656 return 0;
3657 }
3658 return 1;
3659 }
3660
3661 /* Return the SYMBOL_REF for the tls_get_addr function. */
3662
3663 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3664
3665 static rtx
3666 sparc_tls_get_addr (void)
3667 {
3668 if (!sparc_tls_symbol)
3669 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3670
3671 return sparc_tls_symbol;
3672 }
3673
3674 /* Return the Global Offset Table to be used in TLS mode. */
3675
3676 static rtx
3677 sparc_tls_got (void)
3678 {
3679 /* In PIC mode, this is just the PIC offset table. */
3680 if (flag_pic)
3681 {
3682 crtl->uses_pic_offset_table = 1;
3683 return pic_offset_table_rtx;
3684 }
3685
3686 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
3687 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
3688 if (TARGET_SUN_TLS && TARGET_ARCH32)
3689 {
3690 load_got_register ();
3691 return global_offset_table_rtx;
3692 }
3693
3694 /* In all other cases, we load a new pseudo with the GOT symbol. */
3695 return copy_to_reg (sparc_got ());
3696 }
3697
3698 /* Return true if X contains a thread-local symbol. */
3699
3700 static bool
3701 sparc_tls_referenced_p (rtx x)
3702 {
3703 if (!TARGET_HAVE_TLS)
3704 return false;
3705
3706 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3707 x = XEXP (XEXP (x, 0), 0);
3708
3709 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3710 return true;
3711
3712 /* That's all we handle in sparc_legitimize_tls_address for now. */
3713 return false;
3714 }
3715
3716 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3717 this (thread-local) address. */
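/* E.g. for the global-dynamic model on 32-bit PIC, the emitted
   sequence is roughly (a sketch of the standard SPARC TLS code):
   sethi %tgd_hi22(sym), %t1
   add %t1, %tgd_lo10(sym), %t2
   add %l7, %t2, %o0, %tgd_add(sym)
   call __tls_get_addr, %tgd_call(sym)  */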
3718
3719 static rtx
3720 sparc_legitimize_tls_address (rtx addr)
3721 {
3722 rtx temp1, temp2, temp3, ret, o0, got, insn;
3723
3724 gcc_assert (can_create_pseudo_p ());
3725
3726 if (GET_CODE (addr) == SYMBOL_REF)
3727 switch (SYMBOL_REF_TLS_MODEL (addr))
3728 {
3729 case TLS_MODEL_GLOBAL_DYNAMIC:
3730 start_sequence ();
3731 temp1 = gen_reg_rtx (SImode);
3732 temp2 = gen_reg_rtx (SImode);
3733 ret = gen_reg_rtx (Pmode);
3734 o0 = gen_rtx_REG (Pmode, 8);
3735 got = sparc_tls_got ();
3736 emit_insn (gen_tgd_hi22 (temp1, addr));
3737 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
3738 if (TARGET_ARCH32)
3739 {
3740 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
3741 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
3742 addr, const1_rtx));
3743 }
3744 else
3745 {
3746 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
3747 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
3748 addr, const1_rtx));
3749 }
3750 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
3751 insn = get_insns ();
3752 end_sequence ();
3753 emit_libcall_block (insn, ret, o0, addr);
3754 break;
3755
3756 case TLS_MODEL_LOCAL_DYNAMIC:
3757 start_sequence ();
3758 temp1 = gen_reg_rtx (SImode);
3759 temp2 = gen_reg_rtx (SImode);
3760 temp3 = gen_reg_rtx (Pmode);
3761 ret = gen_reg_rtx (Pmode);
3762 o0 = gen_rtx_REG (Pmode, 8);
3763 got = sparc_tls_got ();
3764 emit_insn (gen_tldm_hi22 (temp1));
3765 emit_insn (gen_tldm_lo10 (temp2, temp1));
3766 if (TARGET_ARCH32)
3767 {
3768 emit_insn (gen_tldm_add32 (o0, got, temp2));
3769 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
3770 const1_rtx));
3771 }
3772 else
3773 {
3774 emit_insn (gen_tldm_add64 (o0, got, temp2));
3775 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
3776 const1_rtx));
3777 }
3778 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
3779 insn = get_insns ();
3780 end_sequence ();
3781 emit_libcall_block (insn, temp3, o0,
3782 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
3783 UNSPEC_TLSLD_BASE));
3784 temp1 = gen_reg_rtx (SImode);
3785 temp2 = gen_reg_rtx (SImode);
3786 emit_insn (gen_tldo_hix22 (temp1, addr));
3787 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
3788 if (TARGET_ARCH32)
3789 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
3790 else
3791 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
3792 break;
3793
3794 case TLS_MODEL_INITIAL_EXEC:
3795 temp1 = gen_reg_rtx (SImode);
3796 temp2 = gen_reg_rtx (SImode);
3797 temp3 = gen_reg_rtx (Pmode);
3798 got = sparc_tls_got ();
3799 emit_insn (gen_tie_hi22 (temp1, addr));
3800 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
3801 if (TARGET_ARCH32)
3802 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
3803 else
3804 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
3805 if (TARGET_SUN_TLS)
3806 {
3807 ret = gen_reg_rtx (Pmode);
3808 if (TARGET_ARCH32)
3809 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
3810 temp3, addr));
3811 else
3812 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
3813 temp3, addr));
3814 }
3815 else
3816 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
3817 break;
3818
3819 case TLS_MODEL_LOCAL_EXEC:
3820 temp1 = gen_reg_rtx (Pmode);
3821 temp2 = gen_reg_rtx (Pmode);
3822 if (TARGET_ARCH32)
3823 {
3824 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
3825 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
3826 }
3827 else
3828 {
3829 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
3830 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
3831 }
3832 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
3833 break;
3834
3835 default:
3836 gcc_unreachable ();
3837 }
3838
3839 else if (GET_CODE (addr) == CONST)
3840 {
3841 rtx base, offset;
3842
3843 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
3844
3845 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
3846 offset = XEXP (XEXP (addr, 0), 1);
3847
3848 base = force_operand (base, NULL_RTX);
3849 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
3850 offset = force_reg (Pmode, offset);
3851 ret = gen_rtx_PLUS (Pmode, base, offset);
3852 }
3853
3854 else
3855 gcc_unreachable (); /* for now ... */
3856
3857 return ret;
3858 }
3859
3860 /* Legitimize PIC addresses. If the address is already position-independent,
3861 we return ORIG. Newly generated position-independent addresses go into a
3862 reg. This is REG if nonzero, otherwise we allocate register(s) as
3863 necessary. */
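
/* As a sketch, for flag_pic == 2 on 32-bit the code below amounts to
   something like (exact relocation operators depend on the assembler):

	sethi	%hi(slot), %g1		! GOT-relative offset, high part
	or	%g1, %lo(slot), %g1	! low part
	ld	[%l7 + %g1], %g1	! load the GOT entry

   where %l7 holds the GOT address.  With gotdata operators the insns
   are additionally tagged so that the linker can relax the GOT load
   into a direct address computation for locally-bound symbols.  */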
3864
3865 static rtx
3866 sparc_legitimize_pic_address (rtx orig, rtx reg)
3867 {
3868 bool gotdata_op = false;
3869
3870 if (GET_CODE (orig) == SYMBOL_REF
3871 /* See the comment in sparc_expand_move. */
3872 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
3873 {
3874 rtx pic_ref, address;
3875 rtx insn;
3876
3877 if (reg == 0)
3878 {
3879 gcc_assert (can_create_pseudo_p ());
3880 reg = gen_reg_rtx (Pmode);
3881 }
3882
3883 if (flag_pic == 2)
3884 {
3885 /* If not during reload, allocate another temp reg here for loading
3886 in the address, so that these instructions can be optimized
3887 properly. */
3888 rtx temp_reg = (! can_create_pseudo_p ()
3889 ? reg : gen_reg_rtx (Pmode));
3890
3891 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
3892 won't get confused into thinking that these two instructions
3893 are loading in the true address of the symbol. If in the
3894 future a PIC rtx exists, that should be used instead. */
3895 if (TARGET_ARCH64)
3896 {
3897 emit_insn (gen_movdi_high_pic (temp_reg, orig));
3898 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
3899 }
3900 else
3901 {
3902 emit_insn (gen_movsi_high_pic (temp_reg, orig));
3903 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
3904 }
3905 address = temp_reg;
3906 gotdata_op = true;
3907 }
3908 else
3909 address = orig;
3910
3911 crtl->uses_pic_offset_table = 1;
3912 if (gotdata_op)
3913 {
3914 if (TARGET_ARCH64)
3915 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
3916 pic_offset_table_rtx,
3917 address, orig));
3918 else
3919 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
3920 pic_offset_table_rtx,
3921 address, orig));
3922 }
3923 else
3924 {
3925 pic_ref
3926 = gen_const_mem (Pmode,
3927 gen_rtx_PLUS (Pmode,
3928 pic_offset_table_rtx, address));
3929 insn = emit_move_insn (reg, pic_ref);
3930 }
3931
3932 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3933 by loop. */
3934 set_unique_reg_note (insn, REG_EQUAL, orig);
3935 return reg;
3936 }
3937 else if (GET_CODE (orig) == CONST)
3938 {
3939 rtx base, offset;
3940
3941 if (GET_CODE (XEXP (orig, 0)) == PLUS
3942 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3943 return orig;
3944
3945 if (reg == 0)
3946 {
3947 gcc_assert (can_create_pseudo_p ());
3948 reg = gen_reg_rtx (Pmode);
3949 }
3950
3951 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3952 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
3953 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
3954 base == reg ? NULL_RTX : reg);
3955
3956 if (GET_CODE (offset) == CONST_INT)
3957 {
3958 if (SMALL_INT (offset))
3959 return plus_constant (Pmode, base, INTVAL (offset));
3960 else if (can_create_pseudo_p ())
3961 offset = force_reg (Pmode, offset);
3962 else
3963 /* If we reach here, then something is seriously wrong. */
3964 gcc_unreachable ();
3965 }
3966 return gen_rtx_PLUS (Pmode, base, offset);
3967 }
3968 else if (GET_CODE (orig) == LABEL_REF)
3969 /* ??? We ought to be checking that the register is live instead, in case
3970 it is eliminated. */
3971 crtl->uses_pic_offset_table = 1;
3972
3973 return orig;
3974 }
3975
3976 /* Try machine-dependent ways of modifying an illegitimate address X
3977 to be legitimate. If we find one, return the new, valid address.
3978
3979 OLDX is the address as it was before break_out_memory_refs was called.
3980 In some cases it is useful to look at this to decide what needs to be done.
3981
3982 MODE is the mode of the operand pointed to by X.
3983
3984 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
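
/* E.g. a REG+N address whose offset does not fit in the 13-bit signed
   displacement is turned into REG+REG; schematically:

	sethi	%hi(N), %tmp
	or	%tmp, %lo(N), %tmp	! copy_to_mode_reg below
	ld	[%reg + %tmp], ...

   where %tmp stands for a fresh pseudo (the name is illustrative).  */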
3985
3986 static rtx
3987 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3988 enum machine_mode mode)
3989 {
3990 rtx orig_x = x;
3991
3992 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
3993 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3994 force_operand (XEXP (x, 0), NULL_RTX));
3995 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
3996 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3997 force_operand (XEXP (x, 1), NULL_RTX));
3998 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
3999 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4000 XEXP (x, 1));
4001 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4002 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4003 force_operand (XEXP (x, 1), NULL_RTX));
4004
4005 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4006 return x;
4007
4008 if (sparc_tls_referenced_p (x))
4009 x = sparc_legitimize_tls_address (x);
4010 else if (flag_pic)
4011 x = sparc_legitimize_pic_address (x, NULL_RTX);
4012 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4013 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4014 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4015 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4016 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4017 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4018 else if (GET_CODE (x) == SYMBOL_REF
4019 || GET_CODE (x) == CONST
4020 || GET_CODE (x) == LABEL_REF)
4021 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4022
4023 return x;
4024 }
4025
4026 /* Delegitimize an address that was legitimized by the above function. */
4027
4028 static rtx
4029 sparc_delegitimize_address (rtx x)
4030 {
4031 x = delegitimize_mem_from_attrs (x);
4032
4033 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4034 switch (XINT (XEXP (x, 1), 1))
4035 {
4036 case UNSPEC_MOVE_PIC:
4037 case UNSPEC_TLSLE:
4038 x = XVECEXP (XEXP (x, 1), 0, 0);
4039 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4040 break;
4041 default:
4042 break;
4043 }
4044
4045 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4046 if (GET_CODE (x) == MINUS
4047 && REG_P (XEXP (x, 0))
4048 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4049 && GET_CODE (XEXP (x, 1)) == LO_SUM
4050 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4051 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4052 {
4053 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4054 gcc_assert (GET_CODE (x) == LABEL_REF);
4055 }
4056
4057 return x;
4058 }
4059
4060 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4061 replace the input X, or the original X if no replacement is called for.
4062 The output parameter *WIN is 1 if the calling macro should goto WIN,
4063 0 if it should not.
4064
4065 For SPARC, we wish to handle addresses by splitting them into
4066 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4067 This cuts the number of extra insns by one.
4068
4069 Do nothing when generating PIC code and the address is a symbolic
4070 operand or requires a scratch register. */
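
/* E.g. a constant address CST that does not fit in a displacement is
   split so that only the HIGH part needs a reload register; roughly:

	sethi	%hi(cst), %g1		! reloaded HIGH part
	ld	[%g1 + %lo(cst)], %o0	! LO_SUM kept in the MEM

   (register names are illustrative).  */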
4071
4072 rtx
4073 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4074 int opnum, int type,
4075 int ind_levels ATTRIBUTE_UNUSED, int *win)
4076 {
4077 /* Decompose SImode constants into HIGH+LO_SUM. */
4078 if (CONSTANT_P (x)
4079 && (mode != TFmode || TARGET_ARCH64)
4080 && GET_MODE (x) == SImode
4081 && GET_CODE (x) != LO_SUM
4082 && GET_CODE (x) != HIGH
4083 && sparc_cmodel <= CM_MEDLOW
4084 && !(flag_pic
4085 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4086 {
4087 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4088 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4089 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4090 opnum, (enum reload_type)type);
4091 *win = 1;
4092 return x;
4093 }
4094
4095 /* We have to recognize what we have already generated above. */
4096 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4097 {
4098 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4099 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4100 opnum, (enum reload_type)type);
4101 *win = 1;
4102 return x;
4103 }
4104
4105 *win = 0;
4106 return x;
4107 }
4108
4109 /* Return true if ADDR (a legitimate address expression)
4110 has an effect that depends on the machine mode it is used for.
4111
4112 In PIC mode,
4113
4114 (mem:HI [%l7+a])
4115
4116 is not equivalent to
4117
4118 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4119
4120 because [%l7+a+1] is interpreted as the address of (a+1). */
4121
4123 static bool
4124 sparc_mode_dependent_address_p (const_rtx addr,
4125 addr_space_t as ATTRIBUTE_UNUSED)
4126 {
4127 if (flag_pic && GET_CODE (addr) == PLUS)
4128 {
4129 rtx op0 = XEXP (addr, 0);
4130 rtx op1 = XEXP (addr, 1);
4131 if (op0 == pic_offset_table_rtx
4132 && symbolic_operand (op1, VOIDmode))
4133 return true;
4134 }
4135
4136 return false;
4137 }
4138
4139 #ifdef HAVE_GAS_HIDDEN
4140 # define USE_HIDDEN_LINKONCE 1
4141 #else
4142 # define USE_HIDDEN_LINKONCE 0
4143 #endif
4144
4145 static void
4146 get_pc_thunk_name (char name[32], unsigned int regno)
4147 {
4148 const char *reg_name = reg_names[regno];
4149
4150 /* Skip the leading '%' as that cannot be used in a
4151 symbol name. */
4152 reg_name += 1;
4153
4154 if (USE_HIDDEN_LINKONCE)
4155 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4156 else
4157 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4158 }
4159
4160 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4161
4162 static rtx
4163 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4164 {
4165 int orig_flag_pic = flag_pic;
4166 rtx insn;
4167
4168 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4169 flag_pic = 0;
4170 if (TARGET_ARCH64)
4171 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4172 else
4173 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4174 flag_pic = orig_flag_pic;
4175
4176 return insn;
4177 }
4178
4179 /* Emit code to load the GOT register. */
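
/* The non-VxWorks case below emits, roughly:

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds %o7, i.e. the PC of the call, into %l7, leaving
   the absolute address of the GOT in %l7 (a sketch for the 32-bit case
   with %l7 as PIC register; the -4/+4 compensate for the distance
   between the sethi and the point where the PC is taken).  */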
4180
4181 void
4182 load_got_register (void)
4183 {
4184 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4185 if (!global_offset_table_rtx)
4186 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4187
4188 if (TARGET_VXWORKS_RTP)
4189 emit_insn (gen_vxworks_load_got ());
4190 else
4191 {
4192 /* The GOT symbol is subject to a PC-relative relocation so we need a
4193 helper function to add the PC value and thus get the final value. */
4194 if (!got_helper_rtx)
4195 {
4196 char name[32];
4197 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4198 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4199 }
4200
4201 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4202 got_helper_rtx,
4203 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4204 }
4205
4206 /* Need to emit this whether or not we obey regdecls,
4207 since setjmp/longjmp can invalidate the liveness info.
4208 ??? In the case where we don't obey regdecls, this is not sufficient
4209 since we may not fall out the bottom. */
4210 emit_use (global_offset_table_rtx);
4211 }
4212
4213 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4214 address of the call target. */
4215
4216 void
4217 sparc_emit_call_insn (rtx pat, rtx addr)
4218 {
4219 rtx insn;
4220
4221 insn = emit_call_insn (pat);
4222
4223 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4224 if (TARGET_VXWORKS_RTP
4225 && flag_pic
4226 && GET_CODE (addr) == SYMBOL_REF
4227 && (SYMBOL_REF_DECL (addr)
4228 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4229 : !SYMBOL_REF_LOCAL_P (addr)))
4230 {
4231 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4232 crtl->uses_pic_offset_table = 1;
4233 }
4234 }
4235 \f
4236 /* Return 1 if RTX is a MEM which is known to be aligned to at
4237 least a DESIRED byte boundary. */
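
/* A typical use is guarding the generation of doubleword memory insns,
   e.g. (a sketch):

	if (mem_min_alignment (mem, 8))
	  ... emit "ldd"/"std" ...

   since those trap on addresses that are not 8-byte aligned.  */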
4238
4239 int
4240 mem_min_alignment (rtx mem, int desired)
4241 {
4242 rtx addr, base, offset;
4243
4244 /* If it's not a MEM we can't accept it. */
4245 if (GET_CODE (mem) != MEM)
4246 return 0;
4247
4248 /* Obviously... */
4249 if (!TARGET_UNALIGNED_DOUBLES
4250 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4251 return 1;
4252
4253 /* ??? The rest of the function predates MEM_ALIGN so
4254 there is probably a bit of redundancy. */
4255 addr = XEXP (mem, 0);
4256 base = offset = NULL_RTX;
4257 if (GET_CODE (addr) == PLUS)
4258 {
4259 if (GET_CODE (XEXP (addr, 0)) == REG)
4260 {
4261 base = XEXP (addr, 0);
4262
4263 /* What we are saying here is that if the base
4264 REG is aligned properly, the compiler will make
4265 sure any REG based index upon it will be so
4266 as well. */
4267 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4268 offset = XEXP (addr, 1);
4269 else
4270 offset = const0_rtx;
4271 }
4272 }
4273 else if (GET_CODE (addr) == REG)
4274 {
4275 base = addr;
4276 offset = const0_rtx;
4277 }
4278
4279 if (base != NULL_RTX)
4280 {
4281 int regno = REGNO (base);
4282
4283 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4284 {
4285 /* Check if the compiler has recorded some information
4286 about the alignment of the base REG. If reload has
4287 completed, we already matched with proper alignments.
4288 If not running global_alloc, reload might give us
4289 unaligned pointer to local stack though. */
4290 if (((cfun != 0
4291 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4292 || (optimize && reload_completed))
4293 && (INTVAL (offset) & (desired - 1)) == 0)
4294 return 1;
4295 }
4296 else
4297 {
4298 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4299 return 1;
4300 }
4301 }
4302 else if (! TARGET_UNALIGNED_DOUBLES
4303 || CONSTANT_P (addr)
4304 || GET_CODE (addr) == LO_SUM)
4305 {
4306 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4307 is true, in which case we can only assume that an access is aligned if
4308 it is to a constant address, or the address involves a LO_SUM. */
4309 return 1;
4310 }
4311
4312 /* An obviously unaligned address. */
4313 return 0;
4314 }
4315
4316 \f
4317 /* Vectors to keep interesting information about registers where it can easily
4318 be looked up. We used to use the actual mode value as the bit number, but there
4319 are more than 32 modes now. Instead we use two tables: one indexed by
4320 hard register number, and one indexed by mode. */
4321
4322 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4323 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4324 mapped into one sparc_mode_class mode. */
4325
4326 enum sparc_mode_class {
4327 S_MODE, D_MODE, T_MODE, O_MODE,
4328 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4329 CC_MODE, CCFP_MODE
4330 };
4331
4332 /* Modes for single-word and smaller quantities. */
4333 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4334
4335 /* Modes for double-word and smaller quantities. */
4336 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4337
4338 /* Modes for quad-word and smaller quantities. */
4339 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4340
4341 /* Modes for 8-word and smaller quantities. */
4342 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4343
4344 /* Modes for single-float quantities. We must allow any single word or
4345 smaller quantity. This is because the fix/float conversion instructions
4346 take integer inputs/outputs from the float registers. */
4347 #define SF_MODES (S_MODES)
4348
4349 /* Modes for double-float and smaller quantities. */
4350 #define DF_MODES (D_MODES)
4351
4352 /* Modes for quad-float and smaller quantities. */
4353 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4354
4355 /* Modes for quad-float pairs and smaller quantities. */
4356 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4357
4358 /* Modes for double-float only quantities. */
4359 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4360
4361 /* Modes for quad-float and double-float only quantities. */
4362 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4363
4364 /* Modes for quad-float pairs and double-float only quantities. */
4365 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4366
4367 /* Modes for condition codes. */
4368 #define CC_MODES (1 << (int) CC_MODE)
4369 #define CCFP_MODES (1 << (int) CCFP_MODE)
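
/* The two tables are combined by a simple bit test; HARD_REGNO_MODE_OK
   essentially reduces to

	(hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0

   so, e.g., SFmode in an upper FP register fails because the
   odd-numbered entries below are 0 and the even ones lack SF_MODE.  */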
4370
4371 /* Value is 1 if register/mode pair is acceptable on sparc.
4372 The funny mixture of D and T modes is because integer operations
4373 do not specially operate on tetra quantities, so non-quad-aligned
4374 registers can hold quadword quantities (except %o4 and %i4 because
4375 they cross fixed registers). */
4376
4377 /* This points to either the 32 bit or the 64 bit version. */
4378 const int *hard_regno_mode_classes;
4379
4380 static const int hard_32bit_mode_classes[] = {
4381 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4382 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4383 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4384 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4385
4386 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4387 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4388 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4389 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4390
4391 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4392 and none can hold SFmode/SImode values. */
4393 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4394 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4395 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4396 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4397
4398 /* %fcc[0123] */
4399 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4400
4401 /* %icc, %sfp, %gsr */
4402 CC_MODES, 0, D_MODES
4403 };
4404
4405 static const int hard_64bit_mode_classes[] = {
4406 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4407 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4408 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4409 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4410
4411 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4412 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4413 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4414 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4415
4416 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4417 and none can hold SFmode/SImode values. */
4418 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4419 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4420 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4421 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4422
4423 /* %fcc[0123] */
4424 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4425
4426 /* %icc, %sfp, %gsr */
4427 CC_MODES, 0, D_MODES
4428 };
4429
4430 int sparc_mode_class [NUM_MACHINE_MODES];
4431
4432 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4433
4434 static void
4435 sparc_init_modes (void)
4436 {
4437 int i;
4438
4439 for (i = 0; i < NUM_MACHINE_MODES; i++)
4440 {
4441 switch (GET_MODE_CLASS (i))
4442 {
4443 case MODE_INT:
4444 case MODE_PARTIAL_INT:
4445 case MODE_COMPLEX_INT:
4446 if (GET_MODE_SIZE (i) <= 4)
4447 sparc_mode_class[i] = 1 << (int) S_MODE;
4448 else if (GET_MODE_SIZE (i) == 8)
4449 sparc_mode_class[i] = 1 << (int) D_MODE;
4450 else if (GET_MODE_SIZE (i) == 16)
4451 sparc_mode_class[i] = 1 << (int) T_MODE;
4452 else if (GET_MODE_SIZE (i) == 32)
4453 sparc_mode_class[i] = 1 << (int) O_MODE;
4454 else
4455 sparc_mode_class[i] = 0;
4456 break;
4457 case MODE_VECTOR_INT:
4458 if (GET_MODE_SIZE (i) <= 4)
4459 sparc_mode_class[i] = 1 << (int)SF_MODE;
4460 else if (GET_MODE_SIZE (i) == 8)
4461 sparc_mode_class[i] = 1 << (int)DF_MODE;
4462 break;
4463 case MODE_FLOAT:
4464 case MODE_COMPLEX_FLOAT:
4465 if (GET_MODE_SIZE (i) <= 4)
4466 sparc_mode_class[i] = 1 << (int) SF_MODE;
4467 else if (GET_MODE_SIZE (i) == 8)
4468 sparc_mode_class[i] = 1 << (int) DF_MODE;
4469 else if (GET_MODE_SIZE (i) == 16)
4470 sparc_mode_class[i] = 1 << (int) TF_MODE;
4471 else if (GET_MODE_SIZE (i) == 32)
4472 sparc_mode_class[i] = 1 << (int) OF_MODE;
4473 else
4474 sparc_mode_class[i] = 0;
4475 break;
4476 case MODE_CC:
4477 if (i == (int) CCFPmode || i == (int) CCFPEmode)
4478 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4479 else
4480 sparc_mode_class[i] = 1 << (int) CC_MODE;
4481 break;
4482 default:
4483 sparc_mode_class[i] = 0;
4484 break;
4485 }
4486 }
4487
4488 if (TARGET_ARCH64)
4489 hard_regno_mode_classes = hard_64bit_mode_classes;
4490 else
4491 hard_regno_mode_classes = hard_32bit_mode_classes;
4492
4493 /* Initialize the array used by REGNO_REG_CLASS. */
4494 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4495 {
4496 if (i < 16 && TARGET_V8PLUS)
4497 sparc_regno_reg_class[i] = I64_REGS;
4498 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4499 sparc_regno_reg_class[i] = GENERAL_REGS;
4500 else if (i < 64)
4501 sparc_regno_reg_class[i] = FP_REGS;
4502 else if (i < 96)
4503 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4504 else if (i < 100)
4505 sparc_regno_reg_class[i] = FPCC_REGS;
4506 else
4507 sparc_regno_reg_class[i] = NO_REGS;
4508 }
4509 }
4510 \f
4511 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4512
4513 static inline bool
4514 save_global_or_fp_reg_p (unsigned int regno,
4515 int leaf_function ATTRIBUTE_UNUSED)
4516 {
4517 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4518 }
4519
4520 /* Return whether the return address register (%i7) is needed. */
4521
4522 static inline bool
4523 return_addr_reg_needed_p (int leaf_function)
4524 {
4525 /* If it is live, for example because of __builtin_return_address (0). */
4526 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4527 return true;
4528
4529 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4530 if (!leaf_function
4531 /* Loading the GOT register clobbers %o7. */
4532 || crtl->uses_pic_offset_table
4533 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4534 return true;
4535
4536 return false;
4537 }
4538
4539 /* Return whether REGNO, a local or in register, must be saved/restored. */
4540
4541 static bool
4542 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4543 {
4544 /* General case: call-saved registers live at some point. */
4545 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4546 return true;
4547
4548 /* Frame pointer register (%fp) if needed. */
4549 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4550 return true;
4551
4552 /* Return address register (%i7) if needed. */
4553 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4554 return true;
4555
4556 /* GOT register (%l7) if needed. */
4557 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4558 return true;
4559
4560 /* If the function accesses prior frames, the frame pointer and the return
4561 address of the previous frame must be saved on the stack. */
4562 if (crtl->accesses_prior_frames
4563 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4564 return true;
4565
4566 return false;
4567 }
4568
4569 /* Compute the frame size required by the function. This function is called
4570 during the reload pass and also by sparc_expand_prologue. */
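
/* Schematically, the computation below is

	apparent = ROUND_UP (size - STARTING_FRAME_OFFSET, 8) + 4 * n_regs
	frame = SPARC_STACK_ALIGN (apparent + ROUND_UP (args, 8)
				   + FIRST_PARM_OFFSET (decl))

   where FIRST_PARM_OFFSET covers the register window save area and
   ROUND_UP is shorthand here, not a macro used below.  */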
4571
4572 HOST_WIDE_INT
4573 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4574 {
4575 HOST_WIDE_INT frame_size, apparent_frame_size;
4576 int args_size, n_global_fp_regs = 0;
4577 bool save_local_in_regs_p = false;
4578 unsigned int i;
4579
4580 /* If the function allocates dynamic stack space, the dynamic offset is
4581 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4582 if (leaf_function && !cfun->calls_alloca)
4583 args_size = 0;
4584 else
4585 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4586
4587 /* Calculate space needed for global registers. */
4588 if (TARGET_ARCH64)
4589 for (i = 0; i < 8; i++)
4590 if (save_global_or_fp_reg_p (i, 0))
4591 n_global_fp_regs += 2;
4592 else
4593 for (i = 0; i < 8; i += 2)
4594 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4595 n_global_fp_regs += 2;
4596
4597 /* In the flat window model, find out which local and in registers need to
4598 be saved. We don't reserve space in the current frame for them as they
4599 will be spilled into the register window save area of the caller's frame.
4600 However, as soon as we use this register window save area, we must create
4601 that of the current frame to make it the live one. */
4602 if (TARGET_FLAT)
4603 for (i = 16; i < 32; i++)
4604 if (save_local_or_in_reg_p (i, leaf_function))
4605 {
4606 save_local_in_regs_p = true;
4607 break;
4608 }
4609
4610 /* Calculate space needed for FP registers. */
4611 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4612 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4613 n_global_fp_regs += 2;
4614
4615 if (size == 0
4616 && n_global_fp_regs == 0
4617 && args_size == 0
4618 && !save_local_in_regs_p)
4619 frame_size = apparent_frame_size = 0;
4620 else
4621 {
4622 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4623 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
4624 apparent_frame_size += n_global_fp_regs * 4;
4625
4626 /* We need to add the size of the outgoing argument area. */
4627 frame_size = apparent_frame_size + ((args_size + 7) & -8);
4628
4629 /* And that of the register window save area. */
4630 frame_size += FIRST_PARM_OFFSET (cfun->decl);
4631
4632 /* Finally, bump to the appropriate alignment. */
4633 frame_size = SPARC_STACK_ALIGN (frame_size);
4634 }
4635
4636 /* Set up values for use in prologue and epilogue. */
4637 sparc_frame_size = frame_size;
4638 sparc_apparent_frame_size = apparent_frame_size;
4639 sparc_n_global_fp_regs = n_global_fp_regs;
4640 sparc_save_local_in_regs_p = save_local_in_regs_p;
4641
4642 return frame_size;
4643 }
4644
4645 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
4646
4647 int
4648 sparc_initial_elimination_offset (int to)
4649 {
4650 int offset;
4651
4652 if (to == STACK_POINTER_REGNUM)
4653 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
4654 else
4655 offset = 0;
4656
4657 offset += SPARC_STACK_BIAS;
4658 return offset;
4659 }
4660
4661 /* Output any necessary .register pseudo-ops. */
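
/* E.g. a 64-bit function that uses %g2 as a scratch register gets

	.register	%g2, #scratch

   emitted once, telling the assembler that the global is used
   deliberately.  */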
4662
4663 void
4664 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4665 {
4666 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
4667 int i;
4668
4669 if (TARGET_ARCH32)
4670 return;
4671
4672 /* Check if %g[2367] were used without
4673 .register being printed for them already. */
4674 for (i = 2; i < 8; i++)
4675 {
4676 if (df_regs_ever_live_p (i)
4677 && ! sparc_hard_reg_printed [i])
4678 {
4679 sparc_hard_reg_printed [i] = 1;
4680 /* %g7 is used as TLS base register, use #ignore
4681 for it instead of #scratch. */
4682 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4683 i == 7 ? "ignore" : "scratch");
4684 }
4685 if (i == 3) i = 5;
4686 }
4687 #endif
4688 }
4689
4690 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
4691
4692 #if PROBE_INTERVAL > 4096
4693 #error Cannot use indexed addressing mode for stack probing
4694 #endif
4695
4696 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
4697 inclusive. These are offsets from the current stack pointer.
4698
4699 Note that we don't use the REG+REG addressing mode for the probes because
4700 of the stack bias in 64-bit mode. And it doesn't really buy us anything
4701 so the advantage of having a single code path wins here. */
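
/* For example, with PROBE_INTERVAL == 4096 and SIZE == 10000, the
   unrolled case below probes at FIRST + 4096, FIRST + 8192 and finally
   FIRST + 10000 below the stack pointer.  */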
4702
4703 static void
4704 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
4705 {
4706 rtx g1 = gen_rtx_REG (Pmode, 1);
4707
4708 /* See if we have a constant small number of probes to generate. If so,
4709 that's the easy case. */
4710 if (size <= PROBE_INTERVAL)
4711 {
4712 emit_move_insn (g1, GEN_INT (first));
4713 emit_insn (gen_rtx_SET (VOIDmode, g1,
4714 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4715 emit_stack_probe (plus_constant (Pmode, g1, -size));
4716 }
4717
4718 /* The run-time loop is made up of 10 insns in the generic case while the
4719 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals. */
4720 else if (size <= 5 * PROBE_INTERVAL)
4721 {
4722 HOST_WIDE_INT i;
4723
4724 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
4725 emit_insn (gen_rtx_SET (VOIDmode, g1,
4726 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4727 emit_stack_probe (g1);
4728
4729 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
4730 it exceeds SIZE. If only two probes are needed, this will not
4731 generate any code. Then probe at FIRST + SIZE. */
4732 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
4733 {
4734 emit_insn (gen_rtx_SET (VOIDmode, g1,
4735 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
4736 emit_stack_probe (g1);
4737 }
4738
4739 emit_stack_probe (plus_constant (Pmode, g1,
4740 (i - PROBE_INTERVAL) - size));
4741 }
4742
4743 /* Otherwise, do the same as above, but in a loop. Note that we must be
4744 extra careful with variables wrapping around because we might be at
4745 the very top (or the very bottom) of the address space and we have
4746 to be able to handle this case properly; in particular, we use an
4747 equality test for the loop condition. */
4748 else
4749 {
4750 HOST_WIDE_INT rounded_size;
4751 rtx g4 = gen_rtx_REG (Pmode, 4);
4752
4753 emit_move_insn (g1, GEN_INT (first));
4754
4755
4756 /* Step 1: round SIZE to the previous multiple of the interval. */
4757
4758 rounded_size = size & -PROBE_INTERVAL;
4759 emit_move_insn (g4, GEN_INT (rounded_size));
4760
4761
4762 /* Step 2: compute initial and final value of the loop counter. */
4763
4764 /* TEST_ADDR = SP + FIRST. */
4765 emit_insn (gen_rtx_SET (VOIDmode, g1,
4766 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4767
4768 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
4769 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
4770
4771
4772 /* Step 3: the loop
4773
4774 while (TEST_ADDR != LAST_ADDR)
4775 {
4776 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
4777 probe at TEST_ADDR
4778 }
4779
4780 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
4781 until it is equal to ROUNDED_SIZE. */
4782
4783 if (TARGET_ARCH64)
4784 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
4785 else
4786 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
4787
4788
4789 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
4790 that SIZE is equal to ROUNDED_SIZE. */
4791
4792 if (size != rounded_size)
4793 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
4794 }
4795
4796 /* Make sure nothing is scheduled before we are done. */
4797 emit_insn (gen_blockage ());
4798 }
4799
4800 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
4801 absolute addresses. */
4802
4803 const char *
4804 output_probe_stack_range (rtx reg1, rtx reg2)
4805 {
4806 static int labelno = 0;
4807 char loop_lab[32], end_lab[32];
4808 rtx xops[2];
4809
4810 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
4811 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
4812
4813 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
4814
4815 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
4816 xops[0] = reg1;
4817 xops[1] = reg2;
4818 output_asm_insn ("cmp\t%0, %1", xops);
4819 if (TARGET_ARCH64)
4820 fputs ("\tbe,pn\t%xcc,", asm_out_file);
4821 else
4822 fputs ("\tbe\t", asm_out_file);
4823 assemble_name_raw (asm_out_file, end_lab);
4824 fputc ('\n', asm_out_file);
4825
4826 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
4827 xops[1] = GEN_INT (-PROBE_INTERVAL);
4828 output_asm_insn (" add\t%0, %1, %0", xops);
4829
4830 /* Probe at TEST_ADDR and branch. */
4831 if (TARGET_ARCH64)
4832 fputs ("\tba,pt\t%xcc,", asm_out_file);
4833 else
4834 fputs ("\tba\t", asm_out_file);
4835 assemble_name_raw (asm_out_file, loop_lab);
4836 fputc ('\n', asm_out_file);
4837 xops[1] = GEN_INT (SPARC_STACK_BIAS);
4838 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
4839
4840 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
4841
4842 return "";
4843 }
4844
4845 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
4846 needed. LOW is supposed to be double-word aligned for 32-bit registers.
4847 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
4848 is the action to be performed if SAVE_P returns true and ACTION_FALSE
4849 the action to be performed if it returns false. Return the new offset. */
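
/* For instance, on 32-bit, if both %l0 and %l1 must be handled, a single
   DImode (ldd/std) move is used at a doubleword-aligned offset; if only
   %l1 must be, an SImode move at OFFSET + 4 is used instead, and OFFSET
   still advances by 8 so subsequent slots stay doubleword-aligned.  */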
4850
4851 typedef bool (*sorr_pred_t) (unsigned int, int);
4852 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
4853
4854 static int
4855 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
4856 int offset, int leaf_function, sorr_pred_t save_p,
4857 sorr_act_t action_true, sorr_act_t action_false)
4858 {
4859 unsigned int i;
4860 rtx mem, insn;
4861
4862 if (TARGET_ARCH64 && high <= 32)
4863 {
4864 int fp_offset = -1;
4865
4866 for (i = low; i < high; i++)
4867 {
4868 if (save_p (i, leaf_function))
4869 {
4870 mem = gen_frame_mem (DImode, plus_constant (Pmode,
4871 base, offset));
4872 if (action_true == SORR_SAVE)
4873 {
4874 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
4875 RTX_FRAME_RELATED_P (insn) = 1;
4876 }
4877 else /* action_true == SORR_RESTORE */
4878 {
4879 /* The frame pointer must be restored last since its old
4880 value may be used as base address for the frame. This
4881 is problematic in 64-bit mode only because of the lack
4882 of a double-word load instruction. */
4883 if (i == HARD_FRAME_POINTER_REGNUM)
4884 fp_offset = offset;
4885 else
4886 emit_move_insn (gen_rtx_REG (DImode, i), mem);
4887 }
4888 offset += 8;
4889 }
4890 else if (action_false == SORR_ADVANCE)
4891 offset += 8;
4892 }
4893
4894 if (fp_offset >= 0)
4895 {
4896 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
4897 emit_move_insn (hard_frame_pointer_rtx, mem);
4898 }
4899 }
4900 else
4901 {
4902 for (i = low; i < high; i += 2)
4903 {
4904 bool reg0 = save_p (i, leaf_function);
4905 bool reg1 = save_p (i + 1, leaf_function);
4906 enum machine_mode mode;
4907 int regno;
4908
4909 if (reg0 && reg1)
4910 {
4911 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
4912 regno = i;
4913 }
4914 else if (reg0)
4915 {
4916 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
4917 regno = i;
4918 }
4919 else if (reg1)
4920 {
4921 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
4922 regno = i + 1;
4923 offset += 4;
4924 }
4925 else
4926 {
4927 if (action_false == SORR_ADVANCE)
4928 offset += 8;
4929 continue;
4930 }
4931
4932 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
4933 if (action_true == SORR_SAVE)
4934 {
4935 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
4936 RTX_FRAME_RELATED_P (insn) = 1;
4937 if (mode == DImode)
4938 {
4939 rtx set1, set2;
4940 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
4941 offset));
4942 set1 = gen_rtx_SET (VOIDmode, mem,
4943 gen_rtx_REG (SImode, regno));
4944 RTX_FRAME_RELATED_P (set1) = 1;
4945 mem
4946 = gen_frame_mem (SImode, plus_constant (Pmode, base,
4947 offset + 4));
4948 set2 = gen_rtx_SET (VOIDmode, mem,
4949 gen_rtx_REG (SImode, regno + 1));
4950 RTX_FRAME_RELATED_P (set2) = 1;
4951 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4952 gen_rtx_PARALLEL (VOIDmode,
4953 gen_rtvec (2, set1, set2)));
4954 }
4955 }
4956 else /* action_true == SORR_RESTORE */
4957 emit_move_insn (gen_rtx_REG (mode, regno), mem);
4958
4959 /* Always preserve double-word alignment. */
4960 offset = (offset + 8) & -8;
4961 }
4962 }
4963
4964 return offset;
4965 }
4966
4967 /* Emit code to adjust BASE to OFFSET. Return the new base. */
4968
4969 static rtx
4970 emit_adjust_base_to_offset (rtx base, int offset)
4971 {
4972 /* ??? This might be optimized a little as %g1 might already have a
4973 value close enough that a single add insn will do. */
4974 /* ??? Although, all of this is probably only a temporary fix because
4975 if %g1 can hold a function result, then sparc_expand_epilogue will
4976 lose (the result will be clobbered). */
4977 rtx new_base = gen_rtx_REG (Pmode, 1);
4978 emit_move_insn (new_base, GEN_INT (offset));
4979 emit_insn (gen_rtx_SET (VOIDmode,
4980 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
4981 return new_base;
4982 }
4983
4984 /* Emit code to save/restore call-saved global and FP registers. */
4985
4986 static void
4987 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
4988 {
4989 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
4990 {
4991 base = emit_adjust_base_to_offset (base, offset);
4992 offset = 0;
4993 }
4994
4995 offset
4996 = emit_save_or_restore_regs (0, 8, base, offset, 0,
4997 save_global_or_fp_reg_p, action, SORR_NONE);
4998 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
4999 save_global_or_fp_reg_p, action, SORR_NONE);
5000 }
5001
5002 /* Emit code to save/restore call-saved local and in registers. */
5003
5004 static void
5005 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5006 {
5007 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5008 {
5009 base = emit_adjust_base_to_offset (base, offset);
5010 offset = 0;
5011 }
5012
5013 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5014 save_local_or_in_reg_p, action, SORR_ADVANCE);
5015 }
5016
5017 /* Emit a window_save insn. */
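
/* The window_save pattern boils down to a single

	save	%sp, <increment>, %sp

   instruction, which both allocates the frame and rotates the register
   window, hence the combined CFI notes attached below.  */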
5018
5019 static rtx
5020 emit_window_save (rtx increment)
5021 {
5022 rtx insn = emit_insn (gen_window_save (increment));
5023 RTX_FRAME_RELATED_P (insn) = 1;
5024
5025 /* The incoming return address (%o7) is saved in %i7. */
5026 add_reg_note (insn, REG_CFA_REGISTER,
5027 gen_rtx_SET (VOIDmode,
5028 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5029 gen_rtx_REG (Pmode,
5030 INCOMING_RETURN_ADDR_REGNUM)));
5031
5032 /* The window save event. */
5033 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5034
5035 /* The CFA is %fp, the hard frame pointer. */
5036 add_reg_note (insn, REG_CFA_DEF_CFA,
5037 plus_constant (Pmode, hard_frame_pointer_rtx,
5038 INCOMING_FRAME_SP_OFFSET));
5039
5040 return insn;
5041 }
5042
5043 /* Generate an increment for the stack pointer. */
5044
5045 static rtx
5046 gen_stack_pointer_inc (rtx increment)
5047 {
5048 return gen_rtx_SET (VOIDmode,
5049 stack_pointer_rtx,
5050 gen_rtx_PLUS (Pmode,
5051 stack_pointer_rtx,
5052 increment));
5053 }
5054
5055 /* Expand the function prologue. The prologue is responsible for reserving
5056 storage for the frame, saving the call-saved registers and loading the
5057 GOT register if needed. */
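
/* For instance, a leaf function with a 6000-byte frame gets

	add	%sp, -4096, %sp
	add	%sp, -1904, %sp

   below, since the 13-bit signed immediate only reaches -4096.  */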
5058
5059 void
5060 sparc_expand_prologue (void)
5061 {
5062 HOST_WIDE_INT size;
5063 rtx insn;
5064
5065 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5066 on the final value of the flag means deferring the prologue/epilogue
5067 expansion until just before the second scheduling pass, which is too
5068 late to emit multiple epilogues or return insns.
5069
5070 Of course we are making the assumption that the value of the flag
5071 will not change between now and its final value. Of the three parts
5072 of the formula, only the last one can reasonably vary. Let's take a
5073 closer look, assuming that the first two are true (otherwise the
5074 last condition is effectively moot).
5075
5076 If only_leaf_regs_used returns false, the global predicate will also
5077 be false so the actual frame size calculated below will be positive.
5078 As a consequence, the save_register_window insn will be emitted in
5079 the instruction stream; now this insn explicitly references %fp
5080 which is not a leaf register so only_leaf_regs_used will always
5081 return false subsequently.
5082
5083 If only_leaf_regs_used returns true, we hope that the subsequent
5084 optimization passes won't cause non-leaf registers to pop up. For
5085 example, the regrename pass has special provisions to not rename to
5086 non-leaf registers in a leaf function. */
5087 sparc_leaf_function_p
5088 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5089
5090 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5091
5092 if (flag_stack_usage_info)
5093 current_function_static_stack_size = size;
5094
5095 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5096 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5097
5098 if (size == 0)
5099 ; /* do nothing. */
5100 else if (sparc_leaf_function_p)
5101 {
5102 rtx size_int_rtx = GEN_INT (-size);
5103
5104 if (size <= 4096)
5105 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5106 else if (size <= 8192)
5107 {
5108 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5109 RTX_FRAME_RELATED_P (insn) = 1;
5110
5111 /* %sp is still the CFA register. */
5112 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5113 }
5114 else
5115 {
5116 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5117 emit_move_insn (size_rtx, size_int_rtx);
5118 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5119 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5120 gen_stack_pointer_inc (size_int_rtx));
5121 }
5122
5123 RTX_FRAME_RELATED_P (insn) = 1;
5124 }
5125 else
5126 {
5127 rtx size_int_rtx = GEN_INT (-size);
5128
5129 if (size <= 4096)
5130 emit_window_save (size_int_rtx);
5131 else if (size <= 8192)
5132 {
5133 emit_window_save (GEN_INT (-4096));
5134
5135 /* %sp is not the CFA register anymore. */
5136 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5137
5138 /* Make sure no %fp-based store is issued until after the frame is
5139 established. The offset between the frame pointer and the stack
5140 pointer is calculated relative to the value of the stack pointer
5141 at the end of the function prologue, and moving instructions that
5142 access the stack via the frame pointer between the instructions
5143 that decrement the stack pointer could result in accessing the
5144 register window save area, which is volatile. */
5145 emit_insn (gen_frame_blockage ());
5146 }
5147 else
5148 {
5149 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5150 emit_move_insn (size_rtx, size_int_rtx);
5151 emit_window_save (size_rtx);
5152 }
5153 }
5154
5155 if (sparc_leaf_function_p)
5156 {
5157 sparc_frame_base_reg = stack_pointer_rtx;
5158 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5159 }
5160 else
5161 {
5162 sparc_frame_base_reg = hard_frame_pointer_rtx;
5163 sparc_frame_base_offset = SPARC_STACK_BIAS;
5164 }
5165
5166 if (sparc_n_global_fp_regs > 0)
5167 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5168 sparc_frame_base_offset
5169 - sparc_apparent_frame_size,
5170 SORR_SAVE);
5171
5172 /* Load the GOT register if needed. */
5173 if (crtl->uses_pic_offset_table)
5174 load_got_register ();
5175
5176 /* Advertise that the data calculated just above are now valid. */
5177 sparc_prologue_data_valid_p = true;
5178 }
5179
5180 /* Expand the function prologue. The prologue is responsible for reserving
5181 storage for the frame, saving the call-saved registers and loading the
5182 GOT register if needed. */
5183
5184 void
5185 sparc_flat_expand_prologue (void)
5186 {
5187 HOST_WIDE_INT size;
5188 rtx insn;
5189
5190 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5191
5192 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5193
5194 if (flag_stack_usage_info)
5195 current_function_static_stack_size = size;
5196
5197 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5198 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5199
5200 if (sparc_save_local_in_regs_p)
5201 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5202 SORR_SAVE);
5203
5204 if (size == 0)
5205 ; /* do nothing. */
5206 else
5207 {
5208 rtx size_int_rtx, size_rtx;
5209
5210 size_rtx = size_int_rtx = GEN_INT (-size);
5211
5212 /* We establish the frame (i.e. decrement the stack pointer) first, even
5213 if we use a frame pointer, because we cannot clobber any call-saved
5214 registers, including the frame pointer, if we haven't created a new
5215 register save area, for the sake of compatibility with the ABI. */
5216 if (size <= 4096)
5217 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5218 else if (size <= 8192 && !frame_pointer_needed)
5219 {
5220 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5221 RTX_FRAME_RELATED_P (insn) = 1;
5222 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5223 }
5224 else
5225 {
5226 size_rtx = gen_rtx_REG (Pmode, 1);
5227 emit_move_insn (size_rtx, size_int_rtx);
5228 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5229 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5230 gen_stack_pointer_inc (size_int_rtx));
5231 }
5232 RTX_FRAME_RELATED_P (insn) = 1;
5233
5234 /* Ensure nothing is scheduled until after the frame is established. */
5235 emit_insn (gen_blockage ());
5236
5237 if (frame_pointer_needed)
5238 {
5239 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5240 gen_rtx_MINUS (Pmode,
5241 stack_pointer_rtx,
5242 size_rtx)));
5243 RTX_FRAME_RELATED_P (insn) = 1;
5244
5245 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5246 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5247 plus_constant (Pmode, stack_pointer_rtx,
5248 size)));
5249 }
5250
5251 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5252 {
5253 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5254 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5255
5256 insn = emit_move_insn (i7, o7);
5257 RTX_FRAME_RELATED_P (insn) = 1;
5258
5259 add_reg_note (insn, REG_CFA_REGISTER,
5260 gen_rtx_SET (VOIDmode, i7, o7));
5261
5262 /* Prevent this instruction from ever being considered dead,
5263 even if this function has no epilogue. */
5264 emit_use (i7);
5265 }
5266 }
5267
5268 if (frame_pointer_needed)
5269 {
5270 sparc_frame_base_reg = hard_frame_pointer_rtx;
5271 sparc_frame_base_offset = SPARC_STACK_BIAS;
5272 }
5273 else
5274 {
5275 sparc_frame_base_reg = stack_pointer_rtx;
5276 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5277 }
5278
5279 if (sparc_n_global_fp_regs > 0)
5280 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5281 sparc_frame_base_offset
5282 - sparc_apparent_frame_size,
5283 SORR_SAVE);
5284
5285 /* Load the GOT register if needed. */
5286 if (crtl->uses_pic_offset_table)
5287 load_got_register ();
5288
5289 /* Advertise that the data calculated just above are now valid. */
5290 sparc_prologue_data_valid_p = true;
5291 }
5292
5293 /* This function generates the assembly code for function entry, which boils
5294 down to emitting the necessary .register directives. */
5295
5296 static void
5297 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5298 {
5299 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5300 if (!TARGET_FLAT)
5301 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5302
5303 sparc_output_scratch_registers (file);
5304 }
5305
5306 /* Expand the function epilogue, either normal or part of a sibcall.
5307 We emit all the instructions except the return or the call. */
5308
5309 void
5310 sparc_expand_epilogue (bool for_eh)
5311 {
5312 HOST_WIDE_INT size = sparc_frame_size;
5313
5314 if (sparc_n_global_fp_regs > 0)
5315 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5316 sparc_frame_base_offset
5317 - sparc_apparent_frame_size,
5318 SORR_RESTORE);
5319
5320 if (size == 0 || for_eh)
5321 ; /* do nothing. */
5322 else if (sparc_leaf_function_p)
5323 {
5324 if (size <= 4096)
5325 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5326 else if (size <= 8192)
5327 {
5328 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5329 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5330 }
5331 else
5332 {
5333 rtx reg = gen_rtx_REG (Pmode, 1);
5334 emit_move_insn (reg, GEN_INT (size));
5335 emit_insn (gen_stack_pointer_inc (reg));
5336 }
5337 }
5338 }
5339
5340 /* Expand the function epilogue, either normal or part of a sibcall.
5341 We emit all the instructions except the return or the call. */
5342
5343 void
5344 sparc_flat_expand_epilogue (bool for_eh)
5345 {
5346 HOST_WIDE_INT size = sparc_frame_size;
5347
5348 if (sparc_n_global_fp_regs > 0)
5349 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5350 sparc_frame_base_offset
5351 - sparc_apparent_frame_size,
5352 SORR_RESTORE);
5353
5354 /* If we have a frame pointer, we'll need both to restore it before the
5355 frame is destroyed and to use its current value in destroying the frame.
5356 Since we don't have an atomic way to do that in the flat window model,
5357 we save the current value into a temporary register (%g1). */
5358 if (frame_pointer_needed && !for_eh)
5359 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5360
5361 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5362 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5363 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5364
5365 if (sparc_save_local_in_regs_p)
5366 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5367 sparc_frame_base_offset,
5368 SORR_RESTORE);
5369
5370 if (size == 0 || for_eh)
5371 ; /* do nothing. */
5372 else if (frame_pointer_needed)
5373 {
5374 /* Make sure the frame is destroyed after everything else is done. */
5375 emit_insn (gen_blockage ());
5376
5377 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5378 }
5379 else
5380 {
5381 /* Likewise. */
5382 emit_insn (gen_blockage ());
5383
5384 if (size <= 4096)
5385 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5386 else if (size <= 8192)
5387 {
5388 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5389 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5390 }
5391 else
5392 {
5393 rtx reg = gen_rtx_REG (Pmode, 1);
5394 emit_move_insn (reg, GEN_INT (size));
5395 emit_insn (gen_stack_pointer_inc (reg));
5396 }
5397 }
5398 }
5399
5400 /* Return true if it is appropriate to emit `return' instructions in the
5401 body of a function. */
5402
5403 bool
5404 sparc_can_use_return_insn_p (void)
5405 {
5406 return sparc_prologue_data_valid_p
5407 && sparc_n_global_fp_regs == 0
5408 && (TARGET_FLAT
5409 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5410 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5411 }
5412
5413 /* This function generates the assembly code for function exit. */
5414
5415 static void
5416 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5417 {
5418 /* If the last two instructions of a function are "call foo; dslot;"
5419 the return address might point to the first instruction in the next
5420 function and we have to output a dummy nop for the sake of sane
5421 backtraces in such cases. This is pointless for sibling calls since
5422 the return address is explicitly adjusted. */
5423
5424 rtx insn, last_real_insn;
5425
5426 insn = get_last_insn ();
5427
5428 last_real_insn = prev_real_insn (insn);
5429 if (last_real_insn
5430 && GET_CODE (last_real_insn) == INSN
5431 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5432 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5433
5434 if (last_real_insn
5435 && CALL_P (last_real_insn)
5436 && !SIBLING_CALL_P (last_real_insn))
5437 fputs("\tnop\n", file);
5438
5439 sparc_output_deferred_case_vectors ();
5440 }
5441
5442 /* Output a 'restore' instruction. */
5443
5444 static void
5445 output_restore (rtx pat)
5446 {
5447 rtx operands[3];
5448
5449 if (! pat)
5450 {
5451 fputs ("\t restore\n", asm_out_file);
5452 return;
5453 }
5454
5455 gcc_assert (GET_CODE (pat) == SET);
5456
5457 operands[0] = SET_DEST (pat);
5458 pat = SET_SRC (pat);
5459
5460 switch (GET_CODE (pat))
5461 {
5462 case PLUS:
5463 operands[1] = XEXP (pat, 0);
5464 operands[2] = XEXP (pat, 1);
5465 output_asm_insn (" restore %r1, %2, %Y0", operands);
5466 break;
5467 case LO_SUM:
5468 operands[1] = XEXP (pat, 0);
5469 operands[2] = XEXP (pat, 1);
5470 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5471 break;
5472 case ASHIFT:
5473 operands[1] = XEXP (pat, 0);
5474 gcc_assert (XEXP (pat, 1) == const1_rtx);
5475 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5476 break;
5477 default:
5478 operands[1] = pat;
5479 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5480 break;
5481 }
5482 }
5483
5484 /* Output a return. */
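
/* For a leaf or flat function this is just "jmp %o7+8" with the delay
   slot filled if possible; for a regular V9 function it is typically
   "return %i7+8" with the restore subsumed by the delay slot.  The %)
   modifier prints the return offset: normally 8, but 12 when the caller
   follows the call with an unimp insn (32-bit aggregate-return
   convention).  */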
5485
5486 const char *
5487 output_return (rtx insn)
5488 {
5489 if (crtl->calls_eh_return)
5490 {
5491 /* If the function uses __builtin_eh_return, the eh_return
5492 machinery occupies the delay slot. */
5493 gcc_assert (!final_sequence);
5494
5495 if (flag_delayed_branch)
5496 {
5497 if (!TARGET_FLAT && TARGET_V9)
5498 fputs ("\treturn\t%i7+8\n", asm_out_file);
5499 else
5500 {
5501 if (!TARGET_FLAT)
5502 fputs ("\trestore\n", asm_out_file);
5503
5504 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5505 }
5506
5507 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5508 }
5509 else
5510 {
5511 if (!TARGET_FLAT)
5512 fputs ("\trestore\n", asm_out_file);
5513
5514 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5515 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5516 }
5517 }
5518 else if (sparc_leaf_function_p || TARGET_FLAT)
5519 {
5520 /* This is a leaf or flat function so we don't have to bother restoring
5521 the register window, which frees us from dealing with the convoluted
5522 semantics of restore/return. We simply output the jump to the
5523 return address and the insn in the delay slot (if any). */
5524
5525 return "jmp\t%%o7+%)%#";
5526 }
5527 else
5528 {
5529 /* This is a regular function so we have to restore the register window.
5530 We may have a pending insn for the delay slot, which will be either
5531 combined with the 'restore' instruction or put in the delay slot of
5532 the 'return' instruction. */
5533
5534 if (final_sequence)
5535 {
5536 rtx delay, pat;
5537
5538 delay = NEXT_INSN (insn);
5539 gcc_assert (delay);
5540
5541 pat = PATTERN (delay);
5542
5543 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5544 {
5545 epilogue_renumber (&pat, 0);
5546 return "return\t%%i7+%)%#";
5547 }
5548 else
5549 {
5550 output_asm_insn ("jmp\t%%i7+%)", NULL);
5551 output_restore (pat);
5552 PATTERN (delay) = gen_blockage ();
5553 INSN_CODE (delay) = -1;
5554 }
5555 }
5556 else
5557 {
5558 /* The delay slot is empty. */
5559 if (TARGET_V9)
5560 return "return\t%%i7+%)\n\t nop";
5561 else if (flag_delayed_branch)
5562 return "jmp\t%%i7+%)\n\t restore";
5563 else
5564 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5565 }
5566 }
5567
5568 return "";
5569 }
5570
5571 /* Output a sibling call. */
5572
5573 const char *
5574 output_sibcall (rtx insn, rtx call_operand)
5575 {
5576 rtx operands[1];
5577
5578 gcc_assert (flag_delayed_branch);
5579
5580 operands[0] = call_operand;
5581
5582 if (sparc_leaf_function_p || TARGET_FLAT)
5583 {
5584 /* This is a leaf or flat function so we don't have to bother restoring
5585 the register window. We simply output the jump to the function and
5586 the insn in the delay slot (if any). */
5587
5588 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5589
5590 if (final_sequence)
5591 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
5592 operands);
5593 else
5594 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
5595 it into a branch if possible. */
5596 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
5597 operands);
5598 }
5599 else
5600 {
5601 /* This is a regular function so we have to restore the register window.
5602 We may have a pending insn for the delay slot, which will be combined
5603 with the 'restore' instruction. */
5604
5605 output_asm_insn ("call\t%a0, 0", operands);
5606
5607 if (final_sequence)
5608 {
5609 rtx delay = NEXT_INSN (insn);
5610 gcc_assert (delay);
5611
5612 output_restore (PATTERN (delay));
5613
5614 PATTERN (delay) = gen_blockage ();
5615 INSN_CODE (delay) = -1;
5616 }
5617 else
5618 output_restore (NULL_RTX);
5619 }
5620
5621 return "";
5622 }
5623 \f
5624 /* Functions for handling argument passing.
5625
5626 For 32-bit, the first 6 args are normally in registers and the rest are
5627 pushed. Any arg that starts within the first 6 words is at least
5628 partially passed in a register unless its data type forbids.
5629
5630 For 64-bit, the argument registers are laid out as an array of 16 elements
5631 and arguments are added sequentially. The first 6 int args and up to the
5632 first 16 fp args (depending on size) are passed in regs.
5633
5634 Slot Stack Integral Float Float in structure Double Long Double
5635 ---- ----- -------- ----- ------------------ ------ -----------
5636 15 [SP+248] %f31 %f30,%f31 %d30
5637 14 [SP+240] %f29 %f28,%f29 %d28 %q28
5638 13 [SP+232] %f27 %f26,%f27 %d26
5639 12 [SP+224] %f25 %f24,%f25 %d24 %q24
5640 11 [SP+216] %f23 %f22,%f23 %d22
5641 10 [SP+208] %f21 %f20,%f21 %d20 %q20
5642 9 [SP+200] %f19 %f18,%f19 %d18
5643 8 [SP+192] %f17 %f16,%f17 %d16 %q16
5644 7 [SP+184] %f15 %f14,%f15 %d14
5645 6 [SP+176] %f13 %f12,%f13 %d12 %q12
5646 5 [SP+168] %o5 %f11 %f10,%f11 %d10
5647 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
5648 3 [SP+152] %o3 %f7 %f6,%f7 %d6
5649 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
5650 1 [SP+136] %o1 %f3 %f2,%f3 %d2
5651 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
5652
5653 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
5654
5655 Integral arguments are always passed as 64-bit quantities appropriately
5656 extended.
5657
5658 Passing of floating point values is handled as follows.
5659 If a prototype is in scope:
5660 If the value is a named argument (i.e. not passed in the `...' part
5661 of a stdarg function) then the value is passed in the appropriate
5662 fp reg.
5663 If the value is part of the `...' and is passed in one of the first 6
5664 slots then the value is passed in the appropriate int reg.
5665 If the value is part of the `...' and is not passed in one of the first 6
5666 slots then the value is passed in memory.
5667 If a prototype is not in scope:
5668 If the value is one of the first 6 arguments the value is passed in the
5669 appropriate integer reg and the appropriate fp reg.
5670 If the value is not one of the first 6 arguments the value is passed in
5671 the appropriate fp reg and in memory.
5672
5673
5674 Summary of the calling conventions implemented by GCC on the SPARC:
5675
5676 32-bit ABI:
5677 size argument return value
5678
5679 small integer <4 int. reg. int. reg.
5680 word 4 int. reg. int. reg.
5681 double word 8 int. reg. int. reg.
5682
5683 _Complex small integer <8 int. reg. int. reg.
5684 _Complex word 8 int. reg. int. reg.
5685 _Complex double word 16 memory int. reg.
5686
5687 vector integer <=8 int. reg. FP reg.
5688 vector integer >8 memory memory
5689
5690 float 4 int. reg. FP reg.
5691 double 8 int. reg. FP reg.
5692 long double 16 memory memory
5693
5694 _Complex float 8 memory FP reg.
5695 _Complex double 16 memory FP reg.
5696 _Complex long double 32 memory FP reg.
5697
5698 vector float any memory memory
5699
5700 aggregate any memory memory
5701
5702
5703
5704 64-bit ABI:
5705 size argument return value
5706
5707 small integer <8 int. reg. int. reg.
5708 word 8 int. reg. int. reg.
5709 double word 16 int. reg. int. reg.
5710
5711 _Complex small integer <16 int. reg. int. reg.
5712 _Complex word 16 int. reg. int. reg.
5713 _Complex double word 32 memory int. reg.
5714
5715 vector integer <=16 FP reg. FP reg.
5716 vector integer 16<s<=32 memory FP reg.
5717 vector integer >32 memory memory
5718
5719 float 4 FP reg. FP reg.
5720 double 8 FP reg. FP reg.
5721 long double 16 FP reg. FP reg.
5722
5723 _Complex float 8 FP reg. FP reg.
5724 _Complex double 16 FP reg. FP reg.
5725 _Complex long double 32 memory FP reg.
5726
5727 vector float <=16 FP reg. FP reg.
5728 vector float 16<s<=32 memory FP reg.
5729 vector float >32 memory memory
5730
5731 aggregate <=16 reg. reg.
5732 aggregate 16<s<=32 memory reg.
5733 aggregate >32 memory memory
5734
5735
5736
5737 Note #1: complex floating-point types follow the extended SPARC ABIs as
5738 implemented by the Sun compiler.
5739
5740 Note #2: integral vector types follow the scalar floating-point types
5741 conventions to match what is implemented by the Sun VIS SDK.
5742
5743 Note #3: floating-point vector types follow the aggregate types
5744 conventions. */
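
/* For illustration (derived from the tables above, under the 64-bit
   conventions): given

     struct s { double d; int i; };

   a by-value argument of type 'struct s' (16 bytes) is passed in
   registers, the double in a floating-point register and the trailing
   word in the next integer register, as computed by
   function_arg_record_value below.  */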
5745
5746
5747 /* Maximum number of int regs for args. */
5748 #define SPARC_INT_ARG_MAX 6
5749 /* Maximum number of fp regs for args. */
5750 #define SPARC_FP_ARG_MAX 16
5751
5752 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
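
/* For example, with UNITS_PER_WORD == 8, ROUND_ADVANCE (12) is 2:
   a 12-byte argument advances the slot counter by two words.  */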
5753
5754 /* Handle the INIT_CUMULATIVE_ARGS macro.
5755 Initialize a variable CUM of type CUMULATIVE_ARGS
5756 for a call to a function whose data type is FNTYPE.
5757 For a library call, FNTYPE is 0. */
5758
5759 void
5760 init_cumulative_args (struct sparc_args *cum, tree fntype,
5761 rtx libname ATTRIBUTE_UNUSED,
5762 tree fndecl ATTRIBUTE_UNUSED)
5763 {
5764 cum->words = 0;
5765 cum->prototype_p = fntype && prototype_p (fntype);
5766 cum->libcall_p = fntype == 0;
5767 }
5768
5769 /* Handle promotion of pointer and integer arguments. */
5770
5771 static enum machine_mode
5772 sparc_promote_function_mode (const_tree type,
5773 enum machine_mode mode,
5774 int *punsignedp,
5775 const_tree fntype ATTRIBUTE_UNUSED,
5776 int for_return ATTRIBUTE_UNUSED)
5777 {
5778 if (type != NULL_TREE && POINTER_TYPE_P (type))
5779 {
5780 *punsignedp = POINTERS_EXTEND_UNSIGNED;
5781 return Pmode;
5782 }
5783
5784 /* Integral arguments are passed as full words, as per the ABI. */
5785 if (GET_MODE_CLASS (mode) == MODE_INT
5786 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5787 return word_mode;
5788
5789 return mode;
5790 }
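
/* So, for instance, a 'short' argument (HImode) is promoted to
   word_mode (SImode for -m32, DImode for -m64) and pointers are
   widened to Pmode.  */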
5791
5792 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
5793
5794 static bool
5795 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
5796 {
5797 return TARGET_ARCH64;
5798 }
5799
5800 /* Scan the record type TYPE and return the following predicates:
5801 - INTREGS_P: the record contains at least one field or sub-field
5802 that is eligible for promotion in integer registers.
5803 - FPREGS_P: the record contains at least one field or sub-field
5804 that is eligible for promotion in floating-point registers.
5805 - PACKED_P: the record contains at least one field that is packed.
5806
5807 Sub-fields are not taken into account for the PACKED_P predicate. */
5808
5809 static void
5810 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
5811 int *packed_p)
5812 {
5813 tree field;
5814
5815 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5816 {
5817 if (TREE_CODE (field) == FIELD_DECL)
5818 {
5819 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5820 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
5821 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5822 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5823 && TARGET_FPU)
5824 *fpregs_p = 1;
5825 else
5826 *intregs_p = 1;
5827
5828 if (packed_p && DECL_PACKED (field))
5829 *packed_p = 1;
5830 }
5831 }
5832 }
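
/* For example, scanning 'struct { float f; int i; }' sets both
   *FPREGS_P (for the float, provided TARGET_FPU) and *INTREGS_P
   (for the int) and leaves *PACKED_P untouched.  */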
5833
5834 /* Compute the slot number to pass an argument in.
5835 Return the slot number or -1 if passing on the stack.
5836
5837 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5838 the preceding args and about the function being called.
5839 MODE is the argument's machine mode.
5840 TYPE is the data type of the argument (as a tree).
5841 This is null for libcalls where that information may
5842 not be available.
5843 NAMED is nonzero if this argument is a named parameter
5844 (otherwise it is an extra parameter matching an ellipsis).
5845 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
5846 *PREGNO records the register number to use if the argument is of
scalar type.
5847 *PPADDING records the amount of padding needed in words. */
5848
5849 static int
5850 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
5851 const_tree type, bool named, bool incoming_p,
5852 int *pregno, int *ppadding)
5853 {
5854 int regbase = (incoming_p
5855 ? SPARC_INCOMING_INT_ARG_FIRST
5856 : SPARC_OUTGOING_INT_ARG_FIRST);
5857 int slotno = cum->words;
5858 enum mode_class mclass;
5859 int regno;
5860
5861 *ppadding = 0;
5862
5863 if (type && TREE_ADDRESSABLE (type))
5864 return -1;
5865
5866 if (TARGET_ARCH32
5867 && mode == BLKmode
5868 && type
5869 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
5870 return -1;
5871
5872 /* For SPARC64, objects requiring 16-byte alignment get it. */
5873 if (TARGET_ARCH64
5874 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
5875 && (slotno & 1) != 0)
5876 slotno++, *ppadding = 1;
5877
5878 mclass = GET_MODE_CLASS (mode);
5879 if (type && TREE_CODE (type) == VECTOR_TYPE)
5880 {
5881 /* Vector types deserve special treatment because they are
5882 polymorphic wrt their mode, depending upon whether VIS
5883 instructions are enabled. */
5884 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5885 {
5886 /* The SPARC port defines no floating-point vector modes. */
5887 gcc_assert (mode == BLKmode);
5888 }
5889 else
5890 {
5891 /* Integral vector types should either have a vector
5892 mode or an integral mode, because we are guaranteed
5893 by pass_by_reference that their size is not greater
5894 than 16 bytes and TImode is 16 bytes wide. */
5895 gcc_assert (mode != BLKmode);
5896
5897 /* Vector integers are handled like floats according to
5898 the Sun VIS SDK. */
5899 mclass = MODE_FLOAT;
5900 }
5901 }
5902
5903 switch (mclass)
5904 {
5905 case MODE_FLOAT:
5906 case MODE_COMPLEX_FLOAT:
5907 case MODE_VECTOR_INT:
5908 if (TARGET_ARCH64 && TARGET_FPU && named)
5909 {
5910 if (slotno >= SPARC_FP_ARG_MAX)
5911 return -1;
5912 regno = SPARC_FP_ARG_FIRST + slotno * 2;
5913 /* Arguments filling only one single FP register are
5914 right-justified in the outer double FP register. */
5915 if (GET_MODE_SIZE (mode) <= 4)
5916 regno++;
5917 break;
5918 }
5919 /* fallthrough */
5920
5921 case MODE_INT:
5922 case MODE_COMPLEX_INT:
5923 if (slotno >= SPARC_INT_ARG_MAX)
5924 return -1;
5925 regno = regbase + slotno;
5926 break;
5927
5928 case MODE_RANDOM:
5929 if (mode == VOIDmode)
5930 /* MODE is VOIDmode when generating the actual call. */
5931 return -1;
5932
5933 gcc_assert (mode == BLKmode);
5934
5935 if (TARGET_ARCH32
5936 || !type
5937 || (TREE_CODE (type) != VECTOR_TYPE
5938 && TREE_CODE (type) != RECORD_TYPE))
5939 {
5940 if (slotno >= SPARC_INT_ARG_MAX)
5941 return -1;
5942 regno = regbase + slotno;
5943 }
5944 else /* TARGET_ARCH64 && type */
5945 {
5946 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
5947
5948 /* First see what kinds of registers we would need. */
5949 if (TREE_CODE (type) == VECTOR_TYPE)
5950 fpregs_p = 1;
5951 else
5952 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
5953
5954 /* The ABI obviously doesn't specify how packed structures
5955 are passed. These are defined to be passed in int regs
5956 if possible, otherwise memory. */
5957 if (packed_p || !named)
5958 fpregs_p = 0, intregs_p = 1;
5959
5960 /* If all arg slots are filled, then must pass on stack. */
5961 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
5962 return -1;
5963
5964 /* If there are only int args and all int arg slots are filled,
5965 then must pass on stack. */
5966 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
5967 return -1;
5968
5969 /* Note that even if all int arg slots are filled, fp members may
5970 still be passed in regs if such regs are available.
5971 *PREGNO isn't set because there may be more than one, it's up
5972 to the caller to compute them. */
5973 return slotno;
5974 }
5975 break;
5976
5977 default:
5978 gcc_unreachable ();
5979 }
5980
5981 *pregno = regno;
5982 return slotno;
5983 }
5984
5985 /* Handle recursive register counting for structure field layout. */
5986
5987 struct function_arg_record_value_parms
5988 {
5989 rtx ret; /* return expression being built. */
5990 int slotno; /* slot number of the argument. */
5991 int named; /* whether the argument is named. */
5992 int regbase; /* regno of the base register. */
5993 int stack; /* 1 if part of the argument is on the stack. */
5994 int intoffset; /* offset of the first pending integer field. */
5995 unsigned int nregs; /* number of words passed in registers. */
5996 };
5997
5998 static void function_arg_record_value_3
5999 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6000 static void function_arg_record_value_2
6001 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6002 static void function_arg_record_value_1
6003 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6004 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6005 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6006
6007 /* A subroutine of function_arg_record_value. Traverse the structure
6008 recursively and determine how many registers will be required. */
6009
6010 static void
6011 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6012 struct function_arg_record_value_parms *parms,
6013 bool packed_p)
6014 {
6015 tree field;
6016
6017 /* We need to compute how many registers are needed so we can
6018 allocate the PARALLEL but before we can do that we need to know
6019 whether there are any packed fields. The ABI obviously doesn't
6020 specify how structures are passed in this case, so they are
6021 defined to be passed in int regs if possible, otherwise memory,
6022 regardless of whether there are fp values present. */
6023
6024 if (! packed_p)
6025 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6026 {
6027 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6028 {
6029 packed_p = true;
6030 break;
6031 }
6032 }
6033
6034 /* Compute how many registers we need. */
6035 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6036 {
6037 if (TREE_CODE (field) == FIELD_DECL)
6038 {
6039 HOST_WIDE_INT bitpos = startbitpos;
6040
6041 if (DECL_SIZE (field) != 0)
6042 {
6043 if (integer_zerop (DECL_SIZE (field)))
6044 continue;
6045
6046 if (host_integerp (bit_position (field), 1))
6047 bitpos += int_bit_position (field);
6048 }
6049
6050 /* ??? FIXME: else assume zero offset. */
6051
6052 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6053 function_arg_record_value_1 (TREE_TYPE (field),
6054 bitpos,
6055 parms,
6056 packed_p);
6057 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6058 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6059 && TARGET_FPU
6060 && parms->named
6061 && ! packed_p)
6062 {
6063 if (parms->intoffset != -1)
6064 {
6065 unsigned int startbit, endbit;
6066 int intslots, this_slotno;
6067
6068 startbit = parms->intoffset & -BITS_PER_WORD;
6069 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6070
6071 intslots = (endbit - startbit) / BITS_PER_WORD;
6072 this_slotno = parms->slotno + parms->intoffset
6073 / BITS_PER_WORD;
6074
6075 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6076 {
6077 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6078 /* We need to pass this field on the stack. */
6079 parms->stack = 1;
6080 }
6081
6082 parms->nregs += intslots;
6083 parms->intoffset = -1;
6084 }
6085
6086 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
6087 If it weren't true, we wouldn't be here. */
6088 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6089 && DECL_MODE (field) == BLKmode)
6090 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6091 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6092 parms->nregs += 2;
6093 else
6094 parms->nregs += 1;
6095 }
6096 else
6097 {
6098 if (parms->intoffset == -1)
6099 parms->intoffset = bitpos;
6100 }
6101 }
6102 }
6103 }
6104
6105 /* A subroutine of function_arg_record_value. Assign the bits of the
6106 structure between parms->intoffset and bitpos to integer registers. */
6107
6108 static void
6109 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6110 struct function_arg_record_value_parms *parms)
6111 {
6112 enum machine_mode mode;
6113 unsigned int regno;
6114 unsigned int startbit, endbit;
6115 int this_slotno, intslots, intoffset;
6116 rtx reg;
6117
6118 if (parms->intoffset == -1)
6119 return;
6120
6121 intoffset = parms->intoffset;
6122 parms->intoffset = -1;
6123
6124 startbit = intoffset & -BITS_PER_WORD;
6125 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6126 intslots = (endbit - startbit) / BITS_PER_WORD;
6127 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6128
6129 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6130 if (intslots <= 0)
6131 return;
6132
6133 /* If this is the trailing part of a word, only load that much into
6134 the register. Otherwise load the whole register. Note that in
6135 the latter case we may pick up unwanted bits. It's not a problem
6136 at the moment, but we may wish to revisit this. */
6137
6138 if (intoffset % BITS_PER_WORD != 0)
6139 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6140 MODE_INT);
6141 else
6142 mode = word_mode;
6143
6144 intoffset /= BITS_PER_UNIT;
6145 do
6146 {
6147 regno = parms->regbase + this_slotno;
6148 reg = gen_rtx_REG (mode, regno);
6149 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6150 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6151
6152 this_slotno += 1;
6153 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6154 mode = word_mode;
6155 parms->nregs += 1;
6156 intslots -= 1;
6157 }
6158 while (intslots > 0);
6159 }
6160
6161 /* A subroutine of function_arg_record_value. Traverse the structure
6162 recursively and assign bits to floating point registers. Track which
6163 bits in between need integer registers; invoke function_arg_record_value_3
6164 to make that happen. */
6165
6166 static void
6167 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6168 struct function_arg_record_value_parms *parms,
6169 bool packed_p)
6170 {
6171 tree field;
6172
6173 if (! packed_p)
6174 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6175 {
6176 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6177 {
6178 packed_p = true;
6179 break;
6180 }
6181 }
6182
6183 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6184 {
6185 if (TREE_CODE (field) == FIELD_DECL)
6186 {
6187 HOST_WIDE_INT bitpos = startbitpos;
6188
6189 if (DECL_SIZE (field) != 0)
6190 {
6191 if (integer_zerop (DECL_SIZE (field)))
6192 continue;
6193
6194 if (host_integerp (bit_position (field), 1))
6195 bitpos += int_bit_position (field);
6196 }
6197
6198 /* ??? FIXME: else assume zero offset. */
6199
6200 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6201 function_arg_record_value_2 (TREE_TYPE (field),
6202 bitpos,
6203 parms,
6204 packed_p);
6205 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6206 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6207 && TARGET_FPU
6208 && parms->named
6209 && ! packed_p)
6210 {
6211 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6212 int regno, nregs, pos;
6213 enum machine_mode mode = DECL_MODE (field);
6214 rtx reg;
6215
6216 function_arg_record_value_3 (bitpos, parms);
6217
6218 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6219 && mode == BLKmode)
6220 {
6221 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6222 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6223 }
6224 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6225 {
6226 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6227 nregs = 2;
6228 }
6229 else
6230 nregs = 1;
6231
6232 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6233 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6234 regno++;
6235 reg = gen_rtx_REG (mode, regno);
6236 pos = bitpos / BITS_PER_UNIT;
6237 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6238 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6239 parms->nregs += 1;
6240 while (--nregs > 0)
6241 {
6242 regno += GET_MODE_SIZE (mode) / 4;
6243 reg = gen_rtx_REG (mode, regno);
6244 pos += GET_MODE_SIZE (mode);
6245 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6246 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6247 parms->nregs += 1;
6248 }
6249 }
6250 else
6251 {
6252 if (parms->intoffset == -1)
6253 parms->intoffset = bitpos;
6254 }
6255 }
6256 }
6257 }
6258
6259 /* Used by function_arg and sparc_function_value_1 to implement the complex
6260 conventions of the 64-bit ABI for passing and returning structures.
6261 Return an expression valid as a return value for both FUNCTION_ARG
6262 and TARGET_FUNCTION_VALUE.
6263
6264 TYPE is the data type of the argument (as a tree).
6265 This is null for libcalls where that information may
6266 not be available.
6267 MODE is the argument's machine mode.
6268 SLOTNO is the index number of the argument's slot in the parameter array.
6269 NAMED is nonzero if this argument is a named parameter
6270 (otherwise it is an extra parameter matching an ellipsis).
6271 REGBASE is the regno of the base register for the parameter array. */
6272
6273 static rtx
6274 function_arg_record_value (const_tree type, enum machine_mode mode,
6275 int slotno, int named, int regbase)
6276 {
6277 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6278 struct function_arg_record_value_parms parms;
6279 unsigned int nregs;
6280
6281 parms.ret = NULL_RTX;
6282 parms.slotno = slotno;
6283 parms.named = named;
6284 parms.regbase = regbase;
6285 parms.stack = 0;
6286
6287 /* Compute how many registers we need. */
6288 parms.nregs = 0;
6289 parms.intoffset = 0;
6290 function_arg_record_value_1 (type, 0, &parms, false);
6291
6292 /* Take into account pending integer fields. */
6293 if (parms.intoffset != -1)
6294 {
6295 unsigned int startbit, endbit;
6296 int intslots, this_slotno;
6297
6298 startbit = parms.intoffset & -BITS_PER_WORD;
6299 endbit = (typesize * BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6300 intslots = (endbit - startbit) / BITS_PER_WORD;
6301 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6302
6303 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6304 {
6305 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6306 /* We need to pass this field on the stack. */
6307 parms.stack = 1;
6308 }
6309
6310 parms.nregs += intslots;
6311 }
6312 nregs = parms.nregs;
6313
6314 /* Allocate the vector and handle some annoying special cases. */
6315 if (nregs == 0)
6316 {
6317 /* ??? Empty structure has no value? Duh? */
6318 if (typesize <= 0)
6319 {
6320 /* Though there's nothing really to store, return a word register
6321 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6322 leads to breakage due to the fact that there are zero bytes to
6323 load. */
6324 return gen_rtx_REG (mode, regbase);
6325 }
6326 else
6327 {
6328 /* ??? C++ has structures with no fields, and yet a size. Give up
6329 for now and pass everything back in integer registers. */
6330 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6331 }
6332 if (nregs + slotno > SPARC_INT_ARG_MAX)
6333 nregs = SPARC_INT_ARG_MAX - slotno;
6334 }
6335 gcc_assert (nregs != 0);
6336
6337 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6338
6339 /* If at least one field must be passed on the stack, generate
6340 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6341 also be passed on the stack. We can't do much better because the
6342 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6343 of structures for which the fields passed exclusively in registers
6344 are not at the beginning of the structure. */
6345 if (parms.stack)
6346 XVECEXP (parms.ret, 0, 0)
6347 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6348
6349 /* Fill in the entries. */
6350 parms.nregs = 0;
6351 parms.intoffset = 0;
6352 function_arg_record_value_2 (type, 0, &parms, false);
6353 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6354
6355 gcc_assert (parms.nregs == nregs);
6356
6357 return parms.ret;
6358 }
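
/* As a concrete illustration, for 'struct { double d; int i; }' passed
   by value in the first slot (as an outgoing argument), the PARALLEL
   built above is roughly

     (parallel [(expr_list (reg:DF %f0) (const_int 0))
                (expr_list (reg:DI %o1) (const_int 8))])

   i.e. the double travels in a FP register and the word containing the
   int in the second integer argument register, at byte offset 8 within
   the structure.  */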
6359
6360 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6361 of the 64-bit ABI for passing and returning unions.
6362 Return an expression valid as a return value for both FUNCTION_ARG
6363 and TARGET_FUNCTION_VALUE.
6364
6365 SIZE is the size in bytes of the union.
6366 MODE is the argument's machine mode.
SLOTNO is the index number of the union's slot in the parameter array.
6367 REGNO is the hard register the union will be passed in. */
6368
6369 static rtx
6370 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6371 int regno)
6372 {
6373 int nwords = ROUND_ADVANCE (size), i;
6374 rtx regs;
6375
6376 /* See comment in previous function for empty structures. */
6377 if (nwords == 0)
6378 return gen_rtx_REG (mode, regno);
6379
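  /* If the union starts in the last available register slot, only one
     word can go in a register; the rest is passed on the stack.  */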
6380 if (slotno == SPARC_INT_ARG_MAX - 1)
6381 nwords = 1;
6382
6383 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6384
6385 for (i = 0; i < nwords; i++)
6386 {
6387 /* Unions are passed left-justified. */
6388 XVECEXP (regs, 0, i)
6389 = gen_rtx_EXPR_LIST (VOIDmode,
6390 gen_rtx_REG (word_mode, regno),
6391 GEN_INT (UNITS_PER_WORD * i));
6392 regno++;
6393 }
6394
6395 return regs;
6396 }
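
/* For example, a 12-byte union starting in slot 0 gives a PARALLEL of
   two word registers, %o0 at byte offset 0 and %o1 at byte offset 8,
   per the left-justified rule noted above.  */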
6397
6398 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6399 for passing and returning large (BLKmode) vectors.
6400 Return an expression valid as a return value for both FUNCTION_ARG
6401 and TARGET_FUNCTION_VALUE.
6402
6403 SIZE is the size in bytes of the vector (at least 8 bytes).
6404 REGNO is the FP hard register the vector will be passed in. */
6405
6406 static rtx
6407 function_arg_vector_value (int size, int regno)
6408 {
6409 int i, nregs = size / 8;
6410 rtx regs;
6411
6412 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6413
6414 for (i = 0; i < nregs; i++)
6415 {
6416 XVECEXP (regs, 0, i)
6417 = gen_rtx_EXPR_LIST (VOIDmode,
6418 gen_rtx_REG (DImode, regno + 2 * i),
6419 GEN_INT (i * 8));
6420 }
6421
6422 return regs;
6423 }
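
/* For instance, a 16-byte vector starting at %f0 yields roughly

     (parallel [(expr_list (reg:DI %f0) (const_int 0))
                (expr_list (reg:DI %f2) (const_int 8))])

   one DImode piece per 8 bytes, in consecutive even-numbered FP
   registers.  */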
6424
6425 /* Determine where to put an argument to a function.
6426 Value is zero to push the argument on the stack,
6427 or a hard register in which to store the argument.
6428
6429 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6430 the preceding args and about the function being called.
6431 MODE is the argument's machine mode.
6432 TYPE is the data type of the argument (as a tree).
6433 This is null for libcalls where that information may
6434 not be available.
6435 NAMED is true if this argument is a named parameter
6436 (otherwise it is an extra parameter matching an ellipsis).
6437 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6438 TARGET_FUNCTION_INCOMING_ARG. */
6439
6440 static rtx
6441 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6442 const_tree type, bool named, bool incoming_p)
6443 {
6444 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6445
6446 int regbase = (incoming_p
6447 ? SPARC_INCOMING_INT_ARG_FIRST
6448 : SPARC_OUTGOING_INT_ARG_FIRST);
6449 int slotno, regno, padding;
6450 enum mode_class mclass = GET_MODE_CLASS (mode);
6451
6452 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6453 &regno, &padding);
6454 if (slotno == -1)
6455 return 0;
6456
6457 /* Vector types deserve special treatment because they are polymorphic wrt
6458 their mode, depending upon whether VIS instructions are enabled. */
6459 if (type && TREE_CODE (type) == VECTOR_TYPE)
6460 {
6461 HOST_WIDE_INT size = int_size_in_bytes (type);
6462 gcc_assert ((TARGET_ARCH32 && size <= 8)
6463 || (TARGET_ARCH64 && size <= 16));
6464
6465 if (mode == BLKmode)
6466 return function_arg_vector_value (size,
6467 SPARC_FP_ARG_FIRST + 2*slotno);
6468 else
6469 mclass = MODE_FLOAT;
6470 }
6471
6472 if (TARGET_ARCH32)
6473 return gen_rtx_REG (mode, regno);
6474
6475 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6476 and are promoted to registers if possible. */
6477 if (type && TREE_CODE (type) == RECORD_TYPE)
6478 {
6479 HOST_WIDE_INT size = int_size_in_bytes (type);
6480 gcc_assert (size <= 16);
6481
6482 return function_arg_record_value (type, mode, slotno, named, regbase);
6483 }
6484
6485 /* Unions up to 16 bytes in size are passed in integer registers. */
6486 else if (type && TREE_CODE (type) == UNION_TYPE)
6487 {
6488 HOST_WIDE_INT size = int_size_in_bytes (type);
6489 gcc_assert (size <= 16);
6490
6491 return function_arg_union_value (size, mode, slotno, regno);
6492 }
6493
6494 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6495 but also have the slot allocated for them.
6496 If no prototype is in scope fp values in register slots get passed
6497 in two places, either fp regs and int regs or fp regs and memory. */
6498 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6499 && SPARC_FP_REG_P (regno))
6500 {
6501 rtx reg = gen_rtx_REG (mode, regno);
6502 if (cum->prototype_p || cum->libcall_p)
6503 {
6504 /* "* 2" because fp reg numbers are recorded in 4 byte
6505 quantities. */
6506 #if 0
6507 /* ??? This will cause the value to be passed in the fp reg and
6508 in the stack. When a prototype exists we want to pass the
6509 value in the reg but reserve space on the stack. That's an
6510 optimization, and is deferred [for a bit]. */
6511 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6512 return gen_rtx_PARALLEL (mode,
6513 gen_rtvec (2,
6514 gen_rtx_EXPR_LIST (VOIDmode,
6515 NULL_RTX, const0_rtx),
6516 gen_rtx_EXPR_LIST (VOIDmode,
6517 reg, const0_rtx)));
6518 else
6519 #else
6520 /* ??? It seems that passing back a register even when past
6521 the area declared by REG_PARM_STACK_SPACE will allocate
6522 space appropriately, and will not copy the data onto the
6523 stack, exactly as we desire.
6524
6525 This is due to locate_and_pad_parm being called in
6526 expand_call whenever reg_parm_stack_space > 0, which
6527 while beneficial to our example here, would seem to be
6528 in error from what had been intended. Ho hum... -- r~ */
6529 #endif
6530 return reg;
6531 }
6532 else
6533 {
6534 rtx v0, v1;
6535
6536 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6537 {
6538 int intreg;
6539
6540 /* On the incoming side, we needn't know that the value is passed
6541 in both %f0 and %i0; exposing that confuses other parts of the
6542 compiler, causing needless spillage in even the simplest cases. */
6543 if (incoming_p)
6544 return reg;
6545
6546 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6547 + (regno - SPARC_FP_ARG_FIRST) / 2);
6548
6549 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6550 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6551 const0_rtx);
6552 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6553 }
6554 else
6555 {
6556 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6557 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6558 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6559 }
6560 }
6561 }
6562
6563 /* All other aggregate types are passed in an integer register in a mode
6564 corresponding to the size of the type. */
6565 else if (type && AGGREGATE_TYPE_P (type))
6566 {
6567 HOST_WIDE_INT size = int_size_in_bytes (type);
6568 gcc_assert (size <= 16);
6569
6570 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6571 }
6572
6573 return gen_rtx_REG (mode, regno);
6574 }
6575
6576 /* Handle the TARGET_FUNCTION_ARG target hook. */
6577
6578 static rtx
6579 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6580 const_tree type, bool named)
6581 {
6582 return sparc_function_arg_1 (cum, mode, type, named, false);
6583 }
6584
6585 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6586
6587 static rtx
6588 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6589 const_tree type, bool named)
6590 {
6591 return sparc_function_arg_1 (cum, mode, type, named, true);
6592 }
6593
6594 /* For SPARC64, objects requiring 16-byte alignment are passed that way. */
6595
6596 static unsigned int
6597 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
6598 {
6599 return ((TARGET_ARCH64
6600 && (GET_MODE_ALIGNMENT (mode) == 128
6601 || (type && TYPE_ALIGN (type) == 128)))
6602 ? 128
6603 : PARM_BOUNDARY);
6604 }
6605
6606 /* For an arg passed partly in registers and partly in memory,
6607 this is the number of bytes of registers used.
6608 For args passed entirely in registers or entirely in memory, zero.
6609
6610 Any arg that starts in the first 6 regs but won't entirely fit in them
6611 needs partial registers on v8. On v9, structures with integer
6612 values in arg slots 5 and 6 will be passed in %o5 and SP+176, and complex fp
6613 values that begin in the last fp reg [where "last fp reg" varies with the
6614 mode] will be split between that reg and memory. */
6615
6616 static int
6617 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
6618 tree type, bool named)
6619 {
6620 int slotno, regno, padding;
6621
6622 /* We pass false for incoming_p here; it doesn't matter. */
6623 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6624 false, &regno, &padding);
6625
6626 if (slotno == -1)
6627 return 0;
6628
6629 if (TARGET_ARCH32)
6630 {
6631 if ((slotno + (mode == BLKmode
6632 ? ROUND_ADVANCE (int_size_in_bytes (type))
6633 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
6634 > SPARC_INT_ARG_MAX)
6635 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
6636 }
6637 else
6638 {
6639 /* We are guaranteed by pass_by_reference that the size of the
6640 argument is not greater than 16 bytes, so we only need to return
6641 one word if the argument is partially passed in registers. */
6642
6643 if (type && AGGREGATE_TYPE_P (type))
6644 {
6645 int size = int_size_in_bytes (type);
6646
6647 if (size > UNITS_PER_WORD
6648 && slotno == SPARC_INT_ARG_MAX - 1)
6649 return UNITS_PER_WORD;
6650 }
6651 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
6652 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6653 && ! (TARGET_FPU && named)))
6654 {
6655 /* The complex types are passed as packed types. */
6656 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6657 && slotno == SPARC_INT_ARG_MAX - 1)
6658 return UNITS_PER_WORD;
6659 }
6660 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6661 {
6662 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
6663 > SPARC_FP_ARG_MAX)
6664 return UNITS_PER_WORD;
6665 }
6666 }
6667
6668 return 0;
6669 }
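
/* For instance, under the 32-bit ABI a DImode argument starting in
   slot 5 is split: its first word goes in %o5 and its second word on
   the stack, so the hook above reports 4 bytes passed in registers.  */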
6670
6671 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6672 Specify whether to pass the argument by reference. */
6673
6674 static bool
6675 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6676 enum machine_mode mode, const_tree type,
6677 bool named ATTRIBUTE_UNUSED)
6678 {
6679 if (TARGET_ARCH32)
6680 /* Original SPARC 32-bit ABI says that structures and unions,
6681 and quad-precision floats are passed by reference. For Pascal,
6682 also pass arrays by reference. All other base types are passed
6683 in registers.
6684
6685 Extended ABI (as implemented by the Sun compiler) says that all
6686 complex floats are passed by reference. Pass complex integers
6687 in registers up to 8 bytes. More generally, enforce the 2-word
6688 cap for passing arguments in registers.
6689
6690 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6691 integers are passed like floats of the same size, that is in
6692 registers up to 8 bytes. Pass all vector floats by reference
6693 like structure and unions. */
6694 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6695 || mode == SCmode
6696 /* Catch CDImode, TFmode, DCmode and TCmode. */
6697 || GET_MODE_SIZE (mode) > 8
6698 || (type
6699 && TREE_CODE (type) == VECTOR_TYPE
6700 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6701 else
6702 /* Original SPARC 64-bit ABI says that structures and unions
6703 of at most 16 bytes are passed in registers, as well as
6704 all other base types.
6705
6706 Extended ABI (as implemented by the Sun compiler) says that
6707 complex floats are passed in registers up to 16 bytes. Pass
6708 all complex integers in registers up to 16 bytes. More generally,
6709 enforce the 2-word cap for passing arguments in registers.
6710
6711 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6712 integers are passed like floats of the same size, that is in
6713 registers (up to 16 bytes). Pass all vector floats like structure
6714 and unions. */
6715 return ((type
6716 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
6717 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6718 /* Catch CTImode and TCmode. */
6719 || GET_MODE_SIZE (mode) > 16);
6720 }
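
/* Concretely: for -m32, 'long double' (TFmode, 16 bytes) and all
   aggregates go by reference; for -m64, a 16-byte struct is still
   passed by value in registers while a 32-byte one goes by
   reference.  */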
6721
6722 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
6723 Update the data in CUM to advance over an argument
6724 of mode MODE and data type TYPE.
6725 TYPE is null for libcalls where that information may not be available. */
6726
6727 static void
6728 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6729 const_tree type, bool named)
6730 {
6731 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6732 int regno, padding;
6733
6734 /* We pass false for incoming_p here; it doesn't matter. */
6735 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
6736
6737 /* If argument requires leading padding, add it. */
6738 cum->words += padding;
6739
6740 if (TARGET_ARCH32)
6741 {
6742 cum->words += (mode != BLKmode
6743 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6744 : ROUND_ADVANCE (int_size_in_bytes (type)));
6745 }
6746 else
6747 {
6748 if (type && AGGREGATE_TYPE_P (type))
6749 {
6750 int size = int_size_in_bytes (type);
6751
6752 if (size <= 8)
6753 ++cum->words;
6754 else if (size <= 16)
6755 cum->words += 2;
6756 else /* passed by reference */
6757 ++cum->words;
6758 }
6759 else
6760 {
6761 cum->words += (mode != BLKmode
6762 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6763 : ROUND_ADVANCE (int_size_in_bytes (type)));
6764 }
6765 }
6766 }
6767
6768 /* Handle the FUNCTION_ARG_PADDING macro.
6769 For the 64-bit ABI, structs are always stored left-shifted in their
6770 argument slot. */
6771
6772 enum direction
6773 function_arg_padding (enum machine_mode mode, const_tree type)
6774 {
6775 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
6776 return upward;
6777
6778 /* Fall back to the default. */
6779 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
6780 }
6781
6782 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
6783 Specify whether to return the return value in memory. */
6784
6785 static bool
6786 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6787 {
6788 if (TARGET_ARCH32)
6789 /* Original SPARC 32-bit ABI says that structures and unions,
6790 and quad-precision floats are returned in memory. All other
6791 base types are returned in registers.
6792
6793 Extended ABI (as implemented by the Sun compiler) says that
6794 all complex floats are returned in registers (8 FP registers
6795 at most for '_Complex long double'). Return all complex integers
6796 in registers (4 at most for '_Complex long long').
6797
6798 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6799 integers are returned like floats of the same size, that is in
6800 registers up to 8 bytes and in memory otherwise. Return all
6801 vector floats in memory like structure and unions; note that
6802 they always have BLKmode like the latter. */
6803 return (TYPE_MODE (type) == BLKmode
6804 || TYPE_MODE (type) == TFmode
6805 || (TREE_CODE (type) == VECTOR_TYPE
6806 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6807 else
6808 /* Original SPARC 64-bit ABI says that structures and unions
6809 of at most 32 bytes are returned in registers, as well as
6810 all other base types.
6811
6812 Extended ABI (as implemented by the Sun compiler) says that all
6813 complex floats are returned in registers (8 FP registers at most
6814 for '_Complex long double'). Return all complex integers in
6815 registers (4 at most for '_Complex TItype').
6816
6817 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6818 integers are returned like floats of the same size, that is in
6819 registers. Return all vector floats like structure and unions;
6820 note that they always have BLKmode like the latter. */
6821 return (TYPE_MODE (type) == BLKmode
6822 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
6823 }
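
/* For example, with -m32 a '_Complex double' (DCmode, 16 bytes) is
   returned in registers even though it is passed by reference, whereas
   any BLKmode struct is returned in memory; with -m64, structs up to
   32 bytes come back in registers.  */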
6824
6825 /* Handle the TARGET_STRUCT_VALUE target hook.
6826 Return where to find the structure return value address. */
6827
6828 static rtx
6829 sparc_struct_value_rtx (tree fndecl, int incoming)
6830 {
6831 if (TARGET_ARCH64)
6832 return 0;
6833 else
6834 {
6835 rtx mem;
6836
6837 if (incoming)
6838 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
6839 STRUCT_VALUE_OFFSET));
6840 else
6841 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
6842 STRUCT_VALUE_OFFSET));
6843
6844 /* Only follow the SPARC ABI for fixed-size structure returns.
6845 Variable-size structure returns are handled per the normal
6846 procedures in GCC. This is enabled by -mstd-struct-return. */
6847 if (incoming == 2
6848 && sparc_std_struct_return
6849 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
6850 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
6851 {
6852 /* We must check and adjust the return address, as it is
6853 optional as to whether the return object is really
6854 provided. */
6855 rtx ret_reg = gen_rtx_REG (Pmode, 31); /* %i7, the return address. */
6856 rtx scratch = gen_reg_rtx (SImode);
6857 rtx endlab = gen_label_rtx ();
6858
6859 /* Calculate the return object size.  */
6860 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
6861 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
6862 /* Construct a temporary return value.  */
6863 rtx temp_val
6864 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
6865
6866 /* Implement SPARC 32-bit psABI callee return struct checking:
6867
6868 Fetch the instruction where we will return to and see if
6869 it's an unimp instruction (the most significant 10 bits
6870 will be zero). */
6871 emit_move_insn (scratch, gen_rtx_MEM (SImode,
6872 plus_constant (Pmode,
6873 ret_reg, 8)));
6874 /* Assume the size is valid and pre-adjust.  */
6875 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
6876 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
6877 0, endlab);
6878 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
6879 /* Write the address of the memory pointed to by temp_val into
6880 the memory pointed to by mem.  */
6881 emit_move_insn (mem, XEXP (temp_val, 0));
6882 emit_label (endlab);
6883 }
6884
6885 return mem;
6886 }
6887 }
6888
6889 /* Handle the TARGET_FUNCTION_VALUE and TARGET_LIBCALL_VALUE target hooks.
6890 For v9, function return values are subject to the same rules as arguments,
6891 except that up to 32 bytes may be returned in registers. */
6892
6893 static rtx
6894 sparc_function_value_1 (const_tree type, enum machine_mode mode,
6895 bool outgoing)
6896 {
6897 /* Beware that the two values are swapped here wrt function_arg. */
6898 int regbase = (outgoing
6899 ? SPARC_INCOMING_INT_ARG_FIRST
6900 : SPARC_OUTGOING_INT_ARG_FIRST);
6901 enum mode_class mclass = GET_MODE_CLASS (mode);
6902 int regno;
6903
6904 /* Vector types deserve special treatment because they are polymorphic wrt
6905 their mode, depending upon whether VIS instructions are enabled. */
6906 if (type && TREE_CODE (type) == VECTOR_TYPE)
6907 {
6908 HOST_WIDE_INT size = int_size_in_bytes (type);
6909 gcc_assert ((TARGET_ARCH32 && size <= 8)
6910 || (TARGET_ARCH64 && size <= 32));
6911
6912 if (mode == BLKmode)
6913 return function_arg_vector_value (size,
6914 SPARC_FP_ARG_FIRST);
6915 else
6916 mclass = MODE_FLOAT;
6917 }
6918
6919 if (TARGET_ARCH64 && type)
6920 {
6921 /* Structures up to 32 bytes in size are returned in registers. */
6922 if (TREE_CODE (type) == RECORD_TYPE)
6923 {
6924 HOST_WIDE_INT size = int_size_in_bytes (type);
6925 gcc_assert (size <= 32);
6926
6927 return function_arg_record_value (type, mode, 0, 1, regbase);
6928 }
6929
6930 /* Unions up to 32 bytes in size are returned in integer registers. */
6931 else if (TREE_CODE (type) == UNION_TYPE)
6932 {
6933 HOST_WIDE_INT size = int_size_in_bytes (type);
6934 gcc_assert (size <= 32);
6935
6936 return function_arg_union_value (size, mode, 0, regbase);
6937 }
6938
6939 /* Objects that require it are returned in FP registers. */
6940 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6941 ;
6942
6943 /* All other aggregate types are returned in an integer register in a
6944 mode corresponding to the size of the type. */
6945 else if (AGGREGATE_TYPE_P (type))
6946 {
6949 HOST_WIDE_INT size = int_size_in_bytes (type);
6950 gcc_assert (size <= 32);
6951
6952 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6953
6954 /* ??? We probably should have made the same ABI change in
6955 3.4.0 as the one we made for unions. The latter was
6956 required by the SCD though, while the former is not
6957 specified, so we favored compatibility and efficiency.
6958
6959 Now we're stuck for aggregates larger than 16 bytes,
6960 because OImode vanished in the meantime. Let's not
6961 try to be unduly clever, and simply follow the ABI
6962 for unions in that case. */
6963 if (mode == BLKmode)
6964 return function_arg_union_value (size, mode, 0, regbase);
6965 else
6966 mclass = MODE_INT;
6967 }
6968
6969 /* We should only have pointer and integer types at this point. This
6970 must match sparc_promote_function_mode. */
6971 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6972 mode = word_mode;
6973 }
6974
6975 /* We should only have pointer and integer types at this point. This must
6976 match sparc_promote_function_mode. */
6977 else if (TARGET_ARCH32
6978 && mclass == MODE_INT
6979 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6980 mode = word_mode;
6981
6982 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
6983 regno = SPARC_FP_ARG_FIRST;
6984 else
6985 regno = regbase;
6986
6987 return gen_rtx_REG (mode, regno);
6988 }
6989
6990 /* Handle TARGET_FUNCTION_VALUE.
6991 On the SPARC, the value is found in the first "output" register, but the
6992 called function leaves it in the first "input" register. */
6993
6994 static rtx
6995 sparc_function_value (const_tree valtype,
6996 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
6997 bool outgoing)
6998 {
6999 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7000 }
7001
7002 /* Handle TARGET_LIBCALL_VALUE. */
7003
7004 static rtx
7005 sparc_libcall_value (enum machine_mode mode,
7006 const_rtx fun ATTRIBUTE_UNUSED)
7007 {
7008 return sparc_function_value_1 (NULL_TREE, mode, false);
7009 }
7010
7011 /* Handle FUNCTION_VALUE_REGNO_P.
7012 On the SPARC, the first "output" reg is used for integer values, and the
7013 first floating point register is used for floating point values. */
7014
7015 static bool
7016 sparc_function_value_regno_p (const unsigned int regno)
7017 {
7018 return (regno == 8 || regno == 32);
7019 }
7020
7021 /* Do what is necessary for `va_start'. We look at the current function
7022 to determine if stdarg or varargs is used and return the address of
7023 the first unnamed parameter. */
7024
7025 static rtx
7026 sparc_builtin_saveregs (void)
7027 {
7028 int first_reg = crtl->args.info.words;
7029 rtx address;
7030 int regno;
7031
7032 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7033 emit_move_insn (gen_rtx_MEM (word_mode,
7034 gen_rtx_PLUS (Pmode,
7035 frame_pointer_rtx,
7036 GEN_INT (FIRST_PARM_OFFSET (0)
7037 + (UNITS_PER_WORD
7038 * regno)))),
7039 gen_rtx_REG (word_mode,
7040 SPARC_INCOMING_INT_ARG_FIRST + regno));
7041
7042 address = gen_rtx_PLUS (Pmode,
7043 frame_pointer_rtx,
7044 GEN_INT (FIRST_PARM_OFFSET (0)
7045 + UNITS_PER_WORD * first_reg));
7046
7047 return address;
7048 }
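
/* For 'int f (int a, ...)', for instance, one slot is named, so the
   loop above dumps %i1 through %i5 into their stack slots and the
   returned address points at the slot of the first anonymous word.  */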
7049
7050 /* Implement `va_start' for stdarg. */
7051
7052 static void
7053 sparc_va_start (tree valist, rtx nextarg)
7054 {
7055 nextarg = expand_builtin_saveregs ();
7056 std_expand_builtin_va_start (valist, nextarg);
7057 }
7058
7059 /* Implement `va_arg' for stdarg. */
7060
7061 static tree
7062 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7063 gimple_seq *post_p)
7064 {
7065 HOST_WIDE_INT size, rsize, align;
7066 tree addr, incr;
7067 bool indirect;
7068 tree ptrtype = build_pointer_type (type);
7069
7070 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7071 {
7072 indirect = true;
7073 size = rsize = UNITS_PER_WORD;
7074 align = 0;
7075 }
7076 else
7077 {
7078 indirect = false;
7079 size = int_size_in_bytes (type);
7080 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7081 align = 0;
7082
7083 if (TARGET_ARCH64)
7084 {
7085 /* For SPARC64, objects requiring 16-byte alignment get it. */
7086 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7087 align = 2 * UNITS_PER_WORD;
7088
7089 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7090 are left-justified in their slots. */
7091 if (AGGREGATE_TYPE_P (type))
7092 {
7093 if (size == 0)
7094 size = rsize = UNITS_PER_WORD;
7095 else
7096 size = rsize;
7097 }
7098 }
7099 }
7100
7101 incr = valist;
7102 if (align)
7103 {
7104 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7105 incr = fold_convert (sizetype, incr);
7106 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7107 size_int (-align));
7108 incr = fold_convert (ptr_type_node, incr);
7109 }
7110
7111 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7112 addr = incr;
7113
7114 if (BYTES_BIG_ENDIAN && size < rsize)
7115 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7116
7117 if (indirect)
7118 {
7119 addr = fold_convert (build_pointer_type (ptrtype), addr);
7120 addr = build_va_arg_indirect_ref (addr);
7121 }
7122
7123 /* If the address isn't aligned properly for the type, we need a temporary.
7124 FIXME: This is inefficient; usually we can do this in registers. */
7125 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7126 {
7127 tree tmp = create_tmp_var (type, "va_arg_tmp");
7128 tree dest_addr = build_fold_addr_expr (tmp);
7129 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7130 3, dest_addr, addr, size_int (rsize));
7131 TREE_ADDRESSABLE (tmp) = 1;
7132 gimplify_and_add (copy, pre_p);
7133 addr = dest_addr;
7134 }
7135
7136 else
7137 addr = fold_convert (ptrtype, addr);
7138
7139 incr = fold_build_pointer_plus_hwi (incr, rsize);
7140 gimplify_assign (valist, incr, post_p);
7141
7142 return build_va_arg_indirect_ref (addr);
7143 }
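
/* Net effect for a 16-byte-aligned type with -m64: the va_list pointer
   is first rounded up to a 16-byte boundary, i.e. roughly
   incr = (valist + 15) & -16, the value is read from there, and the
   pointer is advanced by the rounded size.  */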
7144 \f
7145 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7146 Specify whether the vector mode is supported by the hardware. */
7147
7148 static bool
7149 sparc_vector_mode_supported_p (enum machine_mode mode)
7150 {
7151 return TARGET_VIS && VECTOR_MODE_P (mode);
7152 }
7153 \f
7154 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7155
7156 static enum machine_mode
7157 sparc_preferred_simd_mode (enum machine_mode mode)
7158 {
7159 if (TARGET_VIS)
7160 switch (mode)
7161 {
7162 case SImode:
7163 return V2SImode;
7164 case HImode:
7165 return V4HImode;
7166 case QImode:
7167 return V8QImode;
7168
7169 default: break;
7170 }
7171
7172 return word_mode;
7173 }
7174 \f
7175 /* Return the string to output an unconditional branch to LABEL, which is
7176 the operand number of the label.
7177
7178 DEST is the destination insn (i.e. the label), INSN is the source. */
7179
7180 const char *
7181 output_ubranch (rtx dest, rtx insn)
7182 {
7183 static char string[64];
7184 bool v9_form = false;
7185 int delta;
7186 char *p;
7187
7188 /* Even if we are trying to use cbcond for this, evaluate
7189 whether we can use V9 branches as our backup plan. */
7190
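  /* Assume a very large delta when the insn addresses are not yet
     known, so that only the conservative branch forms are used.  */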
7191 delta = 5000000;
7192 if (INSN_ADDRESSES_SET_P ())
7193 delta = (INSN_ADDRESSES (INSN_UID (dest))
7194 - INSN_ADDRESSES (INSN_UID (insn)));
7195
7196 /* Leave some instructions for "slop". */
7197 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7198 v9_form = true;
7199
7200 if (TARGET_CBCOND)
7201 {
7202 bool emit_nop = emit_cbcond_nop (insn);
7203 bool far = false;
7204 const char *rval;
7205
7206 if (delta < -500 || delta > 500)
7207 far = true;
7208
7209 if (far)
7210 {
7211 if (v9_form)
7212 rval = "ba,a,pt\t%%xcc, %l0";
7213 else
7214 rval = "b,a\t%l0";
7215 }
7216 else
7217 {
7218 if (emit_nop)
7219 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7220 else
7221 rval = "cwbe\t%%g0, %%g0, %l0";
7222 }
7223 return rval;
7224 }
7225
7226 if (v9_form)
7227 strcpy (string, "ba%*,pt\t%%xcc, ");
7228 else
7229 strcpy (string, "b%*\t");
7230
7231 p = strchr (string, '\0');
7232 *p++ = '%';
7233 *p++ = 'l';
7234 *p++ = '0';
7235 *p++ = '%';
7236 *p++ = '(';
7237 *p = '\0';
7238
7239 return string;
7240 }
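
/* Thus a nearby unconditional branch on a cbcond target comes out as
   "cwbe %g0, %g0, label" (a compare-and-branch that is always taken),
   while the fallbacks are "ba,a,pt %xcc, label" within V9 range or the
   plain "b,a label".  */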
7241
7242 /* Return the string to output a conditional branch to LABEL, which is
7243 the operand number of the label. OP is the conditional expression.
7244 XEXP (OP, 0) is assumed to be a condition code register (integer or
7245 floating point) and its mode specifies what kind of comparison we made.
7246
7247 DEST is the destination insn (i.e. the label), INSN is the source.
7248
7249 REVERSED is nonzero if we should reverse the sense of the comparison.
7250
7251 ANNUL is nonzero if we should generate an annulling branch. */
7252
7253 const char *
7254 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7255 rtx insn)
7256 {
7257 static char string[64];
7258 enum rtx_code code = GET_CODE (op);
7259 rtx cc_reg = XEXP (op, 0);
7260 enum machine_mode mode = GET_MODE (cc_reg);
7261 const char *labelno, *branch;
7262 int spaces = 8, far;
7263 char *p;
7264
7265 /* v9 branches are limited to +-1MB. If it is too far away,
7266 change
7267
7268 bne,pt %xcc, .LC30
7269
7270 to
7271
7272 be,pn %xcc, .+12
7273 nop
7274 ba .LC30
7275
7276 and
7277
7278 fbne,a,pn %fcc2, .LC29
7279
7280 to
7281
7282 fbe,pt %fcc2, .+16
7283 nop
7284 ba .LC29 */
7285
7286 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7287 if (reversed ^ far)
7288 {
7289 /* Reversal of FP compares needs care -- an ordered compare
7290 becomes an unordered compare and vice versa. */
7291 if (mode == CCFPmode || mode == CCFPEmode)
7292 code = reverse_condition_maybe_unordered (code);
7293 else
7294 code = reverse_condition (code);
7295 }
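/* When the branch is FAR, the sequence emitted below inverts the test
and conditionally skips over an unconditional branch to the real
target; that is itself a reversal, so a caller-requested reversal on
top of it cancels out -- hence REVERSED xor FAR. */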
7296
7297 /* Start by writing the branch condition. */
7298 if (mode == CCFPmode || mode == CCFPEmode)
7299 {
7300 switch (code)
7301 {
7302 case NE:
7303 branch = "fbne";
7304 break;
7305 case EQ:
7306 branch = "fbe";
7307 break;
7308 case GE:
7309 branch = "fbge";
7310 break;
7311 case GT:
7312 branch = "fbg";
7313 break;
7314 case LE:
7315 branch = "fble";
7316 break;
7317 case LT:
7318 branch = "fbl";
7319 break;
7320 case UNORDERED:
7321 branch = "fbu";
7322 break;
7323 case ORDERED:
7324 branch = "fbo";
7325 break;
7326 case UNGT:
7327 branch = "fbug";
7328 break;
7329 case UNLT:
7330 branch = "fbul";
7331 break;
7332 case UNEQ:
7333 branch = "fbue";
7334 break;
7335 case UNGE:
7336 branch = "fbuge";
7337 break;
7338 case UNLE:
7339 branch = "fbule";
7340 break;
7341 case LTGT:
7342 branch = "fblg";
7343 break;
7344
7345 default:
7346 gcc_unreachable ();
7347 }
7348
7349 /* ??? !v9: FP branches cannot be preceded by another floating point
7350 insn. Because there is currently no concept of pre-delay slots,
7351 we can fix this only by always emitting a nop before a floating
7352 point branch. */
7353
7354 string[0] = '\0';
7355 if (! TARGET_V9)
7356 strcpy (string, "nop\n\t");
7357 strcat (string, branch);
7358 }
7359 else
7360 {
7361 switch (code)
7362 {
7363 case NE:
7364 branch = "bne";
7365 break;
7366 case EQ:
7367 branch = "be";
7368 break;
7369 case GE:
7370 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7371 branch = "bpos";
7372 else
7373 branch = "bge";
7374 break;
7375 case GT:
7376 branch = "bg";
7377 break;
7378 case LE:
7379 branch = "ble";
7380 break;
7381 case LT:
7382 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7383 branch = "bneg";
7384 else
7385 branch = "bl";
7386 break;
7387 case GEU:
7388 branch = "bgeu";
7389 break;
7390 case GTU:
7391 branch = "bgu";
7392 break;
7393 case LEU:
7394 branch = "bleu";
7395 break;
7396 case LTU:
7397 branch = "blu";
7398 break;
7399
7400 default:
7401 gcc_unreachable ();
7402 }
7403 strcpy (string, branch);
7404 }
7405 spaces -= strlen (branch);
7406 p = strchr (string, '\0');
7407
7408 /* Now add the annulling, the label, and a possible noop. */
7409 if (annul && ! far)
7410 {
7411 strcpy (p, ",a");
7412 p += 2;
7413 spaces -= 2;
7414 }
7415
7416 if (TARGET_V9)
7417 {
7418 rtx note;
7419 int v8 = 0;
7420
7421 if (! far && insn && INSN_ADDRESSES_SET_P ())
7422 {
7423 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7424 - INSN_ADDRESSES (INSN_UID (insn)));
7425 /* Leave some instructions for "slop". */
7426 if (delta < -260000 || delta >= 260000)
7427 v8 = 1;
7428 }
7429
7430 if (mode == CCFPmode || mode == CCFPEmode)
7431 {
7432 static char v9_fcc_labelno[] = "%%fccX, ";
7433 /* Set the char indicating the number of the fcc reg to use. */
7434 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7435 labelno = v9_fcc_labelno;
7436 if (v8)
7437 {
7438 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7439 labelno = "";
7440 }
7441 }
7442 else if (mode == CCXmode || mode == CCX_NOOVmode)
7443 {
7444 labelno = "%%xcc, ";
7445 gcc_assert (! v8);
7446 }
7447 else
7448 {
7449 labelno = "%%icc, ";
7450 if (v8)
7451 labelno = "";
7452 }
7453
7454 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7455 {
7456 strcpy (p,
7457 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
7458 ? ",pt" : ",pn");
7459 p += 3;
7460 spaces -= 3;
7461 }
7462 }
7463 else
7464 labelno = "";
7465
7466 if (spaces > 0)
7467 *p++ = '\t';
7468 else
7469 *p++ = ' ';
7470 strcpy (p, labelno);
7471 p = strchr (p, '\0');
7472 if (far)
7473 {
7474 strcpy (p, ".+12\n\t nop\n\tb\t");
7475 /* Skip the next insn if requested or
7476 if we know that it will be a nop. */
7477 if (annul || ! final_sequence)
7478 p[3] = '6';
7479 p += 14;
7480 }
7481 *p++ = '%';
7482 *p++ = 'l';
7483 *p++ = label + '0';
7484 *p++ = '%';
7485 *p++ = '#';
7486 *p = '\0';
7487
7488 return string;
7489 }
7490
7491 /* Emit a library call comparison between floating point X and Y.
7492 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7493 Return the new operator to be used in the comparison sequence.
7494
7495 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7496 values as arguments instead of the TFmode registers themselves;
7497 that is why we cannot call emit_float_lib_cmp. */
7498
7499 rtx
7500 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7501 {
7502 const char *qpfunc;
7503 rtx slot0, slot1, result, tem, tem2, libfunc;
7504 enum machine_mode mode;
7505 enum rtx_code new_comparison;
7506
7507 switch (comparison)
7508 {
7509 case EQ:
7510 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7511 break;
7512
7513 case NE:
7514 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7515 break;
7516
7517 case GT:
7518 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7519 break;
7520
7521 case GE:
7522 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7523 break;
7524
7525 case LT:
7526 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7527 break;
7528
7529 case LE:
7530 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7531 break;
7532
7533 case ORDERED:
7534 case UNORDERED:
7535 case UNGT:
7536 case UNLT:
7537 case UNEQ:
7538 case UNGE:
7539 case UNLE:
7540 case LTGT:
7541 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7542 break;
7543
7544 default:
7545 gcc_unreachable ();
7546 }
7547
7548 if (TARGET_ARCH64)
7549 {
7550 if (MEM_P (x))
7551 {
7552 tree expr = MEM_EXPR (x);
7553 if (expr)
7554 mark_addressable (expr);
7555 slot0 = x;
7556 }
7557 else
7558 {
7559 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7560 emit_move_insn (slot0, x);
7561 }
7562
7563 if (MEM_P (y))
7564 {
7565 tree expr = MEM_EXPR (y);
7566 if (expr)
7567 mark_addressable (expr);
7568 slot1 = y;
7569 }
7570 else
7571 {
7572 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7573 emit_move_insn (slot1, y);
7574 }
7575
7576 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7577 emit_library_call (libfunc, LCT_NORMAL,
7578 DImode, 2,
7579 XEXP (slot0, 0), Pmode,
7580 XEXP (slot1, 0), Pmode);
7581 mode = DImode;
7582 }
7583 else
7584 {
7585 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7586 emit_library_call (libfunc, LCT_NORMAL,
7587 SImode, 2,
7588 x, TFmode, y, TFmode);
7589 mode = SImode;
7590 }
7591
7592
7593 /* Immediately move the result of the libcall into a pseudo
7594 register so reload doesn't clobber the value if it needs
7595 the return register for a spill reg. */
7596 result = gen_reg_rtx (mode);
7597 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7598
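/* The *_cmp libcalls follow the quad-float comparison convention of
returning 0 for equal, 1 for less, 2 for greater and 3 for unordered;
the decoding below relies on exactly that encoding (hence the tests
against 1, 2 and 3). */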
7599 switch (comparison)
7600 {
7601 default:
7602 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7603 case ORDERED:
7604 case UNORDERED:
7605 new_comparison = (comparison == UNORDERED ? EQ : NE);
7606 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7607 case UNGT:
7608 case UNGE:
7609 new_comparison = (comparison == UNGT ? GT : NE);
7610 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7611 case UNLE:
7612 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7613 case UNLT:
7614 tem = gen_reg_rtx (mode);
7615 if (TARGET_ARCH32)
7616 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7617 else
7618 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7619 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7620 case UNEQ:
7621 case LTGT:
7622 tem = gen_reg_rtx (mode);
7623 if (TARGET_ARCH32)
7624 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7625 else
7626 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7627 tem2 = gen_reg_rtx (mode);
7628 if (TARGET_ARCH32)
7629 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
7630 else
7631 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
7632 new_comparison = (comparison == UNEQ ? EQ : NE);
7633 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
7634 }
7635
7636 gcc_unreachable ();
7637 }
7638
7639 /* Generate an unsigned DImode to FP conversion. This is the same code
7640 optabs would emit if we didn't have TFmode patterns. */
7641
7642 void
7643 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
7644 {
7645 rtx neglab, donelab, i0, i1, f0, in, out;
7646
7647 out = operands[0];
7648 in = force_reg (DImode, operands[1]);
7649 neglab = gen_label_rtx ();
7650 donelab = gen_label_rtx ();
7651 i0 = gen_reg_rtx (DImode);
7652 i1 = gen_reg_rtx (DImode);
7653 f0 = gen_reg_rtx (mode);
7654
7655 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
7656
7657 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
7658 emit_jump_insn (gen_jump (donelab));
7659 emit_barrier ();
7660
7661 emit_label (neglab);
7662
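/* The input has its top bit set, so it is out of signed DImode range.
Halve it, folding the shifted-out low bit back in so that the rounding
of the conversion still sees an inexact value, convert, and then
double the result. */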
7663 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
7664 emit_insn (gen_anddi3 (i1, in, const1_rtx));
7665 emit_insn (gen_iordi3 (i0, i0, i1));
7666 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
7667 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
7668
7669 emit_label (donelab);
7670 }
7671
7672 /* Generate an FP to unsigned DImode conversion. This is the same code
7673 optabs would emit if we didn't have TFmode patterns. */
7674
7675 void
7676 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
7677 {
7678 rtx neglab, donelab, i0, i1, f0, in, out, limit;
7679
7680 out = operands[0];
7681 in = force_reg (mode, operands[1]);
7682 neglab = gen_label_rtx ();
7683 donelab = gen_label_rtx ();
7684 i0 = gen_reg_rtx (DImode);
7685 i1 = gen_reg_rtx (DImode);
7686 limit = gen_reg_rtx (mode);
7687 f0 = gen_reg_rtx (mode);
7688
7689 emit_move_insn (limit,
7690 CONST_DOUBLE_FROM_REAL_VALUE (
7691 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
7692 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
7693
7694 emit_insn (gen_rtx_SET (VOIDmode,
7695 out,
7696 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
7697 emit_jump_insn (gen_jump (donelab));
7698 emit_barrier ();
7699
7700 emit_label (neglab);
7701
7702 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
7703 emit_insn (gen_rtx_SET (VOIDmode,
7704 i0,
7705 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
7706 emit_insn (gen_movdi (i1, const1_rtx));
7707 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
7708 emit_insn (gen_xordi3 (out, i0, i1));
7709
7710 emit_label (donelab);
7711 }
7712
7713 /* Return the string to output a compare and branch instruction to DEST.
7714 DEST is the destination insn (i.e. the label), INSN is the source,
7715 and OP is the conditional expression. */
7716
7717 const char *
7718 output_cbcond (rtx op, rtx dest, rtx insn)
7719 {
7720 enum machine_mode mode = GET_MODE (XEXP (op, 0));
7721 enum rtx_code code = GET_CODE (op);
7722 const char *cond_str, *tmpl;
7723 int far, emit_nop, len;
7724 static char string[64];
7725 char size_char;
7726
7727 /* Compare and Branch is limited to +-2KB. If it is too far away,
7728 change
7729
7730 cxbne X, Y, .LC30
7731
7732 to
7733
7734 cxbe X, Y, .+16
7735 nop
7736 ba,pt xcc, .LC30
7737 nop */
7738
7739 len = get_attr_length (insn);
7740
7741 far = len == 4;
7742 emit_nop = len == 2;
7743
7744 if (far)
7745 code = reverse_condition (code);
7746
7747 size_char = ((mode == SImode) ? 'w' : 'x');
7748
7749 switch (code)
7750 {
7751 case NE:
7752 cond_str = "ne";
7753 break;
7754
7755 case EQ:
7756 cond_str = "e";
7757 break;
7758
7759 case GE:
7760 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7761 cond_str = "pos";
7762 else
7763 cond_str = "ge";
7764 break;
7765
7766 case GT:
7767 cond_str = "g";
7768 break;
7769
7770 case LE:
7771 cond_str = "le";
7772 break;
7773
7774 case LT:
7775 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7776 cond_str = "neg";
7777 else
7778 cond_str = "l";
7779 break;
7780
7781 case GEU:
7782 cond_str = "cc";
7783 break;
7784
7785 case GTU:
7786 cond_str = "gu";
7787 break;
7788
7789 case LEU:
7790 cond_str = "leu";
7791 break;
7792
7793 case LTU:
7794 cond_str = "cs";
7795 break;
7796
7797 default:
7798 gcc_unreachable ();
7799 }
7800
7801 if (far)
7802 {
7803 int veryfar = 1, delta;
7804
7805 if (INSN_ADDRESSES_SET_P ())
7806 {
7807 delta = (INSN_ADDRESSES (INSN_UID (dest))
7808 - INSN_ADDRESSES (INSN_UID (insn)));
7809 /* Leave some instructions for "slop". */
7810 if (delta >= -260000 && delta < 260000)
7811 veryfar = 0;
7812 }
7813
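/* A plain "b" has a 22-bit word displacement and thus a much longer
reach than the 19-bit "ba,pt"; prefer it when the target may be out of
range of the predicted form. */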
7814 if (veryfar)
7815 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
7816 else
7817 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
7818 }
7819 else
7820 {
7821 if (emit_nop)
7822 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
7823 else
7824 tmpl = "c%cb%s\t%%1, %%2, %%3";
7825 }
7826
7827 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
7828
7829 return string;
7830 }
7831
7832 /* Return the string to output a conditional branch to LABEL, testing
7833 register REG. LABEL is the operand number of the label; REG is the
7834 operand number of the reg. OP is the conditional expression. The mode
7835 of REG says what kind of comparison we made.
7836
7837 DEST is the destination insn (i.e. the label), INSN is the source.
7838
7839 REVERSED is nonzero if we should reverse the sense of the comparison.
7840
7841 ANNUL is nonzero if we should generate an annulling branch. */
7842
7843 const char *
7844 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
7845 int annul, rtx insn)
7846 {
7847 static char string[64];
7848 enum rtx_code code = GET_CODE (op);
7849 enum machine_mode mode = GET_MODE (XEXP (op, 0));
7850 rtx note;
7851 int far;
7852 char *p;
7853
7854 /* Branches on a register are limited to +-128KB. If it is too far away,
7855 change
7856
7857 brnz,pt %g1, .LC30
7858
7859 to
7860
7861 brz,pn %g1, .+12
7862 nop
7863 ba,pt %xcc, .LC30
7864
7865 and
7866
7867 brgez,a,pn %o1, .LC29
7868
7869 to
7870
7871 brlz,pt %o1, .+16
7872 nop
7873 ba,pt %xcc, .LC29 */
7874
7875 far = get_attr_length (insn) >= 3;
7876
7877 /* If not floating-point or if EQ or NE, we can just reverse the code. */
7878 if (reversed ^ far)
7879 code = reverse_condition (code);
7880
7881 /* Only 64 bit versions of these instructions exist. */
7882 gcc_assert (mode == DImode);
7883
7884 /* Start by writing the branch condition. */
7885
7886 switch (code)
7887 {
7888 case NE:
7889 strcpy (string, "brnz");
7890 break;
7891
7892 case EQ:
7893 strcpy (string, "brz");
7894 break;
7895
7896 case GE:
7897 strcpy (string, "brgez");
7898 break;
7899
7900 case LT:
7901 strcpy (string, "brlz");
7902 break;
7903
7904 case LE:
7905 strcpy (string, "brlez");
7906 break;
7907
7908 case GT:
7909 strcpy (string, "brgz");
7910 break;
7911
7912 default:
7913 gcc_unreachable ();
7914 }
7915
7916 p = strchr (string, '\0');
7917
7918 /* Now add the annulling, reg, label, and nop. */
7919 if (annul && ! far)
7920 {
7921 strcpy (p, ",a");
7922 p += 2;
7923 }
7924
7925 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7926 {
7927 strcpy (p,
7928 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
7929 ? ",pt" : ",pn");
7930 p += 3;
7931 }
7932
7933 *p = p < string + 8 ? '\t' : ' ';
7934 p++;
7935 *p++ = '%';
7936 *p++ = '0' + reg;
7937 *p++ = ',';
7938 *p++ = ' ';
7939 if (far)
7940 {
7941 int veryfar = 1, delta;
7942
7943 if (INSN_ADDRESSES_SET_P ())
7944 {
7945 delta = (INSN_ADDRESSES (INSN_UID (dest))
7946 - INSN_ADDRESSES (INSN_UID (insn)));
7947 /* Leave some instructions for "slop". */
7948 if (delta >= -260000 && delta < 260000)
7949 veryfar = 0;
7950 }
7951
7952 strcpy (p, ".+12\n\t nop\n\t");
7953 /* Skip the next insn if requested or
7954 if we know that it will be a nop. */
7955 if (annul || ! final_sequence)
7956 p[3] = '6';
7957 p += 12;
7958 if (veryfar)
7959 {
7960 strcpy (p, "b\t");
7961 p += 2;
7962 }
7963 else
7964 {
7965 strcpy (p, "ba,pt\t%%xcc, ");
7966 p += 13;
7967 }
7968 }
7969 *p++ = '%';
7970 *p++ = 'l';
7971 *p++ = '0' + label;
7972 *p++ = '%';
7973 *p++ = '#';
7974 *p = '\0';
7975
7976 return string;
7977 }
7978
7979 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
7980 Such instructions cannot be used in the delay slot of a return insn on V9.
7981 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
7982 */
7983
7984 static int
7985 epilogue_renumber (register rtx *where, int test)
7986 {
7987 register const char *fmt;
7988 register int i;
7989 register enum rtx_code code;
7990
7991 if (*where == 0)
7992 return 0;
7993
7994 code = GET_CODE (*where);
7995
7996 switch (code)
7997 {
7998 case REG:
7999 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8000 return 1;
8001 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8002 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
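/* E.g. %i0 (register 24) is renamed to its caller-window counterpart
%o0 (register 8).  Fall through: these codes need no further change. */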
8003 case SCRATCH:
8004 case CC0:
8005 case PC:
8006 case CONST_INT:
8007 case CONST_DOUBLE:
8008 return 0;
8009
8010 /* Do not replace the frame pointer with the stack pointer because
8011 it can cause the delayed instruction to load below the stack.
8012 This occurs when instructions like:
8013
8014 (set (reg/i:SI 24 %i0)
8015 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8016 (const_int -20 [0xffffffec])) 0))
8017
8018 are in the return delay slot. */
8019 case PLUS:
8020 if (GET_CODE (XEXP (*where, 0)) == REG
8021 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8022 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8023 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8024 return 1;
8025 break;
8026
8027 case MEM:
8028 if (SPARC_STACK_BIAS
8029 && GET_CODE (XEXP (*where, 0)) == REG
8030 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8031 return 1;
8032 break;
8033
8034 default:
8035 break;
8036 }
8037
8038 fmt = GET_RTX_FORMAT (code);
8039
8040 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8041 {
8042 if (fmt[i] == 'E')
8043 {
8044 register int j;
8045 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8046 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8047 return 1;
8048 }
8049 else if (fmt[i] == 'e'
8050 && epilogue_renumber (&(XEXP (*where, i)), test))
8051 return 1;
8052 }
8053 return 0;
8054 }
8055 \f
8056 /* Leaf functions and non-leaf functions have different needs. */
8057
8058 static const int
8059 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8060
8061 static const int
8062 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8063
8064 static const int *const reg_alloc_orders[] = {
8065 reg_leaf_alloc_order,
8066 reg_nonleaf_alloc_order};
8067
8068 void
8069 order_regs_for_local_alloc (void)
8070 {
8071 static int last_order_nonleaf = 1;
8072
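/* Hard register 15 is %o7, which the call instruction writes with the
return address, so it is live exactly when this function makes calls,
i.e. is not a leaf. */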
8073 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8074 {
8075 last_order_nonleaf = !last_order_nonleaf;
8076 memcpy ((char *) reg_alloc_order,
8077 (const char *) reg_alloc_orders[last_order_nonleaf],
8078 FIRST_PSEUDO_REGISTER * sizeof (int));
8079 }
8080 }
8081 \f
8082 /* Return 1 if REG and MEM are legitimate enough to allow the various
8083 mem<-->reg splits to be run. */
8084
8085 int
8086 sparc_splitdi_legitimate (rtx reg, rtx mem)
8087 {
8088 /* Punt if we are here by mistake. */
8089 gcc_assert (reload_completed);
8090
8091 /* We must have an offsettable memory reference. */
8092 if (! offsettable_memref_p (mem))
8093 return 0;
8094
8095 /* If we have legitimate args for ldd/std, we do not want
8096 the split to happen. */
8097 if ((REGNO (reg) % 2) == 0
8098 && mem_min_alignment (mem, 8))
8099 return 0;
8100
8101 /* Success. */
8102 return 1;
8103 }
8104
8105 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8106
8107 int
8108 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8109 {
8110 int regno1, regno2;
8111
8112 if (GET_CODE (reg1) == SUBREG)
8113 reg1 = SUBREG_REG (reg1);
8114 if (GET_CODE (reg1) != REG)
8115 return 0;
8116 regno1 = REGNO (reg1);
8117
8118 if (GET_CODE (reg2) == SUBREG)
8119 reg2 = SUBREG_REG (reg2);
8120 if (GET_CODE (reg2) != REG)
8121 return 0;
8122 regno2 = REGNO (reg2);
8123
8124 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8125 return 1;
8126
8127 if (TARGET_VIS3)
8128 {
8129 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8130 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8131 return 1;
8132 }
8133
8134 return 0;
8135 }
8136
8137 /* Return 1 if x and y are some kind of REG and they refer to
8138 different hard registers. This test is guaranteed to be
8139 run after reload. */
8140
8141 int
8142 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
8143 {
8144 if (GET_CODE (x) != REG)
8145 return 0;
8146 if (GET_CODE (y) != REG)
8147 return 0;
8148 if (REGNO (x) == REGNO (y))
8149 return 0;
8150 return 1;
8151 }
8152
8153 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8154 This makes them candidates for using ldd and std insns.
8155
8156 Note reg1 and reg2 *must* be hard registers. */
8157
8158 int
8159 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8160 {
8161 /* We might have been passed a SUBREG. */
8162 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8163 return 0;
8164
8165 if (REGNO (reg1) % 2 != 0)
8166 return 0;
8167
8168 /* Integer ldd is deprecated in SPARC V9. */
8169 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8170 return 0;
8171
8172 return (REGNO (reg1) == REGNO (reg2) - 1);
8173 }
8174
8175 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8176 an ldd or std insn.
8177
8178 This can only happen when addr1 and addr2, the addresses in mem1
8179 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8180 addr1 must also be aligned on a 64-bit boundary.
8181
8182 Also, if dependent_reg_rtx is not null, it should not be used to
8183 compute the address for mem1, i.e. we cannot optimize a sequence
8184 like:
8185 ld [%o0], %o0
8186 ld [%o0 + 4], %o1
8187 to
8188 ldd [%o0], %o0
8189 nor:
8190 ld [%g3 + 4], %g3
8191 ld [%g3], %g2
8192 to
8193 ldd [%g3], %g2
8194
8195 But, note that the transformation from:
8196 ld [%g2 + 4], %g3
8197 ld [%g2], %g2
8198 to
8199 ldd [%g2], %g2
8200 is perfectly fine. Thus, the peephole2 patterns always pass us
8201 the destination register of the first load, never the second one.
8202
8203 For stores we don't have a similar problem, so dependent_reg_rtx is
8204 NULL_RTX. */
8205
8206 int
8207 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8208 {
8209 rtx addr1, addr2;
8210 unsigned int reg1;
8211 HOST_WIDE_INT offset1;
8212
8213 /* The mems cannot be volatile. */
8214 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8215 return 0;
8216
8217 /* MEM1 should be aligned on a 64-bit boundary. */
8218 if (MEM_ALIGN (mem1) < 64)
8219 return 0;
8220
8221 addr1 = XEXP (mem1, 0);
8222 addr2 = XEXP (mem2, 0);
8223
8224 /* Extract a register number and offset (if used) from the first addr. */
8225 if (GET_CODE (addr1) == PLUS)
8226 {
8227 /* If not a REG, return zero. */
8228 if (GET_CODE (XEXP (addr1, 0)) != REG)
8229 return 0;
8230 else
8231 {
8232 reg1 = REGNO (XEXP (addr1, 0));
8233 /* The offset must be constant! */
8234 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8235 return 0;
8236 offset1 = INTVAL (XEXP (addr1, 1));
8237 }
8238 }
8239 else if (GET_CODE (addr1) != REG)
8240 return 0;
8241 else
8242 {
8243 reg1 = REGNO (addr1);
8244 /* This was a simple (mem (reg)) expression. Offset is 0. */
8245 offset1 = 0;
8246 }
8247
8248 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
8249 if (GET_CODE (addr2) != PLUS)
8250 return 0;
8251
8252 if (GET_CODE (XEXP (addr2, 0)) != REG
8253 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8254 return 0;
8255
8256 if (reg1 != REGNO (XEXP (addr2, 0)))
8257 return 0;
8258
8259 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8260 return 0;
8261
8262 /* The first offset must be evenly divisible by 8 to ensure the
8263 address is 64 bit aligned. */
8264 if (offset1 % 8 != 0)
8265 return 0;
8266
8267 /* The offset for the second addr must be 4 more than the first addr. */
8268 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8269 return 0;
8270
8271 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8272 instructions. */
8273 return 1;
8274 }
8275
8276 /* Return 1 if reg is a pseudo, or is the first register in
8277 a hard register pair. This makes it suitable for use in
8278 ldd and std insns. */
8279
8280 int
8281 register_ok_for_ldd (rtx reg)
8282 {
8283 /* We might have been passed a SUBREG. */
8284 if (!REG_P (reg))
8285 return 0;
8286
8287 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8288 return (REGNO (reg) % 2 == 0);
8289
8290 return 1;
8291 }
8292
8293 /* Return 1 if OP, a MEM, has an address which is known to be
8294 aligned to an 8-byte boundary. */
8295
8296 int
8297 memory_ok_for_ldd (rtx op)
8298 {
8299 /* In 64-bit mode, we assume that the address is word-aligned. */
8300 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8301 return 0;
8302
8303 if (! can_create_pseudo_p ()
8304 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8305 return 0;
8306
8307 return 1;
8308 }
8309 \f
8310 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8311
8312 static bool
8313 sparc_print_operand_punct_valid_p (unsigned char code)
8314 {
8315 if (code == '#'
8316 || code == '*'
8317 || code == '('
8318 || code == ')'
8319 || code == '_'
8320 || code == '&')
8321 return true;
8322
8323 return false;
8324 }
8325
8326 /* Implement TARGET_PRINT_OPERAND.
8327 Print operand X (an rtx) in assembler syntax to file FILE.
8328 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8329 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8330
8331 static void
8332 sparc_print_operand (FILE *file, rtx x, int code)
8333 {
8334 switch (code)
8335 {
8336 case '#':
8337 /* Output an insn in a delay slot. */
8338 if (final_sequence)
8339 sparc_indent_opcode = 1;
8340 else
8341 fputs ("\n\t nop", file);
8342 return;
8343 case '*':
8344 /* Output an annul flag if there's nothing for the delay slot and we
8345 are optimizing. This is always used with '(' below.
8346 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8347 this is a dbx bug. So, we only do this when optimizing.
8348 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8349 Always emit a nop in case the next instruction is a branch. */
8350 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8351 fputs (",a", file);
8352 return;
8353 case '(':
8354 /* Output a 'nop' if there's nothing for the delay slot and we are
8355 not optimizing. This is always used with '*' above. */
8356 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8357 fputs ("\n\t nop", file);
8358 else if (final_sequence)
8359 sparc_indent_opcode = 1;
8360 return;
8361 case ')':
8362 /* Output the right displacement from the saved PC on function return.
8363 The caller may have placed an "unimp" insn immediately after the call
8364 so we have to account for it. This insn is used in the 32-bit ABI
8365 when calling a function that returns a non zero-sized structure. The
8366 64-bit ABI doesn't have it. Be careful to have this test be the same
8367 as that for the call. The exception is when sparc_std_struct_return
8368 is enabled, the psABI is followed exactly and the adjustment is made
8369 by the code in sparc_struct_value_rtx. The call emitted is the same
8370 when sparc_std_struct_return is enabled. */
8371 if (!TARGET_ARCH64
8372 && cfun->returns_struct
8373 && !sparc_std_struct_return
8374 && DECL_SIZE (DECL_RESULT (current_function_decl))
8375 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8376 == INTEGER_CST
8377 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8378 fputs ("12", file);
8379 else
8380 fputc ('8', file);
8381 return;
8382 case '_':
8383 /* Output the Embedded Medium/Anywhere code model base register. */
8384 fputs (EMBMEDANY_BASE_REG, file);
8385 return;
8386 case '&':
8387 /* Print some local dynamic TLS name. */
8388 assemble_name (file, get_some_local_dynamic_name ());
8389 return;
8390
8391 case 'Y':
8392 /* Adjust the operand to take into account a RESTORE operation. */
8393 if (GET_CODE (x) == CONST_INT)
8394 break;
8395 else if (GET_CODE (x) != REG)
8396 output_operand_lossage ("invalid %%Y operand");
8397 else if (REGNO (x) < 8)
8398 fputs (reg_names[REGNO (x)], file);
8399 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8400 fputs (reg_names[REGNO (x)-16], file);
8401 else
8402 output_operand_lossage ("invalid %%Y operand");
8403 return;
8404 case 'L':
8405 /* Print out the low order register name of a register pair. */
8406 if (WORDS_BIG_ENDIAN)
8407 fputs (reg_names[REGNO (x)+1], file);
8408 else
8409 fputs (reg_names[REGNO (x)], file);
8410 return;
8411 case 'H':
8412 /* Print out the high order register name of a register pair. */
8413 if (WORDS_BIG_ENDIAN)
8414 fputs (reg_names[REGNO (x)], file);
8415 else
8416 fputs (reg_names[REGNO (x)+1], file);
8417 return;
8418 case 'R':
8419 /* Print out the second register name of a register pair or quad.
8420 I.e., R (%o0) => %o1. */
8421 fputs (reg_names[REGNO (x)+1], file);
8422 return;
8423 case 'S':
8424 /* Print out the third register name of a register quad.
8425 I.e., S (%o0) => %o2. */
8426 fputs (reg_names[REGNO (x)+2], file);
8427 return;
8428 case 'T':
8429 /* Print out the fourth register name of a register quad.
8430 I.e., T (%o0) => %o3. */
8431 fputs (reg_names[REGNO (x)+3], file);
8432 return;
8433 case 'x':
8434 /* Print a condition code register. */
8435 if (REGNO (x) == SPARC_ICC_REG)
8436 {
8437 /* We don't handle CC[X]_NOOVmode because they're not supposed
8438 to occur here. */
8439 if (GET_MODE (x) == CCmode)
8440 fputs ("%icc", file);
8441 else if (GET_MODE (x) == CCXmode)
8442 fputs ("%xcc", file);
8443 else
8444 gcc_unreachable ();
8445 }
8446 else
8447 /* %fccN register */
8448 fputs (reg_names[REGNO (x)], file);
8449 return;
8450 case 'm':
8451 /* Print the operand's address only. */
8452 output_address (XEXP (x, 0));
8453 return;
8454 case 'r':
8455 /* In this case we need a register. Use %g0 if the
8456 operand is const0_rtx. */
8457 if (x == const0_rtx
8458 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8459 {
8460 fputs ("%g0", file);
8461 return;
8462 }
8463 else
8464 break;
8465
8466 case 'A':
8467 switch (GET_CODE (x))
8468 {
8469 case IOR: fputs ("or", file); break;
8470 case AND: fputs ("and", file); break;
8471 case XOR: fputs ("xor", file); break;
8472 default: output_operand_lossage ("invalid %%A operand");
8473 }
8474 return;
8475
8476 case 'B':
8477 switch (GET_CODE (x))
8478 {
8479 case IOR: fputs ("orn", file); break;
8480 case AND: fputs ("andn", file); break;
8481 case XOR: fputs ("xnor", file); break;
8482 default: output_operand_lossage ("invalid %%B operand");
8483 }
8484 return;
8485
8486 /* This is used by the conditional move instructions. */
8487 case 'C':
8488 {
8489 enum rtx_code rc = GET_CODE (x);
8490
8491 switch (rc)
8492 {
8493 case NE: fputs ("ne", file); break;
8494 case EQ: fputs ("e", file); break;
8495 case GE: fputs ("ge", file); break;
8496 case GT: fputs ("g", file); break;
8497 case LE: fputs ("le", file); break;
8498 case LT: fputs ("l", file); break;
8499 case GEU: fputs ("geu", file); break;
8500 case GTU: fputs ("gu", file); break;
8501 case LEU: fputs ("leu", file); break;
8502 case LTU: fputs ("lu", file); break;
8503 case LTGT: fputs ("lg", file); break;
8504 case UNORDERED: fputs ("u", file); break;
8505 case ORDERED: fputs ("o", file); break;
8506 case UNLT: fputs ("ul", file); break;
8507 case UNLE: fputs ("ule", file); break;
8508 case UNGT: fputs ("ug", file); break;
8509 case UNGE: fputs ("uge", file); break;
8510 case UNEQ: fputs ("ue", file); break;
8511 default: output_operand_lossage ("invalid %%C operand");
8512 }
8513 return;
8514 }
8515
8516 /* These are used by the movr instruction pattern. */
8517 case 'D':
8518 {
8519 enum rtx_code rc = GET_CODE (x);
8520 switch (rc)
8521 {
8522 case NE: fputs ("ne", file); break;
8523 case EQ: fputs ("e", file); break;
8524 case GE: fputs ("gez", file); break;
8525 case LT: fputs ("lz", file); break;
8526 case LE: fputs ("lez", file); break;
8527 case GT: fputs ("gz", file); break;
8528 default: output_operand_lossage ("invalid %%D operand");
8529 }
8530 return;
8531 }
8532
8533 case 'b':
8534 {
8535 /* Print a sign-extended character. */
8536 int i = trunc_int_for_mode (INTVAL (x), QImode);
8537 fprintf (file, "%d", i);
8538 return;
8539 }
8540
8541 case 'f':
8542 /* Operand must be a MEM; write its address. */
8543 if (GET_CODE (x) != MEM)
8544 output_operand_lossage ("invalid %%f operand");
8545 output_address (XEXP (x, 0));
8546 return;
8547
8548 case 's':
8549 {
8550 /* Print a sign-extended 32-bit value. */
8551 HOST_WIDE_INT i;
8552 if (GET_CODE(x) == CONST_INT)
8553 i = INTVAL (x);
8554 else if (GET_CODE(x) == CONST_DOUBLE)
8555 i = CONST_DOUBLE_LOW (x);
8556 else
8557 {
8558 output_operand_lossage ("invalid %%s operand");
8559 return;
8560 }
8561 i = trunc_int_for_mode (i, SImode);
8562 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8563 return;
8564 }
8565
8566 case 0:
8567 /* Do nothing special. */
8568 break;
8569
8570 default:
8571 /* Undocumented flag. */
8572 output_operand_lossage ("invalid operand output code");
8573 }
8574
8575 if (GET_CODE (x) == REG)
8576 fputs (reg_names[REGNO (x)], file);
8577 else if (GET_CODE (x) == MEM)
8578 {
8579 fputc ('[', file);
8580 /* Poor Sun assembler doesn't understand absolute addressing. */
8581 if (CONSTANT_P (XEXP (x, 0)))
8582 fputs ("%g0+", file);
8583 output_address (XEXP (x, 0));
8584 fputc (']', file);
8585 }
8586 else if (GET_CODE (x) == HIGH)
8587 {
8588 fputs ("%hi(", file);
8589 output_addr_const (file, XEXP (x, 0));
8590 fputc (')', file);
8591 }
8592 else if (GET_CODE (x) == LO_SUM)
8593 {
8594 sparc_print_operand (file, XEXP (x, 0), 0);
8595 if (TARGET_CM_MEDMID)
8596 fputs ("+%l44(", file);
8597 else
8598 fputs ("+%lo(", file);
8599 output_addr_const (file, XEXP (x, 1));
8600 fputc (')', file);
8601 }
8602 else if (GET_CODE (x) == CONST_DOUBLE
8603 && (GET_MODE (x) == VOIDmode
8604 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
8605 {
8606 if (CONST_DOUBLE_HIGH (x) == 0)
8607 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
8608 else if (CONST_DOUBLE_HIGH (x) == -1
8609 && CONST_DOUBLE_LOW (x) < 0)
8610 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
8611 else
8612 output_operand_lossage ("long long constant not a valid immediate operand");
8613 }
8614 else if (GET_CODE (x) == CONST_DOUBLE)
8615 output_operand_lossage ("floating point constant not a valid immediate operand");
8616 else { output_addr_const (file, x); }
8617 }
8618
8619 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8620
8621 static void
8622 sparc_print_operand_address (FILE *file, rtx x)
8623 {
8624 register rtx base, index = 0;
8625 int offset = 0;
8626 register rtx addr = x;
8627
8628 if (REG_P (addr))
8629 fputs (reg_names[REGNO (addr)], file);
8630 else if (GET_CODE (addr) == PLUS)
8631 {
8632 if (CONST_INT_P (XEXP (addr, 0)))
8633 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
8634 else if (CONST_INT_P (XEXP (addr, 1)))
8635 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
8636 else
8637 base = XEXP (addr, 0), index = XEXP (addr, 1);
8638 if (GET_CODE (base) == LO_SUM)
8639 {
8640 gcc_assert (USE_AS_OFFSETABLE_LO10
8641 && TARGET_ARCH64
8642 && ! TARGET_CM_MEDMID);
8643 output_operand (XEXP (base, 0), 0);
8644 fputs ("+%lo(", file);
8645 output_address (XEXP (base, 1));
8646 fprintf (file, ")+%d", offset);
8647 }
8648 else
8649 {
8650 fputs (reg_names[REGNO (base)], file);
8651 if (index == 0)
8652 fprintf (file, "%+d", offset);
8653 else if (REG_P (index))
8654 fprintf (file, "+%s", reg_names[REGNO (index)]);
8655 else if (GET_CODE (index) == SYMBOL_REF
8656 || GET_CODE (index) == LABEL_REF
8657 || GET_CODE (index) == CONST)
8658 fputc ('+', file), output_addr_const (file, index);
8659 else gcc_unreachable ();
8660 }
8661 }
8662 else if (GET_CODE (addr) == MINUS
8663 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
8664 {
8665 output_addr_const (file, XEXP (addr, 0));
8666 fputs ("-(", file);
8667 output_addr_const (file, XEXP (addr, 1));
8668 fputs ("-.)", file);
8669 }
8670 else if (GET_CODE (addr) == LO_SUM)
8671 {
8672 output_operand (XEXP (addr, 0), 0);
8673 if (TARGET_CM_MEDMID)
8674 fputs ("+%l44(", file);
8675 else
8676 fputs ("+%lo(", file);
8677 output_address (XEXP (addr, 1));
8678 fputc (')', file);
8679 }
8680 else if (flag_pic
8681 && GET_CODE (addr) == CONST
8682 && GET_CODE (XEXP (addr, 0)) == MINUS
8683 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
8684 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
8685 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
8686 {
8687 addr = XEXP (addr, 0);
8688 output_addr_const (file, XEXP (addr, 0));
8689 /* Group the args of the second CONST in parenthesis. */
8690 fputs ("-(", file);
8691 /* Skip past the second CONST--it does nothing for us. */
8692 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
8693 /* Close the parenthesis. */
8694 fputc (')', file);
8695 }
8696 else
8697 {
8698 output_addr_const (file, addr);
8699 }
8700 }
8701 \f
8702 /* Target hook for assembling integer objects. The sparc version has
8703 special handling for aligned DI-mode objects. */
8704
8705 static bool
8706 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
8707 {
8708 /* ??? We only output .xword's for symbols and only then in environments
8709 where the assembler can handle them. */
8710 if (aligned_p && size == 8
8711 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
8712 {
8713 if (TARGET_V9)
8714 {
8715 assemble_integer_with_op ("\t.xword\t", x);
8716 return true;
8717 }
8718 else
8719 {
8720 assemble_aligned_integer (4, const0_rtx);
8721 assemble_aligned_integer (4, x);
8722 return true;
8723 }
8724 }
8725 return default_assemble_integer (x, size, aligned_p);
8726 }
8727 \f
8728 /* Return the value of a code used in the .proc pseudo-op that says
8729 what kind of result this function returns. For non-C types, we pick
8730 the closest C type. */
8731
8732 #ifndef SHORT_TYPE_SIZE
8733 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
8734 #endif
8735
8736 #ifndef INT_TYPE_SIZE
8737 #define INT_TYPE_SIZE BITS_PER_WORD
8738 #endif
8739
8740 #ifndef LONG_TYPE_SIZE
8741 #define LONG_TYPE_SIZE BITS_PER_WORD
8742 #endif
8743
8744 #ifndef LONG_LONG_TYPE_SIZE
8745 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
8746 #endif
8747
8748 #ifndef FLOAT_TYPE_SIZE
8749 #define FLOAT_TYPE_SIZE BITS_PER_WORD
8750 #endif
8751
8752 #ifndef DOUBLE_TYPE_SIZE
8753 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
8754 #endif
8755
8756 #ifndef LONG_DOUBLE_TYPE_SIZE
8757 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
8758 #endif
8759
8760 unsigned long
8761 sparc_type_code (register tree type)
8762 {
8763 register unsigned long qualifiers = 0;
8764 register unsigned shift;
8765
8766 /* Only the first 30 bits of the qualifier are valid. We must refrain from
8767 setting more, since some assemblers will give an error for this. Also,
8768 we must be careful to avoid shifts of 32 bits or more to avoid getting
8769 unpredictable results. */
8770
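/* Worked example: for a C "int *", the first iteration ORs in the
pointer code (1 << 6) and steps to the pointed-to type; the second
iteration sees a signed INTEGER_TYPE of INT_TYPE_SIZE and returns
qualifiers | 4, i.e. 0x44 overall. */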
8771 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
8772 {
8773 switch (TREE_CODE (type))
8774 {
8775 case ERROR_MARK:
8776 return qualifiers;
8777
8778 case ARRAY_TYPE:
8779 qualifiers |= (3 << shift);
8780 break;
8781
8782 case FUNCTION_TYPE:
8783 case METHOD_TYPE:
8784 qualifiers |= (2 << shift);
8785 break;
8786
8787 case POINTER_TYPE:
8788 case REFERENCE_TYPE:
8789 case OFFSET_TYPE:
8790 qualifiers |= (1 << shift);
8791 break;
8792
8793 case RECORD_TYPE:
8794 return (qualifiers | 8);
8795
8796 case UNION_TYPE:
8797 case QUAL_UNION_TYPE:
8798 return (qualifiers | 9);
8799
8800 case ENUMERAL_TYPE:
8801 return (qualifiers | 10);
8802
8803 case VOID_TYPE:
8804 return (qualifiers | 16);
8805
8806 case INTEGER_TYPE:
8807 /* If this is a range type, consider it to be the underlying
8808 type. */
8809 if (TREE_TYPE (type) != 0)
8810 break;
8811
8812 /* Carefully distinguish all the standard types of C,
8813 without messing up if the language is not C. We do this by
8814 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
8815 look at both the names and the above fields, but that's redundant.
8816 Any type whose size is between two C types will be considered
8817 to be the wider of the two types. Also, we do not have a
8818 special code to use for "long long", so anything wider than
8819 long is treated the same. Note that we can't distinguish
8820 between "int" and "long" in this code if they are the same
8821 size, but that's fine, since neither can the assembler. */
8822
8823 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
8824 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
8825
8826 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
8827 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
8828
8829 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
8830 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
8831
8832 else
8833 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
8834
8835 case REAL_TYPE:
8836 /* If this is a range type, consider it to be the underlying
8837 type. */
8838 if (TREE_TYPE (type) != 0)
8839 break;
8840
8841 /* Carefully distinguish all the standard types of C,
8842 without messing up if the language is not C. */
8843
8844 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
8845 return (qualifiers | 6);
8846
8847 else
8848 return (qualifiers | 7);
8849
8850 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
8851 /* ??? We need to distinguish between double and float complex types,
8852 but I don't know how yet because I can't reach this code from
8853 existing front-ends. */
8854 return (qualifiers | 7); /* Who knows? */
8855
8856 case VECTOR_TYPE:
8857 case BOOLEAN_TYPE: /* Boolean truth value type. */
8858 case LANG_TYPE:
8859 case NULLPTR_TYPE:
8860 return qualifiers;
8861
8862 default:
8863 gcc_unreachable (); /* Not a type! */
8864 }
8865 }
8866
8867 return qualifiers;
8868 }
8869 \f
8870 /* Nested function support. */
8871
8872 /* Emit RTL insns to initialize the variable parts of a trampoline.
8873 FNADDR is an RTX for the address of the function's pure code.
8874 CXT is an RTX for the static chain value for the function.
8875
8876 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
8877 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
8878 (to store insns). This is a bit excessive. Perhaps a different
8879 mechanism would be better here.
8880
8881 Emit enough FLUSH insns to synchronize the data and instruction caches. */
8882
8883 static void
8884 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
8885 {
8886 /* SPARC 32-bit trampoline:
8887
8888 sethi %hi(fn), %g1
8889 sethi %hi(static), %g2
8890 jmp %g1+%lo(fn)
8891 or %g2, %lo(static), %g2
8892
8893 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
8894 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
8895 */
8896
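/* Decoding the magic constants against the formats above: 0x03000000
is "sethi 0, %g1" (rd = 1, op2 = 100) and 0x05000000 is "sethi 0, %g2";
ORing in the address shifted right by 10 fills the 22-bit immediate.
Likewise 0x81c06000 is "jmpl %g1 + 0, %g0" and 0x8410a000 is
"or %g2, 0, %g2", whose simm13 fields receive the low 10 bits of the
respective address. */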
8897 emit_move_insn
8898 (adjust_address (m_tramp, SImode, 0),
8899 expand_binop (SImode, ior_optab,
8900 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
8901 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
8902 NULL_RTX, 1, OPTAB_DIRECT));
8903
8904 emit_move_insn
8905 (adjust_address (m_tramp, SImode, 4),
8906 expand_binop (SImode, ior_optab,
8907 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
8908 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
8909 NULL_RTX, 1, OPTAB_DIRECT));
8910
8911 emit_move_insn
8912 (adjust_address (m_tramp, SImode, 8),
8913 expand_binop (SImode, ior_optab,
8914 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
8915 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
8916 NULL_RTX, 1, OPTAB_DIRECT));
8917
8918 emit_move_insn
8919 (adjust_address (m_tramp, SImode, 12),
8920 expand_binop (SImode, ior_optab,
8921 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
8922 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
8923 NULL_RTX, 1, OPTAB_DIRECT));
8924
8925 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
8926 aligned on a 16 byte boundary so one flush clears it all. */
8927 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
8928 if (sparc_cpu != PROCESSOR_ULTRASPARC
8929 && sparc_cpu != PROCESSOR_ULTRASPARC3
8930 && sparc_cpu != PROCESSOR_NIAGARA
8931 && sparc_cpu != PROCESSOR_NIAGARA2
8932 && sparc_cpu != PROCESSOR_NIAGARA3
8933 && sparc_cpu != PROCESSOR_NIAGARA4)
8934 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
8935
8936 /* Call __enable_execute_stack after writing onto the stack to make sure
8937 the stack address is accessible. */
8938 #ifdef HAVE_ENABLE_EXECUTE_STACK
8939 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
8940 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
8941 #endif
8942
8943 }
8944
8945 /* The 64-bit version is simpler because it makes more sense to load the
8946 values as "immediate" data out of the trampoline. It's also easier since
8947 we can read the PC without clobbering a register. */
8948
8949 static void
8950 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
8951 {
8952 /* SPARC 64-bit trampoline:
8953
8954 rd %pc, %g1
8955 ldx [%g1+24], %g5
8956 jmp %g5
8957 ldx [%g1+16], %g5
8958 +16 bytes data
8959 */
8960
8961 emit_move_insn (adjust_address (m_tramp, SImode, 0),
8962 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
8963 emit_move_insn (adjust_address (m_tramp, SImode, 4),
8964 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
8965 emit_move_insn (adjust_address (m_tramp, SImode, 8),
8966 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
8967 emit_move_insn (adjust_address (m_tramp, SImode, 12),
8968 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
8969 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
8970 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
8971 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
8972
8973 if (sparc_cpu != PROCESSOR_ULTRASPARC
8974 && sparc_cpu != PROCESSOR_ULTRASPARC3
8975 && sparc_cpu != PROCESSOR_NIAGARA
8976 && sparc_cpu != PROCESSOR_NIAGARA2
8977 && sparc_cpu != PROCESSOR_NIAGARA3
8978 && sparc_cpu != PROCESSOR_NIAGARA4)
8979 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
8980
8981 /* Call __enable_execute_stack after writing onto the stack to make sure
8982 the stack address is accessible. */
8983 #ifdef HAVE_ENABLE_EXECUTE_STACK
8984 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
8985 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
8986 #endif
8987 }
8988
8989 /* Worker for TARGET_TRAMPOLINE_INIT. */
8990
8991 static void
8992 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
8993 {
8994 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
8995 cxt = force_reg (Pmode, cxt);
8996 if (TARGET_ARCH64)
8997 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
8998 else
8999 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9000 }
9001 \f
9002 /* Adjust the cost of a scheduling dependency. Return the new cost of
9003 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9004
9005 static int
9006 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9007 {
9008 enum attr_type insn_type;
9009
9010 if (! recog_memoized (insn))
9011 return 0;
9012
9013 insn_type = get_attr_type (insn);
9014
9015 if (REG_NOTE_KIND (link) == 0)
9016 {
9017 /* Data dependency; DEP_INSN writes a register that INSN reads some
9018 cycles later. */
9019
9020 /* If a load, then the dependence must be on the memory address;
9021 add an extra "cycle". Note that the cost could be two cycles
9022 if the reg was written late in an instruction group; we cannot tell
9023 here. */
9024 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9025 return cost + 3;
9026
9027 /* Get the delay only if the address of the store is the dependence. */
9028 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9029 {
9030 rtx pat = PATTERN(insn);
9031 rtx dep_pat = PATTERN (dep_insn);
9032
9033 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9034 return cost; /* This should not happen! */
9035
9036 /* The dependency between the two instructions was on the data that
9037 is being stored. Assume that this implies that the address of the
9038 store is not dependent. */
9039 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9040 return cost;
9041
9042 return cost + 3; /* An approximation. */
9043 }
9044
9045 /* A shift instruction cannot receive its data from an instruction
9046 in the same cycle; add a one cycle penalty. */
9047 if (insn_type == TYPE_SHIFT)
9048 return cost + 3; /* Split before cascade into shift. */
9049 }
9050 else
9051 {
9052 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9053 INSN writes some cycles later. */
9054
9055 /* These are only significant for the fpu unit; writing a fp reg before
9056 the fpu has finished with it stalls the processor. */
9057
9058 /* Reusing an integer register causes no problems. */
9059 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9060 return 0;
9061 }
9062
9063 return cost;
9064 }
9065
9066 static int
9067 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9068 {
9069 enum attr_type insn_type, dep_type;
9070 rtx pat = PATTERN(insn);
9071 rtx dep_pat = PATTERN (dep_insn);
9072
9073 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9074 return cost;
9075
9076 insn_type = get_attr_type (insn);
9077 dep_type = get_attr_type (dep_insn);
9078
9079 switch (REG_NOTE_KIND (link))
9080 {
9081 case 0:
9082 /* Data dependency; DEP_INSN writes a register that INSN reads some
9083 cycles later. */
9084
9085 switch (insn_type)
9086 {
9087 case TYPE_STORE:
9088 case TYPE_FPSTORE:
9089 /* Get the delay iff the address of the store is the dependence. */
9090 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9091 return cost;
9092
9093 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9094 return cost;
9095 return cost + 3;
9096
9097 case TYPE_LOAD:
9098 case TYPE_SLOAD:
9099 case TYPE_FPLOAD:
9100 /* If a load, then the dependence must be on the memory address. If
9101 the addresses aren't equal, then it might be a false dependency. */
9102 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9103 {
9104 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9105 || GET_CODE (SET_DEST (dep_pat)) != MEM
9106 || GET_CODE (SET_SRC (pat)) != MEM
9107 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9108 XEXP (SET_SRC (pat), 0)))
9109 return cost + 2;
9110
9111 return cost + 8;
9112 }
9113 break;
9114
9115 case TYPE_BRANCH:
9116 /* Compare to branch latency is 0. There is no benefit from
9117 separating compare and branch. */
9118 if (dep_type == TYPE_COMPARE)
9119 return 0;
9120 /* Floating point compare to branch latency is less than
9121 compare to conditional move. */
9122 if (dep_type == TYPE_FPCMP)
9123 return cost - 1;
9124 break;
9125 default:
9126 break;
9127 }
9128 break;
9129
9130 case REG_DEP_ANTI:
9131 /* Anti-dependencies only penalize the fpu unit. */
9132 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9133 return 0;
9134 break;
9135
9136 default:
9137 break;
9138 }
9139
9140 return cost;
9141 }
9142
9143 static int
9144 sparc_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
9145 {
9146 switch (sparc_cpu)
9147 {
9148 case PROCESSOR_SUPERSPARC:
9149 cost = supersparc_adjust_cost (insn, link, dep, cost);
9150 break;
9151 case PROCESSOR_HYPERSPARC:
9152 case PROCESSOR_SPARCLITE86X:
9153 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9154 break;
9155 default:
9156 break;
9157 }
9158 return cost;
9159 }
9160
9161 static void
9162 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9163 int sched_verbose ATTRIBUTE_UNUSED,
9164 int max_ready ATTRIBUTE_UNUSED)
9165 {}
9166
9167 static int
9168 sparc_use_sched_lookahead (void)
9169 {
9170 if (sparc_cpu == PROCESSOR_NIAGARA
9171 || sparc_cpu == PROCESSOR_NIAGARA2
9172 || sparc_cpu == PROCESSOR_NIAGARA3)
9173 return 0;
9174 if (sparc_cpu == PROCESSOR_NIAGARA4)
9175 return 2;
9176 if (sparc_cpu == PROCESSOR_ULTRASPARC
9177 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9178 return 4;
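/* The mask below is a compact membership test: it is nonzero exactly
when sparc_cpu is SUPERSPARC, HYPERSPARC or SPARCLITE86X. */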
9179 if ((1 << sparc_cpu) &
9180 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9181 (1 << PROCESSOR_SPARCLITE86X)))
9182 return 3;
9183 return 0;
9184 }
9185
9186 static int
9187 sparc_issue_rate (void)
9188 {
9189 switch (sparc_cpu)
9190 {
9191 case PROCESSOR_NIAGARA:
9192 case PROCESSOR_NIAGARA2:
9193 case PROCESSOR_NIAGARA3:
9194 default:
9195 return 1;
9196 case PROCESSOR_NIAGARA4:
9197 case PROCESSOR_V9:
9198 /* Assume V9 processors are capable of at least dual-issue. */
9199 return 2;
9200 case PROCESSOR_SUPERSPARC:
9201 return 3;
9202 case PROCESSOR_HYPERSPARC:
9203 case PROCESSOR_SPARCLITE86X:
9204 return 2;
9205 case PROCESSOR_ULTRASPARC:
9206 case PROCESSOR_ULTRASPARC3:
9207 return 4;
9208 }
9209 }
9210
9211 static int
9212 set_extends (rtx insn)
9213 {
9214 register rtx pat = PATTERN (insn);
9215
9216 switch (GET_CODE (SET_SRC (pat)))
9217 {
9218 /* Load and some shift instructions zero extend. */
9219 case MEM:
9220 case ZERO_EXTEND:
9221 /* sethi clears the high bits. */
9222 case HIGH:
9223 /* LO_SUM is used with sethi; sethi cleared the high
9224 bits, and the values used with lo_sum are positive. */
9225 case LO_SUM:
9226 /* Store flag stores 0 or 1. */
9227 case LT: case LTU:
9228 case GT: case GTU:
9229 case LE: case LEU:
9230 case GE: case GEU:
9231 case EQ:
9232 case NE:
9233 return 1;
9234 case AND:
9235 {
9236 rtx op0 = XEXP (SET_SRC (pat), 0);
9237 rtx op1 = XEXP (SET_SRC (pat), 1);
9238 if (GET_CODE (op1) == CONST_INT)
9239 return INTVAL (op1) >= 0;
9240 if (GET_CODE (op0) != REG)
9241 return 0;
9242 if (sparc_check_64 (op0, insn) == 1)
9243 return 1;
9244 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9245 }
9246 case IOR:
9247 case XOR:
9248 {
9249 rtx op0 = XEXP (SET_SRC (pat), 0);
9250 rtx op1 = XEXP (SET_SRC (pat), 1);
9251 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9252 return 0;
9253 if (GET_CODE (op1) == CONST_INT)
9254 return INTVAL (op1) >= 0;
9255 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9256 }
9257 case LSHIFTRT:
9258 return GET_MODE (SET_SRC (pat)) == SImode;
9259 /* Positive integers leave the high bits zero. */
9260 case CONST_DOUBLE:
9261 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9262 case CONST_INT:
9263 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9264 case ASHIFTRT:
9265 case SIGN_EXTEND:
9266 return - (GET_MODE (SET_SRC (pat)) == SImode);
9267 case REG:
9268 return sparc_check_64 (SET_SRC (pat), insn);
9269 default:
9270 return 0;
9271 }
9272 }
9273
9274 /* We _ought_ to have only one kind per function, but... */
9275 static GTY(()) rtx sparc_addr_diff_list;
9276 static GTY(()) rtx sparc_addr_list;
9277
9278 void
9279 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9280 {
9281 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9282 if (diff)
9283 sparc_addr_diff_list
9284 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9285 else
9286 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9287 }
9288
9289 static void
9290 sparc_output_addr_vec (rtx vec)
9291 {
9292 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9293 int idx, vlen = XVECLEN (body, 0);
9294
9295 #ifdef ASM_OUTPUT_ADDR_VEC_START
9296 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9297 #endif
9298
9299 #ifdef ASM_OUTPUT_CASE_LABEL
9300 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9301 NEXT_INSN (lab));
9302 #else
9303 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9304 #endif
9305
9306 for (idx = 0; idx < vlen; idx++)
9307 {
9308 ASM_OUTPUT_ADDR_VEC_ELT
9309 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9310 }
9311
9312 #ifdef ASM_OUTPUT_ADDR_VEC_END
9313 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9314 #endif
9315 }
9316
9317 static void
9318 sparc_output_addr_diff_vec (rtx vec)
9319 {
9320 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9321 rtx base = XEXP (XEXP (body, 0), 0);
9322 int idx, vlen = XVECLEN (body, 1);
9323
9324 #ifdef ASM_OUTPUT_ADDR_VEC_START
9325 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9326 #endif
9327
9328 #ifdef ASM_OUTPUT_CASE_LABEL
9329 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9330 NEXT_INSN (lab));
9331 #else
9332 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9333 #endif
9334
9335 for (idx = 0; idx < vlen; idx++)
9336 {
9337 ASM_OUTPUT_ADDR_DIFF_ELT
9338 (asm_out_file,
9339 body,
9340 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9341 CODE_LABEL_NUMBER (base));
9342 }
9343
9344 #ifdef ASM_OUTPUT_ADDR_VEC_END
9345 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9346 #endif
9347 }
9348
9349 static void
9350 sparc_output_deferred_case_vectors (void)
9351 {
9352 rtx t;
9353 int align;
9354
9355 if (sparc_addr_list == NULL_RTX
9356 && sparc_addr_diff_list == NULL_RTX)
9357 return;
9358
9359 /* Align to cache line in the function's code section. */
9360 switch_to_section (current_function_section ());
9361
9362 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9363 if (align > 0)
9364 ASM_OUTPUT_ALIGN (asm_out_file, align);
9365
9366 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9367 sparc_output_addr_vec (XEXP (t, 0));
9368 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9369 sparc_output_addr_diff_vec (XEXP (t, 0));
9370
9371 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9372 }
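/* For illustration, the deferred output produces jump tables such as
   (label numbers hypothetical, addr-diff case, 32-bit):

	.align	4
   .LL100:
	.word	.LL101-.LL100
	.word	.LL102-.LL100

   i.e. the case vectors end up aligned at the tail of the function's
   code section instead of interleaved with the code.  */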
9373
9374 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9375 unknown. Return 1 if the high bits are zero, -1 if the register is
9376 sign extended. */
9377 int
9378 sparc_check_64 (rtx x, rtx insn)
9379 {
9380 /* If a register is set only once it is safe to ignore insns this
9381 code does not know how to handle. The loop will either recognize
9382 the single set and return the correct value or fail to recognize
9383 it and return 0. */
9384 int set_once = 0;
9385 rtx y = x;
9386
9387 gcc_assert (GET_CODE (x) == REG);
9388
9389 if (GET_MODE (x) == DImode)
9390 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9391
9392 if (flag_expensive_optimizations
9393 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9394 set_once = 1;
9395
9396 if (insn == 0)
9397 {
9398 if (set_once)
9399 insn = get_last_insn_anywhere ();
9400 else
9401 return 0;
9402 }
9403
9404 while ((insn = PREV_INSN (insn)))
9405 {
9406 switch (GET_CODE (insn))
9407 {
9408 case JUMP_INSN:
9409 case NOTE:
9410 break;
9411 case CODE_LABEL:
9412 case CALL_INSN:
9413 default:
9414 if (! set_once)
9415 return 0;
9416 break;
9417 case INSN:
9418 {
9419 rtx pat = PATTERN (insn);
9420 if (GET_CODE (pat) != SET)
9421 return 0;
9422 if (rtx_equal_p (x, SET_DEST (pat)))
9423 return set_extends (insn);
9424 if (y && rtx_equal_p (y, SET_DEST (pat)))
9425 return set_extends (insn);
9426 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9427 return 0;
9428 }
9429 }
9430 }
9431 return 0;
9432 }
9433
9434 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9435 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9436
9437 const char *
9438 output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
9439 {
9440 static char asm_code[60];
9441
9442 /* The scratch register is only required when the destination
9443 register is not a 64-bit global or out register. */
9444 if (which_alternative != 2)
9445 operands[3] = operands[0];
9446
9447 /* We can only shift by constants <= 63. */
9448 if (GET_CODE (operands[2]) == CONST_INT)
9449 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9450
9451 if (GET_CODE (operands[1]) == CONST_INT)
9452 {
9453 output_asm_insn ("mov\t%1, %3", operands);
9454 }
9455 else
9456 {
9457 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9458 if (sparc_check_64 (operands[1], insn) <= 0)
9459 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9460 output_asm_insn ("or\t%L1, %3, %3", operands);
9461 }
9462
9463 strcpy (asm_code, opcode);
9464
9465 if (which_alternative != 2)
9466 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9467 else
9468 return
9469 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9470 }
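/* For illustration, with a 64-bit scratch (%3) the complete sequence
   returned for, say, OPCODE "sllx" and a non-constant input is:

	sllx	%H1, 32, %3	! glue the input pair into the scratch
	srl	%L1, 0, %L1	! zero-extend low word if it may be dirty
	or	%L1, %3, %3
	sllx	%3, %2, %3	! the shift proper
	srlx	%3, 32, %H0	! split the result back into a pair
	mov	%3, %L0

   where %H and %L are the print_operand codes for the high and low
   words of a 64-bit operand.  */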
9471 \f
9472 /* Output rtl to increment the profiler label LABELNO
9473 for profiling a function entry. */
9474
9475 void
9476 sparc_profile_hook (int labelno)
9477 {
9478 char buf[32];
9479 rtx lab, fun;
9480
9481 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9482 if (NO_PROFILE_COUNTERS)
9483 {
9484 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9485 }
9486 else
9487 {
9488 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9489 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9490 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9491 }
9492 }
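/* For illustration, with -pg and profile counters enabled this adds at
   each function entry a call along the lines of (label hypothetical):

	sethi	%hi(.LLP0), %o0
	call	mcount
	 or	%o0, %lo(.LLP0), %o0

   passing the per-function counter label as the single argument; the
   exact mcount name comes from MCOUNT_FUNCTION for the target OS.  */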
9493 \f
9494 #ifdef TARGET_SOLARIS
9495 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9496
9497 static void
9498 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9499 tree decl ATTRIBUTE_UNUSED)
9500 {
9501 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9502 {
9503 solaris_elf_asm_comdat_section (name, flags, decl);
9504 return;
9505 }
9506
9507 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9508
9509 if (!(flags & SECTION_DEBUG))
9510 fputs (",#alloc", asm_out_file);
9511 if (flags & SECTION_WRITE)
9512 fputs (",#write", asm_out_file);
9513 if (flags & SECTION_TLS)
9514 fputs (",#tls", asm_out_file);
9515 if (flags & SECTION_CODE)
9516 fputs (",#execinstr", asm_out_file);
9517
9518 /* ??? Handle SECTION_BSS. */
9519
9520 fputc ('\n', asm_out_file);
9521 }
9522 #endif /* TARGET_SOLARIS */
9523
9524 /* We do not allow indirect calls to be optimized into sibling calls.
9525
9526 We cannot use sibling calls when delayed branches are disabled
9527 because they will likely require the call delay slot to be filled.
9528
9529 Also, on SPARC 32-bit we cannot emit a sibling call when the
9530 current function returns a structure. This is because the "unimp
9531 after call" convention would cause the callee to return to the
9532 wrong place. The generic code already disallows cases where the
9533 function being called returns a structure.
9534
9535 It may seem strange how this last case could occur. Usually there
9536 is code after the call which jumps to epilogue code which dumps the
9537 return value into the struct return area. That ought to invalidate
9538 the sibling call right? Well, in the C++ case we can end up passing
9539 the pointer to the struct return area to a constructor (which returns
9540 void) and then nothing else happens. Such a sibling call would look
9541 valid without the added check here.
9542
9543 VxWorks PIC PLT entries require the global pointer to be initialized
9544 on entry. We therefore can't emit sibling calls to them. */
9545 static bool
9546 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9547 {
9548 return (decl
9549 && flag_delayed_branch
9550 && (TARGET_ARCH64 || ! cfun->returns_struct)
9551 && !(TARGET_VXWORKS_RTP
9552 && flag_pic
9553 && !targetm.binds_local_p (decl)));
9554 }
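/* For illustration, the ARCH32 struct-return restriction rejects a tail
   call in code like:

     struct S { int a[8]; };
     extern struct S g (void);
     struct S f (void) { return g (); }

   because f's caller expects the "unimp" word after its call to f, so
   g must not return directly to it.  */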
9555 \f
9556 /* libfunc renaming. */
9557
9558 static void
9559 sparc_init_libfuncs (void)
9560 {
9561 if (TARGET_ARCH32)
9562 {
9563 /* Use the subroutines that Sun's library provides for integer
9564 multiply and divide. The `*' prevents an underscore from
9565 being prepended by the compiler. .umul is a little faster
9566 than .mul. */
9567 set_optab_libfunc (smul_optab, SImode, "*.umul");
9568 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9569 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9570 set_optab_libfunc (smod_optab, SImode, "*.rem");
9571 set_optab_libfunc (umod_optab, SImode, "*.urem");
9572
9573 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
9574 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9575 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9576 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9577 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9578 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9579
9580 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9581 is because with soft-float, the SFmode and DFmode sqrt
9582 instructions will be absent, and the compiler will notice and
9583 try to use the TFmode sqrt instruction for calls to the
9584 builtin function sqrt, but this fails. */
9585 if (TARGET_FPU)
9586 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9587
9588 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9589 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9590 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9591 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9592 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9593 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9594
9595 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
9596 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
9597 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
9598 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
9599
9600 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
9601 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
9602 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9603 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9604
9605 if (DITF_CONVERSION_LIBFUNCS)
9606 {
9607 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
9608 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
9609 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9610 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9611 }
9612
9613 if (SUN_CONVERSION_LIBFUNCS)
9614 {
9615 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9616 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9617 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9618 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9619 }
9620 }
9621 if (TARGET_ARCH64)
9622 {
9623 /* In the SPARC 64bit ABI, SImode multiply and divide functions
9624 do not exist in the library. Make sure the compiler does not
9625 emit calls to them by accident. (It should always use the
9626 hardware instructions.) */
9627 set_optab_libfunc (smul_optab, SImode, 0);
9628 set_optab_libfunc (sdiv_optab, SImode, 0);
9629 set_optab_libfunc (udiv_optab, SImode, 0);
9630 set_optab_libfunc (smod_optab, SImode, 0);
9631 set_optab_libfunc (umod_optab, SImode, 0);
9632
9633 if (SUN_INTEGER_MULTIPLY_64)
9634 {
9635 set_optab_libfunc (smul_optab, DImode, "__mul64");
9636 set_optab_libfunc (sdiv_optab, DImode, "__div64");
9637 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
9638 set_optab_libfunc (smod_optab, DImode, "__rem64");
9639 set_optab_libfunc (umod_optab, DImode, "__urem64");
9640 }
9641
9642 if (SUN_CONVERSION_LIBFUNCS)
9643 {
9644 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
9645 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
9646 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
9647 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
9648 }
9649 }
9650 }
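/* For illustration, a user-visible effect of the TFmode mappings above:

     long double f (long double x, long double y) { return x * y; }

   compiled for 32-bit SPARC calls _Q_mul instead of the generic
   __multf3 libcall name, as the SPARC 32-bit ABI requires.  */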
9651 \f
9652 static tree def_builtin (const char *name, int code, tree type)
9653 {
9654 return add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
9655 NULL_TREE);
9656 }
9657 
9658 static tree def_builtin_const (const char *name, int code, tree type)
9659 {
9660 tree t = def_builtin (name, code, type);
9661
9662 if (t)
9663 TREE_READONLY (t) = 1;
9664
9665 return t;
9666 }
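/* For illustration of the distinction: fexpand has no side effects, so
   it is registered with def_builtin_const and calls like

     __builtin_vis_fexpand (v)

   can be CSEd and, per sparc_fold_builtin below, folded when V is
   constant; alignaddr updates the GSR, so it goes through plain
   def_builtin and is never treated as const.  */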
9667
9668 /* Implement the TARGET_INIT_BUILTINS target hook.
9669 Create builtin functions for special SPARC instructions. */
9670
9671 static void
9672 sparc_init_builtins (void)
9673 {
9674 if (TARGET_VIS)
9675 sparc_vis_init_builtins ();
9676 }
9677
9678 /* Create builtin functions for VIS 1.0 instructions. */
9679
9680 static void
9681 sparc_vis_init_builtins (void)
9682 {
9683 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
9684 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
9685 tree v4hi = build_vector_type (intHI_type_node, 4);
9686 tree v2hi = build_vector_type (intHI_type_node, 2);
9687 tree v2si = build_vector_type (intSI_type_node, 2);
9688 tree v1si = build_vector_type (intSI_type_node, 1);
9689
9690 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
9691 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
9692 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
9693 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
9694 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
9695 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
9696 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
9697 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
9698 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
9699 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
9700 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
9701 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
9702 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
9703 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
9704 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
9705 v8qi, v8qi,
9706 intDI_type_node, 0);
9707 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
9708 v8qi, v8qi, 0);
9709 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
9710 v8qi, v8qi, 0);
9711 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
9712 intDI_type_node,
9713 intDI_type_node, 0);
9714 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
9715 intSI_type_node,
9716 intSI_type_node, 0);
9717 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
9718 ptr_type_node,
9719 intSI_type_node, 0);
9720 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
9721 ptr_type_node,
9722 intDI_type_node, 0);
9723 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
9724 ptr_type_node,
9725 ptr_type_node, 0);
9726 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
9727 ptr_type_node,
9728 ptr_type_node, 0);
9729 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
9730 v4hi, v4hi, 0);
9731 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
9732 v2si, v2si, 0);
9733 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
9734 v4hi, v4hi, 0);
9735 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
9736 v2si, v2si, 0);
9737 tree void_ftype_di = build_function_type_list (void_type_node,
9738 intDI_type_node, 0);
9739 tree di_ftype_void = build_function_type_list (intDI_type_node,
9740 void_type_node, 0);
9741 tree void_ftype_si = build_function_type_list (void_type_node,
9742 intSI_type_node, 0);
9743 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
9744 float_type_node,
9745 float_type_node, 0);
9746 tree df_ftype_df_df = build_function_type_list (double_type_node,
9747 double_type_node,
9748 double_type_node, 0);
9749
9750 /* Packing and expanding vectors. */
9751 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
9752 v4qi_ftype_v4hi);
9753 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
9754 v8qi_ftype_v2si_v8qi);
9755 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
9756 v2hi_ftype_v2si);
9757 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
9758 v4hi_ftype_v4qi);
9759 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
9760 v8qi_ftype_v4qi_v4qi);
9761
9762 /* Multiplications. */
9763 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
9764 v4hi_ftype_v4qi_v4hi);
9765 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
9766 v4hi_ftype_v4qi_v2hi);
9767 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
9768 v4hi_ftype_v4qi_v2hi);
9769 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
9770 v4hi_ftype_v8qi_v4hi);
9771 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
9772 v4hi_ftype_v8qi_v4hi);
9773 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
9774 v2si_ftype_v4qi_v2hi);
9775 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
9776 v2si_ftype_v4qi_v2hi);
9777
9778 /* Data aligning. */
9779 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
9780 v4hi_ftype_v4hi_v4hi);
9781 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
9782 v8qi_ftype_v8qi_v8qi);
9783 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
9784 v2si_ftype_v2si_v2si);
9785 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
9786 di_ftype_di_di);
9787
9788 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
9789 void_ftype_di);
9790 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
9791 di_ftype_void);
9792
9793 if (TARGET_ARCH64)
9794 {
9795 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
9796 ptr_ftype_ptr_di);
9797 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
9798 ptr_ftype_ptr_di);
9799 }
9800 else
9801 {
9802 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
9803 ptr_ftype_ptr_si);
9804 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
9805 ptr_ftype_ptr_si);
9806 }
9807
9808 /* Pixel distance. */
9809 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
9810 di_ftype_v8qi_v8qi_di);
9811
9812 /* Edge handling. */
9813 if (TARGET_ARCH64)
9814 {
9815 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
9816 di_ftype_ptr_ptr);
9817 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
9818 di_ftype_ptr_ptr);
9819 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
9820 di_ftype_ptr_ptr);
9821 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
9822 di_ftype_ptr_ptr);
9823 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
9824 di_ftype_ptr_ptr);
9825 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
9826 di_ftype_ptr_ptr);
9827 if (TARGET_VIS2)
9828 {
9829 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
9830 di_ftype_ptr_ptr);
9831 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
9832 di_ftype_ptr_ptr);
9833 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
9834 di_ftype_ptr_ptr);
9835 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
9836 di_ftype_ptr_ptr);
9837 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
9838 di_ftype_ptr_ptr);
9839 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
9840 di_ftype_ptr_ptr);
9841 }
9842 }
9843 else
9844 {
9845 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
9846 si_ftype_ptr_ptr);
9847 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
9848 si_ftype_ptr_ptr);
9849 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
9850 si_ftype_ptr_ptr);
9851 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
9852 si_ftype_ptr_ptr);
9853 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
9854 si_ftype_ptr_ptr);
9855 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
9856 si_ftype_ptr_ptr);
9857 if (TARGET_VIS2)
9858 {
9859 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
9860 si_ftype_ptr_ptr);
9861 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
9862 si_ftype_ptr_ptr);
9863 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
9864 si_ftype_ptr_ptr);
9865 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
9866 si_ftype_ptr_ptr);
9867 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
9868 si_ftype_ptr_ptr);
9869 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
9870 si_ftype_ptr_ptr);
9871 }
9872 }
9873
9874 /* Pixel compare. */
9875 if (TARGET_ARCH64)
9876 {
9877 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
9878 di_ftype_v4hi_v4hi);
9879 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
9880 di_ftype_v2si_v2si);
9881 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
9882 di_ftype_v4hi_v4hi);
9883 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
9884 di_ftype_v2si_v2si);
9885 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
9886 di_ftype_v4hi_v4hi);
9887 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
9888 di_ftype_v2si_v2si);
9889 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
9890 di_ftype_v4hi_v4hi);
9891 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
9892 di_ftype_v2si_v2si);
9893 }
9894 else
9895 {
9896 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
9897 si_ftype_v4hi_v4hi);
9898 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
9899 si_ftype_v2si_v2si);
9900 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
9901 si_ftype_v4hi_v4hi);
9902 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
9903 si_ftype_v2si_v2si);
9904 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
9905 si_ftype_v4hi_v4hi);
9906 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
9907 si_ftype_v2si_v2si);
9908 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
9909 si_ftype_v4hi_v4hi);
9910 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
9911 si_ftype_v2si_v2si);
9912 }
9913
9914 /* Addition and subtraction. */
9915 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
9916 v4hi_ftype_v4hi_v4hi);
9917 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
9918 v2hi_ftype_v2hi_v2hi);
9919 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
9920 v2si_ftype_v2si_v2si);
9921 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
9922 v1si_ftype_v1si_v1si);
9923 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
9924 v4hi_ftype_v4hi_v4hi);
9925 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
9926 v2hi_ftype_v2hi_v2hi);
9927 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
9928 v2si_ftype_v2si_v2si);
9929 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
9930 v1si_ftype_v1si_v1si);
9931
9932 /* Three-dimensional array addressing. */
9933 if (TARGET_ARCH64)
9934 {
9935 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
9936 di_ftype_di_di);
9937 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
9938 di_ftype_di_di);
9939 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
9940 di_ftype_di_di);
9941 }
9942 else
9943 {
9944 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
9945 si_ftype_si_si);
9946 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
9947 si_ftype_si_si);
9948 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
9949 si_ftype_si_si);
9950 }
9951
9952 if (TARGET_VIS2)
9953 {
9954 /* Byte mask and shuffle */
9955 if (TARGET_ARCH64)
9956 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
9957 di_ftype_di_di);
9958 else
9959 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
9960 si_ftype_si_si);
9961 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
9962 v4hi_ftype_v4hi_v4hi);
9963 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
9964 v8qi_ftype_v8qi_v8qi);
9965 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
9966 v2si_ftype_v2si_v2si);
9967 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
9968 di_ftype_di_di);
9969 }
9970
9971 if (TARGET_VIS3)
9972 {
9973 if (TARGET_ARCH64)
9974 {
9975 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
9976 void_ftype_di);
9977 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
9978 void_ftype_di);
9979 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
9980 void_ftype_di);
9981 }
9982 else
9983 {
9984 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
9985 void_ftype_si);
9986 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
9987 void_ftype_si);
9988 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
9989 void_ftype_si);
9990 }
9991
9992 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
9993 v4hi_ftype_v4hi_v4hi);
9994
9995 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
9996 v4hi_ftype_v4hi_v4hi);
9997 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
9998 v4hi_ftype_v4hi_v4hi);
9999 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10000 v4hi_ftype_v4hi_v4hi);
10001 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10002 v4hi_ftype_v4hi_v4hi);
10003 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10004 v2si_ftype_v2si_v2si);
10005 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10006 v2si_ftype_v2si_v2si);
10007 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10008 v2si_ftype_v2si_v2si);
10009 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10010 v2si_ftype_v2si_v2si);
10011
10012 if (TARGET_ARCH64)
10013 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10014 di_ftype_v8qi_v8qi);
10015 else
10016 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10017 si_ftype_v8qi_v8qi);
10018
10019 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10020 v4hi_ftype_v4hi_v4hi);
10021 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10022 di_ftype_di_di);
10023 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10024 di_ftype_di_di);
10025
10026 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10027 v4hi_ftype_v4hi_v4hi);
10028 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10029 v2hi_ftype_v2hi_v2hi);
10030 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10031 v4hi_ftype_v4hi_v4hi);
10032 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10033 v2hi_ftype_v2hi_v2hi);
10034 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10035 v2si_ftype_v2si_v2si);
10036 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10037 v1si_ftype_v1si_v1si);
10038 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10039 v2si_ftype_v2si_v2si);
10040 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10041 v1si_ftype_v1si_v1si);
10042
10043 if (TARGET_ARCH64)
10044 {
10045 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10046 di_ftype_v8qi_v8qi);
10047 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10048 di_ftype_v8qi_v8qi);
10049 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10050 di_ftype_v8qi_v8qi);
10051 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10052 di_ftype_v8qi_v8qi);
10053 }
10054 else
10055 {
10056 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10057 si_ftype_v8qi_v8qi);
10058 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10059 si_ftype_v8qi_v8qi);
10060 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10061 si_ftype_v8qi_v8qi);
10062 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10063 si_ftype_v8qi_v8qi);
10064 }
10065
10066 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10067 sf_ftype_sf_sf);
10068 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10069 df_ftype_df_df);
10070 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10071 sf_ftype_sf_sf);
10072 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10073 df_ftype_df_df);
10074 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10075 sf_ftype_sf_sf);
10076 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10077 df_ftype_df_df);
10078
10079 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10080 di_ftype_di_di);
10081 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10082 di_ftype_di_di);
10083 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10084 di_ftype_di_di);
10085 }
10086 }
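/* For illustration, user code reaches these through the builtins (or
   the __vis_* wrappers in visintrin.h), e.g.:

     typedef short vec16 __attribute__ ((vector_size (8)));

     vec16 add16 (vec16 a, vec16 b)
     {
       return __builtin_vis_fpadd16 (a, b);
     }

   which compiles to a single fpadd16 instruction when VIS is enabled.  */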
10087
10088 /* Handle TARGET_EXPAND_BUILTIN target hook.
10089 Expand builtin functions for sparc intrinsics. */
10090
10091 static rtx
10092 sparc_expand_builtin (tree exp, rtx target,
10093 rtx subtarget ATTRIBUTE_UNUSED,
10094 enum machine_mode tmode ATTRIBUTE_UNUSED,
10095 int ignore ATTRIBUTE_UNUSED)
10096 {
10097 tree arg;
10098 call_expr_arg_iterator iter;
10099 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10100 unsigned int icode = DECL_FUNCTION_CODE (fndecl);
10101 rtx pat, op[4];
10102 int arg_count = 0;
10103 bool nonvoid;
10104
10105 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10106
10107 if (nonvoid)
10108 {
10109 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10110 if (!target
10111 || GET_MODE (target) != tmode
10112 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10113 op[0] = gen_reg_rtx (tmode);
10114 else
10115 op[0] = target;
10116 }
10117 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10118 {
10119 const struct insn_operand_data *insn_op;
10120 int idx;
10121
10122 if (arg == error_mark_node)
10123 return NULL_RTX;
10124
10125 arg_count++;
10126 idx = arg_count - !nonvoid;
10127 insn_op = &insn_data[icode].operand[idx];
10128 op[arg_count] = expand_normal (arg);
10129
10130 if (insn_op->mode == V1DImode
10131 && GET_MODE (op[arg_count]) == DImode)
10132 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10133 else if (insn_op->mode == V1SImode
10134 && GET_MODE (op[arg_count]) == SImode)
10135 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10136
10137 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10138 insn_op->mode))
10139 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10140 }
10141
10142 switch (arg_count)
10143 {
10144 case 0:
10145 pat = GEN_FCN (icode) (op[0]);
10146 break;
10147 case 1:
10148 if (nonvoid)
10149 pat = GEN_FCN (icode) (op[0], op[1]);
10150 else
10151 pat = GEN_FCN (icode) (op[1]);
10152 break;
10153 case 2:
10154 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10155 break;
10156 case 3:
10157 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10158 break;
10159 default:
10160 gcc_unreachable ();
10161 }
10162
10163 if (!pat)
10164 return NULL_RTX;
10165
10166 emit_insn (pat);
10167
10168 if (nonvoid)
10169 return op[0];
10170 else
10171 return const0_rtx;
10172 }
10173
10174 static int
10175 sparc_vis_mul8x16 (int e8, int e16)
10176 {
10177 return (e8 * e16 + 128) / 256;
10178 }
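/* For illustration, this is the fmul8x16 rounding rule: the product of
   an unsigned 8-bit element and a signed 16-bit element is scaled back
   by 256 with a rounding bias.  E.g. e8 = 100, e16 = 300:

     (100 * 300 + 128) / 256 = 30128 / 256 = 117

   versus 117.1875 exactly, i.e. round to nearest for nonnegative
   products.  */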
10179
10180 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10181 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10182
10183 static void
10184 sparc_handle_vis_mul8x16 (tree *n_elts, int fncode, tree inner_type,
10185 tree cst0, tree cst1)
10186 {
10187 unsigned i, num = VECTOR_CST_NELTS (cst0);
10188 int scale;
10189
10190 switch (fncode)
10191 {
10192 case CODE_FOR_fmul8x16_vis:
10193 for (i = 0; i < num; ++i)
10194 {
10195 int val
10196 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10197 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10198 n_elts[i] = build_int_cst (inner_type, val);
10199 }
10200 break;
10201
10202 case CODE_FOR_fmul8x16au_vis:
10203 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10204
10205 for (i = 0; i < num; ++i)
10206 {
10207 int val
10208 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10209 scale);
10210 n_elts[i] = build_int_cst (inner_type, val);
10211 }
10212 break;
10213
10214 case CODE_FOR_fmul8x16al_vis:
10215 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10216
10217 for (i = 0; i < num; ++i)
10218 {
10219 int val
10220 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10221 scale);
10222 n_elts[i] = build_int_cst (inner_type, val);
10223 }
10224 break;
10225
10226 default:
10227 gcc_unreachable ();
10228 }
10229 }
10230
10231 /* Handle TARGET_FOLD_BUILTIN target hook.
10232 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10233 result of the function call is ignored. NULL_TREE is returned if the
10234 function could not be folded. */
10235
10236 static tree
10237 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10238 tree *args, bool ignore)
10239 {
10240 tree arg0, arg1, arg2;
10241 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10242 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
10243
10244 if (ignore)
10245 {
10246 /* Note that a switch statement instead of the sequence of tests would
10247 be incorrect as many of the CODE_FOR values could be CODE_FOR_nothing
10248 and that would yield multiple alternatives with identical values. */
10249 if (icode == CODE_FOR_alignaddrsi_vis
10250 || icode == CODE_FOR_alignaddrdi_vis
10251 || icode == CODE_FOR_wrgsr_vis
10252 || icode == CODE_FOR_bmasksi_vis
10253 || icode == CODE_FOR_bmaskdi_vis
10254 || icode == CODE_FOR_cmask8si_vis
10255 || icode == CODE_FOR_cmask8di_vis
10256 || icode == CODE_FOR_cmask16si_vis
10257 || icode == CODE_FOR_cmask16di_vis
10258 || icode == CODE_FOR_cmask32si_vis
10259 || icode == CODE_FOR_cmask32di_vis)
10260 ;
10261 else
10262 return build_zero_cst (rtype);
10263 }
10264
10265 switch (icode)
10266 {
10267 case CODE_FOR_fexpand_vis:
10268 arg0 = args[0];
10269 STRIP_NOPS (arg0);
10270
10271 if (TREE_CODE (arg0) == VECTOR_CST)
10272 {
10273 tree inner_type = TREE_TYPE (rtype);
10274 tree *n_elts;
10275 unsigned i;
10276
10277 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10278 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10279 n_elts[i] = build_int_cst (inner_type,
10280 TREE_INT_CST_LOW
10281 (VECTOR_CST_ELT (arg0, i)) << 4);
10282 return build_vector (rtype, n_elts);
10283 }
10284 break;
10285
10286 case CODE_FOR_fmul8x16_vis:
10287 case CODE_FOR_fmul8x16au_vis:
10288 case CODE_FOR_fmul8x16al_vis:
10289 arg0 = args[0];
10290 arg1 = args[1];
10291 STRIP_NOPS (arg0);
10292 STRIP_NOPS (arg1);
10293
10294 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10295 {
10296 tree inner_type = TREE_TYPE (rtype);
10297 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10298 sparc_handle_vis_mul8x16 (n_elts, icode, inner_type, arg0, arg1);
10299 return build_vector (rtype, n_elts);
10300 }
10301 break;
10302
10303 case CODE_FOR_fpmerge_vis:
10304 arg0 = args[0];
10305 arg1 = args[1];
10306 STRIP_NOPS (arg0);
10307 STRIP_NOPS (arg1);
10308
10309 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10310 {
10311 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10312 unsigned i;
10313 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10314 {
10315 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10316 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10317 }
10318
10319 return build_vector (rtype, n_elts);
10320 }
10321 break;
10322
10323 case CODE_FOR_pdist_vis:
10324 arg0 = args[0];
10325 arg1 = args[1];
10326 arg2 = args[2];
10327 STRIP_NOPS (arg0);
10328 STRIP_NOPS (arg1);
10329 STRIP_NOPS (arg2);
10330
10331 if (TREE_CODE (arg0) == VECTOR_CST
10332 && TREE_CODE (arg1) == VECTOR_CST
10333 && TREE_CODE (arg2) == INTEGER_CST)
10334 {
10335 bool overflow = false;
10336 double_int result = TREE_INT_CST (arg2);
10337 double_int tmp;
10338 unsigned i;
10339
10340 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10341 {
10342 double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i));
10343 double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i));
10344
10345 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10346
10347 tmp = e1.neg_with_overflow (&neg1_ovf);
10348 tmp = e0.add_with_sign (tmp, false, &add1_ovf);
10349 if (tmp.is_negative ())
10350 tmp = tmp.neg_with_overflow (&neg2_ovf);
10351
10352 result = result.add_with_sign (tmp, false, &add2_ovf);
10353 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10354 }
10355
10356 gcc_assert (!overflow);
10357
10358 return build_int_cst_wide (rtype, result.low, result.high);
10359 }
10360
10361 default:
10362 break;
10363 }
10364
10365 return NULL_TREE;
10366 }
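/* For illustration, a fold the code above performs: with a constant
   argument such as the v4qi vector {1, 2, 3, 4},

     __builtin_vis_fexpand (v)

   becomes the VECTOR_CST {16, 32, 48, 64} (each element shifted left
   by 4 into its 16-bit slot) and no fexpand instruction is emitted.  */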
10367 \f
10368 /* ??? This duplicates information provided to the compiler by the
10369 ??? scheduler description. Some day, teach genautomata to output
10370 ??? the latencies and then CSE will just use that. */
10371
10372 static bool
10373 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10374 int *total, bool speed ATTRIBUTE_UNUSED)
10375 {
10376 enum machine_mode mode = GET_MODE (x);
10377 bool float_mode_p = FLOAT_MODE_P (mode);
10378
10379 switch (code)
10380 {
10381 case CONST_INT:
10382 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10383 {
10384 *total = 0;
10385 return true;
10386 }
10387 /* FALLTHRU */
10388
10389 case HIGH:
10390 *total = 2;
10391 return true;
10392
10393 case CONST:
10394 case LABEL_REF:
10395 case SYMBOL_REF:
10396 *total = 4;
10397 return true;
10398
10399 case CONST_DOUBLE:
10400 if (GET_MODE (x) == VOIDmode
10401 && ((CONST_DOUBLE_HIGH (x) == 0
10402 && CONST_DOUBLE_LOW (x) < 0x1000)
10403 || (CONST_DOUBLE_HIGH (x) == -1
10404 && CONST_DOUBLE_LOW (x) < 0
10405 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10406 *total = 0;
10407 else
10408 *total = 8;
10409 return true;
10410
10411 case MEM:
10412 /* If outer-code was a sign or zero extension, a cost
10413 of COSTS_N_INSNS (1) was already added in. This is
10414 why we are subtracting it back out. */
10415 if (outer_code == ZERO_EXTEND)
10416 {
10417 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
10418 }
10419 else if (outer_code == SIGN_EXTEND)
10420 {
10421 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
10422 }
10423 else if (float_mode_p)
10424 {
10425 *total = sparc_costs->float_load;
10426 }
10427 else
10428 {
10429 *total = sparc_costs->int_load;
10430 }
10431
10432 return true;
10433
10434 case PLUS:
10435 case MINUS:
10436 if (float_mode_p)
10437 *total = sparc_costs->float_plusminus;
10438 else
10439 *total = COSTS_N_INSNS (1);
10440 return false;
10441
10442 case FMA:
10443 {
10444 rtx sub;
10445
10446 gcc_assert (float_mode_p);
10447 *total = sparc_costs->float_mul;
10448
10449 sub = XEXP (x, 0);
10450 if (GET_CODE (sub) == NEG)
10451 sub = XEXP (sub, 0);
10452 *total += rtx_cost (sub, FMA, 0, speed);
10453
10454 sub = XEXP (x, 2);
10455 if (GET_CODE (sub) == NEG)
10456 sub = XEXP (sub, 0);
10457 *total += rtx_cost (sub, FMA, 2, speed);
10458 return true;
10459 }
10460
10461 case MULT:
10462 if (float_mode_p)
10463 *total = sparc_costs->float_mul;
10464 else if (! TARGET_HARD_MUL)
10465 *total = COSTS_N_INSNS (25);
10466 else
10467 {
10468 int bit_cost;
10469
10470 bit_cost = 0;
10471 if (sparc_costs->int_mul_bit_factor)
10472 {
10473 int nbits;
10474
10475 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
10476 {
10477 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
10478 for (nbits = 0; value != 0; value &= value - 1)
10479 nbits++;
10480 }
10481 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
10482 && GET_MODE (XEXP (x, 1)) == VOIDmode)
10483 {
10484 rtx x1 = XEXP (x, 1);
10485 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
10486 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
10487
10488 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
10489 nbits++;
10490 for (; value2 != 0; value2 &= value2 - 1)
10491 nbits++;
10492 }
10493 else
10494 nbits = 7;
10495
10496 if (nbits < 3)
10497 nbits = 3;
10498 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
10499 bit_cost = COSTS_N_INSNS (bit_cost);
10500 }
10501
10502 if (mode == DImode)
10503 *total = sparc_costs->int_mulX + bit_cost;
10504 else
10505 *total = sparc_costs->int_mul + bit_cost;
10506 }
10507 return false;
10508
10509 case ASHIFT:
10510 case ASHIFTRT:
10511 case LSHIFTRT:
10512 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
10513 return false;
10514
10515 case DIV:
10516 case UDIV:
10517 case MOD:
10518 case UMOD:
10519 if (float_mode_p)
10520 {
10521 if (mode == DFmode)
10522 *total = sparc_costs->float_div_df;
10523 else
10524 *total = sparc_costs->float_div_sf;
10525 }
10526 else
10527 {
10528 if (mode == DImode)
10529 *total = sparc_costs->int_divX;
10530 else
10531 *total = sparc_costs->int_div;
10532 }
10533 return false;
10534
10535 case NEG:
10536 if (! float_mode_p)
10537 {
10538 *total = COSTS_N_INSNS (1);
10539 return false;
10540 }
10541 /* FALLTHRU */
10542
10543 case ABS:
10544 case FLOAT:
10545 case UNSIGNED_FLOAT:
10546 case FIX:
10547 case UNSIGNED_FIX:
10548 case FLOAT_EXTEND:
10549 case FLOAT_TRUNCATE:
10550 *total = sparc_costs->float_move;
10551 return false;
10552
10553 case SQRT:
10554 if (mode == DFmode)
10555 *total = sparc_costs->float_sqrt_df;
10556 else
10557 *total = sparc_costs->float_sqrt_sf;
10558 return false;
10559
10560 case COMPARE:
10561 if (float_mode_p)
10562 *total = sparc_costs->float_cmp;
10563 else
10564 *total = COSTS_N_INSNS (1);
10565 return false;
10566
10567 case IF_THEN_ELSE:
10568 if (float_mode_p)
10569 *total = sparc_costs->float_cmove;
10570 else
10571 *total = sparc_costs->int_cmove;
10572 return false;
10573
10574 case IOR:
10575 /* Handle the NAND vector patterns. */
10576 if (sparc_vector_mode_supported_p (GET_MODE (x))
10577 && GET_CODE (XEXP (x, 0)) == NOT
10578 && GET_CODE (XEXP (x, 1)) == NOT)
10579 {
10580 *total = COSTS_N_INSNS (1);
10581 return true;
10582 }
10583 else
10584 return false;
10585
10586 default:
10587 return false;
10588 }
10589 }
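/* For illustration, the variable MULT cost above: multiplying by the
   constant 0x1234 (five bits set) on a CPU whose cost table has, say,
   int_mul_bit_factor == 2 yields

     nbits    = 5
     bit_cost = COSTS_N_INSNS ((5 - 3) / 2) = COSTS_N_INSNS (1)
     *total   = int_mul + COSTS_N_INSNS (1)

   modeling a hardware multiplier whose latency grows with the number
   of significant bits in the constant operand.  */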
10590
10591 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
10592
10593 static inline bool
10594 general_or_i64_p (reg_class_t rclass)
10595 {
10596 return (rclass == GENERAL_REGS || rclass == I64_REGS);
10597 }
10598
10599 /* Implement TARGET_REGISTER_MOVE_COST. */
10600
10601 static int
10602 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
10603 reg_class_t from, reg_class_t to)
10604 {
10605 bool need_memory = false;
10606
10607 if (from == FPCC_REGS || to == FPCC_REGS)
10608 need_memory = true;
10609 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
10610 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
10611 {
10612 if (TARGET_VIS3)
10613 {
10614 int size = GET_MODE_SIZE (mode);
10615 if (size == 8 || size == 4)
10616 {
10617 if (! TARGET_ARCH32 || size == 4)
10618 return 4;
10619 else
10620 return 6;
10621 }
10622 }
10623 need_memory = true;
10624 }
10625
10626 if (need_memory)
10627 {
10628 if (sparc_cpu == PROCESSOR_ULTRASPARC
10629 || sparc_cpu == PROCESSOR_ULTRASPARC3
10630 || sparc_cpu == PROCESSOR_NIAGARA
10631 || sparc_cpu == PROCESSOR_NIAGARA2
10632 || sparc_cpu == PROCESSOR_NIAGARA3
10633 || sparc_cpu == PROCESSOR_NIAGARA4)
10634 return 12;
10635
10636 return 6;
10637 }
10638
10639 return 2;
10640 }
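/* For illustration: with VIS3 a 4-byte or 8-byte value can be moved
   between the integer and FP files directly (movwtos/movstouw and
   friends), hence the costs of 4, or 6 for a 64-bit move that must be
   split on 32-bit; without VIS3 such moves bounce through memory,
   which the UltraSPARC and Niagara families make especially
   expensive (12).  */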
10641
10642 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
10643 This is achieved by means of a manual dynamic stack space allocation in
10644 the current frame. We make the assumption that SEQ doesn't contain any
10645 function calls, with the possible exception of calls to the GOT helper. */
10646
10647 static void
10648 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
10649 {
10650 /* We must preserve the lowest 16 words for the register save area. */
10651 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
10652 /* We really need only 2 words of fresh stack space. */
10653 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
10654
10655 rtx slot
10656 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
10657 SPARC_STACK_BIAS + offset));
10658
10659 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
10660 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
10661 if (reg2)
10662 emit_insn (gen_rtx_SET (VOIDmode,
10663 adjust_address (slot, word_mode, UNITS_PER_WORD),
10664 reg2));
10665 emit_insn (seq);
10666 if (reg2)
10667 emit_insn (gen_rtx_SET (VOIDmode,
10668 reg2,
10669 adjust_address (slot, word_mode, UNITS_PER_WORD)));
10670 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
10671 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
10672 }
10673
10674 /* Output the assembler code for a thunk function. THUNK_DECL is the
10675 declaration for the thunk function itself, FUNCTION is the decl for
10676 the target function. DELTA is an immediate constant offset to be
10677 added to THIS. If VCALL_OFFSET is nonzero, the word at address
10678 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
10679
10680 static void
10681 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10682 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10683 tree function)
10684 {
10685 rtx this_rtx, insn, funexp;
10686 unsigned int int_arg_first;
10687
10688 reload_completed = 1;
10689 epilogue_completed = 1;
10690
10691 emit_note (NOTE_INSN_PROLOGUE_END);
10692
10693 if (TARGET_FLAT)
10694 {
10695 sparc_leaf_function_p = 1;
10696
10697 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10698 }
10699 else if (flag_delayed_branch)
10700 {
10701 /* We will emit a regular sibcall below, so we need to instruct
10702 output_sibcall that we are in a leaf function. */
10703 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
10704
10705 /* This will cause final.c to invoke leaf_renumber_regs so we
10706 must behave as if we were in a not-yet-leafified function. */
10707 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
10708 }
10709 else
10710 {
10711 /* We will emit the sibcall manually below, so we will need to
10712 manually spill non-leaf registers. */
10713 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
10714
10715 /* We really are in a leaf function. */
10716 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10717 }
10718
10719 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
10720 returns a structure, the structure return pointer is there instead. */
10721 if (TARGET_ARCH64
10722 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10723 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
10724 else
10725 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
10726
10727 /* Add DELTA. When possible use a plain add, otherwise load it into
10728 a register first. */
10729 if (delta)
10730 {
10731 rtx delta_rtx = GEN_INT (delta);
10732
10733 if (! SPARC_SIMM13_P (delta))
10734 {
10735 rtx scratch = gen_rtx_REG (Pmode, 1);
10736 emit_move_insn (scratch, delta_rtx);
10737 delta_rtx = scratch;
10738 }
10739
10740 /* THIS_RTX += DELTA. */
10741 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
10742 }
10743
10744 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
10745 if (vcall_offset)
10746 {
10747 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10748 rtx scratch = gen_rtx_REG (Pmode, 1);
10749
10750 gcc_assert (vcall_offset < 0);
10751
10752 /* SCRATCH = *THIS_RTX. */
10753 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
10754
10755 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
10756 may not have any available scratch register at this point. */
10757 if (SPARC_SIMM13_P (vcall_offset))
10758 ;
10759 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
10760 else if (! fixed_regs[5]
10761 /* The below sequence is made up of at least 2 insns,
10762 while the default method may need only one. */
10763 && vcall_offset < -8192)
10764 {
10765 rtx scratch2 = gen_rtx_REG (Pmode, 5);
10766 emit_move_insn (scratch2, vcall_offset_rtx);
10767 vcall_offset_rtx = scratch2;
10768 }
10769 else
10770 {
10771 rtx increment = GEN_INT (-4096);
10772
10773 /* VCALL_OFFSET is a negative number whose typical range can be
10774 estimated as -32768..0 in 32-bit mode. In almost all cases
10775 it is therefore cheaper to emit multiple add insns than
10776 spilling and loading the constant into a register (at least
10777 6 insns). */
10778 while (! SPARC_SIMM13_P (vcall_offset))
10779 {
10780 emit_insn (gen_add2_insn (scratch, increment));
10781 vcall_offset += 4096;
10782 }
10783 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
10784 }
10785
10786 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
10787 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
10788 gen_rtx_PLUS (Pmode,
10789 scratch,
10790 vcall_offset_rtx)));
10791
10792 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
10793 emit_insn (gen_add2_insn (this_rtx, scratch));
10794 }
10795
10796 /* Generate a tail call to the target function. */
10797 if (! TREE_USED (function))
10798 {
10799 assemble_external (function);
10800 TREE_USED (function) = 1;
10801 }
10802 funexp = XEXP (DECL_RTL (function), 0);
10803
10804 if (flag_delayed_branch)
10805 {
10806 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10807 insn = emit_call_insn (gen_sibcall (funexp));
10808 SIBLING_CALL_P (insn) = 1;
10809 }
10810 else
10811 {
10812 /* The hoops we have to jump through in order to generate a sibcall
10813 without using delay slots... */
10814 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
10815
10816 if (flag_pic)
10817 {
10818 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
10819 start_sequence ();
10820 load_got_register (); /* clobbers %o7 */
10821 scratch = sparc_legitimize_pic_address (funexp, scratch);
10822 seq = get_insns ();
10823 end_sequence ();
10824 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
10825 }
10826 else if (TARGET_ARCH32)
10827 {
10828 emit_insn (gen_rtx_SET (VOIDmode,
10829 scratch,
10830 gen_rtx_HIGH (SImode, funexp)));
10831 emit_insn (gen_rtx_SET (VOIDmode,
10832 scratch,
10833 gen_rtx_LO_SUM (SImode, scratch, funexp)));
10834 }
10835 else /* TARGET_ARCH64 */
10836 {
10837 switch (sparc_cmodel)
10838 {
10839 case CM_MEDLOW:
10840 case CM_MEDMID:
10841 /* The destination can serve as a temporary. */
10842 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
10843 break;
10844
10845 case CM_MEDANY:
10846 case CM_EMBMEDANY:
10847 /* The destination cannot serve as a temporary. */
10848 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
10849 start_sequence ();
10850 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
10851 seq = get_insns ();
10852 end_sequence ();
10853 emit_and_preserve (seq, spill_reg, 0);
10854 break;
10855
10856 default:
10857 gcc_unreachable ();
10858 }
10859 }
10860
10861 emit_jump_insn (gen_indirect_jump (scratch));
10862 }
10863
10864 emit_barrier ();
10865
10866 /* Run just enough of rest_of_compilation to get the insns emitted.
10867 There's not really enough bulk here to make other passes such as
10868 instruction scheduling worth while. Note that use_thunk calls
10869 assemble_start_function and assemble_end_function. */
10870 insn = get_insns ();
10871 shorten_branches (insn);
10872 final_start_function (insn, file, 1);
10873 final (insn, file, 1);
10874 final_end_function ();
10875
10876 reload_completed = 0;
10877 epilogue_completed = 0;
10878 }
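/* For illustration, with -fno-delayed-branch on 32-bit and a small
   positive DELTA the thunk emitted above is simply (target name
   hypothetical):

	add	%o0, 16, %o0		! this += delta
	sethi	%hi(target), %g1
	or	%g1, %lo(target), %g1
	jmp	%g1
	 nop
   */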
10879
10880 /* Return true if sparc_output_mi_thunk would be able to output the
10881 assembler code for the thunk function specified by the arguments
10882 it is passed, and false otherwise. */
10883 static bool
10884 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
10885 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
10886 HOST_WIDE_INT vcall_offset,
10887 const_tree function ATTRIBUTE_UNUSED)
10888 {
10889 /* Bound the loop used in the default method above. */
10890 return (vcall_offset >= -32768 || ! fixed_regs[5]);
10891 }
10892
10893 /* We use the machine specific reorg pass to enable workarounds for errata. */
10894
10895 static void
10896 sparc_reorg (void)
10897 {
10898 rtx insn, next;
10899
10900 /* The only erratum we handle for now is that of the AT697F processor. */
10901 if (!sparc_fix_at697f)
10902 return;
10903
10904 /* We need to have the (essentially) final form of the insn stream in order
10905 to properly detect the various hazards. Run delay slot scheduling. */
10906 if (optimize > 0 && flag_delayed_branch)
10907 {
10908 cleanup_barriers ();
10909 dbr_schedule (get_insns ());
10910 }
10911
10912 /* Now look for specific patterns in the insn stream. */
10913 for (insn = get_insns (); insn; insn = next)
10914 {
10915 bool insert_nop = false;
10916 rtx set;
10917
10918 /* Look for a single-word load into an odd-numbered FP register. */
10919 if (NONJUMP_INSN_P (insn)
10920 && (set = single_set (insn)) != NULL_RTX
10921 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
10922 && MEM_P (SET_SRC (set))
10923 && REG_P (SET_DEST (set))
10924 && REGNO (SET_DEST (set)) > 31
10925 && REGNO (SET_DEST (set)) % 2 != 0)
10926 {
10927 /* The wrong dependency is on the enclosing double register. */
10928 unsigned int x = REGNO (SET_DEST (set)) - 1;
10929 unsigned int src1, src2, dest;
10930 int code;
10931
10932 /* If the insn has a delay slot, then it cannot be problematic. */
10933 next = next_active_insn (insn);
10934 if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
10935 code = -1;
10936 else
10937 {
10938 extract_insn (next);
10939 code = INSN_CODE (next);
10940 }
10941
10942 switch (code)
10943 {
10944 case CODE_FOR_adddf3:
10945 case CODE_FOR_subdf3:
10946 case CODE_FOR_muldf3:
10947 case CODE_FOR_divdf3:
10948 dest = REGNO (recog_data.operand[0]);
10949 src1 = REGNO (recog_data.operand[1]);
10950 src2 = REGNO (recog_data.operand[2]);
10951 if (src1 != src2)
10952 {
10953 /* Case [1-4]:
10954 ld [address], %fx+1
10955 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
10956 if ((src1 == x || src2 == x)
10957 && (dest == src1 || dest == src2))
10958 insert_nop = true;
10959 }
10960 else
10961 {
10962 /* Case 5:
10963 ld [address], %fx+1
10964 FPOPd %fx, %fx, %fx */
10965 if (src1 == x
10966 && dest == src1
10967 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
10968 insert_nop = true;
10969 }
10970 break;
10971
10972 case CODE_FOR_sqrtdf2:
10973 dest = REGNO (recog_data.operand[0]);
10974 src1 = REGNO (recog_data.operand[1]);
10975 /* Case 6:
10976 ld [address], %fx+1
10977 fsqrtd %fx, %fx */
10978 if (src1 == x && dest == src1)
10979 insert_nop = true;
10980 break;
10981
10982 default:
10983 break;
10984 }
10985 }
10986 else
10987 next = NEXT_INSN (insn);
10988
10989 if (insert_nop)
10990 emit_insn_after (gen_nop (), insn);
10991 }
10992 }
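/* For illustration, an instance of case 5 of the workaround above:

	ld	[%o0], %f1	! single-word load into an odd FP register
	nop			! inserted by sparc_reorg
	faddd	%f0, %f0, %f0	! FPOPd on the enclosing double %f0/%f1

   without the nop, the AT697F erratum lets the faddd and the load of
   %f1 interfere through the false dependency on the enclosing double
   register.  */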
10993
10994 /* How to allocate a 'struct machine_function'. */
10995
10996 static struct machine_function *
10997 sparc_init_machine_status (void)
10998 {
10999 return ggc_alloc_cleared_machine_function ();
11000 }
11001
11002 /* Locate some local-dynamic symbol still in use by this function
11003 so that we can print its name in local-dynamic base patterns. */
11004
11005 static const char *
11006 get_some_local_dynamic_name (void)
11007 {
11008 rtx insn;
11009
11010 if (cfun->machine->some_ld_name)
11011 return cfun->machine->some_ld_name;
11012
11013 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11014 if (INSN_P (insn)
11015 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11016 return cfun->machine->some_ld_name;
11017
11018 gcc_unreachable ();
11019 }
11020
11021 static int
11022 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11023 {
11024 rtx x = *px;
11025
11026 if (x
11027 && GET_CODE (x) == SYMBOL_REF
11028 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11029 {
11030 cfun->machine->some_ld_name = XSTR (x, 0);
11031 return 1;
11032 }
11033
11034 return 0;
11035 }
11036
11037 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11038 We need to emit DTP-relative relocations. */
11039
11040 static void
11041 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11042 {
11043 switch (size)
11044 {
11045 case 4:
11046 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11047 break;
11048 case 8:
11049 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11050 break;
11051 default:
11052 gcc_unreachable ();
11053 }
11054 output_addr_const (file, x);
11055 fputs (")", file);
11056 }
11057
11058 /* Do whatever processing is required at the end of a file. */
11059
11060 static void
11061 sparc_file_end (void)
11062 {
11063 /* If we need to emit the special GOT helper function, do so now. */
11064 if (got_helper_rtx)
11065 {
11066 const char *name = XSTR (got_helper_rtx, 0);
11067 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11068 #ifdef DWARF2_UNWIND_INFO
11069 bool do_cfi;
11070 #endif
11071
11072 if (USE_HIDDEN_LINKONCE)
11073 {
11074 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11075 get_identifier (name),
11076 build_function_type_list (void_type_node,
11077 NULL_TREE));
11078 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11079 NULL_TREE, void_type_node);
11080 TREE_PUBLIC (decl) = 1;
11081 TREE_STATIC (decl) = 1;
11082 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11083 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11084 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11085 resolve_unique_section (decl, 0, flag_function_sections);
11086 allocate_struct_function (decl, true);
11087 cfun->is_thunk = 1;
11088 current_function_decl = decl;
11089 init_varasm_status ();
11090 assemble_start_function (decl, name);
11091 }
11092 else
11093 {
11094 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11095 switch_to_section (text_section);
11096 if (align > 0)
11097 ASM_OUTPUT_ALIGN (asm_out_file, align);
11098 ASM_OUTPUT_LABEL (asm_out_file, name);
11099 }
11100
11101 #ifdef DWARF2_UNWIND_INFO
11102 do_cfi = dwarf2out_do_cfi_asm ();
11103 if (do_cfi)
11104 fprintf (asm_out_file, "\t.cfi_startproc\n");
11105 #endif
11106 if (flag_delayed_branch)
11107 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11108 reg_name, reg_name);
11109 else
11110 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11111 reg_name, reg_name);
11112 #ifdef DWARF2_UNWIND_INFO
11113 if (do_cfi)
11114 fprintf (asm_out_file, "\t.cfi_endproc\n");
11115 #endif
11116 }
11117
11118 if (NEED_INDICATE_EXEC_STACK)
11119 file_end_indicate_exec_stack ();
11120
11121 #ifdef TARGET_SOLARIS
11122 solaris_file_end ();
11123 #endif
11124 }
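
/* As an illustration, with the GOT pointer in %l7 and delayed branches
   enabled, the helper body emitted above boils down to (sketch; the actual
   symbol name is taken from got_helper_rtx):

	jmp	%o7+8
	 add	%o7, %l7, %l7  */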
11125
11126 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11127 /* Implement TARGET_MANGLE_TYPE. */
11128
11129 static const char *
11130 sparc_mangle_type (const_tree type)
11131 {
11132 if (!TARGET_64BIT
11133 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11134 && TARGET_LONG_DOUBLE_128)
11135 return "g";
11136
11137 /* For all other types, use normal C++ mangling. */
11138 return NULL;
11139 }
11140 #endif
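
/* For example (illustrative), with 128-bit long double on 32-bit,
   "void f(long double)" mangles as _Z1fg rather than the default _Z1fe.  */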
11141
/* Expand a membar instruction for various use cases.  Both the LOAD_STORE
   and BEFORE_AFTER arguments are of the form X_Y.  They are two-bit masks
   where bit 0 indicates that X is true and bit 1 indicates that Y is true.  */
11145
11146 void
11147 sparc_emit_membar_for_model (enum memmodel model,
11148 int load_store, int before_after)
11149 {
11150 /* Bits for the MEMBAR mmask field. */
11151 const int LoadLoad = 1;
11152 const int StoreLoad = 2;
11153 const int LoadStore = 4;
11154 const int StoreStore = 8;
11155
11156 int mm = 0, implied = 0;
11157
11158 switch (sparc_memory_model)
11159 {
11160 case SMM_SC:
11161 /* Sequential Consistency. All memory transactions are immediately
11162 visible in sequential execution order. No barriers needed. */
11163 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11164 break;
11165
11166 case SMM_TSO:
11167 /* Total Store Ordering: all memory transactions with store semantics
11168 are followed by an implied StoreStore. */
11169 implied |= StoreStore;
11170 /* FALLTHRU */
11171
11172 case SMM_PSO:
11173 /* Partial Store Ordering: all memory transactions with load semantics
11174 are followed by an implied LoadLoad | LoadStore. */
11175 implied |= LoadLoad | LoadStore;
11176
      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
11179 if (load_store == 3 && before_after == 2)
11180 implied |= StoreLoad | StoreStore;
11181 /* FALLTHRU */
11182
11183 case SMM_RMO:
11184 /* Relaxed Memory Ordering: no implicit bits. */
11185 break;
11186
11187 default:
11188 gcc_unreachable ();
11189 }
11190
11191 if (before_after & 1)
11192 {
11193 if (model == MEMMODEL_RELEASE
11194 || model == MEMMODEL_ACQ_REL
11195 || model == MEMMODEL_SEQ_CST)
11196 {
11197 if (load_store & 1)
11198 mm |= LoadLoad | StoreLoad;
11199 if (load_store & 2)
11200 mm |= LoadStore | StoreStore;
11201 }
11202 }
11203 if (before_after & 2)
11204 {
11205 if (model == MEMMODEL_ACQUIRE
11206 || model == MEMMODEL_ACQ_REL
11207 || model == MEMMODEL_SEQ_CST)
11208 {
11209 if (load_store & 1)
11210 mm |= LoadLoad | LoadStore;
11211 if (load_store & 2)
11212 mm |= StoreLoad | StoreStore;
11213 }
11214 }
11215
11216 /* Remove the bits implied by the system memory model. */
11217 mm &= ~implied;
11218
11219 /* For raw barriers (before+after), always emit a barrier.
11220 This will become a compile-time barrier if needed. */
11221 if (mm || before_after == 3)
11222 emit_insn (gen_membar (GEN_INT (mm)));
11223 }
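
/* Worked example (illustrative): an acquire load expands with
   load_store = 1 and before_after = 2, so the code above computes
   mm = LoadLoad | LoadStore = 5.  Under SMM_RMO nothing is implied and a
   membar #LoadLoad | #LoadStore is emitted; under SMM_TSO both bits are
   already implied, mm becomes 0 and no barrier is emitted.  */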
11224
/* Expand code to perform an 8-bit or 16-bit compare and swap by doing a
   32-bit compare and swap on the word containing the byte or half-word.  */
11227
11228 static void
11229 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11230 rtx oldval, rtx newval)
11231 {
11232 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11233 rtx addr = gen_reg_rtx (Pmode);
11234 rtx off = gen_reg_rtx (SImode);
11235 rtx oldv = gen_reg_rtx (SImode);
11236 rtx newv = gen_reg_rtx (SImode);
11237 rtx oldvalue = gen_reg_rtx (SImode);
11238 rtx newvalue = gen_reg_rtx (SImode);
11239 rtx res = gen_reg_rtx (SImode);
11240 rtx resv = gen_reg_rtx (SImode);
11241 rtx memsi, val, mask, end_label, loop_label, cc;
11242
11243 emit_insn (gen_rtx_SET (VOIDmode, addr,
11244 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11245
11246 if (Pmode != SImode)
11247 addr1 = gen_lowpart (SImode, addr1);
11248 emit_insn (gen_rtx_SET (VOIDmode, off,
11249 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11250
11251 memsi = gen_rtx_MEM (SImode, addr);
11252 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11253 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11254
11255 val = copy_to_reg (memsi);
11256
11257 emit_insn (gen_rtx_SET (VOIDmode, off,
11258 gen_rtx_XOR (SImode, off,
11259 GEN_INT (GET_MODE (mem) == QImode
11260 ? 3 : 2))));
11261
11262 emit_insn (gen_rtx_SET (VOIDmode, off,
11263 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11264
11265 if (GET_MODE (mem) == QImode)
11266 mask = force_reg (SImode, GEN_INT (0xff));
11267 else
11268 mask = force_reg (SImode, GEN_INT (0xffff));
11269
11270 emit_insn (gen_rtx_SET (VOIDmode, mask,
11271 gen_rtx_ASHIFT (SImode, mask, off)));
11272
11273 emit_insn (gen_rtx_SET (VOIDmode, val,
11274 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11275 val)));
11276
11277 oldval = gen_lowpart (SImode, oldval);
11278 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11279 gen_rtx_ASHIFT (SImode, oldval, off)));
11280
11281 newval = gen_lowpart_common (SImode, newval);
11282 emit_insn (gen_rtx_SET (VOIDmode, newv,
11283 gen_rtx_ASHIFT (SImode, newval, off)));
11284
11285 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11286 gen_rtx_AND (SImode, oldv, mask)));
11287
11288 emit_insn (gen_rtx_SET (VOIDmode, newv,
11289 gen_rtx_AND (SImode, newv, mask)));
11290
11291 end_label = gen_label_rtx ();
11292 loop_label = gen_label_rtx ();
11293 emit_label (loop_label);
11294
11295 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11296 gen_rtx_IOR (SImode, oldv, val)));
11297
11298 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11299 gen_rtx_IOR (SImode, newv, val)));
11300
11301 emit_move_insn (bool_result, const1_rtx);
11302
11303 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11304
11305 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11306
11307 emit_insn (gen_rtx_SET (VOIDmode, resv,
11308 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11309 res)));
11310
11311 emit_move_insn (bool_result, const0_rtx);
11312
11313 cc = gen_compare_reg_1 (NE, resv, val);
11314 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11315
11316 /* Use cbranchcc4 to separate the compare and branch! */
11317 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11318 cc, const0_rtx, loop_label));
11319
11320 emit_label (end_label);
11321
11322 emit_insn (gen_rtx_SET (VOIDmode, res,
11323 gen_rtx_AND (SImode, res, mask)));
11324
11325 emit_insn (gen_rtx_SET (VOIDmode, res,
11326 gen_rtx_LSHIFTRT (SImode, res, off)));
11327
11328 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11329 }
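
/* Worked example (illustrative): for a QImode access whose address A has
   A & 3 == 1, the code above computes addr = A & -4,
   off = ((A & 3) ^ 3) << 3 = 16 and mask = 0xff << 16 = 0x00ff0000,
   i.e. exactly the big-endian position of byte 1 within its word.  */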
11330
11331 /* Expand code to perform a compare-and-swap. */
11332
11333 void
11334 sparc_expand_compare_and_swap (rtx operands[])
11335 {
11336 rtx bval, retval, mem, oldval, newval;
11337 enum machine_mode mode;
11338 enum memmodel model;
11339
11340 bval = operands[0];
11341 retval = operands[1];
11342 mem = operands[2];
11343 oldval = operands[3];
11344 newval = operands[4];
11345 model = (enum memmodel) INTVAL (operands[6]);
11346 mode = GET_MODE (mem);
11347
11348 sparc_emit_membar_for_model (model, 3, 1);
11349
11350 if (reg_overlap_mentioned_p (retval, oldval))
11351 oldval = copy_to_reg (oldval);
11352
11353 if (mode == QImode || mode == HImode)
11354 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11355 else
11356 {
11357 rtx (*gen) (rtx, rtx, rtx, rtx);
11358 rtx x;
11359
11360 if (mode == SImode)
11361 gen = gen_atomic_compare_and_swapsi_1;
11362 else
11363 gen = gen_atomic_compare_and_swapdi_1;
11364 emit_insn (gen (retval, mem, oldval, newval));
11365
11366 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11367 if (x != bval)
11368 convert_move (bval, x, 1);
11369 }
11370
11371 sparc_emit_membar_for_model (model, 3, 2);
11372 }
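
/* Illustrative trace: for a SImode compare-and-swap with MEMMODEL_SEQ_CST
   under SMM_TSO, the pre-barrier call above (load_store = 3,
   before_after = 1) computes mm = 15 against implied = 13, leaving a
   membar #StoreLoad, while the post-barrier call (before_after = 2) finds
   every bit implied, because the atomic counts as both load and store,
   and emits nothing.  */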
11373
11374 void
11375 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
11376 {
11377 rtx t_1, t_2, t_3;
11378
11379 sel = gen_lowpart (DImode, sel);
11380 switch (vmode)
11381 {
11382 case V2SImode:
11383 /* inp = xxxxxxxAxxxxxxxB */
11384 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11385 NULL_RTX, 1, OPTAB_DIRECT);
11386 /* t_1 = ....xxxxxxxAxxx. */
11387 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11388 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11389 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11390 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11391 /* sel = .......B */
11392 /* t_1 = ...A.... */
11393 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11394 /* sel = ...A...B */
11395 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11396 /* sel = AAAABBBB * 4 */
11397 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11398 /* sel = { A*4, A*4+1, A*4+2, ... } */
11399 break;
11400
11401 case V4HImode:
11402 /* inp = xxxAxxxBxxxCxxxD */
11403 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11404 NULL_RTX, 1, OPTAB_DIRECT);
11405 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11406 NULL_RTX, 1, OPTAB_DIRECT);
11407 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11408 NULL_RTX, 1, OPTAB_DIRECT);
11409 /* t_1 = ..xxxAxxxBxxxCxx */
11410 /* t_2 = ....xxxAxxxBxxxC */
11411 /* t_3 = ......xxxAxxxBxx */
11412 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11413 GEN_INT (0x07),
11414 NULL_RTX, 1, OPTAB_DIRECT);
11415 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11416 GEN_INT (0x0700),
11417 NULL_RTX, 1, OPTAB_DIRECT);
11418 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11419 GEN_INT (0x070000),
11420 NULL_RTX, 1, OPTAB_DIRECT);
11421 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11422 GEN_INT (0x07000000),
11423 NULL_RTX, 1, OPTAB_DIRECT);
11424 /* sel = .......D */
11425 /* t_1 = .....C.. */
11426 /* t_2 = ...B.... */
11427 /* t_3 = .A...... */
11428 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11429 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11430 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11431 /* sel = .A.B.C.D */
11432 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11433 /* sel = AABBCCDD * 2 */
11434 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11435 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11436 break;
11437
11438 case V8QImode:
11439 /* input = xAxBxCxDxExFxGxH */
11440 sel = expand_simple_binop (DImode, AND, sel,
11441 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11442 | 0x0f0f0f0f),
11443 NULL_RTX, 1, OPTAB_DIRECT);
11444 /* sel = .A.B.C.D.E.F.G.H */
11445 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11446 NULL_RTX, 1, OPTAB_DIRECT);
11447 /* t_1 = ..A.B.C.D.E.F.G. */
11448 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11449 NULL_RTX, 1, OPTAB_DIRECT);
11450 /* sel = .AABBCCDDEEFFGGH */
11451 sel = expand_simple_binop (DImode, AND, sel,
11452 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11453 | 0xff00ff),
11454 NULL_RTX, 1, OPTAB_DIRECT);
11455 /* sel = ..AB..CD..EF..GH */
11456 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11457 NULL_RTX, 1, OPTAB_DIRECT);
11458 /* t_1 = ....AB..CD..EF.. */
11459 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11460 NULL_RTX, 1, OPTAB_DIRECT);
11461 /* sel = ..ABABCDCDEFEFGH */
11462 sel = expand_simple_binop (DImode, AND, sel,
11463 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11464 NULL_RTX, 1, OPTAB_DIRECT);
11465 /* sel = ....ABCD....EFGH */
11466 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11467 NULL_RTX, 1, OPTAB_DIRECT);
11468 /* t_1 = ........ABCD.... */
11469 sel = gen_lowpart (SImode, sel);
11470 t_1 = gen_lowpart (SImode, t_1);
11471 break;
11472
11473 default:
11474 gcc_unreachable ();
11475 }
11476
11477 /* Always perform the final addition/merge within the bmask insn. */
11478 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
11479 }
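
/* Worked example (illustrative): for V2SImode with selector elements
   A = 1 and B = 0, the code above forms (A << 16) | B = 0x10000,
   multiplies by 0x4444 to get 0x44440000, and the final bmask addition
   with 0x01230123 yields 0x45670123, i.e. the byte permutation
   { 4, 5, 6, 7, 0, 1, 2, 3 }.  */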
11480
11481 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11482
11483 static bool
11484 sparc_frame_pointer_required (void)
11485 {
11486 /* If the stack pointer is dynamically modified in the function, it cannot
11487 serve as the frame pointer. */
11488 if (cfun->calls_alloca)
11489 return true;
11490
11491 /* If the function receives nonlocal gotos, it needs to save the frame
11492 pointer in the nonlocal_goto_save_area object. */
11493 if (cfun->has_nonlocal_label)
11494 return true;
11495
11496 /* In flat mode, that's it. */
11497 if (TARGET_FLAT)
11498 return false;
11499
  /* Otherwise, the frame pointer is required if the function isn't a leaf.  */
11501 return !(crtl->is_leaf && only_leaf_regs_used ());
11502 }
11503
11504 /* The way this is structured, we can't eliminate SFP in favor of SP
11505 if the frame pointer is required: we want to use the SFP->HFP elimination
11506 in that case. But the test in update_eliminables doesn't know we are
11507 assuming below that we only do the former elimination. */
11508
11509 static bool
11510 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11511 {
11512 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11513 }
11514
11515 /* Return the hard frame pointer directly to bypass the stack bias. */
11516
11517 static rtx
11518 sparc_builtin_setjmp_frame_value (void)
11519 {
11520 return hard_frame_pointer_rtx;
11521 }
11522
11523 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
11524 they won't be allocated. */
11525
11526 static void
11527 sparc_conditional_register_usage (void)
11528 {
11529 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
11530 {
11531 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11532 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11533 }
  /* If the user has passed -f{fixed,call-{used,saved}}-g5, then honor it.  */
11536 if (TARGET_ARCH32 && fixed_regs[5])
11537 fixed_regs[5] = 1;
11538 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
11539 fixed_regs[5] = 0;
11540 if (! TARGET_V9)
11541 {
11542 int regno;
11543 for (regno = SPARC_FIRST_V9_FP_REG;
11544 regno <= SPARC_LAST_V9_FP_REG;
11545 regno++)
11546 fixed_regs[regno] = 1;
11547 /* %fcc0 is used by v8 and v9. */
11548 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
11549 regno <= SPARC_LAST_V9_FCC_REG;
11550 regno++)
11551 fixed_regs[regno] = 1;
11552 }
11553 if (! TARGET_FPU)
11554 {
11555 int regno;
11556 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
11557 fixed_regs[regno] = 1;
11558 }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2, then honor it.
     Likewise with g3 and g4.  */
11561 if (fixed_regs[2] == 2)
11562 fixed_regs[2] = ! TARGET_APP_REGS;
11563 if (fixed_regs[3] == 2)
11564 fixed_regs[3] = ! TARGET_APP_REGS;
11565 if (TARGET_ARCH32 && fixed_regs[4] == 2)
11566 fixed_regs[4] = ! TARGET_APP_REGS;
11567 else if (TARGET_CM_EMBMEDANY)
11568 fixed_regs[4] = 1;
11569 else if (fixed_regs[4] == 2)
11570 fixed_regs[4] = 0;
11571 if (TARGET_FLAT)
11572 {
11573 int regno;
11574 /* Disable leaf functions. */
11575 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
11576 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11577 leaf_reg_remap [regno] = regno;
11578 }
11579 if (TARGET_VIS)
11580 global_regs[SPARC_GSR_REG] = 1;
11581 }
11582
11583 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
11584
11585 - We can't load constants into FP registers.
11586 - We can't load FP constants into integer registers when soft-float,
11587 because there is no soft-float pattern with a r/F constraint.
11588 - We can't load FP constants into integer registers for TFmode unless
11589 it is 0.0L, because there is no movtf pattern with a r/F constraint.
   - Try to reload integer constants (symbolic or otherwise) back into
     registers directly, rather than having them dumped to memory.  */
11592
11593 static reg_class_t
11594 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
11595 {
11596 enum machine_mode mode = GET_MODE (x);
11597 if (CONSTANT_P (x))
11598 {
11599 if (FP_REG_CLASS_P (rclass)
11600 || rclass == GENERAL_OR_FP_REGS
11601 || rclass == GENERAL_OR_EXTRA_FP_REGS
11602 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
11603 || (mode == TFmode && ! const_zero_operand (x, mode)))
11604 return NO_REGS;
11605
11606 if (GET_MODE_CLASS (mode) == MODE_INT)
11607 return GENERAL_REGS;
11608
11609 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11610 {
11611 if (! FP_REG_CLASS_P (rclass)
11612 || !(const_zero_operand (x, mode)
11613 || const_all_ones_operand (x, mode)))
11614 return NO_REGS;
11615 }
11616 }
11617
11618 if (TARGET_VIS3
11619 && ! TARGET_ARCH64
11620 && (rclass == EXTRA_FP_REGS
11621 || rclass == GENERAL_OR_EXTRA_FP_REGS))
11622 {
11623 int regno = true_regnum (x);
11624
11625 if (SPARC_INT_REG_P (regno))
11626 return (rclass == EXTRA_FP_REGS
11627 ? FP_REGS : GENERAL_OR_FP_REGS);
11628 }
11629
11630 return rclass;
11631 }
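
/* For instance (illustrative), a TFmode constant other than 0.0L headed
   for FP_REGS is rejected above (NO_REGS) and thus spilled to memory,
   while an SImode constant whose class allows the integer registers is
   steered back to GENERAL_REGS instead of being dumped to memory.  */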
11632
11633 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
11634 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
11635
11636 const char *
11637 output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
11638 {
11639 char mulstr[32];
11640
11641 gcc_assert (! TARGET_ARCH64);
11642
11643 if (sparc_check_64 (operands[1], insn) <= 0)
11644 output_asm_insn ("srl\t%L1, 0, %L1", operands);
11645 if (which_alternative == 1)
11646 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
11647 if (GET_CODE (operands[2]) == CONST_INT)
11648 {
11649 if (which_alternative == 1)
11650 {
11651 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11652 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
11653 output_asm_insn (mulstr, operands);
11654 return "srlx\t%L0, 32, %H0";
11655 }
11656 else
11657 {
11658 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11659 output_asm_insn ("or\t%L1, %3, %3", operands);
11660 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
11661 output_asm_insn (mulstr, operands);
11662 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11663 return "mov\t%3, %L0";
11664 }
11665 }
11666 else if (rtx_equal_p (operands[1], operands[2]))
11667 {
11668 if (which_alternative == 1)
11669 {
11670 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11671 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
11672 output_asm_insn (mulstr, operands);
11673 return "srlx\t%L0, 32, %H0";
11674 }
11675 else
11676 {
11677 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11678 output_asm_insn ("or\t%L1, %3, %3", operands);
11679 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
11680 output_asm_insn (mulstr, operands);
11681 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11682 return "mov\t%3, %L0";
11683 }
11684 }
11685 if (sparc_check_64 (operands[2], insn) <= 0)
11686 output_asm_insn ("srl\t%L2, 0, %L2", operands);
11687 if (which_alternative == 1)
11688 {
11689 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11690 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
11691 output_asm_insn ("or\t%L2, %L1, %L1", operands);
11692 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
11693 output_asm_insn (mulstr, operands);
11694 return "srlx\t%L0, 32, %H0";
11695 }
11696 else
11697 {
11698 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11699 output_asm_insn ("sllx\t%H2, 32, %4", operands);
11700 output_asm_insn ("or\t%L1, %3, %3", operands);
11701 output_asm_insn ("or\t%L2, %4, %4", operands);
11702 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
11703 output_asm_insn (mulstr, operands);
11704 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11705 return "mov\t%3, %L0";
11706 }
11707 }
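
/* Illustrative expansion (a sketch for the case where which_alternative
   is not 1, neither operand is known to be zero-extended, OPCODE is
   "mulx", and %3/%4 are the scratch operands):

	srl	%L1, 0, %L1
	srl	%L2, 0, %L2
	sllx	%H1, 32, %3
	sllx	%H2, 32, %4
	or	%L1, %3, %3
	or	%L2, %4, %4
	mulx	%3, %4, %3
	srlx	%3, 32, %H0
	mov	%3, %L0  */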
11708
/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn.  MODE
   and INNER_MODE are the modes describing TARGET.  */
11712
11713 static void
11714 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
11715 enum machine_mode inner_mode)
11716 {
11717 rtx t1, final_insn;
11718 int bmask;
11719
11720 t1 = gen_reg_rtx (mode);
11721
11722 elt = convert_modes (SImode, inner_mode, elt, true);
11723 emit_move_insn (gen_lowpart(SImode, t1), elt);
11724
11725 switch (mode)
11726 {
11727 case V2SImode:
11728 final_insn = gen_bshufflev2si_vis (target, t1, t1);
11729 bmask = 0x45674567;
11730 break;
11731 case V4HImode:
11732 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
11733 bmask = 0x67676767;
11734 break;
11735 case V8QImode:
11736 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
11737 bmask = 0x77777777;
11738 break;
11739 default:
11740 gcc_unreachable ();
11741 }
11742
11743 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
11744 force_reg (SImode, GEN_INT (bmask))));
11745 emit_insn (final_insn);
11746 }
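
/* For example (illustrative), in the V8QImode case above the bmask value
   0x77777777 makes every result byte of the BSHUFFLE select byte 7, the
   low byte of t1 that was just set to ELT, thereby replicating ELT into
   all eight lanes.  */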
11747
/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn.  */
11750
11751 static void
11752 vector_init_fpmerge (rtx target, rtx elt)
11753 {
11754 rtx t1, t2, t2_low, t3, t3_low;
11755
11756 t1 = gen_reg_rtx (V4QImode);
11757 elt = convert_modes (SImode, QImode, elt, true);
11758 emit_move_insn (gen_lowpart (SImode, t1), elt);
11759
11760 t2 = gen_reg_rtx (V8QImode);
11761 t2_low = gen_lowpart (V4QImode, t2);
11762 emit_insn (gen_fpmerge_vis (t2, t1, t1));
11763
11764 t3 = gen_reg_rtx (V8QImode);
11765 t3_low = gen_lowpart (V4QImode, t3);
11766 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
11767
11768 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
11769 }
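
/* Illustrative trace of the doubling above, writing E for the byte value
   of ELT: t1 = { 0,0,0,E }; merging t1 with itself gives
   t2 = { 0,0,0,0,0,0,E,E }; merging t2's low half { 0,0,E,E } with itself
   gives t3 = { 0,0,0,0,E,E,E,E }; and merging t3's low half { E,E,E,E }
   with itself fills all eight bytes of TARGET with E.  */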
11770
/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn.  */
11773
11774 static void
11775 vector_init_faligndata (rtx target, rtx elt)
11776 {
11777 rtx t1 = gen_reg_rtx (V4HImode);
11778 int i;
11779
11780 elt = convert_modes (SImode, HImode, elt, true);
11781 emit_move_insn (gen_lowpart (SImode, t1), elt);
11782
11783 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
11784 force_reg (SImode, GEN_INT (6)),
11785 const0_rtx));
11786
11787 for (i = 0; i < 4; i++)
11788 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
11789 }
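
/* Illustrative trace of the loop above: with the alignment offset set
   to 6, each FALIGNDATA extracts the last two bytes of t1 (i.e. ELT)
   followed by the first six bytes of TARGET, so every iteration shifts
   one more copy of ELT into TARGET; after four iterations all four
   halfword lanes contain ELT.  */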
11790
11791 /* Emit code to initialize TARGET to values for individual fields VALS. */
11792
11793 void
11794 sparc_expand_vector_init (rtx target, rtx vals)
11795 {
11796 const enum machine_mode mode = GET_MODE (target);
11797 const enum machine_mode inner_mode = GET_MODE_INNER (mode);
11798 const int n_elts = GET_MODE_NUNITS (mode);
11799 int i, n_var = 0;
11800 bool all_same;
11801 rtx mem;
11802
11803 all_same = true;
11804 for (i = 0; i < n_elts; i++)
11805 {
11806 rtx x = XVECEXP (vals, 0, i);
11807 if (!CONSTANT_P (x))
11808 n_var++;
11809
11810 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
11811 all_same = false;
11812 }
11813
11814 if (n_var == 0)
11815 {
11816 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
11817 return;
11818 }
11819
11820 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
11821 {
11822 if (GET_MODE_SIZE (inner_mode) == 4)
11823 {
11824 emit_move_insn (gen_lowpart (SImode, target),
11825 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
11826 return;
11827 }
11828 else if (GET_MODE_SIZE (inner_mode) == 8)
11829 {
11830 emit_move_insn (gen_lowpart (DImode, target),
11831 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
11832 return;
11833 }
11834 }
11835 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
11836 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
11837 {
11838 emit_move_insn (gen_highpart (word_mode, target),
11839 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
11840 emit_move_insn (gen_lowpart (word_mode, target),
11841 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
11842 return;
11843 }
11844
11845 if (all_same && GET_MODE_SIZE (mode) == 8)
11846 {
11847 if (TARGET_VIS2)
11848 {
11849 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
11850 return;
11851 }
11852 if (mode == V8QImode)
11853 {
11854 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
11855 return;
11856 }
11857 if (mode == V4HImode)
11858 {
11859 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
11860 return;
11861 }
11862 }
11863
11864 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
11865 for (i = 0; i < n_elts; i++)
11866 emit_move_insn (adjust_address_nv (mem, inner_mode,
11867 i * GET_MODE_SIZE (inner_mode)),
11868 XVECEXP (vals, 0, i));
11869 emit_move_insn (target, mem);
11870 }
11871
11872 /* Implement TARGET_SECONDARY_RELOAD. */
11873
11874 static reg_class_t
11875 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
11876 enum machine_mode mode, secondary_reload_info *sri)
11877 {
11878 enum reg_class rclass = (enum reg_class) rclass_i;
11879
11880 sri->icode = CODE_FOR_nothing;
11881 sri->extra_cost = 0;
11882
11883 /* We need a temporary when loading/storing a HImode/QImode value
11884 between memory and the FPU registers. This can happen when combine puts
11885 a paradoxical subreg in a float/fix conversion insn. */
11886 if (FP_REG_CLASS_P (rclass)
11887 && (mode == HImode || mode == QImode)
11888 && (GET_CODE (x) == MEM
11889 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
11890 && true_regnum (x) == -1)))
11891 return GENERAL_REGS;
11892
11893 /* On 32-bit we need a temporary when loading/storing a DFmode value
11894 between unaligned memory and the upper FPU registers. */
11895 if (TARGET_ARCH32
11896 && rclass == EXTRA_FP_REGS
11897 && mode == DFmode
11898 && GET_CODE (x) == MEM
11899 && ! mem_min_alignment (x, 8))
11900 return FP_REGS;
11901
11902 if (((TARGET_CM_MEDANY
11903 && symbolic_operand (x, mode))
11904 || (TARGET_CM_EMBMEDANY
11905 && text_segment_operand (x, mode)))
11906 && ! flag_pic)
11907 {
11908 if (in_p)
11909 sri->icode = direct_optab_handler (reload_in_optab, mode);
11910 else
11911 sri->icode = direct_optab_handler (reload_out_optab, mode);
11912 return NO_REGS;
11913 }
11914
11915 if (TARGET_VIS3 && TARGET_ARCH32)
11916 {
11917 int regno = true_regnum (x);
11918
11919 /* When using VIS3 fp<-->int register moves, on 32-bit we have
11920 to move 8-byte values in 4-byte pieces. This only works via
11921 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
11922 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
11923 an FP_REGS intermediate move. */
11924 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
11925 || ((general_or_i64_p (rclass)
11926 || rclass == GENERAL_OR_FP_REGS)
11927 && SPARC_FP_REG_P (regno)))
11928 {
11929 sri->extra_cost = 2;
11930 return FP_REGS;
11931 }
11932 }
11933
11934 return NO_REGS;
11935 }
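
/* For example (illustrative), on 32-bit with VIS3 a move from an integer
   register into an upper FP register (EXTRA_FP_REGS) cannot be done
   directly, because the 4-byte fp<->int moves only reach the lower FP
   registers; the code above therefore requests an FP_REGS intermediate
   and charges a small extra cost.  */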
11936
11937 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
11938 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
11939
11940 bool
11941 sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
11942 {
11943 enum rtx_code rc = GET_CODE (operands[1]);
11944 enum machine_mode cmp_mode;
11945 rtx cc_reg, dst, cmp;
11946
11947 cmp = operands[1];
11948 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
11949 return false;
11950
11951 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
11952 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
11953
11954 cmp_mode = GET_MODE (XEXP (cmp, 0));
11955 rc = GET_CODE (cmp);
11956
11957 dst = operands[0];
11958 if (! rtx_equal_p (operands[2], dst)
11959 && ! rtx_equal_p (operands[3], dst))
11960 {
11961 if (reg_overlap_mentioned_p (dst, cmp))
11962 dst = gen_reg_rtx (mode);
11963
11964 emit_move_insn (dst, operands[3]);
11965 }
11966 else if (operands[2] == dst)
11967 {
11968 operands[2] = operands[3];
11969
11970 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
11971 rc = reverse_condition_maybe_unordered (rc);
11972 else
11973 rc = reverse_condition (rc);
11974 }
11975
11976 if (XEXP (cmp, 1) == const0_rtx
11977 && GET_CODE (XEXP (cmp, 0)) == REG
11978 && cmp_mode == DImode
11979 && v9_regcmp_p (rc))
11980 cc_reg = XEXP (cmp, 0);
11981 else
11982 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
11983
11984 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
11985
11986 emit_insn (gen_rtx_SET (VOIDmode, dst,
11987 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
11988
11989 if (dst != operands[0])
11990 emit_move_insn (operands[0], dst);
11991
11992 return true;
11993 }
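
/* Illustrative result (register names stand for the operands): for an
   SImode "dst = (a == b) ? c : dst" on V9, the expander above generates
   a compare followed by a conditional move, along the lines of

	cmp	%a, %b
	move	%icc, %c, %dst  */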
11994
11995 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
11996 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
11997 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
   FCODE is the machine code to be used for OPERANDS[3] and CCODE is the
   machine code to be used for the condition mask.  */
12000
12001 void
12002 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
12003 {
12004 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12005 enum rtx_code code = GET_CODE (operands[3]);
12006
12007 mask = gen_reg_rtx (Pmode);
12008 cop0 = operands[4];
12009 cop1 = operands[5];
12010 if (code == LT || code == GE)
12011 {
12012 rtx t;
12013
12014 code = swap_condition (code);
12015 t = cop0; cop0 = cop1; cop1 = t;
12016 }
12017
12018 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12019
12020 fcmp = gen_rtx_UNSPEC (Pmode,
12021 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12022 fcode);
12023
12024 cmask = gen_rtx_UNSPEC (DImode,
12025 gen_rtvec (2, mask, gsr),
12026 ccode);
12027
12028 bshuf = gen_rtx_UNSPEC (mode,
12029 gen_rtvec (3, operands[1], operands[2], gsr),
12030 UNSPEC_BSHUFFLE);
12031
12032 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12033 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12034
12035 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12036 }
12037
/* On SPARC, any mode that naturally allocates into the float
   registers should have a natural size of 4.  */
12040
12041 unsigned int
12042 sparc_regmode_natural_size (enum machine_mode mode)
12043 {
12044 int size = UNITS_PER_WORD;
12045
12046 if (TARGET_ARCH64)
12047 {
12048 enum mode_class mclass = GET_MODE_CLASS (mode);
12049
12050 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12051 size = 4;
12052 }
12053
12054 return size;
12055 }
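
/* Illustrative consequence: on 64-bit, a DFmode or V8QImode value living
   in the FP registers has a natural size of 4, i.e. it is treated as two
   separately addressable 4-byte halves, while on 32-bit the function
   simply returns UNITS_PER_WORD.  */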
12056
12057 /* Return TRUE if it is a good idea to tie two pseudo registers
12058 when one has mode MODE1 and one has mode MODE2.
12059 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12060 for any hard reg, then this must be FALSE for correct output.
12061
12062 For V9 we have to deal with the fact that only the lower 32 floating
12063 point registers are 32-bit addressable. */
12064
12065 bool
12066 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12067 {
12068 enum mode_class mclass1, mclass2;
12069 unsigned short size1, size2;
12070
12071 if (mode1 == mode2)
12072 return true;
12073
12074 mclass1 = GET_MODE_CLASS (mode1);
12075 mclass2 = GET_MODE_CLASS (mode2);
12076 if (mclass1 != mclass2)
12077 return false;
12078
12079 if (! TARGET_V9)
12080 return true;
12081
12082 /* Classes are the same and we are V9 so we have to deal with upper
12083 vs. lower floating point registers. If one of the modes is a
12084 4-byte mode, and the other is not, we have to mark them as not
   tieable because only the lower 32 floating point registers are
   addressable 32 bits at a time.
12087
12088 We can't just test explicitly for SFmode, otherwise we won't
12089 cover the vector mode cases properly. */
12090
12091 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12092 return true;
12093
12094 size1 = GET_MODE_SIZE (mode1);
12095 size2 = GET_MODE_SIZE (mode2);
12096 if ((size1 > 4 && size2 == 4)
12097 || (size2 > 4 && size1 == 4))
12098 return false;
12099
12100 return true;
12101 }
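
/* For example (illustrative), on V9 SFmode and DFmode are not tieable
   because a DFmode pseudo may end up in the upper FP registers, which are
   not addressable 4 bytes at a time, whereas the 8-byte vector modes
   V8QImode and V4HImode remain tieable.  */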
12102
12103 #include "gt-sparc.h"