/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   2011
   Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "cfglayout.h"
#include "gimple.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "df.h"
#include "dwarf2out.h"
#include "opts.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* Integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};

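/* A minimal illustrative sketch, not part of the GCC sources, of how the
   variable multiply cost described above could be evaluated from these
   fields; the helper name is hypothetical and the use of floor_log2 to
   find the highest bit is an assumption.  */
#if 0
static int
example_imul_cost (const struct processor_costs *costs, HOST_WIDE_INT rs1)
{
  int highest_bit;

  /* For a negative multiplier the formula scans for the highest clear
     bit, which is the highest set bit of the complement.  */
  if (rs1 < 0)
    rs1 = ~rs1;

  highest_bit = rs1 ? floor_log2 (rs1) : 0;
  if (highest_bit < 3)
    highest_bit = 3;

  /* A zero bit factor means the multiply cost is fixed.  */
  if (costs->int_mul_bit_factor == 0)
    return costs->int_mul;

  return costs->int_mul + (highest_bit - 3) / costs->int_mul_bit_factor;
}
#endif
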
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this vector to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};

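/* Illustrative example (editorial note, not from the original sources):
   in a leaf function the register window is never shifted, so an
   incoming argument in %i0 (hard register 24) is rewritten to %o0
   (hard register 8) via this table, i.e. leaf_reg_remap[24] == 8,
   while entries of -1, such as %fp (hard register 30), mark registers
   that cannot be remapped.  */
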
/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of current_function_uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx, rtx, rtx, int);
static int hypersparc_adjust_cost (rtx, rtx, rtx, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx, rtx, rtx, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_vis_init_builtins (void);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static int sparc_vis_mul8x16 (int, int);
static tree sparc_handle_vis_mul8x16 (int, tree, tree, tree);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static void sparc_reorg (void);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static int sparc_register_move_cost (enum machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (enum machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx);
static bool sparc_pass_by_reference (cumulative_args_t,
				     enum machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 enum machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       enum machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (enum machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    enum machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   enum machine_mode,
					   secondary_reload_info *);
\f
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
\f
/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sparc_reorg

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

struct gcc_target targetm = TARGET_INITIALIZER;

static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the PROCESSOR_* enumeration.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    /* LEON */
    { "leon", MASK_ISA, MASK_V8|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use a 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis.  */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis.  */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit.  */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;
}
\f
/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one, and mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partitioning into hot
     and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}

/* Expand a move instruction.  Return true if all work is done.  */

bool
sparc_expand_move (enum machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fixup TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if (GET_CODE (operands[1]) == LABEL_REF
	  && can_use_mov_pic_label_ref (operands[1]))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      gcc_assert (TARGET_ARCH64);
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading an FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
	 not storing directly into memory.  So fix this up to avoid
	 crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear or set to all-ones FP registers if TARGET_VIS, and
	 always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers.  */
	      || (mode == DFmode
		  && ! can_create_pseudo_p ())))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}

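/* Illustrative note (a sketch of the machine-description side, not part
   of this file): a move expander in sparc.md would typically defer to
   sparc_expand_move and stop when it returns true, e.g.

     (define_expand "movsi"
       [(set (match_operand:SI 0 "nonimmediate_operand" "")
	     (match_operand:SI 1 "general_operand" ""))]
       ""
     {
       if (sparc_expand_move (SImode, operands))
	 DONE;
     })  */
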
/* Load OP1, a 32-bit constant, into OP0, a register.
   We know it can't be done in one insn when we get
   here, the move expander guarantees this.  */

static void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp = op0;

  if (can_create_pseudo_p ())
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      gcc_assert (!small_int_operand (op1, mode)
		  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM; this way CSE
	 can see everything and reuse intermediate values if it wants.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      GEN_INT (INTVAL (op1)
				       & ~(HOST_WIDE_INT)0x3ff)));

      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}

/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
   If TEMP is nonzero, we are forbidden to use any other scratch
   registers.  Otherwise, we are allowed to generate them as needed.

   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */

void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code-model support.  */
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp1
	 or	%temp1, %lo(symbol), %reg  */
      if (temp)
	temp1 = temp;  /* op0 is allowed.  */
      else
	temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      if (temp)
	{
	  temp1 = op0;
	  temp2 = op0;
	  temp3 = temp;  /* op0 is allowed.  */
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp3,
			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 sllx	%temp3, 32, %temp4
	 or	%temp4, %temp2, %temp5
	 or	%temp5, %lo(symbol), %reg  */
      if (temp)
	{
	  /* It is possible that one of the registers we got for operands[2]
	     might coincide with that of operands[0] (which is why we made
	     it TImode).  Pick the other one to use as our scratch.  */
	  if (rtx_equal_p (temp, op0))
	    {
	      gcc_assert (ti_temp);
	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
	    }
	  temp1 = op0;
	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	  temp3 = op0;
	  temp4 = op0;
	  temp5 = op0;
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	  temp4 = gen_reg_rtx (DImode);
	  temp5 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, temp5,
			      gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			add	%temp1, EMBMEDANY_BASE_REG, %temp2
			or	%temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  if (temp)
	    {
	      temp1 = temp;  /* op0 is allowed.  */
	      temp2 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (temp2, temp1));
	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
	}

      /* Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			sllx	%temp3, 32, %temp4
			or	%temp4, %temp2, %temp5
			or	%temp5, %lo(symbol), %reg  */
      else
	{
	  if (temp)
	    {
	      /* It is possible that one of the registers we got for operands[2]
		 might coincide with that of operands[0] (which is why we made
		 it TImode).  Pick the other one to use as our scratch.  */
	      if (rtx_equal_p (temp, op0))
		{
		  gcc_assert (ti_temp);
		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
		}
	      temp1 = op0;
	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	      temp3 = op0;
	      temp4 = op0;
	      temp5 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	      temp3 = gen_reg_rtx (DImode);
	      temp4 = gen_reg_rtx (DImode);
	      temp5 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_textuhi (temp1, op1));
	  emit_insn (gen_embmedany_texthi (temp2, op1));
	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
	  emit_insn (gen_rtx_SET (VOIDmode, temp4,
				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (VOIDmode, temp5,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	  emit_insn (gen_embmedany_textlo (op0, temp5, op1));
	}
      break;

    default:
      gcc_unreachable ();
    }
}

#if HOST_BITS_PER_WIDE_INT == 32
static void
sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
#else
/* These avoid problems when cross compiling.  If we do not
   go through all this hair then the optimizer will see
   invalid REG_EQUAL notes or in some cases none at all.  */
static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);

/* The optimizer is not to assume anything about exactly
   which bits are set for a HIGH, they are unspecified.
   Unfortunately this leads to many missed optimizations
   during CSE.  We mask out the non-HIGH bits, so that the
   result matches a plain movdi, to alleviate this problem.  */
static rtx
gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
}

static rtx
gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
}

static rtx
gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_IOR (DImode, src, GEN_INT (val));
}

static rtx
gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_XOR (DImode, src, GEN_INT (val));
}

1719 /* Worker routines for 64-bit constant formation on arch64.
1720 One of the key things to do in these emissions is
1721 to create as many temp REGs as possible. This makes it
1722 possible for half-built constants to be reused when
1723 similar values are required later on.
1724 Without doing this, the optimizer cannot see such
1725 opportunities. */
1726
1727 static void sparc_emit_set_const64_quick1 (rtx, rtx,
1728 unsigned HOST_WIDE_INT, int);
1729
1730 static void
1731 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
1732 unsigned HOST_WIDE_INT low_bits, int is_neg)
1733 {
1734 unsigned HOST_WIDE_INT high_bits;
1735
1736 if (is_neg)
1737 high_bits = (~low_bits) & 0xffffffff;
1738 else
1739 high_bits = low_bits;
1740
1741 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1742 if (!is_neg)
1743 {
1744 emit_insn (gen_rtx_SET (VOIDmode, op0,
1745 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1746 }
1747 else
1748 {
1749 /* If we are XOR'ing with -1, then we should emit a one's complement
1750 instead. This way the combiner will notice logical operations
1751 such as ANDN later on and substitute. */
1752 if ((low_bits & 0x3ff) == 0x3ff)
1753 {
1754 emit_insn (gen_rtx_SET (VOIDmode, op0,
1755 gen_rtx_NOT (DImode, temp)));
1756 }
1757 else
1758 {
1759 emit_insn (gen_rtx_SET (VOIDmode, op0,
1760 gen_safe_XOR64 (temp,
1761 (-(HOST_WIDE_INT)0x400
1762 | (low_bits & 0x3ff)))));
1763 }
1764 }
1765 }
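/* A worked example (illustrative only): for 0x00000000_12345678 the
   sequence is
       sethi %hi(0x12345678), %temp   ! %temp = 0x12345400
       or    %temp, 0x278, %op0       ! %op0  = 0x12345678
   and for 0xffffffff_87654321 we first build ~0x87654321 = 0x789abcde
   with the sethi, then XOR with (-0x400 | 0x321), which flips the
   upper word and the low 10 bits back in a single insn.  */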
1766
1767 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
1768 unsigned HOST_WIDE_INT, int);
1769
1770 static void
1771 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
1772 unsigned HOST_WIDE_INT high_bits,
1773 unsigned HOST_WIDE_INT low_immediate,
1774 int shift_count)
1775 {
1776 rtx temp2 = op0;
1777
1778 if ((high_bits & 0xfffffc00) != 0)
1779 {
1780 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1781 if ((high_bits & ~0xfffffc00) != 0)
1782 emit_insn (gen_rtx_SET (VOIDmode, op0,
1783 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1784 else
1785 temp2 = temp;
1786 }
1787 else
1788 {
1789 emit_insn (gen_safe_SET64 (temp, high_bits));
1790 temp2 = temp;
1791 }
1792
1793 /* Now shift it up into place. */
1794 emit_insn (gen_rtx_SET (VOIDmode, op0,
1795 gen_rtx_ASHIFT (DImode, temp2,
1796 GEN_INT (shift_count))));
1797
1798 /* If there is a low immediate piece, finish up by
1799 putting that in as well. */
1800 if (low_immediate != 0)
1801 emit_insn (gen_rtx_SET (VOIDmode, op0,
1802 gen_safe_OR64 (op0, low_immediate)));
1803 }
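/* Illustrative example: quick2 with high_bits = 0x12345678,
   shift_count = 32 and low_immediate = 0x123 emits
       sethi %hi(0x12345678), %temp
       or    %temp, 0x278, %op0
       sllx  %op0, 32, %op0
       or    %op0, 0x123, %op0
   i.e. the constant 0x12345678_00000123 in four insns.  */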
1804
1805 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
1806 unsigned HOST_WIDE_INT);
1807
1808 /* Full 64-bit constant decomposition. Even though this is the
1809 'worst' case, we still optimize a few things away. */
1810 static void
1811 sparc_emit_set_const64_longway (rtx op0, rtx temp,
1812 unsigned HOST_WIDE_INT high_bits,
1813 unsigned HOST_WIDE_INT low_bits)
1814 {
1815 rtx sub_temp = op0;
1816
1817 if (can_create_pseudo_p ())
1818 sub_temp = gen_reg_rtx (DImode);
1819
1820 if ((high_bits & 0xfffffc00) != 0)
1821 {
1822 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1823 if ((high_bits & ~0xfffffc00) != 0)
1824 emit_insn (gen_rtx_SET (VOIDmode,
1825 sub_temp,
1826 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1827 else
1828 sub_temp = temp;
1829 }
1830 else
1831 {
1832 emit_insn (gen_safe_SET64 (temp, high_bits));
1833 sub_temp = temp;
1834 }
1835
1836 if (can_create_pseudo_p ())
1837 {
1838 rtx temp2 = gen_reg_rtx (DImode);
1839 rtx temp3 = gen_reg_rtx (DImode);
1840 rtx temp4 = gen_reg_rtx (DImode);
1841
1842 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1843 gen_rtx_ASHIFT (DImode, sub_temp,
1844 GEN_INT (32))));
1845
1846 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
1847 if ((low_bits & ~0xfffffc00) != 0)
1848 {
1849 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1850 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
1851 emit_insn (gen_rtx_SET (VOIDmode, op0,
1852 gen_rtx_PLUS (DImode, temp4, temp3)));
1853 }
1854 else
1855 {
1856 emit_insn (gen_rtx_SET (VOIDmode, op0,
1857 gen_rtx_PLUS (DImode, temp4, temp2)));
1858 }
1859 }
1860 else
1861 {
1862 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
1863 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
1864 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
1865 int to_shift = 12;
1866
1867 /* We are in the middle of reload, so this is really
1868 painful. However we do still make an attempt to
1869 avoid emitting truly stupid code. */
1870 if (low1 != const0_rtx)
1871 {
1872 emit_insn (gen_rtx_SET (VOIDmode, op0,
1873 gen_rtx_ASHIFT (DImode, sub_temp,
1874 GEN_INT (to_shift))));
1875 emit_insn (gen_rtx_SET (VOIDmode, op0,
1876 gen_rtx_IOR (DImode, op0, low1)));
1877 sub_temp = op0;
1878 to_shift = 12;
1879 }
1880 else
1881 {
1882 to_shift += 12;
1883 }
1884 if (low2 != const0_rtx)
1885 {
1886 emit_insn (gen_rtx_SET (VOIDmode, op0,
1887 gen_rtx_ASHIFT (DImode, sub_temp,
1888 GEN_INT (to_shift))));
1889 emit_insn (gen_rtx_SET (VOIDmode, op0,
1890 gen_rtx_IOR (DImode, op0, low2)));
1891 sub_temp = op0;
1892 to_shift = 8;
1893 }
1894 else
1895 {
1896 to_shift += 8;
1897 }
1898 emit_insn (gen_rtx_SET (VOIDmode, op0,
1899 gen_rtx_ASHIFT (DImode, sub_temp,
1900 GEN_INT (to_shift))));
1901 if (low3 != const0_rtx)
1902 emit_insn (gen_rtx_SET (VOIDmode, op0,
1903 gen_rtx_IOR (DImode, op0, low3)));
1904 /* phew... */
1905 }
1906 }
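/* Illustrative worst case (added note): when no pseudos are available
   the tail above shifts the high word up by 32 in 12+12+8 bit steps
   while folding in the low word, e.g.
       sllx %op0, 12, %op0 ; or %op0, low1, %op0
       sllx %op0, 12, %op0 ; or %op0, low2, %op0
       sllx %op0,  8, %op0 ; or %op0, low3, %op0
   skipping the 'or' (and widening the next shift) for any chunk that
   happens to be zero.  */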
1907
1908 /* Analyze a 64-bit constant for certain properties. */
1909 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
1910 unsigned HOST_WIDE_INT,
1911 int *, int *, int *);
1912
1913 static void
1914 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
1915 unsigned HOST_WIDE_INT low_bits,
1916 int *hbsp, int *lbsp, int *abbasp)
1917 {
1918 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1919 int i;
1920
1921 lowest_bit_set = highest_bit_set = -1;
1922 i = 0;
1923 do
1924 {
1925 if ((lowest_bit_set == -1)
1926 && ((low_bits >> i) & 1))
1927 lowest_bit_set = i;
1928 if ((highest_bit_set == -1)
1929 && ((high_bits >> (32 - i - 1)) & 1))
1930 highest_bit_set = (64 - i - 1);
1931 }
1932 while (++i < 32
1933 && ((highest_bit_set == -1)
1934 || (lowest_bit_set == -1)));
1935 if (i == 32)
1936 {
1937 i = 0;
1938 do
1939 {
1940 if ((lowest_bit_set == -1)
1941 && ((high_bits >> i) & 1))
1942 lowest_bit_set = i + 32;
1943 if ((highest_bit_set == -1)
1944 && ((low_bits >> (32 - i - 1)) & 1))
1945 highest_bit_set = 32 - i - 1;
1946 }
1947 while (++i < 32
1948 && ((highest_bit_set == -1)
1949 || (lowest_bit_set == -1)));
1950 }
1951 /* If there are no bits set this should have gone out
1952 as one instruction! */
1953 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
1954 all_bits_between_are_set = 1;
1955 for (i = lowest_bit_set; i <= highest_bit_set; i++)
1956 {
1957 if (i < 32)
1958 {
1959 if ((low_bits & (1 << i)) != 0)
1960 continue;
1961 }
1962 else
1963 {
1964 if ((high_bits & (1 << (i - 32))) != 0)
1965 continue;
1966 }
1967 all_bits_between_are_set = 0;
1968 break;
1969 }
1970 *hbsp = highest_bit_set;
1971 *lbsp = lowest_bit_set;
1972 *abbasp = all_bits_between_are_set;
1973 }
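/* Example (illustrative): for the constant 0x00000700_00000000,
   i.e. bits 40-42 set, this returns *lbsp = 40, *hbsp = 42 and
   *abbasp = 1; for 0x00000500_00000000 the middle bit is clear,
   so *abbasp = 0.  */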
1974
1975 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
1976
1977 static int
1978 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
1979 unsigned HOST_WIDE_INT low_bits)
1980 {
1981 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1982
1983 if (high_bits == 0
1984 || high_bits == 0xffffffff)
1985 return 1;
1986
1987 analyze_64bit_constant (high_bits, low_bits,
1988 &highest_bit_set, &lowest_bit_set,
1989 &all_bits_between_are_set);
1990
1991 if ((highest_bit_set == 63
1992 || lowest_bit_set == 0)
1993 && all_bits_between_are_set != 0)
1994 return 1;
1995
1996 if ((highest_bit_set - lowest_bit_set) < 21)
1997 return 1;
1998
1999 return 0;
2000 }
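/* In other words (added summary), a constant is 2-insn material if
   its high word is all zeros or all ones (sethi/or or sethi/xor),
   if its set bits run contiguously from bit 0 or up to bit 63
   (mov/srlx or mov/sllx), or if they span fewer than 22 bits
   (sethi plus one shift).  */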
2001
2002 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2003 unsigned HOST_WIDE_INT,
2004 int, int);
2005
2006 static unsigned HOST_WIDE_INT
2007 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2008 unsigned HOST_WIDE_INT low_bits,
2009 int lowest_bit_set, int shift)
2010 {
2011 HOST_WIDE_INT hi, lo;
2012
2013 if (lowest_bit_set < 32)
2014 {
2015 lo = (low_bits >> lowest_bit_set) << shift;
2016 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2017 }
2018 else
2019 {
2020 lo = 0;
2021 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2022 }
2023 gcc_assert (! (hi & lo));
2024 return (hi | lo);
2025 }
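/* For instance (illustrative), with high_bits = 0x000007fc,
   low_bits = 0 and lowest_bit_set = 34, the set bits 34-42 are
   shifted down to produce 0x1ff << shift, ready for a simm13 'mov'
   (shift 0) or a sethi (shift 10).  */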
2026
2027 /* Here we are sure to be arch64 and this is an integer constant
2028 being loaded into a register. Emit the most efficient
2029 insn sequence possible. Detection of all the 1-insn cases
2030 has been done already. */
2031 static void
2032 sparc_emit_set_const64 (rtx op0, rtx op1)
2033 {
2034 unsigned HOST_WIDE_INT high_bits, low_bits;
2035 int lowest_bit_set, highest_bit_set;
2036 int all_bits_between_are_set;
2037 rtx temp = 0;
2038
2039 /* Sanity check that we know what we are working with. */
2040 gcc_assert (TARGET_ARCH64
2041 && (GET_CODE (op0) == SUBREG
2042 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2043
2044 if (! can_create_pseudo_p ())
2045 temp = op0;
2046
2047 if (GET_CODE (op1) != CONST_INT)
2048 {
2049 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2050 return;
2051 }
2052
2053 if (! temp)
2054 temp = gen_reg_rtx (DImode);
2055
2056 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2057 low_bits = (INTVAL (op1) & 0xffffffff);
2058
2059 /* low_bits bits 0 --> 31
2060 high_bits bits 32 --> 63 */
2061
2062 analyze_64bit_constant (high_bits, low_bits,
2063 &highest_bit_set, &lowest_bit_set,
2064 &all_bits_between_are_set);
2065
2066 /* First try for a 2-insn sequence. */
2067
2068 /* These situations are preferred because the optimizer can
2069 * do more things with them:
2070 * 1) mov -1, %reg
2071 * sllx %reg, shift, %reg
2072 * 2) mov -1, %reg
2073 * srlx %reg, shift, %reg
2074 * 3) mov some_small_const, %reg
2075 * sllx %reg, shift, %reg
2076 */
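/* E.g. (illustrative) 0xfff00000_00000000 has bits 52-63 set, so it
   is emitted as "mov -1, %reg; sllx %reg, 52, %reg".  */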
2077 if (((highest_bit_set == 63
2078 || lowest_bit_set == 0)
2079 && all_bits_between_are_set != 0)
2080 || ((highest_bit_set - lowest_bit_set) < 12))
2081 {
2082 HOST_WIDE_INT the_const = -1;
2083 int shift = lowest_bit_set;
2084
2085 if ((highest_bit_set != 63
2086 && lowest_bit_set != 0)
2087 || all_bits_between_are_set == 0)
2088 {
2089 the_const =
2090 create_simple_focus_bits (high_bits, low_bits,
2091 lowest_bit_set, 0);
2092 }
2093 else if (lowest_bit_set == 0)
2094 shift = -(63 - highest_bit_set);
2095
2096 gcc_assert (SPARC_SIMM13_P (the_const));
2097 gcc_assert (shift != 0);
2098
2099 emit_insn (gen_safe_SET64 (temp, the_const));
2100 if (shift > 0)
2101 emit_insn (gen_rtx_SET (VOIDmode,
2102 op0,
2103 gen_rtx_ASHIFT (DImode,
2104 temp,
2105 GEN_INT (shift))));
2106 else if (shift < 0)
2107 emit_insn (gen_rtx_SET (VOIDmode,
2108 op0,
2109 gen_rtx_LSHIFTRT (DImode,
2110 temp,
2111 GEN_INT (-shift))));
2112 return;
2113 }
2114
2115 /* Now handle a range of 22 or fewer bits set somewhere.
2116 * 1) sethi %hi(focus_bits), %reg
2117 * sllx %reg, shift, %reg
2118 * 2) sethi %hi(focus_bits), %reg
2119 * srlx %reg, shift, %reg
2120 */
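/* E.g. (illustrative) a 19-bit field at bits 38-56: sethi places it
   in its bits 10-28 window and a single "sllx %reg, 28, %reg" moves
   it into place.  */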
2121 if ((highest_bit_set - lowest_bit_set) < 21)
2122 {
2123 unsigned HOST_WIDE_INT focus_bits =
2124 create_simple_focus_bits (high_bits, low_bits,
2125 lowest_bit_set, 10);
2126
2127 gcc_assert (SPARC_SETHI_P (focus_bits));
2128 gcc_assert (lowest_bit_set != 10);
2129
2130 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2131
2132 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2133 if (lowest_bit_set < 10)
2134 emit_insn (gen_rtx_SET (VOIDmode,
2135 op0,
2136 gen_rtx_LSHIFTRT (DImode, temp,
2137 GEN_INT (10 - lowest_bit_set))));
2138 else if (lowest_bit_set > 10)
2139 emit_insn (gen_rtx_SET (VOIDmode,
2140 op0,
2141 gen_rtx_ASHIFT (DImode, temp,
2142 GEN_INT (lowest_bit_set - 10))));
2143 return;
2144 }
2145
2146 /* 1) sethi %hi(low_bits), %reg
2147 * or %reg, %lo(low_bits), %reg
2148 * 2) sethi %hi(~low_bits), %reg
2149 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2150 */
2151 if (high_bits == 0
2152 || high_bits == 0xffffffff)
2153 {
2154 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2155 (high_bits == 0xffffffff));
2156 return;
2157 }
2158
2159 /* Now, try 3-insn sequences. */
2160
2161 /* 1) sethi %hi(high_bits), %reg
2162 * or %reg, %lo(high_bits), %reg
2163 * sllx %reg, 32, %reg
2164 */
2165 if (low_bits == 0)
2166 {
2167 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2168 return;
2169 }
2170
2171 /* We may be able to do something quick
2172 when the constant is negated, so try that. */
2173 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2174 (~low_bits) & 0xfffffc00))
2175 {
2176 /* NOTE: The trailing bits get XOR'd so we need the
2177 non-negated bits, not the negated ones. */
2178 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2179
2180 if ((((~high_bits) & 0xffffffff) == 0
2181 && ((~low_bits) & 0x80000000) == 0)
2182 || (((~high_bits) & 0xffffffff) == 0xffffffff
2183 && ((~low_bits) & 0x80000000) != 0))
2184 {
2185 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2186
2187 if ((SPARC_SETHI_P (fast_int)
2188 && (~high_bits & 0xffffffff) == 0)
2189 || SPARC_SIMM13_P (fast_int))
2190 emit_insn (gen_safe_SET64 (temp, fast_int));
2191 else
2192 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2193 }
2194 else
2195 {
2196 rtx negated_const;
2197 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2198 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2199 sparc_emit_set_const64 (temp, negated_const);
2200 }
2201
2202 /* If we are XOR'ing with -1, then we should emit a one's complement
2203 instead. This way the combiner will notice logical operations
2204 such as ANDN later on and substitute. */
2205 if (trailing_bits == 0x3ff)
2206 {
2207 emit_insn (gen_rtx_SET (VOIDmode, op0,
2208 gen_rtx_NOT (DImode, temp)));
2209 }
2210 else
2211 {
2212 emit_insn (gen_rtx_SET (VOIDmode,
2213 op0,
2214 gen_safe_XOR64 (temp,
2215 (-0x400 | trailing_bits))));
2216 }
2217 return;
2218 }
2219
2220 /* 1) sethi %hi(xxx), %reg
2221 * or %reg, %lo(xxx), %reg
2222 * sllx %reg, yyy, %reg
2223 *
2224 * ??? This is just a generalized version of the low_bits==0
2225 * thing above, FIXME...
2226 */
2227 if ((highest_bit_set - lowest_bit_set) < 32)
2228 {
2229 unsigned HOST_WIDE_INT focus_bits =
2230 create_simple_focus_bits (high_bits, low_bits,
2231 lowest_bit_set, 0);
2232
2233 /* We can't get here in this state. */
2234 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2235
2236 /* So what we know is that the set bits straddle the
2237 middle of the 64-bit word. */
2238 sparc_emit_set_const64_quick2 (op0, temp,
2239 focus_bits, 0,
2240 lowest_bit_set);
2241 return;
2242 }
2243
2244 /* 1) sethi %hi(high_bits), %reg
2245 * or %reg, %lo(high_bits), %reg
2246 * sllx %reg, 32, %reg
2247 * or %reg, low_bits, %reg
2248 */
2249 if (SPARC_SIMM13_P(low_bits)
2250 && ((int)low_bits > 0))
2251 {
2252 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2253 return;
2254 }
2255
2256 /* The easiest way when all else fails, is full decomposition. */
2257 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2258 }
2259 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2260
2261 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2262 return the mode to be used for the comparison. For floating-point,
2263 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2264 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2265 processing is needed. */
2266
2267 enum machine_mode
2268 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2269 {
2270 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2271 {
2272 switch (op)
2273 {
2274 case EQ:
2275 case NE:
2276 case UNORDERED:
2277 case ORDERED:
2278 case UNLT:
2279 case UNLE:
2280 case UNGT:
2281 case UNGE:
2282 case UNEQ:
2283 case LTGT:
2284 return CCFPmode;
2285
2286 case LT:
2287 case LE:
2288 case GT:
2289 case GE:
2290 return CCFPEmode;
2291
2292 default:
2293 gcc_unreachable ();
2294 }
2295 }
2296 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2297 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2298 {
2299 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2300 return CCX_NOOVmode;
2301 else
2302 return CC_NOOVmode;
2303 }
2304 else
2305 {
2306 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2307 return CCXmode;
2308 else
2309 return CCmode;
2310 }
2311 }
2312
2313 /* Emit the compare insn and return the CC reg for a CODE comparison
2314 with operands X and Y. */
2315
2316 static rtx
2317 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2318 {
2319 enum machine_mode mode;
2320 rtx cc_reg;
2321
2322 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2323 return x;
2324
2325 mode = SELECT_CC_MODE (code, x, y);
2326
2327 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2328 fcc regs (cse can't tell they're really call clobbered regs and will
2329 remove a duplicate comparison even if there is an intervening function
2330 call - it will then try to reload the cc reg via an int reg which is why
2331 we need the movcc patterns). It is possible to provide the movcc
2332 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2333 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2334 to tell cse that CCFPE mode registers (even pseudos) are call
2335 clobbered. */
2336
2337 /* ??? This is an experiment. Rather than making changes to cse which may
2338 or may not be easy/clean, we do our own cse. This is possible because
2339 we will generate hard registers. Cse knows they're call clobbered (it
2340 doesn't know the same thing about pseudos). If we guess wrong, no big
2341 deal, but if we win, great! */
2342
2343 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2344 #if 1 /* experiment */
2345 {
2346 int reg;
2347 /* We cycle through the registers to ensure they're all exercised. */
2348 static int next_fcc_reg = 0;
2349 /* Previous x,y for each fcc reg. */
2350 static rtx prev_args[4][2];
2351
2352 /* Scan prev_args for x,y. */
2353 for (reg = 0; reg < 4; reg++)
2354 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2355 break;
2356 if (reg == 4)
2357 {
2358 reg = next_fcc_reg;
2359 prev_args[reg][0] = x;
2360 prev_args[reg][1] = y;
2361 next_fcc_reg = (next_fcc_reg + 1) & 3;
2362 }
2363 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2364 }
2365 #else
2366 cc_reg = gen_reg_rtx (mode);
2367 #endif /* ! experiment */
2368 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2369 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2370 else
2371 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2372
2373 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2374 will only result in an unrecognizable insn, so there is no point in asserting. */
2375 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2376
2377 return cc_reg;
2378 }
2379
2380
2381 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2382
2383 rtx
2384 gen_compare_reg (rtx cmp)
2385 {
2386 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2387 }
2388
2389 /* This function is used for v9 only.
2390 DEST is the target of the Scc insn.
2391 CODE is the code for an Scc's comparison.
2392 X and Y are the values we compare.
2393
2394 This function is needed to turn
2395
2396 (set (reg:SI 110)
2397 (gt (reg:CCX 100 %icc)
2398 (const_int 0)))
2399 into
2400 (set (reg:SI 110)
2401 (gt:DI (reg:CCX 100 %icc)
2402 (const_int 0)))
2403
2404 I.e., the instruction recognizer needs to see the mode of the comparison to
2405 find the right instruction. We could use "gt:DI" right in the
2406 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2407
2408 static int
2409 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2410 {
2411 if (! TARGET_ARCH64
2412 && (GET_MODE (x) == DImode
2413 || GET_MODE (dest) == DImode))
2414 return 0;
2415
2416 /* Try to use the movrCC insns. */
2417 if (TARGET_ARCH64
2418 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2419 && y == const0_rtx
2420 && v9_regcmp_p (compare_code))
2421 {
2422 rtx op0 = x;
2423 rtx temp;
2424
2425 /* Special case for op0 != 0. This can be done with one instruction if
2426 dest == x. */
2427
2428 if (compare_code == NE
2429 && GET_MODE (dest) == DImode
2430 && rtx_equal_p (op0, dest))
2431 {
2432 emit_insn (gen_rtx_SET (VOIDmode, dest,
2433 gen_rtx_IF_THEN_ELSE (DImode,
2434 gen_rtx_fmt_ee (compare_code, DImode,
2435 op0, const0_rtx),
2436 const1_rtx,
2437 dest)));
2438 return 1;
2439 }
2440
2441 if (reg_overlap_mentioned_p (dest, op0))
2442 {
2443 /* Handle the case where dest == x.
2444 We "early clobber" the result. */
2445 op0 = gen_reg_rtx (GET_MODE (x));
2446 emit_move_insn (op0, x);
2447 }
2448
2449 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2450 if (GET_MODE (op0) != DImode)
2451 {
2452 temp = gen_reg_rtx (DImode);
2453 convert_move (temp, op0, 0);
2454 }
2455 else
2456 temp = op0;
2457 emit_insn (gen_rtx_SET (VOIDmode, dest,
2458 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2459 gen_rtx_fmt_ee (compare_code, DImode,
2460 temp, const0_rtx),
2461 const1_rtx,
2462 dest)));
2463 return 1;
2464 }
2465 else
2466 {
2467 x = gen_compare_reg_1 (compare_code, x, y);
2468 y = const0_rtx;
2469
2470 gcc_assert (GET_MODE (x) != CC_NOOVmode
2471 && GET_MODE (x) != CCX_NOOVmode);
2472
2473 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2474 emit_insn (gen_rtx_SET (VOIDmode, dest,
2475 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2476 gen_rtx_fmt_ee (compare_code,
2477 GET_MODE (x), x, y),
2478 const1_rtx, dest)));
2479 return 1;
2480 }
2481 }
2482
2483
2484 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2485 without jumps using the addx/subx instructions. */
2486
2487 bool
2488 emit_scc_insn (rtx operands[])
2489 {
2490 rtx tem;
2491 rtx x;
2492 rtx y;
2493 enum rtx_code code;
2494
2495 /* The quad-word fp compare library routines all return nonzero to indicate
2496 true, which is different from the equivalent libgcc routines, so we must
2497 handle them specially here. */
2498 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2499 {
2500 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2501 GET_CODE (operands[1]));
2502 operands[2] = XEXP (operands[1], 0);
2503 operands[3] = XEXP (operands[1], 1);
2504 }
2505
2506 code = GET_CODE (operands[1]);
2507 x = operands[2];
2508 y = operands[3];
2509
2510 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2511 more applications). The exception to this is "reg != 0" which can
2512 be done in one instruction on v9 (so we do it). */
2513 if (code == EQ)
2514 {
2515 if (GET_MODE (x) == SImode)
2516 {
2517 rtx pat = gen_seqsi_special (operands[0], x, y);
2518 emit_insn (pat);
2519 return true;
2520 }
2521 else if (GET_MODE (x) == DImode)
2522 {
2523 rtx pat = gen_seqdi_special (operands[0], x, y);
2524 emit_insn (pat);
2525 return true;
2526 }
2527 }
2528
2529 if (code == NE)
2530 {
2531 if (GET_MODE (x) == SImode)
2532 {
2533 rtx pat = gen_snesi_special (operands[0], x, y);
2534 emit_insn (pat);
2535 return true;
2536 }
2537 else if (GET_MODE (x) == DImode)
2538 {
2539 rtx pat;
2540 if (TARGET_VIS3)
2541 pat = gen_snedi_special_vis3 (operands[0], x, y);
2542 else
2543 pat = gen_snedi_special (operands[0], x, y);
2544 emit_insn (pat);
2545 return true;
2546 }
2547 }
2548
2549 if (TARGET_V9
2550 && TARGET_ARCH64
2551 && GET_MODE (x) == DImode
2552 && !(TARGET_VIS3
2553 && (code == GTU || code == LTU))
2554 && gen_v9_scc (operands[0], code, x, y))
2555 return true;
2556
2557 /* We can do LTU and GEU using the addx/subx instructions too. And
2558 for GTU/LEU, if both operands are registers, swap them and fall
2559 back to the easy case. */
2560 if (code == GTU || code == LEU)
2561 {
2562 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2563 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2564 {
2565 tem = x;
2566 x = y;
2567 y = tem;
2568 code = swap_condition (code);
2569 }
2570 }
2571
2572 if (code == LTU
2573 || (!TARGET_VIS3 && code == GEU))
2574 {
2575 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2576 gen_rtx_fmt_ee (code, SImode,
2577 gen_compare_reg_1 (code, x, y),
2578 const0_rtx)));
2579 return true;
2580 }
2581
2582 /* All the possibilities to use addx/subx-based sequences have been
2583 exhausted, so try for a 3-instruction sequence using v9 conditional
2584 moves. */
2585 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
2586 return true;
2587
2588 /* Nope, do branches. */
2589 return false;
2590 }
2591
2592 /* Emit a conditional jump insn for the v9 architecture using comparison code
2593 CODE and jump target LABEL.
2594 This function exists to take advantage of the v9 brxx insns. */
2595
2596 static void
2597 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2598 {
2599 emit_jump_insn (gen_rtx_SET (VOIDmode,
2600 pc_rtx,
2601 gen_rtx_IF_THEN_ELSE (VOIDmode,
2602 gen_rtx_fmt_ee (code, GET_MODE (op0),
2603 op0, const0_rtx),
2604 gen_rtx_LABEL_REF (VOIDmode, label),
2605 pc_rtx)));
2606 }
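/* The emitted pattern corresponds to the v9 branch-on-register
   insns, e.g. "brz %o0, label" for an EQ test against zero (the
   register here is purely illustrative).  */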
2607
2608 void
2609 emit_conditional_branch_insn (rtx operands[])
2610 {
2611 /* The quad-word fp compare library routines all return nonzero to indicate
2612 true, which is different from the equivalent libgcc routines, so we must
2613 handle them specially here. */
2614 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2615 {
2616 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2617 GET_CODE (operands[0]));
2618 operands[1] = XEXP (operands[0], 0);
2619 operands[2] = XEXP (operands[0], 1);
2620 }
2621
2622 if (TARGET_ARCH64 && operands[2] == const0_rtx
2623 && GET_CODE (operands[1]) == REG
2624 && GET_MODE (operands[1]) == DImode)
2625 {
2626 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2627 return;
2628 }
2629
2630 operands[1] = gen_compare_reg (operands[0]);
2631 operands[2] = const0_rtx;
2632 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2633 operands[1], operands[2]);
2634 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2635 operands[3]));
2636 }
2637
2638
2639 /* Generate a DFmode part of a hard TFmode register.
2640 REG is the TFmode hard register, LOW is 1 for the
2641 low 64 bits of the register and 0 otherwise. */
2643 rtx
2644 gen_df_reg (rtx reg, int low)
2645 {
2646 int regno = REGNO (reg);
2647
2648 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2649 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
2650 return gen_rtx_REG (DFmode, regno);
2651 }
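/* Illustrative example (added note): SPARC is big-endian, so for a
   TFmode value in %f0-%f3 the high DFmode half is %f0 and
   gen_df_reg (reg, 1) returns the low half %f2 (regno + 2); for a
   TFmode value in integer registers on arch64 the step is a single
   DImode register instead.  */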
2652 \f
2653 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
2654 Unlike normal calls, TFmode operands are passed by reference. It is
2655 assumed that no more than 3 operands are required. */
2656
2657 static void
2658 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
2659 {
2660 rtx ret_slot = NULL, arg[3], func_sym;
2661 int i;
2662
2663 /* We only expect to be called for conversions, unary, and binary ops. */
2664 gcc_assert (nargs == 2 || nargs == 3);
2665
2666 for (i = 0; i < nargs; ++i)
2667 {
2668 rtx this_arg = operands[i];
2669 rtx this_slot;
2670
2671 /* TFmode arguments and return values are passed by reference. */
2672 if (GET_MODE (this_arg) == TFmode)
2673 {
2674 int force_stack_temp;
2675
2676 force_stack_temp = 0;
2677 if (TARGET_BUGGY_QP_LIB && i == 0)
2678 force_stack_temp = 1;
2679
2680 if (GET_CODE (this_arg) == MEM
2681 && ! force_stack_temp)
2682 this_arg = XEXP (this_arg, 0);
2683 else if (CONSTANT_P (this_arg)
2684 && ! force_stack_temp)
2685 {
2686 this_slot = force_const_mem (TFmode, this_arg);
2687 this_arg = XEXP (this_slot, 0);
2688 }
2689 else
2690 {
2691 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);
2692
2693 /* Operand 0 is the return value. We'll copy it out later. */
2694 if (i > 0)
2695 emit_move_insn (this_slot, this_arg);
2696 else
2697 ret_slot = this_slot;
2698
2699 this_arg = XEXP (this_slot, 0);
2700 }
2701 }
2702
2703 arg[i] = this_arg;
2704 }
2705
2706 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
2707
2708 if (GET_MODE (operands[0]) == TFmode)
2709 {
2710 if (nargs == 2)
2711 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
2712 arg[0], GET_MODE (arg[0]),
2713 arg[1], GET_MODE (arg[1]));
2714 else
2715 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
2716 arg[0], GET_MODE (arg[0]),
2717 arg[1], GET_MODE (arg[1]),
2718 arg[2], GET_MODE (arg[2]));
2719
2720 if (ret_slot)
2721 emit_move_insn (operands[0], ret_slot);
2722 }
2723 else
2724 {
2725 rtx ret;
2726
2727 gcc_assert (nargs == 2);
2728
2729 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
2730 GET_MODE (operands[0]), 1,
2731 arg[1], GET_MODE (arg[1]));
2732
2733 if (ret != operands[0])
2734 emit_move_insn (operands[0], ret);
2735 }
2736 }
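/* So a TFmode addition ends up as a call of the form (illustrative)
   _Qp_add (&result, &op1, &op2): every TFmode value is passed by
   address and the result is written through the first pointer, then
   copied out of the stack slot if one was needed.  */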
2737
2738 /* Expand soft-float TFmode calls to SPARC ABI routines. */
2739
2740 static void
2741 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
2742 {
2743 const char *func;
2744
2745 switch (code)
2746 {
2747 case PLUS:
2748 func = "_Qp_add";
2749 break;
2750 case MINUS:
2751 func = "_Qp_sub";
2752 break;
2753 case MULT:
2754 func = "_Qp_mul";
2755 break;
2756 case DIV:
2757 func = "_Qp_div";
2758 break;
2759 default:
2760 gcc_unreachable ();
2761 }
2762
2763 emit_soft_tfmode_libcall (func, 3, operands);
2764 }
2765
2766 static void
2767 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
2768 {
2769 const char *func;
2770
2771 gcc_assert (code == SQRT);
2772 func = "_Qp_sqrt";
2773
2774 emit_soft_tfmode_libcall (func, 2, operands);
2775 }
2776
2777 static void
2778 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
2779 {
2780 const char *func;
2781
2782 switch (code)
2783 {
2784 case FLOAT_EXTEND:
2785 switch (GET_MODE (operands[1]))
2786 {
2787 case SFmode:
2788 func = "_Qp_stoq";
2789 break;
2790 case DFmode:
2791 func = "_Qp_dtoq";
2792 break;
2793 default:
2794 gcc_unreachable ();
2795 }
2796 break;
2797
2798 case FLOAT_TRUNCATE:
2799 switch (GET_MODE (operands[0]))
2800 {
2801 case SFmode:
2802 func = "_Qp_qtos";
2803 break;
2804 case DFmode:
2805 func = "_Qp_qtod";
2806 break;
2807 default:
2808 gcc_unreachable ();
2809 }
2810 break;
2811
2812 case FLOAT:
2813 switch (GET_MODE (operands[1]))
2814 {
2815 case SImode:
2816 func = "_Qp_itoq";
2817 if (TARGET_ARCH64)
2818 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
2819 break;
2820 case DImode:
2821 func = "_Qp_xtoq";
2822 break;
2823 default:
2824 gcc_unreachable ();
2825 }
2826 break;
2827
2828 case UNSIGNED_FLOAT:
2829 switch (GET_MODE (operands[1]))
2830 {
2831 case SImode:
2832 func = "_Qp_uitoq";
2833 if (TARGET_ARCH64)
2834 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
2835 break;
2836 case DImode:
2837 func = "_Qp_uxtoq";
2838 break;
2839 default:
2840 gcc_unreachable ();
2841 }
2842 break;
2843
2844 case FIX:
2845 switch (GET_MODE (operands[0]))
2846 {
2847 case SImode:
2848 func = "_Qp_qtoi";
2849 break;
2850 case DImode:
2851 func = "_Qp_qtox";
2852 break;
2853 default:
2854 gcc_unreachable ();
2855 }
2856 break;
2857
2858 case UNSIGNED_FIX:
2859 switch (GET_MODE (operands[0]))
2860 {
2861 case SImode:
2862 func = "_Qp_qtoui";
2863 break;
2864 case DImode:
2865 func = "_Qp_qtoux";
2866 break;
2867 default:
2868 gcc_unreachable ();
2869 }
2870 break;
2871
2872 default:
2873 gcc_unreachable ();
2874 }
2875
2876 emit_soft_tfmode_libcall (func, 2, operands);
2877 }
2878
2879 /* Expand a hard-float TFmode operation. All arguments must be in
2880 registers. */
2881
2882 static void
2883 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
2884 {
2885 rtx op, dest;
2886
2887 if (GET_RTX_CLASS (code) == RTX_UNARY)
2888 {
2889 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2890 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
2891 }
2892 else
2893 {
2894 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2895 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
2896 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2897 operands[1], operands[2]);
2898 }
2899
2900 if (register_operand (operands[0], VOIDmode))
2901 dest = operands[0];
2902 else
2903 dest = gen_reg_rtx (GET_MODE (operands[0]));
2904
2905 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
2906
2907 if (dest != operands[0])
2908 emit_move_insn (operands[0], dest);
2909 }
2910
2911 void
2912 emit_tfmode_binop (enum rtx_code code, rtx *operands)
2913 {
2914 if (TARGET_HARD_QUAD)
2915 emit_hard_tfmode_operation (code, operands);
2916 else
2917 emit_soft_tfmode_binop (code, operands);
2918 }
2919
2920 void
2921 emit_tfmode_unop (enum rtx_code code, rtx *operands)
2922 {
2923 if (TARGET_HARD_QUAD)
2924 emit_hard_tfmode_operation (code, operands);
2925 else
2926 emit_soft_tfmode_unop (code, operands);
2927 }
2928
2929 void
2930 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
2931 {
2932 if (TARGET_HARD_QUAD)
2933 emit_hard_tfmode_operation (code, operands);
2934 else
2935 emit_soft_tfmode_cvt (code, operands);
2936 }
2937 \f
2938 /* Return nonzero if a branch/jump/call instruction will be emitting
2939 a nop into its delay slot. */
2940
2941 int
2942 empty_delay_slot (rtx insn)
2943 {
2944 rtx seq;
2945
2946 /* If no previous instruction (should not happen), return true. */
2947 if (PREV_INSN (insn) == NULL)
2948 return 1;
2949
2950 seq = NEXT_INSN (PREV_INSN (insn));
2951 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
2952 return 0;
2953
2954 return 1;
2955 }
2956
2957 /* Return nonzero if TRIAL can go into the call delay slot. */
2958
2959 int
2960 tls_call_delay (rtx trial)
2961 {
2962 rtx pat;
2963
2964 /* Binutils allows
2965 call __tls_get_addr, %tgd_call (foo)
2966 add %l7, %o0, %o0, %tgd_add (foo)
2967 while Sun as/ld does not. */
2968 if (TARGET_GNU_TLS || !TARGET_TLS)
2969 return 1;
2970
2971 pat = PATTERN (trial);
2972
2973 /* We must reject tgd_add{32|64}, i.e.
2974 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
2975 and tldm_add{32|64}, i.e.
2976 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
2977 for Sun as/ld. */
2978 if (GET_CODE (pat) == SET
2979 && GET_CODE (SET_SRC (pat)) == PLUS)
2980 {
2981 rtx unspec = XEXP (SET_SRC (pat), 1);
2982
2983 if (GET_CODE (unspec) == UNSPEC
2984 && (XINT (unspec, 1) == UNSPEC_TLSGD
2985 || XINT (unspec, 1) == UNSPEC_TLSLDM))
2986 return 0;
2987 }
2988
2989 return 1;
2990 }
2991
2992 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
2993 instruction. RETURN_P is true if the v9 variant 'return' is to be
2994 considered in the test too.
2995
2996 TRIAL must be a SET whose destination is a REG appropriate for the
2997 'restore' instruction or, if RETURN_P is true, for the 'return'
2998 instruction. */
2999
3000 static int
3001 eligible_for_restore_insn (rtx trial, bool return_p)
3002 {
3003 rtx pat = PATTERN (trial);
3004 rtx src = SET_SRC (pat);
3005 bool src_is_freg = false;
3006 rtx src_reg;
3007
3008 /* Since we can now do moves between float and integer registers when
3009 VIS3 is enabled, we have to catch this case. We can allow such
3010 moves when doing a 'return', however. */
3011 src_reg = src;
3012 if (GET_CODE (src_reg) == SUBREG)
3013 src_reg = SUBREG_REG (src_reg);
3014 if (GET_CODE (src_reg) == REG
3015 && SPARC_FP_REG_P (REGNO (src_reg)))
3016 src_is_freg = true;
3017
3018 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3019 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3020 && arith_operand (src, GET_MODE (src))
3021 && ! src_is_freg)
3022 {
3023 if (TARGET_ARCH64)
3024 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3025 else
3026 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3027 }
3028
3029 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3030 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3031 && arith_double_operand (src, GET_MODE (src))
3032 && ! src_is_freg)
3033 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3034
3035 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3036 else if (! TARGET_FPU && register_operand (src, SFmode))
3037 return 1;
3038
3039 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3040 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3041 return 1;
3042
3043 /* If we have the 'return' instruction, anything that does not use
3044 local or output registers and can go into a delay slot wins. */
3045 else if (return_p
3046 && TARGET_V9
3047 && !epilogue_renumber (&pat, 1)
3048 && get_attr_in_uncond_branch_delay (trial)
3049 == IN_UNCOND_BRANCH_DELAY_TRUE)
3050 return 1;
3051
3052 /* The 'restore src1,src2,dest' pattern for SImode. */
3053 else if (GET_CODE (src) == PLUS
3054 && register_operand (XEXP (src, 0), SImode)
3055 && arith_operand (XEXP (src, 1), SImode))
3056 return 1;
3057
3058 /* The 'restore src1,src2,dest' pattern for DImode. */
3059 else if (GET_CODE (src) == PLUS
3060 && register_operand (XEXP (src, 0), DImode)
3061 && arith_double_operand (XEXP (src, 1), DImode))
3062 return 1;
3063
3064 /* The 'restore src1,%lo(src2),dest' pattern. */
3065 else if (GET_CODE (src) == LO_SUM
3066 && ! TARGET_CM_MEDMID
3067 && ((register_operand (XEXP (src, 0), SImode)
3068 && immediate_operand (XEXP (src, 1), SImode))
3069 || (TARGET_ARCH64
3070 && register_operand (XEXP (src, 0), DImode)
3071 && immediate_operand (XEXP (src, 1), DImode))))
3072 return 1;
3073
3074 /* The 'restore src,src,dest' pattern. */
3075 else if (GET_CODE (src) == ASHIFT
3076 && (register_operand (XEXP (src, 0), SImode)
3077 || register_operand (XEXP (src, 0), DImode))
3078 && XEXP (src, 1) == const1_rtx)
3079 return 1;
3080
3081 return 0;
3082 }
3083
3084 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3085
3086 int
3087 eligible_for_return_delay (rtx trial)
3088 {
3089 int regno;
3090 rtx pat;
3091
3092 if (GET_CODE (trial) != INSN)
3093 return 0;
3094
3095 if (get_attr_length (trial) != 1)
3096 return 0;
3097
3098 /* If the function uses __builtin_eh_return, the eh_return machinery
3099 occupies the delay slot. */
3100 if (crtl->calls_eh_return)
3101 return 0;
3102
3103 /* In the case of a leaf or flat function, anything can go into the slot. */
3104 if (sparc_leaf_function_p || TARGET_FLAT)
3105 return
3106 get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE;
3107
3108 pat = PATTERN (trial);
3109 if (GET_CODE (pat) == PARALLEL)
3110 {
3111 int i;
3112
3113 if (! TARGET_V9)
3114 return 0;
3115 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3116 {
3117 rtx expr = XVECEXP (pat, 0, i);
3118 if (GET_CODE (expr) != SET)
3119 return 0;
3120 if (GET_CODE (SET_DEST (expr)) != REG)
3121 return 0;
3122 regno = REGNO (SET_DEST (expr));
3123 if (regno >= 8 && regno < 24)
3124 return 0;
3125 }
3126 return !epilogue_renumber (&pat, 1)
3127 && (get_attr_in_uncond_branch_delay (trial)
3128 == IN_UNCOND_BRANCH_DELAY_TRUE);
3129 }
3130
3131 if (GET_CODE (pat) != SET)
3132 return 0;
3133
3134 if (GET_CODE (SET_DEST (pat)) != REG)
3135 return 0;
3136
3137 regno = REGNO (SET_DEST (pat));
3138
3139 /* Otherwise, only operations which can be done in tandem with
3140 a `restore' or `return' insn can go into the delay slot. */
3141 if (regno >= 8 && regno < 24)
3142 return 0;
3143
3144 /* If this instruction sets up a floating-point register and we have a return
3145 instruction, it can probably go in. But restore will not work
3146 with FP_REGS. */
3147 if (! SPARC_INT_REG_P (regno))
3148 return (TARGET_V9
3149 && !epilogue_renumber (&pat, 1)
3150 && get_attr_in_uncond_branch_delay (trial)
3151 == IN_UNCOND_BRANCH_DELAY_TRUE);
3152
3153 return eligible_for_restore_insn (trial, true);
3154 }
3155
3156 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3157
3158 int
3159 eligible_for_sibcall_delay (rtx trial)
3160 {
3161 rtx pat;
3162
3163 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
3164 return 0;
3165
3166 if (get_attr_length (trial) != 1)
3167 return 0;
3168
3169 pat = PATTERN (trial);
3170
3171 if (sparc_leaf_function_p || TARGET_FLAT)
3172 {
3173 /* If the tail call is done using the call instruction,
3174 we have to restore %o7 in the delay slot. */
3175 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3176 return 0;
3177
3178 /* %g1 is used to build the function address. */
3179 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3180 return 0;
3181
3182 return 1;
3183 }
3184
3185 /* Otherwise, only operations which can be done in tandem with
3186 a `restore' insn can go into the delay slot. */
3187 if (GET_CODE (SET_DEST (pat)) != REG
3188 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3189 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3190 return 0;
3191
3192 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3193 in most cases. */
3194 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3195 return 0;
3196
3197 return eligible_for_restore_insn (trial, false);
3198 }
3199 \f
3200 /* Determine if it's legal to put X into the constant pool. This
3201 is not possible if X contains the address of a symbol that is
3202 not constant (TLS) or not known at final link time (PIC). */
3203
3204 static bool
3205 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3206 {
3207 switch (GET_CODE (x))
3208 {
3209 case CONST_INT:
3210 case CONST_DOUBLE:
3211 case CONST_VECTOR:
3212 /* Accept all non-symbolic constants. */
3213 return false;
3214
3215 case LABEL_REF:
3216 /* Labels are OK iff we are non-PIC. */
3217 return flag_pic != 0;
3218
3219 case SYMBOL_REF:
3220 /* 'Naked' TLS symbol references are never OK;
3221 non-TLS symbols are OK iff we are non-PIC. */
3222 if (SYMBOL_REF_TLS_MODEL (x))
3223 return true;
3224 else
3225 return flag_pic != 0;
3226
3227 case CONST:
3228 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3229 case PLUS:
3230 case MINUS:
3231 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3232 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3233 case UNSPEC:
3234 return true;
3235 default:
3236 gcc_unreachable ();
3237 }
3238 }
3239 \f
3240 /* Global Offset Table support. */
3241 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3242 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3243
3244 /* Return the SYMBOL_REF for the Global Offset Table. */
3245
3246 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3247
3248 static rtx
3249 sparc_got (void)
3250 {
3251 if (!sparc_got_symbol)
3252 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3253
3254 return sparc_got_symbol;
3255 }
3256
3257 /* Ensure that we are not using patterns that are not OK with PIC. */
3258
3259 int
3260 check_pic (int i)
3261 {
3262 rtx op;
3263
3264 switch (flag_pic)
3265 {
3266 case 1:
3267 op = recog_data.operand[i];
3268 gcc_assert (GET_CODE (op) != SYMBOL_REF
3269 && (GET_CODE (op) != CONST
3270 || (GET_CODE (XEXP (op, 0)) == MINUS
3271 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3272 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3273 case 2:
3274 default:
3275 return 1;
3276 }
3277 }
3278
3279 /* Return true if X is an address which needs a temporary register when
3280 reloaded while generating PIC code. */
3281
3282 int
3283 pic_address_needs_scratch (rtx x)
3284 {
3285 /* An address which is a symbol plus a non-SMALL_INT constant needs a temp reg. */
3286 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3287 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3288 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3289 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3290 return 1;
3291
3292 return 0;
3293 }
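/* For example (illustrative), (const (plus (symbol_ref "x")
   (const_int 0x2000))) needs a scratch register, since 0x2000 does
   not fit in the signed 13-bit immediate field.  */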
3294
3295 /* Determine if a given RTX is a valid constant. We already know this
3296 satisfies CONSTANT_P. */
3297
3298 static bool
3299 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3300 {
3301 switch (GET_CODE (x))
3302 {
3303 case CONST:
3304 case SYMBOL_REF:
3305 if (sparc_tls_referenced_p (x))
3306 return false;
3307 break;
3308
3309 case CONST_DOUBLE:
3310 if (GET_MODE (x) == VOIDmode)
3311 return true;
3312
3313 /* Floating-point constants are generally not ok.
3314 The only exceptions are 0.0 and all-ones in VIS. */
3315 if (TARGET_VIS
3316 && SCALAR_FLOAT_MODE_P (mode)
3317 && (const_zero_operand (x, mode)
3318 || const_all_ones_operand (x, mode)))
3319 return true;
3320
3321 return false;
3322
3323 case CONST_VECTOR:
3324 /* Vector constants are generally not ok.
3325 The only exceptions are 0 and -1 in VIS. */
3326 if (TARGET_VIS
3327 && (const_zero_operand (x, mode)
3328 || const_all_ones_operand (x, mode)))
3329 return true;
3330
3331 return false;
3332
3333 default:
3334 break;
3335 }
3336
3337 return true;
3338 }
3339
3340 /* Determine if a given RTX is a valid constant address. */
3341
3342 bool
3343 constant_address_p (rtx x)
3344 {
3345 switch (GET_CODE (x))
3346 {
3347 case LABEL_REF:
3348 case CONST_INT:
3349 case HIGH:
3350 return true;
3351
3352 case CONST:
3353 if (flag_pic && pic_address_needs_scratch (x))
3354 return false;
3355 return sparc_legitimate_constant_p (Pmode, x);
3356
3357 case SYMBOL_REF:
3358 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3359
3360 default:
3361 return false;
3362 }
3363 }
3364
3365 /* Nonzero if the constant value X is a legitimate general operand
3366 when generating PIC code. It is given that flag_pic is on and
3367 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3368
3369 bool
3370 legitimate_pic_operand_p (rtx x)
3371 {
3372 if (pic_address_needs_scratch (x))
3373 return false;
3374 if (sparc_tls_referenced_p (x))
3375 return false;
3376 return true;
3377 }
3378
3379 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3380 (CONST_INT_P (X) \
3381 && INTVAL (X) >= -0x1000 \
3382 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3383
3384 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3385 (CONST_INT_P (X) \
3386 && INTVAL (X) >= -0x1000 \
3387 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
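/* An explanatory note (not from the original): both macros keep the
   whole access inside the signed 13-bit immediate range
   [-0x1000, 0xfff]; the OLO10 variant stops at 0xc00 so that adding
   a %lo() value (at most 0x3ff) still fits.  */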
3388
3389 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3390
3391 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3392 ordinarily. This changes a bit when generating PIC. */
3393
3394 static bool
3395 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3396 {
3397 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3398
3399 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3400 rs1 = addr;
3401 else if (GET_CODE (addr) == PLUS)
3402 {
3403 rs1 = XEXP (addr, 0);
3404 rs2 = XEXP (addr, 1);
3405
3406 /* Canonicalize. REG comes first; if there are no regs,
3407 LO_SUM comes first. */
3408 if (!REG_P (rs1)
3409 && GET_CODE (rs1) != SUBREG
3410 && (REG_P (rs2)
3411 || GET_CODE (rs2) == SUBREG
3412 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3413 {
3414 rs1 = XEXP (addr, 1);
3415 rs2 = XEXP (addr, 0);
3416 }
3417
3418 if ((flag_pic == 1
3419 && rs1 == pic_offset_table_rtx
3420 && !REG_P (rs2)
3421 && GET_CODE (rs2) != SUBREG
3422 && GET_CODE (rs2) != LO_SUM
3423 && GET_CODE (rs2) != MEM
3424 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3425 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3426 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3427 || ((REG_P (rs1)
3428 || GET_CODE (rs1) == SUBREG)
3429 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3430 {
3431 imm1 = rs2;
3432 rs2 = NULL;
3433 }
3434 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3435 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3436 {
3437 /* We prohibit REG + REG for TFmode when there are no quad move insns
3438 and we consequently need to split. We do this because REG+REG
3439 is not an offsettable address. If we get the situation in reload
3440 where source and destination of a movtf pattern are both MEMs with
3441 REG+REG address, then only one of them gets converted to an
3442 offsettable address. */
3443 if (mode == TFmode
3444 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3445 return 0;
3446
3447 /* We prohibit REG + REG on ARCH32 if not optimizing for
3448 DFmode/DImode because then mem_min_alignment is likely to be zero
3449 after reload and the forced split would lack a matching splitter
3450 pattern. */
3451 if (TARGET_ARCH32 && !optimize
3452 && (mode == DFmode || mode == DImode))
3453 return 0;
3454 }
3455 else if (USE_AS_OFFSETABLE_LO10
3456 && GET_CODE (rs1) == LO_SUM
3457 && TARGET_ARCH64
3458 && ! TARGET_CM_MEDMID
3459 && RTX_OK_FOR_OLO10_P (rs2, mode))
3460 {
3461 rs2 = NULL;
3462 imm1 = XEXP (rs1, 1);
3463 rs1 = XEXP (rs1, 0);
3464 if (!CONSTANT_P (imm1)
3465 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3466 return 0;
3467 }
3468 }
3469 else if (GET_CODE (addr) == LO_SUM)
3470 {
3471 rs1 = XEXP (addr, 0);
3472 imm1 = XEXP (addr, 1);
3473
3474 if (!CONSTANT_P (imm1)
3475 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3476 return 0;
3477
3478 /* We can't allow TFmode in 32-bit mode, because an offset greater
3479 than the alignment (8) may cause the LO_SUM to overflow. */
3480 if (mode == TFmode && TARGET_ARCH32)
3481 return 0;
3482 }
3483 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3484 return 1;
3485 else
3486 return 0;
3487
3488 if (GET_CODE (rs1) == SUBREG)
3489 rs1 = SUBREG_REG (rs1);
3490 if (!REG_P (rs1))
3491 return 0;
3492
3493 if (rs2)
3494 {
3495 if (GET_CODE (rs2) == SUBREG)
3496 rs2 = SUBREG_REG (rs2);
3497 if (!REG_P (rs2))
3498 return 0;
3499 }
3500
3501 if (strict)
3502 {
3503 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3504 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3505 return 0;
3506 }
3507 else
3508 {
3509 if ((! SPARC_INT_REG_P (REGNO (rs1))
3510 && REGNO (rs1) != FRAME_POINTER_REGNUM
3511 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3512 || (rs2
3513 && (! SPARC_INT_REG_P (REGNO (rs2))
3514 && REGNO (rs2) != FRAME_POINTER_REGNUM
3515 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3516 return 0;
3517 }
3518 return 1;
3519 }
3520
3521 /* Return the SYMBOL_REF for the tls_get_addr function. */
3522
3523 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3524
3525 static rtx
3526 sparc_tls_get_addr (void)
3527 {
3528 if (!sparc_tls_symbol)
3529 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3530
3531 return sparc_tls_symbol;
3532 }
3533
3534 /* Return the Global Offset Table to be used in TLS mode. */
3535
3536 static rtx
3537 sparc_tls_got (void)
3538 {
3539 /* In PIC mode, this is just the PIC offset table. */
3540 if (flag_pic)
3541 {
3542 crtl->uses_pic_offset_table = 1;
3543 return pic_offset_table_rtx;
3544 }
3545
3546 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
3547 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
3548 if (TARGET_SUN_TLS && TARGET_ARCH32)
3549 {
3550 load_got_register ();
3551 return global_offset_table_rtx;
3552 }
3553
3554 /* In all other cases, we load a new pseudo with the GOT symbol. */
3555 return copy_to_reg (sparc_got ());
3556 }
3557
3558 /* Return true if X contains a thread-local symbol. */
3559
3560 static bool
3561 sparc_tls_referenced_p (rtx x)
3562 {
3563 if (!TARGET_HAVE_TLS)
3564 return false;
3565
3566 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3567 x = XEXP (XEXP (x, 0), 0);
3568
3569 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3570 return true;
3571
3572 /* That's all we handle in sparc_legitimize_tls_address for now. */
3573 return false;
3574 }
3575
3576 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3577 this (thread-local) address. */
3578
3579 static rtx
3580 sparc_legitimize_tls_address (rtx addr)
3581 {
3582 rtx temp1, temp2, temp3, ret, o0, got, insn;
3583
3584 gcc_assert (can_create_pseudo_p ());
3585
3586 if (GET_CODE (addr) == SYMBOL_REF)
3587 switch (SYMBOL_REF_TLS_MODEL (addr))
3588 {
3589 case TLS_MODEL_GLOBAL_DYNAMIC:
3590 start_sequence ();
3591 temp1 = gen_reg_rtx (SImode);
3592 temp2 = gen_reg_rtx (SImode);
3593 ret = gen_reg_rtx (Pmode);
3594 o0 = gen_rtx_REG (Pmode, 8);
3595 got = sparc_tls_got ();
3596 emit_insn (gen_tgd_hi22 (temp1, addr));
3597 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
3598 if (TARGET_ARCH32)
3599 {
3600 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
3601 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
3602 addr, const1_rtx));
3603 }
3604 else
3605 {
3606 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
3607 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
3608 addr, const1_rtx));
3609 }
3610 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
3611 insn = get_insns ();
3612 end_sequence ();
3613 emit_libcall_block (insn, ret, o0, addr);
3614 break;
3615
3616 case TLS_MODEL_LOCAL_DYNAMIC:
3617 start_sequence ();
3618 temp1 = gen_reg_rtx (SImode);
3619 temp2 = gen_reg_rtx (SImode);
3620 temp3 = gen_reg_rtx (Pmode);
3621 ret = gen_reg_rtx (Pmode);
3622 o0 = gen_rtx_REG (Pmode, 8);
3623 got = sparc_tls_got ();
3624 emit_insn (gen_tldm_hi22 (temp1));
3625 emit_insn (gen_tldm_lo10 (temp2, temp1));
3626 if (TARGET_ARCH32)
3627 {
3628 emit_insn (gen_tldm_add32 (o0, got, temp2));
3629 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
3630 const1_rtx));
3631 }
3632 else
3633 {
3634 emit_insn (gen_tldm_add64 (o0, got, temp2));
3635 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
3636 const1_rtx));
3637 }
3638 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
3639 insn = get_insns ();
3640 end_sequence ();
3641 emit_libcall_block (insn, temp3, o0,
3642 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
3643 UNSPEC_TLSLD_BASE));
3644 temp1 = gen_reg_rtx (SImode);
3645 temp2 = gen_reg_rtx (SImode);
3646 emit_insn (gen_tldo_hix22 (temp1, addr));
3647 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
3648 if (TARGET_ARCH32)
3649 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
3650 else
3651 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
3652 break;
3653
3654 case TLS_MODEL_INITIAL_EXEC:
3655 temp1 = gen_reg_rtx (SImode);
3656 temp2 = gen_reg_rtx (SImode);
3657 temp3 = gen_reg_rtx (Pmode);
3658 got = sparc_tls_got ();
3659 emit_insn (gen_tie_hi22 (temp1, addr));
3660 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
3661 if (TARGET_ARCH32)
3662 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
3663 else
3664 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
3665 if (TARGET_SUN_TLS)
3666 {
3667 ret = gen_reg_rtx (Pmode);
3668 if (TARGET_ARCH32)
3669 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
3670 temp3, addr));
3671 else
3672 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
3673 temp3, addr));
3674 }
3675 else
3676 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
3677 break;
3678
3679 case TLS_MODEL_LOCAL_EXEC:
3680 temp1 = gen_reg_rtx (Pmode);
3681 temp2 = gen_reg_rtx (Pmode);
3682 if (TARGET_ARCH32)
3683 {
3684 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
3685 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
3686 }
3687 else
3688 {
3689 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
3690 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
3691 }
3692 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
3693 break;
3694
3695 default:
3696 gcc_unreachable ();
3697 }
3698
3699 else if (GET_CODE (addr) == CONST)
3700 {
3701 rtx base, offset;
3702
3703 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
3704
3705 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
3706 offset = XEXP (XEXP (addr, 0), 1);
3707
3708 base = force_operand (base, NULL_RTX);
3709 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
3710 offset = force_reg (Pmode, offset);
3711 ret = gen_rtx_PLUS (Pmode, base, offset);
3712 }
3713
3714 else
3715 gcc_unreachable (); /* for now ... */
3716
3717 return ret;
3718 }
3719
3720 /* Legitimize PIC addresses. If the address is already position-independent,
3721 we return ORIG. Newly generated position-independent addresses go into a
3722 reg. This is REG if nonzero, otherwise we allocate register(s) as
3723 necessary. */
3724
3725 static rtx
3726 sparc_legitimize_pic_address (rtx orig, rtx reg)
3727 {
3728 bool gotdata_op = false;
3729
3730 if (GET_CODE (orig) == SYMBOL_REF
3731 /* See the comment in sparc_expand_move. */
3732 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
3733 {
3734 rtx pic_ref, address;
3735 rtx insn;
3736
3737 if (reg == 0)
3738 {
3739 gcc_assert (can_create_pseudo_p ());
3740 reg = gen_reg_rtx (Pmode);
3741 }
3742
3743 if (flag_pic == 2)
3744 {
3745 /* If not during reload, allocate another temp reg here for loading
3746 in the address, so that these instructions can be optimized
3747 properly. */
3748 rtx temp_reg = (! can_create_pseudo_p ()
3749 ? reg : gen_reg_rtx (Pmode));
3750
3751 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
3752 won't get confused into thinking that these two instructions
3753 are loading in the true address of the symbol. If in the
3754 future a PIC rtx exists, that should be used instead. */
3755 if (TARGET_ARCH64)
3756 {
3757 emit_insn (gen_movdi_high_pic (temp_reg, orig));
3758 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
3759 }
3760 else
3761 {
3762 emit_insn (gen_movsi_high_pic (temp_reg, orig));
3763 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
3764 }
3765 address = temp_reg;
3766 gotdata_op = true;
3767 }
3768 else
3769 address = orig;
3770
3771 crtl->uses_pic_offset_table = 1;
3772 if (gotdata_op)
3773 {
3774 if (TARGET_ARCH64)
3775 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
3776 pic_offset_table_rtx,
3777 address, orig));
3778 else
3779 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
3780 pic_offset_table_rtx,
3781 address, orig));
3782 }
3783 else
3784 {
3785 pic_ref
3786 = gen_const_mem (Pmode,
3787 gen_rtx_PLUS (Pmode,
3788 pic_offset_table_rtx, address));
3789 insn = emit_move_insn (reg, pic_ref);
3790 }
3791
3792 /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by the loop optimizer.  */
3794 set_unique_reg_note (insn, REG_EQUAL, orig);
3795 return reg;
3796 }
3797 else if (GET_CODE (orig) == CONST)
3798 {
3799 rtx base, offset;
3800
3801 if (GET_CODE (XEXP (orig, 0)) == PLUS
3802 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3803 return orig;
3804
3805 if (reg == 0)
3806 {
3807 gcc_assert (can_create_pseudo_p ());
3808 reg = gen_reg_rtx (Pmode);
3809 }
3810
3811 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3812 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
3813 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
3814 base == reg ? NULL_RTX : reg);
3815
3816 if (GET_CODE (offset) == CONST_INT)
3817 {
3818 if (SMALL_INT (offset))
3819 return plus_constant (base, INTVAL (offset));
3820 else if (can_create_pseudo_p ())
3821 offset = force_reg (Pmode, offset);
3822 else
3823 /* If we reach here, then something is seriously wrong. */
3824 gcc_unreachable ();
3825 }
3826 return gen_rtx_PLUS (Pmode, base, offset);
3827 }
3828 else if (GET_CODE (orig) == LABEL_REF)
3829 /* ??? We ought to be checking that the register is live instead, in case
3830 it is eliminated. */
3831 crtl->uses_pic_offset_table = 1;
3832
3833 return orig;
3834 }
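
/* For illustration, with -fPIC (flag_pic == 2) on 32-bit, the code above
   turns a reference to a global symbol into something like (a sketch):

	sethi	%hi(sym), %tmp		! movsi_high_pic
	or	%tmp, %lo(sym), %tmp	! movsi_lo_sum_pic
	ld	[%l7 + %tmp], %reg	! fetch the address from the GOT

   where %l7 holds the GOT pointer.  With an assembler that supports the
   GOTDATA operations, %gdop_* relocations are used instead so that the
   linker may relax the sequence.  */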
3835
3836 /* Try machine-dependent ways of modifying an illegitimate address X
3837 to be legitimate. If we find one, return the new, valid address.
3838
3839 OLDX is the address as it was before break_out_memory_refs was called.
3840 In some cases it is useful to look at this to decide what needs to be done.
3841
3842 MODE is the mode of the operand pointed to by X.
3843
3844 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
3845
3846 static rtx
3847 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3848 enum machine_mode mode)
3849 {
3850 rtx orig_x = x;
3851
3852 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
3853 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3854 force_operand (XEXP (x, 0), NULL_RTX));
3855 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
3856 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3857 force_operand (XEXP (x, 1), NULL_RTX));
3858 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
3859 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
3860 XEXP (x, 1));
3861 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
3862 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3863 force_operand (XEXP (x, 1), NULL_RTX));
3864
3865 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
3866 return x;
3867
3868 if (sparc_tls_referenced_p (x))
3869 x = sparc_legitimize_tls_address (x);
3870 else if (flag_pic)
3871 x = sparc_legitimize_pic_address (x, NULL_RTX);
3872 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
3873 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3874 copy_to_mode_reg (Pmode, XEXP (x, 1)));
3875 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
3876 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3877 copy_to_mode_reg (Pmode, XEXP (x, 0)));
3878 else if (GET_CODE (x) == SYMBOL_REF
3879 || GET_CODE (x) == CONST
3880 || GET_CODE (x) == LABEL_REF)
3881 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
3882
3883 return x;
3884 }
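
/* As an example of the REG+N -> REG+REG rewrite performed above, an address
   such as (plus (reg %i0) (const_int 0x12340)), whose offset does not fit
   in the 13-bit signed immediate field, becomes (a sketch):

	(set (reg tmp) (const_int 0x12340))
	(plus (reg %i0) (reg tmp))

   which is a legitimate REG+REG address.  */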
3885
3886 /* Delegitimize an address that was legitimized by the above function. */
3887
3888 static rtx
3889 sparc_delegitimize_address (rtx x)
3890 {
3891 x = delegitimize_mem_from_attrs (x);
3892
3893 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
3894 switch (XINT (XEXP (x, 1), 1))
3895 {
3896 case UNSPEC_MOVE_PIC:
3897 case UNSPEC_TLSLE:
3898 x = XVECEXP (XEXP (x, 1), 0, 0);
3899 gcc_assert (GET_CODE (x) == SYMBOL_REF);
3900 break;
3901 default:
3902 break;
3903 }
3904
3905 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
3906 if (GET_CODE (x) == MINUS
3907 && REG_P (XEXP (x, 0))
3908 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
3909 && GET_CODE (XEXP (x, 1)) == LO_SUM
3910 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
3911 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
3912 {
3913 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
3914 gcc_assert (GET_CODE (x) == LABEL_REF);
3915 }
3916
3917 return x;
3918 }
3919
3920 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
3921 replace the input X, or the original X if no replacement is called for.
3922 The output parameter *WIN is 1 if the calling macro should goto WIN,
3923 0 if it should not.
3924
3925 For SPARC, we wish to handle addresses by splitting them into
3926 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
3927 This cuts the number of extra insns by one.
3928
3929 Do nothing when generating PIC code and the address is a symbolic
3930 operand or requires a scratch register. */
3931
3932 rtx
3933 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
3934 int opnum, int type,
3935 int ind_levels ATTRIBUTE_UNUSED, int *win)
3936 {
3937 /* Decompose SImode constants into HIGH+LO_SUM. */
3938 if (CONSTANT_P (x)
3939 && (mode != TFmode || TARGET_ARCH64)
3940 && GET_MODE (x) == SImode
3941 && GET_CODE (x) != LO_SUM
3942 && GET_CODE (x) != HIGH
3943 && sparc_cmodel <= CM_MEDLOW
3944 && !(flag_pic
3945 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
3946 {
3947 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
3948 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3949 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3950 opnum, (enum reload_type)type);
3951 *win = 1;
3952 return x;
3953 }
3954
3955 /* We have to recognize what we have already generated above. */
3956 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
3957 {
3958 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3959 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3960 opnum, (enum reload_type)type);
3961 *win = 1;
3962 return x;
3963 }
3964
3965 *win = 0;
3966 return x;
3967 }
3968
3969 /* Return true if ADDR (a legitimate address expression)
3970 has an effect that depends on the machine mode it is used for.
3971
3972 In PIC mode,
3973
3974 (mem:HI [%l7+a])
3975
3976 is not equivalent to
3977
3978 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
3979
3980 because [%l7+a+1] is interpreted as the address of (a+1). */
3981
3982
3983 static bool
3984 sparc_mode_dependent_address_p (const_rtx addr)
3985 {
3986 if (flag_pic && GET_CODE (addr) == PLUS)
3987 {
3988 rtx op0 = XEXP (addr, 0);
3989 rtx op1 = XEXP (addr, 1);
3990 if (op0 == pic_offset_table_rtx
3991 && symbolic_operand (op1, VOIDmode))
3992 return true;
3993 }
3994
3995 return false;
3996 }
3997
3998 #ifdef HAVE_GAS_HIDDEN
3999 # define USE_HIDDEN_LINKONCE 1
4000 #else
4001 # define USE_HIDDEN_LINKONCE 0
4002 #endif
4003
4004 static void
4005 get_pc_thunk_name (char name[32], unsigned int regno)
4006 {
4007 const char *reg_name = reg_names[regno];
4008
4009 /* Skip the leading '%' as that cannot be used in a
4010 symbol name. */
4011 reg_name += 1;
4012
4013 if (USE_HIDDEN_LINKONCE)
4014 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4015 else
4016 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4017 }
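
/* E.g. for the standard GOT register %l7 and USE_HIDDEN_LINKONCE, the
   function above produces the name "__sparc_get_pc_thunk.l7".  */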
4018
4019 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4020
4021 static rtx
4022 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4023 {
4024 int orig_flag_pic = flag_pic;
4025 rtx insn;
4026
4027 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4028 flag_pic = 0;
4029 if (TARGET_ARCH64)
4030 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4031 else
4032 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4033 flag_pic = orig_flag_pic;
4034
4035 return insn;
4036 }
4037
4038 /* Emit code to load the GOT register. */
4039
4040 void
4041 load_got_register (void)
4042 {
4043 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4044 if (!global_offset_table_rtx)
4045 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4046
4047 if (TARGET_VXWORKS_RTP)
4048 emit_insn (gen_vxworks_load_got ());
4049 else
4050 {
4051 /* The GOT symbol is subject to a PC-relative relocation so we need a
4052 helper function to add the PC value and thus get the final value. */
4053 if (!got_helper_rtx)
4054 {
4055 char name[32];
4056 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4057 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4058 }
4059
4060 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4061 got_helper_rtx,
4062 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4063 }
4064
4065 /* Need to emit this whether or not we obey regdecls,
4066 since setjmp/longjmp can cause life info to screw up.
4067 ??? In the case where we don't obey regdecls, this is not sufficient
4068 since we may not fall out the bottom. */
4069 emit_use (global_offset_table_rtx);
4070 }
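
/* The net effect of load_got_register in PIC mode is the classical GOT
   setup sequence (a sketch, modulo scheduling):

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the helper adds the PC of the call to %l7, leaving the address of
   the GOT in %l7.  */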
4071
4072 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4073 address of the call target. */
4074
4075 void
4076 sparc_emit_call_insn (rtx pat, rtx addr)
4077 {
4078 rtx insn;
4079
4080 insn = emit_call_insn (pat);
4081
4082 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4083 if (TARGET_VXWORKS_RTP
4084 && flag_pic
4085 && GET_CODE (addr) == SYMBOL_REF
4086 && (SYMBOL_REF_DECL (addr)
4087 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4088 : !SYMBOL_REF_LOCAL_P (addr)))
4089 {
4090 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4091 crtl->uses_pic_offset_table = 1;
4092 }
4093 }
4094 \f
4095 /* Return 1 if RTX is a MEM which is known to be aligned to at
4096 least a DESIRED byte boundary. */
4097
4098 int
4099 mem_min_alignment (rtx mem, int desired)
4100 {
4101 rtx addr, base, offset;
4102
4103 /* If it's not a MEM we can't accept it. */
4104 if (GET_CODE (mem) != MEM)
4105 return 0;
4106
4107 /* Obviously... */
4108 if (!TARGET_UNALIGNED_DOUBLES
4109 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4110 return 1;
4111
4112 /* ??? The rest of the function predates MEM_ALIGN so
4113 there is probably a bit of redundancy. */
4114 addr = XEXP (mem, 0);
4115 base = offset = NULL_RTX;
4116 if (GET_CODE (addr) == PLUS)
4117 {
4118 if (GET_CODE (XEXP (addr, 0)) == REG)
4119 {
4120 base = XEXP (addr, 0);
4121
4122 /* What we are saying here is that if the base
4123 REG is aligned properly, the compiler will make
4124 sure any REG based index upon it will be so
4125 as well. */
4126 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4127 offset = XEXP (addr, 1);
4128 else
4129 offset = const0_rtx;
4130 }
4131 }
4132 else if (GET_CODE (addr) == REG)
4133 {
4134 base = addr;
4135 offset = const0_rtx;
4136 }
4137
4138 if (base != NULL_RTX)
4139 {
4140 int regno = REGNO (base);
4141
4142 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4143 {
4144 /* Check if the compiler has recorded some information
4145 about the alignment of the base REG. If reload has
4146 completed, we already matched with proper alignments.
	     If not running global_alloc, reload might give us an
	     unaligned pointer to the local stack, though.  */
4149 if (((cfun != 0
4150 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4151 || (optimize && reload_completed))
4152 && (INTVAL (offset) & (desired - 1)) == 0)
4153 return 1;
4154 }
4155 else
4156 {
4157 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4158 return 1;
4159 }
4160 }
4161 else if (! TARGET_UNALIGNED_DOUBLES
4162 || CONSTANT_P (addr)
4163 || GET_CODE (addr) == LO_SUM)
4164 {
4165 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4166 is true, in which case we can only assume that an access is aligned if
4167 it is to a constant address, or the address involves a LO_SUM. */
4168 return 1;
4169 }
4170
4171 /* An obviously unaligned address. */
4172 return 0;
4173 }
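
/* Usage example (a sketch): for a 32-bit stack slot at [%fp - 8], i.e. a MEM
   whose address is (plus (reg %fp) (const_int -8)), mem_min_alignment (mem, 8)
   returns 1: the frame pointer is doubleword aligned and the offset, adjusted
   by SPARC_STACK_BIAS (0 in 32-bit mode), is a multiple of 8.  */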
4174
4175 \f
4176 /* Vectors to keep interesting information about registers where it can easily
4177 be got. We used to use the actual mode value as the bit number, but there
4178 are more than 32 modes now. Instead we use two tables: one indexed by
4179 hard register number, and one indexed by mode. */
4180
4181 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4182 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4183 mapped into one sparc_mode_class mode. */
4184
4185 enum sparc_mode_class {
4186 S_MODE, D_MODE, T_MODE, O_MODE,
4187 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4188 CC_MODE, CCFP_MODE
4189 };
4190
4191 /* Modes for single-word and smaller quantities. */
4192 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4193
4194 /* Modes for double-word and smaller quantities. */
4195 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4196
4197 /* Modes for quad-word and smaller quantities. */
4198 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4199
4200 /* Modes for 8-word and smaller quantities. */
4201 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4202
4203 /* Modes for single-float quantities. We must allow any single word or
4204 smaller quantity. This is because the fix/float conversion instructions
4205 take integer inputs/outputs from the float registers. */
4206 #define SF_MODES (S_MODES)
4207
4208 /* Modes for double-float and smaller quantities. */
4209 #define DF_MODES (D_MODES)
4210
4211 /* Modes for quad-float and smaller quantities. */
4212 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4213
4214 /* Modes for quad-float pairs and smaller quantities. */
4215 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4216
4217 /* Modes for double-float only quantities. */
4218 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4219
4220 /* Modes for quad-float and double-float only quantities. */
4221 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4222
4223 /* Modes for quad-float pairs and double-float only quantities. */
4224 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4225
4226 /* Modes for condition codes. */
4227 #define CC_MODES (1 << (int) CC_MODE)
4228 #define CCFP_MODES (1 << (int) CCFP_MODE)
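
/* For instance, expanding the definitions above, T_MODES is the bit set
   { S_MODE, SF_MODE, D_MODE, DF_MODE, T_MODE, TF_MODE }: a register that
   can start a quad-word integer can also hold any smaller integer or
   floating-point quantity.  */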
4229
4230 /* Value is 1 if register/mode pair is acceptable on sparc.
4231 The funny mixture of D and T modes is because integer operations
4232 do not specially operate on tetra quantities, so non-quad-aligned
4233 registers can hold quadword quantities (except %o4 and %i4 because
4234 they cross fixed registers). */
4235
4236 /* This points to either the 32 bit or the 64 bit version. */
4237 const int *hard_regno_mode_classes;
4238
4239 static const int hard_32bit_mode_classes[] = {
4240 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4241 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4242 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4243 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4244
4245 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4246 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4247 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4248 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4249
4250 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4251 and none can hold SFmode/SImode values. */
4252 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4253 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4254 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4255 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4256
4257 /* %fcc[0123] */
4258 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4259
4260 /* %icc, %sfp, %gsr */
4261 CC_MODES, 0, D_MODES
4262 };
4263
4264 static const int hard_64bit_mode_classes[] = {
4265 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4266 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4267 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4268 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4269
4270 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4271 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4272 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4273 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4274
4275 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4276 and none can hold SFmode/SImode values. */
4277 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4278 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4279 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4280 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4281
4282 /* %fcc[0123] */
4283 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4284
4285 /* %icc, %sfp, %gsr */
4286 CC_MODES, 0, D_MODES
4287 };
4288
4289 int sparc_mode_class [NUM_MACHINE_MODES];
4290
4291 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4292
4293 static void
4294 sparc_init_modes (void)
4295 {
4296 int i;
4297
4298 for (i = 0; i < NUM_MACHINE_MODES; i++)
4299 {
4300 switch (GET_MODE_CLASS (i))
4301 {
4302 case MODE_INT:
4303 case MODE_PARTIAL_INT:
4304 case MODE_COMPLEX_INT:
4305 if (GET_MODE_SIZE (i) <= 4)
4306 sparc_mode_class[i] = 1 << (int) S_MODE;
4307 else if (GET_MODE_SIZE (i) == 8)
4308 sparc_mode_class[i] = 1 << (int) D_MODE;
4309 else if (GET_MODE_SIZE (i) == 16)
4310 sparc_mode_class[i] = 1 << (int) T_MODE;
4311 else if (GET_MODE_SIZE (i) == 32)
4312 sparc_mode_class[i] = 1 << (int) O_MODE;
4313 else
4314 sparc_mode_class[i] = 0;
4315 break;
4316 case MODE_VECTOR_INT:
4317 if (GET_MODE_SIZE (i) <= 4)
4318 sparc_mode_class[i] = 1 << (int)SF_MODE;
4319 else if (GET_MODE_SIZE (i) == 8)
4320 sparc_mode_class[i] = 1 << (int)DF_MODE;
4321 break;
4322 case MODE_FLOAT:
4323 case MODE_COMPLEX_FLOAT:
4324 if (GET_MODE_SIZE (i) <= 4)
4325 sparc_mode_class[i] = 1 << (int) SF_MODE;
4326 else if (GET_MODE_SIZE (i) == 8)
4327 sparc_mode_class[i] = 1 << (int) DF_MODE;
4328 else if (GET_MODE_SIZE (i) == 16)
4329 sparc_mode_class[i] = 1 << (int) TF_MODE;
4330 else if (GET_MODE_SIZE (i) == 32)
4331 sparc_mode_class[i] = 1 << (int) OF_MODE;
4332 else
4333 sparc_mode_class[i] = 0;
4334 break;
4335 case MODE_CC:
4336 if (i == (int) CCFPmode || i == (int) CCFPEmode)
4337 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4338 else
4339 sparc_mode_class[i] = 1 << (int) CC_MODE;
4340 break;
4341 default:
4342 sparc_mode_class[i] = 0;
4343 break;
4344 }
4345 }
4346
4347 if (TARGET_ARCH64)
4348 hard_regno_mode_classes = hard_64bit_mode_classes;
4349 else
4350 hard_regno_mode_classes = hard_32bit_mode_classes;
4351
4352 /* Initialize the array used by REGNO_REG_CLASS. */
4353 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4354 {
4355 if (i < 16 && TARGET_V8PLUS)
4356 sparc_regno_reg_class[i] = I64_REGS;
4357 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4358 sparc_regno_reg_class[i] = GENERAL_REGS;
4359 else if (i < 64)
4360 sparc_regno_reg_class[i] = FP_REGS;
4361 else if (i < 96)
4362 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4363 else if (i < 100)
4364 sparc_regno_reg_class[i] = FPCC_REGS;
4365 else
4366 sparc_regno_reg_class[i] = NO_REGS;
4367 }
4368 }
4369 \f
4370 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4371
4372 static inline bool
4373 save_global_or_fp_reg_p (unsigned int regno,
4374 int leaf_function ATTRIBUTE_UNUSED)
4375 {
4376 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4377 }
4378
4379 /* Return whether the return address register (%i7) is needed. */
4380
4381 static inline bool
4382 return_addr_reg_needed_p (int leaf_function)
4383 {
4384 /* If it is live, for example because of __builtin_return_address (0). */
4385 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4386 return true;
4387
4388 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4389 if (!leaf_function
4390 /* Loading the GOT register clobbers %o7. */
4391 || crtl->uses_pic_offset_table
4392 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4393 return true;
4394
4395 return false;
4396 }
4397
4398 /* Return whether REGNO, a local or in register, must be saved/restored. */
4399
4400 static bool
4401 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4402 {
4403 /* General case: call-saved registers live at some point. */
4404 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4405 return true;
4406
4407 /* Frame pointer register (%fp) if needed. */
4408 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4409 return true;
4410
4411 /* Return address register (%i7) if needed. */
4412 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4413 return true;
4414
4415 /* GOT register (%l7) if needed. */
4416 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4417 return true;
4418
4419 /* If the function accesses prior frames, the frame pointer and the return
4420 address of the previous frame must be saved on the stack. */
4421 if (crtl->accesses_prior_frames
4422 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4423 return true;
4424
4425 return false;
4426 }
4427
4428 /* Compute the frame size required by the function. This function is called
4429 during the reload pass and also by sparc_expand_prologue. */
4430
4431 HOST_WIDE_INT
4432 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4433 {
4434 HOST_WIDE_INT frame_size, apparent_frame_size;
4435 int args_size, n_global_fp_regs = 0;
4436 bool save_local_in_regs_p = false;
4437 unsigned int i;
4438
4439 /* If the function allocates dynamic stack space, the dynamic offset is
4440 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4441 if (leaf_function && !cfun->calls_alloca)
4442 args_size = 0;
4443 else
4444 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4445
4446 /* Calculate space needed for global registers. */
  if (TARGET_ARCH64)
    {
      for (i = 0; i < 8; i++)
	if (save_global_or_fp_reg_p (i, 0))
	  n_global_fp_regs += 2;
    }
  else
    {
      for (i = 0; i < 8; i += 2)
	if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
	  n_global_fp_regs += 2;
    }
4455
4456 /* In the flat window model, find out which local and in registers need to
4457 be saved. We don't reserve space in the current frame for them as they
4458 will be spilled into the register window save area of the caller's frame.
4459 However, as soon as we use this register window save area, we must create
4460 that of the current frame to make it the live one. */
4461 if (TARGET_FLAT)
4462 for (i = 16; i < 32; i++)
4463 if (save_local_or_in_reg_p (i, leaf_function))
4464 {
4465 save_local_in_regs_p = true;
4466 break;
4467 }
4468
4469 /* Calculate space needed for FP registers. */
4470 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4471 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4472 n_global_fp_regs += 2;
4473
4474 if (size == 0
4475 && n_global_fp_regs == 0
4476 && args_size == 0
4477 && !save_local_in_regs_p)
4478 frame_size = apparent_frame_size = 0;
4479 else
4480 {
4481 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4482 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
4483 apparent_frame_size += n_global_fp_regs * 4;
4484
4485 /* We need to add the size of the outgoing argument area. */
4486 frame_size = apparent_frame_size + ((args_size + 7) & -8);
4487
4488 /* And that of the register window save area. */
4489 frame_size += FIRST_PARM_OFFSET (cfun->decl);
4490
4491 /* Finally, bump to the appropriate alignment. */
4492 frame_size = SPARC_STACK_ALIGN (frame_size);
4493 }
4494
4495 /* Set up values for use in prologue and epilogue. */
4496 sparc_frame_size = frame_size;
4497 sparc_apparent_frame_size = apparent_frame_size;
4498 sparc_n_global_fp_regs = n_global_fp_regs;
4499 sparc_save_local_in_regs_p = save_local_in_regs_p;
4500
4501 return frame_size;
4502 }
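
/* Worked example (a sketch with assumed values): on 32-bit, taking
   STARTING_FRAME_OFFSET == -8 and FIRST_PARM_OFFSET == 68 (the 16-word
   register window save area plus the aggregate-return slot) for the sake
   of illustration, with size == 40, 24 bytes of outgoing arguments and no
   global/FP registers to save:

     apparent_frame_size = (40 + 8 + 7) & -8          = 48
     frame_size          = 48 + ((24 + 7) & -8) + 68  = 140
     frame_size          = SPARC_STACK_ALIGN (140)    = 144.  */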
4503
4504 /* Output any necessary .register pseudo-ops. */
4505
4506 void
4507 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4508 {
4509 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
4510 int i;
4511
4512 if (TARGET_ARCH32)
4513 return;
4514
4515 /* Check if %g[2367] were used without
4516 .register being printed for them already. */
4517 for (i = 2; i < 8; i++)
4518 {
4519 if (df_regs_ever_live_p (i)
4520 && ! sparc_hard_reg_printed [i])
4521 {
4522 sparc_hard_reg_printed [i] = 1;
4523 /* %g7 is used as TLS base register, use #ignore
4524 for it instead of #scratch. */
4525 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4526 i == 7 ? "ignore" : "scratch");
4527 }
4528 if (i == 3) i = 5;
4529 }
4530 #endif
4531 }
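
/* E.g. if %g2 and %g7 are live in a 64-bit function, the loop above emits:

	.register	%g2, #scratch
	.register	%g7, #ignore  */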
4532
4533 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
4534
4535 #if PROBE_INTERVAL > 4096
4536 #error Cannot use indexed addressing mode for stack probing
4537 #endif
4538
4539 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
4540 inclusive. These are offsets from the current stack pointer.
4541
4542 Note that we don't use the REG+REG addressing mode for the probes because
4543 of the stack bias in 64-bit mode. And it doesn't really buy us anything
   so the advantages of having a single code path win here.  */
4545
4546 static void
4547 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
4548 {
4549 rtx g1 = gen_rtx_REG (Pmode, 1);
4550
4551 /* See if we have a constant small number of probes to generate. If so,
4552 that's the easy case. */
4553 if (size <= PROBE_INTERVAL)
4554 {
4555 emit_move_insn (g1, GEN_INT (first));
4556 emit_insn (gen_rtx_SET (VOIDmode, g1,
4557 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4558 emit_stack_probe (plus_constant (g1, -size));
4559 }
4560
4561 /* The run-time loop is made up of 10 insns in the generic case while the
4562 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
4563 else if (size <= 5 * PROBE_INTERVAL)
4564 {
4565 HOST_WIDE_INT i;
4566
4567 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
4568 emit_insn (gen_rtx_SET (VOIDmode, g1,
4569 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4570 emit_stack_probe (g1);
4571
4572 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
4573 it exceeds SIZE. If only two probes are needed, this will not
4574 generate any code. Then probe at FIRST + SIZE. */
4575 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
4576 {
4577 emit_insn (gen_rtx_SET (VOIDmode, g1,
4578 plus_constant (g1, -PROBE_INTERVAL)));
4579 emit_stack_probe (g1);
4580 }
4581
4582 emit_stack_probe (plus_constant (g1, (i - PROBE_INTERVAL) - size));
4583 }
4584
4585 /* Otherwise, do the same as above, but in a loop. Note that we must be
4586 extra careful with variables wrapping around because we might be at
4587 the very top (or the very bottom) of the address space and we have
4588 to be able to handle this case properly; in particular, we use an
4589 equality test for the loop condition. */
4590 else
4591 {
4592 HOST_WIDE_INT rounded_size;
4593 rtx g4 = gen_rtx_REG (Pmode, 4);
4594
4595 emit_move_insn (g1, GEN_INT (first));
4596
4597
4598 /* Step 1: round SIZE to the previous multiple of the interval. */
4599
4600 rounded_size = size & -PROBE_INTERVAL;
4601 emit_move_insn (g4, GEN_INT (rounded_size));
4602
4603
4604 /* Step 2: compute initial and final value of the loop counter. */
4605
4606 /* TEST_ADDR = SP + FIRST. */
4607 emit_insn (gen_rtx_SET (VOIDmode, g1,
4608 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4609
4610 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
4611 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
4612
4613
4614 /* Step 3: the loop
4615
4616 while (TEST_ADDR != LAST_ADDR)
4617 {
4618 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
4619 probe at TEST_ADDR
4620 }
4621
4622 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
4623 until it is equal to ROUNDED_SIZE. */
4624
      if (TARGET_ARCH64)
4626 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
4627 else
4628 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
4629
4630
4631 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
4632 that SIZE is equal to ROUNDED_SIZE. */
4633
4634 if (size != rounded_size)
4635 emit_stack_probe (plus_constant (g4, rounded_size - size));
4636 }
4637
4638 /* Make sure nothing is scheduled before we are done. */
4639 emit_insn (gen_blockage ());
4640 }
4641
4642 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
4643 absolute addresses. */
4644
4645 const char *
4646 output_probe_stack_range (rtx reg1, rtx reg2)
4647 {
4648 static int labelno = 0;
4649 char loop_lab[32], end_lab[32];
4650 rtx xops[2];
4651
4652 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
4653 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
4654
4655 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
4656
4657 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
4658 xops[0] = reg1;
4659 xops[1] = reg2;
4660 output_asm_insn ("cmp\t%0, %1", xops);
4661 if (TARGET_ARCH64)
4662 fputs ("\tbe,pn\t%xcc,", asm_out_file);
4663 else
4664 fputs ("\tbe\t", asm_out_file);
4665 assemble_name_raw (asm_out_file, end_lab);
4666 fputc ('\n', asm_out_file);
4667
4668 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
4669 xops[1] = GEN_INT (-PROBE_INTERVAL);
4670 output_asm_insn (" add\t%0, %1, %0", xops);
4671
4672 /* Probe at TEST_ADDR and branch. */
4673 if (TARGET_ARCH64)
4674 fputs ("\tba,pt\t%xcc,", asm_out_file);
4675 else
4676 fputs ("\tba\t", asm_out_file);
4677 assemble_name_raw (asm_out_file, loop_lab);
4678 fputc ('\n', asm_out_file);
4679 xops[1] = GEN_INT (SPARC_STACK_BIAS);
4680 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
4681
4682 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
4683
4684 return "";
4685 }
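
/* With the default 4096-byte probe interval in 32-bit mode, where
   SPARC_STACK_BIAS is 0, the loop emitted above reads roughly as follows
   (a sketch; the label spelling varies with the target):

   .LPSRL0:
	cmp	%g1, %g4
	be	.LPSRE0
	 add	%g1, -4096, %g1
	ba	.LPSRL0
	 st	%g0, [%g1+0]
   .LPSRE0:  */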
4686
4687 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
4688 needed. LOW is supposed to be double-word aligned for 32-bit registers.
4689 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
4690 is the action to be performed if SAVE_P returns true and ACTION_FALSE
4691 the action to be performed if it returns false. Return the new offset. */
4692
4693 typedef bool (*sorr_pred_t) (unsigned int, int);
4694 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
4695
4696 static int
4697 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
4698 int offset, int leaf_function, sorr_pred_t save_p,
4699 sorr_act_t action_true, sorr_act_t action_false)
4700 {
4701 unsigned int i;
4702 rtx mem, insn;
4703
4704 if (TARGET_ARCH64 && high <= 32)
4705 {
4706 int fp_offset = -1;
4707
4708 for (i = low; i < high; i++)
4709 {
4710 if (save_p (i, leaf_function))
4711 {
4712 mem = gen_frame_mem (DImode, plus_constant (base, offset));
4713 if (action_true == SORR_SAVE)
4714 {
4715 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
4716 RTX_FRAME_RELATED_P (insn) = 1;
4717 }
4718 else /* action_true == SORR_RESTORE */
4719 {
4720 /* The frame pointer must be restored last since its old
4721 value may be used as base address for the frame. This
4722 is problematic in 64-bit mode only because of the lack
4723 of double-word load instruction. */
4724 if (i == HARD_FRAME_POINTER_REGNUM)
4725 fp_offset = offset;
4726 else
4727 emit_move_insn (gen_rtx_REG (DImode, i), mem);
4728 }
4729 offset += 8;
4730 }
4731 else if (action_false == SORR_ADVANCE)
4732 offset += 8;
4733 }
4734
4735 if (fp_offset >= 0)
4736 {
4737 mem = gen_frame_mem (DImode, plus_constant (base, fp_offset));
4738 emit_move_insn (hard_frame_pointer_rtx, mem);
4739 }
4740 }
4741 else
4742 {
4743 for (i = low; i < high; i += 2)
4744 {
4745 bool reg0 = save_p (i, leaf_function);
4746 bool reg1 = save_p (i + 1, leaf_function);
4747 enum machine_mode mode;
4748 int regno;
4749
4750 if (reg0 && reg1)
4751 {
4752 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
4753 regno = i;
4754 }
4755 else if (reg0)
4756 {
4757 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
4758 regno = i;
4759 }
4760 else if (reg1)
4761 {
4762 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
4763 regno = i + 1;
4764 offset += 4;
4765 }
4766 else
4767 {
4768 if (action_false == SORR_ADVANCE)
4769 offset += 8;
4770 continue;
4771 }
4772
4773 mem = gen_frame_mem (mode, plus_constant (base, offset));
4774 if (action_true == SORR_SAVE)
4775 {
4776 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
4777 RTX_FRAME_RELATED_P (insn) = 1;
4778 if (mode == DImode)
4779 {
4780 rtx set1, set2;
4781 mem = gen_frame_mem (SImode, plus_constant (base, offset));
4782 set1 = gen_rtx_SET (VOIDmode, mem,
4783 gen_rtx_REG (SImode, regno));
4784 RTX_FRAME_RELATED_P (set1) = 1;
4785 mem
4786 = gen_frame_mem (SImode, plus_constant (base, offset + 4));
4787 set2 = gen_rtx_SET (VOIDmode, mem,
4788 gen_rtx_REG (SImode, regno + 1));
4789 RTX_FRAME_RELATED_P (set2) = 1;
4790 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4791 gen_rtx_PARALLEL (VOIDmode,
4792 gen_rtvec (2, set1, set2)));
4793 }
4794 }
4795 else /* action_true == SORR_RESTORE */
4796 emit_move_insn (gen_rtx_REG (mode, regno), mem);
4797
4798 /* Always preserve double-word alignment. */
4799 offset = (offset + 8) & -8;
4800 }
4801 }
4802
4803 return offset;
4804 }
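
/* For example, in 32-bit mode, if both %i6 and %i7 (registers 30 and 31)
   must be saved, the pair is stored with a single DImode (std) move and the
   insn receives a REG_FRAME_RELATED_EXPR note splitting it into two SImode
   stores for the unwinder; if only %i7 lives, a lone SImode store at
   offset + 4 is emitted.  */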
4805
4806 /* Emit code to adjust BASE to OFFSET. Return the new base. */
4807
4808 static rtx
4809 emit_adjust_base_to_offset (rtx base, int offset)
4810 {
4811 /* ??? This might be optimized a little as %g1 might already have a
4812 value close enough that a single add insn will do. */
4813 /* ??? Although, all of this is probably only a temporary fix because
4814 if %g1 can hold a function result, then sparc_expand_epilogue will
4815 lose (the result will be clobbered). */
4816 rtx new_base = gen_rtx_REG (Pmode, 1);
4817 emit_move_insn (new_base, GEN_INT (offset));
4818 emit_insn (gen_rtx_SET (VOIDmode,
4819 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
4820 return new_base;
4821 }
4822
4823 /* Emit code to save/restore call-saved global and FP registers. */
4824
4825 static void
4826 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
4827 {
4828 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
4829 {
4830 base = emit_adjust_base_to_offset (base, offset);
4831 offset = 0;
4832 }
4833
4834 offset
4835 = emit_save_or_restore_regs (0, 8, base, offset, 0,
4836 save_global_or_fp_reg_p, action, SORR_NONE);
4837 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
4838 save_global_or_fp_reg_p, action, SORR_NONE);
4839 }
4840
4841 /* Emit code to save/restore call-saved local and in registers. */
4842
4843 static void
4844 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
4845 {
4846 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
4847 {
4848 base = emit_adjust_base_to_offset (base, offset);
4849 offset = 0;
4850 }
4851
4852 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
4853 save_local_or_in_reg_p, action, SORR_ADVANCE);
4854 }
4855
4856 /* Emit a window_save insn. */
4857
4858 static rtx
4859 emit_window_save (rtx increment)
4860 {
4861 rtx insn = emit_insn (gen_window_save (increment));
4862 RTX_FRAME_RELATED_P (insn) = 1;
4863
4864 /* The incoming return address (%o7) is saved in %i7. */
4865 add_reg_note (insn, REG_CFA_REGISTER,
4866 gen_rtx_SET (VOIDmode,
4867 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
4868 gen_rtx_REG (Pmode,
4869 INCOMING_RETURN_ADDR_REGNUM)));
4870
4871 /* The window save event. */
4872 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
4873
4874 /* The CFA is %fp, the hard frame pointer. */
4875 add_reg_note (insn, REG_CFA_DEF_CFA,
4876 plus_constant (hard_frame_pointer_rtx,
4877 INCOMING_FRAME_SP_OFFSET));
4878
4879 return insn;
4880 }
4881
4882 /* Generate an increment for the stack pointer. */
4883
4884 static rtx
4885 gen_stack_pointer_inc (rtx increment)
4886 {
4887 return gen_rtx_SET (VOIDmode,
4888 stack_pointer_rtx,
4889 gen_rtx_PLUS (Pmode,
4890 stack_pointer_rtx,
4891 increment));
4892 }
4893
4894 /* Generate a decrement for the stack pointer. */
4895
4896 static rtx
4897 gen_stack_pointer_dec (rtx decrement)
4898 {
4899 return gen_rtx_SET (VOIDmode,
4900 stack_pointer_rtx,
4901 gen_rtx_MINUS (Pmode,
4902 stack_pointer_rtx,
4903 decrement));
4904 }
4905
4906 /* Expand the function prologue. The prologue is responsible for reserving
4907 storage for the frame, saving the call-saved registers and loading the
4908 GOT register if needed. */
4909
4910 void
4911 sparc_expand_prologue (void)
4912 {
4913 HOST_WIDE_INT size;
4914 rtx insn;
4915
4916 /* Compute a snapshot of current_function_uses_only_leaf_regs. Relying
4917 on the final value of the flag means deferring the prologue/epilogue
4918 expansion until just before the second scheduling pass, which is too
4919 late to emit multiple epilogues or return insns.
4920
4921 Of course we are making the assumption that the value of the flag
4922 will not change between now and its final value. Of the three parts
4923 of the formula, only the last one can reasonably vary. Let's take a
4924 closer look, after assuming that the first two ones are set to true
4925 (otherwise the last value is effectively silenced).
4926
4927 If only_leaf_regs_used returns false, the global predicate will also
4928 be false so the actual frame size calculated below will be positive.
4929 As a consequence, the save_register_window insn will be emitted in
4930 the instruction stream; now this insn explicitly references %fp
4931 which is not a leaf register so only_leaf_regs_used will always
4932 return false subsequently.
4933
4934 If only_leaf_regs_used returns true, we hope that the subsequent
4935 optimization passes won't cause non-leaf registers to pop up. For
4936 example, the regrename pass has special provisions to not rename to
4937 non-leaf registers in a leaf function. */
4938 sparc_leaf_function_p
4939 = optimize > 0 && current_function_is_leaf && only_leaf_regs_used ();
4940
4941 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
4942
4943 if (flag_stack_usage_info)
4944 current_function_static_stack_size = size;
4945
4946 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
4947 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
4948
4949 if (size == 0)
4950 ; /* do nothing. */
4951 else if (sparc_leaf_function_p)
4952 {
4953 rtx size_int_rtx = GEN_INT (-size);
4954
4955 if (size <= 4096)
4956 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
4957 else if (size <= 8192)
4958 {
4959 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
4960 /* %sp is still the CFA register. */
4961 RTX_FRAME_RELATED_P (insn) = 1;
4962 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
4963 }
4964 else
4965 {
4966 rtx size_rtx = gen_rtx_REG (Pmode, 1);
4967 emit_move_insn (size_rtx, size_int_rtx);
4968 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
4969 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4970 gen_stack_pointer_inc (size_int_rtx));
4971 }
4972
4973 RTX_FRAME_RELATED_P (insn) = 1;
4974 }
4975 else
4976 {
4977 rtx size_int_rtx = GEN_INT (-size);
4978
4979 if (size <= 4096)
4980 emit_window_save (size_int_rtx);
4981 else if (size <= 8192)
4982 {
4983 emit_window_save (GEN_INT (-4096));
4984 /* %sp is not the CFA register anymore. */
4985 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
4986 }
4987 else
4988 {
4989 rtx size_rtx = gen_rtx_REG (Pmode, 1);
4990 emit_move_insn (size_rtx, size_int_rtx);
4991 emit_window_save (size_rtx);
4992 }
4993 }
4994
4995 if (sparc_leaf_function_p)
4996 {
4997 sparc_frame_base_reg = stack_pointer_rtx;
4998 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
4999 }
5000 else
5001 {
5002 sparc_frame_base_reg = hard_frame_pointer_rtx;
5003 sparc_frame_base_offset = SPARC_STACK_BIAS;
5004 }
5005
5006 if (sparc_n_global_fp_regs > 0)
5007 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5008 sparc_frame_base_offset
5009 - sparc_apparent_frame_size,
5010 SORR_SAVE);
5011
5012 /* Load the GOT register if needed. */
5013 if (crtl->uses_pic_offset_table)
5014 load_got_register ();
5015
5016 /* Advertise that the data calculated just above are now valid. */
5017 sparc_prologue_data_valid_p = true;
5018 }
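
/* E.g. a leaf function with a 6000-byte frame gets two stack adjustments
   from the code above, because -6000 does not fit in a 13-bit signed
   immediate:

	add	%sp, -4096, %sp
	add	%sp, -1904, %sp  */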
5019
5020 /* Expand the function prologue. The prologue is responsible for reserving
5021 storage for the frame, saving the call-saved registers and loading the
5022 GOT register if needed. */
5023
5024 void
5025 sparc_flat_expand_prologue (void)
5026 {
5027 HOST_WIDE_INT size;
5028 rtx insn;
5029
5030 sparc_leaf_function_p = optimize > 0 && current_function_is_leaf;
5031
5032 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5033
5034 if (flag_stack_usage_info)
5035 current_function_static_stack_size = size;
5036
5037 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5038 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5039
5040 if (sparc_save_local_in_regs_p)
5041 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5042 SORR_SAVE);
5043
5044 if (size == 0)
5045 ; /* do nothing. */
5046 else
5047 {
5048 rtx size_int_rtx, size_rtx;
5049
5050 size_rtx = size_int_rtx = GEN_INT (-size);
5051
5052 /* We establish the frame (i.e. decrement the stack pointer) first, even
5053 if we use a frame pointer, because we cannot clobber any call-saved
5054 registers, including the frame pointer, if we haven't created a new
5055 register save area, for the sake of compatibility with the ABI. */
5056 if (size <= 4096)
5057 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5058 else if (size <= 8192 && !frame_pointer_needed)
5059 {
5060 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5061 RTX_FRAME_RELATED_P (insn) = 1;
5062 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5063 }
5064 else
5065 {
5066 size_rtx = gen_rtx_REG (Pmode, 1);
5067 emit_move_insn (size_rtx, size_int_rtx);
5068 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5069 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5070 gen_stack_pointer_inc (size_int_rtx));
5071 }
5072 RTX_FRAME_RELATED_P (insn) = 1;
5073
5074 /* Ensure nothing is scheduled until after the frame is established. */
5075 emit_insn (gen_blockage ());
5076
5077 if (frame_pointer_needed)
5078 {
5079 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5080 gen_rtx_MINUS (Pmode,
5081 stack_pointer_rtx,
5082 size_rtx)));
5083 RTX_FRAME_RELATED_P (insn) = 1;
5084
5085 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5086 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5087 plus_constant (stack_pointer_rtx,
5088 size)));
5089 }
5090
5091 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5092 {
5093 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5094 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5095
5096 insn = emit_move_insn (i7, o7);
5097 RTX_FRAME_RELATED_P (insn) = 1;
5098
5099 add_reg_note (insn, REG_CFA_REGISTER,
5100 gen_rtx_SET (VOIDmode, i7, o7));
5101
5102 /* Prevent this instruction from ever being considered dead,
5103 even if this function has no epilogue. */
5104 emit_insn (gen_rtx_USE (VOIDmode, i7));
5105 }
5106 }
5107
5108 if (frame_pointer_needed)
5109 {
5110 sparc_frame_base_reg = hard_frame_pointer_rtx;
5111 sparc_frame_base_offset = SPARC_STACK_BIAS;
5112 }
5113 else
5114 {
5115 sparc_frame_base_reg = stack_pointer_rtx;
5116 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5117 }
5118
5119 if (sparc_n_global_fp_regs > 0)
5120 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5121 sparc_frame_base_offset
5122 - sparc_apparent_frame_size,
5123 SORR_SAVE);
5124
5125 /* Load the GOT register if needed. */
5126 if (crtl->uses_pic_offset_table)
5127 load_got_register ();
5128
5129 /* Advertise that the data calculated just above are now valid. */
5130 sparc_prologue_data_valid_p = true;
5131 }
5132
5133 /* This function generates the assembly code for function entry, which boils
5134 down to emitting the necessary .register directives. */
5135
5136 static void
5137 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5138 {
5139 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5140 if (!TARGET_FLAT)
5141 gcc_assert (sparc_leaf_function_p == current_function_uses_only_leaf_regs);
5142
5143 sparc_output_scratch_registers (file);
5144 }
5145
5146 /* Expand the function epilogue, either normal or part of a sibcall.
5147 We emit all the instructions except the return or the call. */
5148
5149 void
5150 sparc_expand_epilogue (bool for_eh)
5151 {
5152 HOST_WIDE_INT size = sparc_frame_size;
5153
5154 if (sparc_n_global_fp_regs > 0)
5155 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5156 sparc_frame_base_offset
5157 - sparc_apparent_frame_size,
5158 SORR_RESTORE);
5159
5160 if (size == 0 || for_eh)
5161 ; /* do nothing. */
5162 else if (sparc_leaf_function_p)
5163 {
5164 if (size <= 4096)
5165 emit_insn (gen_stack_pointer_dec (GEN_INT (-size)));
5166 else if (size <= 8192)
5167 {
5168 emit_insn (gen_stack_pointer_dec (GEN_INT (-4096)));
5169 emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - size)));
5170 }
5171 else
5172 {
5173 rtx reg = gen_rtx_REG (Pmode, 1);
5174 emit_move_insn (reg, GEN_INT (-size));
5175 emit_insn (gen_stack_pointer_dec (reg));
5176 }
5177 }
5178 }
5179
5180 /* Expand the function epilogue, either normal or part of a sibcall.
5181 We emit all the instructions except the return or the call. */
5182
5183 void
5184 sparc_flat_expand_epilogue (bool for_eh)
5185 {
5186 HOST_WIDE_INT size = sparc_frame_size;
5187
5188 if (sparc_n_global_fp_regs > 0)
5189 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5190 sparc_frame_base_offset
5191 - sparc_apparent_frame_size,
5192 SORR_RESTORE);
5193
5194 /* If we have a frame pointer, we'll need both to restore it before the
   frame is destroyed and to use its current value in destroying the frame.
5196 Since we don't have an atomic way to do that in the flat window model,
5197 we save the current value into a temporary register (%g1). */
5198 if (frame_pointer_needed && !for_eh)
5199 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5200
5201 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5202 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5203 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5204
5205 if (sparc_save_local_in_regs_p)
5206 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5207 sparc_frame_base_offset,
5208 SORR_RESTORE);
5209
5210 if (size == 0 || for_eh)
5211 ; /* do nothing. */
5212 else if (frame_pointer_needed)
5213 {
5214 /* Make sure the frame is destroyed after everything else is done. */
5215 emit_insn (gen_blockage ());
5216
5217 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5218 }
5219 else
5220 {
5221 /* Likewise. */
5222 emit_insn (gen_blockage ());
5223
5224 if (size <= 4096)
5225 emit_insn (gen_stack_pointer_dec (GEN_INT (-size)));
5226 else if (size <= 8192)
5227 {
5228 emit_insn (gen_stack_pointer_dec (GEN_INT (-4096)));
5229 emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - size)));
5230 }
5231 else
5232 {
5233 rtx reg = gen_rtx_REG (Pmode, 1);
5234 emit_move_insn (reg, GEN_INT (-size));
5235 emit_insn (gen_stack_pointer_dec (reg));
5236 }
5237 }
5238 }
5239
5240 /* Return true if it is appropriate to emit `return' instructions in the
5241 body of a function. */
5242
5243 bool
5244 sparc_can_use_return_insn_p (void)
5245 {
5246 return sparc_prologue_data_valid_p
5247 && sparc_n_global_fp_regs == 0
	 && (TARGET_FLAT
	     ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
	     : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5251 }
5252
5253 /* This function generates the assembly code for function exit. */
5254
5255 static void
5256 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5257 {
5258 /* If the last two instructions of a function are "call foo; dslot;"
5259 the return address might point to the first instruction in the next
5260 function and we have to output a dummy nop for the sake of sane
5261 backtraces in such cases. This is pointless for sibling calls since
5262 the return address is explicitly adjusted. */
5263
5264 rtx insn, last_real_insn;
5265
5266 insn = get_last_insn ();
5267
5268 last_real_insn = prev_real_insn (insn);
5269 if (last_real_insn
5270 && GET_CODE (last_real_insn) == INSN
5271 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5272 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5273
5274 if (last_real_insn
5275 && CALL_P (last_real_insn)
5276 && !SIBLING_CALL_P (last_real_insn))
5277 fputs("\tnop\n", file);
5278
5279 sparc_output_deferred_case_vectors ();
5280 }
5281
5282 /* Output a 'restore' instruction. */
5283
5284 static void
5285 output_restore (rtx pat)
5286 {
5287 rtx operands[3];
5288
5289 if (! pat)
5290 {
5291 fputs ("\t restore\n", asm_out_file);
5292 return;
5293 }
5294
5295 gcc_assert (GET_CODE (pat) == SET);
5296
5297 operands[0] = SET_DEST (pat);
5298 pat = SET_SRC (pat);
5299
5300 switch (GET_CODE (pat))
5301 {
5302 case PLUS:
5303 operands[1] = XEXP (pat, 0);
5304 operands[2] = XEXP (pat, 1);
5305 output_asm_insn (" restore %r1, %2, %Y0", operands);
5306 break;
5307 case LO_SUM:
5308 operands[1] = XEXP (pat, 0);
5309 operands[2] = XEXP (pat, 1);
5310 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5311 break;
5312 case ASHIFT:
5313 operands[1] = XEXP (pat, 0);
5314 gcc_assert (XEXP (pat, 1) == const1_rtx);
5315 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5316 break;
5317 default:
5318 operands[1] = pat;
5319 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5320 break;
5321 }
5322 }
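
/* For instance, a delay-slot insn of the form
   (set (reg %o0) (plus (reg %i0) (const_int 5))) handed to output_restore
   is folded into the single instruction

	restore	%i0, 5, %o0

   which performs the addition and the register window restore at once
   (a sketch; the operands are actually printed with the %r and %Y output
   modifiers).  */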
5323
5324 /* Output a return. */
5325
5326 const char *
5327 output_return (rtx insn)
5328 {
5329 if (crtl->calls_eh_return)
5330 {
5331 /* If the function uses __builtin_eh_return, the eh_return
5332 machinery occupies the delay slot. */
5333 gcc_assert (!final_sequence);
5334
5335 if (flag_delayed_branch)
5336 {
5337 if (!TARGET_FLAT && TARGET_V9)
5338 fputs ("\treturn\t%i7+8\n", asm_out_file);
5339 else
5340 {
5341 if (!TARGET_FLAT)
5342 fputs ("\trestore\n", asm_out_file);
5343
5344 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5345 }
5346
5347 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5348 }
5349 else
5350 {
5351 if (!TARGET_FLAT)
5352 fputs ("\trestore\n", asm_out_file);
5353
5354 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5355 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5356 }
5357 }
5358 else if (sparc_leaf_function_p || TARGET_FLAT)
5359 {
5360 /* This is a leaf or flat function so we don't have to bother restoring
5361 the register window, which frees us from dealing with the convoluted
5362 semantics of restore/return. We simply output the jump to the
5363 return address and the insn in the delay slot (if any). */
5364
5365 return "jmp\t%%o7+%)%#";
5366 }
5367 else
5368 {
5369 /* This is a regular function so we have to restore the register window.
5370 We may have a pending insn for the delay slot, which will be either
5371 combined with the 'restore' instruction or put in the delay slot of
5372 the 'return' instruction. */
5373
5374 if (final_sequence)
5375 {
5376 rtx delay, pat;
5377
5378 delay = NEXT_INSN (insn);
5379 gcc_assert (delay);
5380
5381 pat = PATTERN (delay);
5382
5383 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5384 {
5385 epilogue_renumber (&pat, 0);
5386 return "return\t%%i7+%)%#";
5387 }
5388 else
5389 {
5390 output_asm_insn ("jmp\t%%i7+%)", NULL);
5391 output_restore (pat);
5392 PATTERN (delay) = gen_blockage ();
5393 INSN_CODE (delay) = -1;
5394 }
5395 }
5396 else
5397 {
5398 /* The delay slot is empty. */
5399 if (TARGET_V9)
5400 return "return\t%%i7+%)\n\t nop";
5401 else if (flag_delayed_branch)
5402 return "jmp\t%%i7+%)\n\t restore";
5403 else
5404 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5405 }
5406 }
5407
5408 return "";
5409 }
5410
5411 /* Output a sibling call. */
5412
5413 const char *
5414 output_sibcall (rtx insn, rtx call_operand)
5415 {
5416 rtx operands[1];
5417
5418 gcc_assert (flag_delayed_branch);
5419
5420 operands[0] = call_operand;
5421
5422 if (sparc_leaf_function_p || TARGET_FLAT)
5423 {
5424 /* This is a leaf or flat function so we don't have to bother restoring
5425 the register window. We simply output the jump to the function and
5426 the insn in the delay slot (if any). */
5427
5428 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5429
5430 if (final_sequence)
5431 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
5432 operands);
5433 else
5434 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
5435 it into branch if possible. */
5436 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
5437 operands);
5438 }
5439 else
5440 {
5441 /* This is a regular function so we have to restore the register window.
5442 We may have a pending insn for the delay slot, which will be combined
5443 with the 'restore' instruction. */
5444
5445 output_asm_insn ("call\t%a0, 0", operands);
5446
5447 if (final_sequence)
5448 {
5449 rtx delay = NEXT_INSN (insn);
5450 gcc_assert (delay);
5451
5452 output_restore (PATTERN (delay));
5453
5454 PATTERN (delay) = gen_blockage ();
5455 INSN_CODE (delay) = -1;
5456 }
5457 else
5458 output_restore (NULL_RTX);
5459 }
5460
5461 return "";
5462 }
5463 \f
5464 /* Functions for handling argument passing.
5465
5466 For 32-bit, the first 6 args are normally in registers and the rest are
5467 pushed. Any arg that starts within the first 6 words is at least
5468 partially passed in a register unless its data type forbids.
5469
5470 For 64-bit, the argument registers are laid out as an array of 16 elements
5471 and arguments are added sequentially. The first 6 int args and up to the
5472 first 16 fp args (depending on size) are passed in regs.
5473
5474 Slot Stack Integral Float Float in structure Double Long Double
5475 ---- ----- -------- ----- ------------------ ------ -----------
5476 15 [SP+248] %f31 %f30,%f31 %d30
5477 14 [SP+240] %f29 %f28,%f29 %d28 %q28
5478 13 [SP+232] %f27 %f26,%f27 %d26
5479 12 [SP+224] %f25 %f24,%f25 %d24 %q24
5480 11 [SP+216] %f23 %f22,%f23 %d22
5481 10 [SP+208] %f21 %f20,%f21 %d20 %q20
5482 9 [SP+200] %f19 %f18,%f19 %d18
5483 8 [SP+192] %f17 %f16,%f17 %d16 %q16
5484 7 [SP+184] %f15 %f14,%f15 %d14
5485 6 [SP+176] %f13 %f12,%f13 %d12 %q12
5486 5 [SP+168] %o5 %f11 %f10,%f11 %d10
5487 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
5488 3 [SP+152] %o3 %f7 %f6,%f7 %d6
5489 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
5490 1 [SP+136] %o1 %f3 %f2,%f3 %d2
5491 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
5492
5493 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
5494
5495 Integral arguments are always passed as 64-bit quantities appropriately
5496 extended.
5497
5498 Passing of floating point values is handled as follows.
5499 If a prototype is in scope:
5500 If the value is in a named argument (i.e. not a stdarg function or a
5501 value not part of the `...') then the value is passed in the appropriate
5502 fp reg.
5503 If the value is part of the `...' and is passed in one of the first 6
5504 slots then the value is passed in the appropriate int reg.
5505 If the value is part of the `...' and is not passed in one of the first 6
5506 slots then the value is passed in memory.
5507 If a prototype is not in scope:
5508 If the value is one of the first 6 arguments the value is passed in the
5509 appropriate integer reg and the appropriate fp reg.
5510 If the value is not one of the first 6 arguments the value is passed in
5511 the appropriate fp reg and in memory.
5512
5513
5514 Summary of the calling conventions implemented by GCC on the SPARC:
5515
5516 32-bit ABI:
5517 size argument return value
5518
5519 small integer <4 int. reg. int. reg.
5520 word 4 int. reg. int. reg.
5521 double word 8 int. reg. int. reg.
5522
5523 _Complex small integer <8 int. reg. int. reg.
5524 _Complex word 8 int. reg. int. reg.
5525 _Complex double word 16 memory int. reg.
5526
5527 vector integer <=8 int. reg. FP reg.
5528 vector integer >8 memory memory
5529
5530 float 4 int. reg. FP reg.
5531 double 8 int. reg. FP reg.
5532 long double 16 memory memory
5533
5534 _Complex float 8 memory FP reg.
5535 _Complex double 16 memory FP reg.
5536 _Complex long double 32 memory FP reg.
5537
5538 vector float any memory memory
5539
5540 aggregate any memory memory
5541
5542
5543
5544 64-bit ABI:
5545 size argument return value
5546
5547 small integer <8 int. reg. int. reg.
5548 word 8 int. reg. int. reg.
5549 double word 16 int. reg. int. reg.
5550
5551 _Complex small integer <16 int. reg. int. reg.
5552 _Complex word 16 int. reg. int. reg.
5553 _Complex double word 32 memory int. reg.
5554
5555 vector integer <=16 FP reg. FP reg.
5556 vector integer 16<s<=32 memory FP reg.
5557 vector integer >32 memory memory
5558
5559 float 4 FP reg. FP reg.
5560 double 8 FP reg. FP reg.
5561 long double 16 FP reg. FP reg.
5562
5563 _Complex float 8 FP reg. FP reg.
5564 _Complex double 16 FP reg. FP reg.
5565 _Complex long double 32 memory FP reg.
5566
5567 vector float <=16 FP reg. FP reg.
5568 vector float 16<s<=32 memory FP reg.
5569 vector float >32 memory memory
5570
5571 aggregate <=16 reg. reg.
5572 aggregate 16<s<=32 memory reg.
5573 aggregate >32 memory memory
5574
5575
5576
5577 Note #1: complex floating-point types follow the extended SPARC ABIs as
5578 implemented by the Sun compiler.
5579
5580 Note #2: integral vector types follow the scalar floating-point types
5581 conventions to match what is implemented by the Sun VIS SDK.
5582
5583 Note #3: floating-point vector types follow the aggregate types
5584 conventions. */
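/* Editorial example -- a minimal sketch, not part of the original port,
   illustrating the 64-bit conventions summarized above.  Given:  */
#if 0
struct s { int i; double d; };   /* 16 bytes, one integer + one FP field */
extern void f (struct s);
#endif
/* the structure is passed in registers: word 0 travels in %o0 as a full
   integer word (the int occupies its high-order half, the target being
   big-endian), and the double in word 1 travels in %d2, the FP register
   pair for slot 1 per the table above.  */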
5585
5586
5587 /* Maximum number of int regs for args. */
5588 #define SPARC_INT_ARG_MAX 6
5589 /* Maximum number of fp regs for args. */
5590 #define SPARC_FP_ARG_MAX 16
5591
5592 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
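/* Editorial note: ROUND_ADVANCE counts argument slots, rounding up.
   E.g. with UNITS_PER_WORD == 8 (64-bit), ROUND_ADVANCE (13) is
   (13 + 8 - 1) / 8 == 2 slots; with UNITS_PER_WORD == 4 (32-bit), the
   same 13 bytes take (13 + 4 - 1) / 4 == 4 slots.  */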
5593
5594 /* Handle the INIT_CUMULATIVE_ARGS macro.
5595 Initialize a variable CUM of type CUMULATIVE_ARGS
5596 for a call to a function whose data type is FNTYPE.
5597 For a library call, FNTYPE is 0. */
5598
5599 void
5600 init_cumulative_args (struct sparc_args *cum, tree fntype,
5601 rtx libname ATTRIBUTE_UNUSED,
5602 tree fndecl ATTRIBUTE_UNUSED)
5603 {
5604 cum->words = 0;
5605 cum->prototype_p = fntype && prototype_p (fntype);
5606 cum->libcall_p = fntype == 0;
5607 }
5608
5609 /* Handle promotion of pointer and integer arguments. */
5610
5611 static enum machine_mode
5612 sparc_promote_function_mode (const_tree type,
5613 enum machine_mode mode,
5614 int *punsignedp,
5615 const_tree fntype ATTRIBUTE_UNUSED,
5616 int for_return ATTRIBUTE_UNUSED)
5617 {
5618 if (type != NULL_TREE && POINTER_TYPE_P (type))
5619 {
5620 *punsignedp = POINTERS_EXTEND_UNSIGNED;
5621 return Pmode;
5622 }
5623
5624 /* Integral arguments are passed as full words, as per the ABI. */
5625 if (GET_MODE_CLASS (mode) == MODE_INT
5626 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5627 return word_mode;
5628
5629 return mode;
5630 }
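/* Editorial example, derived from the hook above: a 'short' argument
   (HImode) is of class MODE_INT and narrower than a word, so it is
   promoted to word_mode -- SImode on 32-bit, DImode on 64-bit; a pointer
   argument is promoted to Pmode with *PUNSIGNEDP set to
   POINTERS_EXTEND_UNSIGNED.  */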
5631
5632 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
5633
5634 static bool
5635 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
5636 {
5637 return TARGET_ARCH64 ? true : false;
5638 }
5639
5640 /* Scan the record type TYPE and return the following predicates:
5641 - INTREGS_P: the record contains at least one field or sub-field
5642 that is eligible for promotion in integer registers.
5643 - FPREGS_P: the record contains at least one field or sub-field
5644 that is eligible for promotion in floating-point registers.
5645 - PACKED_P: the record contains at least one field that is packed.
5646
5647 Sub-fields are not taken into account for the PACKED_P predicate. */
5648
5649 static void
5650 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
5651 int *packed_p)
5652 {
5653 tree field;
5654
5655 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5656 {
5657 if (TREE_CODE (field) == FIELD_DECL)
5658 {
5659 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5660 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
5661 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5662 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5663 && TARGET_FPU)
5664 *fpregs_p = 1;
5665 else
5666 *intregs_p = 1;
5667
5668 if (packed_p && DECL_PACKED (field))
5669 *packed_p = 1;
5670 }
5671 }
5672 }
5673
5674 /* Compute the slot number to pass an argument in.
5675 Return the slot number or -1 if passing on the stack.
5676
5677 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5678 the preceding args and about the function being called.
5679 MODE is the argument's machine mode.
5680 TYPE is the data type of the argument (as a tree).
5681 This is null for libcalls where that information may
5682 not be available.
5683 NAMED is nonzero if this argument is a named parameter
5684 (otherwise it is an extra parameter matching an ellipsis).
5685 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
5686 *PREGNO records the register number to use if the argument is of scalar type.
5687 *PPADDING records the amount of padding needed in words. */
5688
5689 static int
5690 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
5691 const_tree type, bool named, bool incoming_p,
5692 int *pregno, int *ppadding)
5693 {
5694 int regbase = (incoming_p
5695 ? SPARC_INCOMING_INT_ARG_FIRST
5696 : SPARC_OUTGOING_INT_ARG_FIRST);
5697 int slotno = cum->words;
5698 enum mode_class mclass;
5699 int regno;
5700
5701 *ppadding = 0;
5702
5703 if (type && TREE_ADDRESSABLE (type))
5704 return -1;
5705
5706 if (TARGET_ARCH32
5707 && mode == BLKmode
5708 && type
5709 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
5710 return -1;
5711
5712 /* For SPARC64, objects requiring 16-byte alignment get it. */
5713 if (TARGET_ARCH64
5714 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
5715 && (slotno & 1) != 0)
5716 slotno++, *ppadding = 1;
5717
5718 mclass = GET_MODE_CLASS (mode);
5719 if (type && TREE_CODE (type) == VECTOR_TYPE)
5720 {
5721 /* Vector types deserve special treatment because they are
5722 polymorphic wrt their mode, depending upon whether VIS
5723 instructions are enabled. */
5724 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5725 {
5726 /* The SPARC port defines no floating-point vector modes. */
5727 gcc_assert (mode == BLKmode);
5728 }
5729 else
5730 {
5731 /* Integral vector types should either have a vector
5732 mode or an integral mode, because we are guaranteed
5733 by pass_by_reference that their size is not greater
5734 than 16 bytes and TImode is 16-byte wide. */
5735 gcc_assert (mode != BLKmode);
5736
5737 /* Vector integers are handled like floats according to
5738 the Sun VIS SDK. */
5739 mclass = MODE_FLOAT;
5740 }
5741 }
5742
5743 switch (mclass)
5744 {
5745 case MODE_FLOAT:
5746 case MODE_COMPLEX_FLOAT:
5747 case MODE_VECTOR_INT:
5748 if (TARGET_ARCH64 && TARGET_FPU && named)
5749 {
5750 if (slotno >= SPARC_FP_ARG_MAX)
5751 return -1;
5752 regno = SPARC_FP_ARG_FIRST + slotno * 2;
5753 /* Arguments filling only a single FP register are
5754 right-justified in the outer double FP register. */
5755 if (GET_MODE_SIZE (mode) <= 4)
5756 regno++;
5757 break;
5758 }
5759 /* fallthrough */
5760
5761 case MODE_INT:
5762 case MODE_COMPLEX_INT:
5763 if (slotno >= SPARC_INT_ARG_MAX)
5764 return -1;
5765 regno = regbase + slotno;
5766 break;
5767
5768 case MODE_RANDOM:
5769 if (mode == VOIDmode)
5770 /* MODE is VOIDmode when generating the actual call. */
5771 return -1;
5772
5773 gcc_assert (mode == BLKmode);
5774
5775 if (TARGET_ARCH32
5776 || !type
5777 || (TREE_CODE (type) != VECTOR_TYPE
5778 && TREE_CODE (type) != RECORD_TYPE))
5779 {
5780 if (slotno >= SPARC_INT_ARG_MAX)
5781 return -1;
5782 regno = regbase + slotno;
5783 }
5784 else /* TARGET_ARCH64 && type */
5785 {
5786 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
5787
5788 /* First see what kinds of registers we would need. */
5789 if (TREE_CODE (type) == VECTOR_TYPE)
5790 fpregs_p = 1;
5791 else
5792 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
5793
5794 /* The ABI obviously doesn't specify how packed structures
5795 are passed. These are defined to be passed in int regs
5796 if possible, otherwise memory. */
5797 if (packed_p || !named)
5798 fpregs_p = 0, intregs_p = 1;
5799
5800 /* If all arg slots are filled, then must pass on stack. */
5801 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
5802 return -1;
5803
5804 /* If there are only int args and all int arg slots are filled,
5805 then must pass on stack. */
5806 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
5807 return -1;
5808
5809 /* Note that even if all int arg slots are filled, fp members may
5810 still be passed in regs if such regs are available.
5811 *PREGNO isn't set because there may be more than one, it's up
5812 to the caller to compute them. */
5813 return slotno;
5814 }
5815 break;
5816
5817 default:
5818 gcc_unreachable ();
5819 }
5820
5821 *pregno = regno;
5822 return slotno;
5823 }
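/* Editorial example, derived from the function above: on TARGET_ARCH64
   with TARGET_FPU, a named 'double' (DFmode) landing in slot 2 gets
   regno SPARC_FP_ARG_FIRST + 2 * 2, i.e. %d4, while a named 'float'
   (4 bytes) in the same slot is bumped to the odd register %f5 -- the
   right-justification in the outer double FP register noted above.  */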
5824
5825 /* Handle recursive register counting for structure field layout. */
5826
5827 struct function_arg_record_value_parms
5828 {
5829 rtx ret; /* return expression being built. */
5830 int slotno; /* slot number of the argument. */
5831 int named; /* whether the argument is named. */
5832 int regbase; /* regno of the base register. */
5833 int stack; /* 1 if part of the argument is on the stack. */
5834 int intoffset; /* offset of the first pending integer field. */
5835 unsigned int nregs; /* number of words passed in registers. */
5836 };
5837
5838 static void function_arg_record_value_3
5839 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
5840 static void function_arg_record_value_2
5841 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
5842 static void function_arg_record_value_1
5843 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
5844 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
5845 static rtx function_arg_union_value (int, enum machine_mode, int, int);
5846
5847 /* A subroutine of function_arg_record_value. Traverse the structure
5848 recursively and determine how many registers will be required. */
5849
5850 static void
5851 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
5852 struct function_arg_record_value_parms *parms,
5853 bool packed_p)
5854 {
5855 tree field;
5856
5857 /* We need to compute how many registers are needed so we can
5858 allocate the PARALLEL but before we can do that we need to know
5859 whether there are any packed fields. The ABI obviously doesn't
5860 specify how structures are passed in this case, so they are
5861 defined to be passed in int regs if possible, otherwise memory,
5862 regardless of whether there are fp values present. */
5863
5864 if (! packed_p)
5865 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5866 {
5867 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
5868 {
5869 packed_p = true;
5870 break;
5871 }
5872 }
5873
5874 /* Compute how many registers we need. */
5875 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5876 {
5877 if (TREE_CODE (field) == FIELD_DECL)
5878 {
5879 HOST_WIDE_INT bitpos = startbitpos;
5880
5881 if (DECL_SIZE (field) != 0)
5882 {
5883 if (integer_zerop (DECL_SIZE (field)))
5884 continue;
5885
5886 if (host_integerp (bit_position (field), 1))
5887 bitpos += int_bit_position (field);
5888 }
5889
5890 /* ??? FIXME: else assume zero offset. */
5891
5892 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5893 function_arg_record_value_1 (TREE_TYPE (field),
5894 bitpos,
5895 parms,
5896 packed_p);
5897 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5898 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5899 && TARGET_FPU
5900 && parms->named
5901 && ! packed_p)
5902 {
5903 if (parms->intoffset != -1)
5904 {
5905 unsigned int startbit, endbit;
5906 int intslots, this_slotno;
5907
5908 startbit = parms->intoffset & -BITS_PER_WORD;
5909 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5910
5911 intslots = (endbit - startbit) / BITS_PER_WORD;
5912 this_slotno = parms->slotno + parms->intoffset
5913 / BITS_PER_WORD;
5914
5915 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
5916 {
5917 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
5918 /* We need to pass this field on the stack. */
5919 parms->stack = 1;
5920 }
5921
5922 parms->nregs += intslots;
5923 parms->intoffset = -1;
5924 }
5925
5926 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
5927 If it wasn't true we wouldn't be here. */
5928 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
5929 && DECL_MODE (field) == BLKmode)
5930 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
5931 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
5932 parms->nregs += 2;
5933 else
5934 parms->nregs += 1;
5935 }
5936 else
5937 {
5938 if (parms->intoffset == -1)
5939 parms->intoffset = bitpos;
5940 }
5941 }
5942 }
5943 }
5944
5945 /* A subroutine of function_arg_record_value. Assign the bits of the
5946 structure between parms->intoffset and bitpos to integer registers. */
5947
5948 static void
5949 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
5950 struct function_arg_record_value_parms *parms)
5951 {
5952 enum machine_mode mode;
5953 unsigned int regno;
5954 unsigned int startbit, endbit;
5955 int this_slotno, intslots, intoffset;
5956 rtx reg;
5957
5958 if (parms->intoffset == -1)
5959 return;
5960
5961 intoffset = parms->intoffset;
5962 parms->intoffset = -1;
5963
5964 startbit = intoffset & -BITS_PER_WORD;
5965 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5966 intslots = (endbit - startbit) / BITS_PER_WORD;
5967 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
5968
5969 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
5970 if (intslots <= 0)
5971 return;
5972
5973 /* If this is the trailing part of a word, only load that much into
5974 the register. Otherwise load the whole register. Note that in
5975 the latter case we may pick up unwanted bits. It's not a problem
5976 at the moment, but we may wish to revisit this. */
5977
5978 if (intoffset % BITS_PER_WORD != 0)
5979 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
5980 MODE_INT);
5981 else
5982 mode = word_mode;
5983
5984 intoffset /= BITS_PER_UNIT;
5985 do
5986 {
5987 regno = parms->regbase + this_slotno;
5988 reg = gen_rtx_REG (mode, regno);
5989 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5990 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
5991
5992 this_slotno += 1;
5993 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
5994 mode = word_mode;
5995 parms->nregs += 1;
5996 intslots -= 1;
5997 }
5998 while (intslots > 0);
5999 }
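/* Editorial worked example for the arithmetic above, assuming
   BITS_PER_WORD == 64: with intoffset == 32 and bitpos == 128, we get
   startbit = 32 & -64 == 0, endbit = (128 + 63) & -64 == 128, hence
   intslots == 2.  Since intoffset % 64 != 0, the first register uses the
   smallest MODE_INT mode holding the trailing 32 bits (SImode) at byte
   offset 4; the loop then rounds intoffset up to 8 and emits a full
   word_mode register for the second slot.  */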
6000
6001 /* A subroutine of function_arg_record_value. Traverse the structure
6002 recursively and assign bits to floating point registers. Track which
6003 bits in between need integer registers; invoke function_arg_record_value_3
6004 to make that happen. */
6005
6006 static void
6007 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6008 struct function_arg_record_value_parms *parms,
6009 bool packed_p)
6010 {
6011 tree field;
6012
6013 if (! packed_p)
6014 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6015 {
6016 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6017 {
6018 packed_p = true;
6019 break;
6020 }
6021 }
6022
6023 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6024 {
6025 if (TREE_CODE (field) == FIELD_DECL)
6026 {
6027 HOST_WIDE_INT bitpos = startbitpos;
6028
6029 if (DECL_SIZE (field) != 0)
6030 {
6031 if (integer_zerop (DECL_SIZE (field)))
6032 continue;
6033
6034 if (host_integerp (bit_position (field), 1))
6035 bitpos += int_bit_position (field);
6036 }
6037
6038 /* ??? FIXME: else assume zero offset. */
6039
6040 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6041 function_arg_record_value_2 (TREE_TYPE (field),
6042 bitpos,
6043 parms,
6044 packed_p);
6045 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6046 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6047 && TARGET_FPU
6048 && parms->named
6049 && ! packed_p)
6050 {
6051 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6052 int regno, nregs, pos;
6053 enum machine_mode mode = DECL_MODE (field);
6054 rtx reg;
6055
6056 function_arg_record_value_3 (bitpos, parms);
6057
6058 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6059 && mode == BLKmode)
6060 {
6061 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6062 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6063 }
6064 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6065 {
6066 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6067 nregs = 2;
6068 }
6069 else
6070 nregs = 1;
6071
6072 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6073 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6074 regno++;
6075 reg = gen_rtx_REG (mode, regno);
6076 pos = bitpos / BITS_PER_UNIT;
6077 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6078 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6079 parms->nregs += 1;
6080 while (--nregs > 0)
6081 {
6082 regno += GET_MODE_SIZE (mode) / 4;
6083 reg = gen_rtx_REG (mode, regno);
6084 pos += GET_MODE_SIZE (mode);
6085 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6086 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6087 parms->nregs += 1;
6088 }
6089 }
6090 else
6091 {
6092 if (parms->intoffset == -1)
6093 parms->intoffset = bitpos;
6094 }
6095 }
6096 }
6097 }
6098
6099 /* Used by function_arg and sparc_function_value_1 to implement the complex
6100 conventions of the 64-bit ABI for passing and returning structures.
6101 Return an expression valid as a return value for FUNCTION_ARG
6102 and TARGET_FUNCTION_VALUE.
6103
6104 TYPE is the data type of the argument (as a tree).
6105 This is null for libcalls where that information may
6106 not be available.
6107 MODE is the argument's machine mode.
6108 SLOTNO is the index number of the argument's slot in the parameter array.
6109 NAMED is nonzero if this argument is a named parameter
6110 (otherwise it is an extra parameter matching an ellipsis).
6111 REGBASE is the regno of the base register for the parameter array. */
6112
6113 static rtx
6114 function_arg_record_value (const_tree type, enum machine_mode mode,
6115 int slotno, int named, int regbase)
6116 {
6117 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6118 struct function_arg_record_value_parms parms;
6119 unsigned int nregs;
6120
6121 parms.ret = NULL_RTX;
6122 parms.slotno = slotno;
6123 parms.named = named;
6124 parms.regbase = regbase;
6125 parms.stack = 0;
6126
6127 /* Compute how many registers we need. */
6128 parms.nregs = 0;
6129 parms.intoffset = 0;
6130 function_arg_record_value_1 (type, 0, &parms, false);
6131
6132 /* Take into account pending integer fields. */
6133 if (parms.intoffset != -1)
6134 {
6135 unsigned int startbit, endbit;
6136 int intslots, this_slotno;
6137
6138 startbit = parms.intoffset & -BITS_PER_WORD;
6139 endbit = (typesize * BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6140 intslots = (endbit - startbit) / BITS_PER_WORD;
6141 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6142
6143 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6144 {
6145 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6146 /* We need to pass this field on the stack. */
6147 parms.stack = 1;
6148 }
6149
6150 parms.nregs += intslots;
6151 }
6152 nregs = parms.nregs;
6153
6154 /* Allocate the vector and handle some annoying special cases. */
6155 if (nregs == 0)
6156 {
6157 /* ??? Empty structure has no value? Duh? */
6158 if (typesize <= 0)
6159 {
6160 /* Though there's nothing really to store, return a word register
6161 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6162 leads to breakage because there are zero bytes to
6163 load. */
6164 return gen_rtx_REG (mode, regbase);
6165 }
6166 else
6167 {
6168 /* ??? C++ has structures with no fields, and yet a size. Give up
6169 for now and pass everything back in integer registers. */
6170 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6171 }
6172 if (nregs + slotno > SPARC_INT_ARG_MAX)
6173 nregs = SPARC_INT_ARG_MAX - slotno;
6174 }
6175 gcc_assert (nregs != 0);
6176
6177 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6178
6179 /* If at least one field must be passed on the stack, generate
6180 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6181 also be passed on the stack. We can't do much better because the
6182 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6183 of structures for which the fields passed exclusively in registers
6184 are not at the beginning of the structure. */
6185 if (parms.stack)
6186 XVECEXP (parms.ret, 0, 0)
6187 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6188
6189 /* Fill in the entries. */
6190 parms.nregs = 0;
6191 parms.intoffset = 0;
6192 function_arg_record_value_2 (type, 0, &parms, false);
6193 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6194
6195 gcc_assert (parms.nregs == nregs);
6196
6197 return parms.ret;
6198 }
6199
6200 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6201 of the 64-bit ABI for passing and returning unions.
6202 Return an expression valid as a return value for FUNCTION_ARG
6203 and TARGET_FUNCTION_VALUE.
6204
6205 SIZE is the size in bytes of the union.  MODE is the argument's mode.
6206 SLOTNO is the index number of the argument's slot in the parameter array.
6207 REGNO is the hard register the union will be passed in. */
6208
6209 static rtx
6210 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6211 int regno)
6212 {
6213 int nwords = ROUND_ADVANCE (size), i;
6214 rtx regs;
6215
6216 /* See comment in previous function for empty structures. */
6217 if (nwords == 0)
6218 return gen_rtx_REG (mode, regno);
6219
6220 if (slotno == SPARC_INT_ARG_MAX - 1)
6221 nwords = 1;
6222
6223 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6224
6225 for (i = 0; i < nwords; i++)
6226 {
6227 /* Unions are passed left-justified. */
6228 XVECEXP (regs, 0, i)
6229 = gen_rtx_EXPR_LIST (VOIDmode,
6230 gen_rtx_REG (word_mode, regno),
6231 GEN_INT (UNITS_PER_WORD * i));
6232 regno++;
6233 }
6234
6235 return regs;
6236 }
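/* Editorial example for the function above: a 12-byte union gives
   nwords == ROUND_ADVANCE (12) == 2 on the 64-bit ABI, so the PARALLEL
   holds two word_mode registers at byte offsets 0 and 8 -- the
   left-justified layout noted in the loop.  */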
6237
6238 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6239 for passing and returning large (BLKmode) vectors.
6240 Return an expression valid as a return value for the FUNCTION_ARG
6241 and TARGET_FUNCTION_VALUE.
6242
6243 SIZE is the size in bytes of the vector (at least 8 bytes).
6244 REGNO is the FP hard register the vector will be passed in. */
6245
6246 static rtx
6247 function_arg_vector_value (int size, int regno)
6248 {
6249 int i, nregs = size / 8;
6250 rtx regs;
6251
6252 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6253
6254 for (i = 0; i < nregs; i++)
6255 {
6256 XVECEXP (regs, 0, i)
6257 = gen_rtx_EXPR_LIST (VOIDmode,
6258 gen_rtx_REG (DImode, regno + 2 * i),
6259 GEN_INT (i * 8));
6260 }
6261
6262 return regs;
6263 }
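/* Editorial example for the function above: a 16-byte vector gives
   nregs == 2, producing a PARALLEL of two DImode FP registers, REGNO at
   byte offset 0 and REGNO + 2 at byte offset 8 -- consecutive
   even-numbered registers, each covering 8 bytes.  */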
6264
6265 /* Determine where to put an argument to a function.
6266 Value is zero to push the argument on the stack,
6267 or a hard register in which to store the argument.
6268
6269 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6270 the preceding args and about the function being called.
6271 MODE is the argument's machine mode.
6272 TYPE is the data type of the argument (as a tree).
6273 This is null for libcalls where that information may
6274 not be available.
6275 NAMED is true if this argument is a named parameter
6276 (otherwise it is an extra parameter matching an ellipsis).
6277 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6278 TARGET_FUNCTION_INCOMING_ARG. */
6279
6280 static rtx
6281 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6282 const_tree type, bool named, bool incoming_p)
6283 {
6284 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6285
6286 int regbase = (incoming_p
6287 ? SPARC_INCOMING_INT_ARG_FIRST
6288 : SPARC_OUTGOING_INT_ARG_FIRST);
6289 int slotno, regno, padding;
6290 enum mode_class mclass = GET_MODE_CLASS (mode);
6291
6292 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6293 &regno, &padding);
6294 if (slotno == -1)
6295 return 0;
6296
6297 /* Vector types deserve special treatment because they are polymorphic wrt
6298 their mode, depending upon whether VIS instructions are enabled. */
6299 if (type && TREE_CODE (type) == VECTOR_TYPE)
6300 {
6301 HOST_WIDE_INT size = int_size_in_bytes (type);
6302 gcc_assert ((TARGET_ARCH32 && size <= 8)
6303 || (TARGET_ARCH64 && size <= 16));
6304
6305 if (mode == BLKmode)
6306 return function_arg_vector_value (size,
6307 SPARC_FP_ARG_FIRST + 2*slotno);
6308 else
6309 mclass = MODE_FLOAT;
6310 }
6311
6312 if (TARGET_ARCH32)
6313 return gen_rtx_REG (mode, regno);
6314
6315 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6316 and are promoted to registers if possible. */
6317 if (type && TREE_CODE (type) == RECORD_TYPE)
6318 {
6319 HOST_WIDE_INT size = int_size_in_bytes (type);
6320 gcc_assert (size <= 16);
6321
6322 return function_arg_record_value (type, mode, slotno, named, regbase);
6323 }
6324
6325 /* Unions up to 16 bytes in size are passed in integer registers. */
6326 else if (type && TREE_CODE (type) == UNION_TYPE)
6327 {
6328 HOST_WIDE_INT size = int_size_in_bytes (type);
6329 gcc_assert (size <= 16);
6330
6331 return function_arg_union_value (size, mode, slotno, regno);
6332 }
6333
6334 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6335 but also have the slot allocated for them.
6336 If no prototype is in scope, fp values in register slots get passed
6337 in two places, either fp regs and int regs or fp regs and memory. */
6338 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6339 && SPARC_FP_REG_P (regno))
6340 {
6341 rtx reg = gen_rtx_REG (mode, regno);
6342 if (cum->prototype_p || cum->libcall_p)
6343 {
6344 /* "* 2" because fp reg numbers are recorded in 4 byte
6345 quantities. */
6346 #if 0
6347 /* ??? This will cause the value to be passed in the fp reg and
6348 in the stack. When a prototype exists we want to pass the
6349 value in the reg but reserve space on the stack. That's an
6350 optimization, and is deferred [for a bit]. */
6351 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6352 return gen_rtx_PARALLEL (mode,
6353 gen_rtvec (2,
6354 gen_rtx_EXPR_LIST (VOIDmode,
6355 NULL_RTX, const0_rtx),
6356 gen_rtx_EXPR_LIST (VOIDmode,
6357 reg, const0_rtx)));
6358 else
6359 #else
6360 /* ??? It seems that passing back a register even when past
6361 the area declared by REG_PARM_STACK_SPACE will allocate
6362 space appropriately, and will not copy the data onto the
6363 stack, exactly as we desire.
6364
6365 This is due to locate_and_pad_parm being called in
6366 expand_call whenever reg_parm_stack_space > 0, which
6367 while beneficial to our example here, would seem to be
6368 in error from what had been intended. Ho hum... -- r~ */
6369 #endif
6370 return reg;
6371 }
6372 else
6373 {
6374 rtx v0, v1;
6375
6376 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6377 {
6378 int intreg;
6379
6380 /* On incoming, we don't need to know that the value
6381 is passed in %f0 and %i0, and it confuses other parts
6382 causing needless spillage even in the simplest cases. */
6383 if (incoming_p)
6384 return reg;
6385
6386 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6387 + (regno - SPARC_FP_ARG_FIRST) / 2);
6388
6389 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6390 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6391 const0_rtx);
6392 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6393 }
6394 else
6395 {
6396 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6397 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6398 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6399 }
6400 }
6401 }
6402
6403 /* All other aggregate types are passed in an integer register in a mode
6404 corresponding to the size of the type. */
6405 else if (type && AGGREGATE_TYPE_P (type))
6406 {
6407 HOST_WIDE_INT size = int_size_in_bytes (type);
6408 gcc_assert (size <= 16);
6409
6410 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6411 }
6412
6413 return gen_rtx_REG (mode, regno);
6414 }
6415
6416 /* Handle the TARGET_FUNCTION_ARG target hook. */
6417
6418 static rtx
6419 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6420 const_tree type, bool named)
6421 {
6422 return sparc_function_arg_1 (cum, mode, type, named, false);
6423 }
6424
6425 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6426
6427 static rtx
6428 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6429 const_tree type, bool named)
6430 {
6431 return sparc_function_arg_1 (cum, mode, type, named, true);
6432 }
6433
6434 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
6435
6436 static unsigned int
6437 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
6438 {
6439 return ((TARGET_ARCH64
6440 && (GET_MODE_ALIGNMENT (mode) == 128
6441 || (type && TYPE_ALIGN (type) == 128)))
6442 ? 128
6443 : PARM_BOUNDARY);
6444 }
6445
6446 /* For an arg passed partly in registers and partly in memory,
6447 this is the number of bytes of registers used.
6448 For args passed entirely in registers or entirely in memory, zero.
6449
6450 Any arg that starts in the first 6 regs but won't entirely fit in them
6451 needs partial registers on v8. On v9, structures with integer
6452 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6453 values that begin in the last fp reg [where "last fp reg" varies with the
6454 mode] will be split between that reg and memory. */
6455
6456 static int
6457 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
6458 tree type, bool named)
6459 {
6460 int slotno, regno, padding;
6461
6462 /* We pass false for incoming_p here; it doesn't matter. */
6463 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6464 false, &regno, &padding);
6465
6466 if (slotno == -1)
6467 return 0;
6468
6469 if (TARGET_ARCH32)
6470 {
6471 if ((slotno + (mode == BLKmode
6472 ? ROUND_ADVANCE (int_size_in_bytes (type))
6473 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
6474 > SPARC_INT_ARG_MAX)
6475 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
6476 }
6477 else
6478 {
6479 /* We are guaranteed by pass_by_reference that the size of the
6480 argument is not greater than 16 bytes, so we only need to return
6481 one word if the argument is partially passed in registers. */
6482
6483 if (type && AGGREGATE_TYPE_P (type))
6484 {
6485 int size = int_size_in_bytes (type);
6486
6487 if (size > UNITS_PER_WORD
6488 && slotno == SPARC_INT_ARG_MAX - 1)
6489 return UNITS_PER_WORD;
6490 }
6491 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
6492 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6493 && ! (TARGET_FPU && named)))
6494 {
6495 /* The complex types are passed as packed types. */
6496 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6497 && slotno == SPARC_INT_ARG_MAX - 1)
6498 return UNITS_PER_WORD;
6499 }
6500 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6501 {
6502 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
6503 > SPARC_FP_ARG_MAX)
6504 return UNITS_PER_WORD;
6505 }
6506 }
6507
6508 return 0;
6509 }
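/* Editorial example for the function above: on the 64-bit ABI, a 16-byte
   structure whose first word lands in the last integer slot
   (slotno == SPARC_INT_ARG_MAX - 1) yields UNITS_PER_WORD: one word
   travels in %o5 and the remainder goes to memory, as described in the
   leading comment.  */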
6510
6511 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6512 Specify whether to pass the argument by reference. */
6513
6514 static bool
6515 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6516 enum machine_mode mode, const_tree type,
6517 bool named ATTRIBUTE_UNUSED)
6518 {
6519 if (TARGET_ARCH32)
6520 /* Original SPARC 32-bit ABI says that structures and unions,
6521 and quad-precision floats are passed by reference. For Pascal,
6522 also pass arrays by reference. All other base types are passed
6523 in registers.
6524
6525 Extended ABI (as implemented by the Sun compiler) says that all
6526 complex floats are passed by reference. Pass complex integers
6527 in registers up to 8 bytes. More generally, enforce the 2-word
6528 cap for passing arguments in registers.
6529
6530 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6531 integers are passed like floats of the same size, that is in
6532 registers up to 8 bytes. Pass all vector floats by reference
6533 like structure and unions. */
6534 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6535 || mode == SCmode
6536 /* Catch CDImode, TFmode, DCmode and TCmode. */
6537 || GET_MODE_SIZE (mode) > 8
6538 || (type
6539 && TREE_CODE (type) == VECTOR_TYPE
6540 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6541 else
6542 /* Original SPARC 64-bit ABI says that structures and unions
6543 smaller than 16 bytes are passed in registers, as well as
6544 all other base types.
6545
6546 Extended ABI (as implemented by the Sun compiler) says that
6547 complex floats are passed in registers up to 16 bytes. Pass
6548 all complex integers in registers up to 16 bytes. More generally,
6549 enforce the 2-word cap for passing arguments in registers.
6550
6551 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6552 integers are passed like floats of the same size, that is in
6553 registers (up to 16 bytes). Pass all vector floats like structure
6554 and unions. */
6555 return ((type
6556 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
6557 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6558 /* Catch CTImode and TCmode. */
6559 || GET_MODE_SIZE (mode) > 16);
6560 }
6561
6562 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
6563 Update the data in CUM to advance over an argument
6564 of mode MODE and data type TYPE.
6565 TYPE is null for libcalls where that information may not be available. */
6566
6567 static void
6568 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6569 const_tree type, bool named)
6570 {
6571 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6572 int regno, padding;
6573
6574 /* We pass false for incoming_p here; it doesn't matter. */
6575 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
6576
6577 /* If argument requires leading padding, add it. */
6578 cum->words += padding;
6579
6580 if (TARGET_ARCH32)
6581 {
6582 cum->words += (mode != BLKmode
6583 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6584 : ROUND_ADVANCE (int_size_in_bytes (type)));
6585 }
6586 else
6587 {
6588 if (type && AGGREGATE_TYPE_P (type))
6589 {
6590 int size = int_size_in_bytes (type);
6591
6592 if (size <= 8)
6593 ++cum->words;
6594 else if (size <= 16)
6595 cum->words += 2;
6596 else /* passed by reference */
6597 ++cum->words;
6598 }
6599 else
6600 {
6601 cum->words += (mode != BLKmode
6602 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6603 : ROUND_ADVANCE (int_size_in_bytes (type)));
6604 }
6605 }
6606 }
6607
6608 /* Handle the FUNCTION_ARG_PADDING macro.
6609 For the 64-bit ABI, structs are always stored left-shifted in their
6610 argument slot. */
6611
6612 enum direction
6613 function_arg_padding (enum machine_mode mode, const_tree type)
6614 {
6615 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
6616 return upward;
6617
6618 /* Fall back to the default. */
6619 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
6620 }
6621
6622 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
6623 Specify whether to return the return value in memory. */
6624
6625 static bool
6626 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6627 {
6628 if (TARGET_ARCH32)
6629 /* Original SPARC 32-bit ABI says that structures and unions,
6630 and quad-precision floats are returned in memory. All other
6631 base types are returned in registers.
6632
6633 Extended ABI (as implemented by the Sun compiler) says that
6634 all complex floats are returned in registers (8 FP registers
6635 at most for '_Complex long double'). Return all complex integers
6636 in registers (4 at most for '_Complex long long').
6637
6638 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6639 integers are returned like floats of the same size, that is in
6640 registers up to 8 bytes and in memory otherwise. Return all
6641 vector floats in memory like structure and unions; note that
6642 they always have BLKmode like the latter. */
6643 return (TYPE_MODE (type) == BLKmode
6644 || TYPE_MODE (type) == TFmode
6645 || (TREE_CODE (type) == VECTOR_TYPE
6646 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6647 else
6648 /* Original SPARC 64-bit ABI says that structures and unions
6649 smaller than 32 bytes are returned in registers, as well as
6650 all other base types.
6651
6652 Extended ABI (as implemented by the Sun compiler) says that all
6653 complex floats are returned in registers (8 FP registers at most
6654 for '_Complex long double'). Return all complex integers in
6655 registers (4 at most for '_Complex TItype').
6656
6657 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6658 integers are returned like floats of the same size, that is in
6659 registers. Return all vector floats like structure and unions;
6660 note that they always have BLKmode like the latter. */
6661 return (TYPE_MODE (type) == BLKmode
6662 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
6663 }
6664
6665 /* Handle the TARGET_STRUCT_VALUE target hook.
6666 Return where to find the structure return value address. */
6667
6668 static rtx
6669 sparc_struct_value_rtx (tree fndecl, int incoming)
6670 {
6671 if (TARGET_ARCH64)
6672 return 0;
6673 else
6674 {
6675 rtx mem;
6676
6677 if (incoming)
6678 mem = gen_frame_mem (Pmode, plus_constant (frame_pointer_rtx,
6679 STRUCT_VALUE_OFFSET));
6680 else
6681 mem = gen_frame_mem (Pmode, plus_constant (stack_pointer_rtx,
6682 STRUCT_VALUE_OFFSET));
6683
6684 /* Only follow the SPARC ABI for fixed-size structure returns.
6685 Variable-size structure returns are handled per the normal
6686 procedures in GCC. This is enabled by -mstd-struct-return. */
6687 if (incoming == 2
6688 && sparc_std_struct_return
6689 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
6690 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
6691 {
6692 /* We must check and adjust the return address, since it is
6693 optional whether the return object is actually
6694 provided. */
6695 rtx ret_reg = gen_rtx_REG (Pmode, 31);
6696 rtx scratch = gen_reg_rtx (SImode);
6697 rtx endlab = gen_label_rtx ();
6698
6699 /* Calculate the return object size. */
6700 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
6701 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
6702 /* Construct a temporary return value. */
6703 rtx temp_val
6704 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
6705
6706 /* Implement SPARC 32-bit psABI callee return struct checking:
6707
6708 Fetch the instruction where we will return to and see if
6709 it's an unimp instruction (the most significant 10 bits
6710 will be zero). */
6711 emit_move_insn (scratch, gen_rtx_MEM (SImode,
6712 plus_constant (ret_reg, 8)));
6713 /* Assume the size is valid and pre-adjust. */
6714 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
6715 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
6716 0, endlab);
6717 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
6718 /* Write the address of the memory pointed to by temp_val into
6719 the memory pointed to by mem. */
6720 emit_move_insn (mem, XEXP (temp_val, 0));
6721 emit_label (endlab);
6722 }
6723
6724 return mem;
6725 }
6726 }
6727
6728 /* Handle the TARGET_FUNCTION_VALUE and TARGET_LIBCALL_VALUE target hooks.
6729 For v9, function return values are subject to the same rules as arguments,
6730 except that up to 32 bytes may be returned in registers. */
6731
6732 static rtx
6733 sparc_function_value_1 (const_tree type, enum machine_mode mode,
6734 bool outgoing)
6735 {
6736 /* Beware that the two values are swapped here wrt function_arg. */
6737 int regbase = (outgoing
6738 ? SPARC_INCOMING_INT_ARG_FIRST
6739 : SPARC_OUTGOING_INT_ARG_FIRST);
6740 enum mode_class mclass = GET_MODE_CLASS (mode);
6741 int regno;
6742
6743 /* Vector types deserve special treatment because they are polymorphic wrt
6744 their mode, depending upon whether VIS instructions are enabled. */
6745 if (type && TREE_CODE (type) == VECTOR_TYPE)
6746 {
6747 HOST_WIDE_INT size = int_size_in_bytes (type);
6748 gcc_assert ((TARGET_ARCH32 && size <= 8)
6749 || (TARGET_ARCH64 && size <= 32));
6750
6751 if (mode == BLKmode)
6752 return function_arg_vector_value (size,
6753 SPARC_FP_ARG_FIRST);
6754 else
6755 mclass = MODE_FLOAT;
6756 }
6757
6758 if (TARGET_ARCH64 && type)
6759 {
6760 /* Structures up to 32 bytes in size are returned in registers. */
6761 if (TREE_CODE (type) == RECORD_TYPE)
6762 {
6763 HOST_WIDE_INT size = int_size_in_bytes (type);
6764 gcc_assert (size <= 32);
6765
6766 return function_arg_record_value (type, mode, 0, 1, regbase);
6767 }
6768
6769 /* Unions up to 32 bytes in size are returned in integer registers. */
6770 else if (TREE_CODE (type) == UNION_TYPE)
6771 {
6772 HOST_WIDE_INT size = int_size_in_bytes (type);
6773 gcc_assert (size <= 32);
6774
6775 return function_arg_union_value (size, mode, 0, regbase);
6776 }
6777
6778 /* Objects that require it are returned in FP registers. */
6779 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6780 ;
6781
6782 /* All other aggregate types are returned in an integer register in a
6783 mode corresponding to the size of the type. */
6784 else if (AGGREGATE_TYPE_P (type))
6785 {
6788 HOST_WIDE_INT size = int_size_in_bytes (type);
6789 gcc_assert (size <= 32);
6790
6791 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6792
6793 /* ??? We probably should have made the same ABI change in
6794 3.4.0 as the one we made for unions. The latter was
6795 required by the SCD though, while the former is not
6796 specified, so we favored compatibility and efficiency.
6797
6798 Now we're stuck for aggregates larger than 16 bytes,
6799 because OImode vanished in the meantime. Let's not
6800 try to be unduly clever, and simply follow the ABI
6801 for unions in that case. */
6802 if (mode == BLKmode)
6803 return function_arg_union_value (size, mode, 0, regbase);
6804 else
6805 mclass = MODE_INT;
6806 }
6807
6808 /* We should only have pointer and integer types at this point. This
6809 must match sparc_promote_function_mode. */
6810 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6811 mode = word_mode;
6812 }
6813
6814 /* We should only have pointer and integer types at this point. This must
6815 match sparc_promote_function_mode. */
6816 else if (TARGET_ARCH32
6817 && mclass == MODE_INT
6818 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6819 mode = word_mode;
6820
6821 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
6822 regno = SPARC_FP_ARG_FIRST;
6823 else
6824 regno = regbase;
6825
6826 return gen_rtx_REG (mode, regno);
6827 }
6828
6829 /* Handle TARGET_FUNCTION_VALUE.
6830 On the SPARC, the value is found in the first "output" register, but the
6831 called function leaves it in the first "input" register. */
6832
6833 static rtx
6834 sparc_function_value (const_tree valtype,
6835 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
6836 bool outgoing)
6837 {
6838 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
6839 }
6840
6841 /* Handle TARGET_LIBCALL_VALUE. */
6842
6843 static rtx
6844 sparc_libcall_value (enum machine_mode mode,
6845 const_rtx fun ATTRIBUTE_UNUSED)
6846 {
6847 return sparc_function_value_1 (NULL_TREE, mode, false);
6848 }
6849
6850 /* Handle TARGET_FUNCTION_VALUE_REGNO_P.
6851 On the SPARC, the first "output" reg is used for integer values, and the
6852 first floating point register is used for floating point values. */
6853
6854 static bool
6855 sparc_function_value_regno_p (const unsigned int regno)
6856 {
6857 return (regno == 8 || regno == 32);
6858 }
6859
6860 /* Do what is necessary for `va_start'. We look at the current function
6861 to determine if stdarg or varargs is used and return the address of
6862 the first unnamed parameter. */
6863
6864 static rtx
6865 sparc_builtin_saveregs (void)
6866 {
6867 int first_reg = crtl->args.info.words;
6868 rtx address;
6869 int regno;
6870
6871 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
6872 emit_move_insn (gen_rtx_MEM (word_mode,
6873 gen_rtx_PLUS (Pmode,
6874 frame_pointer_rtx,
6875 GEN_INT (FIRST_PARM_OFFSET (0)
6876 + (UNITS_PER_WORD
6877 * regno)))),
6878 gen_rtx_REG (word_mode,
6879 SPARC_INCOMING_INT_ARG_FIRST + regno));
6880
6881 address = gen_rtx_PLUS (Pmode,
6882 frame_pointer_rtx,
6883 GEN_INT (FIRST_PARM_OFFSET (0)
6884 + UNITS_PER_WORD * first_reg));
6885
6886 return address;
6887 }
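/* Editorial example for the function above -- a sketch, not part of the
   port.  For a varargs function such as:  */
#if 0
void f (int a, int b, ...);   /* first_reg == 2 on TARGET_ARCH64 */
#endif
/* the loop dumps %i2..%i5 into their reserved stack slots and the
   returned address points at the slot of the first unnamed word.  */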
6888
6889 /* Implement `va_start' for stdarg. */
6890
6891 static void
6892 sparc_va_start (tree valist, rtx nextarg)
6893 {
6894 nextarg = expand_builtin_saveregs ();
6895 std_expand_builtin_va_start (valist, nextarg);
6896 }
6897
6898 /* Implement `va_arg' for stdarg. */
6899
6900 static tree
6901 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6902 gimple_seq *post_p)
6903 {
6904 HOST_WIDE_INT size, rsize, align;
6905 tree addr, incr;
6906 bool indirect;
6907 tree ptrtype = build_pointer_type (type);
6908
6909 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
6910 {
6911 indirect = true;
6912 size = rsize = UNITS_PER_WORD;
6913 align = 0;
6914 }
6915 else
6916 {
6917 indirect = false;
6918 size = int_size_in_bytes (type);
6919 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6920 align = 0;
6921
6922 if (TARGET_ARCH64)
6923 {
6924 /* For SPARC64, objects requiring 16-byte alignment get it. */
6925 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
6926 align = 2 * UNITS_PER_WORD;
6927
6928 /* SPARC-V9 ABI states that structures up to 16 bytes in size
6929 are left-justified in their slots. */
6930 if (AGGREGATE_TYPE_P (type))
6931 {
6932 if (size == 0)
6933 size = rsize = UNITS_PER_WORD;
6934 else
6935 size = rsize;
6936 }
6937 }
6938 }
6939
6940 incr = valist;
6941 if (align)
6942 {
6943 incr = fold_build_pointer_plus_hwi (incr, align - 1);
6944 incr = fold_convert (sizetype, incr);
6945 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
6946 size_int (-align));
6947 incr = fold_convert (ptr_type_node, incr);
6948 }
6949
6950 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
6951 addr = incr;
6952
6953 if (BYTES_BIG_ENDIAN && size < rsize)
6954 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
6955
6956 if (indirect)
6957 {
6958 addr = fold_convert (build_pointer_type (ptrtype), addr);
6959 addr = build_va_arg_indirect_ref (addr);
6960 }
6961
6962 /* If the address isn't aligned properly for the type, we need a temporary.
6963 FIXME: This is inefficient; usually we can do this in registers. */
6964 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
6965 {
6966 tree tmp = create_tmp_var (type, "va_arg_tmp");
6967 tree dest_addr = build_fold_addr_expr (tmp);
6968 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
6969 3, dest_addr, addr, size_int (rsize));
6970 TREE_ADDRESSABLE (tmp) = 1;
6971 gimplify_and_add (copy, pre_p);
6972 addr = dest_addr;
6973 }
6974
6975 else
6976 addr = fold_convert (ptrtype, addr);
6977
6978 incr = fold_build_pointer_plus_hwi (incr, rsize);
6979 gimplify_assign (valist, incr, post_p);
6980
6981 return build_va_arg_indirect_ref (addr);
6982 }
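/* Editorial example for sparc_gimplify_va_arg above, on the big-endian
   64-bit ABI: fetching an 'int' gives size == 4 and rsize == 8, so ADDR
   is bumped by rsize - size == 4 to pick up the right-justified low half
   of the slot; an aggregate instead has size set to rsize and is read
   left-justified.  VALIST advances by rsize either way.  */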
6983 \f
6984 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
6985 Specify whether the vector mode is supported by the hardware. */
6986
6987 static bool
6988 sparc_vector_mode_supported_p (enum machine_mode mode)
6989 {
6990 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
6991 }
6992 \f
6993 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
6994
6995 static enum machine_mode
6996 sparc_preferred_simd_mode (enum machine_mode mode)
6997 {
6998 if (TARGET_VIS)
6999 switch (mode)
7000 {
7001 case SImode:
7002 return V2SImode;
7003 case HImode:
7004 return V4HImode;
7005 case QImode:
7006 return V8QImode;
7007
7008 default:;
7009 }
7010
7011 return word_mode;
7012 }
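/* Editorial example for the hook above: when vectorizing HImode
   arithmetic with VIS enabled, the vectorizer is offered V4HImode, i.e.
   four 16-bit lanes in one 64-bit register; without VIS the hook falls
   back to word_mode, signalling that no vector mode is preferred.  */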
7013 \f
7014 /* Return the string to output an unconditional branch to LABEL, which is
7015 the operand number of the label.
7016
7017 DEST is the destination insn (i.e. the label), INSN is the source. */
7018
7019 const char *
7020 output_ubranch (rtx dest, int label, rtx insn)
7021 {
7022 static char string[64];
7023 bool v9_form = false;
7024 char *p;
7025
7026 if (TARGET_V9 && INSN_ADDRESSES_SET_P ())
7027 {
7028 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7029 - INSN_ADDRESSES (INSN_UID (insn)));
7030 /* Leave some instructions for "slop". */
7031 if (delta >= -260000 && delta < 260000)
7032 v9_form = true;
7033 }
7034
7035 if (v9_form)
7036 strcpy (string, "ba%*,pt\t%%xcc, ");
7037 else
7038 strcpy (string, "b%*\t");
7039
7040 p = strchr (string, '\0');
7041 *p++ = '%';
7042 *p++ = 'l';
7043 *p++ = '0' + label;
7044 *p++ = '%';
7045 *p++ = '(';
7046 *p = '\0';
7047
7048 return string;
7049 }
7050
7051 /* Return the string to output a conditional branch to LABEL, which is
7052 the operand number of the label. OP is the conditional expression.
7053 XEXP (OP, 0) is assumed to be a condition code register (integer or
7054 floating point) and its mode specifies what kind of comparison we made.
7055
7056 DEST is the destination insn (i.e. the label), INSN is the source.
7057
7058 REVERSED is nonzero if we should reverse the sense of the comparison.
7059
7060 ANNUL is nonzero if we should generate an annulling branch. */
7061
7062 const char *
7063 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7064 rtx insn)
7065 {
7066 static char string[64];
7067 enum rtx_code code = GET_CODE (op);
7068 rtx cc_reg = XEXP (op, 0);
7069 enum machine_mode mode = GET_MODE (cc_reg);
7070 const char *labelno, *branch;
7071 int spaces = 8, far;
7072 char *p;
7073
7074 /* v9 branches are limited to +-1MB. If it is too far away,
7075 change
7076
7077 bne,pt %xcc, .LC30
7078
7079 to
7080
7081 be,pn %xcc, .+12
7082 nop
7083 ba .LC30
7084
7085 and
7086
7087 fbne,a,pn %fcc2, .LC29
7088
7089 to
7090
7091 fbe,pt %fcc2, .+16
7092 nop
7093 ba .LC29 */
7094
7095 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7096 if (reversed ^ far)
7097 {
7098 /* Reversal of FP compares takes extra care -- an ordered compare
7099 becomes an unordered compare and vice versa. */
7100 if (mode == CCFPmode || mode == CCFPEmode)
7101 code = reverse_condition_maybe_unordered (code);
7102 else
7103 code = reverse_condition (code);
7104 }
7105
7106 /* Start by writing the branch condition. */
7107 if (mode == CCFPmode || mode == CCFPEmode)
7108 {
7109 switch (code)
7110 {
7111 case NE:
7112 branch = "fbne";
7113 break;
7114 case EQ:
7115 branch = "fbe";
7116 break;
7117 case GE:
7118 branch = "fbge";
7119 break;
7120 case GT:
7121 branch = "fbg";
7122 break;
7123 case LE:
7124 branch = "fble";
7125 break;
7126 case LT:
7127 branch = "fbl";
7128 break;
7129 case UNORDERED:
7130 branch = "fbu";
7131 break;
7132 case ORDERED:
7133 branch = "fbo";
7134 break;
7135 case UNGT:
7136 branch = "fbug";
7137 break;
7138 case UNLT:
7139 branch = "fbul";
7140 break;
7141 case UNEQ:
7142 branch = "fbue";
7143 break;
7144 case UNGE:
7145 branch = "fbuge";
7146 break;
7147 case UNLE:
7148 branch = "fbule";
7149 break;
7150 case LTGT:
7151 branch = "fblg";
7152 break;
7153
7154 default:
7155 gcc_unreachable ();
7156 }
7157
7158 /* ??? !v9: FP branches cannot be preceded by another floating point
7159 insn. Because there is currently no concept of pre-delay slots,
7160 we can fix this only by always emitting a nop before a floating
7161 point branch. */
7162
7163 string[0] = '\0';
7164 if (! TARGET_V9)
7165 strcpy (string, "nop\n\t");
7166 strcat (string, branch);
7167 }
7168 else
7169 {
7170 switch (code)
7171 {
7172 case NE:
7173 branch = "bne";
7174 break;
7175 case EQ:
7176 branch = "be";
7177 break;
7178 case GE:
7179 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7180 branch = "bpos";
7181 else
7182 branch = "bge";
7183 break;
7184 case GT:
7185 branch = "bg";
7186 break;
7187 case LE:
7188 branch = "ble";
7189 break;
7190 case LT:
7191 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7192 branch = "bneg";
7193 else
7194 branch = "bl";
7195 break;
7196 case GEU:
7197 branch = "bgeu";
7198 break;
7199 case GTU:
7200 branch = "bgu";
7201 break;
7202 case LEU:
7203 branch = "bleu";
7204 break;
7205 case LTU:
7206 branch = "blu";
7207 break;
7208
7209 default:
7210 gcc_unreachable ();
7211 }
7212 strcpy (string, branch);
7213 }
7214 spaces -= strlen (branch);
7215 p = strchr (string, '\0');
7216
7217 /* Now add the annulling, the label, and a possible nop. */
7218 if (annul && ! far)
7219 {
7220 strcpy (p, ",a");
7221 p += 2;
7222 spaces -= 2;
7223 }
7224
7225 if (TARGET_V9)
7226 {
7227 rtx note;
7228 int v8 = 0;
7229
7230 if (! far && insn && INSN_ADDRESSES_SET_P ())
7231 {
7232 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7233 - INSN_ADDRESSES (INSN_UID (insn)));
7234 /* Leave some instructions for "slop". */
7235 if (delta < -260000 || delta >= 260000)
7236 v8 = 1;
7237 }
7238
7239 if (mode == CCFPmode || mode == CCFPEmode)
7240 {
7241 static char v9_fcc_labelno[] = "%%fccX, ";
7242 /* Set the char indicating the number of the fcc reg to use. */
7243 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7244 labelno = v9_fcc_labelno;
7245 if (v8)
7246 {
7247 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7248 labelno = "";
7249 }
7250 }
7251 else if (mode == CCXmode || mode == CCX_NOOVmode)
7252 {
7253 labelno = "%%xcc, ";
7254 gcc_assert (! v8);
7255 }
7256 else
7257 {
7258 labelno = "%%icc, ";
7259 if (v8)
7260 labelno = "";
7261 }
7262
7263 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7264 {
7265 strcpy (p,
7266 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
7267 ? ",pt" : ",pn");
7268 p += 3;
7269 spaces -= 3;
7270 }
7271 }
7272 else
7273 labelno = "";
7274
7275 if (spaces > 0)
7276 *p++ = '\t';
7277 else
7278 *p++ = ' ';
7279 strcpy (p, labelno);
7280 p = strchr (p, '\0');
7281 if (far)
7282 {
7283 strcpy (p, ".+12\n\t nop\n\tb\t");
7284 /* Skip the next insn if requested or
7285 if we know that it will be a nop. */
7286 if (annul || ! final_sequence)
7287 p[3] = '6';
7288 p += 14;
7289 }
7290 *p++ = '%';
7291 *p++ = 'l';
7292 *p++ = label + '0';
7293 *p++ = '%';
7294 *p++ = '#';
7295 *p = '\0';
7296
7297 return string;
7298 }
7299
7300 /* Emit a library call comparison between floating point X and Y.
7301 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7302 Return the new operator to be used in the comparison sequence.
7303
7304 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7305 values as arguments instead of the TFmode registers themselves;
7306 that's why we cannot call emit_float_lib_cmp. */
7307
7308 rtx
7309 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7310 {
7311 const char *qpfunc;
7312 rtx slot0, slot1, result, tem, tem2, libfunc;
7313 enum machine_mode mode;
7314 enum rtx_code new_comparison;
7315
7316 switch (comparison)
7317 {
7318 case EQ:
7319 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7320 break;
7321
7322 case NE:
7323 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7324 break;
7325
7326 case GT:
7327 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7328 break;
7329
7330 case GE:
7331 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7332 break;
7333
7334 case LT:
7335 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7336 break;
7337
7338 case LE:
7339 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7340 break;
7341
7342 case ORDERED:
7343 case UNORDERED:
7344 case UNGT:
7345 case UNLT:
7346 case UNEQ:
7347 case UNGE:
7348 case UNLE:
7349 case LTGT:
7350 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7351 break;
7352
7353 default:
7354 gcc_unreachable ();
7355 }
7356
7357 if (TARGET_ARCH64)
7358 {
7359 if (MEM_P (x))
7360 slot0 = x;
7361 else
7362 {
7363 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);
7364 emit_move_insn (slot0, x);
7365 }
7366
7367 if (MEM_P (y))
7368 slot1 = y;
7369 else
7370 {
7371 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);
7372 emit_move_insn (slot1, y);
7373 }
7374
7375 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7376 emit_library_call (libfunc, LCT_NORMAL,
7377 DImode, 2,
7378 XEXP (slot0, 0), Pmode,
7379 XEXP (slot1, 0), Pmode);
7380 mode = DImode;
7381 }
7382 else
7383 {
7384 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7385 emit_library_call (libfunc, LCT_NORMAL,
7386 SImode, 2,
7387 x, TFmode, y, TFmode);
7388 mode = SImode;
7389 }
7390
7391
7392 /* Immediately move the result of the libcall into a pseudo
7393 register so reload doesn't clobber the value if it needs
7394 the return register for a spill reg. */
7395 result = gen_reg_rtx (mode);
7396 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7397
7398 switch (comparison)
7399 {
7400 default:
7401 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7402 case ORDERED:
7403 case UNORDERED:
7404 new_comparison = (comparison == UNORDERED ? EQ : NE);
7405 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT (3));
7406 case UNGT:
7407 case UNGE:
7408 new_comparison = (comparison == UNGT ? GT : NE);
7409 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7410 case UNLE:
7411 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7412 case UNLT:
7413 tem = gen_reg_rtx (mode);
7414 if (TARGET_ARCH32)
7415 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7416 else
7417 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7418 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
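/* The remaining cases rely on the _Q_cmp/_Qp_cmp return convention
 assumed by the tests above and below: 0 for EQ, 1 for LT, 2 for GT,
 3 for UNORDERED. Hence (result + 1) & 2 is nonzero exactly for the
 LT and GT outcomes: UNEQ compares it against zero, LTGT against
 nonzero. */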
7419 case UNEQ:
7420 case LTGT:
7421 tem = gen_reg_rtx (mode);
7422 if (TARGET_ARCH32)
7423 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7424 else
7425 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7426 tem2 = gen_reg_rtx (mode);
7427 if (TARGET_ARCH32)
7428 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
7429 else
7430 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
7431 new_comparison = (comparison == UNEQ ? EQ : NE);
7432 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
7433 }
7434
7435 gcc_unreachable ();
7436 }
7437
7438 /* Generate an unsigned DImode to FP conversion. This is the same code
7439 optabs would emit if we didn't have TFmode patterns. */
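/* In C terms, the emitted sequence is roughly this sketch:

 if ((long long) in >= 0)
 out = (FP) in;                             exact signed convert
 else
 out = 2 * (FP) ((in >> 1) | (in & 1));     halve, convert, double

 OR-ing the low bit back in keeps the final rounding correct for odd
 values. */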
7440
7441 void
7442 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
7443 {
7444 rtx neglab, donelab, i0, i1, f0, in, out;
7445
7446 out = operands[0];
7447 in = force_reg (DImode, operands[1]);
7448 neglab = gen_label_rtx ();
7449 donelab = gen_label_rtx ();
7450 i0 = gen_reg_rtx (DImode);
7451 i1 = gen_reg_rtx (DImode);
7452 f0 = gen_reg_rtx (mode);
7453
7454 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
7455
7456 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
7457 emit_jump_insn (gen_jump (donelab));
7458 emit_barrier ();
7459
7460 emit_label (neglab);
7461
7462 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
7463 emit_insn (gen_anddi3 (i1, in, const1_rtx));
7464 emit_insn (gen_iordi3 (i0, i0, i1));
7465 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
7466 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
7467
7468 emit_label (donelab);
7469 }
7470
7471 /* Generate an FP to unsigned DImode conversion. This is the same code
7472 optabs would emit if we didn't have TFmode patterns. */
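/* In C terms, the emitted sequence is roughly this sketch:

 if (in < 0x1p63)
 out = (unsigned long long) (long long) in;
 else
 out = (unsigned long long) (long long) (in - 0x1p63)
 ^ (1ULL << 63);

 i.e. values too large for a signed conversion are biased down by
 2^63 first and the top bit is restored by the final XOR. */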
7473
7474 void
7475 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
7476 {
7477 rtx neglab, donelab, i0, i1, f0, in, out, limit;
7478
7479 out = operands[0];
7480 in = force_reg (mode, operands[1]);
7481 neglab = gen_label_rtx ();
7482 donelab = gen_label_rtx ();
7483 i0 = gen_reg_rtx (DImode);
7484 i1 = gen_reg_rtx (DImode);
7485 limit = gen_reg_rtx (mode);
7486 f0 = gen_reg_rtx (mode);
7487
7488 emit_move_insn (limit,
7489 CONST_DOUBLE_FROM_REAL_VALUE (
7490 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
7491 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
7492
7493 emit_insn (gen_rtx_SET (VOIDmode,
7494 out,
7495 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
7496 emit_jump_insn (gen_jump (donelab));
7497 emit_barrier ();
7498
7499 emit_label (neglab);
7500
7501 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
7502 emit_insn (gen_rtx_SET (VOIDmode,
7503 i0,
7504 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
7505 emit_insn (gen_movdi (i1, const1_rtx));
7506 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
7507 emit_insn (gen_xordi3 (out, i0, i1));
7508
7509 emit_label (donelab);
7510 }
7511
7512 /* Return the string to output a conditional branch to LABEL, testing
7513 register REG. LABEL is the operand number of the label; REG is the
7514 operand number of the reg. OP is the conditional expression. The mode
7515 of REG says what kind of comparison we made.
7516
7517 DEST is the destination insn (i.e. the label), INSN is the source.
7518
7519 REVERSED is nonzero if we should reverse the sense of the comparison.
7520
7521 ANNUL is nonzero if we should generate an annulling branch. */
7522
7523 const char *
7524 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
7525 int annul, rtx insn)
7526 {
7527 static char string[64];
7528 enum rtx_code code = GET_CODE (op);
7529 enum machine_mode mode = GET_MODE (XEXP (op, 0));
7530 rtx note;
7531 int far;
7532 char *p;
7533
7534 /* Branch-on-register instructions are limited to +-128KB. If the target
7535 is too far away, change
7536
7537 brnz,pt %g1, .LC30
7538
7539 to
7540
7541 brz,pn %g1, .+12
7542 nop
7543 ba,pt %xcc, .LC30
7544
7545 and
7546
7547 brgez,a,pn %o1, .LC29
7548
7549 to
7550
7551 brlz,pt %o1, .+16
7552 nop
7553 ba,pt %xcc, .LC29 */
7554
7555 far = get_attr_length (insn) >= 3;
7556
7557 /* These are integer-register branches, so we can always just reverse the code. */
7558 if (reversed ^ far)
7559 code = reverse_condition (code);
7560
7561 /* Only 64-bit versions of these instructions exist. */
7562 gcc_assert (mode == DImode);
7563
7564 /* Start by writing the branch condition. */
7565
7566 switch (code)
7567 {
7568 case NE:
7569 strcpy (string, "brnz");
7570 break;
7571
7572 case EQ:
7573 strcpy (string, "brz");
7574 break;
7575
7576 case GE:
7577 strcpy (string, "brgez");
7578 break;
7579
7580 case LT:
7581 strcpy (string, "brlz");
7582 break;
7583
7584 case LE:
7585 strcpy (string, "brlez");
7586 break;
7587
7588 case GT:
7589 strcpy (string, "brgz");
7590 break;
7591
7592 default:
7593 gcc_unreachable ();
7594 }
7595
7596 p = strchr (string, '\0');
7597
7598 /* Now add the annulling, reg, label, and nop. */
7599 if (annul && ! far)
7600 {
7601 strcpy (p, ",a");
7602 p += 2;
7603 }
7604
7605 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7606 {
7607 strcpy (p,
7608 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
7609 ? ",pt" : ",pn");
7610 p += 3;
7611 }
7612
7613 *p = p < string + 8 ? '\t' : ' ';
7614 p++;
7615 *p++ = '%';
7616 *p++ = '0' + reg;
7617 *p++ = ',';
7618 *p++ = ' ';
7619 if (far)
7620 {
7621 int veryfar = 1, delta;
7622
7623 if (INSN_ADDRESSES_SET_P ())
7624 {
7625 delta = (INSN_ADDRESSES (INSN_UID (dest))
7626 - INSN_ADDRESSES (INSN_UID (insn)));
7627 /* Leave some instructions for "slop". */
7628 if (delta >= -260000 && delta < 260000)
7629 veryfar = 0;
7630 }
7631
7632 strcpy (p, ".+12\n\t nop\n\t");
7633 /* Skip the next insn if requested or
7634 if we know that it will be a nop. */
7635 if (annul || ! final_sequence)
7636 p[3] = '6';
7637 p += 12;
7638 if (veryfar)
7639 {
7640 strcpy (p, "b\t");
7641 p += 2;
7642 }
7643 else
7644 {
7645 strcpy (p, "ba,pt\t%%xcc, ");
7646 p += 13;
7647 }
7648 }
7649 *p++ = '%';
7650 *p++ = 'l';
7651 *p++ = '0' + label;
7652 *p++ = '%';
7653 *p++ = '#';
7654 *p = '\0';
7655
7656 return string;
7657 }
7658
7659 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
7660 Such instructions cannot be used in the delay slot of a return insn on v9.
7661 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
7662 */
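/* For example (a sketch): with a V9 "return" insn the register window
 is already popped when the delay slot executes, so a candidate such
 as "mov %i1, %i0" is rewritten to "mov %o1, %o0"; any insn that
 mentions %l0-%l7 or %o0-%o7 up front makes us return 1, since those
 registers belong to the dead window. */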
7663
7664 static int
7665 epilogue_renumber (register rtx *where, int test)
7666 {
7667 register const char *fmt;
7668 register int i;
7669 register enum rtx_code code;
7670
7671 if (*where == 0)
7672 return 0;
7673
7674 code = GET_CODE (*where);
7675
7676 switch (code)
7677 {
7678 case REG:
7679 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
7680 return 1;
7681 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
7682 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO (*where)));
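/* Fall through: any other register is as harmless as the constants below. */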
7683 case SCRATCH:
7684 case CC0:
7685 case PC:
7686 case CONST_INT:
7687 case CONST_DOUBLE:
7688 return 0;
7689
7690 /* Do not replace the frame pointer with the stack pointer because
7691 it can cause the delayed instruction to load below the stack.
7692 This occurs when instructions like:
7693
7694 (set (reg/i:SI 24 %i0)
7695 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
7696 (const_int -20 [0xffffffec])) 0))
7697
7698 are in the return delay slot. */
7699 case PLUS:
7700 if (GET_CODE (XEXP (*where, 0)) == REG
7701 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
7702 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
7703 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
7704 return 1;
7705 break;
7706
7707 case MEM:
7708 if (SPARC_STACK_BIAS
7709 && GET_CODE (XEXP (*where, 0)) == REG
7710 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
7711 return 1;
7712 break;
7713
7714 default:
7715 break;
7716 }
7717
7718 fmt = GET_RTX_FORMAT (code);
7719
7720 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7721 {
7722 if (fmt[i] == 'E')
7723 {
7724 register int j;
7725 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
7726 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
7727 return 1;
7728 }
7729 else if (fmt[i] == 'e'
7730 && epilogue_renumber (&(XEXP (*where, i)), test))
7731 return 1;
7732 }
7733 return 0;
7734 }
7735 \f
7736 /* Leaf functions and non-leaf functions have different needs. */
7737
7738 static const int
7739 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
7740
7741 static const int
7742 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
7743
7744 static const int *const reg_alloc_orders[] = {
7745 reg_leaf_alloc_order,
7746 reg_nonleaf_alloc_order};
7747
7748 void
7749 order_regs_for_local_alloc (void)
7750 {
7751 static int last_order_nonleaf = 1;
7752
7753 if (df_regs_ever_live_p (15) != last_order_nonleaf)
7754 {
7755 last_order_nonleaf = !last_order_nonleaf;
7756 memcpy ((char *) reg_alloc_order,
7757 (const char *) reg_alloc_orders[last_order_nonleaf],
7758 FIRST_PSEUDO_REGISTER * sizeof (int));
7759 }
7760 }
7761 \f
7762 /* Return 1 if REG and MEM are legitimate enough to allow the various
7763 mem<-->reg splits to be run. */
7764
7765 int
7766 sparc_splitdi_legitimate (rtx reg, rtx mem)
7767 {
7768 /* Punt if we are here by mistake. */
7769 gcc_assert (reload_completed);
7770
7771 /* We must have an offsettable memory reference. */
7772 if (! offsettable_memref_p (mem))
7773 return 0;
7774
7775 /* If we have legitimate args for ldd/std, we do not want
7776 the split to happen. */
7777 if ((REGNO (reg) % 2) == 0
7778 && mem_min_alignment (mem, 8))
7779 return 0;
7780
7781 /* Success. */
7782 return 1;
7783 }
7784
7785 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
7786
7787 int
7788 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
7789 {
7790 int regno1, regno2;
7791
7792 if (GET_CODE (reg1) == SUBREG)
7793 reg1 = SUBREG_REG (reg1);
7794 if (GET_CODE (reg1) != REG)
7795 return 0;
7796 regno1 = REGNO (reg1);
7797
7798 if (GET_CODE (reg2) == SUBREG)
7799 reg2 = SUBREG_REG (reg2);
7800 if (GET_CODE (reg2) != REG)
7801 return 0;
7802 regno2 = REGNO (reg2);
7803
7804 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
7805 return 1;
7806
7807 if (TARGET_VIS3)
7808 {
7809 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
7810 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
7811 return 1;
7812 }
7813
7814 return 0;
7815 }
7816
7817 /* Return 1 if x and y are some kind of REG and they refer to
7818 different hard registers. This test is guaranteed to be
7819 run after reload. */
7820
7821 int
7822 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
7823 {
7824 if (GET_CODE (x) != REG)
7825 return 0;
7826 if (GET_CODE (y) != REG)
7827 return 0;
7828 if (REGNO (x) == REGNO (y))
7829 return 0;
7830 return 1;
7831 }
7832
7833 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
7834 This makes them candidates for using ldd and std insns.
7835
7836 Note reg1 and reg2 *must* be hard registers. */
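/* For instance (a sketch): %o0/%o1 (regnos 8 and 9) form a valid pair,
 while %o1/%o2 (odd first regno) and %o1/%o0 (wrong order) do not; on
 V9 even %o0/%o1 is rejected because integer ldd is deprecated. */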
7837
7838 int
7839 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
7840 {
7841 /* We might have been passed a SUBREG. */
7842 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
7843 return 0;
7844
7845 if (REGNO (reg1) % 2 != 0)
7846 return 0;
7847
7848 /* Integer ldd is deprecated in SPARC V9. */
7849 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
7850 return 0;
7851
7852 return (REGNO (reg1) == REGNO (reg2) - 1);
7853 }
7854
7855 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
7856 an ldd or std insn.
7857
7858 This can only happen when addr1 and addr2, the addresses in mem1
7859 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
7860 addr1 must also be aligned on a 64-bit boundary.
7861
7862 Also, if dependent_reg_rtx is not null, it should not be used to
7863 compute the address for mem1, i.e. we cannot optimize a sequence
7864 like:
7865 ld [%o0], %o0
7866 ld [%o0 + 4], %o1
7867 to
7868 ldd [%o0], %o0
7869 nor:
7870 ld [%g3 + 4], %g3
7871 ld [%g3], %g2
7872 to
7873 ldd [%g3], %g2
7874
7875 But, note that the transformation from:
7876 ld [%g2 + 4], %g3
7877 ld [%g2], %g2
7878 to
7879 ldd [%g2], %g2
7880 is perfectly fine. Thus, the peephole2 patterns always pass us
7881 the destination register of the first load, never the second one.
7882
7883 For stores we don't have a similar problem, so dependent_reg_rtx is
7884 NULL_RTX. */
7885
7886 int
7887 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
7888 {
7889 rtx addr1, addr2;
7890 unsigned int reg1;
7891 HOST_WIDE_INT offset1;
7892
7893 /* The mems cannot be volatile. */
7894 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
7895 return 0;
7896
7897 /* MEM1 should be aligned on a 64-bit boundary. */
7898 if (MEM_ALIGN (mem1) < 64)
7899 return 0;
7900
7901 addr1 = XEXP (mem1, 0);
7902 addr2 = XEXP (mem2, 0);
7903
7904 /* Extract a register number and offset (if used) from the first addr. */
7905 if (GET_CODE (addr1) == PLUS)
7906 {
7907 /* If not a REG, return zero. */
7908 if (GET_CODE (XEXP (addr1, 0)) != REG)
7909 return 0;
7910 else
7911 {
7912 reg1 = REGNO (XEXP (addr1, 0));
7913 /* The offset must be constant! */
7914 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
7915 return 0;
7916 offset1 = INTVAL (XEXP (addr1, 1));
7917 }
7918 }
7919 else if (GET_CODE (addr1) != REG)
7920 return 0;
7921 else
7922 {
7923 reg1 = REGNO (addr1);
7924 /* This was a simple (mem (reg)) expression. Offset is 0. */
7925 offset1 = 0;
7926 }
7927
7928 /* Make sure the second address is of the form (plus (reg) (const_int)). */
7929 if (GET_CODE (addr2) != PLUS)
7930 return 0;
7931
7932 if (GET_CODE (XEXP (addr2, 0)) != REG
7933 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
7934 return 0;
7935
7936 if (reg1 != REGNO (XEXP (addr2, 0)))
7937 return 0;
7938
7939 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
7940 return 0;
7941
7942 /* The first offset must be evenly divisible by 8 to ensure the
7943 address is 64-bit aligned. */
7944 if (offset1 % 8 != 0)
7945 return 0;
7946
7947 /* The offset for the second addr must be 4 more than the first addr. */
7948 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
7949 return 0;
7950
7951 /* All the tests passed. addr1 and addr2 are valid for ldd and std
7952 instructions. */
7953 return 1;
7954 }
7955
7956 /* Return 1 if reg is a pseudo, or is the first register in
7957 a hard register pair. This makes it suitable for use in
7958 ldd and std insns. */
7959
7960 int
7961 register_ok_for_ldd (rtx reg)
7962 {
7963 /* We might have been passed a SUBREG. */
7964 if (!REG_P (reg))
7965 return 0;
7966
7967 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
7968 return (REGNO (reg) % 2 == 0);
7969
7970 return 1;
7971 }
7972
7973 /* Return 1 if OP is a memory whose address is known to be
7974 aligned to 8-byte boundary, or a pseudo during reload.
7975 This makes it suitable for use in ldd and std insns. */
7976
7977 int
7978 memory_ok_for_ldd (rtx op)
7979 {
7980 if (MEM_P (op))
7981 {
7982 /* In 64-bit mode, we assume that the address is word-aligned. */
7983 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
7984 return 0;
7985
7986 if (! can_create_pseudo_p ()
7987 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
7988 return 0;
7989 }
7990 else if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)
7991 {
7992 if (!(reload_in_progress && reg_renumber[REGNO (op)] < 0))
7993 return 0;
7994 }
7995 else
7996 return 0;
7997
7998 return 1;
7999 }
8000 \f
8001 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8002
8003 static bool
8004 sparc_print_operand_punct_valid_p (unsigned char code)
8005 {
8006 if (code == '#'
8007 || code == '*'
8008 || code == '('
8009 || code == ')'
8010 || code == '_'
8011 || code == '&')
8012 return true;
8013
8014 return false;
8015 }
8016
8017 /* Implement TARGET_PRINT_OPERAND.
8018 Print operand X (an rtx) in assembler syntax to file FILE.
8019 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8020 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8021
8022 static void
8023 sparc_print_operand (FILE *file, rtx x, int code)
8024 {
8025 switch (code)
8026 {
8027 case '#':
8028 /* Output an insn in a delay slot. */
8029 if (final_sequence)
8030 sparc_indent_opcode = 1;
8031 else
8032 fputs ("\n\t nop", file);
8033 return;
8034 case '*':
8035 /* Output an annul flag if there's nothing for the delay slot and we
8036 are optimizing. This is always used with '(' below.
8037 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8038 this is a dbx bug. So, we only do this when optimizing.
8039 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8040 Always emit a nop in case the next instruction is a branch. */
8041 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8042 fputs (",a", file);
8043 return;
8044 case '(':
8045 /* Output a 'nop' if there's nothing for the delay slot and we are
8046 not optimizing. This is always used with '*' above. */
8047 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8048 fputs ("\n\t nop", file);
8049 else if (final_sequence)
8050 sparc_indent_opcode = 1;
8051 return;
8052 case ')':
8053 /* Output the right displacement from the saved PC on function return.
8054 The caller may have placed an "unimp" insn immediately after the call
8055 so we have to account for it. This insn is used in the 32-bit ABI
8056 when calling a function that returns a non zero-sized structure. The
8057 64-bit ABI doesn't have it. Be careful to have this test be the same
8058 as that for the call. The exception is when sparc_std_struct_return
8059 is enabled, the psABI is followed exactly and the adjustment is made
8060 by the code in sparc_struct_value_rtx. The call emitted is the same
8061 when sparc_std_struct_return is enabled. */
8062 if (!TARGET_ARCH64
8063 && cfun->returns_struct
8064 && !sparc_std_struct_return
8065 && DECL_SIZE (DECL_RESULT (current_function_decl))
8066 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8067 == INTEGER_CST
8068 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8069 fputs ("12", file);
8070 else
8071 fputc ('8', file);
8072 return;
8073 case '_':
8074 /* Output the Embedded Medium/Anywhere code model base register. */
8075 fputs (EMBMEDANY_BASE_REG, file);
8076 return;
8077 case '&':
8078 /* Print some local dynamic TLS name. */
8079 assemble_name (file, get_some_local_dynamic_name ());
8080 return;
8081
8082 case 'Y':
8083 /* Adjust the operand to take into account a RESTORE operation. */
8084 if (GET_CODE (x) == CONST_INT)
8085 break;
8086 else if (GET_CODE (x) != REG)
8087 output_operand_lossage ("invalid %%Y operand");
8088 else if (REGNO (x) < 8)
8089 fputs (reg_names[REGNO (x)], file);
8090 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8091 fputs (reg_names[REGNO (x)-16], file);
8092 else
8093 output_operand_lossage ("invalid %%Y operand");
8094 return;
8095 case 'L':
8096 /* Print out the low order register name of a register pair. */
8097 if (WORDS_BIG_ENDIAN)
8098 fputs (reg_names[REGNO (x)+1], file);
8099 else
8100 fputs (reg_names[REGNO (x)], file);
8101 return;
8102 case 'H':
8103 /* Print out the high order register name of a register pair. */
8104 if (WORDS_BIG_ENDIAN)
8105 fputs (reg_names[REGNO (x)], file);
8106 else
8107 fputs (reg_names[REGNO (x)+1], file);
8108 return;
8109 case 'R':
8110 /* Print out the second register name of a register pair or quad.
8111 I.e., R (%o0) => %o1. */
8112 fputs (reg_names[REGNO (x)+1], file);
8113 return;
8114 case 'S':
8115 /* Print out the third register name of a register quad.
8116 I.e., S (%o0) => %o2. */
8117 fputs (reg_names[REGNO (x)+2], file);
8118 return;
8119 case 'T':
8120 /* Print out the fourth register name of a register quad.
8121 I.e., T (%o0) => %o3. */
8122 fputs (reg_names[REGNO (x)+3], file);
8123 return;
8124 case 'x':
8125 /* Print a condition code register. */
8126 if (REGNO (x) == SPARC_ICC_REG)
8127 {
8128 /* We don't handle CC[X]_NOOVmode because they're not supposed
8129 to occur here. */
8130 if (GET_MODE (x) == CCmode)
8131 fputs ("%icc", file);
8132 else if (GET_MODE (x) == CCXmode)
8133 fputs ("%xcc", file);
8134 else
8135 gcc_unreachable ();
8136 }
8137 else
8138 /* %fccN register */
8139 fputs (reg_names[REGNO (x)], file);
8140 return;
8141 case 'm':
8142 /* Print the operand's address only. */
8143 output_address (XEXP (x, 0));
8144 return;
8145 case 'r':
8146 /* In this case we need a register. Use %g0 if the
8147 operand is const0_rtx. */
8148 if (x == const0_rtx
8149 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8150 {
8151 fputs ("%g0", file);
8152 return;
8153 }
8154 else
8155 break;
8156
8157 case 'A':
8158 switch (GET_CODE (x))
8159 {
8160 case IOR: fputs ("or", file); break;
8161 case AND: fputs ("and", file); break;
8162 case XOR: fputs ("xor", file); break;
8163 default: output_operand_lossage ("invalid %%A operand");
8164 }
8165 return;
8166
8167 case 'B':
8168 switch (GET_CODE (x))
8169 {
8170 case IOR: fputs ("orn", file); break;
8171 case AND: fputs ("andn", file); break;
8172 case XOR: fputs ("xnor", file); break;
8173 default: output_operand_lossage ("invalid %%B operand");
8174 }
8175 return;
8176
8177 /* This is used by the conditional move instructions. */
8178 case 'C':
8179 {
8180 enum rtx_code rc = GET_CODE (x);
8181
8182 switch (rc)
8183 {
8184 case NE: fputs ("ne", file); break;
8185 case EQ: fputs ("e", file); break;
8186 case GE: fputs ("ge", file); break;
8187 case GT: fputs ("g", file); break;
8188 case LE: fputs ("le", file); break;
8189 case LT: fputs ("l", file); break;
8190 case GEU: fputs ("geu", file); break;
8191 case GTU: fputs ("gu", file); break;
8192 case LEU: fputs ("leu", file); break;
8193 case LTU: fputs ("lu", file); break;
8194 case LTGT: fputs ("lg", file); break;
8195 case UNORDERED: fputs ("u", file); break;
8196 case ORDERED: fputs ("o", file); break;
8197 case UNLT: fputs ("ul", file); break;
8198 case UNLE: fputs ("ule", file); break;
8199 case UNGT: fputs ("ug", file); break;
8200 case UNGE: fputs ("uge", file); break;
8201 case UNEQ: fputs ("ue", file); break;
8202 default: output_operand_lossage ("invalid %%C operand");
8203 }
8204 return;
8205 }
8206
8207 /* These are used by the movr instruction pattern. */
8208 case 'D':
8209 {
8210 enum rtx_code rc = GET_CODE (x);
8211 switch (rc)
8212 {
8213 case NE: fputs ("ne", file); break;
8214 case EQ: fputs ("e", file); break;
8215 case GE: fputs ("gez", file); break;
8216 case LT: fputs ("lz", file); break;
8217 case LE: fputs ("lez", file); break;
8218 case GT: fputs ("gz", file); break;
8219 default: output_operand_lossage ("invalid %%D operand");
8220 }
8221 return;
8222 }
8223
8224 case 'b':
8225 {
8226 /* Print a sign-extended character. */
8227 int i = trunc_int_for_mode (INTVAL (x), QImode);
8228 fprintf (file, "%d", i);
8229 return;
8230 }
8231
8232 case 'f':
8233 /* Operand must be a MEM; write its address. */
8234 if (GET_CODE (x) != MEM)
8235 output_operand_lossage ("invalid %%f operand");
8236 output_address (XEXP (x, 0));
8237 return;
8238
8239 case 's':
8240 {
8241 /* Print a sign-extended 32-bit value. */
8242 HOST_WIDE_INT i;
8243 if (GET_CODE (x) == CONST_INT)
8244 i = INTVAL (x);
8245 else if (GET_CODE (x) == CONST_DOUBLE)
8246 i = CONST_DOUBLE_LOW (x);
8247 else
8248 {
8249 output_operand_lossage ("invalid %%s operand");
8250 return;
8251 }
8252 i = trunc_int_for_mode (i, SImode);
8253 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8254 return;
8255 }
8256
8257 case 0:
8258 /* Do nothing special. */
8259 break;
8260
8261 default:
8262 /* Undocumented flag. */
8263 output_operand_lossage ("invalid operand output code");
8264 }
8265
8266 if (GET_CODE (x) == REG)
8267 fputs (reg_names[REGNO (x)], file);
8268 else if (GET_CODE (x) == MEM)
8269 {
8270 fputc ('[', file);
8271 /* Poor Sun assembler doesn't understand absolute addressing. */
8272 if (CONSTANT_P (XEXP (x, 0)))
8273 fputs ("%g0+", file);
8274 output_address (XEXP (x, 0));
8275 fputc (']', file);
8276 }
8277 else if (GET_CODE (x) == HIGH)
8278 {
8279 fputs ("%hi(", file);
8280 output_addr_const (file, XEXP (x, 0));
8281 fputc (')', file);
8282 }
8283 else if (GET_CODE (x) == LO_SUM)
8284 {
8285 sparc_print_operand (file, XEXP (x, 0), 0);
8286 if (TARGET_CM_MEDMID)
8287 fputs ("+%l44(", file);
8288 else
8289 fputs ("+%lo(", file);
8290 output_addr_const (file, XEXP (x, 1));
8291 fputc (')', file);
8292 }
8293 else if (GET_CODE (x) == CONST_DOUBLE
8294 && (GET_MODE (x) == VOIDmode
8295 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
8296 {
8297 if (CONST_DOUBLE_HIGH (x) == 0)
8298 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
8299 else if (CONST_DOUBLE_HIGH (x) == -1
8300 && CONST_DOUBLE_LOW (x) < 0)
8301 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
8302 else
8303 output_operand_lossage ("long long constant not a valid immediate operand");
8304 }
8305 else if (GET_CODE (x) == CONST_DOUBLE)
8306 output_operand_lossage ("floating point constant not a valid immediate operand");
8307 else { output_addr_const (file, x); }
8308 }
8309
8310 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8311
8312 static void
8313 sparc_print_operand_address (FILE *file, rtx x)
8314 {
8315 register rtx base, index = 0;
8316 int offset = 0;
8317 register rtx addr = x;
8318
8319 if (REG_P (addr))
8320 fputs (reg_names[REGNO (addr)], file);
8321 else if (GET_CODE (addr) == PLUS)
8322 {
8323 if (CONST_INT_P (XEXP (addr, 0)))
8324 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
8325 else if (CONST_INT_P (XEXP (addr, 1)))
8326 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
8327 else
8328 base = XEXP (addr, 0), index = XEXP (addr, 1);
8329 if (GET_CODE (base) == LO_SUM)
8330 {
8331 gcc_assert (USE_AS_OFFSETABLE_LO10
8332 && TARGET_ARCH64
8333 && ! TARGET_CM_MEDMID);
8334 output_operand (XEXP (base, 0), 0);
8335 fputs ("+%lo(", file);
8336 output_address (XEXP (base, 1));
8337 fprintf (file, ")+%d", offset);
8338 }
8339 else
8340 {
8341 fputs (reg_names[REGNO (base)], file);
8342 if (index == 0)
8343 fprintf (file, "%+d", offset);
8344 else if (REG_P (index))
8345 fprintf (file, "+%s", reg_names[REGNO (index)]);
8346 else if (GET_CODE (index) == SYMBOL_REF
8347 || GET_CODE (index) == LABEL_REF
8348 || GET_CODE (index) == CONST)
8349 fputc ('+', file), output_addr_const (file, index);
8350 else gcc_unreachable ();
8351 }
8352 }
8353 else if (GET_CODE (addr) == MINUS
8354 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
8355 {
8356 output_addr_const (file, XEXP (addr, 0));
8357 fputs ("-(", file);
8358 output_addr_const (file, XEXP (addr, 1));
8359 fputs ("-.)", file);
8360 }
8361 else if (GET_CODE (addr) == LO_SUM)
8362 {
8363 output_operand (XEXP (addr, 0), 0);
8364 if (TARGET_CM_MEDMID)
8365 fputs ("+%l44(", file);
8366 else
8367 fputs ("+%lo(", file);
8368 output_address (XEXP (addr, 1));
8369 fputc (')', file);
8370 }
8371 else if (flag_pic
8372 && GET_CODE (addr) == CONST
8373 && GET_CODE (XEXP (addr, 0)) == MINUS
8374 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
8375 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
8376 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
8377 {
8378 addr = XEXP (addr, 0);
8379 output_addr_const (file, XEXP (addr, 0));
8380 /* Group the args of the second CONST in parentheses. */
8381 fputs ("-(", file);
8382 /* Skip past the second CONST--it does nothing for us. */
8383 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
8384 /* Close the parenthesis. */
8385 fputc (')', file);
8386 }
8387 else
8388 {
8389 output_addr_const (file, addr);
8390 }
8391 }
8392 \f
8393 /* Target hook for assembling integer objects. The sparc version has
8394 special handling for aligned DI-mode objects. */
8395
8396 static bool
8397 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
8398 {
8399 /* ??? We only output .xword's for symbols and only then in environments
8400 where the assembler can handle them. */
8401 if (aligned_p && size == 8
8402 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
8403 {
8404 if (TARGET_V9)
8405 {
8406 assemble_integer_with_op ("\t.xword\t", x);
8407 return true;
8408 }
8409 else
8410 {
8411 assemble_aligned_integer (4, const0_rtx);
8412 assemble_aligned_integer (4, x);
8413 return true;
8414 }
8415 }
8416 return default_assemble_integer (x, size, aligned_p);
8417 }
8418 \f
8419 /* Return the value of a code used in the .proc pseudo-op that says
8420 what kind of result this function returns. For non-C types, we pick
8421 the closest C type. */
8422
8423 #ifndef SHORT_TYPE_SIZE
8424 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
8425 #endif
8426
8427 #ifndef INT_TYPE_SIZE
8428 #define INT_TYPE_SIZE BITS_PER_WORD
8429 #endif
8430
8431 #ifndef LONG_TYPE_SIZE
8432 #define LONG_TYPE_SIZE BITS_PER_WORD
8433 #endif
8434
8435 #ifndef LONG_LONG_TYPE_SIZE
8436 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
8437 #endif
8438
8439 #ifndef FLOAT_TYPE_SIZE
8440 #define FLOAT_TYPE_SIZE BITS_PER_WORD
8441 #endif
8442
8443 #ifndef DOUBLE_TYPE_SIZE
8444 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
8445 #endif
8446
8447 #ifndef LONG_DOUBLE_TYPE_SIZE
8448 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
8449 #endif
8450
8451 unsigned long
8452 sparc_type_code (register tree type)
8453 {
8454 register unsigned long qualifiers = 0;
8455 register unsigned shift;
8456
8457 /* Only the first 30 bits of the qualifier are valid. We must refrain from
8458 setting more, since some assemblers will give an error for this. Also,
8459 we must be careful to avoid shifts of 32 bits or more to avoid getting
8460 unpredictable results. */
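/* Worked example (a sketch): for the C type "unsigned char *" the loop
 first sees POINTER_TYPE and ORs 1 << 6 into qualifiers, then reaches
 the unsigned char INTEGER_TYPE and returns (1 << 6) | 12 = 0x4c. */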
8461
8462 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
8463 {
8464 switch (TREE_CODE (type))
8465 {
8466 case ERROR_MARK:
8467 return qualifiers;
8468
8469 case ARRAY_TYPE:
8470 qualifiers |= (3 << shift);
8471 break;
8472
8473 case FUNCTION_TYPE:
8474 case METHOD_TYPE:
8475 qualifiers |= (2 << shift);
8476 break;
8477
8478 case POINTER_TYPE:
8479 case REFERENCE_TYPE:
8480 case OFFSET_TYPE:
8481 qualifiers |= (1 << shift);
8482 break;
8483
8484 case RECORD_TYPE:
8485 return (qualifiers | 8);
8486
8487 case UNION_TYPE:
8488 case QUAL_UNION_TYPE:
8489 return (qualifiers | 9);
8490
8491 case ENUMERAL_TYPE:
8492 return (qualifiers | 10);
8493
8494 case VOID_TYPE:
8495 return (qualifiers | 16);
8496
8497 case INTEGER_TYPE:
8498 /* If this is a range type, consider it to be the underlying
8499 type. */
8500 if (TREE_TYPE (type) != 0)
8501 break;
8502
8503 /* Carefully distinguish all the standard types of C,
8504 without messing up if the language is not C. We do this by
8505 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
8506 look at both the names and the above fields, but that's redundant.
8507 Any type whose size is between two C types will be considered
8508 to be the wider of the two types. Also, we do not have a
8509 special code to use for "long long", so anything wider than
8510 long is treated the same. Note that we can't distinguish
8511 between "int" and "long" in this code if they are the same
8512 size, but that's fine, since neither can the assembler. */
8513
8514 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
8515 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
8516
8517 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
8518 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
8519
8520 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
8521 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
8522
8523 else
8524 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
8525
8526 case REAL_TYPE:
8527 /* If this is a range type, consider it to be the underlying
8528 type. */
8529 if (TREE_TYPE (type) != 0)
8530 break;
8531
8532 /* Carefully distinguish all the standard types of C,
8533 without messing up if the language is not C. */
8534
8535 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
8536 return (qualifiers | 6);
8537
8538 else
8539 return (qualifiers | 7);
8540
8541 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
8542 /* ??? We need to distinguish between double and float complex types,
8543 but I don't know how yet because I can't reach this code from
8544 existing front-ends. */
8545 return (qualifiers | 7); /* Who knows? */
8546
8547 case VECTOR_TYPE:
8548 case BOOLEAN_TYPE: /* Boolean truth value type. */
8549 case LANG_TYPE:
8550 case NULLPTR_TYPE:
8551 return qualifiers;
8552
8553 default:
8554 gcc_unreachable (); /* Not a type! */
8555 }
8556 }
8557
8558 return qualifiers;
8559 }
8560 \f
8561 /* Nested function support. */
8562
8563 /* Emit RTL insns to initialize the variable parts of a trampoline.
8564 FNADDR is an RTX for the address of the function's pure code.
8565 CXT is an RTX for the static chain value for the function.
8566
8567 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
8568 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
8569 (to store insns). This is a bit excessive. Perhaps a different
8570 mechanism would be better here.
8571
8572 Emit enough FLUSH insns to synchronize the data and instruction caches. */
8573
8574 static void
8575 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
8576 {
8577 /* SPARC 32-bit trampoline:
8578
8579 sethi %hi(fn), %g1
8580 sethi %hi(static), %g2
8581 jmp %g1+%lo(fn)
8582 or %g2, %lo(static), %g2
8583
8584 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
8585 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
8586 */
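/* So the first word below is 0x03000000 | (fnaddr >> 10), i.e. "sethi
 %hi(fn), %g1"; for instance, a made-up fnaddr of 0x12345678 would
 yield the word 0x03048d15 (a sketch of the arithmetic only). */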
8587
8588 emit_move_insn
8589 (adjust_address (m_tramp, SImode, 0),
8590 expand_binop (SImode, ior_optab,
8591 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
8592 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
8593 NULL_RTX, 1, OPTAB_DIRECT));
8594
8595 emit_move_insn
8596 (adjust_address (m_tramp, SImode, 4),
8597 expand_binop (SImode, ior_optab,
8598 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
8599 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
8600 NULL_RTX, 1, OPTAB_DIRECT));
8601
8602 emit_move_insn
8603 (adjust_address (m_tramp, SImode, 8),
8604 expand_binop (SImode, ior_optab,
8605 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
8606 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
8607 NULL_RTX, 1, OPTAB_DIRECT));
8608
8609 emit_move_insn
8610 (adjust_address (m_tramp, SImode, 12),
8611 expand_binop (SImode, ior_optab,
8612 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
8613 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
8614 NULL_RTX, 1, OPTAB_DIRECT));
8615
8616 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
8617 aligned on a 16-byte boundary so one flush clears it all. */
8618 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
8619 if (sparc_cpu != PROCESSOR_ULTRASPARC
8620 && sparc_cpu != PROCESSOR_ULTRASPARC3
8621 && sparc_cpu != PROCESSOR_NIAGARA
8622 && sparc_cpu != PROCESSOR_NIAGARA2
8623 && sparc_cpu != PROCESSOR_NIAGARA3
8624 && sparc_cpu != PROCESSOR_NIAGARA4)
8625 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
8626
8627 /* Call __enable_execute_stack after writing onto the stack to make sure
8628 the stack address is accessible. */
8629 #ifdef HAVE_ENABLE_EXECUTE_STACK
8630 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
8631 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
8632 #endif
8633
8634 }
8635
8636 /* The 64-bit version is simpler because it makes more sense to load the
8637 values as "immediate" data out of the trampoline. It's also easier since
8638 we can read the PC without clobbering a register. */
8639
8640 static void
8641 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
8642 {
8643 /* SPARC 64-bit trampoline:
8644
8645 rd %pc, %g1
8646 ldx [%g1+24], %g5
8647 jmp %g5
8648 ldx [%g1+16], %g5
8649 +16 bytes data
8650 */
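/* A sketch of the control flow: "rd %pc, %g1" captures the trampoline
 address, so [%g1+16] and [%g1+24] name the two data words below;
 the jump transfers to FNADDR loaded from offset 24 while the
 delay-slot load puts CXT, the static chain, from offset 16 into
 %g5. */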
8651
8652 emit_move_insn (adjust_address (m_tramp, SImode, 0),
8653 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
8654 emit_move_insn (adjust_address (m_tramp, SImode, 4),
8655 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
8656 emit_move_insn (adjust_address (m_tramp, SImode, 8),
8657 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
8658 emit_move_insn (adjust_address (m_tramp, SImode, 12),
8659 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
8660 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
8661 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
8662 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
8663
8664 if (sparc_cpu != PROCESSOR_ULTRASPARC
8665 && sparc_cpu != PROCESSOR_ULTRASPARC3
8666 && sparc_cpu != PROCESSOR_NIAGARA
8667 && sparc_cpu != PROCESSOR_NIAGARA2
8668 && sparc_cpu != PROCESSOR_NIAGARA3
8669 && sparc_cpu != PROCESSOR_NIAGARA4)
8670 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
8671
8672 /* Call __enable_execute_stack after writing onto the stack to make sure
8673 the stack address is accessible. */
8674 #ifdef HAVE_ENABLE_EXECUTE_STACK
8675 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
8676 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
8677 #endif
8678 }
8679
8680 /* Worker for TARGET_TRAMPOLINE_INIT. */
8681
8682 static void
8683 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
8684 {
8685 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
8686 cxt = force_reg (Pmode, cxt);
8687 if (TARGET_ARCH64)
8688 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
8689 else
8690 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
8691 }
8692 \f
8693 /* Adjust the cost of a scheduling dependency. Return the new cost of
8694 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
8695
8696 static int
8697 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
8698 {
8699 enum attr_type insn_type;
8700
8701 if (! recog_memoized (insn))
8702 return 0;
8703
8704 insn_type = get_attr_type (insn);
8705
8706 if (REG_NOTE_KIND (link) == 0)
8707 {
8708 /* Data dependency; DEP_INSN writes a register that INSN reads some
8709 cycles later. */
8710
8711 /* If a load, then the dependence must be on the memory address;
8712 add an extra "cycle". Note that the cost could be two cycles
8713 if the reg was written late in an instruction group; we cannot tell
8714 here. */
8715 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
8716 return cost + 3;
8717
8718 /* Get the delay only if the address of the store is the dependence. */
8719 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
8720 {
8721 rtx pat = PATTERN (insn);
8722 rtx dep_pat = PATTERN (dep_insn);
8723
8724 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
8725 return cost; /* This should not happen! */
8726
8727 /* The dependency between the two instructions was on the data that
8728 is being stored. Assume that this implies that the address of the
8729 store is not dependent. */
8730 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
8731 return cost;
8732
8733 return cost + 3; /* An approximation. */
8734 }
8735
8736 /* A shift instruction cannot receive its data from an instruction
8737 in the same cycle; add a one cycle penalty. */
8738 if (insn_type == TYPE_SHIFT)
8739 return cost + 3; /* Split before cascade into shift. */
8740 }
8741 else
8742 {
8743 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
8744 INSN writes some cycles later. */
8745
8746 /* These are only significant for the fpu unit; writing a fp reg before
8747 the fpu has finished with it stalls the processor. */
8748
8749 /* Reusing an integer register causes no problems. */
8750 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
8751 return 0;
8752 }
8753
8754 return cost;
8755 }
8756
8757 static int
8758 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
8759 {
8760 enum attr_type insn_type, dep_type;
8761 rtx pat = PATTERN (insn);
8762 rtx dep_pat = PATTERN (dep_insn);
8763
8764 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
8765 return cost;
8766
8767 insn_type = get_attr_type (insn);
8768 dep_type = get_attr_type (dep_insn);
8769
8770 switch (REG_NOTE_KIND (link))
8771 {
8772 case 0:
8773 /* Data dependency; DEP_INSN writes a register that INSN reads some
8774 cycles later. */
8775
8776 switch (insn_type)
8777 {
8778 case TYPE_STORE:
8779 case TYPE_FPSTORE:
8780 /* Get the delay iff the address of the store is the dependence. */
8781 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
8782 return cost;
8783
8784 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
8785 return cost;
8786 return cost + 3;
8787
8788 case TYPE_LOAD:
8789 case TYPE_SLOAD:
8790 case TYPE_FPLOAD:
8791 /* If a load, then the dependence must be on the memory address. If
8792 the addresses aren't equal, then it might be a false dependency. */
8793 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
8794 {
8795 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
8796 || GET_CODE (SET_DEST (dep_pat)) != MEM
8797 || GET_CODE (SET_SRC (pat)) != MEM
8798 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
8799 XEXP (SET_SRC (pat), 0)))
8800 return cost + 2;
8801
8802 return cost + 8;
8803 }
8804 break;
8805
8806 case TYPE_BRANCH:
8807 /* Compare to branch latency is 0. There is no benefit from
8808 separating compare and branch. */
8809 if (dep_type == TYPE_COMPARE)
8810 return 0;
8811 /* Floating point compare to branch latency is less than
8812 compare to conditional move. */
8813 if (dep_type == TYPE_FPCMP)
8814 return cost - 1;
8815 break;
8816 default:
8817 break;
8818 }
8819 break;
8820
8821 case REG_DEP_ANTI:
8822 /* Anti-dependencies only penalize the fpu unit. */
8823 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
8824 return 0;
8825 break;
8826
8827 default:
8828 break;
8829 }
8830
8831 return cost;
8832 }
8833
8834 static int
8835 sparc_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8836 {
8837 switch (sparc_cpu)
8838 {
8839 case PROCESSOR_SUPERSPARC:
8840 cost = supersparc_adjust_cost (insn, link, dep, cost);
8841 break;
8842 case PROCESSOR_HYPERSPARC:
8843 case PROCESSOR_SPARCLITE86X:
8844 cost = hypersparc_adjust_cost (insn, link, dep, cost);
8845 break;
8846 default:
8847 break;
8848 }
8849 return cost;
8850 }
8851
8852 static void
8853 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
8854 int sched_verbose ATTRIBUTE_UNUSED,
8855 int max_ready ATTRIBUTE_UNUSED)
8856 {}
8857
8858 static int
8859 sparc_use_sched_lookahead (void)
8860 {
8861 if (sparc_cpu == PROCESSOR_NIAGARA
8862 || sparc_cpu == PROCESSOR_NIAGARA2
8863 || sparc_cpu == PROCESSOR_NIAGARA3
8864 || sparc_cpu == PROCESSOR_NIAGARA4)
8865 return 0;
8866 if (sparc_cpu == PROCESSOR_ULTRASPARC
8867 || sparc_cpu == PROCESSOR_ULTRASPARC3)
8868 return 4;
8869 if ((1 << sparc_cpu) &
8870 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
8871 (1 << PROCESSOR_SPARCLITE86X)))
8872 return 3;
8873 return 0;
8874 }
8875
8876 static int
8877 sparc_issue_rate (void)
8878 {
8879 switch (sparc_cpu)
8880 {
8881 case PROCESSOR_NIAGARA:
8882 case PROCESSOR_NIAGARA2:
8883 case PROCESSOR_NIAGARA3:
8884 case PROCESSOR_NIAGARA4:
8885 default:
8886 return 1;
8887 case PROCESSOR_V9:
8888 /* Assume V9 processors are capable of at least dual-issue. */
8889 return 2;
8890 case PROCESSOR_SUPERSPARC:
8891 return 3;
8892 case PROCESSOR_HYPERSPARC:
8893 case PROCESSOR_SPARCLITE86X:
8894 return 2;
8895 case PROCESSOR_ULTRASPARC:
8896 case PROCESSOR_ULTRASPARC3:
8897 return 4;
8898 }
8899 }
8900
8901 static int
8902 set_extends (rtx insn)
8903 {
8904 register rtx pat = PATTERN (insn);
8905
8906 switch (GET_CODE (SET_SRC (pat)))
8907 {
8908 /* Load and some shift instructions zero extend. */
8909 case MEM:
8910 case ZERO_EXTEND:
8911 /* sethi clears the high bits. */
8912 case HIGH:
8913 /* LO_SUM is used with sethi; sethi clears the high
8914 bits, and the values used with lo_sum are positive. */
8915 case LO_SUM:
8916 /* A store-flag insn stores 0 or 1. */
8917 case LT: case LTU:
8918 case GT: case GTU:
8919 case LE: case LEU:
8920 case GE: case GEU:
8921 case EQ:
8922 case NE:
8923 return 1;
8924 case AND:
8925 {
8926 rtx op0 = XEXP (SET_SRC (pat), 0);
8927 rtx op1 = XEXP (SET_SRC (pat), 1);
8928 if (GET_CODE (op1) == CONST_INT)
8929 return INTVAL (op1) >= 0;
8930 if (GET_CODE (op0) != REG)
8931 return 0;
8932 if (sparc_check_64 (op0, insn) == 1)
8933 return 1;
8934 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
8935 }
8936 case IOR:
8937 case XOR:
8938 {
8939 rtx op0 = XEXP (SET_SRC (pat), 0);
8940 rtx op1 = XEXP (SET_SRC (pat), 1);
8941 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
8942 return 0;
8943 if (GET_CODE (op1) == CONST_INT)
8944 return INTVAL (op1) >= 0;
8945 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
8946 }
8947 case LSHIFTRT:
8948 return GET_MODE (SET_SRC (pat)) == SImode;
8949 /* Positive integers leave the high bits zero. */
8950 case CONST_DOUBLE:
8951 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
8952 case CONST_INT:
8953 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
8954 case ASHIFTRT:
8955 case SIGN_EXTEND:
8956 return - (GET_MODE (SET_SRC (pat)) == SImode);
8957 case REG:
8958 return sparc_check_64 (SET_SRC (pat), insn);
8959 default:
8960 return 0;
8961 }
8962 }
8963
8964 /* We _ought_ to have only one kind per function, but... */
8965 static GTY(()) rtx sparc_addr_diff_list;
8966 static GTY(()) rtx sparc_addr_list;
8967
8968 void
8969 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
8970 {
8971 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
8972 if (diff)
8973 sparc_addr_diff_list
8974 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
8975 else
8976 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
8977 }
8978
8979 static void
8980 sparc_output_addr_vec (rtx vec)
8981 {
8982 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
8983 int idx, vlen = XVECLEN (body, 0);
8984
8985 #ifdef ASM_OUTPUT_ADDR_VEC_START
8986 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
8987 #endif
8988
8989 #ifdef ASM_OUTPUT_CASE_LABEL
8990 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
8991 NEXT_INSN (lab));
8992 #else
8993 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
8994 #endif
8995
8996 for (idx = 0; idx < vlen; idx++)
8997 {
8998 ASM_OUTPUT_ADDR_VEC_ELT
8999 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9000 }
9001
9002 #ifdef ASM_OUTPUT_ADDR_VEC_END
9003 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9004 #endif
9005 }
9006
9007 static void
9008 sparc_output_addr_diff_vec (rtx vec)
9009 {
9010 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9011 rtx base = XEXP (XEXP (body, 0), 0);
9012 int idx, vlen = XVECLEN (body, 1);
9013
9014 #ifdef ASM_OUTPUT_ADDR_VEC_START
9015 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9016 #endif
9017
9018 #ifdef ASM_OUTPUT_CASE_LABEL
9019 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9020 NEXT_INSN (lab));
9021 #else
9022 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9023 #endif
9024
9025 for (idx = 0; idx < vlen; idx++)
9026 {
9027 ASM_OUTPUT_ADDR_DIFF_ELT
9028 (asm_out_file,
9029 body,
9030 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9031 CODE_LABEL_NUMBER (base));
9032 }
9033
9034 #ifdef ASM_OUTPUT_ADDR_VEC_END
9035 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9036 #endif
9037 }
9038
9039 static void
9040 sparc_output_deferred_case_vectors (void)
9041 {
9042 rtx t;
9043 int align;
9044
9045 if (sparc_addr_list == NULL_RTX
9046 && sparc_addr_diff_list == NULL_RTX)
9047 return;
9048
9049 /* Align to cache line in the function's code section. */
9050 switch_to_section (current_function_section ());
9051
9052 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9053 if (align > 0)
9054 ASM_OUTPUT_ALIGN (asm_out_file, align);
9055
9056 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9057 sparc_output_addr_vec (XEXP (t, 0));
9058 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9059 sparc_output_addr_diff_vec (XEXP (t, 0));
9060
9061 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9062 }
9063
9064 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9065 unknown. Return 1 if the high bits are zero, -1 if the register is
9066 sign extended. */
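/* For instance (a sketch): after "sethi %hi(sym), %o0" the upper 32
 bits of %o0 are known to be zero, so we return 1; after a signed
 32-bit load they are copies of bit 31, so we return -1; if the
 defining insn cannot be found or recognized, we conservatively
 return 0. */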
9067 int
9068 sparc_check_64 (rtx x, rtx insn)
9069 {
9070 /* If a register is set only once it is safe to ignore insns this
9071 code does not know how to handle. The loop will either recognize
9072 the single set and return the correct value or fail to recognize
9073 it and return 0. */
9074 int set_once = 0;
9075 rtx y = x;
9076
9077 gcc_assert (GET_CODE (x) == REG);
9078
9079 if (GET_MODE (x) == DImode)
9080 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9081
9082 if (flag_expensive_optimizations
9083 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9084 set_once = 1;
9085
9086 if (insn == 0)
9087 {
9088 if (set_once)
9089 insn = get_last_insn_anywhere ();
9090 else
9091 return 0;
9092 }
9093
9094 while ((insn = PREV_INSN (insn)))
9095 {
9096 switch (GET_CODE (insn))
9097 {
9098 case JUMP_INSN:
9099 case NOTE:
9100 break;
9101 case CODE_LABEL:
9102 case CALL_INSN:
9103 default:
9104 if (! set_once)
9105 return 0;
9106 break;
9107 case INSN:
9108 {
9109 rtx pat = PATTERN (insn);
9110 if (GET_CODE (pat) != SET)
9111 return 0;
9112 if (rtx_equal_p (x, SET_DEST (pat)))
9113 return set_extends (insn);
9114 if (y && rtx_equal_p (y, SET_DEST (pat)))
9115 return set_extends (insn);
9116 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9117 return 0;
9118 }
9119 }
9120 }
9121 return 0;
9122 }
9123
9124 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9125 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
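/* For example (a sketch): in the scratch-register alternative a 64-bit
 shift comes out as

 sllx %H1, 32, %3       ! build the 64-bit input in the scratch
 srl %L1, 0, %L1        ! zero-extend the low word if need be
 or %L1, %3, %3
 <opcode> %3, %2, %3    ! the shift itself
 srlx %3, 32, %H0       ! split the result back into a pair
 mov %3, %L0  */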
9126
9127 const char *
9128 output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
9129 {
9130 static char asm_code[60];
9131
9132 /* The scratch register is only required when the destination
9133 register is not a 64-bit global or out register. */
9134 if (which_alternative != 2)
9135 operands[3] = operands[0];
9136
9137 /* We can only shift by constants <= 63. */
9138 if (GET_CODE (operands[2]) == CONST_INT)
9139 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9140
9141 if (GET_CODE (operands[1]) == CONST_INT)
9142 {
9143 output_asm_insn ("mov\t%1, %3", operands);
9144 }
9145 else
9146 {
9147 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9148 if (sparc_check_64 (operands[1], insn) <= 0)
9149 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9150 output_asm_insn ("or\t%L1, %3, %3", operands);
9151 }
9152
9153 strcpy (asm_code, opcode);
9154
9155 if (which_alternative != 2)
9156 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9157 else
9158 return
9159 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9160 }
9161 \f
9162 /* Output rtl to increment the profiler label LABELNO
9163 for profiling a function entry. */
9164
9165 void
9166 sparc_profile_hook (int labelno)
9167 {
9168 char buf[32];
9169 rtx lab, fun;
9170
9171 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9172 if (NO_PROFILE_COUNTERS)
9173 {
9174 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9175 }
9176 else
9177 {
9178 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9179 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9180 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9181 }
9182 }
9183 \f
9184 #ifdef TARGET_SOLARIS
9185 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9186
9187 static void
9188 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9189 tree decl ATTRIBUTE_UNUSED)
9190 {
9191 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9192 {
9193 solaris_elf_asm_comdat_section (name, flags, decl);
9194 return;
9195 }
9196
9197 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9198
9199 if (!(flags & SECTION_DEBUG))
9200 fputs (",#alloc", asm_out_file);
9201 if (flags & SECTION_WRITE)
9202 fputs (",#write", asm_out_file);
9203 if (flags & SECTION_TLS)
9204 fputs (",#tls", asm_out_file);
9205 if (flags & SECTION_CODE)
9206 fputs (",#execinstr", asm_out_file);
9207
9208 /* ??? Handle SECTION_BSS. */
9209
9210 fputc ('\n', asm_out_file);
9211 }
9212 #endif /* TARGET_SOLARIS */
9213
9214 /* We do not allow indirect calls to be optimized into sibling calls.
9215
9216 We cannot use sibling calls when delayed branches are disabled
9217 because they will likely require the call delay slot to be filled.
9218
9219 Also, on SPARC 32-bit we cannot emit a sibling call when the
9220 current function returns a structure. This is because the "unimp
9221 after call" convention would cause the callee to return to the
9222 wrong place. The generic code already disallows cases where the
9223 function being called returns a structure.
9224
9225 It may seem strange how this last case could occur. Usually there
9226 is code after the call which jumps to epilogue code which dumps the
9227 return value into the struct return area. That ought to invalidate
9228 the sibling call, right? Well, in the C++ case we can end up passing
9229 the pointer to the struct return area to a constructor (which returns
9230 void) and then nothing else happens. Such a sibling call would look
9231 valid without the added check here.
9232
9233 VxWorks PIC PLT entries require the global pointer to be initialized
9234 on entry. We therefore can't emit sibling calls to them. */
9235 static bool
9236 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9237 {
9238 return (decl
9239 && flag_delayed_branch
9240 && (TARGET_ARCH64 || ! cfun->returns_struct)
9241 && !(TARGET_VXWORKS_RTP
9242 && flag_pic
9243 && !targetm.binds_local_p (decl)));
9244 }
9245 \f
9246 /* libfunc renaming. */
9247
9248 static void
9249 sparc_init_libfuncs (void)
9250 {
9251 if (TARGET_ARCH32)
9252 {
9253 /* Use the subroutines that Sun's library provides for integer
9254 multiply and divide. The `*' prevents an underscore from
9255 being prepended by the compiler. .umul is a little faster
9256 than .mul. */
9257 set_optab_libfunc (smul_optab, SImode, "*.umul");
9258 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9259 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9260 set_optab_libfunc (smod_optab, SImode, "*.rem");
9261 set_optab_libfunc (umod_optab, SImode, "*.urem");
9262
9263 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
9264 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9265 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9266 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9267 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9268 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9269
9270 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9271 is because with soft-float, the SFmode and DFmode sqrt
9272 instructions will be absent, and the compiler will notice and
9273 try to use the TFmode sqrt instruction for calls to the
9274 builtin function sqrt, but this fails. */
9275 if (TARGET_FPU)
9276 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9277
9278 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9279 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9280 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9281 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9282 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9283 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9284
9285 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
9286 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
9287 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
9288 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
9289
9290 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
9291 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
9292 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9293 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9294
9295 if (DITF_CONVERSION_LIBFUNCS)
9296 {
9297 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
9298 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
9299 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9300 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9301 }
9302
9303 if (SUN_CONVERSION_LIBFUNCS)
9304 {
9305 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9306 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9307 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9308 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9309 }
9310 }
9311 if (TARGET_ARCH64)
9312 {
      /* In the SPARC 64-bit ABI, SImode multiply and divide functions
9314 do not exist in the library. Make sure the compiler does not
9315 emit calls to them by accident. (It should always use the
9316 hardware instructions.) */
9317 set_optab_libfunc (smul_optab, SImode, 0);
9318 set_optab_libfunc (sdiv_optab, SImode, 0);
9319 set_optab_libfunc (udiv_optab, SImode, 0);
9320 set_optab_libfunc (smod_optab, SImode, 0);
9321 set_optab_libfunc (umod_optab, SImode, 0);
9322
9323 if (SUN_INTEGER_MULTIPLY_64)
9324 {
9325 set_optab_libfunc (smul_optab, DImode, "__mul64");
9326 set_optab_libfunc (sdiv_optab, DImode, "__div64");
9327 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
9328 set_optab_libfunc (smod_optab, DImode, "__rem64");
9329 set_optab_libfunc (umod_optab, DImode, "__urem64");
9330 }
9331
9332 if (SUN_CONVERSION_LIBFUNCS)
9333 {
9334 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
9335 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
9336 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
9337 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
9338 }
9339 }
9340 }
9341 \f
/* Build a builtin function named NAME with function code CODE and
   type TYPE.  */

static tree
def_builtin (const char *name, int code, tree type)
{
  return add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
			       NULL_TREE);
}

/* Likewise, but also mark the builtin as "const", i.e. as having no
   side effects and a result that depends only on its arguments.  */

static tree
def_builtin_const (const char *name, int code, tree type)
{
  tree t = def_builtin (name, code, type);

  if (t)
    TREE_READONLY (t) = 1;

  return t;
}
9357
9358 /* Implement the TARGET_INIT_BUILTINS target hook.
9359 Create builtin functions for special SPARC instructions. */
9360
9361 static void
9362 sparc_init_builtins (void)
9363 {
9364 if (TARGET_VIS)
9365 sparc_vis_init_builtins ();
9366 }
9367
9368 /* Create builtin functions for VIS 1.0 instructions. */
9369
9370 static void
9371 sparc_vis_init_builtins (void)
9372 {
9373 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
9374 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
9375 tree v4hi = build_vector_type (intHI_type_node, 4);
9376 tree v2hi = build_vector_type (intHI_type_node, 2);
9377 tree v2si = build_vector_type (intSI_type_node, 2);
9378 tree v1si = build_vector_type (intSI_type_node, 1);
9379
9380 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
9381 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
9382 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
9383 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
9384 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
9385 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
9386 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
9387 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
9388 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
9389 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
9390 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
9391 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
9392 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
9393 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
9394 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
9395 v8qi, v8qi,
9396 intDI_type_node, 0);
9397 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
9398 v8qi, v8qi, 0);
9399 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
9400 v8qi, v8qi, 0);
9401 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
9402 intDI_type_node,
9403 intDI_type_node, 0);
9404 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
9405 intSI_type_node,
9406 intSI_type_node, 0);
9407 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
9408 ptr_type_node,
9409 intSI_type_node, 0);
9410 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
9411 ptr_type_node,
9412 intDI_type_node, 0);
9413 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
9414 ptr_type_node,
9415 ptr_type_node, 0);
9416 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
9417 ptr_type_node,
9418 ptr_type_node, 0);
9419 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
9420 v4hi, v4hi, 0);
9421 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
9422 v2si, v2si, 0);
9423 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
9424 v4hi, v4hi, 0);
9425 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
9426 v2si, v2si, 0);
9427 tree void_ftype_di = build_function_type_list (void_type_node,
9428 intDI_type_node, 0);
9429 tree di_ftype_void = build_function_type_list (intDI_type_node,
9430 void_type_node, 0);
9431 tree void_ftype_si = build_function_type_list (void_type_node,
9432 intSI_type_node, 0);
9433 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
9434 float_type_node,
9435 float_type_node, 0);
9436 tree df_ftype_df_df = build_function_type_list (double_type_node,
9437 double_type_node,
9438 double_type_node, 0);
9439
9440 /* Packing and expanding vectors. */
9441 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
9442 v4qi_ftype_v4hi);
9443 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
9444 v8qi_ftype_v2si_v8qi);
9445 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
9446 v2hi_ftype_v2si);
9447 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
9448 v4hi_ftype_v4qi);
9449 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
9450 v8qi_ftype_v4qi_v4qi);
9451
9452 /* Multiplications. */
9453 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
9454 v4hi_ftype_v4qi_v4hi);
9455 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
9456 v4hi_ftype_v4qi_v2hi);
9457 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
9458 v4hi_ftype_v4qi_v2hi);
9459 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
9460 v4hi_ftype_v8qi_v4hi);
9461 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
9462 v4hi_ftype_v8qi_v4hi);
9463 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
9464 v2si_ftype_v4qi_v2hi);
9465 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
9466 v2si_ftype_v4qi_v2hi);
9467
9468 /* Data aligning. */
9469 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
9470 v4hi_ftype_v4hi_v4hi);
9471 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
9472 v8qi_ftype_v8qi_v8qi);
9473 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
9474 v2si_ftype_v2si_v2si);
9475 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
9476 di_ftype_di_di);
9477
9478 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
9479 void_ftype_di);
9480 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
9481 di_ftype_void);
9482
9483 if (TARGET_ARCH64)
9484 {
9485 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
9486 ptr_ftype_ptr_di);
9487 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
9488 ptr_ftype_ptr_di);
9489 }
9490 else
9491 {
9492 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
9493 ptr_ftype_ptr_si);
9494 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
9495 ptr_ftype_ptr_si);
9496 }
9497
9498 /* Pixel distance. */
9499 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
9500 di_ftype_v8qi_v8qi_di);
9501
9502 /* Edge handling. */
9503 if (TARGET_ARCH64)
9504 {
9505 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
9506 di_ftype_ptr_ptr);
9507 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
9508 di_ftype_ptr_ptr);
9509 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
9510 di_ftype_ptr_ptr);
9511 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
9512 di_ftype_ptr_ptr);
9513 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
9514 di_ftype_ptr_ptr);
9515 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
9516 di_ftype_ptr_ptr);
9517 if (TARGET_VIS2)
9518 {
9519 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
9520 di_ftype_ptr_ptr);
9521 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
9522 di_ftype_ptr_ptr);
9523 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
9524 di_ftype_ptr_ptr);
9525 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
9526 di_ftype_ptr_ptr);
9527 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
9528 di_ftype_ptr_ptr);
9529 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
9530 di_ftype_ptr_ptr);
9531 }
9532 }
9533 else
9534 {
9535 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
9536 si_ftype_ptr_ptr);
9537 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
9538 si_ftype_ptr_ptr);
9539 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
9540 si_ftype_ptr_ptr);
9541 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
9542 si_ftype_ptr_ptr);
9543 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
9544 si_ftype_ptr_ptr);
9545 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
9546 si_ftype_ptr_ptr);
9547 if (TARGET_VIS2)
9548 {
9549 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
9550 si_ftype_ptr_ptr);
9551 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
9552 si_ftype_ptr_ptr);
9553 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
9554 si_ftype_ptr_ptr);
9555 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
9556 si_ftype_ptr_ptr);
9557 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
9558 si_ftype_ptr_ptr);
9559 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
9560 si_ftype_ptr_ptr);
9561 }
9562 }
9563
9564 /* Pixel compare. */
9565 if (TARGET_ARCH64)
9566 {
9567 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
9568 di_ftype_v4hi_v4hi);
9569 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
9570 di_ftype_v2si_v2si);
9571 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
9572 di_ftype_v4hi_v4hi);
9573 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
9574 di_ftype_v2si_v2si);
9575 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
9576 di_ftype_v4hi_v4hi);
9577 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
9578 di_ftype_v2si_v2si);
9579 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
9580 di_ftype_v4hi_v4hi);
9581 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
9582 di_ftype_v2si_v2si);
9583 }
9584 else
9585 {
9586 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
9587 si_ftype_v4hi_v4hi);
9588 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
9589 si_ftype_v2si_v2si);
9590 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
9591 si_ftype_v4hi_v4hi);
9592 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
9593 si_ftype_v2si_v2si);
9594 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
9595 si_ftype_v4hi_v4hi);
9596 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
9597 si_ftype_v2si_v2si);
9598 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
9599 si_ftype_v4hi_v4hi);
9600 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
9601 si_ftype_v2si_v2si);
9602 }
9603
9604 /* Addition and subtraction. */
9605 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
9606 v4hi_ftype_v4hi_v4hi);
9607 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
9608 v2hi_ftype_v2hi_v2hi);
9609 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
9610 v2si_ftype_v2si_v2si);
9611 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
9612 v1si_ftype_v1si_v1si);
9613 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
9614 v4hi_ftype_v4hi_v4hi);
9615 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
9616 v2hi_ftype_v2hi_v2hi);
9617 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
9618 v2si_ftype_v2si_v2si);
9619 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
9620 v1si_ftype_v1si_v1si);
9621
9622 /* Three-dimensional array addressing. */
9623 if (TARGET_ARCH64)
9624 {
9625 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
9626 di_ftype_di_di);
9627 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
9628 di_ftype_di_di);
9629 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
9630 di_ftype_di_di);
9631 }
9632 else
9633 {
9634 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
9635 si_ftype_si_si);
9636 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
9637 si_ftype_si_si);
9638 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
9639 si_ftype_si_si);
9640 }
9641
9642 if (TARGET_VIS2)
9643 {
9644 /* Byte mask and shuffle */
9645 if (TARGET_ARCH64)
9646 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
9647 di_ftype_di_di);
9648 else
9649 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
9650 si_ftype_si_si);
9651 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
9652 v4hi_ftype_v4hi_v4hi);
9653 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
9654 v8qi_ftype_v8qi_v8qi);
9655 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
9656 v2si_ftype_v2si_v2si);
9657 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
9658 di_ftype_di_di);
9659 }
9660
9661 if (TARGET_VIS3)
9662 {
9663 if (TARGET_ARCH64)
9664 {
9665 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
9666 void_ftype_di);
9667 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
9668 void_ftype_di);
9669 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
9670 void_ftype_di);
9671 }
9672 else
9673 {
9674 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
9675 void_ftype_si);
9676 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
9677 void_ftype_si);
9678 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
9679 void_ftype_si);
9680 }
9681
9682 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
9683 v4hi_ftype_v4hi_v4hi);
9684
9685 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
9686 v4hi_ftype_v4hi_v4hi);
9687 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
9688 v4hi_ftype_v4hi_v4hi);
9689 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
9690 v4hi_ftype_v4hi_v4hi);
9691 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
9692 v4hi_ftype_v4hi_v4hi);
9693 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
9694 v2si_ftype_v2si_v2si);
9695 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
9696 v2si_ftype_v2si_v2si);
9697 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
9698 v2si_ftype_v2si_v2si);
9699 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
9700 v2si_ftype_v2si_v2si);
9701
9702 if (TARGET_ARCH64)
9703 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
9704 di_ftype_v8qi_v8qi);
9705 else
9706 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
9707 si_ftype_v8qi_v8qi);
9708
9709 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
9710 v4hi_ftype_v4hi_v4hi);
9711 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
9712 di_ftype_di_di);
9713 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
9714 di_ftype_di_di);
9715
9716 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
9717 v4hi_ftype_v4hi_v4hi);
9718 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
9719 v2hi_ftype_v2hi_v2hi);
9720 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
9721 v4hi_ftype_v4hi_v4hi);
9722 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
9723 v2hi_ftype_v2hi_v2hi);
9724 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
9725 v2si_ftype_v2si_v2si);
9726 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
9727 v1si_ftype_v1si_v1si);
9728 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
9729 v2si_ftype_v2si_v2si);
9730 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
9731 v1si_ftype_v1si_v1si);
9732
9733 if (TARGET_ARCH64)
9734 {
9735 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
9736 di_ftype_v8qi_v8qi);
9737 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
9738 di_ftype_v8qi_v8qi);
9739 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
9740 di_ftype_v8qi_v8qi);
9741 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
9742 di_ftype_v8qi_v8qi);
9743 }
9744 else
9745 {
9746 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
9747 si_ftype_v8qi_v8qi);
9748 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
9749 si_ftype_v8qi_v8qi);
9750 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
9751 si_ftype_v8qi_v8qi);
9752 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
9753 si_ftype_v8qi_v8qi);
9754 }
9755
9756 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
9757 sf_ftype_sf_sf);
9758 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
9759 df_ftype_df_df);
9760 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
9761 sf_ftype_sf_sf);
9762 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
9763 df_ftype_df_df);
9764 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
9765 sf_ftype_sf_sf);
9766 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
9767 df_ftype_df_df);
9768
9769 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
9770 di_ftype_di_di);
9771 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
9772 di_ftype_di_di);
9773 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
9774 di_ftype_di_di);
9775 }
9776 }
9777
/* Implement the TARGET_EXPAND_BUILTIN target hook.
   Expand builtin functions for SPARC intrinsics.  */
9780
9781 static rtx
9782 sparc_expand_builtin (tree exp, rtx target,
9783 rtx subtarget ATTRIBUTE_UNUSED,
9784 enum machine_mode tmode ATTRIBUTE_UNUSED,
9785 int ignore ATTRIBUTE_UNUSED)
9786 {
9787 tree arg;
9788 call_expr_arg_iterator iter;
9789 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
9790 unsigned int icode = DECL_FUNCTION_CODE (fndecl);
9791 rtx pat, op[4];
9792 int arg_count = 0;
9793 bool nonvoid;
9794
9795 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
9796
9797 if (nonvoid)
9798 {
9799 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9800 if (!target
9801 || GET_MODE (target) != tmode
9802 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9803 op[0] = gen_reg_rtx (tmode);
9804 else
9805 op[0] = target;
9806 }
9807 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
9808 {
9809 const struct insn_operand_data *insn_op;
9810 int idx;
9811
9812 if (arg == error_mark_node)
9813 return NULL_RTX;
9814
9815 arg_count++;
9816 idx = arg_count - !nonvoid;
9817 insn_op = &insn_data[icode].operand[idx];
9818 op[arg_count] = expand_normal (arg);
9819
9820 if (insn_op->mode == V1DImode
9821 && GET_MODE (op[arg_count]) == DImode)
9822 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
9823 else if (insn_op->mode == V1SImode
9824 && GET_MODE (op[arg_count]) == SImode)
9825 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
9826
9827 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
9828 insn_op->mode))
9829 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
9830 }
9831
9832 switch (arg_count)
9833 {
9834 case 0:
9835 pat = GEN_FCN (icode) (op[0]);
9836 break;
9837 case 1:
9838 if (nonvoid)
9839 pat = GEN_FCN (icode) (op[0], op[1]);
9840 else
9841 pat = GEN_FCN (icode) (op[1]);
9842 break;
9843 case 2:
9844 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
9845 break;
9846 case 3:
9847 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
9848 break;
9849 default:
9850 gcc_unreachable ();
9851 }
9852
9853 if (!pat)
9854 return NULL_RTX;
9855
9856 emit_insn (pat);
9857
9858 if (nonvoid)
9859 return op[0];
9860 else
9861 return const0_rtx;
9862 }
9863
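/* Compute one element of the result of an fmul8x16 multiplication:
   scale the product of the unsigned 8-bit value E8 and the 16-bit
   value E16 down to 16 bits.  For nonnegative products this rounds to
   the nearest integer; e.g. E8 == 2 and E16 == 1000 gives
   (2 * 1000 + 128) / 256 == 8, i.e. 2000 / 256 rounded.  */
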
9864 static int
9865 sparc_vis_mul8x16 (int e8, int e16)
9866 {
9867 return (e8 * e16 + 128) / 256;
9868 }
9869
/* Multiply the vector elements in ELTS0 by the elements in ELTS1 as
   specified by FNCODE.  All of the elements in ELTS0 and ELTS1 lists
   must be integer constants.  A tree list with the results of the
   multiplications is returned, and each element in the list is of
   INNER_TYPE.  */
9874
9875 static tree
9876 sparc_handle_vis_mul8x16 (int fncode, tree inner_type, tree elts0, tree elts1)
9877 {
9878 tree n_elts = NULL_TREE;
9879 int scale;
9880
9881 switch (fncode)
9882 {
9883 case CODE_FOR_fmul8x16_vis:
9884 for (; elts0 && elts1;
9885 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
9886 {
9887 int val
9888 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
9889 TREE_INT_CST_LOW (TREE_VALUE (elts1)));
9890 n_elts = tree_cons (NULL_TREE,
9891 build_int_cst (inner_type, val),
9892 n_elts);
9893 }
9894 break;
9895
9896 case CODE_FOR_fmul8x16au_vis:
9897 scale = TREE_INT_CST_LOW (TREE_VALUE (elts1));
9898
9899 for (; elts0; elts0 = TREE_CHAIN (elts0))
9900 {
9901 int val
9902 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
9903 scale);
9904 n_elts = tree_cons (NULL_TREE,
9905 build_int_cst (inner_type, val),
9906 n_elts);
9907 }
9908 break;
9909
9910 case CODE_FOR_fmul8x16al_vis:
9911 scale = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (elts1)));
9912
9913 for (; elts0; elts0 = TREE_CHAIN (elts0))
9914 {
9915 int val
9916 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
9917 scale);
9918 n_elts = tree_cons (NULL_TREE,
9919 build_int_cst (inner_type, val),
9920 n_elts);
9921 }
9922 break;
9923
9924 default:
9925 gcc_unreachable ();
9926 }
9927
  return nreverse (n_elts);
}

/* Implement the TARGET_FOLD_BUILTIN target hook.
   Fold builtin functions for SPARC intrinsics.  If IGNORE is true the
   result of the function call is ignored.  NULL_TREE is returned if the
   function could not be folded.  */
9935
9936 static tree
9937 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
9938 tree *args, bool ignore)
9939 {
9940 tree arg0, arg1, arg2;
9941 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
9942 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
9943
9944 if (ignore)
9945 {
9946 /* Note that a switch statement instead of the sequence of tests would
9947 be incorrect as many of the CODE_FOR values could be CODE_FOR_nothing
9948 and that would yield multiple alternatives with identical values. */
9949 if (icode == CODE_FOR_alignaddrsi_vis
9950 || icode == CODE_FOR_alignaddrdi_vis
9951 || icode == CODE_FOR_wrgsr_vis
9952 || icode == CODE_FOR_bmasksi_vis
9953 || icode == CODE_FOR_bmaskdi_vis
9954 || icode == CODE_FOR_cmask8si_vis
9955 || icode == CODE_FOR_cmask8di_vis
9956 || icode == CODE_FOR_cmask16si_vis
9957 || icode == CODE_FOR_cmask16di_vis
9958 || icode == CODE_FOR_cmask32si_vis
9959 || icode == CODE_FOR_cmask32di_vis)
9960 ;
9961 else
9962 return build_zero_cst (rtype);
9963 }
9964
9965 switch (icode)
9966 {
9967 case CODE_FOR_fexpand_vis:
9968 arg0 = args[0];
9969 STRIP_NOPS (arg0);
9970
9971 if (TREE_CODE (arg0) == VECTOR_CST)
9972 {
9973 tree inner_type = TREE_TYPE (rtype);
9974 tree elts = TREE_VECTOR_CST_ELTS (arg0);
9975 tree n_elts = NULL_TREE;
9976
9977 for (; elts; elts = TREE_CHAIN (elts))
9978 {
9979 unsigned int val = TREE_INT_CST_LOW (TREE_VALUE (elts)) << 4;
9980 n_elts = tree_cons (NULL_TREE,
9981 build_int_cst (inner_type, val),
9982 n_elts);
9983 }
9984 return build_vector (rtype, nreverse (n_elts));
9985 }
9986 break;
9987
9988 case CODE_FOR_fmul8x16_vis:
9989 case CODE_FOR_fmul8x16au_vis:
9990 case CODE_FOR_fmul8x16al_vis:
9991 arg0 = args[0];
9992 arg1 = args[1];
9993 STRIP_NOPS (arg0);
9994 STRIP_NOPS (arg1);
9995
9996 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
9997 {
9998 tree inner_type = TREE_TYPE (rtype);
9999 tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
10000 tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
10001 tree n_elts = sparc_handle_vis_mul8x16 (icode, inner_type, elts0,
10002 elts1);
10003
10004 return build_vector (rtype, n_elts);
10005 }
10006 break;
10007
10008 case CODE_FOR_fpmerge_vis:
10009 arg0 = args[0];
10010 arg1 = args[1];
10011 STRIP_NOPS (arg0);
10012 STRIP_NOPS (arg1);
10013
10014 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10015 {
10016 tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
10017 tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
10018 tree n_elts = NULL_TREE;
10019
10020 for (; elts0 && elts1;
10021 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
10022 {
10023 n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts0), n_elts);
10024 n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts1), n_elts);
10025 }
10026
10027 return build_vector (rtype, nreverse (n_elts));
10028 }
10029 break;
10030
10031 case CODE_FOR_pdist_vis:
10032 arg0 = args[0];
10033 arg1 = args[1];
10034 arg2 = args[2];
10035 STRIP_NOPS (arg0);
10036 STRIP_NOPS (arg1);
10037 STRIP_NOPS (arg2);
10038
10039 if (TREE_CODE (arg0) == VECTOR_CST
10040 && TREE_CODE (arg1) == VECTOR_CST
10041 && TREE_CODE (arg2) == INTEGER_CST)
10042 {
10043 int overflow = 0;
10044 unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg2);
10045 HOST_WIDE_INT high = TREE_INT_CST_HIGH (arg2);
10046 tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
10047 tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
10048
10049 for (; elts0 && elts1;
10050 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
10051 {
10052 unsigned HOST_WIDE_INT
10053 low0 = TREE_INT_CST_LOW (TREE_VALUE (elts0)),
10054 low1 = TREE_INT_CST_LOW (TREE_VALUE (elts1));
10055 HOST_WIDE_INT high0 = TREE_INT_CST_HIGH (TREE_VALUE (elts0));
10056 HOST_WIDE_INT high1 = TREE_INT_CST_HIGH (TREE_VALUE (elts1));
10057
10058 unsigned HOST_WIDE_INT l;
10059 HOST_WIDE_INT h;
10060
10061 overflow |= neg_double (low1, high1, &l, &h);
10062 overflow |= add_double (low0, high0, l, h, &l, &h);
10063 if (h < 0)
10064 overflow |= neg_double (l, h, &l, &h);
10065
10066 overflow |= add_double (low, high, l, h, &low, &high);
10067 }
10068
10069 gcc_assert (overflow == 0);
10070
10071 return build_int_cst_wide (rtype, low, high);
10072 }
      /* FALLTHRU */

    default:
10075 break;
10076 }
10077
10078 return NULL_TREE;
10079 }
10080 \f
10081 /* ??? This duplicates information provided to the compiler by the
10082 ??? scheduler description. Some day, teach genautomata to output
10083 ??? the latencies and then CSE will just use that. */
10084
10085 static bool
10086 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
		 int *total, bool speed)
10088 {
10089 enum machine_mode mode = GET_MODE (x);
10090 bool float_mode_p = FLOAT_MODE_P (mode);
10091
10092 switch (code)
10093 {
10094 case CONST_INT:
10095 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10096 {
10097 *total = 0;
10098 return true;
10099 }
10100 /* FALLTHRU */
10101
10102 case HIGH:
10103 *total = 2;
10104 return true;
10105
10106 case CONST:
10107 case LABEL_REF:
10108 case SYMBOL_REF:
10109 *total = 4;
10110 return true;
10111
10112 case CONST_DOUBLE:
10113 if (GET_MODE (x) == VOIDmode
10114 && ((CONST_DOUBLE_HIGH (x) == 0
10115 && CONST_DOUBLE_LOW (x) < 0x1000)
10116 || (CONST_DOUBLE_HIGH (x) == -1
10117 && CONST_DOUBLE_LOW (x) < 0
10118 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10119 *total = 0;
10120 else
10121 *total = 8;
10122 return true;
10123
10124 case MEM:
10125 /* If outer-code was a sign or zero extension, a cost
10126 of COSTS_N_INSNS (1) was already added in. This is
10127 why we are subtracting it back out. */
10128 if (outer_code == ZERO_EXTEND)
10129 {
10130 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
10131 }
10132 else if (outer_code == SIGN_EXTEND)
10133 {
10134 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
10135 }
10136 else if (float_mode_p)
10137 {
10138 *total = sparc_costs->float_load;
10139 }
10140 else
10141 {
10142 *total = sparc_costs->int_load;
10143 }
10144
10145 return true;
10146
10147 case PLUS:
10148 case MINUS:
10149 if (float_mode_p)
10150 *total = sparc_costs->float_plusminus;
10151 else
10152 *total = COSTS_N_INSNS (1);
10153 return false;
10154
10155 case FMA:
10156 {
10157 rtx sub;
10158
10159 gcc_assert (float_mode_p);
10160 *total = sparc_costs->float_mul;
10161
10162 sub = XEXP (x, 0);
10163 if (GET_CODE (sub) == NEG)
10164 sub = XEXP (sub, 0);
10165 *total += rtx_cost (sub, FMA, 0, speed);
10166
10167 sub = XEXP (x, 2);
10168 if (GET_CODE (sub) == NEG)
10169 sub = XEXP (sub, 0);
10170 *total += rtx_cost (sub, FMA, 2, speed);
10171 return true;
10172 }
10173
10174 case MULT:
10175 if (float_mode_p)
10176 *total = sparc_costs->float_mul;
10177 else if (! TARGET_HARD_MUL)
10178 *total = COSTS_N_INSNS (25);
10179 else
10180 {
10181 int bit_cost;
10182
10183 bit_cost = 0;
10184 if (sparc_costs->int_mul_bit_factor)
10185 {
10186 int nbits;
10187
10188 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
10189 {
10190 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
10191 for (nbits = 0; value != 0; value &= value - 1)
10192 nbits++;
10193 }
10194 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
10195 && GET_MODE (XEXP (x, 1)) == VOIDmode)
10196 {
10197 rtx x1 = XEXP (x, 1);
10198 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
10199 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
10200
10201 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
10202 nbits++;
10203 for (; value2 != 0; value2 &= value2 - 1)
10204 nbits++;
10205 }
10206 else
10207 nbits = 7;
10208
10209 if (nbits < 3)
10210 nbits = 3;
10211 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
10212 bit_cost = COSTS_N_INSNS (bit_cost);
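	  /* For instance, a multiplier constant with 9 bits set on a
	     core with a hypothetical int_mul_bit_factor of 2 would
	     yield a bit_cost of COSTS_N_INSNS ((9 - 3) / 2), that is
	     COSTS_N_INSNS (3).  */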
10213 }
10214
10215 if (mode == DImode)
10216 *total = sparc_costs->int_mulX + bit_cost;
10217 else
10218 *total = sparc_costs->int_mul + bit_cost;
10219 }
10220 return false;
10221
10222 case ASHIFT:
10223 case ASHIFTRT:
10224 case LSHIFTRT:
10225 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
10226 return false;
10227
10228 case DIV:
10229 case UDIV:
10230 case MOD:
10231 case UMOD:
10232 if (float_mode_p)
10233 {
10234 if (mode == DFmode)
10235 *total = sparc_costs->float_div_df;
10236 else
10237 *total = sparc_costs->float_div_sf;
10238 }
10239 else
10240 {
10241 if (mode == DImode)
10242 *total = sparc_costs->int_divX;
10243 else
10244 *total = sparc_costs->int_div;
10245 }
10246 return false;
10247
10248 case NEG:
10249 if (! float_mode_p)
10250 {
10251 *total = COSTS_N_INSNS (1);
10252 return false;
10253 }
10254 /* FALLTHRU */
10255
10256 case ABS:
10257 case FLOAT:
10258 case UNSIGNED_FLOAT:
10259 case FIX:
10260 case UNSIGNED_FIX:
10261 case FLOAT_EXTEND:
10262 case FLOAT_TRUNCATE:
10263 *total = sparc_costs->float_move;
10264 return false;
10265
10266 case SQRT:
10267 if (mode == DFmode)
10268 *total = sparc_costs->float_sqrt_df;
10269 else
10270 *total = sparc_costs->float_sqrt_sf;
10271 return false;
10272
10273 case COMPARE:
10274 if (float_mode_p)
10275 *total = sparc_costs->float_cmp;
10276 else
10277 *total = COSTS_N_INSNS (1);
10278 return false;
10279
10280 case IF_THEN_ELSE:
10281 if (float_mode_p)
10282 *total = sparc_costs->float_cmove;
10283 else
10284 *total = sparc_costs->int_cmove;
10285 return false;
10286
10287 case IOR:
10288 /* Handle the NAND vector patterns. */
10289 if (sparc_vector_mode_supported_p (GET_MODE (x))
10290 && GET_CODE (XEXP (x, 0)) == NOT
10291 && GET_CODE (XEXP (x, 1)) == NOT)
10292 {
10293 *total = COSTS_N_INSNS (1);
10294 return true;
10295 }
10296 else
10297 return false;
10298
10299 default:
10300 return false;
10301 }
10302 }
10303
10304 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
10305
10306 static inline bool
10307 general_or_i64_p (reg_class_t rclass)
10308 {
10309 return (rclass == GENERAL_REGS || rclass == I64_REGS);
10310 }
10311
10312 /* Implement TARGET_REGISTER_MOVE_COST. */
10313
10314 static int
10315 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
10316 reg_class_t from, reg_class_t to)
10317 {
10318 bool need_memory = false;
10319
10320 if (from == FPCC_REGS || to == FPCC_REGS)
10321 need_memory = true;
10322 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
10323 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
10324 {
10325 if (TARGET_VIS3)
10326 {
10327 int size = GET_MODE_SIZE (mode);
10328 if (size == 8 || size == 4)
10329 {
10330 if (! TARGET_ARCH32 || size == 4)
10331 return 4;
10332 else
10333 return 6;
10334 }
10335 }
10336 need_memory = true;
10337 }
10338
10339 if (need_memory)
10340 {
10341 if (sparc_cpu == PROCESSOR_ULTRASPARC
10342 || sparc_cpu == PROCESSOR_ULTRASPARC3
10343 || sparc_cpu == PROCESSOR_NIAGARA
10344 || sparc_cpu == PROCESSOR_NIAGARA2
10345 || sparc_cpu == PROCESSOR_NIAGARA3
10346 || sparc_cpu == PROCESSOR_NIAGARA4)
10347 return 12;
10348
10349 return 6;
10350 }
10351
10352 return 2;
10353 }
10354
10355 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
10356 This is achieved by means of a manual dynamic stack space allocation in
10357 the current frame. We make the assumption that SEQ doesn't contain any
10358 function calls, with the possible exception of calls to the GOT helper. */
10359
10360 static void
10361 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
10362 {
10363 /* We must preserve the lowest 16 words for the register save area. */
10364 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
10365 /* We really need only 2 words of fresh stack space. */
10366 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
10367
10368 rtx slot
10369 = gen_rtx_MEM (word_mode, plus_constant (stack_pointer_rtx,
10370 SPARC_STACK_BIAS + offset));
10371
10372 emit_insn (gen_stack_pointer_dec (GEN_INT (size)));
10373 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
10374 if (reg2)
10375 emit_insn (gen_rtx_SET (VOIDmode,
10376 adjust_address (slot, word_mode, UNITS_PER_WORD),
10377 reg2));
10378 emit_insn (seq);
10379 if (reg2)
10380 emit_insn (gen_rtx_SET (VOIDmode,
10381 reg2,
10382 adjust_address (slot, word_mode, UNITS_PER_WORD)));
10383 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
10384 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
10385 }
10386
10387 /* Output the assembler code for a thunk function. THUNK_DECL is the
10388 declaration for the thunk function itself, FUNCTION is the decl for
10389 the target function. DELTA is an immediate constant offset to be
10390 added to THIS. If VCALL_OFFSET is nonzero, the word at address
10391 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
10392
10393 static void
10394 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10395 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10396 tree function)
10397 {
10398 rtx this_rtx, insn, funexp;
10399 unsigned int int_arg_first;
10400
10401 reload_completed = 1;
10402 epilogue_completed = 1;
10403
10404 emit_note (NOTE_INSN_PROLOGUE_END);
10405
10406 if (TARGET_FLAT)
10407 {
10408 sparc_leaf_function_p = 1;
10409
10410 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10411 }
10412 else if (flag_delayed_branch)
10413 {
10414 /* We will emit a regular sibcall below, so we need to instruct
10415 output_sibcall that we are in a leaf function. */
10416 sparc_leaf_function_p = current_function_uses_only_leaf_regs = 1;
10417
10418 /* This will cause final.c to invoke leaf_renumber_regs so we
10419 must behave as if we were in a not-yet-leafified function. */
10420 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
10421 }
10422 else
10423 {
10424 /* We will emit the sibcall manually below, so we will need to
10425 manually spill non-leaf registers. */
10426 sparc_leaf_function_p = current_function_uses_only_leaf_regs = 0;
10427
10428 /* We really are in a leaf function. */
10429 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10430 }
10431
10432 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
10433 returns a structure, the structure return pointer is there instead. */
10434 if (TARGET_ARCH64
10435 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10436 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
10437 else
10438 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
10439
10440 /* Add DELTA. When possible use a plain add, otherwise load it into
10441 a register first. */
10442 if (delta)
10443 {
10444 rtx delta_rtx = GEN_INT (delta);
10445
10446 if (! SPARC_SIMM13_P (delta))
10447 {
10448 rtx scratch = gen_rtx_REG (Pmode, 1);
10449 emit_move_insn (scratch, delta_rtx);
10450 delta_rtx = scratch;
10451 }
10452
10453 /* THIS_RTX += DELTA. */
10454 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
10455 }
10456
10457 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
10458 if (vcall_offset)
10459 {
10460 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10461 rtx scratch = gen_rtx_REG (Pmode, 1);
10462
10463 gcc_assert (vcall_offset < 0);
10464
10465 /* SCRATCH = *THIS_RTX. */
10466 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
10467
10468 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
10469 may not have any available scratch register at this point. */
10470 if (SPARC_SIMM13_P (vcall_offset))
10471 ;
10472 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
10473 else if (! fixed_regs[5]
10474 /* The below sequence is made up of at least 2 insns,
10475 while the default method may need only one. */
10476 && vcall_offset < -8192)
10477 {
10478 rtx scratch2 = gen_rtx_REG (Pmode, 5);
10479 emit_move_insn (scratch2, vcall_offset_rtx);
10480 vcall_offset_rtx = scratch2;
10481 }
10482 else
10483 {
10484 rtx increment = GEN_INT (-4096);
10485
10486 /* VCALL_OFFSET is a negative number whose typical range can be
10487 estimated as -32768..0 in 32-bit mode. In almost all cases
10488 it is therefore cheaper to emit multiple add insns than
10489 spilling and loading the constant into a register (at least
10490 6 insns). */
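	  /* For instance, a hypothetical VCALL_OFFSET of -10000 takes
	     two trips through the loop below (-10000 -> -5904 -> -1808),
	     after which the residue fits in a SIMM13 field: 2 add insns
	     plus the final memory access, instead of a full constant
	     load sequence.  */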
10491 while (! SPARC_SIMM13_P (vcall_offset))
10492 {
10493 emit_insn (gen_add2_insn (scratch, increment));
10494 vcall_offset += 4096;
10495 }
10496 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
10497 }
10498
10499 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
10500 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
10501 gen_rtx_PLUS (Pmode,
10502 scratch,
10503 vcall_offset_rtx)));
10504
10505 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
10506 emit_insn (gen_add2_insn (this_rtx, scratch));
10507 }
10508
10509 /* Generate a tail call to the target function. */
10510 if (! TREE_USED (function))
10511 {
10512 assemble_external (function);
10513 TREE_USED (function) = 1;
10514 }
10515 funexp = XEXP (DECL_RTL (function), 0);
10516
10517 if (flag_delayed_branch)
10518 {
10519 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10520 insn = emit_call_insn (gen_sibcall (funexp));
10521 SIBLING_CALL_P (insn) = 1;
10522 }
10523 else
10524 {
10525 /* The hoops we have to jump through in order to generate a sibcall
10526 without using delay slots... */
10527 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
10528
10529 if (flag_pic)
10530 {
10531 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
10532 start_sequence ();
10533 load_got_register (); /* clobbers %o7 */
10534 scratch = sparc_legitimize_pic_address (funexp, scratch);
10535 seq = get_insns ();
10536 end_sequence ();
10537 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
10538 }
10539 else if (TARGET_ARCH32)
10540 {
10541 emit_insn (gen_rtx_SET (VOIDmode,
10542 scratch,
10543 gen_rtx_HIGH (SImode, funexp)));
10544 emit_insn (gen_rtx_SET (VOIDmode,
10545 scratch,
10546 gen_rtx_LO_SUM (SImode, scratch, funexp)));
10547 }
10548 else /* TARGET_ARCH64 */
10549 {
10550 switch (sparc_cmodel)
10551 {
10552 case CM_MEDLOW:
10553 case CM_MEDMID:
10554 /* The destination can serve as a temporary. */
10555 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
10556 break;
10557
10558 case CM_MEDANY:
10559 case CM_EMBMEDANY:
10560 /* The destination cannot serve as a temporary. */
10561 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
10562 start_sequence ();
10563 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
10564 seq = get_insns ();
10565 end_sequence ();
10566 emit_and_preserve (seq, spill_reg, 0);
10567 break;
10568
10569 default:
10570 gcc_unreachable ();
10571 }
10572 }
10573
10574 emit_jump_insn (gen_indirect_jump (scratch));
10575 }
10576
10577 emit_barrier ();
10578
10579 /* Run just enough of rest_of_compilation to get the insns emitted.
10580 There's not really enough bulk here to make other passes such as
     instruction scheduling worthwhile.  Note that use_thunk calls
10582 assemble_start_function and assemble_end_function. */
10583 insn = get_insns ();
10584 insn_locators_alloc ();
10585 shorten_branches (insn);
10586 final_start_function (insn, file, 1);
10587 final (insn, file, 1);
10588 final_end_function ();
10589
10590 reload_completed = 0;
10591 epilogue_completed = 0;
10592 }
10593
10594 /* Return true if sparc_output_mi_thunk would be able to output the
10595 assembler code for the thunk function specified by the arguments
10596 it is passed, and false otherwise. */
10597 static bool
10598 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
10599 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
10600 HOST_WIDE_INT vcall_offset,
10601 const_tree function ATTRIBUTE_UNUSED)
10602 {
10603 /* Bound the loop used in the default method above. */
10604 return (vcall_offset >= -32768 || ! fixed_regs[5]);
10605 }
10606
/* We use the machine-specific reorg pass to enable workarounds for errata.  */
10608
10609 static void
10610 sparc_reorg (void)
10611 {
10612 rtx insn, next;
10613
10614 /* The only erratum we handle for now is that of the AT697F processor. */
10615 if (!sparc_fix_at697f)
10616 return;
10617
10618 /* We need to have the (essentially) final form of the insn stream in order
10619 to properly detect the various hazards. Run delay slot scheduling. */
10620 if (optimize > 0 && flag_delayed_branch)
10621 dbr_schedule (get_insns ());
10622
10623 /* Now look for specific patterns in the insn stream. */
10624 for (insn = get_insns (); insn; insn = next)
10625 {
10626 bool insert_nop = false;
10627 rtx set;
10628
10629 /* Look for a single-word load into an odd-numbered FP register. */
10630 if (NONJUMP_INSN_P (insn)
10631 && (set = single_set (insn)) != NULL_RTX
10632 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
10633 && MEM_P (SET_SRC (set))
10634 && REG_P (SET_DEST (set))
10635 && REGNO (SET_DEST (set)) > 31
10636 && REGNO (SET_DEST (set)) % 2 != 0)
10637 {
10638 /* The wrong dependency is on the enclosing double register. */
10639 unsigned int x = REGNO (SET_DEST (set)) - 1;
10640 unsigned int src1, src2, dest;
10641 int code;
10642
10643 /* If the insn has a delay slot, then it cannot be problematic. */
10644 next = next_active_insn (insn);
10645 if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
10646 code = -1;
10647 else
10648 {
10649 extract_insn (next);
10650 code = INSN_CODE (next);
10651 }
10652
10653 switch (code)
10654 {
10655 case CODE_FOR_adddf3:
10656 case CODE_FOR_subdf3:
10657 case CODE_FOR_muldf3:
10658 case CODE_FOR_divdf3:
10659 dest = REGNO (recog_data.operand[0]);
10660 src1 = REGNO (recog_data.operand[1]);
10661 src2 = REGNO (recog_data.operand[2]);
10662 if (src1 != src2)
10663 {
10664 /* Case [1-4]:
10665 ld [address], %fx+1
10666 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
10667 if ((src1 == x || src2 == x)
10668 && (dest == src1 || dest == src2))
10669 insert_nop = true;
10670 }
10671 else
10672 {
10673 /* Case 5:
10674 ld [address], %fx+1
10675 FPOPd %fx, %fx, %fx */
10676 if (src1 == x
10677 && dest == src1
10678 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
10679 insert_nop = true;
10680 }
10681 break;
10682
10683 case CODE_FOR_sqrtdf2:
10684 dest = REGNO (recog_data.operand[0]);
10685 src1 = REGNO (recog_data.operand[1]);
10686 /* Case 6:
10687 ld [address], %fx+1
10688 fsqrtd %fx, %fx */
10689 if (src1 == x && dest == src1)
10690 insert_nop = true;
10691 break;
10692
10693 default:
10694 break;
10695 }
10696 }
10697 else
10698 next = NEXT_INSN (insn);
10699
10700 if (insert_nop)
10701 emit_insn_after (gen_nop (), insn);
10702 }
10703 }
10704
10705 /* How to allocate a 'struct machine_function'. */
10706
10707 static struct machine_function *
10708 sparc_init_machine_status (void)
10709 {
10710 return ggc_alloc_cleared_machine_function ();
10711 }
10712
10713 /* Locate some local-dynamic symbol still in use by this function
10714 so that we can print its name in local-dynamic base patterns. */
10715
10716 static const char *
10717 get_some_local_dynamic_name (void)
10718 {
10719 rtx insn;
10720
10721 if (cfun->machine->some_ld_name)
10722 return cfun->machine->some_ld_name;
10723
10724 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10725 if (INSN_P (insn)
10726 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10727 return cfun->machine->some_ld_name;
10728
10729 gcc_unreachable ();
10730 }
10731
10732 static int
10733 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10734 {
10735 rtx x = *px;
10736
10737 if (x
10738 && GET_CODE (x) == SYMBOL_REF
10739 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10740 {
10741 cfun->machine->some_ld_name = XSTR (x, 0);
10742 return 1;
10743 }
10744
10745 return 0;
10746 }
10747
10748 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10749 We need to emit DTP-relative relocations. */
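
/* For a hypothetical TLS symbol "foo" and SIZE == 4, this emits:

	.word	%r_tls_dtpoff32(foo)  */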
10750
10751 static void
10752 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
10753 {
10754 switch (size)
10755 {
10756 case 4:
10757 fputs ("\t.word\t%r_tls_dtpoff32(", file);
10758 break;
10759 case 8:
10760 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
10761 break;
10762 default:
10763 gcc_unreachable ();
10764 }
10765 output_addr_const (file, x);
10766 fputs (")", file);
10767 }
10768
10769 /* Do whatever processing is required at the end of a file. */
10770
10771 static void
10772 sparc_file_end (void)
10773 {
10774 /* If we need to emit the special GOT helper function, do so now. */
10775 if (got_helper_rtx)
10776 {
10777 const char *name = XSTR (got_helper_rtx, 0);
10778 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
10779 #ifdef DWARF2_UNWIND_INFO
10780 bool do_cfi;
10781 #endif
10782
10783 if (USE_HIDDEN_LINKONCE)
10784 {
10785 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
10786 get_identifier (name),
10787 build_function_type_list (void_type_node,
10788 NULL_TREE));
10789 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
10790 NULL_TREE, void_type_node);
10791 TREE_STATIC (decl) = 1;
10792 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
10793 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
10794 DECL_VISIBILITY_SPECIFIED (decl) = 1;
10795 resolve_unique_section (decl, 0, flag_function_sections);
10796 allocate_struct_function (decl, true);
10797 cfun->is_thunk = 1;
10798 current_function_decl = decl;
10799 init_varasm_status ();
10800 assemble_start_function (decl, name);
10801 }
10802 else
10803 {
10804 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10805 switch_to_section (text_section);
10806 if (align > 0)
10807 ASM_OUTPUT_ALIGN (asm_out_file, align);
10808 ASM_OUTPUT_LABEL (asm_out_file, name);
10809 }
10810
10811 #ifdef DWARF2_UNWIND_INFO
10812 do_cfi = dwarf2out_do_cfi_asm ();
10813 if (do_cfi)
10814 fprintf (asm_out_file, "\t.cfi_startproc\n");
10815 #endif
10816 if (flag_delayed_branch)
10817 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
10818 reg_name, reg_name);
10819 else
10820 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
10821 reg_name, reg_name);
10822 #ifdef DWARF2_UNWIND_INFO
10823 if (do_cfi)
10824 fprintf (asm_out_file, "\t.cfi_endproc\n");
10825 #endif
10826 }
10827
10828 if (NEED_INDICATE_EXEC_STACK)
10829 file_end_indicate_exec_stack ();
10830
10831 #ifdef TARGET_SOLARIS
10832 solaris_file_end ();
10833 #endif
10834 }
10835
10836 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
10837 /* Implement TARGET_MANGLE_TYPE. */
10838
10839 static const char *
10840 sparc_mangle_type (const_tree type)
10841 {
10842 if (!TARGET_64BIT
10843 && TYPE_MAIN_VARIANT (type) == long_double_type_node
10844 && TARGET_LONG_DOUBLE_128)
10845 return "g";
10846
10847 /* For all other types, use normal C++ mangling. */
10848 return NULL;
10849 }
10850 #endif
10851
/* Expand code to perform an 8-bit or 16-bit compare and swap by doing
   a 32-bit compare and swap on the word containing the byte or
   half-word; an illustrative sketch of the emitted sequence follows.  */
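
/* A sketch (not literal RTL) of what is emitted below, with big-endian
   byte numbering and CAS denoting the 32-bit sync_compare_and_swapsi
   operation:

     addr = addr1 & -4;
     off = ((addr1 & 3) ^ (QImode ? 3 : 2)) << 3;
     mask = (QImode ? 0xff : 0xffff) << off;
     val = *addr & ~mask;
     oldv = (oldval << off) & mask;
     newv = (newval << off) & mask;
     do
       {
	 res = CAS (addr, oldv | val, newv | val);
	 if (res == (oldv | val))
	   break;
	 tmp = res & ~mask;
	 changed = (tmp != val);
	 val = tmp;
       }
     while (changed);
     result = (res & mask) >> off;  */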
10854
10855 void
10856 sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
10857 {
10858 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
10859 rtx addr = gen_reg_rtx (Pmode);
10860 rtx off = gen_reg_rtx (SImode);
10861 rtx oldv = gen_reg_rtx (SImode);
10862 rtx newv = gen_reg_rtx (SImode);
10863 rtx oldvalue = gen_reg_rtx (SImode);
10864 rtx newvalue = gen_reg_rtx (SImode);
10865 rtx res = gen_reg_rtx (SImode);
10866 rtx resv = gen_reg_rtx (SImode);
10867 rtx memsi, val, mask, end_label, loop_label, cc;
10868
10869 emit_insn (gen_rtx_SET (VOIDmode, addr,
10870 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
10871
10872 if (Pmode != SImode)
10873 addr1 = gen_lowpart (SImode, addr1);
10874 emit_insn (gen_rtx_SET (VOIDmode, off,
10875 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
10876
10877 memsi = gen_rtx_MEM (SImode, addr);
10878 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
10879 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
10880
10881 val = force_reg (SImode, memsi);
10882
10883 emit_insn (gen_rtx_SET (VOIDmode, off,
10884 gen_rtx_XOR (SImode, off,
10885 GEN_INT (GET_MODE (mem) == QImode
10886 ? 3 : 2))));
10887
10888 emit_insn (gen_rtx_SET (VOIDmode, off,
10889 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
10890
10891 if (GET_MODE (mem) == QImode)
10892 mask = force_reg (SImode, GEN_INT (0xff));
10893 else
10894 mask = force_reg (SImode, GEN_INT (0xffff));
10895
10896 emit_insn (gen_rtx_SET (VOIDmode, mask,
10897 gen_rtx_ASHIFT (SImode, mask, off)));
10898
10899 emit_insn (gen_rtx_SET (VOIDmode, val,
10900 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
10901 val)));
10902
10903 oldval = gen_lowpart (SImode, oldval);
10904 emit_insn (gen_rtx_SET (VOIDmode, oldv,
10905 gen_rtx_ASHIFT (SImode, oldval, off)));
10906
10907 newval = gen_lowpart_common (SImode, newval);
10908 emit_insn (gen_rtx_SET (VOIDmode, newv,
10909 gen_rtx_ASHIFT (SImode, newval, off)));
10910
10911 emit_insn (gen_rtx_SET (VOIDmode, oldv,
10912 gen_rtx_AND (SImode, oldv, mask)));
10913
10914 emit_insn (gen_rtx_SET (VOIDmode, newv,
10915 gen_rtx_AND (SImode, newv, mask)));
10916
10917 end_label = gen_label_rtx ();
10918 loop_label = gen_label_rtx ();
10919 emit_label (loop_label);
10920
10921 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
10922 gen_rtx_IOR (SImode, oldv, val)));
10923
10924 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
10925 gen_rtx_IOR (SImode, newv, val)));
10926
10927 emit_insn (gen_sync_compare_and_swapsi (res, memsi, oldvalue, newvalue));
10928
10929 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
10930
10931 emit_insn (gen_rtx_SET (VOIDmode, resv,
10932 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
10933 res)));
10934
10935 cc = gen_compare_reg_1 (NE, resv, val);
10936 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
10937
10938 /* Use cbranchcc4 to separate the compare and branch! */
10939 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
10940 cc, const0_rtx, loop_label));
10941
10942 emit_label (end_label);
10943
10944 emit_insn (gen_rtx_SET (VOIDmode, res,
10945 gen_rtx_AND (SImode, res, mask)));
10946
10947 emit_insn (gen_rtx_SET (VOIDmode, res,
10948 gen_rtx_LSHIFTRT (SImode, res, off)));
10949
10950 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
10951 }
10952
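/* Expand the selector SEL for a vector permutation in mode VMODE into
   the byte-level form consumed by the bmask instruction, then emit the
   bmask insn, which deposits the result in the GSR for use by a
   subsequent bshuffle.  The inline diagrams below track SEL through
   each step.  */
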
10953 void
10954 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
10955 {
10956 rtx t_1, t_2, t_3;
10957
10958 sel = gen_lowpart (DImode, sel);
10959 switch (vmode)
10960 {
10961 case V2SImode:
10962 /* inp = xxxxxxxAxxxxxxxB */
10963 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
10964 NULL_RTX, 1, OPTAB_DIRECT);
10965 /* t_1 = ....xxxxxxxAxxx. */
10966 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
10967 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
10968 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
10969 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
10970 /* sel = .......B */
10971 /* t_1 = ...A.... */
10972 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
10973 /* sel = ...A...B */
10974 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
10975 /* sel = AAAABBBB * 4 */
10976 t_1 = force_reg (SImode, GEN_INT (0x01230123));
10977 /* sel = { A*4, A*4+1, A*4+2, ... } */
10978 break;
10979
10980 case V4HImode:
10981 /* inp = xxxAxxxBxxxCxxxD */
10982 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
10983 NULL_RTX, 1, OPTAB_DIRECT);
10984 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
10985 NULL_RTX, 1, OPTAB_DIRECT);
10986 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
10987 NULL_RTX, 1, OPTAB_DIRECT);
10988 /* t_1 = ..xxxAxxxBxxxCxx */
10989 /* t_2 = ....xxxAxxxBxxxC */
10990 /* t_3 = ......xxxAxxxBxx */
10991 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
10992 GEN_INT (0x07),
10993 NULL_RTX, 1, OPTAB_DIRECT);
10994 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
10995 GEN_INT (0x0700),
10996 NULL_RTX, 1, OPTAB_DIRECT);
10997 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
10998 GEN_INT (0x070000),
10999 NULL_RTX, 1, OPTAB_DIRECT);
11000 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11001 GEN_INT (0x07000000),
11002 NULL_RTX, 1, OPTAB_DIRECT);
11003 /* sel = .......D */
11004 /* t_1 = .....C.. */
11005 /* t_2 = ...B.... */
11006 /* t_3 = .A...... */
11007 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11008 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11009 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11010 /* sel = .A.B.C.D */
11011 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11012 /* sel = AABBCCDD * 2 */
11013 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11014 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
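      /* For example (illustrative values): indexes { A, B, C, D }
         = { 5, 0, 3, 2 } give sel = 0x05000302; sel * 0x22 = 0xAA006644;
         adding 0x01010101 in the bmask insn yields 0xAB016745, i.e. the
         byte selectors { 10, 11, 0, 1, 6, 7, 4, 5 } for halfwords
         5, 0, 3 and 2.  */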
      break;

    case V8QImode:
      /* inp = xAxBxCxDxExFxGxH */
      sel = expand_simple_binop (DImode, AND, sel,
                                 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
                                          | 0x0f0f0f0f),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D.E.F.G.H */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..A.B.C.D.E.F.G. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .AABBCCDDEEFFGGH */
      sel = expand_simple_binop (DImode, AND, sel,
                                 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
                                          | 0xff00ff),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..AB..CD..EF..GH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....AB..CD..EF.. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..ABABCDCDEFEFGH */
      sel = expand_simple_binop (DImode, AND, sel,
                                 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ....ABCD....EFGH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ........ABCD.... */
      sel = gen_lowpart (SImode, sel);
      t_1 = gen_lowpart (SImode, t_1);
      break;

    default:
      gcc_unreachable ();
    }

  /* Always perform the final addition/merge within the bmask insn.  */
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
}

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
sparc_frame_pointer_required (void)
{
  /* If the stack pointer is dynamically modified in the function, it cannot
     serve as the frame pointer.  */
  if (cfun->calls_alloca)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* In flat mode, that's it.  */
  if (TARGET_FLAT)
    return false;

  /* Otherwise, the frame pointer is required if the function isn't leaf.  */
  return !(current_function_is_leaf && only_leaf_regs_used ());
}

/* The way this is structured, we can't eliminate SFP in favor of SP
   if the frame pointer is required: we want to use the SFP->HFP elimination
   in that case.  But the test in update_eliminables doesn't know we are
   assuming below that we only do the former elimination.  */

static bool
sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
}

/* Return the hard frame pointer directly to bypass the stack bias.  */

static rtx
sparc_builtin_setjmp_frame_value (void)
{
  return hard_frame_pointer_rtx;
}

/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
   they won't be allocated.  */

static void
sparc_conditional_register_usage (void)
{
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g5, then honor it.  */
  if (TARGET_ARCH32 && fixed_regs[5])
    fixed_regs[5] = 1;
  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
    fixed_regs[5] = 0;
  if (! TARGET_V9)
    {
      int regno;
      for (regno = SPARC_FIRST_V9_FP_REG;
           regno <= SPARC_LAST_V9_FP_REG;
           regno++)
        fixed_regs[regno] = 1;
      /* %fcc0 is used by v8 and v9.  */
      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
           regno <= SPARC_LAST_V9_FCC_REG;
           regno++)
        fixed_regs[regno] = 1;
    }
  if (! TARGET_FPU)
    {
      int regno;
      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
        fixed_regs[regno] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2, then honor it.
     Likewise with g3 and g4.  */
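  /* A value of 2 is the initial setting taken from the FIXED_REGISTERS
     macro: it marks a register the user has not overridden with an
     -ffixed or -fcall-{used,saved} option, whereas 0 and 1 reflect an
     explicit user choice.  */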
  if (fixed_regs[2] == 2)
    fixed_regs[2] = ! TARGET_APP_REGS;
  if (fixed_regs[3] == 2)
    fixed_regs[3] = ! TARGET_APP_REGS;
  if (TARGET_ARCH32 && fixed_regs[4] == 2)
    fixed_regs[4] = ! TARGET_APP_REGS;
  else if (TARGET_CM_EMBMEDANY)
    fixed_regs[4] = 1;
  else if (fixed_regs[4] == 2)
    fixed_regs[4] = 0;
  if (TARGET_FLAT)
    {
      int regno;
      /* Disable leaf functions.  */
      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        leaf_reg_remap [regno] = regno;
    }
  if (TARGET_VIS)
    global_regs[SPARC_GSR_REG] = 1;
}

/* Implement TARGET_PREFERRED_RELOAD_CLASS:

   - We can't load constants into FP registers.
   - We can't load FP constants into integer registers when soft-float,
     because there is no soft-float pattern with an r/F constraint.
   - We can't load FP constants into integer registers for TFmode unless
     it is 0.0L, because there is no movtf pattern with an r/F constraint.
   - Try to reload integer constants (symbolic or otherwise) back into
     registers directly, rather than having them dumped to memory.  */

static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
  enum machine_mode mode = GET_MODE (x);
  if (CONSTANT_P (x))
    {
      if (FP_REG_CLASS_P (rclass)
          || rclass == GENERAL_OR_FP_REGS
          || rclass == GENERAL_OR_EXTRA_FP_REGS
          || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
          || (mode == TFmode && ! const_zero_operand (x, mode)))
        return NO_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
        return GENERAL_REGS;

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          if (! FP_REG_CLASS_P (rclass)
              || !(const_zero_operand (x, mode)
                   || const_all_ones_operand (x, mode)))
            return NO_REGS;
        }
    }

  if (TARGET_VIS3
      && ! TARGET_ARCH64
      && (rclass == EXTRA_FP_REGS
          || rclass == GENERAL_OR_EXTRA_FP_REGS))
    {
      int regno = true_regnum (x);

      if (SPARC_INT_REG_P (regno))
        return (rclass == EXTRA_FP_REGS
                ? FP_REGS : GENERAL_OR_FP_REGS);
    }

  return rclass;
}

/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */
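
/* Note on the sequences below: in V8+ mode a 64-bit value lives in an
   integer register pair, with %H<n> and %L<n> printing the high and low
   registers of operand <n>.  The srl insns clear stray upper bits of a
   low word when sparc_check_64 cannot prove they are zero, each 64-bit
   operand is then assembled into a single register with sllx/or, the
   multiply is done there, and the product is finally split back into
   %H0/%L0 with srlx and mov.  Alternative 1 may assemble operand 1 in
   place; the other alternative goes through the scratches %3 and %4.  */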

const char *
output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  gcc_assert (! TARGET_ARCH64);

  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      if (which_alternative == 1)
        {
          output_asm_insn ("or\t%L1, %H1, %H1", operands);
          sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
          output_asm_insn (mulstr, operands);
          return "srlx\t%L0, 32, %H0";
        }
      else
        {
          output_asm_insn ("sllx\t%H1, 32, %3", operands);
          output_asm_insn ("or\t%L1, %3, %3", operands);
          sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
          output_asm_insn (mulstr, operands);
          output_asm_insn ("srlx\t%3, 32, %H0", operands);
          return "mov\t%3, %L0";
        }
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      if (which_alternative == 1)
        {
          output_asm_insn ("or\t%L1, %H1, %H1", operands);
          sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
          output_asm_insn (mulstr, operands);
          return "srlx\t%L0, 32, %H0";
        }
      else
        {
          output_asm_insn ("sllx\t%H1, 32, %3", operands);
          output_asm_insn ("or\t%L1, %3, %3", operands);
          sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
          output_asm_insn (mulstr, operands);
          output_asm_insn ("srlx\t%3, 32, %H0", operands);
          return "mov\t%3, %L0";
        }
    }
  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize TARGET to
   the N_ELTS values for individual fields contained in LOCS by means of the
   VIS2 BSHUFFLE insn.  MODE and INNER_MODE are the modes describing TARGET.  */

static void
vector_init_bshuffle (rtx target, rtx *locs, int n_elts,
                      enum machine_mode mode,
                      enum machine_mode inner_mode)
{
  rtx mid_target, r0_high, r0_low, r1_high, r1_low;
  enum machine_mode partial_mode;
  int bmask, i, idxs[8];

  partial_mode = (mode == V4HImode
                  ? V2HImode
                  : (mode == V8QImode
                     ? V4QImode : mode));

  r0_high = r0_low = NULL_RTX;
  r1_high = r1_low = NULL_RTX;

  /* Move the pieces into place, as needed, and calculate the nibble
     indexes for the bmask calculation.  After we execute this loop the
     locs[] array is no longer needed.  Therefore, to simplify things,
     we set entries that have been processed already to NULL_RTX.  */

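  /* The IDXS values are nibble-sized byte indexes into the 16-byte
     concatenation of the two BSHUFFLE sources: 0x3 and 0x7 select the
     element sitting in the high and low half of the first source
     register, 0xb and 0xf the corresponding halves of the second.  */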
  for (i = 0; i < n_elts; i++)
    {
      int j;

      if (locs[i] == NULL_RTX)
        continue;

      if (!r0_low)
        {
          r0_low = locs[i];
          idxs[i] = 0x7;
        }
      else if (!r1_low)
        {
          r1_low = locs[i];
          idxs[i] = 0xf;
        }
      else if (!r0_high)
        {
          r0_high = gen_highpart (partial_mode, r0_low);
          emit_move_insn (r0_high, gen_lowpart (partial_mode, locs[i]));
          idxs[i] = 0x3;
        }
      else if (!r1_high)
        {
          r1_high = gen_highpart (partial_mode, r1_low);
          emit_move_insn (r1_high, gen_lowpart (partial_mode, locs[i]));
          idxs[i] = 0xb;
        }
      else
        gcc_unreachable ();

      for (j = i + 1; j < n_elts; j++)
        {
          if (locs[j] == locs[i])
            {
              locs[j] = NULL_RTX;
              idxs[j] = idxs[i];
            }
        }
      locs[i] = NULL_RTX;
    }

  bmask = 0;
  for (i = 0; i < n_elts; i++)
    {
      int v = idxs[i];

      switch (GET_MODE_SIZE (inner_mode))
        {
        case 2:
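          /* A 2-byte element needs a pair of byte selectors: v - 1
             addresses its high byte and v its low byte.  */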
          bmask <<= 8;
          bmask |= (((v - 1) << 4) | v);
          break;

        case 1:
          bmask <<= 4;
          bmask |= v;
          break;

        default:
          gcc_unreachable ();
        }
    }

  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
                              force_reg (SImode, GEN_INT (bmask))));

  mid_target = target;
  if (GET_MODE_SIZE (mode) == 4)
    {
      mid_target = gen_reg_rtx (mode == V2HImode
                                ? V4HImode : V8QImode);
    }

  if (!r1_low)
    r1_low = r0_low;

  switch (GET_MODE (mid_target))
    {
    case V4HImode:
      emit_insn (gen_bshufflev4hi_vis (mid_target, r0_low, r1_low));
      break;
    case V8QImode:
      emit_insn (gen_bshufflev8qi_vis (mid_target, r0_low, r1_low));
      break;
    default:
      gcc_unreachable ();
    }

  if (mid_target != target)
    emit_move_insn (target, gen_lowpart (partial_mode, mid_target));
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize TARGET to
   values for individual fields VALS by means of simple word moves if this is
   possible.  MODE and INNER_MODE are the modes describing TARGET.  Return true
   on success.  */

static bool
vector_init_move_words (rtx target, rtx vals, enum machine_mode mode,
                        enum machine_mode inner_mode)
{
  switch (mode)
    {
    case V1SImode:
    case V1DImode:
      emit_move_insn (gen_lowpart (inner_mode, target),
                      gen_lowpart (inner_mode, XVECEXP (vals, 0, 0)));
      return true;

    case V2SImode:
      emit_move_insn (gen_highpart (SImode, target), XVECEXP (vals, 0, 0));
      emit_move_insn (gen_lowpart (SImode, target), XVECEXP (vals, 0, 1));
      return true;

    default:
      break;
    }
  return false;
}

/* Subroutine of sparc_expand_vector_init.  Move the N_ELTS elements in VALS
   into registers compatible with MODE and INNER_MODE.  Store the RTX for
   these regs into the corresponding array entry of LOCS.  */

static void
vector_init_prepare_elts (rtx *locs, rtx vals, int n_elts,
                          enum machine_mode mode,
                          enum machine_mode inner_mode)
{
  enum machine_mode loc_mode;
  int i;

  switch (mode)
    {
    case V2HImode:
      loc_mode = V4HImode;
      break;

    case V4QImode:
      loc_mode = V8QImode;
      break;

    case V4HImode:
    case V8QImode:
      loc_mode = mode;
      break;

    default:
      gcc_unreachable ();
    }

  gcc_assert (GET_MODE_SIZE (inner_mode) <= 4);
  for (i = 0; i < n_elts; i++)
    {
      rtx dst, elt = XVECEXP (vals, 0, i);
      int j;

      /* Did we see this already?  If so, just record its location.  */
      dst = NULL_RTX;
      for (j = 0; j < i; j++)
        {
          if (XVECEXP (vals, 0, j) == elt)
            {
              dst = locs[j];
              break;
            }
        }

      if (! dst)
        {
          enum rtx_code code = GET_CODE (elt);

          dst = gen_reg_rtx (loc_mode);

          /* We use different strategies based upon whether the element
             is in memory or in a register.  When we start in a register
             and we're VIS3 capable, it's always cheaper to use the VIS3
             int-->fp register moves since we avoid having to use stack
             memory.  */
          if ((TARGET_VIS3 && (code == REG || code == SUBREG))
              || (CONSTANT_P (elt)
                  && (const_zero_operand (elt, inner_mode)
                      || const_all_ones_operand (elt, inner_mode))))
            {
              elt = convert_modes (SImode, inner_mode, elt, true);

              emit_clobber (dst);
              emit_move_insn (gen_lowpart (SImode, dst), elt);
            }
          else
            {
              rtx m = elt;

              if (CONSTANT_P (elt))
                {
                  m = force_const_mem (inner_mode, elt);
                }
              else if (code != MEM)
                {
                  rtx stk
                    = assign_stack_temp (inner_mode,
                                         GET_MODE_SIZE (inner_mode), 0);
                  emit_move_insn (stk, elt);
                  m = stk;
                }

              switch (loc_mode)
                {
                case V4HImode:
                  emit_insn (gen_zero_extend_v4hi_vis (dst, m));
                  break;
                case V8QImode:
                  emit_insn (gen_zero_extend_v8qi_vis (dst, m));
                  break;
                default:
                  gcc_unreachable ();
                }
            }
        }
      locs[i] = dst;
    }
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize TARGET to
   the N_ELTS values for individual fields contained in LOCS by means of VIS2
   instructions, among which N_UNIQUE are unique.  MODE and INNER_MODE are the
   modes describing TARGET.  */

static void
sparc_expand_vector_init_vis2 (rtx target, rtx *locs, int n_elts, int n_unique,
                               enum machine_mode mode,
                               enum machine_mode inner_mode)
{
  if (n_unique <= 4)
    {
      vector_init_bshuffle (target, locs, n_elts, mode, inner_mode);
    }
  else
    {
      int i;

      gcc_assert (mode == V8QImode);

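      /* With the GSR alignment set to 7, each FALIGNDATA extracts bytes
         7..14 of the 16-byte concatenation of its sources, i.e. it
         prepends the last byte of LOCS[i] (the zero-extended element)
         to the first seven bytes of TARGET.  Feeding the elements in
         starting with the last one funnels them all into place, one
         byte per insn.  */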
      emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
                                      force_reg (SImode, GEN_INT (7)),
                                      CONST0_RTX (SImode)));
      i = n_elts - 1;
      emit_insn (gen_faligndatav8qi_vis (target, locs[i], locs[i]));
      while (--i >= 0)
        emit_insn (gen_faligndatav8qi_vis (target, locs[i], target));
    }
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize TARGET to
   the N_ELTS values for individual fields contained in LOCS by means of VIS1
   instructions, among which N_UNIQUE are unique.  MODE is TARGET's mode.  */

static void
sparc_expand_vector_init_vis1 (rtx target, rtx *locs, int n_elts, int n_unique,
                               enum machine_mode mode)
{
  enum machine_mode full_mode = mode;
  rtx (*emitter)(rtx, rtx, rtx);
  int alignaddr_val, i;
  rtx tmp = target;

  if (n_unique == 1 && mode == V8QImode)
    {
      rtx t2, t2_low, t1;

      t1 = gen_reg_rtx (V4QImode);
      emit_move_insn (t1, gen_lowpart (V4QImode, locs[0]));

      t2 = gen_reg_rtx (V8QImode);
      t2_low = gen_lowpart (V4QImode, t2);

      /* xxxxxxAA --> xxxxxxxxxxxxAAAA
         xxxxAAAA --> xxxxxxxxAAAAAAAA
         AAAAAAAA --> AAAAAAAAAAAAAAAA */
      emit_insn (gen_fpmerge_vis (t2, t1, t1));
      emit_move_insn (t1, t2_low);
      emit_insn (gen_fpmerge_vis (t2, t1, t1));
      emit_move_insn (t1, t2_low);
      emit_insn (gen_fpmerge_vis (target, t1, t1));
      return;
    }

  switch (mode)
    {
    case V2HImode:
      full_mode = V4HImode;
      /* FALLTHRU */
    case V4HImode:
      emitter = gen_faligndatav4hi_vis;
      alignaddr_val = 6;
      break;

    case V4QImode:
      full_mode = V8QImode;
      /* FALLTHRU */
    case V8QImode:
      emitter = gen_faligndatav8qi_vis;
      alignaddr_val = 7;
      break;

    default:
      gcc_unreachable ();
    }

  if (full_mode != mode)
    tmp = gen_reg_rtx (full_mode);

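  /* The same FALIGNDATA funnel as in the VIS2 path above, with the GSR
     alignment chosen so that exactly one element (a byte or a halfword)
     is shifted in per insn.  */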
  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
                                  force_reg (SImode, GEN_INT (alignaddr_val)),
                                  CONST0_RTX (SImode)));

  i = n_elts - 1;
  emit_insn (emitter (tmp, locs[i], locs[i]));
  while (--i >= 0)
    emit_insn (emitter (tmp, locs[i], tmp));

  if (tmp != target)
    emit_move_insn (target, gen_highpart (mode, tmp));
}

/* Emit code to initialize TARGET to values for individual fields VALS.  */

void
sparc_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0, n_unique = 0;
  rtx locs[8];

  gcc_assert (n_elts <= 8);

  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      bool found = false;
      int j;

      if (!CONSTANT_P (x))
        n_var++;

      for (j = 0; j < i; j++)
        {
          if (rtx_equal_p (x, XVECEXP (vals, 0, j)))
            {
              found = true;
              break;
            }
        }
      if (!found)
        n_unique++;
    }

  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  if (vector_init_move_words (target, vals, mode, inner_mode))
    return;

  vector_init_prepare_elts (locs, vals, n_elts, mode, inner_mode);

  if (TARGET_VIS2)
    sparc_expand_vector_init_vis2 (target, locs, n_elts, n_unique,
                                   mode, inner_mode);
  else
    sparc_expand_vector_init_vis1 (target, locs, n_elts, n_unique, mode);
}

/* Implement TARGET_SECONDARY_RELOAD.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
                        enum machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
          || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
              && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  if (((TARGET_CM_MEDANY
        && symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
           && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
        sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
        sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
         to move 8-byte values in 4-byte pieces.  This only works via
         FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
         move between EXTRA_FP_REGS and GENERAL_REGS, we will need
         an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
          || ((general_or_i64_p (rclass)
               || rclass == GENERAL_OR_FP_REGS)
              && SPARC_FP_REG_P (regno)))
        {
          sri->extra_cost = 2;
          return FP_REGS;
        }
    }

  return NO_REGS;
}

/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.
   Return true on success.  */

bool
sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  enum machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      if (reg_overlap_mentioned_p (dst, cmp))
        dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
        rc = reverse_condition_maybe_unordered (rc);
      else
        rc = reverse_condition (rc);
    }

  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (VOIDmode, dst,
                          gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}

/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
   code to be used for the condition mask.  */
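
/* The expansion below is a three-insn sequence: the vector comparison
   writes an element mask into MASK, the condition-mask insn folds that
   mask into the GSR, and BSHUFFLE then picks each element of the result
   from either OPERANDS[1] or OPERANDS[2] under GSR control.  */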

void
sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
{
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
  enum rtx_code code = GET_CODE (operands[3]);

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  if (code == LT || code == GE)
    {
      rtx t;

      code = swap_condition (code);
      t = cop0; cop0 = cop1; cop1 = t;
    }

  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  fcmp = gen_rtx_UNSPEC (Pmode,
                         gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
                         fcode);

  cmask = gen_rtx_UNSPEC (DImode,
                          gen_rtvec (2, mask, gsr),
                          ccode);

  bshuf = gen_rtx_UNSPEC (mode,
                          gen_rtvec (3, operands[1], operands[2], gsr),
                          UNSPEC_BSHUFFLE);

  emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
  emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
}

#include "gt-sparc.h"