/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "cfglayout.h"
#include "gimple.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "df.h"
#include "dwarf2out.h"

/* Processor costs */
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (31), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

const struct processor_costs *sparc_costs = &cypress_costs;
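/* A minimal illustrative sketch, not used elsewhere in this file: the
   tables above are expressed with COSTS_N_INSNS (from rtl.h), which
   scales an instruction-count latency into the common unit used by the
   RTX cost hooks, so ratios between entries stay meaningful.  For
   example, on cypress an fdivs (37) costs roughly five fmuls (7).  */

static int ATTRIBUTE_UNUSED
sparc_example_cost_ratio (void)
{
  /* With the conventional 4-units-per-insn scaling, 148 / 28 == 5.  */
  return COSTS_N_INSNS (37) / COSTS_N_INSNS (7);
}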

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
   whether somebody branches between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Global variables for machine-dependent things.  */

/* Size of frame.  Need to know this to emit return insns from leaf procedures.
   ACTUAL_FSIZE is set by sparc_compute_frame_size() which is called during the
   reload pass.  This is important as the value is later used for scheduling
   (to see what can go in a delay slot).
   APPARENT_FSIZE is the size of the stack less the register save area and less
   the outgoing argument area.  It is used when saving call preserved regs.  */
static HOST_WIDE_INT apparent_fsize;
static HOST_WIDE_INT actual_fsize;

/* Number of live general or floating point registers needed to be
   saved (as 4-byte quantities).  */
static int num_gfregs;

/* The alias set for prologue/epilogue register save/restore.  */
static GTY(()) alias_set_type sparc_sr_alias_set;

/* The alias set for the structure return value.  */
static GTY(()) alias_set_type struct_value_alias_set;
/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this vector to
   eliminate it; you must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100};

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1};
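
/* A minimal illustrative sketch (hypothetical helper, not used by the
   port): under leaf treatment no register window is allocated, so the
   remap table above rewrites the incoming %i0-%i5 (hard regs 24-29) to
   the caller's outgoing %o0-%o5 (hard regs 8-13) and %i7 (31) to %o7
   (15); an entry of -1 marks a register that may not appear at all.  */

static int ATTRIBUTE_UNUSED
sparc_example_leaf_remap (int regno)
{
  /* E.g. sparc_example_leaf_remap (24) == 8, i.e. %i0 becomes %o0.  */
  return (int) (signed char) leaf_reg_remap[regno];
}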

struct GTY(()) machine_function
{
  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of current_function_uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_leaf_function_p  cfun->machine->leaf_function_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* Register we pretend to think the frame pointer is allocated to.
   Normally, this is %fp, but if we are in a leaf procedure, this
   is %sp+"something".  We record "something" separately as it may
   be too big for reg+constant addressing.  */
static rtx frame_base_reg;
static HOST_WIDE_INT frame_base_offset;

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static bool sparc_handle_option (size_t, const char *, int);
static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
                                const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx, rtx, rtx, int);
static int hypersparc_adjust_cost (rtx, rtx, rtx, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx);
static void load_got_register (void);
static int save_or_restore_regs (int, int, rtx, int, int);
static void emit_save_or_restore_regs (int);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
                                                 tree) ATTRIBUTE_UNUSED;
static int sparc_adjust_cost (rtx, rtx, rtx, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_vis_init_builtins (void);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static int sparc_vis_mul8x16 (int, int);
static tree sparc_handle_vis_mul8x16 (int, tree, tree, tree);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
                                       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static bool sparc_rtx_costs (rtx, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (enum machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
                                                      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (CUMULATIVE_ARGS *);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx);
static bool sparc_pass_by_reference (CUMULATIVE_ARGS *,
                                     enum machine_mode, const_tree, bool);
static void sparc_function_arg_advance (CUMULATIVE_ARGS *,
                                        enum machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (const CUMULATIVE_ARGS *,
                                 enum machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (CUMULATIVE_ARGS *,
                               enum machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (CUMULATIVE_ARGS *,
                                        enum machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (enum machine_mode,
                                                 const_tree);
static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *,
                                    enum machine_mode, tree, bool);
static void sparc_dwarf_handle_frame_unspec (const char *, rtx, int);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

struct sparc_cpu_select sparc_select[] =
{
  /* switch name, tune arch */
  { (char *)0, "default", 1, 1 },
  { (char *)0, "-mcpu=", 1, 1 },
  { (char *)0, "-mtune=", 1, 0 },
  { 0, 0, 0, 0 }
};

/* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
enum processor_type sparc_cpu;

/* Whether an FPU option was specified.  */
static bool fpu_option_set = false;

/* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
static const struct default_options sparc_option_optimization_table[] =
  {
    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
    { OPT_LEVELS_NONE, 0, NULL, 0 }
  };

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC sparc_dwarf_handle_frame_unspec

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sparc_handle_option
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override
#undef TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE sparc_option_optimization_table

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

struct gcc_target targetm = TARGET_INITIALIZER;

/* Implement TARGET_HANDLE_OPTION.  */

static bool
sparc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mfpu:
    case OPT_mhard_float:
    case OPT_msoft_float:
      fpu_option_set = true;
      break;

    case OPT_mcpu_:
      sparc_select[1].string = arg;
      break;

    case OPT_mtune_:
      sparc_select[2].string = arg;
      break;
    }

  return true;
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const char *const name;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, "cypress" },
    { TARGET_CPU_v8, "v8" },
    { TARGET_CPU_supersparc, "supersparc" },
    { TARGET_CPU_hypersparc, "hypersparc" },
    { TARGET_CPU_leon, "leon" },
    { TARGET_CPU_sparclite, "f930" },
    { TARGET_CPU_sparclite86x, "sparclite86x" },
    { TARGET_CPU_sparclet, "tsc701" },
    { TARGET_CPU_v9, "v9" },
    { TARGET_CPU_ultrasparc, "ultrasparc" },
    { TARGET_CPU_ultrasparc3, "ultrasparc3" },
    { TARGET_CPU_niagara, "niagara" },
    { TARGET_CPU_niagara2, "niagara2" },
    { 0, 0 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  */
  static struct cpu_table {
    const char *const name;
    const enum processor_type processor;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", PROCESSOR_V7, MASK_ISA, 0 },
    { "cypress", PROCESSOR_CYPRESS, MASK_ISA, 0 },
    { "v8", PROCESSOR_V8, MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
    { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
    /* LEON */
    { "leon", PROCESSOR_LEON, MASK_ISA, MASK_V8|MASK_FPU },
    { "sparclite", PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
      MASK_SPARCLITE },
    { "sparclet", PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
    { "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS},
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS},
    /* UltraSPARC T1 */
    { "niagara", PROCESSOR_NIAGARA, MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS},
    /* UltraSPARC T2 */
    { "niagara2", PROCESSOR_NIAGARA2, MASK_ISA, MASK_V9},
    { 0, (enum processor_type) 0, 0, 0 }
  };
  const struct cpu_table *cpu;
  const struct sparc_cpu_select *sel;
  int fpu;

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
           DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
        {
          for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
            if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
              break;
          if (cmodel->name == NULL)
            error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
          else
            sparc_cmodel = cmodel->value;
        }
      else
        error ("-mcmodel= is not supported on 32 bit systems");
    }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  for (def = &cpu_default[0]; def->name; ++def)
    if (def->cpu == TARGET_CPU_DEFAULT)
      break;
  gcc_assert (def->name);
  sparc_select[0].string = def->name;

  for (sel = &sparc_select[0]; sel->name; ++sel)
    {
      if (sel->string)
        {
          for (cpu = &cpu_table[0]; cpu->name; ++cpu)
            if (! strcmp (sel->string, cpu->name))
              {
                if (sel->set_tune_p)
                  sparc_cpu = cpu->processor;

                if (sel->set_arch_p)
                  {
                    target_flags &= ~cpu->disable;
                    target_flags |= cpu->enable;
                  }
                break;
              }

          if (! cpu->name)
            error ("bad value (%s) for %s switch", sel->string, sel->name);
        }
    }

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (fpu_option_set)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* Don't allow -mvis if FPU is disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~MASK_VIS;

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
          || sparc_cpu == PROCESSOR_ULTRASPARC3
          || sparc_cpu == PROCESSOR_NIAGARA
          || sparc_cpu == PROCESSOR_NIAGARA2))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Acquire unique alias sets for our private stuff.  */
  sparc_sr_alias_set = new_alias_set ();
  struct_value_alias_set = new_alias_set ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    };

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
                         ((sparc_cpu == PROCESSOR_ULTRASPARC
                           || sparc_cpu == PROCESSOR_NIAGARA
                           || sparc_cpu == PROCESSOR_NIAGARA2)
                          ? 2
                          : (sparc_cpu == PROCESSOR_ULTRASPARC3
                             ? 8 : 3)),
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
                         ((sparc_cpu == PROCESSOR_ULTRASPARC
                           || sparc_cpu == PROCESSOR_ULTRASPARC3
                           || sparc_cpu == PROCESSOR_NIAGARA
                           || sparc_cpu == PROCESSOR_NIAGARA2)
                          ? 64 : 32),
                         global_options.x_param_values,
                         global_options_set.x_param_values);
}

/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
          || code == LE || code == GT);
}
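
/* A small self-checking sketch (illustrative only, never called): the
   v9 branch-on-register and movr instructions compare a register
   against zero with signed semantics, so only the six codes listed
   above qualify; unsigned codes must go through the condition codes.  */

static void ATTRIBUTE_UNUSED
sparc_example_regcmp (void)
{
  gcc_assert (v9_regcmp_p (EQ) && v9_regcmp_p (LT) && v9_regcmp_p (GT));
  gcc_assert (! v9_regcmp_p (LTU) && ! v9_regcmp_p (GEU));
}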

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}
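
/* A minimal sketch (hypothetical helper, not part of the port) tying
   the three predicates above together: given the 32-bit image I of an
   SF constant, SPARC_SIMM13_P accepts 13-bit signed immediates (one
   mov), SPARC_SETHI_P accepts values whose low 10 bits are clear (one
   sethi), and everything else needs the two-insn high/losum sequence.  */

static const char * ATTRIBUTE_UNUSED
sparc_example_sf_load_strategy (long i)
{
  if (SPARC_SIMM13_P (i))
    return "mov";       /* the fp_mov_p case */
  else if (SPARC_SETHI_P (i))
    return "sethi";     /* the fp_sethi_p case */
  else
    return "sethi+or";  /* the fp_high_losum_p case */
}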

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one; now mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partition into hot
     and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}

/* Expand a move instruction.  Return true if all work is done.  */

bool
sparc_expand_move (enum machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
        return false;

      if (!reload_in_progress)
        {
          operands[0] = validize_mem (operands[0]);
          operands[1] = force_reg (mode, operands[1]);
        }
    }

  /* Fixup TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
        operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if (GET_CODE (operands[1]) == LABEL_REF
          && can_use_mov_pic_label_ref (operands[1]))
        {
          if (mode == SImode)
            {
              emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
              return true;
            }

          if (mode == DImode)
            {
              gcc_assert (TARGET_ARCH64);
              emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
              return true;
            }
        }

      if (symbolic_operand (operands[1], mode))
        {
          operands[1]
            = sparc_legitimize_pic_address (operands[1],
                                            reload_in_progress
                                            ? operands[0] : NULL_RTX);
          return false;
        }
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
          || SCALAR_FLOAT_MODE_P (mode)
          || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
         not storing directly into memory.  So fix this up to avoid
         crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
        operands[1] = CONST0_RTX (mode);

      /* We can clear FP registers if TARGET_VIS, and we can always clear
         the other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
          && const_zero_operand (operands[1], mode))
        return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
          /* We are able to build any SF constant in integer registers
             with at most 2 instructions.  */
          && (mode == SFmode
              /* And any DF constant in integer registers.  */
              || (mode == DFmode
                  && (reload_completed || reload_in_progress))))
        return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
        operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Load OP1, a 32-bit constant, into OP0, a register.
   We know it can't be done in one insn when we get here;
   the move expander guarantees this.  */

static void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp;

  if (reload_in_progress || reload_completed)
    temp = op0;
  else
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      gcc_assert (!small_int_operand (op1, mode)
                  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM,
         this way CSE can see everything and reuse intermediate
         values if it wants.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
                              GEN_INT (INTVAL (op1)
                                       & ~(HOST_WIDE_INT)0x3ff)));

      emit_insn (gen_rtx_SET (VOIDmode,
                              op0,
                              gen_rtx_IOR (mode, temp,
                                           GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
                              gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
                              op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}
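
/* Worked example (a sketch, not used anywhere): the CONST_INT path
   above splits a 32-bit constant into the 22 high bits, which sethi
   can materialize, and the low 10 bits, which fit in the immediate
   field of an or.  For 0x12345678 this yields
       sethi %hi(0x12345678), %tmp   ! %tmp = 0x12345400
       or    %tmp, 0x278, %reg       ! %reg = 0x12345678  */

static void ATTRIBUTE_UNUSED
sparc_example_split_const32 (unsigned int val,
                             unsigned int *sethi_part, unsigned int *or_part)
{
  *sethi_part = val & ~(unsigned int) 0x3ff; /* top 22 bits */
  *or_part = val & 0x3ff;                    /* low 10 bits */
}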

/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
   If TEMP is nonzero, we are forbidden to use any other scratch
   registers.  Otherwise, we are allowed to generate them as needed.

   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */

void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code-model support.  */
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
         than 2^31 bytes (2GB) and the distance from any instruction
         to the location of the label _GLOBAL_OFFSET_TABLE_ is less
         than 2^31 bytes (2GB).

         The executable must be in the low 4TB of the virtual address
         space.

         sethi  %hi(symbol), %temp1
         or     %temp1, %lo(symbol), %reg  */
      if (temp)
        temp1 = temp;  /* op0 is allowed.  */
      else
        temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
         than 2^31 bytes (2GB) and the distance from any instruction
         to the location of the label _GLOBAL_OFFSET_TABLE_ is less
         than 2^31 bytes (2GB).

         The executable must be in the low 16TB of the virtual address
         space.

         sethi  %h44(symbol), %temp1
         or     %temp1, %m44(symbol), %temp2
         sllx   %temp2, 12, %temp3
         or     %temp3, %l44(symbol), %reg  */
      if (temp)
        {
          temp1 = op0;
          temp2 = op0;
          temp3 = temp;  /* op0 is allowed.  */
        }
      else
        {
          temp1 = gen_reg_rtx (DImode);
          temp2 = gen_reg_rtx (DImode);
          temp3 = gen_reg_rtx (DImode);
        }

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp3,
                              gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
         than 2^31 bytes (2GB) and the distance from any instruction
         to the location of the label _GLOBAL_OFFSET_TABLE_ is less
         than 2^31 bytes (2GB).

         The executable can be placed anywhere in the virtual address
         space.

         sethi  %hh(symbol), %temp1
         sethi  %lm(symbol), %temp2
         or     %temp1, %hm(symbol), %temp3
         sllx   %temp3, 32, %temp4
         or     %temp4, %temp2, %temp5
         or     %temp5, %lo(symbol), %reg  */
      if (temp)
        {
          /* It is possible that one of the registers we got for operands[2]
             might coincide with that of operands[0] (which is why we made
             it TImode).  Pick the other one to use as our scratch.  */
          if (rtx_equal_p (temp, op0))
            {
              gcc_assert (ti_temp);
              temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
            }
          temp1 = op0;
          temp2 = temp;  /* op0 is _not_ allowed, see above.  */
          temp3 = op0;
          temp4 = op0;
          temp5 = op0;
        }
      else
        {
          temp1 = gen_reg_rtx (DImode);
          temp2 = gen_reg_rtx (DImode);
          temp3 = gen_reg_rtx (DImode);
          temp4 = gen_reg_rtx (DImode);
          temp5 = gen_reg_rtx (DImode);
        }

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp4,
                              gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, temp5,
                              gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
         Essentially it is MEDLOW with a fixed 64-bit
         virtual base added to all data segment addresses.
         Text-segment stuff is computed like MEDANY, we can't
         reuse the code above because the relocation knobs
         look different.

         Data segment:  sethi  %hi(symbol), %temp1
                        add    %temp1, EMBMEDANY_BASE_REG, %temp2
                        or     %temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
        {
          if (temp)
            {
              temp1 = temp;  /* op0 is allowed.  */
              temp2 = op0;
            }
          else
            {
              temp1 = gen_reg_rtx (DImode);
              temp2 = gen_reg_rtx (DImode);
            }

          emit_insn (gen_embmedany_sethi (temp1, op1));
          emit_insn (gen_embmedany_brsum (temp2, temp1));
          emit_insn (gen_embmedany_losum (op0, temp2, op1));
        }

      /* Text segment:  sethi  %uhi(symbol), %temp1
                        sethi  %hi(symbol), %temp2
                        or     %temp1, %ulo(symbol), %temp3
                        sllx   %temp3, 32, %temp4
                        or     %temp4, %temp2, %temp5
                        or     %temp5, %lo(symbol), %reg  */
      else
        {
          if (temp)
            {
              /* It is possible that one of the registers we got for operands[2]
                 might coincide with that of operands[0] (which is why we made
                 it TImode).  Pick the other one to use as our scratch.  */
              if (rtx_equal_p (temp, op0))
                {
                  gcc_assert (ti_temp);
                  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
                }
              temp1 = op0;
              temp2 = temp;  /* op0 is _not_ allowed, see above.  */
              temp3 = op0;
              temp4 = op0;
              temp5 = op0;
            }
          else
            {
              temp1 = gen_reg_rtx (DImode);
              temp2 = gen_reg_rtx (DImode);
              temp3 = gen_reg_rtx (DImode);
              temp4 = gen_reg_rtx (DImode);
              temp5 = gen_reg_rtx (DImode);
            }

          emit_insn (gen_embmedany_textuhi (temp1, op1));
          emit_insn (gen_embmedany_texthi (temp2, op1));
          emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
          emit_insn (gen_rtx_SET (VOIDmode, temp4,
                                  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
          emit_insn (gen_rtx_SET (VOIDmode, temp5,
                                  gen_rtx_PLUS (DImode, temp4, temp2)));
          emit_insn (gen_embmedany_textlo (op0, temp5, op1));
        }
      break;

    default:
      gcc_unreachable ();
    }
}

#if HOST_BITS_PER_WIDE_INT == 32
static void
sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
#else
/* These avoid problems when cross compiling.  If we do not
   go through all this hair then the optimizer will see
   invalid REG_EQUAL notes or in some cases none at all.  */
static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);

/* The optimizer is not allowed to assume anything about exactly which
   bits are set for a HIGH; they are unspecified.  Unfortunately this
   leads to many missed optimizations during CSE.  Masking out the
   non-HIGH bits lets the result match a plain movdi, which alleviates
   the problem.  */
static rtx
gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
}

static rtx
gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
}

static rtx
gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_IOR (DImode, src, GEN_INT (val));
}

static rtx
gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_XOR (DImode, src, GEN_INT (val));
}

/* Worker routines for 64-bit constant formation on arch64.
   One of the key things to be doing in these emissions is
   to create as many temp REGs as possible.  This makes it
   possible for half-built constants to be used later when
   such values are similar to something required later on.
   Without doing this, the optimizer cannot see such
   opportunities.  */

static void sparc_emit_set_const64_quick1 (rtx, rtx,
                                           unsigned HOST_WIDE_INT, int);

static void
sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
                               unsigned HOST_WIDE_INT low_bits, int is_neg)
{
  unsigned HOST_WIDE_INT high_bits;

  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  emit_insn (gen_safe_HIGH64 (temp, high_bits));
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (VOIDmode, op0,
                              gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
         instead.  This way the combiner will notice logical operations
         such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
        {
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_NOT (DImode, temp)));
        }
      else
        {
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_safe_XOR64 (temp,
                                                  (-(HOST_WIDE_INT)0x400
                                                   | (low_bits & 0x3ff)))));
        }
    }
}
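
/* A self-checking sketch (illustrative only, never called) of the
   is_neg identity used above: for a sign-extended negative 32-bit
   constant, a sethi of the complemented high bits followed by an xor
   with a negative 13-bit immediate reconstructs the value, because the
   xor flips bits 63..10 back while installing the low 10 bits.  */

static void ATTRIBUTE_UNUSED
sparc_example_quick1_neg (void)
{
  unsigned HOST_WIDE_INT low_bits = 0xfedcba98;  /* low word of target */
  unsigned HOST_WIDE_INT target
    = (~(unsigned HOST_WIDE_INT) 0 << 32) | low_bits;
  unsigned HOST_WIDE_INT high_bits = (~low_bits) & 0xffffffff;
  unsigned HOST_WIDE_INT sethi_val
    = high_bits & ~(unsigned HOST_WIDE_INT) 0x3ff;
  unsigned HOST_WIDE_INT xor_imm
    = (unsigned HOST_WIDE_INT) (-(HOST_WIDE_INT) 0x400
                                | (HOST_WIDE_INT) (low_bits & 0x3ff));

  gcc_assert ((sethi_val ^ xor_imm) == target);
}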
1551
1552 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
1553 unsigned HOST_WIDE_INT, int);
1554
1555 static void
1556 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
1557 unsigned HOST_WIDE_INT high_bits,
1558 unsigned HOST_WIDE_INT low_immediate,
1559 int shift_count)
1560 {
1561 rtx temp2 = op0;
1562
1563 if ((high_bits & 0xfffffc00) != 0)
1564 {
1565 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1566 if ((high_bits & ~0xfffffc00) != 0)
1567 emit_insn (gen_rtx_SET (VOIDmode, op0,
1568 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1569 else
1570 temp2 = temp;
1571 }
1572 else
1573 {
1574 emit_insn (gen_safe_SET64 (temp, high_bits));
1575 temp2 = temp;
1576 }
1577
1578 /* Now shift it up into place. */
1579 emit_insn (gen_rtx_SET (VOIDmode, op0,
1580 gen_rtx_ASHIFT (DImode, temp2,
1581 GEN_INT (shift_count))));
1582
1583 /* If there is a low immediate part piece, finish up by
1584 putting that in as well. */
1585 if (low_immediate != 0)
1586 emit_insn (gen_rtx_SET (VOIDmode, op0,
1587 gen_safe_OR64 (op0, low_immediate)));
1588 }
1589
1590 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
1591 unsigned HOST_WIDE_INT);
1592
1593 /* Full 64-bit constant decomposition. Even though this is the
1594 'worst' case, we still optimize a few things away. */
1595 static void
1596 sparc_emit_set_const64_longway (rtx op0, rtx temp,
1597 unsigned HOST_WIDE_INT high_bits,
1598 unsigned HOST_WIDE_INT low_bits)
1599 {
1600 rtx sub_temp;
1601
1602 if (reload_in_progress || reload_completed)
1603 sub_temp = op0;
1604 else
1605 sub_temp = gen_reg_rtx (DImode);
1606
1607 if ((high_bits & 0xfffffc00) != 0)
1608 {
1609 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1610 if ((high_bits & ~0xfffffc00) != 0)
1611 emit_insn (gen_rtx_SET (VOIDmode,
1612 sub_temp,
1613 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1614 else
1615 sub_temp = temp;
1616 }
1617 else
1618 {
1619 emit_insn (gen_safe_SET64 (temp, high_bits));
1620 sub_temp = temp;
1621 }
1622
1623 if (!reload_in_progress && !reload_completed)
1624 {
1625 rtx temp2 = gen_reg_rtx (DImode);
1626 rtx temp3 = gen_reg_rtx (DImode);
1627 rtx temp4 = gen_reg_rtx (DImode);
1628
1629 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1630 gen_rtx_ASHIFT (DImode, sub_temp,
1631 GEN_INT (32))));
1632
1633 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
1634 if ((low_bits & ~0xfffffc00) != 0)
1635 {
1636 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1637 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
1638 emit_insn (gen_rtx_SET (VOIDmode, op0,
1639 gen_rtx_PLUS (DImode, temp4, temp3)));
1640 }
1641 else
1642 {
1643 emit_insn (gen_rtx_SET (VOIDmode, op0,
1644 gen_rtx_PLUS (DImode, temp4, temp2)));
1645 }
1646 }
1647 else
1648 {
1649 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
1650 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
1651 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
1652 int to_shift = 12;
1653
1654 /* We are in the middle of reload, so this is really
1655 painful. However we do still make an attempt to
1656 avoid emitting truly stupid code. */
1657 if (low1 != const0_rtx)
1658 {
1659 emit_insn (gen_rtx_SET (VOIDmode, op0,
1660 gen_rtx_ASHIFT (DImode, sub_temp,
1661 GEN_INT (to_shift))));
1662 emit_insn (gen_rtx_SET (VOIDmode, op0,
1663 gen_rtx_IOR (DImode, op0, low1)));
1664 sub_temp = op0;
1665 to_shift = 12;
1666 }
1667 else
1668 {
1669 to_shift += 12;
1670 }
1671 if (low2 != const0_rtx)
1672 {
1673 emit_insn (gen_rtx_SET (VOIDmode, op0,
1674 gen_rtx_ASHIFT (DImode, sub_temp,
1675 GEN_INT (to_shift))));
1676 emit_insn (gen_rtx_SET (VOIDmode, op0,
1677 gen_rtx_IOR (DImode, op0, low2)));
1678 sub_temp = op0;
1679 to_shift = 8;
1680 }
1681 else
1682 {
1683 to_shift += 8;
1684 }
1685 emit_insn (gen_rtx_SET (VOIDmode, op0,
1686 gen_rtx_ASHIFT (DImode, sub_temp,
1687 GEN_INT (to_shift))));
1688 if (low3 != const0_rtx)
1689 emit_insn (gen_rtx_SET (VOIDmode, op0,
1690 gen_rtx_IOR (DImode, op0, low3)));
1691 /* phew... */
1692 }
1693 }
1694
1695 /* Analyze a 64-bit constant for certain properties. */
1696 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
1697 unsigned HOST_WIDE_INT,
1698 int *, int *, int *);
1699
1700 static void
1701 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
1702 unsigned HOST_WIDE_INT low_bits,
1703 int *hbsp, int *lbsp, int *abbasp)
1704 {
1705 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1706 int i;
1707
1708 lowest_bit_set = highest_bit_set = -1;
1709 i = 0;
1710 do
1711 {
1712 if ((lowest_bit_set == -1)
1713 && ((low_bits >> i) & 1))
1714 lowest_bit_set = i;
1715 if ((highest_bit_set == -1)
1716 && ((high_bits >> (32 - i - 1)) & 1))
1717 highest_bit_set = (64 - i - 1);
1718 }
1719 while (++i < 32
1720 && ((highest_bit_set == -1)
1721 || (lowest_bit_set == -1)));
1722 if (i == 32)
1723 {
1724 i = 0;
1725 do
1726 {
1727 if ((lowest_bit_set == -1)
1728 && ((high_bits >> i) & 1))
1729 lowest_bit_set = i + 32;
1730 if ((highest_bit_set == -1)
1731 && ((low_bits >> (32 - i - 1)) & 1))
1732 highest_bit_set = 32 - i - 1;
1733 }
1734 while (++i < 32
1735 && ((highest_bit_set == -1)
1736 || (lowest_bit_set == -1)));
1737 }
1738 /* If there are no bits set this should have gone out
1739 as one instruction! */
1740 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
1741 all_bits_between_are_set = 1;
1742 for (i = lowest_bit_set; i <= highest_bit_set; i++)
1743 {
1744 if (i < 32)
1745 {
1746 if ((low_bits & (1 << i)) != 0)
1747 continue;
1748 }
1749 else
1750 {
1751 if ((high_bits & (1 << (i - 32))) != 0)
1752 continue;
1753 }
1754 all_bits_between_are_set = 0;
1755 break;
1756 }
1757 *hbsp = highest_bit_set;
1758 *lbsp = lowest_bit_set;
1759 *abbasp = all_bits_between_are_set;
1760 }
1761
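/* Worked example, for illustration only: for the constant
   0x00000ff000000000, i.e. high_bits == 0x00000ff0 and low_bits == 0,
   the first scan finds highest_bit_set == 43 (bit 11 of high_bits) and
   the second scan finds lowest_bit_set == 36 (bit 4 of high_bits).
   Every bit in between is set, so *abbasp comes out as 1.  */
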
1762 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
1763
1764 static int
1765 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
1766 unsigned HOST_WIDE_INT low_bits)
1767 {
1768 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1769
1770 if (high_bits == 0
1771 || high_bits == 0xffffffff)
1772 return 1;
1773
1774 analyze_64bit_constant (high_bits, low_bits,
1775 &highest_bit_set, &lowest_bit_set,
1776 &all_bits_between_are_set);
1777
1778 if ((highest_bit_set == 63
1779 || lowest_bit_set == 0)
1780 && all_bits_between_are_set != 0)
1781 return 1;
1782
1783 if ((highest_bit_set - lowest_bit_set) < 21)
1784 return 1;
1785
1786 return 0;
1787 }
1788
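/* Continuing the example above: with bits 36--43 set, the difference
   highest_bit_set - lowest_bit_set is 7 < 21, so const64_is_2insns
   returns 1 even though neither endpoint condition holds.  */
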
1789 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
1790 unsigned HOST_WIDE_INT,
1791 int, int);
1792
1793 static unsigned HOST_WIDE_INT
1794 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
1795 unsigned HOST_WIDE_INT low_bits,
1796 int lowest_bit_set, int shift)
1797 {
1798 HOST_WIDE_INT hi, lo;
1799
1800 if (lowest_bit_set < 32)
1801 {
1802 lo = (low_bits >> lowest_bit_set) << shift;
1803 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
1804 }
1805 else
1806 {
1807 lo = 0;
1808 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
1809 }
1810 gcc_assert (! (hi & lo));
1811 return (hi | lo);
1812 }
1813
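/* Illustrative example: with high_bits == 0x00000ff0 and
   lowest_bit_set == 36 as above, a call with shift == 10 yields
   (0x00000ff0 >> 4) << 10 == 0x3fc00, i.e. the set bits re-based to
   bit 10 where a sethi immediate can reach them.  */
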
1814 /* At this point we know we are on arch64 and that an integer constant
1815 is being loaded into a register. Emit the most efficient
1816 insn sequence possible. Detection of all the 1-insn cases
1817 has been done already. */
1818 static void
1819 sparc_emit_set_const64 (rtx op0, rtx op1)
1820 {
1821 unsigned HOST_WIDE_INT high_bits, low_bits;
1822 int lowest_bit_set, highest_bit_set;
1823 int all_bits_between_are_set;
1824 rtx temp = 0;
1825
1826 /* Sanity check that we know what we are working with. */
1827 gcc_assert (TARGET_ARCH64
1828 && (GET_CODE (op0) == SUBREG
1829 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
1830
1831 if (reload_in_progress || reload_completed)
1832 temp = op0;
1833
1834 if (GET_CODE (op1) != CONST_INT)
1835 {
1836 sparc_emit_set_symbolic_const64 (op0, op1, temp);
1837 return;
1838 }
1839
1840 if (! temp)
1841 temp = gen_reg_rtx (DImode);
1842
1843 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1844 low_bits = (INTVAL (op1) & 0xffffffff);
1845
1846 /* low_bits bits 0 --> 31
1847 high_bits bits 32 --> 63 */
1848
1849 analyze_64bit_constant (high_bits, low_bits,
1850 &highest_bit_set, &lowest_bit_set,
1851 &all_bits_between_are_set);
1852
1853 /* First try for a 2-insn sequence. */
1854
1855 /* These situations are preferred because the optimizer can
1856 * do more things with them:
1857 * 1) mov -1, %reg
1858 * sllx %reg, shift, %reg
1859 * 2) mov -1, %reg
1860 * srlx %reg, shift, %reg
1861 * 3) mov some_small_const, %reg
1862 * sllx %reg, shift, %reg
1863 */
1864 if (((highest_bit_set == 63
1865 || lowest_bit_set == 0)
1866 && all_bits_between_are_set != 0)
1867 || ((highest_bit_set - lowest_bit_set) < 12))
1868 {
1869 HOST_WIDE_INT the_const = -1;
1870 int shift = lowest_bit_set;
1871
1872 if ((highest_bit_set != 63
1873 && lowest_bit_set != 0)
1874 || all_bits_between_are_set == 0)
1875 {
1876 the_const =
1877 create_simple_focus_bits (high_bits, low_bits,
1878 lowest_bit_set, 0);
1879 }
1880 else if (lowest_bit_set == 0)
1881 shift = -(63 - highest_bit_set);
1882
1883 gcc_assert (SPARC_SIMM13_P (the_const));
1884 gcc_assert (shift != 0);
1885
1886 emit_insn (gen_safe_SET64 (temp, the_const));
1887 if (shift > 0)
1888 emit_insn (gen_rtx_SET (VOIDmode,
1889 op0,
1890 gen_rtx_ASHIFT (DImode,
1891 temp,
1892 GEN_INT (shift))));
1893 else if (shift < 0)
1894 emit_insn (gen_rtx_SET (VOIDmode,
1895 op0,
1896 gen_rtx_LSHIFTRT (DImode,
1897 temp,
1898 GEN_INT (-shift))));
1899 return;
1900 }
1901
1902 /* Now a range of 22 or fewer bits set somewhere.
1903 * 1) sethi %hi(focus_bits), %reg
1904 * sllx %reg, shift, %reg
1905 * 2) sethi %hi(focus_bits), %reg
1906 * srlx %reg, shift, %reg
1907 */
1908 if ((highest_bit_set - lowest_bit_set) < 21)
1909 {
1910 unsigned HOST_WIDE_INT focus_bits =
1911 create_simple_focus_bits (high_bits, low_bits,
1912 lowest_bit_set, 10);
1913
1914 gcc_assert (SPARC_SETHI_P (focus_bits));
1915 gcc_assert (lowest_bit_set != 10);
1916
1917 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
1918
1919 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
1920 if (lowest_bit_set < 10)
1921 emit_insn (gen_rtx_SET (VOIDmode,
1922 op0,
1923 gen_rtx_LSHIFTRT (DImode, temp,
1924 GEN_INT (10 - lowest_bit_set))));
1925 else if (lowest_bit_set > 10)
1926 emit_insn (gen_rtx_SET (VOIDmode,
1927 op0,
1928 gen_rtx_ASHIFT (DImode, temp,
1929 GEN_INT (lowest_bit_set - 10))));
1930 return;
1931 }
1932
1933 /* 1) sethi %hi(low_bits), %reg
1934 * or %reg, %lo(low_bits), %reg
1935 * 2) sethi %hi(~low_bits), %reg
1936 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1937 */
1938 if (high_bits == 0
1939 || high_bits == 0xffffffff)
1940 {
1941 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1942 (high_bits == 0xffffffff));
1943 return;
1944 }
1945
1946 /* Now, try 3-insn sequences. */
1947
1948 /* 1) sethi %hi(high_bits), %reg
1949 * or %reg, %lo(high_bits), %reg
1950 * sllx %reg, 32, %reg
1951 */
1952 if (low_bits == 0)
1953 {
1954 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1955 return;
1956 }
1957
1958 /* We may be able to do something quick
1959 when the constant is negated, so try that. */
1960 if (const64_is_2insns ((~high_bits) & 0xffffffff,
1961 (~low_bits) & 0xfffffc00))
1962 {
1963 /* NOTE: The trailing bits get XOR'd so we need the
1964 non-negated bits, not the negated ones. */
1965 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1966
1967 if ((((~high_bits) & 0xffffffff) == 0
1968 && ((~low_bits) & 0x80000000) == 0)
1969 || (((~high_bits) & 0xffffffff) == 0xffffffff
1970 && ((~low_bits) & 0x80000000) != 0))
1971 {
1972 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
1973
1974 if ((SPARC_SETHI_P (fast_int)
1975 && (~high_bits & 0xffffffff) == 0)
1976 || SPARC_SIMM13_P (fast_int))
1977 emit_insn (gen_safe_SET64 (temp, fast_int));
1978 else
1979 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
1980 }
1981 else
1982 {
1983 rtx negated_const;
1984 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
1985 (((HOST_WIDE_INT) ((~high_bits) & 0xffffffff)) << 32));
1986 sparc_emit_set_const64 (temp, negated_const);
1987 }
1988
1989 /* If we are XOR'ing with -1, then we should emit a one's complement
1990 instead. This way the combiner will notice logical operations
1991 such as ANDN later on and substitute. */
1992 if (trailing_bits == 0x3ff)
1993 {
1994 emit_insn (gen_rtx_SET (VOIDmode, op0,
1995 gen_rtx_NOT (DImode, temp)));
1996 }
1997 else
1998 {
1999 emit_insn (gen_rtx_SET (VOIDmode,
2000 op0,
2001 gen_safe_XOR64 (temp,
2002 (-0x400 | trailing_bits))));
2003 }
2004 return;
2005 }
2006
2007 /* 1) sethi %hi(xxx), %reg
2008 * or %reg, %lo(xxx), %reg
2009 * sllx %reg, yyy, %reg
2010 *
2011 * ??? This is just a generalized version of the low_bits==0
2012 * thing above, FIXME...
2013 */
2014 if ((highest_bit_set - lowest_bit_set) < 32)
2015 {
2016 unsigned HOST_WIDE_INT focus_bits =
2017 create_simple_focus_bits (high_bits, low_bits,
2018 lowest_bit_set, 0);
2019
2020 /* We can't get here in this state. */
2021 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2022
2023 /* So what we know is that the set bits straddle the
2024 middle of the 64-bit word. */
2025 sparc_emit_set_const64_quick2 (op0, temp,
2026 focus_bits, 0,
2027 lowest_bit_set);
2028 return;
2029 }
2030
2031 /* 1) sethi %hi(high_bits), %reg
2032 * or %reg, %lo(high_bits), %reg
2033 * sllx %reg, 32, %reg
2034 * or %reg, low_bits, %reg
2035 */
2036 if (SPARC_SIMM13_P (low_bits)
2037 && ((int) low_bits > 0))
2038 {
2039 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2040 return;
2041 }
2042
2043 /* The easiest way when all else fails, is full decomposition. */
2044 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2045 }
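
/* End-to-end sketch, not from the original source: for the constant
   0x00000ff000000000 analyzed in the examples above, the span is
   7 < 12, so the simm13 path applies:

	mov	0xff, %reg
	sllx	%reg, 36, %reg

   A wider field, e.g. bits 36--50 (0x0007fff000000000), misses the
   simm13 window but still spans fewer than 21 bits, so it goes out as:

	sethi	%hi(0x1fffc00), %reg	! focus bits re-based to bit 10
	sllx	%reg, 26, %reg		! shift by 36 - 10
*/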
2046 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2047
2048 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2049 return the mode to be used for the comparison. For floating-point,
2050 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2051 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2052 processing is needed. */
2053
2054 enum machine_mode
2055 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2056 {
2057 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2058 {
2059 switch (op)
2060 {
2061 case EQ:
2062 case NE:
2063 case UNORDERED:
2064 case ORDERED:
2065 case UNLT:
2066 case UNLE:
2067 case UNGT:
2068 case UNGE:
2069 case UNEQ:
2070 case LTGT:
2071 return CCFPmode;
2072
2073 case LT:
2074 case LE:
2075 case GT:
2076 case GE:
2077 return CCFPEmode;
2078
2079 default:
2080 gcc_unreachable ();
2081 }
2082 }
2083 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2084 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2085 {
2086 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2087 return CCX_NOOVmode;
2088 else
2089 return CC_NOOVmode;
2090 }
2091 else
2092 {
2093 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2094 return CCXmode;
2095 else
2096 return CCmode;
2097 }
2098 }
2099
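/* Illustrative examples: comparing (plus:SI a b) against zero yields
   CC_NOOVmode, recording that the flags come from an arithmetic insn
   whose overflow bit cannot be trusted; LT/LE/GT/GE on floats yield
   the signaling CCFPEmode while EQ/NE and the unordered variants use
   CCFPmode; a plain DImode comparison on a 64-bit target uses
   CCXmode.  */
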
2100 /* Emit the compare insn and return the CC reg for a CODE comparison
2101 with operands X and Y. */
2102
2103 static rtx
2104 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2105 {
2106 enum machine_mode mode;
2107 rtx cc_reg;
2108
2109 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2110 return x;
2111
2112 mode = SELECT_CC_MODE (code, x, y);
2113
2114 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2115 fcc regs (cse can't tell they're really call clobbered regs and will
2116 remove a duplicate comparison even if there is an intervening function
2117 call - it will then try to reload the cc reg via an int reg which is why
2118 we need the movcc patterns). It is possible to provide the movcc
2119 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2120 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2121 to tell cse that CCFPE mode registers (even pseudos) are call
2122 clobbered. */
2123
2124 /* ??? This is an experiment. Rather than making changes to cse which may
2125 or may not be easy/clean, we do our own cse. This is possible because
2126 we will generate hard registers. Cse knows they're call clobbered (it
2127 doesn't know the same thing about pseudos). If we guess wrong, no big
2128 deal, but if we win, great! */
2129
2130 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2131 #if 1 /* experiment */
2132 {
2133 int reg;
2134 /* We cycle through the registers to ensure they're all exercised. */
2135 static int next_fcc_reg = 0;
2136 /* Previous x,y for each fcc reg. */
2137 static rtx prev_args[4][2];
2138
2139 /* Scan prev_args for x,y. */
2140 for (reg = 0; reg < 4; reg++)
2141 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2142 break;
2143 if (reg == 4)
2144 {
2145 reg = next_fcc_reg;
2146 prev_args[reg][0] = x;
2147 prev_args[reg][1] = y;
2148 next_fcc_reg = (next_fcc_reg + 1) & 3;
2149 }
2150 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2151 }
2152 #else
2153 cc_reg = gen_reg_rtx (mode);
2154 #endif /* ! experiment */
2155 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2156 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2157 else
2158 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2159
2160 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2161 will only result in an unrecognizable insn, so there is no point in asserting. */
2162 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2163
2164 return cc_reg;
2165 }
2166
2167
2168 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2169
2170 rtx
2171 gen_compare_reg (rtx cmp)
2172 {
2173 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2174 }
2175
2176 /* This function is used for v9 only.
2177 DEST is the target of the Scc insn.
2178 COMPARE_CODE is the code for the Scc's comparison.
2179 X and Y are the values we compare.
2180
2181 This function is needed to turn
2182
2183 (set (reg:SI 110)
2184 (gt (reg:CCX 100 %icc)
2185 (const_int 0)))
2186 into
2187 (set (reg:SI 110)
2188 (gt:DI (reg:CCX 100 %icc)
2189 (const_int 0)))
2190
2191 I.e., the instruction recognizer needs to see the mode of the comparison to
2192 find the right instruction. We could use "gt:DI" right in the
2193 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2194
2195 static int
2196 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2197 {
2198 if (! TARGET_ARCH64
2199 && (GET_MODE (x) == DImode
2200 || GET_MODE (dest) == DImode))
2201 return 0;
2202
2203 /* Try to use the movrCC insns. */
2204 if (TARGET_ARCH64
2205 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2206 && y == const0_rtx
2207 && v9_regcmp_p (compare_code))
2208 {
2209 rtx op0 = x;
2210 rtx temp;
2211
2212 /* Special case for op0 != 0. This can be done with one instruction if
2213 dest == x. */
2214
2215 if (compare_code == NE
2216 && GET_MODE (dest) == DImode
2217 && rtx_equal_p (op0, dest))
2218 {
2219 emit_insn (gen_rtx_SET (VOIDmode, dest,
2220 gen_rtx_IF_THEN_ELSE (DImode,
2221 gen_rtx_fmt_ee (compare_code, DImode,
2222 op0, const0_rtx),
2223 const1_rtx,
2224 dest)));
2225 return 1;
2226 }
2227
2228 if (reg_overlap_mentioned_p (dest, op0))
2229 {
2230 /* Handle the case where dest == x.
2231 We "early clobber" the result. */
2232 op0 = gen_reg_rtx (GET_MODE (x));
2233 emit_move_insn (op0, x);
2234 }
2235
2236 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2237 if (GET_MODE (op0) != DImode)
2238 {
2239 temp = gen_reg_rtx (DImode);
2240 convert_move (temp, op0, 0);
2241 }
2242 else
2243 temp = op0;
2244 emit_insn (gen_rtx_SET (VOIDmode, dest,
2245 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2246 gen_rtx_fmt_ee (compare_code, DImode,
2247 temp, const0_rtx),
2248 const1_rtx,
2249 dest)));
2250 return 1;
2251 }
2252 else
2253 {
2254 x = gen_compare_reg_1 (compare_code, x, y);
2255 y = const0_rtx;
2256
2257 gcc_assert (GET_MODE (x) != CC_NOOVmode
2258 && GET_MODE (x) != CCX_NOOVmode);
2259
2260 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2261 emit_insn (gen_rtx_SET (VOIDmode, dest,
2262 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2263 gen_rtx_fmt_ee (compare_code,
2264 GET_MODE (x), x, y),
2265 const1_rtx, dest)));
2266 return 1;
2267 }
2268 }
2269
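/* Illustrative output, not from the original source: for
   "dest = (dest != 0)" with a DImode dest, the special case above
   collapses to a single v9 register-conditional move,

	movrnz	dest, 1, dest

   while the general path first zeroes dest and then conditionally
   moves 1 into it.  */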
2270
2271 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2272 without jumps using the addx/subx instructions. */
2273
2274 bool
2275 emit_scc_insn (rtx operands[])
2276 {
2277 rtx tem;
2278 rtx x;
2279 rtx y;
2280 enum rtx_code code;
2281
2282 /* The quad-word fp compare library routines all return nonzero to indicate
2283 true, which is different from the equivalent libgcc routines, so we must
2284 handle them specially here. */
2285 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2286 {
2287 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2288 GET_CODE (operands[1]));
2289 operands[2] = XEXP (operands[1], 0);
2290 operands[3] = XEXP (operands[1], 1);
2291 }
2292
2293 code = GET_CODE (operands[1]);
2294 x = operands[2];
2295 y = operands[3];
2296
2297 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2298 more applications). The exception to this is "reg != 0" which can
2299 be done in one instruction on v9 (so we do it). */
2300 if (code == EQ)
2301 {
2302 if (GET_MODE (x) == SImode)
2303 {
2304 rtx pat = gen_seqsi_special (operands[0], x, y);
2305 emit_insn (pat);
2306 return true;
2307 }
2308 else if (GET_MODE (x) == DImode)
2309 {
2310 rtx pat = gen_seqdi_special (operands[0], x, y);
2311 emit_insn (pat);
2312 return true;
2313 }
2314 }
2315
2316 if (code == NE)
2317 {
2318 if (GET_MODE (x) == SImode)
2319 {
2320 rtx pat = gen_snesi_special (operands[0], x, y);
2321 emit_insn (pat);
2322 return true;
2323 }
2324 else if (GET_MODE (x) == DImode)
2325 {
2326 rtx pat = gen_snedi_special (operands[0], x, y);
2327 emit_insn (pat);
2328 return true;
2329 }
2330 }
2331
2332 /* For the rest, on v9 we can use conditional moves. */
2333
2334 if (TARGET_V9)
2335 {
2336 if (gen_v9_scc (operands[0], code, x, y))
2337 return true;
2338 }
2339
2340 /* We can do LTU and GEU using the addx/subx instructions too. And
2341 for GTU/LEU, if both operands are registers, swap them and fall
2342 back to the easy case. */
2343 if (code == GTU || code == LEU)
2344 {
2345 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2346 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2347 {
2348 tem = x;
2349 x = y;
2350 y = tem;
2351 code = swap_condition (code);
2352 }
2353 }
2354
2355 if (code == LTU || code == GEU)
2356 {
2357 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2358 gen_rtx_fmt_ee (code, SImode,
2359 gen_compare_reg_1 (code, x, y),
2360 const0_rtx)));
2361 return true;
2362 }
2363
2364 /* Nope, do branches. */
2365 return false;
2366 }
2367
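/* Sketch of the branchless idiom relied on here, for illustration:
   "subcc %g0, %t, %g0" sets the carry flag iff %t is nonzero, so

	xor	x, y, %t
	subcc	%g0, %t, %g0
	addx	%g0, 0, dest		! dest = (x != y)

   and "subx %g0, -1, dest" computes 1 - C instead, giving seq.  LTU
   and GEU fall out of "subcc x, y, %g0" directly, which is why GTU
   and LEU are handled above by swapping the operands.  */
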
2368 /* Emit a conditional jump insn for the v9 architecture using comparison code
2369 CODE and jump target LABEL.
2370 This function exists to take advantage of the v9 brxx insns. */
2371
2372 static void
2373 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2374 {
2375 emit_jump_insn (gen_rtx_SET (VOIDmode,
2376 pc_rtx,
2377 gen_rtx_IF_THEN_ELSE (VOIDmode,
2378 gen_rtx_fmt_ee (code, GET_MODE (op0),
2379 op0, const0_rtx),
2380 gen_rtx_LABEL_REF (VOIDmode, label),
2381 pc_rtx)));
2382 }
2383
2384 void
2385 emit_conditional_branch_insn (rtx operands[])
2386 {
2387 /* The quad-word fp compare library routines all return nonzero to indicate
2388 true, which is different from the equivalent libgcc routines, so we must
2389 handle them specially here. */
2390 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2391 {
2392 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2393 GET_CODE (operands[0]));
2394 operands[1] = XEXP (operands[0], 0);
2395 operands[2] = XEXP (operands[0], 1);
2396 }
2397
2398 if (TARGET_ARCH64 && operands[2] == const0_rtx
2399 && GET_CODE (operands[1]) == REG
2400 && GET_MODE (operands[1]) == DImode)
2401 {
2402 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2403 return;
2404 }
2405
2406 operands[1] = gen_compare_reg (operands[0]);
2407 operands[2] = const0_rtx;
2408 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2409 operands[1], operands[2]);
2410 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2411 operands[3]));
2412 }
2413
2414
2415 /* Generate a DFmode part of a hard TFmode register.
2416 REG is the TFmode hard register, LOW is 1 for the
2417 low 64 bits of the register and 0 otherwise.
2418 */
2419 rtx
2420 gen_df_reg (rtx reg, int low)
2421 {
2422 int regno = REGNO (reg);
2423
2424 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2425 regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
2426 return gen_rtx_REG (DFmode, regno);
2427 }
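
/* Example, assuming the usual big-endian word order: for a TFmode
   value in %f4 (hard regno 36), gen_df_reg (reg, 0) returns the DFmode
   register %f4 holding the high 64 bits, and gen_df_reg (reg, 1)
   returns %f6 (regno 38) holding the low 64 bits.  Integer TFmode
   registers on arch64 step by 1 instead of 2.  */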
2428 \f
2429 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
2430 Unlike normal calls, TFmode operands are passed by reference. It is
2431 assumed that no more than 3 operands are required. */
2432
2433 static void
2434 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
2435 {
2436 rtx ret_slot = NULL, arg[3], func_sym;
2437 int i;
2438
2439 /* We only expect to be called for conversions, unary, and binary ops. */
2440 gcc_assert (nargs == 2 || nargs == 3);
2441
2442 for (i = 0; i < nargs; ++i)
2443 {
2444 rtx this_arg = operands[i];
2445 rtx this_slot;
2446
2447 /* TFmode arguments and return values are passed by reference. */
2448 if (GET_MODE (this_arg) == TFmode)
2449 {
2450 int force_stack_temp;
2451
2452 force_stack_temp = 0;
2453 if (TARGET_BUGGY_QP_LIB && i == 0)
2454 force_stack_temp = 1;
2455
2456 if (GET_CODE (this_arg) == MEM
2457 && ! force_stack_temp)
2458 this_arg = XEXP (this_arg, 0);
2459 else if (CONSTANT_P (this_arg)
2460 && ! force_stack_temp)
2461 {
2462 this_slot = force_const_mem (TFmode, this_arg);
2463 this_arg = XEXP (this_slot, 0);
2464 }
2465 else
2466 {
2467 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);
2468
2469 /* Operand 0 is the return value. We'll copy it out later. */
2470 if (i > 0)
2471 emit_move_insn (this_slot, this_arg);
2472 else
2473 ret_slot = this_slot;
2474
2475 this_arg = XEXP (this_slot, 0);
2476 }
2477 }
2478
2479 arg[i] = this_arg;
2480 }
2481
2482 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
2483
2484 if (GET_MODE (operands[0]) == TFmode)
2485 {
2486 if (nargs == 2)
2487 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
2488 arg[0], GET_MODE (arg[0]),
2489 arg[1], GET_MODE (arg[1]));
2490 else
2491 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
2492 arg[0], GET_MODE (arg[0]),
2493 arg[1], GET_MODE (arg[1]),
2494 arg[2], GET_MODE (arg[2]));
2495
2496 if (ret_slot)
2497 emit_move_insn (operands[0], ret_slot);
2498 }
2499 else
2500 {
2501 rtx ret;
2502
2503 gcc_assert (nargs == 2);
2504
2505 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
2506 GET_MODE (operands[0]), 1,
2507 arg[1], GET_MODE (arg[1]));
2508
2509 if (ret != operands[0])
2510 emit_move_insn (operands[0], ret);
2511 }
2512 }
2513
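/* Illustrative call shape, assuming the SPARC V9 _Qp_* conventions:
   a TFmode addition ends up as roughly

	_Qp_add (&result, &a, &b);

   i.e. a TFmode result slot is passed by reference as the first
   argument, which is why operand 0 is special-cased above.  */
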
2514 /* Expand soft-float TFmode calls to sparc abi routines. */
2515
2516 static void
2517 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
2518 {
2519 const char *func;
2520
2521 switch (code)
2522 {
2523 case PLUS:
2524 func = "_Qp_add";
2525 break;
2526 case MINUS:
2527 func = "_Qp_sub";
2528 break;
2529 case MULT:
2530 func = "_Qp_mul";
2531 break;
2532 case DIV:
2533 func = "_Qp_div";
2534 break;
2535 default:
2536 gcc_unreachable ();
2537 }
2538
2539 emit_soft_tfmode_libcall (func, 3, operands);
2540 }
2541
2542 static void
2543 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
2544 {
2545 const char *func;
2546
2547 gcc_assert (code == SQRT);
2548 func = "_Qp_sqrt";
2549
2550 emit_soft_tfmode_libcall (func, 2, operands);
2551 }
2552
2553 static void
2554 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
2555 {
2556 const char *func;
2557
2558 switch (code)
2559 {
2560 case FLOAT_EXTEND:
2561 switch (GET_MODE (operands[1]))
2562 {
2563 case SFmode:
2564 func = "_Qp_stoq";
2565 break;
2566 case DFmode:
2567 func = "_Qp_dtoq";
2568 break;
2569 default:
2570 gcc_unreachable ();
2571 }
2572 break;
2573
2574 case FLOAT_TRUNCATE:
2575 switch (GET_MODE (operands[0]))
2576 {
2577 case SFmode:
2578 func = "_Qp_qtos";
2579 break;
2580 case DFmode:
2581 func = "_Qp_qtod";
2582 break;
2583 default:
2584 gcc_unreachable ();
2585 }
2586 break;
2587
2588 case FLOAT:
2589 switch (GET_MODE (operands[1]))
2590 {
2591 case SImode:
2592 func = "_Qp_itoq";
2593 if (TARGET_ARCH64)
2594 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
2595 break;
2596 case DImode:
2597 func = "_Qp_xtoq";
2598 break;
2599 default:
2600 gcc_unreachable ();
2601 }
2602 break;
2603
2604 case UNSIGNED_FLOAT:
2605 switch (GET_MODE (operands[1]))
2606 {
2607 case SImode:
2608 func = "_Qp_uitoq";
2609 if (TARGET_ARCH64)
2610 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
2611 break;
2612 case DImode:
2613 func = "_Qp_uxtoq";
2614 break;
2615 default:
2616 gcc_unreachable ();
2617 }
2618 break;
2619
2620 case FIX:
2621 switch (GET_MODE (operands[0]))
2622 {
2623 case SImode:
2624 func = "_Qp_qtoi";
2625 break;
2626 case DImode:
2627 func = "_Qp_qtox";
2628 break;
2629 default:
2630 gcc_unreachable ();
2631 }
2632 break;
2633
2634 case UNSIGNED_FIX:
2635 switch (GET_MODE (operands[0]))
2636 {
2637 case SImode:
2638 func = "_Qp_qtoui";
2639 break;
2640 case DImode:
2641 func = "_Qp_qtoux";
2642 break;
2643 default:
2644 gcc_unreachable ();
2645 }
2646 break;
2647
2648 default:
2649 gcc_unreachable ();
2650 }
2651
2652 emit_soft_tfmode_libcall (func, 2, operands);
2653 }
2654
2655 /* Expand a hard-float tfmode operation. All arguments must be in
2656 registers. */
2657
2658 static void
2659 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
2660 {
2661 rtx op, dest;
2662
2663 if (GET_RTX_CLASS (code) == RTX_UNARY)
2664 {
2665 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2666 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
2667 }
2668 else
2669 {
2670 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2671 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
2672 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2673 operands[1], operands[2]);
2674 }
2675
2676 if (register_operand (operands[0], VOIDmode))
2677 dest = operands[0];
2678 else
2679 dest = gen_reg_rtx (GET_MODE (operands[0]));
2680
2681 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
2682
2683 if (dest != operands[0])
2684 emit_move_insn (operands[0], dest);
2685 }
2686
2687 void
2688 emit_tfmode_binop (enum rtx_code code, rtx *operands)
2689 {
2690 if (TARGET_HARD_QUAD)
2691 emit_hard_tfmode_operation (code, operands);
2692 else
2693 emit_soft_tfmode_binop (code, operands);
2694 }
2695
2696 void
2697 emit_tfmode_unop (enum rtx_code code, rtx *operands)
2698 {
2699 if (TARGET_HARD_QUAD)
2700 emit_hard_tfmode_operation (code, operands);
2701 else
2702 emit_soft_tfmode_unop (code, operands);
2703 }
2704
2705 void
2706 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
2707 {
2708 if (TARGET_HARD_QUAD)
2709 emit_hard_tfmode_operation (code, operands);
2710 else
2711 emit_soft_tfmode_cvt (code, operands);
2712 }
2713 \f
2714 /* Return nonzero if a branch/jump/call instruction will be emitting
2715 a nop into its delay slot. */
2716
2717 int
2718 empty_delay_slot (rtx insn)
2719 {
2720 rtx seq;
2721
2722 /* If no previous instruction (should not happen), return true. */
2723 if (PREV_INSN (insn) == NULL)
2724 return 1;
2725
2726 seq = NEXT_INSN (PREV_INSN (insn));
2727 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
2728 return 0;
2729
2730 return 1;
2731 }
2732
2733 /* Return nonzero if TRIAL can go into the call delay slot. */
2734
2735 int
2736 tls_call_delay (rtx trial)
2737 {
2738 rtx pat;
2739
2740 /* Binutils allows
2741 call __tls_get_addr, %tgd_call (foo)
2742 add %l7, %o0, %o0, %tgd_add (foo)
2743 while Sun as/ld does not. */
2744 if (TARGET_GNU_TLS || !TARGET_TLS)
2745 return 1;
2746
2747 pat = PATTERN (trial);
2748
2749 /* We must reject tgd_add{32|64}, i.e.
2750 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
2751 and tldm_add{32|64}, i.e.
2752 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
2753 for Sun as/ld. */
2754 if (GET_CODE (pat) == SET
2755 && GET_CODE (SET_SRC (pat)) == PLUS)
2756 {
2757 rtx unspec = XEXP (SET_SRC (pat), 1);
2758
2759 if (GET_CODE (unspec) == UNSPEC
2760 && (XINT (unspec, 1) == UNSPEC_TLSGD
2761 || XINT (unspec, 1) == UNSPEC_TLSLDM))
2762 return 0;
2763 }
2764
2765 return 1;
2766 }
2767
2768 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
2769 instruction. RETURN_P is true if the v9 variant 'return' is to be
2770 considered in the test too.
2771
2772 TRIAL must be a SET whose destination is a REG appropriate for the
2773 'restore' instruction or, if RETURN_P is true, for the 'return'
2774 instruction. */
2775
2776 static int
2777 eligible_for_restore_insn (rtx trial, bool return_p)
2778 {
2779 rtx pat = PATTERN (trial);
2780 rtx src = SET_SRC (pat);
2781
2782 /* The 'restore src,%g0,dest' pattern for word mode and below. */
2783 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2784 && arith_operand (src, GET_MODE (src)))
2785 {
2786 if (TARGET_ARCH64)
2787 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2788 else
2789 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
2790 }
2791
2792 /* The 'restore src,%g0,dest' pattern for double-word mode. */
2793 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2794 && arith_double_operand (src, GET_MODE (src)))
2795 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2796
2797 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
2798 else if (! TARGET_FPU && register_operand (src, SFmode))
2799 return 1;
2800
2801 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
2802 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
2803 return 1;
2804
2805 /* If we have the 'return' instruction, anything that does not use
2806 local or output registers and can go into a delay slot wins. */
2807 else if (return_p && TARGET_V9 && ! epilogue_renumber (&pat, 1)
2808 && (get_attr_in_uncond_branch_delay (trial)
2809 == IN_UNCOND_BRANCH_DELAY_TRUE))
2810 return 1;
2811
2812 /* The 'restore src1,src2,dest' pattern for SImode. */
2813 else if (GET_CODE (src) == PLUS
2814 && register_operand (XEXP (src, 0), SImode)
2815 && arith_operand (XEXP (src, 1), SImode))
2816 return 1;
2817
2818 /* The 'restore src1,src2,dest' pattern for DImode. */
2819 else if (GET_CODE (src) == PLUS
2820 && register_operand (XEXP (src, 0), DImode)
2821 && arith_double_operand (XEXP (src, 1), DImode))
2822 return 1;
2823
2824 /* The 'restore src1,%lo(src2),dest' pattern. */
2825 else if (GET_CODE (src) == LO_SUM
2826 && ! TARGET_CM_MEDMID
2827 && ((register_operand (XEXP (src, 0), SImode)
2828 && immediate_operand (XEXP (src, 1), SImode))
2829 || (TARGET_ARCH64
2830 && register_operand (XEXP (src, 0), DImode)
2831 && immediate_operand (XEXP (src, 1), DImode))))
2832 return 1;
2833
2834 /* The 'restore src,src,dest' pattern. */
2835 else if (GET_CODE (src) == ASHIFT
2836 && (register_operand (XEXP (src, 0), SImode)
2837 || register_operand (XEXP (src, 0), DImode))
2838 && XEXP (src, 1) == const1_rtx)
2839 return 1;
2840
2841 return 0;
2842 }
2843
2844 /* Return nonzero if TRIAL can go into the function return's
2845 delay slot. */
2846
2847 int
2848 eligible_for_return_delay (rtx trial)
2849 {
2850 rtx pat;
2851
2852 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2853 return 0;
2854
2855 if (get_attr_length (trial) != 1)
2856 return 0;
2857
2858 /* If there are any call-saved registers, we should scan TRIAL to check
2859 that it does not reference them. For now just take the easy way out. */
2860 if (num_gfregs)
2861 return 0;
2862
2863 /* If the function uses __builtin_eh_return, the eh_return machinery
2864 occupies the delay slot. */
2865 if (crtl->calls_eh_return)
2866 return 0;
2867
2868 /* In the case of a true leaf function, anything can go into the slot. */
2869 if (sparc_leaf_function_p)
2870 return get_attr_in_uncond_branch_delay (trial)
2871 == IN_UNCOND_BRANCH_DELAY_TRUE;
2872
2873 pat = PATTERN (trial);
2874
2875 /* Otherwise, only operations which can be done in tandem with
2876 a `restore' or `return' insn can go into the delay slot. */
2877 if (GET_CODE (SET_DEST (pat)) != REG
2878 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24))
2879 return 0;
2880
2881 /* If this instruction sets up a floating-point register and we have a return
2882 instruction, it can probably go in. But restore will not work
2883 with FP_REGS. */
2884 if (REGNO (SET_DEST (pat)) >= 32)
2885 return (TARGET_V9
2886 && ! epilogue_renumber (&pat, 1)
2887 && (get_attr_in_uncond_branch_delay (trial)
2888 == IN_UNCOND_BRANCH_DELAY_TRUE));
2889
2890 return eligible_for_restore_insn (trial, true);
2891 }
2892
2893 /* Return nonzero if TRIAL can go into the sibling call's
2894 delay slot. */
2895
2896 int
2897 eligible_for_sibcall_delay (rtx trial)
2898 {
2899 rtx pat;
2900
2901 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2902 return 0;
2903
2904 if (get_attr_length (trial) != 1)
2905 return 0;
2906
2907 pat = PATTERN (trial);
2908
2909 if (sparc_leaf_function_p)
2910 {
2911 /* If the tail call is done using the call instruction,
2912 we have to restore %o7 in the delay slot. */
2913 if (LEAF_SIBCALL_SLOT_RESERVED_P)
2914 return 0;
2915
2916 /* %g1 is used to build the function address. */
2917 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
2918 return 0;
2919
2920 return 1;
2921 }
2922
2923 /* Otherwise, only operations which can be done in tandem with
2924 a `restore' insn can go into the delay slot. */
2925 if (GET_CODE (SET_DEST (pat)) != REG
2926 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
2927 || REGNO (SET_DEST (pat)) >= 32)
2928 return 0;
2929
2930 /* If it mentions %o7, it can't go in, because sibcall will clobber it
2931 in most cases. */
2932 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
2933 return 0;
2934
2935 return eligible_for_restore_insn (trial, false);
2936 }
2937
2938 int
2939 short_branch (int uid1, int uid2)
2940 {
2941 int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);
2942
2943 /* Leave a few words of "slop". */
2944 if (delta >= -1023 && delta <= 1022)
2945 return 1;
2946
2947 return 0;
2948 }
2949
2950 /* Return nonzero if REG is not used after INSN.
2951 We assume REG is a reload reg, and therefore does
2952 not live past labels or calls or jumps. */
2953 int
2954 reg_unused_after (rtx reg, rtx insn)
2955 {
2956 enum rtx_code code, prev_code = UNKNOWN;
2957
2958 while ((insn = NEXT_INSN (insn)))
2959 {
2960 if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2961 return 1;
2962
2963 code = GET_CODE (insn);
2964 if (GET_CODE (insn) == CODE_LABEL)
2965 return 1;
2966
2967 if (INSN_P (insn))
2968 {
2969 rtx set = single_set (insn);
2970 int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2971 if (set && in_src)
2972 return 0;
2973 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2974 return 1;
2975 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2976 return 0;
2977 }
2978 prev_code = code;
2979 }
2980 return 1;
2981 }
2982 \f
2983 /* Determine if it's legal to put X into the constant pool. This
2984 is not possible if X contains the address of a symbol that is
2985 not constant (TLS) or not known at final link time (PIC). */
2986
2987 static bool
2988 sparc_cannot_force_const_mem (rtx x)
2989 {
2990 switch (GET_CODE (x))
2991 {
2992 case CONST_INT:
2993 case CONST_DOUBLE:
2994 case CONST_VECTOR:
2995 /* Accept all non-symbolic constants. */
2996 return false;
2997
2998 case LABEL_REF:
2999 /* Labels are OK iff we are non-PIC. */
3000 return flag_pic != 0;
3001
3002 case SYMBOL_REF:
3003 /* 'Naked' TLS symbol references are never OK;
3004 non-TLS symbols are OK iff we are non-PIC. */
3005 if (SYMBOL_REF_TLS_MODEL (x))
3006 return true;
3007 else
3008 return flag_pic != 0;
3009
3010 case CONST:
3011 return sparc_cannot_force_const_mem (XEXP (x, 0));
3012 case PLUS:
3013 case MINUS:
3014 return sparc_cannot_force_const_mem (XEXP (x, 0))
3015 || sparc_cannot_force_const_mem (XEXP (x, 1));
3016 case UNSPEC:
3017 return true;
3018 default:
3019 gcc_unreachable ();
3020 }
3021 }
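
/* Illustrative examples: under -fpic a SYMBOL_REF, or a CONST such as
   "sym + 4", cannot be forced into the constant pool because the final
   address is unknown at link time, and a TLS reference never can;
   plain integer, floating-point and vector constants always can.  */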
3022 \f
3023 /* Global Offset Table support. */
3024 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3025 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3026
3027 /* Return the SYMBOL_REF for the Global Offset Table. */
3028
3029 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3030
3031 static rtx
3032 sparc_got (void)
3033 {
3034 if (!sparc_got_symbol)
3035 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3036
3037 return sparc_got_symbol;
3038 }
3039
3040 /* Ensure that we are not using patterns that are not OK with PIC. */
3041
3042 int
3043 check_pic (int i)
3044 {
3045 rtx op;
3046
3047 switch (flag_pic)
3048 {
3049 case 1:
3050 op = recog_data.operand[i];
3051 gcc_assert (GET_CODE (op) != SYMBOL_REF
3052 && (GET_CODE (op) != CONST
3053 || (GET_CODE (XEXP (op, 0)) == MINUS
3054 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3055 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
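/* FALLTHRU */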
3056 case 2:
3057 default:
3058 return 1;
3059 }
3060 }
3061
3062 /* Return true if X is an address which needs a temporary register when
3063 reloaded while generating PIC code. */
3064
3065 int
3066 pic_address_needs_scratch (rtx x)
3067 {
3068 /* An address which is a symbolic operand plus a non-SMALL_INT needs a temp reg. */
3069 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3070 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3071 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3072 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3073 return 1;
3074
3075 return 0;
3076 }
3077
3078 /* Determine if a given RTX is a valid constant. We already know this
3079 satisfies CONSTANT_P. */
3080
3081 bool
3082 legitimate_constant_p (rtx x)
3083 {
3084 switch (GET_CODE (x))
3085 {
3086 case CONST:
3087 case SYMBOL_REF:
3088 if (sparc_tls_referenced_p (x))
3089 return false;
3090 break;
3091
3092 case CONST_DOUBLE:
3093 if (GET_MODE (x) == VOIDmode)
3094 return true;
3095
3096 /* Floating point constants are generally not ok.
3097 The only exception is 0.0 in VIS. */
3098 if (TARGET_VIS
3099 && SCALAR_FLOAT_MODE_P (GET_MODE (x))
3100 && const_zero_operand (x, GET_MODE (x)))
3101 return true;
3102
3103 return false;
3104
3105 case CONST_VECTOR:
3106 /* Vector constants are generally not ok.
3107 The only exception is 0 in VIS. */
3108 if (TARGET_VIS
3109 && const_zero_operand (x, GET_MODE (x)))
3110 return true;
3111
3112 return false;
3113
3114 default:
3115 break;
3116 }
3117
3118 return true;
3119 }
3120
3121 /* Determine if a given RTX is a valid constant address. */
3122
3123 bool
3124 constant_address_p (rtx x)
3125 {
3126 switch (GET_CODE (x))
3127 {
3128 case LABEL_REF:
3129 case CONST_INT:
3130 case HIGH:
3131 return true;
3132
3133 case CONST:
3134 if (flag_pic && pic_address_needs_scratch (x))
3135 return false;
3136 return legitimate_constant_p (x);
3137
3138 case SYMBOL_REF:
3139 return !flag_pic && legitimate_constant_p (x);
3140
3141 default:
3142 return false;
3143 }
3144 }
3145
3146 /* Nonzero if the constant value X is a legitimate general operand
3147 when generating PIC code. It is given that flag_pic is on and
3148 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3149
3150 bool
3151 legitimate_pic_operand_p (rtx x)
3152 {
3153 if (pic_address_needs_scratch (x))
3154 return false;
3155 if (sparc_tls_referenced_p (x))
3156 return false;
3157 return true;
3158 }
3159
3160 /* Return nonzero if ADDR is a valid memory address.
3161 STRICT specifies whether strict register checking applies. */
3162
3163 static bool
3164 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3165 {
3166 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3167
3168 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3169 rs1 = addr;
3170 else if (GET_CODE (addr) == PLUS)
3171 {
3172 rs1 = XEXP (addr, 0);
3173 rs2 = XEXP (addr, 1);
3174
3175 /* Canonicalize. REG comes first; if there are no regs,
3176 LO_SUM comes first. */
3177 if (!REG_P (rs1)
3178 && GET_CODE (rs1) != SUBREG
3179 && (REG_P (rs2)
3180 || GET_CODE (rs2) == SUBREG
3181 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3182 {
3183 rs1 = XEXP (addr, 1);
3184 rs2 = XEXP (addr, 0);
3185 }
3186
3187 if ((flag_pic == 1
3188 && rs1 == pic_offset_table_rtx
3189 && !REG_P (rs2)
3190 && GET_CODE (rs2) != SUBREG
3191 && GET_CODE (rs2) != LO_SUM
3192 && GET_CODE (rs2) != MEM
3193 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3194 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3195 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3196 || ((REG_P (rs1)
3197 || GET_CODE (rs1) == SUBREG)
3198 && RTX_OK_FOR_OFFSET_P (rs2)))
3199 {
3200 imm1 = rs2;
3201 rs2 = NULL;
3202 }
3203 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3204 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3205 {
3206 /* We prohibit REG + REG for TFmode when there are no quad move insns
3207 and we consequently need to split. We do this because REG+REG
3208 is not an offsettable address. If we get the situation in reload
3209 where source and destination of a movtf pattern are both MEMs with
3210 REG+REG address, then only one of them gets converted to an
3211 offsettable address. */
3212 if (mode == TFmode
3213 && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
3214 return 0;
3215
3216 /* We prohibit REG + REG on ARCH32 if not optimizing for
3217 DFmode/DImode because then mem_min_alignment is likely to be zero
3218 after reload and the forced split would lack a matching splitter
3219 pattern. */
3220 if (TARGET_ARCH32 && !optimize
3221 && (mode == DFmode || mode == DImode))
3222 return 0;
3223 }
3224 else if (USE_AS_OFFSETABLE_LO10
3225 && GET_CODE (rs1) == LO_SUM
3226 && TARGET_ARCH64
3227 && ! TARGET_CM_MEDMID
3228 && RTX_OK_FOR_OLO10_P (rs2))
3229 {
3230 rs2 = NULL;
3231 imm1 = XEXP (rs1, 1);
3232 rs1 = XEXP (rs1, 0);
3233 if (!CONSTANT_P (imm1)
3234 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3235 return 0;
3236 }
3237 }
3238 else if (GET_CODE (addr) == LO_SUM)
3239 {
3240 rs1 = XEXP (addr, 0);
3241 imm1 = XEXP (addr, 1);
3242
3243 if (!CONSTANT_P (imm1)
3244 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3245 return 0;
3246
3247 /* We can't allow TFmode in 32-bit mode, because an offset greater
3248 than the alignment (8) may cause the LO_SUM to overflow. */
3249 if (mode == TFmode && TARGET_ARCH32)
3250 return 0;
3251 }
3252 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3253 return 1;
3254 else
3255 return 0;
3256
3257 if (GET_CODE (rs1) == SUBREG)
3258 rs1 = SUBREG_REG (rs1);
3259 if (!REG_P (rs1))
3260 return 0;
3261
3262 if (rs2)
3263 {
3264 if (GET_CODE (rs2) == SUBREG)
3265 rs2 = SUBREG_REG (rs2);
3266 if (!REG_P (rs2))
3267 return 0;
3268 }
3269
3270 if (strict)
3271 {
3272 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3273 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3274 return 0;
3275 }
3276 else
3277 {
3278 if ((REGNO (rs1) >= 32
3279 && REGNO (rs1) != FRAME_POINTER_REGNUM
3280 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3281 || (rs2
3282 && (REGNO (rs2) >= 32
3283 && REGNO (rs2) != FRAME_POINTER_REGNUM
3284 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3285 return 0;
3286 }
3287 return 1;
3288 }
3289
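/* Summary of the accepted shapes, for illustration: REG, REG+REG,
   REG+SIMM13, LO_SUM (REG, imm) and small CONST_INT addresses pass.
   REG+REG is refused for TFmode without hard-quad support, and LO_SUM
   is refused for TFmode in 32-bit mode since an offset above the
   8-byte alignment could overflow the %lo() part.  */
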
3290 /* Return the SYMBOL_REF for the tls_get_addr function. */
3291
3292 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3293
3294 static rtx
3295 sparc_tls_get_addr (void)
3296 {
3297 if (!sparc_tls_symbol)
3298 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3299
3300 return sparc_tls_symbol;
3301 }
3302
3303 /* Return the Global Offset Table to be used in TLS mode. */
3304
3305 static rtx
3306 sparc_tls_got (void)
3307 {
3308 /* In PIC mode, this is just the PIC offset table. */
3309 if (flag_pic)
3310 {
3311 crtl->uses_pic_offset_table = 1;
3312 return pic_offset_table_rtx;
3313 }
3314
3315 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
3316 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
3317 if (TARGET_SUN_TLS && TARGET_ARCH32)
3318 {
3319 load_got_register ();
3320 return global_offset_table_rtx;
3321 }
3322
3323 /* In all other cases, we load a new pseudo with the GOT symbol. */
3324 return copy_to_reg (sparc_got ());
3325 }
3326
3327 /* Return true if X contains a thread-local symbol. */
3328
3329 static bool
3330 sparc_tls_referenced_p (rtx x)
3331 {
3332 if (!TARGET_HAVE_TLS)
3333 return false;
3334
3335 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3336 x = XEXP (XEXP (x, 0), 0);
3337
3338 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3339 return true;
3340
3341 /* That's all we handle in sparc_legitimize_tls_address for now. */
3342 return false;
3343 }
3344
3345 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3346 this (thread-local) address. */
3347
3348 static rtx
3349 sparc_legitimize_tls_address (rtx addr)
3350 {
3351 rtx temp1, temp2, temp3, ret, o0, got, insn;
3352
3353 gcc_assert (can_create_pseudo_p ());
3354
3355 if (GET_CODE (addr) == SYMBOL_REF)
3356 switch (SYMBOL_REF_TLS_MODEL (addr))
3357 {
3358 case TLS_MODEL_GLOBAL_DYNAMIC:
3359 start_sequence ();
3360 temp1 = gen_reg_rtx (SImode);
3361 temp2 = gen_reg_rtx (SImode);
3362 ret = gen_reg_rtx (Pmode);
3363 o0 = gen_rtx_REG (Pmode, 8);
3364 got = sparc_tls_got ();
3365 emit_insn (gen_tgd_hi22 (temp1, addr));
3366 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
3367 if (TARGET_ARCH32)
3368 {
3369 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
3370 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
3371 addr, const1_rtx));
3372 }
3373 else
3374 {
3375 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
3376 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
3377 addr, const1_rtx));
3378 }
3379 CALL_INSN_FUNCTION_USAGE (insn)
3380 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
3381 CALL_INSN_FUNCTION_USAGE (insn));
3382 insn = get_insns ();
3383 end_sequence ();
3384 emit_libcall_block (insn, ret, o0, addr);
3385 break;
3386
3387 case TLS_MODEL_LOCAL_DYNAMIC:
3388 start_sequence ();
3389 temp1 = gen_reg_rtx (SImode);
3390 temp2 = gen_reg_rtx (SImode);
3391 temp3 = gen_reg_rtx (Pmode);
3392 ret = gen_reg_rtx (Pmode);
3393 o0 = gen_rtx_REG (Pmode, 8);
3394 got = sparc_tls_got ();
3395 emit_insn (gen_tldm_hi22 (temp1));
3396 emit_insn (gen_tldm_lo10 (temp2, temp1));
3397 if (TARGET_ARCH32)
3398 {
3399 emit_insn (gen_tldm_add32 (o0, got, temp2));
3400 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
3401 const1_rtx));
3402 }
3403 else
3404 {
3405 emit_insn (gen_tldm_add64 (o0, got, temp2));
3406 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
3407 const1_rtx));
3408 }
3409 CALL_INSN_FUNCTION_USAGE (insn)
3410 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
3411 CALL_INSN_FUNCTION_USAGE (insn));
3412 insn = get_insns ();
3413 end_sequence ();
3414 emit_libcall_block (insn, temp3, o0,
3415 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
3416 UNSPEC_TLSLD_BASE));
3417 temp1 = gen_reg_rtx (SImode);
3418 temp2 = gen_reg_rtx (SImode);
3419 emit_insn (gen_tldo_hix22 (temp1, addr));
3420 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
3421 if (TARGET_ARCH32)
3422 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
3423 else
3424 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
3425 break;
3426
3427 case TLS_MODEL_INITIAL_EXEC:
3428 temp1 = gen_reg_rtx (SImode);
3429 temp2 = gen_reg_rtx (SImode);
3430 temp3 = gen_reg_rtx (Pmode);
3431 got = sparc_tls_got ();
3432 emit_insn (gen_tie_hi22 (temp1, addr));
3433 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
3434 if (TARGET_ARCH32)
3435 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
3436 else
3437 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
3438 if (TARGET_SUN_TLS)
3439 {
3440 ret = gen_reg_rtx (Pmode);
3441 if (TARGET_ARCH32)
3442 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
3443 temp3, addr));
3444 else
3445 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
3446 temp3, addr));
3447 }
3448 else
3449 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
3450 break;
3451
3452 case TLS_MODEL_LOCAL_EXEC:
3453 temp1 = gen_reg_rtx (Pmode);
3454 temp2 = gen_reg_rtx (Pmode);
3455 if (TARGET_ARCH32)
3456 {
3457 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
3458 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
3459 }
3460 else
3461 {
3462 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
3463 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
3464 }
3465 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
3466 break;
3467
3468 default:
3469 gcc_unreachable ();
3470 }
3471
3472 else if (GET_CODE (addr) == CONST)
3473 {
3474 rtx base, offset;
3475
3476 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
3477
3478 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
3479 offset = XEXP (XEXP (addr, 0), 1);
3480
3481 base = force_operand (base, NULL_RTX);
3482 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
3483 offset = force_reg (Pmode, offset);
3484 ret = gen_rtx_PLUS (Pmode, base, offset);
3485 }
3486
3487 else
3488 gcc_unreachable (); /* for now ... */
3489
3490 return ret;
3491 }
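
/* Sketch of the global-dynamic sequence emitted above (illustrative;
   the temporary register names are examples):

	sethi	%tgd_hi22(sym), %t1
	add	%t1, %tgd_lo10(sym), %t2
	add	%l7, %t2, %o0, %tgd_add(sym)
	call	__tls_get_addr, %tgd_call(sym)
	 nop

   The %tgd_* operators tag each insn so that the linker can later
   relax the whole sequence to a tighter TLS model.  */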
3492
3493 /* Legitimize PIC addresses. If the address is already position-independent,
3494 we return ORIG. Newly generated position-independent addresses go into a
3495 reg. This is REG if nonzero, otherwise we allocate register(s) as
3496 necessary. */
3497
3498 static rtx
3499 sparc_legitimize_pic_address (rtx orig, rtx reg)
3500 {
3501 bool gotdata_op = false;
3502
3503 if (GET_CODE (orig) == SYMBOL_REF
3504 /* See the comment in sparc_expand_move. */
3505 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
3506 {
3507 rtx pic_ref, address;
3508 rtx insn;
3509
3510 if (reg == 0)
3511 {
3512 gcc_assert (! reload_in_progress && ! reload_completed);
3513 reg = gen_reg_rtx (Pmode);
3514 }
3515
3516 if (flag_pic == 2)
3517 {
3518 /* If not during reload, allocate another temp reg here for loading
3519 in the address, so that these instructions can be optimized
3520 properly. */
3521 rtx temp_reg = ((reload_in_progress || reload_completed)
3522 ? reg : gen_reg_rtx (Pmode));
3523
3524 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
3525 won't get confused into thinking that these two instructions
3526 are loading in the true address of the symbol. If in the
3527 future a PIC rtx exists, that should be used instead. */
3528 if (TARGET_ARCH64)
3529 {
3530 emit_insn (gen_movdi_high_pic (temp_reg, orig));
3531 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
3532 }
3533 else
3534 {
3535 emit_insn (gen_movsi_high_pic (temp_reg, orig));
3536 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
3537 }
3538 address = temp_reg;
3539 gotdata_op = true;
3540 }
3541 else
3542 address = orig;
3543
3544 crtl->uses_pic_offset_table = 1;
3545 if (gotdata_op)
3546 {
3547 if (TARGET_ARCH64)
3548 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
3549 pic_offset_table_rtx,
3550 address, orig));
3551 else
3552 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
3553 pic_offset_table_rtx,
3554 address, orig));
3555 }
3556 else
3557 {
3558 pic_ref
3559 = gen_const_mem (Pmode,
3560 gen_rtx_PLUS (Pmode,
3561 pic_offset_table_rtx, address));
3562 insn = emit_move_insn (reg, pic_ref);
3563 }
3564
3565 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3566 by the loop pass. */
3567 set_unique_reg_note (insn, REG_EQUAL, orig);
3568 return reg;
3569 }
3570 else if (GET_CODE (orig) == CONST)
3571 {
3572 rtx base, offset;
3573
3574 if (GET_CODE (XEXP (orig, 0)) == PLUS
3575 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3576 return orig;
3577
3578 if (reg == 0)
3579 {
3580 gcc_assert (! reload_in_progress && ! reload_completed);
3581 reg = gen_reg_rtx (Pmode);
3582 }
3583
3584 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3585 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
3586 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
3587 base == reg ? NULL_RTX : reg);
3588
3589 if (GET_CODE (offset) == CONST_INT)
3590 {
3591 if (SMALL_INT (offset))
3592 return plus_constant (base, INTVAL (offset));
3593 else if (! reload_in_progress && ! reload_completed)
3594 offset = force_reg (Pmode, offset);
3595 else
3596 /* If we reach here, then something is seriously wrong. */
3597 gcc_unreachable ();
3598 }
3599 return gen_rtx_PLUS (Pmode, base, offset);
3600 }
3601 else if (GET_CODE (orig) == LABEL_REF)
3602 /* ??? We ought to be checking that the register is live instead, in case
3603 it is eliminated. */
3604 crtl->uses_pic_offset_table = 1;
3605
3606 return orig;
3607 }
3608
3609 /* Try machine-dependent ways of modifying an illegitimate address X
3610 to be legitimate. If we find one, return the new, valid address.
3611
3612 OLDX is the address as it was before break_out_memory_refs was called.
3613 In some cases it is useful to look at this to decide what needs to be done.
3614
3615 MODE is the mode of the operand pointed to by X.
3616
3617 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
3618
3619 static rtx
3620 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3621 enum machine_mode mode)
3622 {
3623 rtx orig_x = x;
3624
3625 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
3626 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3627 force_operand (XEXP (x, 0), NULL_RTX));
3628 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
3629 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3630 force_operand (XEXP (x, 1), NULL_RTX));
3631 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
3632 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
3633 XEXP (x, 1));
3634 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
3635 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3636 force_operand (XEXP (x, 1), NULL_RTX));
3637
3638 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
3639 return x;
3640
3641 if (sparc_tls_referenced_p (x))
3642 x = sparc_legitimize_tls_address (x);
3643 else if (flag_pic)
3644 x = sparc_legitimize_pic_address (x, NULL_RTX);
3645 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
3646 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3647 copy_to_mode_reg (Pmode, XEXP (x, 1)));
3648 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
3649 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3650 copy_to_mode_reg (Pmode, XEXP (x, 0)));
3651 else if (GET_CODE (x) == SYMBOL_REF
3652 || GET_CODE (x) == CONST
3653 || GET_CODE (x) == LABEL_REF)
3654 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
3655
3656 return x;
3657 }
3658
3659 /* Delegitimize an address that was legitimized by the above function. */
3660
3661 static rtx
3662 sparc_delegitimize_address (rtx x)
3663 {
3664 x = delegitimize_mem_from_attrs (x);
3665
3666 if (GET_CODE (x) == LO_SUM
3667 && GET_CODE (XEXP (x, 1)) == UNSPEC
3668 && XINT (XEXP (x, 1), 1) == UNSPEC_TLSLE)
3669 {
3670 x = XVECEXP (XEXP (x, 1), 0, 0);
3671 gcc_assert (GET_CODE (x) == SYMBOL_REF);
3672 }
3673
3674 return x;
3675 }
3676
3677 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
3678 replace the input X, or the original X if no replacement is called for.
3679 The output parameter *WIN is 1 if the calling macro should goto WIN,
3680 0 if it should not.
3681
3682 For SPARC, we wish to handle addresses by splitting them into
3683 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
3684 This cuts the number of extra insns by one.
3685
3686 Do nothing when generating PIC code and the address is a symbolic
3687 operand or requires a scratch register. */
3688
3689 rtx
3690 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
3691 int opnum, int type,
3692 int ind_levels ATTRIBUTE_UNUSED, int *win)
3693 {
3694 /* Decompose SImode constants into HIGH+LO_SUM. */
3695 if (CONSTANT_P (x)
3696 && (mode != TFmode || TARGET_ARCH64)
3697 && GET_MODE (x) == SImode
3698 && GET_CODE (x) != LO_SUM
3699 && GET_CODE (x) != HIGH
3700 && sparc_cmodel <= CM_MEDLOW
3701 && !(flag_pic
3702 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
3703 {
3704 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
3705 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3706 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3707 opnum, (enum reload_type)type);
3708 *win = 1;
3709 return x;
3710 }
3711
3712 /* We have to recognize what we have already generated above. */
3713 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
3714 {
3715 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3716 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3717 opnum, (enum reload_type)type);
3718 *win = 1;
3719 return x;
3720 }
3721
3722 *win = 0;
3723 return x;
3724 }
3725
3726 /* Return true if ADDR (a legitimate address expression)
3727 has an effect that depends on the machine mode it is used for.
3728
3729 In PIC mode,
3730
3731 (mem:HI [%l7+a])
3732
3733 is not equivalent to
3734
3735 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
3736
3737 because [%l7+a+1] is interpreted as the address of (a+1). */
3738
3739
3740 static bool
3741 sparc_mode_dependent_address_p (const_rtx addr)
3742 {
3743 if (flag_pic && GET_CODE (addr) == PLUS)
3744 {
3745 rtx op0 = XEXP (addr, 0);
3746 rtx op1 = XEXP (addr, 1);
3747 if (op0 == pic_offset_table_rtx
3748 && SYMBOLIC_CONST (op1))
3749 return true;
3750 }
3751
3752 return false;
3753 }
3754
3755 #ifdef HAVE_GAS_HIDDEN
3756 # define USE_HIDDEN_LINKONCE 1
3757 #else
3758 # define USE_HIDDEN_LINKONCE 0
3759 #endif
3760
3761 static void
3762 get_pc_thunk_name (char name[32], unsigned int regno)
3763 {
3764 const char *reg_name = reg_names[regno];
3765
3766 /* Skip the leading '%' as that cannot be used in a
3767 symbol name. */
3768 reg_name += 1;
3769
3770 if (USE_HIDDEN_LINKONCE)
3771 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
3772 else
3773 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
3774 }
3775
3776 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
3777
3778 static rtx
3779 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
3780 {
3781 int orig_flag_pic = flag_pic;
3782 rtx insn;
3783
3784 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
3785 flag_pic = 0;
3786 if (TARGET_ARCH64)
3787 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
3788 else
3789 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
3790 flag_pic = orig_flag_pic;
3791
3792 return insn;
3793 }
3794
3795 /* Emit code to load the GOT register. */
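
/* Except on VxWorks RTP, the expansion relies on a PC-relative helper
   thunk; for 32-bit code with %l7 as the GOT register it looks roughly
   like this (a sketch, modulo scheduling):

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds %o7, i.e. the PC of the call, into %l7.  */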
3796
3797 static void
3798 load_got_register (void)
3799 {
3800 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
3801 if (!global_offset_table_rtx)
3802 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
3803
3804 if (TARGET_VXWORKS_RTP)
3805 emit_insn (gen_vxworks_load_got ());
3806 else
3807 {
3808 /* The GOT symbol is subject to a PC-relative relocation so we need a
3809 helper function to add the PC value and thus get the final value. */
3810 if (!got_helper_rtx)
3811 {
3812 char name[32];
3813 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
3814 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3815 }
3816
3817 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
3818 got_helper_rtx,
3819 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
3820 }
3821
3822 /* Need to emit this whether or not we obey regdecls,
3823 since setjmp/longjmp can cause life info to screw up.
3824 ??? In the case where we don't obey regdecls, this is not sufficient
3825 since we may not fall out the bottom. */
3826 emit_use (global_offset_table_rtx);
3827 }
3828
3829 /* Emit a call instruction with the pattern given by PAT. ADDR is the
3830 address of the call target. */
3831
3832 void
3833 sparc_emit_call_insn (rtx pat, rtx addr)
3834 {
3835 rtx insn;
3836
3837 insn = emit_call_insn (pat);
3838
3839 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
3840 if (TARGET_VXWORKS_RTP
3841 && flag_pic
3842 && GET_CODE (addr) == SYMBOL_REF
3843 && (SYMBOL_REF_DECL (addr)
3844 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
3845 : !SYMBOL_REF_LOCAL_P (addr)))
3846 {
3847 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
3848 crtl->uses_pic_offset_table = 1;
3849 }
3850 }
3851 \f
3852 /* Return 1 if RTX is a MEM which is known to be aligned to at
3853 least a DESIRED byte boundary. */
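
/* For example, a MEM whose address is %sp plus a constant offset is
   considered aligned whenever (offset - SPARC_STACK_BIAS) is a multiple
   of DESIRED; see the stack/frame pointer case below.  */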
3854
3855 int
3856 mem_min_alignment (rtx mem, int desired)
3857 {
3858 rtx addr, base, offset;
3859
3860 /* If it's not a MEM we can't accept it. */
3861 if (GET_CODE (mem) != MEM)
3862 return 0;
3863
3864 /* The easy case: MEM_ALIGN already guarantees the desired alignment. */
3865 if (!TARGET_UNALIGNED_DOUBLES
3866 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
3867 return 1;
3868
3869 /* ??? The rest of the function predates MEM_ALIGN so
3870 there is probably a bit of redundancy. */
3871 addr = XEXP (mem, 0);
3872 base = offset = NULL_RTX;
3873 if (GET_CODE (addr) == PLUS)
3874 {
3875 if (GET_CODE (XEXP (addr, 0)) == REG)
3876 {
3877 base = XEXP (addr, 0);
3878
3879 /* What we are saying here is that if the base
3880 REG is aligned properly, the compiler will make
3881 sure any REG-based index off of it will be
3882 properly aligned as well. */
3883 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
3884 offset = XEXP (addr, 1);
3885 else
3886 offset = const0_rtx;
3887 }
3888 }
3889 else if (GET_CODE (addr) == REG)
3890 {
3891 base = addr;
3892 offset = const0_rtx;
3893 }
3894
3895 if (base != NULL_RTX)
3896 {
3897 int regno = REGNO (base);
3898
3899 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
3900 {
3901 /* Check if the compiler has recorded some information
3902 about the alignment of the base REG. If reload has
3903 completed, we already matched with proper alignments.
3904 If not running global_alloc, reload might give us an
3905 unaligned pointer to the local stack, though. */
3906 if (((cfun != 0
3907 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
3908 || (optimize && reload_completed))
3909 && (INTVAL (offset) & (desired - 1)) == 0)
3910 return 1;
3911 }
3912 else
3913 {
3914 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
3915 return 1;
3916 }
3917 }
3918 else if (! TARGET_UNALIGNED_DOUBLES
3919 || CONSTANT_P (addr)
3920 || GET_CODE (addr) == LO_SUM)
3921 {
3922 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
3923 is true, in which case we can only assume that an access is aligned if
3924 it is to a constant address, or the address involves a LO_SUM. */
3925 return 1;
3926 }
3927
3928 /* An obviously unaligned address. */
3929 return 0;
3930 }
3931
3932 \f
3933 /* Vectors to keep interesting information about registers where it can easily
3934 be got. We used to use the actual mode value as the bit number, but there
3935 are more than 32 modes now. Instead we use two tables: one indexed by
3936 hard register number, and one indexed by mode. */
3937
3938 /* The purpose of sparc_mode_class is to shrink the range of modes so that
3939 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
3940 mapped into one sparc_mode_class mode. */
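
/* For example, SImode is mapped to S_MODE and DFmode to DF_MODE; see
   sparc_init_modes below for the complete mapping.  */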
3941
3942 enum sparc_mode_class {
3943 S_MODE, D_MODE, T_MODE, O_MODE,
3944 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
3945 CC_MODE, CCFP_MODE
3946 };
3947
3948 /* Modes for single-word and smaller quantities. */
3949 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
3950
3951 /* Modes for double-word and smaller quantities. */
3952 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
3953
3954 /* Modes for quad-word and smaller quantities. */
3955 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
3956
3957 /* Modes for 8-word and smaller quantities. */
3958 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
3959
3960 /* Modes for single-float quantities. We must allow any single word or
3961 smaller quantity. This is because the fix/float conversion instructions
3962 take integer inputs/outputs from the float registers. */
3963 #define SF_MODES (S_MODES)
3964
3965 /* Modes for double-float and smaller quantities. */
3966 #define DF_MODES (D_MODES)
3967
3968 /* Modes for quad-float and smaller quantities. */
3969 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
3970
3971 /* Modes for quad-float pairs and smaller quantities. */
3972 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
3973
3974 /* Modes for double-float only quantities. */
3975 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
3976
3977 /* Modes for quad-float and double-float only quantities. */
3978 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
3979
3980 /* Modes for quad-float pairs and double-float only quantities. */
3981 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
3982
3983 /* Modes for condition codes. */
3984 #define CC_MODES (1 << (int) CC_MODE)
3985 #define CCFP_MODES (1 << (int) CCFP_MODE)
3986
3987 /* Value is 1 if register/mode pair is acceptable on sparc.
3988 The funny mixture of D and T modes is because integer operations
3989 do not specially operate on tetra quantities, so non-quad-aligned
3990 registers can hold quadword quantities (except %o4 and %i4 because
3991 they cross fixed registers). */
3992
3993 /* This points to either the 32 bit or the 64 bit version. */
3994 const int *hard_regno_mode_classes;
3995
3996 static const int hard_32bit_mode_classes[] = {
3997 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3998 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3999 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4000 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4001
4002 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4003 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4004 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4005 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4006
4007 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4008 and none can hold SFmode/SImode values. */
4009 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4010 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4011 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4012 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4013
4014 /* %fcc[0123] */
4015 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4016
4017 /* %icc */
4018 CC_MODES
4019 };
4020
4021 static const int hard_64bit_mode_classes[] = {
4022 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4023 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4024 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4025 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4026
4027 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4028 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4029 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4030 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4031
4032 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4033 and none can hold SFmode/SImode values. */
4034 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4035 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4036 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4037 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4038
4039 /* %fcc[0123] */
4040 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4041
4042 /* %icc */
4043 CC_MODES
4044 };
4045
4046 int sparc_mode_class [NUM_MACHINE_MODES];
4047
4048 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4049
4050 static void
4051 sparc_init_modes (void)
4052 {
4053 int i;
4054
4055 for (i = 0; i < NUM_MACHINE_MODES; i++)
4056 {
4057 switch (GET_MODE_CLASS (i))
4058 {
4059 case MODE_INT:
4060 case MODE_PARTIAL_INT:
4061 case MODE_COMPLEX_INT:
4062 if (GET_MODE_SIZE (i) <= 4)
4063 sparc_mode_class[i] = 1 << (int) S_MODE;
4064 else if (GET_MODE_SIZE (i) == 8)
4065 sparc_mode_class[i] = 1 << (int) D_MODE;
4066 else if (GET_MODE_SIZE (i) == 16)
4067 sparc_mode_class[i] = 1 << (int) T_MODE;
4068 else if (GET_MODE_SIZE (i) == 32)
4069 sparc_mode_class[i] = 1 << (int) O_MODE;
4070 else
4071 sparc_mode_class[i] = 0;
4072 break;
4073 case MODE_VECTOR_INT:
4074 if (GET_MODE_SIZE (i) <= 4)
4075 sparc_mode_class[i] = 1 << (int)SF_MODE;
4076 else if (GET_MODE_SIZE (i) == 8)
4077 sparc_mode_class[i] = 1 << (int)DF_MODE;
4078 break;
4079 case MODE_FLOAT:
4080 case MODE_COMPLEX_FLOAT:
4081 if (GET_MODE_SIZE (i) <= 4)
4082 sparc_mode_class[i] = 1 << (int) SF_MODE;
4083 else if (GET_MODE_SIZE (i) == 8)
4084 sparc_mode_class[i] = 1 << (int) DF_MODE;
4085 else if (GET_MODE_SIZE (i) == 16)
4086 sparc_mode_class[i] = 1 << (int) TF_MODE;
4087 else if (GET_MODE_SIZE (i) == 32)
4088 sparc_mode_class[i] = 1 << (int) OF_MODE;
4089 else
4090 sparc_mode_class[i] = 0;
4091 break;
4092 case MODE_CC:
4093 if (i == (int) CCFPmode || i == (int) CCFPEmode)
4094 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4095 else
4096 sparc_mode_class[i] = 1 << (int) CC_MODE;
4097 break;
4098 default:
4099 sparc_mode_class[i] = 0;
4100 break;
4101 }
4102 }
4103
4104 if (TARGET_ARCH64)
4105 hard_regno_mode_classes = hard_64bit_mode_classes;
4106 else
4107 hard_regno_mode_classes = hard_32bit_mode_classes;
4108
4109 /* Initialize the array used by REGNO_REG_CLASS. */
4110 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4111 {
4112 if (i < 16 && TARGET_V8PLUS)
4113 sparc_regno_reg_class[i] = I64_REGS;
4114 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4115 sparc_regno_reg_class[i] = GENERAL_REGS;
4116 else if (i < 64)
4117 sparc_regno_reg_class[i] = FP_REGS;
4118 else if (i < 96)
4119 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4120 else if (i < 100)
4121 sparc_regno_reg_class[i] = FPCC_REGS;
4122 else
4123 sparc_regno_reg_class[i] = NO_REGS;
4124 }
4125 }
4126 \f
4127 /* Compute the frame size required by the function. This function is called
4128 during the reload pass and also by sparc_expand_prologue. */
4129
4130 HOST_WIDE_INT
4131 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function_p)
4132 {
4133 int outgoing_args_size = (crtl->outgoing_args_size
4134 + REG_PARM_STACK_SPACE (current_function_decl));
4135 int n_regs = 0; /* N_REGS is the number of 4-byte regs saved thus far. */
4136 int i;
4137
4138 if (TARGET_ARCH64)
4139 {
4140 for (i = 0; i < 8; i++)
4141 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4142 n_regs += 2;
4143 }
4144 else
4145 {
4146 for (i = 0; i < 8; i += 2)
4147 if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
4148 || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
4149 n_regs += 2;
4150 }
4151
4152 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4153 if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
4154 || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
4155 n_regs += 2;
4156
4157 /* Set up values for use in prologue and epilogue. */
4158 num_gfregs = n_regs;
4159
4160 if (leaf_function_p
4161 && n_regs == 0
4162 && size == 0
4163 && crtl->outgoing_args_size == 0)
4164 actual_fsize = apparent_fsize = 0;
4165 else
4166 {
4167 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4168 apparent_fsize = (size - STARTING_FRAME_OFFSET + 7) & -8;
4169 apparent_fsize += n_regs * 4;
4170 actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
4171 }
4172
4173 /* Make sure nothing can clobber our register windows.
4174 If a SAVE must be done, or there is a stack-local variable,
4175 the register window area must be allocated. */
4176 if (! leaf_function_p || size > 0)
4177 actual_fsize += FIRST_PARM_OFFSET (current_function_decl);
4178
4179 return SPARC_STACK_ALIGN (actual_fsize);
4180 }
4181
4182 /* Output any necessary .register pseudo-ops. */
4183
4184 void
4185 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4186 {
4187 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
4188 int i;
4189
4190 if (TARGET_ARCH32)
4191 return;
4192
4193 /* Check if %g[2367] were used without
4194 .register being printed for them already. */
4195 for (i = 2; i < 8; i++)
4196 {
4197 if (df_regs_ever_live_p (i)
4198 && ! sparc_hard_reg_printed [i])
4199 {
4200 sparc_hard_reg_printed [i] = 1;
4201 /* %g7 is used as TLS base register, use #ignore
4202 for it instead of #scratch. */
4203 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4204 i == 7 ? "ignore" : "scratch");
4205 }
4206 if (i == 3) i = 5;
4207 }
4208 #endif
4209 }
4210
4211 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
4212
4213 #if PROBE_INTERVAL > 4096
4214 #error Cannot use indexed addressing mode for stack probing
4215 #endif
4216
4217 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
4218 inclusive. These are offsets from the current stack pointer.
4219
4220 Note that we don't use the REG+REG addressing mode for the probes because
4221 of the stack bias in 64-bit mode. And it doesn't really buy us anything
4222 anyway, so the advantage of having a single code path wins here. */
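
/* A worked example, assuming PROBE_INTERVAL == 4096 and ignoring the
   stack bias: for FIRST == 4096 and SIZE == 12288 the second case below
   applies and probes are emitted at SP - 8192, SP - 12288 and finally
   SP - 16384, i.e. SP - (FIRST + SIZE).  */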
4223
4224 static void
4225 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
4226 {
4227 rtx g1 = gen_rtx_REG (Pmode, 1);
4228
4229 /* See if we have a constant small number of probes to generate. If so,
4230 that's the easy case. */
4231 if (size <= PROBE_INTERVAL)
4232 {
4233 emit_move_insn (g1, GEN_INT (first));
4234 emit_insn (gen_rtx_SET (VOIDmode, g1,
4235 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4236 emit_stack_probe (plus_constant (g1, -size));
4237 }
4238
4239 /* The run-time loop is made up of 10 insns in the generic case while the
4240 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
4241 else if (size <= 5 * PROBE_INTERVAL)
4242 {
4243 HOST_WIDE_INT i;
4244
4245 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
4246 emit_insn (gen_rtx_SET (VOIDmode, g1,
4247 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4248 emit_stack_probe (g1);
4249
4250 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
4251 it exceeds SIZE. If only two probes are needed, this will not
4252 generate any code. Then probe at FIRST + SIZE. */
4253 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
4254 {
4255 emit_insn (gen_rtx_SET (VOIDmode, g1,
4256 plus_constant (g1, -PROBE_INTERVAL)));
4257 emit_stack_probe (g1);
4258 }
4259
4260 emit_stack_probe (plus_constant (g1, (i - PROBE_INTERVAL) - size));
4261 }
4262
4263 /* Otherwise, do the same as above, but in a loop. Note that we must be
4264 extra careful with variables wrapping around because we might be at
4265 the very top (or the very bottom) of the address space and we have
4266 to be able to handle this case properly; in particular, we use an
4267 equality test for the loop condition. */
4268 else
4269 {
4270 HOST_WIDE_INT rounded_size;
4271 rtx g4 = gen_rtx_REG (Pmode, 4);
4272
4273 emit_move_insn (g1, GEN_INT (first));
4274
4275
4276 /* Step 1: round SIZE to the previous multiple of the interval. */
4277
4278 rounded_size = size & -PROBE_INTERVAL;
4279 emit_move_insn (g4, GEN_INT (rounded_size));
4280
4281
4282 /* Step 2: compute initial and final value of the loop counter. */
4283
4284 /* TEST_ADDR = SP + FIRST. */
4285 emit_insn (gen_rtx_SET (VOIDmode, g1,
4286 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4287
4288 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
4289 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
4290
4291
4292 /* Step 3: the loop
4293
4294 while (TEST_ADDR != LAST_ADDR)
4295 {
4296 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
4297 probe at TEST_ADDR
4298 }
4299
4300 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
4301 until N * PROBE_INTERVAL is equal to ROUNDED_SIZE. */
4302
4303 if (TARGET_ARCH64)
4304 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
4305 else
4306 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
4307
4308
4309 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
4310 that SIZE is equal to ROUNDED_SIZE. */
4311
4312 if (size != rounded_size)
4313 emit_stack_probe (plus_constant (g4, rounded_size - size));
4314 }
4315
4316 /* Make sure nothing is scheduled before we are done. */
4317 emit_insn (gen_blockage ());
4318 }
4319
4320 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
4321 absolute addresses. */
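
/* With PROBE_INTERVAL == 4096 and a zero stack bias, the emitted loop
   looks roughly like (the add and the store sit in the delay slots):

	.LPSRL0:
		cmp	%g1, %g4
		be	.LPSRE0
		 add	%g1, -4096, %g1
		ba	.LPSRL0
		 st	%g0, [%g1+0]
	.LPSRE0:

   with %g1 as TEST_ADDR and %g4 as LAST_ADDR, as in the caller above.  */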
4322
4323 const char *
4324 output_probe_stack_range (rtx reg1, rtx reg2)
4325 {
4326 static int labelno = 0;
4327 char loop_lab[32], end_lab[32];
4328 rtx xops[2];
4329
4330 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
4331 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
4332
4333 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
4334
4335 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
4336 xops[0] = reg1;
4337 xops[1] = reg2;
4338 output_asm_insn ("cmp\t%0, %1", xops);
4339 if (TARGET_ARCH64)
4340 fputs ("\tbe,pn\t%xcc,", asm_out_file);
4341 else
4342 fputs ("\tbe\t", asm_out_file);
4343 assemble_name_raw (asm_out_file, end_lab);
4344 fputc ('\n', asm_out_file);
4345
4346 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
4347 xops[1] = GEN_INT (-PROBE_INTERVAL);
4348 output_asm_insn (" add\t%0, %1, %0", xops);
4349
4350 /* Probe at TEST_ADDR and branch. */
4351 if (TARGET_ARCH64)
4352 fputs ("\tba,pt\t%xcc,", asm_out_file);
4353 else
4354 fputs ("\tba\t", asm_out_file);
4355 assemble_name_raw (asm_out_file, loop_lab);
4356 fputc ('\n', asm_out_file);
4357 xops[1] = GEN_INT (SPARC_STACK_BIAS);
4358 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
4359
4360 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
4361
4362 return "";
4363 }
4364
4365 /* Save/restore call-saved registers from LOW to HIGH at BASE+OFFSET
4366 as needed. LOW should be double-word aligned for 32-bit registers.
4367 Return the new OFFSET. */
4368
4369 #define SORR_SAVE 0
4370 #define SORR_RESTORE 1
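
/* A sketch of the pairing logic used below for adjacent registers: if
   both registers I and I+1 must be handled, they are moved as a single
   DImode (or DFmode) quantity; if only the odd one is, it is moved alone
   at OFFSET + 4.  Either way OFFSET is then advanced to the next
   double-word boundary.  */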
4371
4372 static int
4373 save_or_restore_regs (int low, int high, rtx base, int offset, int action)
4374 {
4375 rtx mem, insn;
4376 int i;
4377
4378 if (TARGET_ARCH64 && high <= 32)
4379 {
4380 for (i = low; i < high; i++)
4381 {
4382 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4383 {
4384 mem = gen_rtx_MEM (DImode, plus_constant (base, offset));
4385 set_mem_alias_set (mem, sparc_sr_alias_set);
4386 if (action == SORR_SAVE)
4387 {
4388 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
4389 RTX_FRAME_RELATED_P (insn) = 1;
4390 }
4391 else /* action == SORR_RESTORE */
4392 emit_move_insn (gen_rtx_REG (DImode, i), mem);
4393 offset += 8;
4394 }
4395 }
4396 }
4397 else
4398 {
4399 for (i = low; i < high; i += 2)
4400 {
4401 bool reg0 = df_regs_ever_live_p (i) && ! call_used_regs[i];
4402 bool reg1 = df_regs_ever_live_p (i+1) && ! call_used_regs[i+1];
4403 enum machine_mode mode;
4404 int regno;
4405
4406 if (reg0 && reg1)
4407 {
4408 mode = i < 32 ? DImode : DFmode;
4409 regno = i;
4410 }
4411 else if (reg0)
4412 {
4413 mode = i < 32 ? SImode : SFmode;
4414 regno = i;
4415 }
4416 else if (reg1)
4417 {
4418 mode = i < 32 ? SImode : SFmode;
4419 regno = i + 1;
4420 offset += 4;
4421 }
4422 else
4423 continue;
4424
4425 mem = gen_rtx_MEM (mode, plus_constant (base, offset));
4426 set_mem_alias_set (mem, sparc_sr_alias_set);
4427 if (action == SORR_SAVE)
4428 {
4429 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
4430 RTX_FRAME_RELATED_P (insn) = 1;
4431 }
4432 else /* action == SORR_RESTORE */
4433 emit_move_insn (gen_rtx_REG (mode, regno), mem);
4434
4435 /* Always preserve double-word alignment. */
4436 offset = (offset + 8) & -8;
4437 }
4438 }
4439
4440 return offset;
4441 }
4442
4443 /* Emit code to save or restore call-saved registers, depending on ACTION. */
4444
4445 static void
4446 emit_save_or_restore_regs (int action)
4447 {
4448 HOST_WIDE_INT offset;
4449 rtx base;
4450
4451 offset = frame_base_offset - apparent_fsize;
4452
4453 if (offset < -4096 || offset + num_gfregs * 4 > 4095)
4454 {
4455 /* ??? This might be optimized a little as %g1 might already have a
4456 value close enough that a single add insn will do. */
4457 /* ??? Although, all of this is probably only a temporary fix
4458 because if %g1 can hold a function result, then
4459 sparc_expand_epilogue will lose (the result will be
4460 clobbered). */
4461 base = gen_rtx_REG (Pmode, 1);
4462 emit_move_insn (base, GEN_INT (offset));
4463 emit_insn (gen_rtx_SET (VOIDmode,
4464 base,
4465 gen_rtx_PLUS (Pmode, frame_base_reg, base)));
4466 offset = 0;
4467 }
4468 else
4469 base = frame_base_reg;
4470
4471 offset = save_or_restore_regs (0, 8, base, offset, action);
4472 save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, action);
4473 }
4474
4475 /* Generate a save_register_window insn. */
4476
4477 static rtx
4478 gen_save_register_window (rtx increment)
4479 {
4480 if (TARGET_ARCH64)
4481 return gen_save_register_windowdi (increment);
4482 else
4483 return gen_save_register_windowsi (increment);
4484 }
4485
4486 /* Generate an increment for the stack pointer. */
4487
4488 static rtx
4489 gen_stack_pointer_inc (rtx increment)
4490 {
4491 return gen_rtx_SET (VOIDmode,
4492 stack_pointer_rtx,
4493 gen_rtx_PLUS (Pmode,
4494 stack_pointer_rtx,
4495 increment));
4496 }
4497
4498 /* Generate a decrement for the stack pointer. */
4499
4500 static rtx
4501 gen_stack_pointer_dec (rtx decrement)
4502 {
4503 return gen_rtx_SET (VOIDmode,
4504 stack_pointer_rtx,
4505 gen_rtx_MINUS (Pmode,
4506 stack_pointer_rtx,
4507 decrement));
4508 }
4509
4510 /* Expand the function prologue. The prologue is responsible for reserving
4511 storage for the frame, saving the call-saved registers and loading the
4512 GOT register if needed. */
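
/* As a rough sketch of the result for a small frame (register saves and
   the stack bias omitted): a non-leaf function allocates its frame with
   a single

	save	%sp, -FSIZE, %sp

   while a leaf function merely adjusts the stack pointer in place, e.g.

	add	%sp, -FSIZE, %sp

   cf. gen_save_register_window and gen_stack_pointer_inc above.  */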
4513
4514 void
4515 sparc_expand_prologue (void)
4516 {
4517 rtx insn;
4518 int i;
4519
4520 /* Compute a snapshot of current_function_uses_only_leaf_regs. Relying
4521 on the final value of the flag means deferring the prologue/epilogue
4522 expansion until just before the second scheduling pass, which is too
4523 late to emit multiple epilogues or return insns.
4524
4525 Of course we are making the assumption that the value of the flag
4526 will not change between now and its final value. Of the three parts
4527 of the formula, only the last one can reasonably vary. Let's take a
4528 closer look, assuming that the first two are true (otherwise the
4529 last one is effectively silenced).
4530
4531 If only_leaf_regs_used returns false, the global predicate will also
4532 be false so the actual frame size calculated below will be positive.
4533 As a consequence, the save_register_window insn will be emitted in
4534 the instruction stream; now this insn explicitly references %fp
4535 which is not a leaf register so only_leaf_regs_used will always
4536 return false subsequently.
4537
4538 If only_leaf_regs_used returns true, we hope that the subsequent
4539 optimization passes won't cause non-leaf registers to pop up. For
4540 example, the regrename pass has special provisions to not rename to
4541 non-leaf registers in a leaf function. */
4542 sparc_leaf_function_p
4543 = optimize > 0 && leaf_function_p () && only_leaf_regs_used ();
4544
4545 /* Need to use actual_fsize, since we are also allocating
4546 space for our callee (and our own register save area). */
4547 actual_fsize
4548 = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
4549
4550 /* Advertise that the data calculated just above are now valid. */
4551 sparc_prologue_data_valid_p = true;
4552
4553 if (flag_stack_usage)
4554 current_function_static_stack_size = actual_fsize;
4555
4556 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && actual_fsize)
4557 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, actual_fsize);
4558
4559 if (sparc_leaf_function_p)
4560 {
4561 frame_base_reg = stack_pointer_rtx;
4562 frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
4563 }
4564 else
4565 {
4566 frame_base_reg = hard_frame_pointer_rtx;
4567 frame_base_offset = SPARC_STACK_BIAS;
4568 }
4569
4570 if (actual_fsize == 0)
4571 /* do nothing. */ ;
4572 else if (sparc_leaf_function_p)
4573 {
4574 if (actual_fsize <= 4096)
4575 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
4576 else if (actual_fsize <= 8192)
4577 {
4578 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
4579 /* %sp is still the CFA register. */
4580 RTX_FRAME_RELATED_P (insn) = 1;
4581 insn
4582 = emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
4583 }
4584 else
4585 {
4586 rtx reg = gen_rtx_REG (Pmode, 1);
4587 emit_move_insn (reg, GEN_INT (-actual_fsize));
4588 insn = emit_insn (gen_stack_pointer_inc (reg));
4589 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4590 gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
4591 }
4592
4593 RTX_FRAME_RELATED_P (insn) = 1;
4594 }
4595 else
4596 {
4597 if (actual_fsize <= 4096)
4598 insn = emit_insn (gen_save_register_window (GEN_INT (-actual_fsize)));
4599 else if (actual_fsize <= 8192)
4600 {
4601 insn = emit_insn (gen_save_register_window (GEN_INT (-4096)));
4602 /* %sp is not the CFA register anymore. */
4603 emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
4604 }
4605 else
4606 {
4607 rtx reg = gen_rtx_REG (Pmode, 1);
4608 emit_move_insn (reg, GEN_INT (-actual_fsize));
4609 insn = emit_insn (gen_save_register_window (reg));
4610 }
4611
4612 RTX_FRAME_RELATED_P (insn) = 1;
4613 for (i=0; i < XVECLEN (PATTERN (insn), 0); i++)
4614 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, i)) = 1;
4615 }
4616
4617 if (num_gfregs)
4618 emit_save_or_restore_regs (SORR_SAVE);
4619
4620 /* Load the GOT register if needed. */
4621 if (crtl->uses_pic_offset_table)
4622 load_got_register ();
4623 }
4624
4625 /* This function generates the assembly code for function entry, which boils
4626 down to emitting the necessary .register directives. */
4627
4628 static void
4629 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4630 {
4631 /* Check that the assumption we made in sparc_expand_prologue is valid. */
4632 gcc_assert (sparc_leaf_function_p == current_function_uses_only_leaf_regs);
4633
4634 sparc_output_scratch_registers (file);
4635 }
4636
4637 /* Expand the function epilogue, either normal or part of a sibcall.
4638 We emit all the instructions except the return or the call. */
4639
4640 void
4641 sparc_expand_epilogue (void)
4642 {
4643 if (num_gfregs)
4644 emit_save_or_restore_regs (SORR_RESTORE);
4645
4646 if (actual_fsize == 0)
4647 /* do nothing. */ ;
4648 else if (sparc_leaf_function_p)
4649 {
4650 if (actual_fsize <= 4096)
4651 emit_insn (gen_stack_pointer_dec (GEN_INT (-actual_fsize)));
4652 else if (actual_fsize <= 8192)
4653 {
4654 emit_insn (gen_stack_pointer_dec (GEN_INT (-4096)));
4655 emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - actual_fsize)));
4656 }
4657 else
4658 {
4659 rtx reg = gen_rtx_REG (Pmode, 1);
4660 emit_move_insn (reg, GEN_INT (-actual_fsize));
4661 emit_insn (gen_stack_pointer_dec (reg));
4662 }
4663 }
4664 }
4665
4666 /* Return true if it is appropriate to emit `return' instructions in the
4667 body of a function. */
4668
4669 bool
4670 sparc_can_use_return_insn_p (void)
4671 {
4672 return sparc_prologue_data_valid_p
4673 && (actual_fsize == 0 || !sparc_leaf_function_p);
4674 }
4675
4676 /* This function generates the assembly code for function exit. */
4677
4678 static void
4679 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4680 {
4681 /* If the last two instructions of a function are "call foo; dslot;"
4682 the return address might point to the first instruction in the next
4683 function and we have to output a dummy nop for the sake of sane
4684 backtraces in such cases. This is pointless for sibling calls since
4685 the return address is explicitly adjusted. */
4686
4687 rtx insn, last_real_insn;
4688
4689 insn = get_last_insn ();
4690
4691 last_real_insn = prev_real_insn (insn);
4692 if (last_real_insn
4693 && GET_CODE (last_real_insn) == INSN
4694 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
4695 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
4696
4697 if (last_real_insn
4698 && CALL_P (last_real_insn)
4699 && !SIBLING_CALL_P (last_real_insn))
4700 fputs("\tnop\n", file);
4701
4702 sparc_output_deferred_case_vectors ();
4703 }
4704
4705 /* Output a 'restore' instruction. */
4706
4707 static void
4708 output_restore (rtx pat)
4709 {
4710 rtx operands[3];
4711
4712 if (! pat)
4713 {
4714 fputs ("\t restore\n", asm_out_file);
4715 return;
4716 }
4717
4718 gcc_assert (GET_CODE (pat) == SET);
4719
4720 operands[0] = SET_DEST (pat);
4721 pat = SET_SRC (pat);
4722
4723 switch (GET_CODE (pat))
4724 {
4725 case PLUS:
4726 operands[1] = XEXP (pat, 0);
4727 operands[2] = XEXP (pat, 1);
4728 output_asm_insn (" restore %r1, %2, %Y0", operands);
4729 break;
4730 case LO_SUM:
4731 operands[1] = XEXP (pat, 0);
4732 operands[2] = XEXP (pat, 1);
4733 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
4734 break;
4735 case ASHIFT:
4736 operands[1] = XEXP (pat, 0);
4737 gcc_assert (XEXP (pat, 1) == const1_rtx);
4738 output_asm_insn (" restore %r1, %r1, %Y0", operands);
4739 break;
4740 default:
4741 operands[1] = pat;
4742 output_asm_insn (" restore %%g0, %1, %Y0", operands);
4743 break;
4744 }
4745 }
4746
4747 /* Output a return. */
4748
4749 const char *
4750 output_return (rtx insn)
4751 {
4752 if (sparc_leaf_function_p)
4753 {
4754 /* This is a leaf function so we don't have to bother restoring the
4755 register window, which frees us from dealing with the convoluted
4756 semantics of restore/return. We simply output the jump to the
4757 return address and the insn in the delay slot (if any). */
4758
4759 gcc_assert (! crtl->calls_eh_return);
4760
4761 return "jmp\t%%o7+%)%#";
4762 }
4763 else
4764 {
4765 /* This is a regular function so we have to restore the register window.
4766 We may have a pending insn for the delay slot, which will be either
4767 combined with the 'restore' instruction or put in the delay slot of
4768 the 'return' instruction. */
4769
4770 if (crtl->calls_eh_return)
4771 {
4772 /* If the function uses __builtin_eh_return, the eh_return
4773 machinery occupies the delay slot. */
4774 gcc_assert (! final_sequence);
4775
4776 if (! flag_delayed_branch)
4777 fputs ("\tadd\t%fp, %g1, %fp\n", asm_out_file);
4778
4779 if (TARGET_V9)
4780 fputs ("\treturn\t%i7+8\n", asm_out_file);
4781 else
4782 fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file);
4783
4784 if (flag_delayed_branch)
4785 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
4786 else
4787 fputs ("\t nop\n", asm_out_file);
4788 }
4789 else if (final_sequence)
4790 {
4791 rtx delay, pat;
4792
4793 delay = NEXT_INSN (insn);
4794 gcc_assert (delay);
4795
4796 pat = PATTERN (delay);
4797
4798 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
4799 {
4800 epilogue_renumber (&pat, 0);
4801 return "return\t%%i7+%)%#";
4802 }
4803 else
4804 {
4805 output_asm_insn ("jmp\t%%i7+%)", NULL);
4806 output_restore (pat);
4807 PATTERN (delay) = gen_blockage ();
4808 INSN_CODE (delay) = -1;
4809 }
4810 }
4811 else
4812 {
4813 /* The delay slot is empty. */
4814 if (TARGET_V9)
4815 return "return\t%%i7+%)\n\t nop";
4816 else if (flag_delayed_branch)
4817 return "jmp\t%%i7+%)\n\t restore";
4818 else
4819 return "restore\n\tjmp\t%%o7+%)\n\t nop";
4820 }
4821 }
4822
4823 return "";
4824 }
4825
4826 /* Output a sibling call. */
4827
4828 const char *
4829 output_sibcall (rtx insn, rtx call_operand)
4830 {
4831 rtx operands[1];
4832
4833 gcc_assert (flag_delayed_branch);
4834
4835 operands[0] = call_operand;
4836
4837 if (sparc_leaf_function_p)
4838 {
4839 /* This is a leaf function so we don't have to bother restoring the
4840 register window. We simply output the jump to the function and
4841 the insn in the delay slot (if any). */
4842
4843 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
4844
4845 if (final_sequence)
4846 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
4847 operands);
4848 else
4849 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
4850 it into a branch if possible. */
4851 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
4852 operands);
4853 }
4854 else
4855 {
4856 /* This is a regular function so we have to restore the register window.
4857 We may have a pending insn for the delay slot, which will be combined
4858 with the 'restore' instruction. */
4859
4860 output_asm_insn ("call\t%a0, 0", operands);
4861
4862 if (final_sequence)
4863 {
4864 rtx delay = NEXT_INSN (insn);
4865 gcc_assert (delay);
4866
4867 output_restore (PATTERN (delay));
4868
4869 PATTERN (delay) = gen_blockage ();
4870 INSN_CODE (delay) = -1;
4871 }
4872 else
4873 output_restore (NULL_RTX);
4874 }
4875
4876 return "";
4877 }
4878 \f
4879 /* Functions for handling argument passing.
4880
4881 For 32-bit, the first 6 args are normally in registers and the rest are
4882 pushed. Any arg that starts within the first 6 words is at least
4883 partially passed in a register unless its data type forbids it.
4884
4885 For 64-bit, the argument registers are laid out as an array of 16 elements
4886 and arguments are added sequentially. The first 6 int args and up to the
4887 first 16 fp args (depending on size) are passed in regs.
4888
4889 Slot Stack Integral Float Float in structure Double Long Double
4890 ---- ----- -------- ----- ------------------ ------ -----------
4891 15 [SP+248] %f31 %f30,%f31 %d30
4892 14 [SP+240] %f29 %f28,%f29 %d28 %q28
4893 13 [SP+232] %f27 %f26,%f27 %d26
4894 12 [SP+224] %f25 %f24,%f25 %d24 %q24
4895 11 [SP+216] %f23 %f22,%f23 %d22
4896 10 [SP+208] %f21 %f20,%f21 %d20 %q20
4897 9 [SP+200] %f19 %f18,%f19 %d18
4898 8 [SP+192] %f17 %f16,%f17 %d16 %q16
4899 7 [SP+184] %f15 %f14,%f15 %d14
4900 6 [SP+176] %f13 %f12,%f13 %d12 %q12
4901 5 [SP+168] %o5 %f11 %f10,%f11 %d10
4902 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
4903 3 [SP+152] %o3 %f7 %f6,%f7 %d6
4904 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
4905 1 [SP+136] %o1 %f3 %f2,%f3 %d2
4906 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
4907
4908 Here SP = %sp if -mno-stack-bias, or %sp+stack_bias otherwise.
4909
4910 Integral arguments are always passed as 64-bit quantities appropriately
4911 extended.
4912
4913 Passing of floating point values is handled as follows.
4914 If a prototype is in scope:
4915 If the value is in a named argument (i.e. not a stdarg function or a
4916 value not part of the `...') then the value is passed in the appropriate
4917 fp reg.
4918 If the value is part of the `...' and is passed in one of the first 6
4919 slots then the value is passed in the appropriate int reg.
4920 If the value is part of the `...' and is not passed in one of the first 6
4921 slots then the value is passed in memory.
4922 If a prototype is not in scope:
4923 If the value is one of the first 6 arguments the value is passed in the
4924 appropriate integer reg and the appropriate fp reg.
4925 If the value is not one of the first 6 arguments the value is passed in
4926 the appropriate fp reg and in memory.
4927
4928
4929 Summary of the calling conventions implemented by GCC on the SPARC:
4930
4931 32-bit ABI:
4932 size argument return value
4933
4934 small integer <4 int. reg. int. reg.
4935 word 4 int. reg. int. reg.
4936 double word 8 int. reg. int. reg.
4937
4938 _Complex small integer <8 int. reg. int. reg.
4939 _Complex word 8 int. reg. int. reg.
4940 _Complex double word 16 memory int. reg.
4941
4942 vector integer <=8 int. reg. FP reg.
4943 vector integer >8 memory memory
4944
4945 float 4 int. reg. FP reg.
4946 double 8 int. reg. FP reg.
4947 long double 16 memory memory
4948
4949 _Complex float 8 memory FP reg.
4950 _Complex double 16 memory FP reg.
4951 _Complex long double 32 memory FP reg.
4952
4953 vector float any memory memory
4954
4955 aggregate any memory memory
4956
4957
4958
4959 64-bit ABI:
4960 size argument return value
4961
4962 small integer <8 int. reg. int. reg.
4963 word 8 int. reg. int. reg.
4964 double word 16 int. reg. int. reg.
4965
4966 _Complex small integer <16 int. reg. int. reg.
4967 _Complex word 16 int. reg. int. reg.
4968 _Complex double word 32 memory int. reg.
4969
4970 vector integer <=16 FP reg. FP reg.
4971 vector integer 16<s<=32 memory FP reg.
4972 vector integer >32 memory memory
4973
4974 float 4 FP reg. FP reg.
4975 double 8 FP reg. FP reg.
4976 long double 16 FP reg. FP reg.
4977
4978 _Complex float 8 FP reg. FP reg.
4979 _Complex double 16 FP reg. FP reg.
4980 _Complex long double 32 memory FP reg.
4981
4982 vector float <=16 FP reg. FP reg.
4983 vector float 16<s<=32 memory FP reg.
4984 vector float >32 memory memory
4985
4986 aggregate <=16 reg. reg.
4987 aggregate 16<s<=32 memory reg.
4988 aggregate >32 memory memory
4989
4990
4991
4992 Note #1: complex floating-point types follow the extended SPARC ABIs as
4993 implemented by the Sun compiler.
4994
4995 Note #2: integral vector types follow the scalar floating-point types
4996 conventions to match what is implemented by the Sun VIS SDK.
4997
4998 Note #3: floating-point vector types follow the aggregate types
4999 conventions. */
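
/* As a concrete 64-bit example derived from the tables above: for a call
   to `void f (int a, double b, float c)', `a' is passed in %o0 (slot 0),
   `b' in %d2 (slot 1) and `c' in %f5 (slot 2, right-justified in the odd
   half of the double FP register).  */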
5000
5001
5002 /* Maximum number of int regs for args. */
5003 #define SPARC_INT_ARG_MAX 6
5004 /* Maximum number of fp regs for args. */
5005 #define SPARC_FP_ARG_MAX 16
5006
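/* Round a size in bytes up to a whole number of words; e.g. with 4-byte
   words, ROUND_ADVANCE (10) is 3.  */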
5007 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
5008
5009 /* Handle the INIT_CUMULATIVE_ARGS macro.
5010 Initialize a variable CUM of type CUMULATIVE_ARGS
5011 for a call to a function whose data type is FNTYPE.
5012 For a library call, FNTYPE is 0. */
5013
5014 void
5015 init_cumulative_args (struct sparc_args *cum, tree fntype,
5016 rtx libname ATTRIBUTE_UNUSED,
5017 tree fndecl ATTRIBUTE_UNUSED)
5018 {
5019 cum->words = 0;
5020 cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
5021 cum->libcall_p = fntype == 0;
5022 }
5023
5024 /* Handle promotion of pointer and integer arguments. */
5025
5026 static enum machine_mode
5027 sparc_promote_function_mode (const_tree type,
5028 enum machine_mode mode,
5029 int *punsignedp,
5030 const_tree fntype ATTRIBUTE_UNUSED,
5031 int for_return ATTRIBUTE_UNUSED)
5032 {
5033 if (type && POINTER_TYPE_P (type))
5034 {
5035 *punsignedp = POINTERS_EXTEND_UNSIGNED;
5036 return Pmode;
5037 }
5038
5039 /* Integral arguments are passed as full words, as per the ABI. */
5040 if (GET_MODE_CLASS (mode) == MODE_INT
5041 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5042 return word_mode;
5043
5044 return mode;
5045 }
5046
5047 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
5048
5049 static bool
5050 sparc_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
5051 {
5052 return TARGET_ARCH64 ? true : false;
5053 }
5054
5055 /* Scan the record type TYPE and return the following predicates:
5056 - INTREGS_P: the record contains at least one field or sub-field
5057 that is eligible for promotion in integer registers.
5058 - FP_REGS_P: the record contains at least one field or sub-field
5059 that is eligible for promotion in floating-point registers.
5060 - PACKED_P: the record contains at least one field that is packed.
5061
5062 Sub-fields are not taken into account for the PACKED_P predicate. */
5063
5064 static void
5065 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
5066 int *packed_p)
5067 {
5068 tree field;
5069
5070 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5071 {
5072 if (TREE_CODE (field) == FIELD_DECL)
5073 {
5074 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5075 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
5076 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5077 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5078 && TARGET_FPU)
5079 *fpregs_p = 1;
5080 else
5081 *intregs_p = 1;
5082
5083 if (packed_p && DECL_PACKED (field))
5084 *packed_p = 1;
5085 }
5086 }
5087 }
5088
5089 /* Compute the slot number to pass an argument in.
5090 Return the slot number or -1 if passing on the stack.
5091
5092 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5093 the preceding args and about the function being called.
5094 MODE is the argument's machine mode.
5095 TYPE is the data type of the argument (as a tree).
5096 This is null for libcalls where that information may
5097 not be available.
5098 NAMED is nonzero if this argument is a named parameter
5099 (otherwise it is an extra parameter matching an ellipsis).
5100 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
5101 *PREGNO records the register number to use if scalar type.
5102 *PPADDING records the amount of padding needed in words. */
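
/* For instance (cf. the ARCH64 alignment code below), a 16-byte-aligned
   argument arriving at an odd slot number is moved up to the next even
   slot and *PPADDING is set to 1, so that quad-word arguments always
   start on an even slot.  */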
5103
5104 static int
5105 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
5106 const_tree type, bool named, bool incoming_p,
5107 int *pregno, int *ppadding)
5108 {
5109 int regbase = (incoming_p
5110 ? SPARC_INCOMING_INT_ARG_FIRST
5111 : SPARC_OUTGOING_INT_ARG_FIRST);
5112 int slotno = cum->words;
5113 enum mode_class mclass;
5114 int regno;
5115
5116 *ppadding = 0;
5117
5118 if (type && TREE_ADDRESSABLE (type))
5119 return -1;
5120
5121 if (TARGET_ARCH32
5122 && mode == BLKmode
5123 && type
5124 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
5125 return -1;
5126
5127 /* For SPARC64, objects requiring 16-byte alignment get it. */
5128 if (TARGET_ARCH64
5129 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
5130 && (slotno & 1) != 0)
5131 slotno++, *ppadding = 1;
5132
5133 mclass = GET_MODE_CLASS (mode);
5134 if (type && TREE_CODE (type) == VECTOR_TYPE)
5135 {
5136 /* Vector types deserve special treatment because they are
5137 polymorphic wrt their mode, depending upon whether VIS
5138 instructions are enabled. */
5139 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5140 {
5141 /* The SPARC port defines no floating-point vector modes. */
5142 gcc_assert (mode == BLKmode);
5143 }
5144 else
5145 {
5146 /* Integral vector types should either have a vector
5147 mode or an integral mode, because we are guaranteed
5148 by pass_by_reference that their size is not greater
5149 than 16 bytes and TImode is 16-byte wide. */
5150 gcc_assert (mode != BLKmode);
5151
5152 /* Vector integers are handled like floats according to
5153 the Sun VIS SDK. */
5154 mclass = MODE_FLOAT;
5155 }
5156 }
5157
5158 switch (mclass)
5159 {
5160 case MODE_FLOAT:
5161 case MODE_COMPLEX_FLOAT:
5162 case MODE_VECTOR_INT:
5163 if (TARGET_ARCH64 && TARGET_FPU && named)
5164 {
5165 if (slotno >= SPARC_FP_ARG_MAX)
5166 return -1;
5167 regno = SPARC_FP_ARG_FIRST + slotno * 2;
5168 /* Arguments filling only one single FP register are
5169 right-justified in the outer double FP register. */
5170 if (GET_MODE_SIZE (mode) <= 4)
5171 regno++;
5172 break;
5173 }
5174 /* fallthrough */
5175
5176 case MODE_INT:
5177 case MODE_COMPLEX_INT:
5178 if (slotno >= SPARC_INT_ARG_MAX)
5179 return -1;
5180 regno = regbase + slotno;
5181 break;
5182
5183 case MODE_RANDOM:
5184 if (mode == VOIDmode)
5185 /* MODE is VOIDmode when generating the actual call. */
5186 return -1;
5187
5188 gcc_assert (mode == BLKmode);
5189
5190 if (TARGET_ARCH32
5191 || !type
5192 || (TREE_CODE (type) != VECTOR_TYPE
5193 && TREE_CODE (type) != RECORD_TYPE))
5194 {
5195 if (slotno >= SPARC_INT_ARG_MAX)
5196 return -1;
5197 regno = regbase + slotno;
5198 }
5199 else /* TARGET_ARCH64 && type */
5200 {
5201 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
5202
5203 /* First see what kinds of registers we would need. */
5204 if (TREE_CODE (type) == VECTOR_TYPE)
5205 fpregs_p = 1;
5206 else
5207 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
5208
5209 /* The ABI obviously doesn't specify how packed structures
5210 are passed. These are defined to be passed in int regs
5211 if possible, otherwise memory. */
5212 if (packed_p || !named)
5213 fpregs_p = 0, intregs_p = 1;
5214
5215 /* If all arg slots are filled, then must pass on stack. */
5216 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
5217 return -1;
5218
5219 /* If there are only int args and all int arg slots are filled,
5220 then must pass on stack. */
5221 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
5222 return -1;
5223
5224 /* Note that even if all int arg slots are filled, fp members may
5225 still be passed in regs if such regs are available.
5226 *PREGNO isn't set because there may be more than one, it's up
5227 to the caller to compute them. */
5228 return slotno;
5229 }
5230 break;
5231
5232 default :
5233 gcc_unreachable ();
5234 }
5235
5236 *pregno = regno;
5237 return slotno;
5238 }
5239
5240 /* Handle recursive register counting for structure field layout. */
5241
5242 struct function_arg_record_value_parms
5243 {
5244 rtx ret; /* return expression being built. */
5245 int slotno; /* slot number of the argument. */
5246 int named; /* whether the argument is named. */
5247 int regbase; /* regno of the base register. */
5248 int stack; /* 1 if part of the argument is on the stack. */
5249 int intoffset; /* offset of the first pending integer field. */
5250 unsigned int nregs; /* number of words passed in registers. */
5251 };
5252
5253 static void function_arg_record_value_3
5254 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
5255 static void function_arg_record_value_2
5256 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
5257 static void function_arg_record_value_1
5258 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
5259 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
5260 static rtx function_arg_union_value (int, enum machine_mode, int, int);
5261
5262 /* A subroutine of function_arg_record_value. Traverse the structure
5263 recursively and determine how many registers will be required. */
5264
5265 static void
5266 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
5267 struct function_arg_record_value_parms *parms,
5268 bool packed_p)
5269 {
5270 tree field;
5271
5272 /* We need to compute how many registers are needed so we can
5273 allocate the PARALLEL but before we can do that we need to know
5274 whether there are any packed fields. The ABI obviously doesn't
5275 specify how structures are passed in this case, so they are
5276 defined to be passed in int regs if possible, otherwise memory,
5277 regardless of whether there are fp values present. */
5278
5279 if (! packed_p)
5280 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5281 {
5282 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
5283 {
5284 packed_p = true;
5285 break;
5286 }
5287 }
5288
5289 /* Compute how many registers we need. */
5290 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5291 {
5292 if (TREE_CODE (field) == FIELD_DECL)
5293 {
5294 HOST_WIDE_INT bitpos = startbitpos;
5295
5296 if (DECL_SIZE (field) != 0)
5297 {
5298 if (integer_zerop (DECL_SIZE (field)))
5299 continue;
5300
5301 if (host_integerp (bit_position (field), 1))
5302 bitpos += int_bit_position (field);
5303 }
5304
5305 /* ??? FIXME: else assume zero offset. */
5306
5307 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5308 function_arg_record_value_1 (TREE_TYPE (field),
5309 bitpos,
5310 parms,
5311 packed_p);
5312 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5313 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5314 && TARGET_FPU
5315 && parms->named
5316 && ! packed_p)
5317 {
5318 if (parms->intoffset != -1)
5319 {
5320 unsigned int startbit, endbit;
5321 int intslots, this_slotno;
5322
5323 startbit = parms->intoffset & -BITS_PER_WORD;
5324 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5325
5326 intslots = (endbit - startbit) / BITS_PER_WORD;
5327 this_slotno = parms->slotno + parms->intoffset
5328 / BITS_PER_WORD;
5329
5330 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
5331 {
5332 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
5333 /* We need to pass this field on the stack. */
5334 parms->stack = 1;
5335 }
5336
5337 parms->nregs += intslots;
5338 parms->intoffset = -1;
5339 }
5340
5341 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
5342 If it wasn't true we wouldn't be here. */
5343 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
5344 && DECL_MODE (field) == BLKmode)
5345 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
5346 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
5347 parms->nregs += 2;
5348 else
5349 parms->nregs += 1;
5350 }
5351 else
5352 {
5353 if (parms->intoffset == -1)
5354 parms->intoffset = bitpos;
5355 }
5356 }
5357 }
5358 }
5359
5360 /* A subroutine of function_arg_record_value. Assign the bits of the
5361 structure between parms->intoffset and bitpos to integer registers. */
5362
5363 static void
5364 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
5365 struct function_arg_record_value_parms *parms)
5366 {
5367 enum machine_mode mode;
5368 unsigned int regno;
5369 unsigned int startbit, endbit;
5370 int this_slotno, intslots, intoffset;
5371 rtx reg;
5372
5373 if (parms->intoffset == -1)
5374 return;
5375
5376 intoffset = parms->intoffset;
5377 parms->intoffset = -1;
5378
5379 startbit = intoffset & -BITS_PER_WORD;
5380 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5381 intslots = (endbit - startbit) / BITS_PER_WORD;
5382 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
5383
5384 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
5385 if (intslots <= 0)
5386 return;
5387
5388 /* If this is the trailing part of a word, only load that much into
5389 the register. Otherwise load the whole register. Note that in
5390 the latter case we may pick up unwanted bits. It's not a problem
5391 at the moment but we may wish to revisit it. */
5392
5393 if (intoffset % BITS_PER_WORD != 0)
5394 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
5395 MODE_INT);
5396 else
5397 mode = word_mode;
5398
5399 intoffset /= BITS_PER_UNIT;
5400 do
5401 {
5402 regno = parms->regbase + this_slotno;
5403 reg = gen_rtx_REG (mode, regno);
5404 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5405 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
5406
5407 this_slotno += 1;
5408 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
5409 mode = word_mode;
5410 parms->nregs += 1;
5411 intslots -= 1;
5412 }
5413 while (intslots > 0);
5414 }
5415
5416 /* A subroutine of function_arg_record_value. Traverse the structure
5417 recursively and assign bits to floating point registers. Track which
5418 bits in between need integer registers; invoke function_arg_record_value_3
5419 to make that happen. */
5420
5421 static void
5422 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
5423 struct function_arg_record_value_parms *parms,
5424 bool packed_p)
5425 {
5426 tree field;
5427
5428 if (! packed_p)
5429 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5430 {
5431 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
5432 {
5433 packed_p = true;
5434 break;
5435 }
5436 }
5437
5438 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5439 {
5440 if (TREE_CODE (field) == FIELD_DECL)
5441 {
5442 HOST_WIDE_INT bitpos = startbitpos;
5443
5444 if (DECL_SIZE (field) != 0)
5445 {
5446 if (integer_zerop (DECL_SIZE (field)))
5447 continue;
5448
5449 if (host_integerp (bit_position (field), 1))
5450 bitpos += int_bit_position (field);
5451 }
5452
5453 /* ??? FIXME: else assume zero offset. */
5454
5455 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5456 function_arg_record_value_2 (TREE_TYPE (field),
5457 bitpos,
5458 parms,
5459 packed_p);
5460 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5461 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5462 && TARGET_FPU
5463 && parms->named
5464 && ! packed_p)
5465 {
5466 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
5467 int regno, nregs, pos;
5468 enum machine_mode mode = DECL_MODE (field);
5469 rtx reg;
5470
5471 function_arg_record_value_3 (bitpos, parms);
5472
5473 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
5474 && mode == BLKmode)
5475 {
5476 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
5477 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
5478 }
5479 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
5480 {
5481 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
5482 nregs = 2;
5483 }
5484 else
5485 nregs = 1;
5486
5487 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
5488 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
5489 regno++;
5490 reg = gen_rtx_REG (mode, regno);
5491 pos = bitpos / BITS_PER_UNIT;
5492 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5493 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
5494 parms->nregs += 1;
5495 while (--nregs > 0)
5496 {
5497 regno += GET_MODE_SIZE (mode) / 4;
5498 reg = gen_rtx_REG (mode, regno);
5499 pos += GET_MODE_SIZE (mode);
5500 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5501 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
5502 parms->nregs += 1;
5503 }
5504 }
5505 else
5506 {
5507 if (parms->intoffset == -1)
5508 parms->intoffset = bitpos;
5509 }
5510 }
5511 }
5512 }
5513
5514 /* Used by function_arg and sparc_function_value_1 to implement the complex
5515 conventions of the 64-bit ABI for passing and returning structures.
5516 Return an expression valid as a return value for the FUNCTION_ARG
5517 and TARGET_FUNCTION_VALUE.
5518
5519 TYPE is the data type of the argument (as a tree).
5520 This is null for libcalls where that information may
5521 not be available.
5522 MODE is the argument's machine mode.
5523 SLOTNO is the index number of the argument's slot in the parameter array.
5524 NAMED is nonzero if this argument is a named parameter
5525 (otherwise it is an extra parameter matching an ellipsis).
5526 REGBASE is the regno of the base register for the parameter array. */
5527
5528 static rtx
5529 function_arg_record_value (const_tree type, enum machine_mode mode,
5530 int slotno, int named, int regbase)
5531 {
5532 HOST_WIDE_INT typesize = int_size_in_bytes (type);
5533 struct function_arg_record_value_parms parms;
5534 unsigned int nregs;
5535
5536 parms.ret = NULL_RTX;
5537 parms.slotno = slotno;
5538 parms.named = named;
5539 parms.regbase = regbase;
5540 parms.stack = 0;
5541
5542 /* Compute how many registers we need. */
5543 parms.nregs = 0;
5544 parms.intoffset = 0;
5545 function_arg_record_value_1 (type, 0, &parms, false);
5546
5547 /* Take into account pending integer fields. */
5548 if (parms.intoffset != -1)
5549 {
5550 unsigned int startbit, endbit;
5551 int intslots, this_slotno;
5552
5553 startbit = parms.intoffset & -BITS_PER_WORD;
5554 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5555 intslots = (endbit - startbit) / BITS_PER_WORD;
5556 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
5557
5558 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
5559 {
5560 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
5561 /* We need to pass this field on the stack. */
5562 parms.stack = 1;
5563 }
5564
5565 parms.nregs += intslots;
5566 }
5567 nregs = parms.nregs;
5568
5569 /* Allocate the vector and handle some annoying special cases. */
5570 if (nregs == 0)
5571 {
5572 /* ??? Empty structure has no value? Duh? */
5573 if (typesize <= 0)
5574 {
5575 /* Though there's nothing really to store, return a word register
5576 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
5577 leads to breakage because there are zero bytes to load. */
5579 return gen_rtx_REG (mode, regbase);
5580 }
5581 else
5582 {
5583 /* ??? C++ has structures with no fields, and yet a size. Give up
5584 for now and pass everything back in integer registers. */
5585 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5586 }
5587 if (nregs + slotno > SPARC_INT_ARG_MAX)
5588 nregs = SPARC_INT_ARG_MAX - slotno;
5589 }
5590 gcc_assert (nregs != 0);
5591
5592 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
5593
5594 /* If at least one field must be passed on the stack, generate
5595 (parallel [(expr_list (nil) ...) ...]) so that all fields will
5596 also be passed on the stack. We can't do much better because the
5597 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
5598 of structures for which the fields passed exclusively in registers
5599 are not at the beginning of the structure. */
5600 if (parms.stack)
5601 XVECEXP (parms.ret, 0, 0)
5602 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5603
5604 /* Fill in the entries. */
5605 parms.nregs = 0;
5606 parms.intoffset = 0;
5607 function_arg_record_value_2 (type, 0, &parms, false);
5608 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
5609
5610 gcc_assert (parms.nregs == nregs);
5611
5612 return parms.ret;
5613 }
5614
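/* For example (purely illustrative): on v9, passing
     struct { double d; long l; }
   in the first two argument slots yields a PARALLEL whose first entry
   places D in a floating-point register at byte offset 0 and whose second
   entry places L in an integer register at byte offset 8.  */
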
5615 /* Used by sparc_function_arg_1 and sparc_function_value_1 to implement the
5616 conventions of the 64-bit ABI for passing and returning unions.
5617 Return an expression valid as a return value for the TARGET_FUNCTION_ARG
5618 and TARGET_FUNCTION_VALUE target hooks.
5619
5620 SIZE is the size in bytes of the union.
5621 MODE is the argument's machine mode.
SLOTNO is the index number of the argument's slot in the parameter array.
5622 REGNO is the hard register the union will be passed in. */
5623
5624 static rtx
5625 function_arg_union_value (int size, enum machine_mode mode, int slotno,
5626 int regno)
5627 {
5628 int nwords = ROUND_ADVANCE (size), i;
5629 rtx regs;
5630
5631 /* See comment in previous function for empty structures. */
5632 if (nwords == 0)
5633 return gen_rtx_REG (mode, regno);
5634
5635 if (slotno == SPARC_INT_ARG_MAX - 1)
5636 nwords = 1;
5637
5638 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
5639
5640 for (i = 0; i < nwords; i++)
5641 {
5642 /* Unions are passed left-justified. */
5643 XVECEXP (regs, 0, i)
5644 = gen_rtx_EXPR_LIST (VOIDmode,
5645 gen_rtx_REG (word_mode, regno),
5646 GEN_INT (UNITS_PER_WORD * i));
5647 regno++;
5648 }
5649
5650 return regs;
5651 }
5652
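/* For example (illustrative): on v9, a 12-byte union starting in slot 0
   occupies two words and yields
     (parallel [(%o0 at offset 0) (%o1 at offset 8)])
   while the same union starting in the last slot uses a single word
   register, per the truncation above.  */
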
5653 /* Used by sparc_function_arg_1 and sparc_function_value_1 to implement the
5654 conventions for passing and returning large (BLKmode) vectors.
5655 Return an expression valid as a return value for the TARGET_FUNCTION_ARG
5656 and TARGET_FUNCTION_VALUE target hooks.
5657
5658 SIZE is the size in bytes of the vector (at least 8 bytes).
5659 REGNO is the FP hard register the vector will be passed in. */
5660
5661 static rtx
5662 function_arg_vector_value (int size, int regno)
5663 {
5664 int i, nregs = size / 8;
5665 rtx regs;
5666
5667 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
5668
5669 for (i = 0; i < nregs; i++)
5670 {
5671 XVECEXP (regs, 0, i)
5672 = gen_rtx_EXPR_LIST (VOIDmode,
5673 gen_rtx_REG (DImode, regno + 2*i),
5674 GEN_INT (i*8));
5675 }
5676
5677 return regs;
5678 }
5679
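/* For example (illustrative): a 16-byte vector yields a PARALLEL of two
   DImode FP registers at byte offsets 0 and 8; the register number steps
   by 2 between entries because FP register numbers are counted in 4-byte
   units.  */
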
5680 /* Determine where to put an argument to a function.
5681 Value is zero to push the argument on the stack,
5682 or a hard register in which to store the argument.
5683
5684 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5685 the preceding args and about the function being called.
5686 MODE is the argument's machine mode.
5687 TYPE is the data type of the argument (as a tree).
5688 This is null for libcalls where that information may
5689 not be available.
5690 NAMED is true if this argument is a named parameter
5691 (otherwise it is an extra parameter matching an ellipsis).
5692 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
5693 TARGET_FUNCTION_INCOMING_ARG. */
5694
5695 static rtx
5696 sparc_function_arg_1 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
5697 const_tree type, bool named, bool incoming_p)
5698 {
5699 int regbase = (incoming_p
5700 ? SPARC_INCOMING_INT_ARG_FIRST
5701 : SPARC_OUTGOING_INT_ARG_FIRST);
5702 int slotno, regno, padding;
5703 enum mode_class mclass = GET_MODE_CLASS (mode);
5704
5705 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
5706 &regno, &padding);
5707 if (slotno == -1)
5708 return 0;
5709
5710 /* Vector types deserve special treatment because they are polymorphic wrt
5711 their mode, depending upon whether VIS instructions are enabled. */
5712 if (type && TREE_CODE (type) == VECTOR_TYPE)
5713 {
5714 HOST_WIDE_INT size = int_size_in_bytes (type);
5715 gcc_assert ((TARGET_ARCH32 && size <= 8)
5716 || (TARGET_ARCH64 && size <= 16));
5717
5718 if (mode == BLKmode)
5719 return function_arg_vector_value (size,
5720 SPARC_FP_ARG_FIRST + 2*slotno);
5721 else
5722 mclass = MODE_FLOAT;
5723 }
5724
5725 if (TARGET_ARCH32)
5726 return gen_rtx_REG (mode, regno);
5727
5728 /* Structures up to 16 bytes in size are passed in arg slots on the stack
5729 and are promoted to registers if possible. */
5730 if (type && TREE_CODE (type) == RECORD_TYPE)
5731 {
5732 HOST_WIDE_INT size = int_size_in_bytes (type);
5733 gcc_assert (size <= 16);
5734
5735 return function_arg_record_value (type, mode, slotno, named, regbase);
5736 }
5737
5738 /* Unions up to 16 bytes in size are passed in integer registers. */
5739 else if (type && TREE_CODE (type) == UNION_TYPE)
5740 {
5741 HOST_WIDE_INT size = int_size_in_bytes (type);
5742 gcc_assert (size <= 16);
5743
5744 return function_arg_union_value (size, mode, slotno, regno);
5745 }
5746
5747 /* v9 fp args in reg slots beyond the int reg slots get passed in regs,
5748 but also have the slot allocated for them.
5749 If no prototype is in scope, fp values in register slots get passed
5750 in two places: either fp regs and int regs, or fp regs and memory. */
5751 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
5752 && SPARC_FP_REG_P (regno))
5753 {
5754 rtx reg = gen_rtx_REG (mode, regno);
5755 if (cum->prototype_p || cum->libcall_p)
5756 {
5757 /* "* 2" because fp reg numbers are recorded in 4 byte
5758 quantities. */
5759 #if 0
5760 /* ??? This will cause the value to be passed in the fp reg and
5761 in the stack. When a prototype exists we want to pass the
5762 value in the reg but reserve space on the stack. That's an
5763 optimization, and is deferred [for a bit]. */
5764 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
5765 return gen_rtx_PARALLEL (mode,
5766 gen_rtvec (2,
5767 gen_rtx_EXPR_LIST (VOIDmode,
5768 NULL_RTX, const0_rtx),
5769 gen_rtx_EXPR_LIST (VOIDmode,
5770 reg, const0_rtx)));
5771 else
5772 #else
5773 /* ??? It seems that passing back a register even when past
5774 the area declared by REG_PARM_STACK_SPACE will allocate
5775 space appropriately, and will not copy the data onto the
5776 stack, exactly as we desire.
5777
5778 This is due to locate_and_pad_parm being called in
5779 expand_call whenever reg_parm_stack_space > 0, which
5780 while beneficial to our example here, would seem to be
5781 in error from what had been intended. Ho hum... -- r~ */
5782 #endif
5783 return reg;
5784 }
5785 else
5786 {
5787 rtx v0, v1;
5788
5789 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
5790 {
5791 int intreg;
5792
5793 /* On incoming, we don't need to know that the value
5794 is passed in %f0 and %i0, and it confuses other parts
5795 causing needless spillage even on the simplest cases. */
5796 if (incoming_p)
5797 return reg;
5798
5799 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
5800 + (regno - SPARC_FP_ARG_FIRST) / 2);
5801
5802 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5803 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
5804 const0_rtx);
5805 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5806 }
5807 else
5808 {
5809 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5810 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5811 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5812 }
5813 }
5814 }
5815
5816 /* All other aggregate types are passed in an integer register in a mode
5817 corresponding to the size of the type. */
5818 else if (type && AGGREGATE_TYPE_P (type))
5819 {
5820 HOST_WIDE_INT size = int_size_in_bytes (type);
5821 gcc_assert (size <= 16);
5822
5823 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5824 }
5825
5826 return gen_rtx_REG (mode, regno);
5827 }
5828
5829 /* Handle the TARGET_FUNCTION_ARG target hook. */
5830
5831 static rtx
5832 sparc_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5833 const_tree type, bool named)
5834 {
5835 return sparc_function_arg_1 (cum, mode, type, named, false);
5836 }
5837
5838 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
5839
5840 static rtx
5841 sparc_function_incoming_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5842 const_tree type, bool named)
5843 {
5844 return sparc_function_arg_1 (cum, mode, type, named, true);
5845 }
5846
5847 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
5848
5849 static unsigned int
5850 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
5851 {
5852 return ((TARGET_ARCH64
5853 && (GET_MODE_ALIGNMENT (mode) == 128
5854 || (type && TYPE_ALIGN (type) == 128)))
5855 ? 128
5856 : PARM_BOUNDARY);
5857 }
5858
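/* E.g. on 64-bit, TFmode has 128-bit GET_MODE_ALIGNMENT, so a long double
   argument starts on a 16-byte boundary; all other arguments fall back to
   PARM_BOUNDARY.  */
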
5859 /* For an arg passed partly in registers and partly in memory,
5860 this is the number of bytes of registers used.
5861 For args passed entirely in registers or entirely in memory, zero.
5862
5863 Any arg that starts in the first 6 regs but won't entirely fit in them
5864 needs partial registers on v8. On v9, structures with integer
5865 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
5866 values that begin in the last fp reg [where "last fp reg" varies with the
5867 mode] will be split between that reg and memory. */
5868
5869 static int
5870 sparc_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5871 tree type, bool named)
5872 {
5873 int slotno, regno, padding;
5874
5875 /* We pass false for incoming_p here; it doesn't matter. */
5876 slotno = function_arg_slotno (cum, mode, type, named, false,
5877 &regno, &padding);
5878
5879 if (slotno == -1)
5880 return 0;
5881
5882 if (TARGET_ARCH32)
5883 {
5884 if ((slotno + (mode == BLKmode
5885 ? ROUND_ADVANCE (int_size_in_bytes (type))
5886 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
5887 > SPARC_INT_ARG_MAX)
5888 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
5889 }
5890 else
5891 {
5892 /* We are guaranteed by pass_by_reference that the size of the
5893 argument is not greater than 16 bytes, so we only need to return
5894 one word if the argument is partially passed in registers. */
5895
5896 if (type && AGGREGATE_TYPE_P (type))
5897 {
5898 int size = int_size_in_bytes (type);
5899
5900 if (size > UNITS_PER_WORD
5901 && slotno == SPARC_INT_ARG_MAX - 1)
5902 return UNITS_PER_WORD;
5903 }
5904 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
5905 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5906 && ! (TARGET_FPU && named)))
5907 {
5908 /* The complex types are passed as packed types. */
5909 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
5910 && slotno == SPARC_INT_ARG_MAX - 1)
5911 return UNITS_PER_WORD;
5912 }
5913 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5914 {
5915 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
5916 > SPARC_FP_ARG_MAX)
5917 return UNITS_PER_WORD;
5918 }
5919 }
5920
5921 return 0;
5922 }
5923
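/* For example, on v8 a DImode argument assigned to slot 5 needs slots 5
   and 6, but only slot 5 exists in registers, so the function reports
   (SPARC_INT_ARG_MAX - 5) * UNITS_PER_WORD = 4 bytes passed in %o5, with
   the remaining word in memory.  */
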
5924 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
5925 Specify whether to pass the argument by reference. */
5926
5927 static bool
5928 sparc_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5929 enum machine_mode mode, const_tree type,
5930 bool named ATTRIBUTE_UNUSED)
5931 {
5932 if (TARGET_ARCH32)
5933 /* Original SPARC 32-bit ABI says that structures and unions,
5934 and quad-precision floats are passed by reference. For Pascal,
5935 also pass arrays by reference. All other base types are passed
5936 in registers.
5937
5938 Extended ABI (as implemented by the Sun compiler) says that all
5939 complex floats are passed by reference. Pass complex integers
5940 in registers up to 8 bytes. More generally, enforce the 2-word
5941 cap for passing arguments in registers.
5942
5943 Vector ABI (as implemented by the Sun VIS SDK) says that vector
5944 integers are passed like floats of the same size, that is in
5945 registers up to 8 bytes. Pass all vector floats by reference
5946 like structures and unions. */
5947 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
5948 || mode == SCmode
5949 /* Catch CDImode, TFmode, DCmode and TCmode. */
5950 || GET_MODE_SIZE (mode) > 8
5951 || (type
5952 && TREE_CODE (type) == VECTOR_TYPE
5953 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5954 else
5955 /* Original SPARC 64-bit ABI says that structures and unions
5956 smaller than 16 bytes are passed in registers, as well as
5957 all other base types.
5958
5959 Extended ABI (as implemented by the Sun compiler) says that
5960 complex floats are passed in registers up to 16 bytes. Pass
5961 all complex integers in registers up to 16 bytes. More generally,
5962 enforce the 2-word cap for passing arguments in registers.
5963
5964 Vector ABI (as implemented by the Sun VIS SDK) says that vector
5965 integers are passed like floats of the same size, that is in
5966 registers (up to 16 bytes). Pass all vector floats like structures
5967 and unions. */
5968 return ((type
5969 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
5970 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
5971 /* Catch CTImode and TCmode. */
5972 || GET_MODE_SIZE (mode) > 16);
5973 }
5974
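/* Illustrative consequences of the rules above (not an exhaustive list):

   TARGET_ARCH32: aggregates, _Complex float (SCmode) and long double
   (TFmode) go by reference; long long, being an 8-byte scalar, stays in
   registers.

   TARGET_ARCH64: a 24-byte struct goes by reference (larger than 16
   bytes), a 16-byte struct stays in registers, and _Complex long double
   (TCmode, 32 bytes) goes by reference.  */
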
5975 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
5976 Update the data in CUM to advance over an argument
5977 of mode MODE and data type TYPE.
5978 TYPE is null for libcalls where that information may not be available. */
5979
5980 static void
5981 sparc_function_arg_advance (struct sparc_args *cum, enum machine_mode mode,
5982 const_tree type, bool named)
5983 {
5984 int regno, padding;
5985
5986 /* We pass false for incoming_p here; it doesn't matter. */
5987 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
5988
5989 /* If argument requires leading padding, add it. */
5990 cum->words += padding;
5991
5992 if (TARGET_ARCH32)
5993 {
5994 cum->words += (mode != BLKmode
5995 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5996 : ROUND_ADVANCE (int_size_in_bytes (type)));
5997 }
5998 else
5999 {
6000 if (type && AGGREGATE_TYPE_P (type))
6001 {
6002 int size = int_size_in_bytes (type);
6003
6004 if (size <= 8)
6005 ++cum->words;
6006 else if (size <= 16)
6007 cum->words += 2;
6008 else /* passed by reference */
6009 ++cum->words;
6010 }
6011 else
6012 {
6013 cum->words += (mode != BLKmode
6014 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6015 : ROUND_ADVANCE (int_size_in_bytes (type)));
6016 }
6017 }
6018 }
6019
6020 /* Handle the FUNCTION_ARG_PADDING macro.
6021 For the 64-bit ABI, structs are always stored left-shifted in their
6022 argument slot. */
6023
6024 enum direction
6025 function_arg_padding (enum machine_mode mode, const_tree type)
6026 {
6027 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
6028 return upward;
6029
6030 /* Fall back to the default. */
6031 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
6032 }
6033
6034 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
6035 Specify whether to return the return value in memory. */
6036
6037 static bool
6038 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6039 {
6040 if (TARGET_ARCH32)
6041 /* Original SPARC 32-bit ABI says that structures and unions,
6042 and quad-precision floats are returned in memory. All other
6043 base types are returned in registers.
6044
6045 Extended ABI (as implemented by the Sun compiler) says that
6046 all complex floats are returned in registers (8 FP registers
6047 at most for '_Complex long double'). Return all complex integers
6048 in registers (4 at most for '_Complex long long').
6049
6050 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6051 integers are returned like floats of the same size, that is in
6052 registers up to 8 bytes and in memory otherwise. Return all
6053 vector floats in memory like structures and unions; note that
6054 they always have BLKmode like the latter. */
6055 return (TYPE_MODE (type) == BLKmode
6056 || TYPE_MODE (type) == TFmode
6057 || (TREE_CODE (type) == VECTOR_TYPE
6058 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6059 else
6060 /* Original SPARC 64-bit ABI says that structures and unions
6061 smaller than 32 bytes are returned in registers, as well as
6062 all other base types.
6063
6064 Extended ABI (as implemented by the Sun compiler) says that all
6065 complex floats are returned in registers (8 FP registers at most
6066 for '_Complex long double'). Return all complex integers in
6067 registers (4 at most for '_Complex TItype').
6068
6069 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6070 integers are returned like floats of the same size, that is in
6071 registers. Return all vector floats like structures and unions;
6072 note that they always have BLKmode like the latter. */
6073 return (TYPE_MODE (type) == BLKmode
6074 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
6075 }
6076
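/* Illustrative consequences of the rules above: on 32-bit, any BLKmode
   aggregate and long double (TFmode) are returned in memory, while
   _Complex double (DCmode) is returned in registers; on 64-bit, a 40-byte
   BLKmode struct is returned in memory but a 24-byte one in registers.  */
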
6077 /* Handle the TARGET_STRUCT_VALUE target hook.
6078 Return where to find the structure return value address. */
6079
6080 static rtx
6081 sparc_struct_value_rtx (tree fndecl, int incoming)
6082 {
6083 if (TARGET_ARCH64)
6084 return 0;
6085 else
6086 {
6087 rtx mem;
6088
6089 if (incoming)
6090 mem = gen_rtx_MEM (Pmode, plus_constant (frame_pointer_rtx,
6091 STRUCT_VALUE_OFFSET));
6092 else
6093 mem = gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx,
6094 STRUCT_VALUE_OFFSET));
6095
6096 /* Only follow the SPARC ABI for fixed-size structure returns.
6097 Variable-size structure returns are handled per the normal
6098 procedures in GCC. This is enabled by -mstd-struct-return. */
6099 if (incoming == 2
6100 && sparc_std_struct_return
6101 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
6102 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
6103 {
6104 /* We must check and adjust the return address, since the
6105 caller may or may not have actually provided the return
6106 object. */
6107 rtx ret_rtx = gen_rtx_REG (Pmode, 31);
6108 rtx scratch = gen_reg_rtx (SImode);
6109 rtx endlab = gen_label_rtx ();
6110
6111 /* Calculate the return object size.  */
6112 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
6113 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
6114 /* Construct a temporary return value.  */
6115 rtx temp_val
6116 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
6117
6118 /* Implement SPARC 32-bit psABI callee return struct checking:
6119
6120 Fetch the instruction where we will return to and see if
6121 it's an unimp instruction (the most significant 10 bits
6122 will be zero). */
6123 emit_move_insn (scratch, gen_rtx_MEM (SImode,
6124 plus_constant (ret_rtx, 8)));
6125 /* Assume the size is valid and pre-adjust.  */
6126 emit_insn (gen_add3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
6127 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
6128 0, endlab);
6129 emit_insn (gen_sub3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
6130 /* Write the address of the memory pointed to by temp_val into
6131 the memory pointed to by mem.  */
6132 emit_move_insn (mem, XEXP (temp_val, 0));
6133 emit_label (endlab);
6134 }
6135
6136 set_mem_alias_set (mem, struct_value_alias_set);
6137 return mem;
6138 }
6139 }
6140
6141 /* Handle the TARGET_FUNCTION_VALUE and TARGET_LIBCALL_VALUE target hooks.
6142 For v9, function return values are subject to the same rules as arguments,
6143 except that up to 32 bytes may be returned in registers. */
6144
6145 static rtx
6146 sparc_function_value_1 (const_tree type, enum machine_mode mode,
6147 bool outgoing)
6148 {
6149 /* Beware that the two values are swapped here wrt function_arg. */
6150 int regbase = (outgoing
6151 ? SPARC_INCOMING_INT_ARG_FIRST
6152 : SPARC_OUTGOING_INT_ARG_FIRST);
6153 enum mode_class mclass = GET_MODE_CLASS (mode);
6154 int regno;
6155
6156 /* Vector types deserve special treatment because they are polymorphic wrt
6157 their mode, depending upon whether VIS instructions are enabled. */
6158 if (type && TREE_CODE (type) == VECTOR_TYPE)
6159 {
6160 HOST_WIDE_INT size = int_size_in_bytes (type);
6161 gcc_assert ((TARGET_ARCH32 && size <= 8)
6162 || (TARGET_ARCH64 && size <= 32));
6163
6164 if (mode == BLKmode)
6165 return function_arg_vector_value (size,
6166 SPARC_FP_ARG_FIRST);
6167 else
6168 mclass = MODE_FLOAT;
6169 }
6170
6171 if (TARGET_ARCH64 && type)
6172 {
6173 /* Structures up to 32 bytes in size are returned in registers. */
6174 if (TREE_CODE (type) == RECORD_TYPE)
6175 {
6176 HOST_WIDE_INT size = int_size_in_bytes (type);
6177 gcc_assert (size <= 32);
6178
6179 return function_arg_record_value (type, mode, 0, 1, regbase);
6180 }
6181
6182 /* Unions up to 32 bytes in size are returned in integer registers. */
6183 else if (TREE_CODE (type) == UNION_TYPE)
6184 {
6185 HOST_WIDE_INT size = int_size_in_bytes (type);
6186 gcc_assert (size <= 32);
6187
6188 return function_arg_union_value (size, mode, 0, regbase);
6189 }
6190
6191 /* Objects that require it are returned in FP registers. */
6192 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6193 ;
6194
6195 /* All other aggregate types are returned in an integer register in a
6196 mode corresponding to the size of the type. */
6197 else if (AGGREGATE_TYPE_P (type))
6198 {
6201 HOST_WIDE_INT size = int_size_in_bytes (type);
6202 gcc_assert (size <= 32);
6203
6204 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6205
6206 /* ??? We probably should have made the same ABI change in
6207 3.4.0 as the one we made for unions. The latter was
6208 required by the SCD though, while the former is not
6209 specified, so we favored compatibility and efficiency.
6210
6211 Now we're stuck for aggregates larger than 16 bytes,
6212 because OImode vanished in the meantime. Let's not
6213 try to be unduly clever, and simply follow the ABI
6214 for unions in that case. */
6215 if (mode == BLKmode)
6216 return function_arg_union_value (size, mode, 0, regbase);
6217 else
6218 mclass = MODE_INT;
6219 }
6220
6221 /* We should only have pointer and integer types at this point. This
6222 must match sparc_promote_function_mode. */
6223 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6224 mode = word_mode;
6225 }
6226
6227 /* We should only have pointer and integer types at this point. This must
6228 match sparc_promote_function_mode. */
6229 else if (TARGET_ARCH32
6230 && mclass == MODE_INT
6231 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6232 mode = word_mode;
6233
6234 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
6235 regno = SPARC_FP_ARG_FIRST;
6236 else
6237 regno = regbase;
6238
6239 return gen_rtx_REG (mode, regno);
6240 }
6241
6242 /* Handle TARGET_FUNCTION_VALUE.
6243 On the SPARC, the value is found in the first "output" register, but the
6244 called function leaves it in the first "input" register. */
6245
6246 static rtx
6247 sparc_function_value (const_tree valtype,
6248 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
6249 bool outgoing)
6250 {
6251 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
6252 }
6253
6254 /* Handle TARGET_LIBCALL_VALUE. */
6255
6256 static rtx
6257 sparc_libcall_value (enum machine_mode mode,
6258 const_rtx fun ATTRIBUTE_UNUSED)
6259 {
6260 return sparc_function_value_1 (NULL_TREE, mode, false);
6261 }
6262
6263 /* Handle FUNCTION_VALUE_REGNO_P.
6264 On the SPARC, the first "output" reg is used for integer values, and the
6265 first floating point register is used for floating point values. */
6266
6267 static bool
6268 sparc_function_value_regno_p (const unsigned int regno)
6269 {
6270 return (regno == 8 || regno == 32);
6271 }
6272
6273 /* Do what is necessary for `va_start'. We look at the current function
6274 to determine if stdarg or varargs is used and return the address of
6275 the first unnamed parameter. */
6276
6277 static rtx
6278 sparc_builtin_saveregs (void)
6279 {
6280 int first_reg = crtl->args.info.words;
6281 rtx address;
6282 int regno;
6283
6284 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
6285 emit_move_insn (gen_rtx_MEM (word_mode,
6286 gen_rtx_PLUS (Pmode,
6287 frame_pointer_rtx,
6288 GEN_INT (FIRST_PARM_OFFSET (0)
6289 + (UNITS_PER_WORD
6290 * regno)))),
6291 gen_rtx_REG (word_mode,
6292 SPARC_INCOMING_INT_ARG_FIRST + regno));
6293
6294 address = gen_rtx_PLUS (Pmode,
6295 frame_pointer_rtx,
6296 GEN_INT (FIRST_PARM_OFFSET (0)
6297 + UNITS_PER_WORD * first_reg));
6298
6299 return address;
6300 }
6301
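/* For example, if the named parameters consumed three argument words,
   sparc_builtin_saveregs dumps %i3..%i5 into their reserved stack slots
   and returns the address of %i3's slot, which is where the first
   anonymous argument lives.  */
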
6302 /* Implement `va_start' for stdarg. */
6303
6304 static void
6305 sparc_va_start (tree valist, rtx nextarg)
6306 {
6307 nextarg = expand_builtin_saveregs ();
6308 std_expand_builtin_va_start (valist, nextarg);
6309 }
6310
6311 /* Implement `va_arg' for stdarg. */
6312
6313 static tree
6314 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6315 gimple_seq *post_p)
6316 {
6317 HOST_WIDE_INT size, rsize, align;
6318 tree addr, incr;
6319 bool indirect;
6320 tree ptrtype = build_pointer_type (type);
6321
6322 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
6323 {
6324 indirect = true;
6325 size = rsize = UNITS_PER_WORD;
6326 align = 0;
6327 }
6328 else
6329 {
6330 indirect = false;
6331 size = int_size_in_bytes (type);
6332 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6333 align = 0;
6334
6335 if (TARGET_ARCH64)
6336 {
6337 /* For SPARC64, objects requiring 16-byte alignment get it. */
6338 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
6339 align = 2 * UNITS_PER_WORD;
6340
6341 /* SPARC-V9 ABI states that structures up to 16 bytes in size
6342 are left-justified in their slots. */
6343 if (AGGREGATE_TYPE_P (type))
6344 {
6345 if (size == 0)
6346 size = rsize = UNITS_PER_WORD;
6347 else
6348 size = rsize;
6349 }
6350 }
6351 }
6352
6353 incr = valist;
6354 if (align)
6355 {
6356 incr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
6357 size_int (align - 1));
6358 incr = fold_convert (sizetype, incr);
6359 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
6360 size_int (-align));
6361 incr = fold_convert (ptr_type_node, incr);
6362 }
6363
6364 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
6365 addr = incr;
6366
6367 if (BYTES_BIG_ENDIAN && size < rsize)
6368 addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
6369 size_int (rsize - size));
6370
6371 if (indirect)
6372 {
6373 addr = fold_convert (build_pointer_type (ptrtype), addr);
6374 addr = build_va_arg_indirect_ref (addr);
6375 }
6376
6377 /* If the address isn't aligned properly for the type, we need a temporary.
6378 FIXME: This is inefficient; usually we can do this in registers. */
6379 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
6380 {
6381 tree tmp = create_tmp_var (type, "va_arg_tmp");
6382 tree dest_addr = build_fold_addr_expr (tmp);
6383 tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
6384 3, dest_addr, addr, size_int (rsize));
6385 TREE_ADDRESSABLE (tmp) = 1;
6386 gimplify_and_add (copy, pre_p);
6387 addr = dest_addr;
6388 }
6389
6390 else
6391 addr = fold_convert (ptrtype, addr);
6392
6393 incr
6394 = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, size_int (rsize));
6395 gimplify_assign (valist, incr, post_p);
6396
6397 return build_va_arg_indirect_ref (addr);
6398 }
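
/* A scalar model of the pointer arithmetic generated above; kept out of
   the build with #if 0 and purely illustrative (sparc_va_arg_model and
   its parameters are made-up names):  */
#if 0
static void *
sparc_va_arg_model (char **valist, size_t size, size_t align)
{
  size_t rsize = (size + UNITS_PER_WORD - 1) & -(size_t) UNITS_PER_WORD;
  char *incr = *valist;
  char *addr;

  /* Align the cursor if the type demands more than word alignment.  */
  if (align)
    incr = (char *) (((uintptr_t) incr + align - 1) & -(uintptr_t) align);

  addr = incr;

  /* On a big-endian target, arguments smaller than a slot are
     right-justified within it.  */
  if (BYTES_BIG_ENDIAN && size < rsize)
    addr += rsize - size;

  *valist = incr + rsize;
  return addr;
}
#endif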
6399 \f
6400 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
6401 Specify whether the vector mode is supported by the hardware. */
6402
6403 static bool
6404 sparc_vector_mode_supported_p (enum machine_mode mode)
6405 {
6406 return TARGET_VIS && VECTOR_MODE_P (mode);
6407 }
6408 \f
6409 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
6410
6411 static enum machine_mode
6412 sparc_preferred_simd_mode (enum machine_mode mode)
6413 {
6414 if (TARGET_VIS)
6415 switch (mode)
6416 {
6417 case SImode:
6418 return V2SImode;
6419 case HImode:
6420 return V4HImode;
6421 case QImode:
6422 return V8QImode;
6423
6424 default:;
6425 }
6426
6427 return word_mode;
6428 }
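
/* E.g. with VIS enabled, a loop over 8-bit elements is vectorized with
   V8QImode, i.e. eight bytes per vector operation; without VIS the hook
   returns word_mode and no real vectorization happens.  */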
6429 \f
6430 /* Return the string to output an unconditional branch to LABEL, which is
6431 the operand number of the label.
6432
6433 DEST is the destination insn (i.e. the label), INSN is the source. */
6434
6435 const char *
6436 output_ubranch (rtx dest, int label, rtx insn)
6437 {
6438 static char string[64];
6439 bool v9_form = false;
6440 char *p;
6441
6442 if (TARGET_V9 && INSN_ADDRESSES_SET_P ())
6443 {
6444 int delta = (INSN_ADDRESSES (INSN_UID (dest))
6445 - INSN_ADDRESSES (INSN_UID (insn)));
6446 /* Leave some instructions for "slop". */
6447 if (delta >= -260000 && delta < 260000)
6448 v9_form = true;
6449 }
6450
6451 if (v9_form)
6452 strcpy (string, "ba%*,pt\t%%xcc, ");
6453 else
6454 strcpy (string, "b%*\t");
6455
6456 p = strchr (string, '\0');
6457 *p++ = '%';
6458 *p++ = 'l';
6459 *p++ = '0' + label;
6460 *p++ = '%';
6461 *p++ = '(';
6462 *p = '\0';
6463
6464 return string;
6465 }
6466
6467 /* Return the string to output a conditional branch to LABEL, which is
6468 the operand number of the label. OP is the conditional expression.
6469 XEXP (OP, 0) is assumed to be a condition code register (integer or
6470 floating point) and its mode specifies what kind of comparison we made.
6471
6472 DEST is the destination insn (i.e. the label), INSN is the source.
6473
6474 REVERSED is nonzero if we should reverse the sense of the comparison.
6475
6476 ANNUL is nonzero if we should generate an annulling branch. */
6477
6478 const char *
6479 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
6480 rtx insn)
6481 {
6482 static char string[64];
6483 enum rtx_code code = GET_CODE (op);
6484 rtx cc_reg = XEXP (op, 0);
6485 enum machine_mode mode = GET_MODE (cc_reg);
6486 const char *labelno, *branch;
6487 int spaces = 8, far;
6488 char *p;
6489
6490 /* v9 branches are limited to +-1MB. If it is too far away,
6491 change
6492
6493 bne,pt %xcc, .LC30
6494
6495 to
6496
6497 be,pn %xcc, .+12
6498 nop
6499 ba .LC30
6500
6501 and
6502
6503 fbne,a,pn %fcc2, .LC29
6504
6505 to
6506
6507 fbe,pt %fcc2, .+16
6508 nop
6509 ba .LC29 */
6510
6511 far = TARGET_V9 && (get_attr_length (insn) >= 3);
6512 if (reversed ^ far)
6513 {
6514 /* Reversing an FP compare takes care: an ordered compare
6515 becomes an unordered compare and vice versa. */
6516 if (mode == CCFPmode || mode == CCFPEmode)
6517 code = reverse_condition_maybe_unordered (code);
6518 else
6519 code = reverse_condition (code);
6520 }
6521
6522 /* Start by writing the branch condition. */
6523 if (mode == CCFPmode || mode == CCFPEmode)
6524 {
6525 switch (code)
6526 {
6527 case NE:
6528 branch = "fbne";
6529 break;
6530 case EQ:
6531 branch = "fbe";
6532 break;
6533 case GE:
6534 branch = "fbge";
6535 break;
6536 case GT:
6537 branch = "fbg";
6538 break;
6539 case LE:
6540 branch = "fble";
6541 break;
6542 case LT:
6543 branch = "fbl";
6544 break;
6545 case UNORDERED:
6546 branch = "fbu";
6547 break;
6548 case ORDERED:
6549 branch = "fbo";
6550 break;
6551 case UNGT:
6552 branch = "fbug";
6553 break;
6554 case UNLT:
6555 branch = "fbul";
6556 break;
6557 case UNEQ:
6558 branch = "fbue";
6559 break;
6560 case UNGE:
6561 branch = "fbuge";
6562 break;
6563 case UNLE:
6564 branch = "fbule";
6565 break;
6566 case LTGT:
6567 branch = "fblg";
6568 break;
6569
6570 default:
6571 gcc_unreachable ();
6572 }
6573
6574 /* ??? !v9: FP branches cannot be preceded by another floating point
6575 insn. Because there is currently no concept of pre-delay slots,
6576 we can fix this only by always emitting a nop before a floating
6577 point branch. */
6578
6579 string[0] = '\0';
6580 if (! TARGET_V9)
6581 strcpy (string, "nop\n\t");
6582 strcat (string, branch);
6583 }
6584 else
6585 {
6586 switch (code)
6587 {
6588 case NE:
6589 branch = "bne";
6590 break;
6591 case EQ:
6592 branch = "be";
6593 break;
6594 case GE:
6595 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
6596 branch = "bpos";
6597 else
6598 branch = "bge";
6599 break;
6600 case GT:
6601 branch = "bg";
6602 break;
6603 case LE:
6604 branch = "ble";
6605 break;
6606 case LT:
6607 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
6608 branch = "bneg";
6609 else
6610 branch = "bl";
6611 break;
6612 case GEU:
6613 branch = "bgeu";
6614 break;
6615 case GTU:
6616 branch = "bgu";
6617 break;
6618 case LEU:
6619 branch = "bleu";
6620 break;
6621 case LTU:
6622 branch = "blu";
6623 break;
6624
6625 default:
6626 gcc_unreachable ();
6627 }
6628 strcpy (string, branch);
6629 }
6630 spaces -= strlen (branch);
6631 p = strchr (string, '\0');
6632
6633 /* Now add the annulling, the label, and a possible noop. */
6634 if (annul && ! far)
6635 {
6636 strcpy (p, ",a");
6637 p += 2;
6638 spaces -= 2;
6639 }
6640
6641 if (TARGET_V9)
6642 {
6643 rtx note;
6644 int v8 = 0;
6645
6646 if (! far && insn && INSN_ADDRESSES_SET_P ())
6647 {
6648 int delta = (INSN_ADDRESSES (INSN_UID (dest))
6649 - INSN_ADDRESSES (INSN_UID (insn)));
6650 /* Leave some instructions for "slop". */
6651 if (delta < -260000 || delta >= 260000)
6652 v8 = 1;
6653 }
6654
6655 if (mode == CCFPmode || mode == CCFPEmode)
6656 {
6657 static char v9_fcc_labelno[] = "%%fccX, ";
6658 /* Set the char indicating the number of the fcc reg to use. */
6659 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
6660 labelno = v9_fcc_labelno;
6661 if (v8)
6662 {
6663 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
6664 labelno = "";
6665 }
6666 }
6667 else if (mode == CCXmode || mode == CCX_NOOVmode)
6668 {
6669 labelno = "%%xcc, ";
6670 gcc_assert (! v8);
6671 }
6672 else
6673 {
6674 labelno = "%%icc, ";
6675 if (v8)
6676 labelno = "";
6677 }
6678
6679 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
6680 {
6681 strcpy (p,
6682 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
6683 ? ",pt" : ",pn");
6684 p += 3;
6685 spaces -= 3;
6686 }
6687 }
6688 else
6689 labelno = "";
6690
6691 if (spaces > 0)
6692 *p++ = '\t';
6693 else
6694 *p++ = ' ';
6695 strcpy (p, labelno);
6696 p = strchr (p, '\0');
6697 if (far)
6698 {
6699 strcpy (p, ".+12\n\t nop\n\tb\t");
6700 /* Skip the next insn if requested or
6701 if we know that it will be a nop. */
6702 if (annul || ! final_sequence)
6703 p[3] = '6';
6704 p += 14;
6705 }
6706 *p++ = '%';
6707 *p++ = 'l';
6708 *p++ = label + '0';
6709 *p++ = '%';
6710 *p++ = '#';
6711 *p = '\0';
6712
6713 return string;
6714 }
6715
6716 /* Emit a library call comparison between floating point X and Y.
6717 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
6718 Return the new operator to be used in the comparison sequence.
6719
6720 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
6721 values as arguments instead of the TFmode registers themselves;
6722 that's why we cannot call emit_float_lib_cmp. */
6723
6724 rtx
6725 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
6726 {
6727 const char *qpfunc;
6728 rtx slot0, slot1, result, tem, tem2, libfunc;
6729 enum machine_mode mode;
6730 enum rtx_code new_comparison;
6731
6732 switch (comparison)
6733 {
6734 case EQ:
6735 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
6736 break;
6737
6738 case NE:
6739 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
6740 break;
6741
6742 case GT:
6743 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
6744 break;
6745
6746 case GE:
6747 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
6748 break;
6749
6750 case LT:
6751 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
6752 break;
6753
6754 case LE:
6755 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
6756 break;
6757
6758 case ORDERED:
6759 case UNORDERED:
6760 case UNGT:
6761 case UNLT:
6762 case UNEQ:
6763 case UNGE:
6764 case UNLE:
6765 case LTGT:
6766 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
6767 break;
6768
6769 default:
6770 gcc_unreachable ();
6771 }
6772
6773 if (TARGET_ARCH64)
6774 {
6775 if (MEM_P (x))
6776 slot0 = x;
6777 else
6778 {
6779 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
6780 emit_move_insn (slot0, x);
6781 }
6782
6783 if (MEM_P (y))
6784 slot1 = y;
6785 else
6786 {
6787 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
6788 emit_move_insn (slot1, y);
6789 }
6790
6791 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
6792 emit_library_call (libfunc, LCT_NORMAL,
6793 DImode, 2,
6794 XEXP (slot0, 0), Pmode,
6795 XEXP (slot1, 0), Pmode);
6796 mode = DImode;
6797 }
6798 else
6799 {
6800 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
6801 emit_library_call (libfunc, LCT_NORMAL,
6802 SImode, 2,
6803 x, TFmode, y, TFmode);
6804 mode = SImode;
6805 }
6806
6808 /* Immediately move the result of the libcall into a pseudo
6809 register so reload doesn't clobber the value if it needs
6810 the return register for a spill reg. */
6811 result = gen_reg_rtx (mode);
6812 emit_move_insn (result, hard_libcall_value (mode, libfunc));
6813
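  /* The comparison helpers encode their result as 0 (equal), 1 (less),
     2 (greater) or 3 (unordered); the decoding below relies on this.
     For instance, UNLE is true unless the result is "greater", hence
     the test against 2, and UNLT tests the low bit, which is set
     exactly for "less" and "unordered".  */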
6814 switch (comparison)
6815 {
6816 default:
6817 return gen_rtx_NE (VOIDmode, result, const0_rtx);
6818 case ORDERED:
6819 case UNORDERED:
6820 new_comparison = (comparison == UNORDERED ? EQ : NE);
6821 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
6822 case UNGT:
6823 case UNGE:
6824 new_comparison = (comparison == UNGT ? GT : NE);
6825 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
6826 case UNLE:
6827 return gen_rtx_NE (VOIDmode, result, const2_rtx);
6828 case UNLT:
6829 tem = gen_reg_rtx (mode);
6830 if (TARGET_ARCH32)
6831 emit_insn (gen_andsi3 (tem, result, const1_rtx));
6832 else
6833 emit_insn (gen_anddi3 (tem, result, const1_rtx));
6834 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
6835 case UNEQ:
6836 case LTGT:
6837 tem = gen_reg_rtx (mode);
6838 if (TARGET_ARCH32)
6839 emit_insn (gen_addsi3 (tem, result, const1_rtx));
6840 else
6841 emit_insn (gen_adddi3 (tem, result, const1_rtx));
6842 tem2 = gen_reg_rtx (mode);
6843 if (TARGET_ARCH32)
6844 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
6845 else
6846 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
6847 new_comparison = (comparison == UNEQ ? EQ : NE);
6848 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
6849 }
6850
6851 gcc_unreachable ();
6852 }
6853
6854 /* Generate an unsigned DImode to FP conversion. This is the same code
6855 optabs would emit if we didn't have TFmode patterns. */
6856
6857 void
6858 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
6859 {
6860 rtx neglab, donelab, i0, i1, f0, in, out;
6861
6862 out = operands[0];
6863 in = force_reg (DImode, operands[1]);
6864 neglab = gen_label_rtx ();
6865 donelab = gen_label_rtx ();
6866 i0 = gen_reg_rtx (DImode);
6867 i1 = gen_reg_rtx (DImode);
6868 f0 = gen_reg_rtx (mode);
6869
6870 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
6871
6872 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
6873 emit_jump_insn (gen_jump (donelab));
6874 emit_barrier ();
6875
6876 emit_label (neglab);
6877
6878 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
6879 emit_insn (gen_anddi3 (i1, in, const1_rtx));
6880 emit_insn (gen_iordi3 (i0, i0, i1));
6881 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
6882 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
6883
6884 emit_label (donelab);
6885 }
6886
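/* A scalar model of the expansion above; kept out of the build with
   #if 0 and purely illustrative:  */
#if 0
static double
floatunsdi_model (unsigned long long x)
{
  unsigned long long half;
  double f;

  /* If the high bit is clear, a plain signed conversion is exact.  */
  if ((long long) x >= 0)
    return (double) (long long) x;

  /* Otherwise halve the value, or-ing the discarded low bit back in so
     that rounding of the final result is unaffected, convert the
     now-positive value, and double it.  */
  half = (x >> 1) | (x & 1);
  f = (double) (long long) half;
  return f + f;
}
#endif
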
6887 /* Generate an FP to unsigned DImode conversion. This is the same code
6888 optabs would emit if we didn't have TFmode patterns. */
6889
6890 void
6891 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
6892 {
6893 rtx neglab, donelab, i0, i1, f0, in, out, limit;
6894
6895 out = operands[0];
6896 in = force_reg (mode, operands[1]);
6897 neglab = gen_label_rtx ();
6898 donelab = gen_label_rtx ();
6899 i0 = gen_reg_rtx (DImode);
6900 i1 = gen_reg_rtx (DImode);
6901 limit = gen_reg_rtx (mode);
6902 f0 = gen_reg_rtx (mode);
6903
6904 emit_move_insn (limit,
6905 CONST_DOUBLE_FROM_REAL_VALUE (
6906 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
6907 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
6908
6909 emit_insn (gen_rtx_SET (VOIDmode,
6910 out,
6911 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
6912 emit_jump_insn (gen_jump (donelab));
6913 emit_barrier ();
6914
6915 emit_label (neglab);
6916
6917 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
6918 emit_insn (gen_rtx_SET (VOIDmode,
6919 i0,
6920 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
6921 emit_insn (gen_movdi (i1, const1_rtx));
6922 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
6923 emit_insn (gen_xordi3 (out, i0, i1));
6924
6925 emit_label (donelab);
6926 }
6927
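/* The matching scalar model for the FP to unsigned conversion above;
   again #if 0 and purely illustrative:  */
#if 0
static unsigned long long
fixunsdi_model (double x)
{
  const double limit = 9223372036854775808.0;  /* 2^63 */

  /* Values below 2^63 fit a plain signed conversion.  */
  if (x < limit)
    return (unsigned long long) (long long) x;

  /* Otherwise subtract 2^63, convert, and restore the top bit with an
     xor, mirroring the RTL emitted above.  */
  return (unsigned long long) (long long) (x - limit) ^ (1ULL << 63);
}
#endif
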
6928 /* Return the string to output a conditional branch to LABEL, testing
6929 register REG. LABEL is the operand number of the label; REG is the
6930 operand number of the reg. OP is the conditional expression. The mode
6931 of REG says what kind of comparison we made.
6932
6933 DEST is the destination insn (i.e. the label), INSN is the source.
6934
6935 REVERSED is nonzero if we should reverse the sense of the comparison.
6936
6937 ANNUL is nonzero if we should generate an annulling branch. */
6938
6939 const char *
6940 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
6941 int annul, rtx insn)
6942 {
6943 static char string[64];
6944 enum rtx_code code = GET_CODE (op);
6945 enum machine_mode mode = GET_MODE (XEXP (op, 0));
6946 rtx note;
6947 int far;
6948 char *p;
6949
6950 /* Branches on registers are limited to +-128KB. If it is too far
6951 away, change
6952
6953 brnz,pt %g1, .LC30
6954
6955 to
6956
6957 brz,pn %g1, .+12
6958 nop
6959 ba,pt %xcc, .LC30
6960
6961 and
6962
6963 brgez,a,pn %o1, .LC29
6964
6965 to
6966
6967 brlz,pt %o1, .+16
6968 nop
6969 ba,pt %xcc, .LC29 */
6970
6971 far = get_attr_length (insn) >= 3;
6972
6973 /* If not floating-point or if EQ or NE, we can just reverse the code. */
6974 if (reversed ^ far)
6975 code = reverse_condition (code);
6976
6977 /* Only 64 bit versions of these instructions exist. */
6978 gcc_assert (mode == DImode);
6979
6980 /* Start by writing the branch condition. */
6981
6982 switch (code)
6983 {
6984 case NE:
6985 strcpy (string, "brnz");
6986 break;
6987
6988 case EQ:
6989 strcpy (string, "brz");
6990 break;
6991
6992 case GE:
6993 strcpy (string, "brgez");
6994 break;
6995
6996 case LT:
6997 strcpy (string, "brlz");
6998 break;
6999
7000 case LE:
7001 strcpy (string, "brlez");
7002 break;
7003
7004 case GT:
7005 strcpy (string, "brgz");
7006 break;
7007
7008 default:
7009 gcc_unreachable ();
7010 }
7011
7012 p = strchr (string, '\0');
7013
7014 /* Now add the annulling, reg, label, and nop. */
7015 if (annul && ! far)
7016 {
7017 strcpy (p, ",a");
7018 p += 2;
7019 }
7020
7021 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7022 {
7023 strcpy (p,
7024 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
7025 ? ",pt" : ",pn");
7026 p += 3;
7027 }
7028
7029 *p = p < string + 8 ? '\t' : ' ';
7030 p++;
7031 *p++ = '%';
7032 *p++ = '0' + reg;
7033 *p++ = ',';
7034 *p++ = ' ';
7035 if (far)
7036 {
7037 int veryfar = 1, delta;
7038
7039 if (INSN_ADDRESSES_SET_P ())
7040 {
7041 delta = (INSN_ADDRESSES (INSN_UID (dest))
7042 - INSN_ADDRESSES (INSN_UID (insn)));
7043 /* Leave some instructions for "slop". */
7044 if (delta >= -260000 && delta < 260000)
7045 veryfar = 0;
7046 }
7047
7048 strcpy (p, ".+12\n\t nop\n\t");
7049 /* Skip the next insn if requested or
7050 if we know that it will be a nop. */
7051 if (annul || ! final_sequence)
7052 p[3] = '6';
7053 p += 12;
7054 if (veryfar)
7055 {
7056 strcpy (p, "b\t");
7057 p += 2;
7058 }
7059 else
7060 {
7061 strcpy (p, "ba,pt\t%%xcc, ");
7062 p += 13;
7063 }
7064 }
7065 *p++ = '%';
7066 *p++ = 'l';
7067 *p++ = '0' + label;
7068 *p++ = '%';
7069 *p++ = '#';
7070 *p = '\0';
7071
7072 return string;
7073 }
7074
7075 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
7076 Such instructions cannot be used in the delay slot of a return insn on v9.
7077 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7]
7078 counterparts. */
7079
7080 static int
7081 epilogue_renumber (register rtx *where, int test)
7082 {
7083 register const char *fmt;
7084 register int i;
7085 register enum rtx_code code;
7086
7087 if (*where == 0)
7088 return 0;
7089
7090 code = GET_CODE (*where);
7091
7092 switch (code)
7093 {
7094 case REG:
7095 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
7096 return 1;
7097 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
7098 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
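      /* Fall through.  */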
7099 case SCRATCH:
7100 case CC0:
7101 case PC:
7102 case CONST_INT:
7103 case CONST_DOUBLE:
7104 return 0;
7105
7106 /* Do not replace the frame pointer with the stack pointer because
7107 it can cause the delay-slot instruction to load below the stack.
7108 This occurs when instructions like:
7109
7110 (set (reg/i:SI 24 %i0)
7111 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
7112 (const_int -20 [0xffffffec])) 0))
7113
7114 are in the return delay slot. */
7115 case PLUS:
7116 if (GET_CODE (XEXP (*where, 0)) == REG
7117 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
7118 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
7119 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
7120 return 1;
7121 break;
7122
7123 case MEM:
7124 if (SPARC_STACK_BIAS
7125 && GET_CODE (XEXP (*where, 0)) == REG
7126 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
7127 return 1;
7128 break;
7129
7130 default:
7131 break;
7132 }
7133
7134 fmt = GET_RTX_FORMAT (code);
7135
7136 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7137 {
7138 if (fmt[i] == 'E')
7139 {
7140 register int j;
7141 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
7142 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
7143 return 1;
7144 }
7145 else if (fmt[i] == 'e'
7146 && epilogue_renumber (&(XEXP (*where, i)), test))
7147 return 1;
7148 }
7149 return 0;
7150 }
7151 \f
7152 /* Leaf functions and non-leaf functions have different needs. */
7153
7154 static const int
7155 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
7156
7157 static const int
7158 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
7159
7160 static const int *const reg_alloc_orders[] = {
7161 reg_leaf_alloc_order,
7162 reg_nonleaf_alloc_order};
7163
7164 void
7165 order_regs_for_local_alloc (void)
7166 {
7167 static int last_order_nonleaf = 1;
7168
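  /* Hard register 15 is %o7, which call instructions clobber, so in
     practice it is ever-live only in non-leaf functions; use that to
     pick the allocation order.  */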
7169 if (df_regs_ever_live_p (15) != last_order_nonleaf)
7170 {
7171 last_order_nonleaf = !last_order_nonleaf;
7172 memcpy ((char *) reg_alloc_order,
7173 (const char *) reg_alloc_orders[last_order_nonleaf],
7174 FIRST_PSEUDO_REGISTER * sizeof (int));
7175 }
7176 }
7177 \f
7178 /* Return 1 if REG and MEM are legitimate enough to allow the various
7179 mem<-->reg splits to be run. */
7180
7181 int
7182 sparc_splitdi_legitimate (rtx reg, rtx mem)
7183 {
7184 /* Punt if we are here by mistake. */
7185 gcc_assert (reload_completed);
7186
7187 /* We must have an offsettable memory reference. */
7188 if (! offsettable_memref_p (mem))
7189 return 0;
7190
7191 /* If we have legitimate args for ldd/std, we do not want
7192 the split to happen. */
7193 if ((REGNO (reg) % 2) == 0
7194 && mem_min_alignment (mem, 8))
7195 return 0;
7196
7197 /* Success. */
7198 return 1;
7199 }
7200
7201 /* Return 1 if x and y are some kind of REG and they refer to
7202 different hard registers. This test is guaranteed to be
7203 run after reload. */
7204
7205 int
7206 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
7207 {
7208 if (GET_CODE (x) != REG)
7209 return 0;
7210 if (GET_CODE (y) != REG)
7211 return 0;
7212 if (REGNO (x) == REGNO (y))
7213 return 0;
7214 return 1;
7215 }
7216
7217 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
7218 This makes them candidates for using ldd and std insns.
7219
7220 Note reg1 and reg2 *must* be hard registers. */
7221
7222 int
7223 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
7224 {
7225 /* We might have been passed a SUBREG. */
7226 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
7227 return 0;
7228
7229 if (REGNO (reg1) % 2 != 0)
7230 return 0;
7231
7232 /* Integer ldd is deprecated in SPARC V9.  */
7233 if (TARGET_V9 && REGNO (reg1) < 32)
7234 return 0;
7235
7236 return (REGNO (reg1) == REGNO (reg2) - 1);
7237 }
7238
7239 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
7240 an ldd or std insn.
7241
7242 This can only happen when addr1 and addr2, the addresses in mem1
7243 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
7244 addr1 must also be aligned on a 64-bit boundary.
7245
7246 Also, if dependent_reg_rtx is not null, it should not be used to
7247 compute the address for mem1; i.e. we cannot optimize a sequence
7248 like:
7249 ld [%o0], %o0
7250 ld [%o0 + 4], %o1
7251 to
7252 ldd [%o0], %o0
7253 nor:
7254 ld [%g3 + 4], %g3
7255 ld [%g3], %g2
7256 to
7257 ldd [%g3], %g2
7258
7259 But, note that the transformation from:
7260 ld [%g2 + 4], %g3
7261 ld [%g2], %g2
7262 to
7263 ldd [%g2], %g2
7264 is perfectly fine. Thus, the peephole2 patterns always pass us
7265 the destination register of the first load, never the second one.
7266
7267 For stores we don't have a similar problem, so dependent_reg_rtx is
7268 NULL_RTX. */
7269
7270 int
7271 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
7272 {
7273 rtx addr1, addr2;
7274 unsigned int reg1;
7275 HOST_WIDE_INT offset1;
7276
7277 /* The mems cannot be volatile. */
7278 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
7279 return 0;
7280
7281 /* MEM1 should be aligned on a 64-bit boundary. */
7282 if (MEM_ALIGN (mem1) < 64)
7283 return 0;
7284
7285 addr1 = XEXP (mem1, 0);
7286 addr2 = XEXP (mem2, 0);
7287
7288 /* Extract a register number and offset (if used) from the first addr. */
7289 if (GET_CODE (addr1) == PLUS)
7290 {
7291 /* If not a REG, return zero. */
7292 if (GET_CODE (XEXP (addr1, 0)) != REG)
7293 return 0;
7294 else
7295 {
7296 reg1 = REGNO (XEXP (addr1, 0));
7297 /* The offset must be constant! */
7298 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
7299 return 0;
7300 offset1 = INTVAL (XEXP (addr1, 1));
7301 }
7302 }
7303 else if (GET_CODE (addr1) != REG)
7304 return 0;
7305 else
7306 {
7307 reg1 = REGNO (addr1);
7308 /* This was a simple (mem (reg)) expression. Offset is 0. */
7309 offset1 = 0;
7310 }
7311
7312 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
7313 if (GET_CODE (addr2) != PLUS)
7314 return 0;
7315
7316 if (GET_CODE (XEXP (addr2, 0)) != REG
7317 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
7318 return 0;
7319
7320 if (reg1 != REGNO (XEXP (addr2, 0)))
7321 return 0;
7322
7323 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
7324 return 0;
7325
7326 /* The first offset must be evenly divisible by 8 to ensure the
7327 address is 64-bit aligned. */
7328 if (offset1 % 8 != 0)
7329 return 0;
7330
7331 /* The offset for the second addr must be 4 more than the first addr. */
7332 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
7333 return 0;
7334
7335 /* All the tests passed. addr1 and addr2 are valid for ldd and std
7336 instructions. */
7337 return 1;
7338 }
7339
7340 /* Return 1 if reg is a pseudo, or is the first register in
7341 a hard register pair. This makes it suitable for use in
7342 ldd and std insns. */
7343
7344 int
7345 register_ok_for_ldd (rtx reg)
7346 {
7347 /* We might have been passed a SUBREG. */
7348 if (!REG_P (reg))
7349 return 0;
7350
7351 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
7352 return (REGNO (reg) % 2 == 0);
7353
7354 return 1;
7355 }
7356
7357 /* Return 1 if OP is a memory whose address is known to be
7358 aligned to 8-byte boundary, or a pseudo during reload.
7359 This makes it suitable for use in ldd and std insns. */
7360
7361 int
7362 memory_ok_for_ldd (rtx op)
7363 {
7364 if (MEM_P (op))
7365 {
7366 /* In 64-bit mode, we assume that the address is word-aligned. */
7367 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
7368 return 0;
7369
7370 if ((reload_in_progress || reload_completed)
7371 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
7372 return 0;
7373 }
7374 else if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)
7375 {
7376 if (!(reload_in_progress && reg_renumber [REGNO (op)] < 0))
7377 return 0;
7378 }
7379 else
7380 return 0;
7381
7382 return 1;
7383 }
7384 \f
7385 /* Print operand X (an rtx) in assembler syntax to file FILE.
7386 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
7387 For `%' followed by punctuation, CODE is the punctuation and X is null. */
7388
7389 void
7390 print_operand (FILE *file, rtx x, int code)
7391 {
7392 switch (code)
7393 {
7394 case '#':
7395 /* Output an insn in a delay slot. */
7396 if (final_sequence)
7397 sparc_indent_opcode = 1;
7398 else
7399 fputs ("\n\t nop", file);
7400 return;
7401 case '*':
7402 /* Output an annul flag if there's nothing for the delay slot and we
7403 are optimizing. This is always used with '(' below.
7404 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
7405 this is a dbx bug. So, we only do this when optimizing.
7406 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
7407 Always emit a nop in case the next instruction is a branch. */
7408 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
7409 fputs (",a", file);
7410 return;
7411 case '(':
7412 /* Output a 'nop' if there's nothing for the delay slot and we are
7413 not optimizing. This is always used with '*' above. */
7414 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
7415 fputs ("\n\t nop", file);
7416 else if (final_sequence)
7417 sparc_indent_opcode = 1;
7418 return;
7419 case ')':
7420 /* Output the right displacement from the saved PC on function return.
7421 The caller may have placed an "unimp" insn immediately after the call
7422 so we have to account for it. This insn is used in the 32-bit ABI
7423 when calling a function that returns a non zero-sized structure. The
7424 64-bit ABI doesn't have it. Be careful to have this test be the same
7425 as that for the call. The exception is when sparc_std_struct_return
7426 is enabled, the psABI is followed exactly and the adjustment is made
7427 by the code in sparc_struct_value_rtx. The call emitted is the same
7428 when sparc_std_struct_return is enabled. */
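/* As an illustration (hypothetical code, not from the sources): after
      call f
       nop
      unimp 4
   the callee must return to the call address plus 12 in order to skip
   the unimp insn, instead of the usual plus 8.  */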
7429 if (!TARGET_ARCH64
7430 && cfun->returns_struct
7431 && !sparc_std_struct_return
7432 && DECL_SIZE (DECL_RESULT (current_function_decl))
7433 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
7434 == INTEGER_CST
7435 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
7436 fputs ("12", file);
7437 else
7438 fputc ('8', file);
7439 return;
7440 case '_':
7441 /* Output the Embedded Medium/Anywhere code model base register. */
7442 fputs (EMBMEDANY_BASE_REG, file);
7443 return;
7444 case '&':
7445 /* Print some local dynamic TLS name. */
7446 assemble_name (file, get_some_local_dynamic_name ());
7447 return;
7448
7449 case 'Y':
7450 /* Adjust the operand to take into account a RESTORE operation. */
7451 if (GET_CODE (x) == CONST_INT)
7452 break;
7453 else if (GET_CODE (x) != REG)
7454 output_operand_lossage ("invalid %%Y operand");
7455 else if (REGNO (x) < 8)
7456 fputs (reg_names[REGNO (x)], file);
7457 else if (REGNO (x) >= 24 && REGNO (x) < 32)
7458 fputs (reg_names[REGNO (x)-16], file);
7459 else
7460 output_operand_lossage ("invalid %%Y operand");
7461 return;
7462 case 'L':
7463 /* Print out the low order register name of a register pair. */
7464 if (WORDS_BIG_ENDIAN)
7465 fputs (reg_names[REGNO (x)+1], file);
7466 else
7467 fputs (reg_names[REGNO (x)], file);
7468 return;
7469 case 'H':
7470 /* Print out the high order register name of a register pair. */
7471 if (WORDS_BIG_ENDIAN)
7472 fputs (reg_names[REGNO (x)], file);
7473 else
7474 fputs (reg_names[REGNO (x)+1], file);
7475 return;
7476 case 'R':
7477 /* Print out the second register name of a register pair or quad.
7478 I.e., R (%o0) => %o1. */
7479 fputs (reg_names[REGNO (x)+1], file);
7480 return;
7481 case 'S':
7482 /* Print out the third register name of a register quad.
7483 I.e., S (%o0) => %o2. */
7484 fputs (reg_names[REGNO (x)+2], file);
7485 return;
7486 case 'T':
7487 /* Print out the fourth register name of a register quad.
7488 I.e., T (%o0) => %o3. */
7489 fputs (reg_names[REGNO (x)+3], file);
7490 return;
7491 case 'x':
7492 /* Print a condition code register. */
7493 if (REGNO (x) == SPARC_ICC_REG)
7494 {
7495 /* We don't handle CC[X]_NOOVmode because they're not supposed
7496 to occur here. */
7497 if (GET_MODE (x) == CCmode)
7498 fputs ("%icc", file);
7499 else if (GET_MODE (x) == CCXmode)
7500 fputs ("%xcc", file);
7501 else
7502 gcc_unreachable ();
7503 }
7504 else
7505 /* %fccN register */
7506 fputs (reg_names[REGNO (x)], file);
7507 return;
7508 case 'm':
7509 /* Print the operand's address only. */
7510 output_address (XEXP (x, 0));
7511 return;
7512 case 'r':
7513 /* In this case we need a register. Use %g0 if the
7514 operand is const0_rtx. */
7515 if (x == const0_rtx
7516 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
7517 {
7518 fputs ("%g0", file);
7519 return;
7520 }
7521 else
7522 break;
7523
7524 case 'A':
7525 switch (GET_CODE (x))
7526 {
7527 case IOR: fputs ("or", file); break;
7528 case AND: fputs ("and", file); break;
7529 case XOR: fputs ("xor", file); break;
7530 default: output_operand_lossage ("invalid %%A operand");
7531 }
7532 return;
7533
7534 case 'B':
7535 switch (GET_CODE (x))
7536 {
7537 case IOR: fputs ("orn", file); break;
7538 case AND: fputs ("andn", file); break;
7539 case XOR: fputs ("xnor", file); break;
7540 default: output_operand_lossage ("invalid %%B operand");
7541 }
7542 return;
7543
7544 /* These are used by the conditional move instructions. */
7545 case 'c':
7546 case 'C':
7547 {
7548 enum rtx_code rc = GET_CODE (x);
7549
7550 if (code == 'c')
7551 {
7552 enum machine_mode mode = GET_MODE (XEXP (x, 0));
7553 if (mode == CCFPmode || mode == CCFPEmode)
7554 rc = reverse_condition_maybe_unordered (GET_CODE (x));
7555 else
7556 rc = reverse_condition (GET_CODE (x));
7557 }
7558 switch (rc)
7559 {
7560 case NE: fputs ("ne", file); break;
7561 case EQ: fputs ("e", file); break;
7562 case GE: fputs ("ge", file); break;
7563 case GT: fputs ("g", file); break;
7564 case LE: fputs ("le", file); break;
7565 case LT: fputs ("l", file); break;
7566 case GEU: fputs ("geu", file); break;
7567 case GTU: fputs ("gu", file); break;
7568 case LEU: fputs ("leu", file); break;
7569 case LTU: fputs ("lu", file); break;
7570 case LTGT: fputs ("lg", file); break;
7571 case UNORDERED: fputs ("u", file); break;
7572 case ORDERED: fputs ("o", file); break;
7573 case UNLT: fputs ("ul", file); break;
7574 case UNLE: fputs ("ule", file); break;
7575 case UNGT: fputs ("ug", file); break;
7576 case UNGE: fputs ("uge", file); break;
7577 case UNEQ: fputs ("ue", file); break;
7578 default: output_operand_lossage (code == 'c'
7579 ? "invalid %%c operand"
7580 : "invalid %%C operand");
7581 }
7582 return;
7583 }
7584
7585 /* These are used by the movr instruction pattern. */
7586 case 'd':
7587 case 'D':
7588 {
7589 enum rtx_code rc = (code == 'd'
7590 ? reverse_condition (GET_CODE (x))
7591 : GET_CODE (x));
7592 switch (rc)
7593 {
7594 case NE: fputs ("ne", file); break;
7595 case EQ: fputs ("e", file); break;
7596 case GE: fputs ("gez", file); break;
7597 case LT: fputs ("lz", file); break;
7598 case LE: fputs ("lez", file); break;
7599 case GT: fputs ("gz", file); break;
7600 default: output_operand_lossage (code == 'd'
7601 ? "invalid %%d operand"
7602 : "invalid %%D operand");
7603 }
7604 return;
7605 }
7606
7607 case 'b':
7608 {
7609 /* Print a sign-extended character. */
7610 int i = trunc_int_for_mode (INTVAL (x), QImode);
7611 fprintf (file, "%d", i);
7612 return;
7613 }
7614
7615 case 'f':
7616 /* Operand must be a MEM; write its address. */
7617 if (GET_CODE (x) != MEM)
7618 output_operand_lossage ("invalid %%f operand");
7619 output_address (XEXP (x, 0));
7620 return;
7621
7622 case 's':
7623 {
7624 /* Print a sign-extended 32-bit value. */
7625 HOST_WIDE_INT i;
7626 if (GET_CODE (x) == CONST_INT)
7627 i = INTVAL (x);
7628 else if (GET_CODE (x) == CONST_DOUBLE)
7629 i = CONST_DOUBLE_LOW (x);
7630 else
7631 {
7632 output_operand_lossage ("invalid %%s operand");
7633 return;
7634 }
7635 i = trunc_int_for_mode (i, SImode);
7636 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
7637 return;
7638 }
7639
7640 case 0:
7641 /* Do nothing special. */
7642 break;
7643
7644 default:
7645 /* Undocumented flag. */
7646 output_operand_lossage ("invalid operand output code");
7647 }
7648
7649 if (GET_CODE (x) == REG)
7650 fputs (reg_names[REGNO (x)], file);
7651 else if (GET_CODE (x) == MEM)
7652 {
7653 fputc ('[', file);
7654 /* Poor Sun assembler doesn't understand absolute addressing. */
7655 if (CONSTANT_P (XEXP (x, 0)))
7656 fputs ("%g0+", file);
7657 output_address (XEXP (x, 0));
7658 fputc (']', file);
7659 }
7660 else if (GET_CODE (x) == HIGH)
7661 {
7662 fputs ("%hi(", file);
7663 output_addr_const (file, XEXP (x, 0));
7664 fputc (')', file);
7665 }
7666 else if (GET_CODE (x) == LO_SUM)
7667 {
7668 print_operand (file, XEXP (x, 0), 0);
7669 if (TARGET_CM_MEDMID)
7670 fputs ("+%l44(", file);
7671 else
7672 fputs ("+%lo(", file);
7673 output_addr_const (file, XEXP (x, 1));
7674 fputc (')', file);
7675 }
7676 else if (GET_CODE (x) == CONST_DOUBLE
7677 && (GET_MODE (x) == VOIDmode
7678 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
7679 {
7680 if (CONST_DOUBLE_HIGH (x) == 0)
7681 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
7682 else if (CONST_DOUBLE_HIGH (x) == -1
7683 && CONST_DOUBLE_LOW (x) < 0)
7684 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
7685 else
7686 output_operand_lossage ("long long constant not a valid immediate operand");
7687 }
7688 else if (GET_CODE (x) == CONST_DOUBLE)
7689 output_operand_lossage ("floating point constant not a valid immediate operand");
7690 else output_addr_const (file, x);
7691 }
7692 \f
7693 /* Target hook for assembling integer objects. The sparc version has
7694 special handling for aligned DI-mode objects. */
7695
7696 static bool
7697 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
7698 {
7699 /* ??? We only output .xword's for symbols and only then in environments
7700 where the assembler can handle them. */
7701 if (aligned_p && size == 8
7702 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
7703 {
7704 if (TARGET_V9)
7705 {
7706 assemble_integer_with_op ("\t.xword\t", x);
7707 return true;
7708 }
7709 else
7710 {
7711 assemble_aligned_integer (4, const0_rtx);
7712 assemble_aligned_integer (4, x);
7713 return true;
7714 }
7715 }
7716 return default_assemble_integer (x, size, aligned_p);
7717 }
7718 \f
7719 /* Return the value of a code used in the .proc pseudo-op that says
7720 what kind of result this function returns. For non-C types, we pick
7721 the closest C type. */
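/* As a worked example (illustrative): for the type "int *", the
   POINTER_TYPE contributes 1 << 6 on the first iteration of the loop
   below and the underlying INTEGER_TYPE then yields the final code 4,
   so sparc_type_code returns 0x44.  */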
7722
7723 #ifndef SHORT_TYPE_SIZE
7724 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
7725 #endif
7726
7727 #ifndef INT_TYPE_SIZE
7728 #define INT_TYPE_SIZE BITS_PER_WORD
7729 #endif
7730
7731 #ifndef LONG_TYPE_SIZE
7732 #define LONG_TYPE_SIZE BITS_PER_WORD
7733 #endif
7734
7735 #ifndef LONG_LONG_TYPE_SIZE
7736 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
7737 #endif
7738
7739 #ifndef FLOAT_TYPE_SIZE
7740 #define FLOAT_TYPE_SIZE BITS_PER_WORD
7741 #endif
7742
7743 #ifndef DOUBLE_TYPE_SIZE
7744 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
7745 #endif
7746
7747 #ifndef LONG_DOUBLE_TYPE_SIZE
7748 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
7749 #endif
7750
7751 unsigned long
7752 sparc_type_code (register tree type)
7753 {
7754 register unsigned long qualifiers = 0;
7755 register unsigned shift;
7756
7757 /* Only the first 30 bits of the qualifier are valid. We must refrain from
7758 setting more, since some assemblers will give an error for this. Also,
7759 we must be careful to avoid shifts of 32 bits or more to avoid getting
7760 unpredictable results. */
7761
7762 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
7763 {
7764 switch (TREE_CODE (type))
7765 {
7766 case ERROR_MARK:
7767 return qualifiers;
7768
7769 case ARRAY_TYPE:
7770 qualifiers |= (3 << shift);
7771 break;
7772
7773 case FUNCTION_TYPE:
7774 case METHOD_TYPE:
7775 qualifiers |= (2 << shift);
7776 break;
7777
7778 case POINTER_TYPE:
7779 case REFERENCE_TYPE:
7780 case OFFSET_TYPE:
7781 qualifiers |= (1 << shift);
7782 break;
7783
7784 case RECORD_TYPE:
7785 return (qualifiers | 8);
7786
7787 case UNION_TYPE:
7788 case QUAL_UNION_TYPE:
7789 return (qualifiers | 9);
7790
7791 case ENUMERAL_TYPE:
7792 return (qualifiers | 10);
7793
7794 case VOID_TYPE:
7795 return (qualifiers | 16);
7796
7797 case INTEGER_TYPE:
7798 /* If this is a range type, consider it to be the underlying
7799 type. */
7800 if (TREE_TYPE (type) != 0)
7801 break;
7802
7803 /* Carefully distinguish all the standard types of C,
7804 without messing up if the language is not C. We do this by
7805 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
7806 look at both the names and the above fields, but that's redundant.
7807 Any type whose size is between two C types will be considered
7808 to be the wider of the two types. Also, we do not have a
7809 special code to use for "long long", so anything wider than
7810 long is treated the same. Note that we can't distinguish
7811 between "int" and "long" in this code if they are the same
7812 size, but that's fine, since neither can the assembler. */
7813
7814 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
7815 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
7816
7817 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
7818 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
7819
7820 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
7821 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
7822
7823 else
7824 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
7825
7826 case REAL_TYPE:
7827 /* If this is a range type, consider it to be the underlying
7828 type. */
7829 if (TREE_TYPE (type) != 0)
7830 break;
7831
7832 /* Carefully distinguish all the standard types of C,
7833 without messing up if the language is not C. */
7834
7835 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
7836 return (qualifiers | 6);
7837
7838 else
7839 return (qualifiers | 7);
7840
7841 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
7842 /* ??? We need to distinguish between double and float complex types,
7843 but I don't know how yet because I can't reach this code from
7844 existing front-ends. */
7845 return (qualifiers | 7); /* Who knows? */
7846
7847 case VECTOR_TYPE:
7848 case BOOLEAN_TYPE: /* Boolean truth value type. */
7849 case LANG_TYPE:
7850 case NULLPTR_TYPE:
7851 return qualifiers;
7852
7853 default:
7854 gcc_unreachable (); /* Not a type! */
7855 }
7856 }
7857
7858 return qualifiers;
7859 }
7860 \f
7861 /* Nested function support. */
7862
7863 /* Emit RTL insns to initialize the variable parts of a trampoline.
7864 FNADDR is an RTX for the address of the function's pure code.
7865 CXT is an RTX for the static chain value for the function.
7866
7867 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
7868 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
7869 (to store insns). This is a bit excessive. Perhaps a different
7870 mechanism would be better here.
7871
7872 Emit enough FLUSH insns to synchronize the data and instruction caches. */
7873
7874 static void
7875 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
7876 {
7877 /* SPARC 32-bit trampoline:
7878
7879 sethi %hi(fn), %g1
7880 sethi %hi(static), %g2
7881 jmp %g1+%lo(fn)
7882 or %g2, %lo(static), %g2
7883
7884 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
7885 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
7886 */
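
/* For reference, the opcode templates ORed in below decode, with zero
   immediate fields, as:

      0x03000000   sethi %hi(0), %g1
      0x05000000   sethi %hi(0), %g2
      0x81c06000   jmp   %g1 + %lo(0)
      0x8410a000   or    %g2, %lo(0), %g2  */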
7887
7888 emit_move_insn
7889 (adjust_address (m_tramp, SImode, 0),
7890 expand_binop (SImode, ior_optab,
7891 expand_shift (RSHIFT_EXPR, SImode, fnaddr,
7892 size_int (10), 0, 1),
7893 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
7894 NULL_RTX, 1, OPTAB_DIRECT));
7895
7896 emit_move_insn
7897 (adjust_address (m_tramp, SImode, 4),
7898 expand_binop (SImode, ior_optab,
7899 expand_shift (RSHIFT_EXPR, SImode, cxt,
7900 size_int (10), 0, 1),
7901 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
7902 NULL_RTX, 1, OPTAB_DIRECT));
7903
7904 emit_move_insn
7905 (adjust_address (m_tramp, SImode, 8),
7906 expand_binop (SImode, ior_optab,
7907 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
7908 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
7909 NULL_RTX, 1, OPTAB_DIRECT));
7910
7911 emit_move_insn
7912 (adjust_address (m_tramp, SImode, 12),
7913 expand_binop (SImode, ior_optab,
7914 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
7915 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
7916 NULL_RTX, 1, OPTAB_DIRECT));
7917
7918 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
7919 aligned on a 16-byte boundary, so one flush clears it all. */
7920 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
7921 if (sparc_cpu != PROCESSOR_ULTRASPARC
7922 && sparc_cpu != PROCESSOR_ULTRASPARC3
7923 && sparc_cpu != PROCESSOR_NIAGARA
7924 && sparc_cpu != PROCESSOR_NIAGARA2)
7925 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
7926
7927 /* Call __enable_execute_stack after writing onto the stack to make sure
7928 the stack address is accessible. */
7929 #ifdef ENABLE_EXECUTE_STACK
7930 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7931 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
7932 #endif
7933
7934 }
7935
7936 /* The 64-bit version is simpler because it makes more sense to load the
7937 values as "immediate" data out of the trampoline. It's also easier since
7938 we can read the PC without clobbering a register. */
7939
7940 static void
7941 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
7942 {
7943 /* SPARC 64-bit trampoline:
7944
7945 rd %pc, %g1
7946 ldx [%g1+24], %g5
7947 jmp %g5
7948 ldx [%g1+16], %g5
7949 +16 bytes data
7950 */
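
/* For reference, the instruction words emitted below decode as:

      0x83414000   rd  %pc, %g1
      0xca586018   ldx [%g1+24], %g5
      0x81c14000   jmp %g5
      0xca586010   ldx [%g1+16], %g5  */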
7951
7952 emit_move_insn (adjust_address (m_tramp, SImode, 0),
7953 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
7954 emit_move_insn (adjust_address (m_tramp, SImode, 4),
7955 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
7956 emit_move_insn (adjust_address (m_tramp, SImode, 8),
7957 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
7958 emit_move_insn (adjust_address (m_tramp, SImode, 12),
7959 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
7960 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
7961 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
7962 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
7963
7964 if (sparc_cpu != PROCESSOR_ULTRASPARC
7965 && sparc_cpu != PROCESSOR_ULTRASPARC3
7966 && sparc_cpu != PROCESSOR_NIAGARA
7967 && sparc_cpu != PROCESSOR_NIAGARA2)
7968 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
7969
7970 /* Call __enable_execute_stack after writing onto the stack to make sure
7971 the stack address is accessible. */
7972 #ifdef ENABLE_EXECUTE_STACK
7973 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7974 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
7975 #endif
7976 }
7977
7978 /* Worker for TARGET_TRAMPOLINE_INIT. */
7979
7980 static void
7981 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
7982 {
7983 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
7984 cxt = force_reg (Pmode, cxt);
7985 if (TARGET_ARCH64)
7986 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
7987 else
7988 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
7989 }
7990 \f
7991 /* Adjust the cost of a scheduling dependency. Return the new cost of
7992 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
7993
7994 static int
7995 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7996 {
7997 enum attr_type insn_type;
7998
7999 if (! recog_memoized (insn))
8000 return 0;
8001
8002 insn_type = get_attr_type (insn);
8003
8004 if (REG_NOTE_KIND (link) == 0)
8005 {
8006 /* Data dependency; DEP_INSN writes a register that INSN reads some
8007 cycles later. */
8008
8009 /* If a load, then the dependence must be on the memory address;
8010 add an extra "cycle". Note that the cost could be two cycles
8011 if the reg was written late in an instruction group; we cannot tell
8012 here. */
8013 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
8014 return cost + 3;
8015
8016 /* Get the delay only if the address of the store is the dependence. */
8017 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
8018 {
8019 rtx pat = PATTERN (insn);
8020 rtx dep_pat = PATTERN (dep_insn);
8021
8022 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
8023 return cost; /* This should not happen! */
8024
8025 /* The dependency between the two instructions was on the data that
8026 is being stored. Assume that this implies that the address of the
8027 store is not dependent. */
8028 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
8029 return cost;
8030
8031 return cost + 3; /* An approximation. */
8032 }
8033
8034 /* A shift instruction cannot receive its data from an instruction
8035 in the same cycle; add a penalty (approximated as 3 cycles below). */
8036 if (insn_type == TYPE_SHIFT)
8037 return cost + 3; /* Split before cascade into shift. */
8038 }
8039 else
8040 {
8041 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
8042 INSN writes some cycles later. */
8043
8044 /* These are only significant for the FPU; writing an FP reg before
8045 the FPU has finished with it stalls the processor. */
8046
8047 /* Reusing an integer register causes no problems. */
8048 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
8049 return 0;
8050 }
8051
8052 return cost;
8053 }
8054
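/* Likewise, but for the HyperSPARC and SPARClite86x pipelines.  */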
8055 static int
8056 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
8057 {
8058 enum attr_type insn_type, dep_type;
8059 rtx pat = PATTERN (insn);
8060 rtx dep_pat = PATTERN (dep_insn);
8061
8062 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
8063 return cost;
8064
8065 insn_type = get_attr_type (insn);
8066 dep_type = get_attr_type (dep_insn);
8067
8068 switch (REG_NOTE_KIND (link))
8069 {
8070 case 0:
8071 /* Data dependency; DEP_INSN writes a register that INSN reads some
8072 cycles later. */
8073
8074 switch (insn_type)
8075 {
8076 case TYPE_STORE:
8077 case TYPE_FPSTORE:
8078 /* Get the delay iff the address of the store is the dependence. */
8079 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
8080 return cost;
8081
8082 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
8083 return cost;
8084 return cost + 3;
8085
8086 case TYPE_LOAD:
8087 case TYPE_SLOAD:
8088 case TYPE_FPLOAD:
8089 /* If a load, then the dependence must be on the memory address. If
8090 the addresses aren't equal, then it might be a false dependency. */
8091 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
8092 {
8093 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
8094 || GET_CODE (SET_DEST (dep_pat)) != MEM
8095 || GET_CODE (SET_SRC (pat)) != MEM
8096 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
8097 XEXP (SET_SRC (pat), 0)))
8098 return cost + 2;
8099
8100 return cost + 8;
8101 }
8102 break;
8103
8104 case TYPE_BRANCH:
8105 /* Compare to branch latency is 0. There is no benefit from
8106 separating compare and branch. */
8107 if (dep_type == TYPE_COMPARE)
8108 return 0;
8109 /* Floating point compare to branch latency is less than
8110 compare to conditional move. */
8111 if (dep_type == TYPE_FPCMP)
8112 return cost - 1;
8113 break;
8114 default:
8115 break;
8116 }
8117 break;
8118
8119 case REG_DEP_ANTI:
8120 /* Anti-dependencies only penalize the FPU. */
8121 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
8122 return 0;
8123 break;
8124
8125 default:
8126 break;
8127 }
8128
8129 return cost;
8130 }
8131
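/* Implement the TARGET_SCHED_ADJUST_COST hook by dispatching to the
   CPU-specific routines above.  */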
8132 static int
8133 sparc_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8134 {
8135 switch (sparc_cpu)
8136 {
8137 case PROCESSOR_SUPERSPARC:
8138 cost = supersparc_adjust_cost (insn, link, dep, cost);
8139 break;
8140 case PROCESSOR_HYPERSPARC:
8141 case PROCESSOR_SPARCLITE86X:
8142 cost = hypersparc_adjust_cost (insn, link, dep, cost);
8143 break;
8144 default:
8145 break;
8146 }
8147 return cost;
8148 }
8149
8150 static void
8151 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
8152 int sched_verbose ATTRIBUTE_UNUSED,
8153 int max_ready ATTRIBUTE_UNUSED)
8154 {}
8155
8156 static int
8157 sparc_use_sched_lookahead (void)
8158 {
8159 if (sparc_cpu == PROCESSOR_NIAGARA
8160 || sparc_cpu == PROCESSOR_NIAGARA2)
8161 return 0;
8162 if (sparc_cpu == PROCESSOR_ULTRASPARC
8163 || sparc_cpu == PROCESSOR_ULTRASPARC3)
8164 return 4;
8165 if ((1 << sparc_cpu) &
8166 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
8167 (1 << PROCESSOR_SPARCLITE86X)))
8168 return 3;
8169 return 0;
8170 }
8171
8172 static int
8173 sparc_issue_rate (void)
8174 {
8175 switch (sparc_cpu)
8176 {
8177 case PROCESSOR_NIAGARA:
8178 case PROCESSOR_NIAGARA2:
8179 default:
8180 return 1;
8181 case PROCESSOR_V9:
8182 /* Assume V9 processors are capable of at least dual-issue. */
8183 return 2;
8184 case PROCESSOR_SUPERSPARC:
8185 return 3;
8186 case PROCESSOR_HYPERSPARC:
8187 case PROCESSOR_SPARCLITE86X:
8188 return 2;
8189 case PROCESSOR_ULTRASPARC:
8190 case PROCESSOR_ULTRASPARC3:
8191 return 4;
8192 }
8193 }
8194
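/* Return 1 if the single SET in INSN is known to zero the high 32 bits
   of its destination, -1 if it is known to sign-extend from 32 bits,
   and 0 if nothing is known.  Helper for sparc_check_64 below.  */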
8195 static int
8196 set_extends (rtx insn)
8197 {
8198 register rtx pat = PATTERN (insn);
8199
8200 switch (GET_CODE (SET_SRC (pat)))
8201 {
8202 /* Load and some shift instructions zero extend. */
8203 case MEM:
8204 case ZERO_EXTEND:
8205 /* sethi clears the high bits. */
8206 case HIGH:
8207 /* LO_SUM is used with sethi; sethi clears the high
8208 bits and the values used with lo_sum are positive. */
8209 case LO_SUM:
8210 /* Store flag stores 0 or 1. */
8211 case LT: case LTU:
8212 case GT: case GTU:
8213 case LE: case LEU:
8214 case GE: case GEU:
8215 case EQ:
8216 case NE:
8217 return 1;
8218 case AND:
8219 {
8220 rtx op0 = XEXP (SET_SRC (pat), 0);
8221 rtx op1 = XEXP (SET_SRC (pat), 1);
8222 if (GET_CODE (op1) == CONST_INT)
8223 return INTVAL (op1) >= 0;
8224 if (GET_CODE (op0) != REG)
8225 return 0;
8226 if (sparc_check_64 (op0, insn) == 1)
8227 return 1;
8228 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
8229 }
8230 case IOR:
8231 case XOR:
8232 {
8233 rtx op0 = XEXP (SET_SRC (pat), 0);
8234 rtx op1 = XEXP (SET_SRC (pat), 1);
8235 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
8236 return 0;
8237 if (GET_CODE (op1) == CONST_INT)
8238 return INTVAL (op1) >= 0;
8239 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
8240 }
8241 case LSHIFTRT:
8242 return GET_MODE (SET_SRC (pat)) == SImode;
8243 /* Positive integers leave the high bits zero. */
8244 case CONST_DOUBLE:
8245 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
8246 case CONST_INT:
8247 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
8248 case ASHIFTRT:
8249 case SIGN_EXTEND:
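/* Value is -1 if the source is SImode (sign-extended), 0 if unknown.  */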
8250 return - (GET_MODE (SET_SRC (pat)) == SImode);
8251 case REG:
8252 return sparc_check_64 (SET_SRC (pat), insn);
8253 default:
8254 return 0;
8255 }
8256 }
8257
8258 /* We _ought_ to have only one kind per function, but... */
8259 static GTY(()) rtx sparc_addr_diff_list;
8260 static GTY(()) rtx sparc_addr_list;
8261
8262 void
8263 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
8264 {
8265 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
8266 if (diff)
8267 sparc_addr_diff_list
8268 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
8269 else
8270 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
8271 }
8272
8273 static void
8274 sparc_output_addr_vec (rtx vec)
8275 {
8276 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
8277 int idx, vlen = XVECLEN (body, 0);
8278
8279 #ifdef ASM_OUTPUT_ADDR_VEC_START
8280 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
8281 #endif
8282
8283 #ifdef ASM_OUTPUT_CASE_LABEL
8284 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
8285 NEXT_INSN (lab));
8286 #else
8287 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
8288 #endif
8289
8290 for (idx = 0; idx < vlen; idx++)
8291 {
8292 ASM_OUTPUT_ADDR_VEC_ELT
8293 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
8294 }
8295
8296 #ifdef ASM_OUTPUT_ADDR_VEC_END
8297 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
8298 #endif
8299 }
8300
8301 static void
8302 sparc_output_addr_diff_vec (rtx vec)
8303 {
8304 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
8305 rtx base = XEXP (XEXP (body, 0), 0);
8306 int idx, vlen = XVECLEN (body, 1);
8307
8308 #ifdef ASM_OUTPUT_ADDR_VEC_START
8309 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
8310 #endif
8311
8312 #ifdef ASM_OUTPUT_CASE_LABEL
8313 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
8314 NEXT_INSN (lab));
8315 #else
8316 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
8317 #endif
8318
8319 for (idx = 0; idx < vlen; idx++)
8320 {
8321 ASM_OUTPUT_ADDR_DIFF_ELT
8322 (asm_out_file,
8323 body,
8324 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
8325 CODE_LABEL_NUMBER (base));
8326 }
8327
8328 #ifdef ASM_OUTPUT_ADDR_VEC_END
8329 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
8330 #endif
8331 }
8332
8333 static void
8334 sparc_output_deferred_case_vectors (void)
8335 {
8336 rtx t;
8337 int align;
8338
8339 if (sparc_addr_list == NULL_RTX
8340 && sparc_addr_diff_list == NULL_RTX)
8341 return;
8342
8343 /* Align to cache line in the function's code section. */
8344 switch_to_section (current_function_section ());
8345
8346 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
8347 if (align > 0)
8348 ASM_OUTPUT_ALIGN (asm_out_file, align);
8349
8350 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
8351 sparc_output_addr_vec (XEXP (t, 0));
8352 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
8353 sparc_output_addr_diff_vec (XEXP (t, 0));
8354
8355 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
8356 }
8357
8358 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
8359 unknown. Return 1 if the high bits are zero, -1 if the register is
8360 sign extended. */
8361 int
8362 sparc_check_64 (rtx x, rtx insn)
8363 {
8364 /* If a register is set only once it is safe to ignore insns this
8365 code does not know how to handle. The loop will either recognize
8366 the single set and return the correct value or fail to recognize
8367 it and return 0. */
8368 int set_once = 0;
8369 rtx y = x;
8370
8371 gcc_assert (GET_CODE (x) == REG);
8372
8373 if (GET_MODE (x) == DImode)
8374 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
8375
8376 if (flag_expensive_optimizations
8377 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
8378 set_once = 1;
8379
8380 if (insn == 0)
8381 {
8382 if (set_once)
8383 insn = get_last_insn_anywhere ();
8384 else
8385 return 0;
8386 }
8387
8388 while ((insn = PREV_INSN (insn)))
8389 {
8390 switch (GET_CODE (insn))
8391 {
8392 case JUMP_INSN:
8393 case NOTE:
8394 break;
8395 case CODE_LABEL:
8396 case CALL_INSN:
8397 default:
8398 if (! set_once)
8399 return 0;
8400 break;
8401 case INSN:
8402 {
8403 rtx pat = PATTERN (insn);
8404 if (GET_CODE (pat) != SET)
8405 return 0;
8406 if (rtx_equal_p (x, SET_DEST (pat)))
8407 return set_extends (insn);
8408 if (y && rtx_equal_p (y, SET_DEST (pat)))
8409 return set_extends (insn);
8410 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
8411 return 0;
8412 }
8413 }
8414 }
8415 return 0;
8416 }
8417
8418 /* Return the assembly code to perform a DImode shift using
8419 a 64-bit global or out register on SPARC-V8+. */
8420 const char *
8421 output_v8plus_shift (rtx *operands, rtx insn, const char *opcode)
8422 {
8423 static char asm_code[60];
8424
8425 /* The scratch register is only required when the destination
8426 register is not a 64-bit global or out register. */
8427 if (which_alternative != 2)
8428 operands[3] = operands[0];
8429
8430 /* We can only shift by constants <= 63. */
8431 if (GET_CODE (operands[2]) == CONST_INT)
8432 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
8433
8434 if (GET_CODE (operands[1]) == CONST_INT)
8435 {
8436 output_asm_insn ("mov\t%1, %3", operands);
8437 }
8438 else
8439 {
8440 output_asm_insn ("sllx\t%H1, 32, %3", operands);
8441 if (sparc_check_64 (operands[1], insn) <= 0)
8442 output_asm_insn ("srl\t%L1, 0, %L1", operands);
8443 output_asm_insn ("or\t%L1, %3, %3", operands);
8444 }
8445
8446 strcpy (asm_code, opcode);
8447
8448 if (which_alternative != 2)
8449 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
8450 else
8451 return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
8452 }
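
/* As an illustration (with the scratch alternative and OPCODE "sllx",
   assuming the low word is not known to be zero-extended), the routine
   above emits:

      sllx %H1, 32, %3
      srl  %L1, 0, %L1
      or   %L1, %3, %3
      sllx %3, %2, %3
      srlx %3, 32, %H0
      mov  %3, %L0  */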
8453 \f
8454 /* Output rtl to increment the profiler label LABELNO
8455 for profiling a function entry. */
8456
8457 void
8458 sparc_profile_hook (int labelno)
8459 {
8460 char buf[32];
8461 rtx lab, fun;
8462
8463 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
8464 if (NO_PROFILE_COUNTERS)
8465 {
8466 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
8467 }
8468 else
8469 {
8470 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
8471 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
8472 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
8473 }
8474 }
8475 \f
8476 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
8477
8478 static void
8479 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
8480 tree decl ATTRIBUTE_UNUSED)
8481 {
8482 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
8483
8484 if (!(flags & SECTION_DEBUG))
8485 fputs (",#alloc", asm_out_file);
8486 if (flags & SECTION_WRITE)
8487 fputs (",#write", asm_out_file);
8488 if (flags & SECTION_TLS)
8489 fputs (",#tls", asm_out_file);
8490 if (flags & SECTION_CODE)
8491 fputs (",#execinstr", asm_out_file);
8492
8493 /* ??? Handle SECTION_BSS. */
8494
8495 fputc ('\n', asm_out_file);
8496 }
8497
8498 /* We do not allow indirect calls to be optimized into sibling calls.
8499
8500 We cannot use sibling calls when delayed branches are disabled
8501 because they will likely require the call delay slot to be filled.
8502
8503 Also, on SPARC 32-bit we cannot emit a sibling call when the
8504 current function returns a structure. This is because the "unimp
8505 after call" convention would cause the callee to return to the
8506 wrong place. The generic code already disallows cases where the
8507 function being called returns a structure.
8508
8509 It may seem strange how this last case could occur. Usually there
8510 is code after the call which jumps to epilogue code which dumps the
8511 return value into the struct return area. That ought to invalidate
8512 the sibling call, right? Well, in the C++ case we can end up passing
8513 the pointer to the struct return area to a constructor (which returns
8514 void) and then nothing else happens. Such a sibling call would look
8515 valid without the added check here.
8516
8517 VxWorks PIC PLT entries require the global pointer to be initialized
8518 on entry. We therefore can't emit sibling calls to them. */
8519 static bool
8520 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8521 {
8522 return (decl
8523 && flag_delayed_branch
8524 && (TARGET_ARCH64 || ! cfun->returns_struct)
8525 && !(TARGET_VXWORKS_RTP
8526 && flag_pic
8527 && !targetm.binds_local_p (decl)));
8528 }
8529 \f
8530 /* libfunc renaming. */
8531
8532 static void
8533 sparc_init_libfuncs (void)
8534 {
8535 if (TARGET_ARCH32)
8536 {
8537 /* Use the subroutines that Sun's library provides for integer
8538 multiply and divide. The `*' prevents an underscore from
8539 being prepended by the compiler. .umul is a little faster
8540 than .mul. */
8541 set_optab_libfunc (smul_optab, SImode, "*.umul");
8542 set_optab_libfunc (sdiv_optab, SImode, "*.div");
8543 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
8544 set_optab_libfunc (smod_optab, SImode, "*.rem");
8545 set_optab_libfunc (umod_optab, SImode, "*.urem");
8546
8547 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
8548 set_optab_libfunc (add_optab, TFmode, "_Q_add");
8549 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
8550 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
8551 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
8552 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
8553
8554 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
8555 is because with soft-float, the SFmode and DFmode sqrt
8556 instructions will be absent, and the compiler will notice and
8557 try to use the TFmode sqrt instruction for calls to the
8558 builtin function sqrt, but this fails. */
8559 if (TARGET_FPU)
8560 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
8561
8562 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
8563 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
8564 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
8565 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
8566 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
8567 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
8568
8569 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
8570 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
8571 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
8572 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
8573
8574 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
8575 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
8576 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
8577 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
8578
8579 if (DITF_CONVERSION_LIBFUNCS)
8580 {
8581 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
8582 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
8583 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
8584 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
8585 }
8586
8587 if (SUN_CONVERSION_LIBFUNCS)
8588 {
8589 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
8590 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
8591 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
8592 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
8593 }
8594 }
8595 if (TARGET_ARCH64)
8596 {
8597 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
8598 do not exist in the library. Make sure the compiler does not
8599 emit calls to them by accident. (It should always use the
8600 hardware instructions.) */
8601 set_optab_libfunc (smul_optab, SImode, 0);
8602 set_optab_libfunc (sdiv_optab, SImode, 0);
8603 set_optab_libfunc (udiv_optab, SImode, 0);
8604 set_optab_libfunc (smod_optab, SImode, 0);
8605 set_optab_libfunc (umod_optab, SImode, 0);
8606
8607 if (SUN_INTEGER_MULTIPLY_64)
8608 {
8609 set_optab_libfunc (smul_optab, DImode, "__mul64");
8610 set_optab_libfunc (sdiv_optab, DImode, "__div64");
8611 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
8612 set_optab_libfunc (smod_optab, DImode, "__rem64");
8613 set_optab_libfunc (umod_optab, DImode, "__urem64");
8614 }
8615
8616 if (SUN_CONVERSION_LIBFUNCS)
8617 {
8618 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
8619 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
8620 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
8621 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
8622 }
8623 }
8624 }
8625 \f
8626 #define def_builtin(NAME, CODE, TYPE) \
8627 add_builtin_function((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, \
8628 NULL_TREE)
8629
8630 /* Implement the TARGET_INIT_BUILTINS target hook.
8631 Create builtin functions for special SPARC instructions. */
8632
8633 static void
8634 sparc_init_builtins (void)
8635 {
8636 if (TARGET_VIS)
8637 sparc_vis_init_builtins ();
8638 }
8639
8640 /* Create builtin functions for VIS 1.0 instructions. */
8641
8642 static void
8643 sparc_vis_init_builtins (void)
8644 {
8645 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
8646 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
8647 tree v4hi = build_vector_type (intHI_type_node, 4);
8648 tree v2hi = build_vector_type (intHI_type_node, 2);
8649 tree v2si = build_vector_type (intSI_type_node, 2);
8650
8651 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
8652 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
8653 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
8654 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
8655 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
8656 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
8657 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
8658 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
8659 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
8660 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
8661 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
8662 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
8663 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
8664 v8qi, v8qi,
8665 intDI_type_node, 0);
8666 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
8667 intDI_type_node,
8668 intDI_type_node, 0);
8669 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
8670 ptr_type_node,
8671 intSI_type_node, 0);
8672 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
8673 ptr_type_node,
8674 intDI_type_node, 0);
8675
8676 /* Packing and expanding vectors. */
8677 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi);
8678 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
8679 v8qi_ftype_v2si_v8qi);
8680 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
8681 v2hi_ftype_v2si);
8682 def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi);
8683 def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
8684 v8qi_ftype_v4qi_v4qi);
8685
8686 /* Multiplications. */
8687 def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
8688 v4hi_ftype_v4qi_v4hi);
8689 def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
8690 v4hi_ftype_v4qi_v2hi);
8691 def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
8692 v4hi_ftype_v4qi_v2hi);
8693 def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
8694 v4hi_ftype_v8qi_v4hi);
8695 def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
8696 v4hi_ftype_v8qi_v4hi);
8697 def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
8698 v2si_ftype_v4qi_v2hi);
8699 def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
8700 v2si_ftype_v4qi_v2hi);
8701
8702 /* Data aligning. */
8703 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
8704 v4hi_ftype_v4hi_v4hi);
8705 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
8706 v8qi_ftype_v8qi_v8qi);
8707 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
8708 v2si_ftype_v2si_v2si);
8709 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis,
8710 di_ftype_di_di);
8711 if (TARGET_ARCH64)
8712 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
8713 ptr_ftype_ptr_di);
8714 else
8715 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
8716 ptr_ftype_ptr_si);
8717
8718 /* Pixel distance. */
8719 def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
8720 di_ftype_v8qi_v8qi_di);
8721 }
8722
8723 /* Handle TARGET_EXPAND_BUILTIN target hook.
8724 Expand builtin functions for sparc intrinsics. */
8725
8726 static rtx
8727 sparc_expand_builtin (tree exp, rtx target,
8728 rtx subtarget ATTRIBUTE_UNUSED,
8729 enum machine_mode tmode ATTRIBUTE_UNUSED,
8730 int ignore ATTRIBUTE_UNUSED)
8731 {
8732 tree arg;
8733 call_expr_arg_iterator iter;
8734 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
8735 unsigned int icode = DECL_FUNCTION_CODE (fndecl);
8736 rtx pat, op[4];
8737 enum machine_mode mode[4];
8738 int arg_count = 0;
8739
8740 mode[0] = insn_data[icode].operand[0].mode;
8741 if (!target
8742 || GET_MODE (target) != mode[0]
8743 || ! (*insn_data[icode].operand[0].predicate) (target, mode[0]))
8744 op[0] = gen_reg_rtx (mode[0]);
8745 else
8746 op[0] = target;
8747
8748 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
8749 {
8750 arg_count++;
8751 mode[arg_count] = insn_data[icode].operand[arg_count].mode;
8752 op[arg_count] = expand_normal (arg);
8753
8754 if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count],
8755 mode[arg_count]))
8756 op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]);
8757 }
8758
8759 switch (arg_count)
8760 {
8761 case 1:
8762 pat = GEN_FCN (icode) (op[0], op[1]);
8763 break;
8764 case 2:
8765 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
8766 break;
8767 case 3:
8768 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
8769 break;
8770 default:
8771 gcc_unreachable ();
8772 }
8773
8774 if (!pat)
8775 return NULL_RTX;
8776
8777 emit_insn (pat);
8778
8779 return op[0];
8780 }
8781
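/* Compute one element of a VIS fmul8x16-style multiply: the product of
   the unsigned 8-bit value E8 and the signed 16-bit value E16, scaled
   down by 256 with rounding; e.g. sparc_vis_mul8x16 (128, 2) == 1.  */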
8782 static int
8783 sparc_vis_mul8x16 (int e8, int e16)
8784 {
8785 return (e8 * e16 + 128) / 256;
8786 }
8787
8788 /* Multiply the vector elements in ELTS0 by the elements in ELTS1 as specified
8789 by FNCODE. All of the elements in ELTS0 and ELTS1 lists must be integer
8790 constants. A tree list with the results of the multiplications is returned,
8791 and each element in the list is of INNER_TYPE. */
8792
8793 static tree
8794 sparc_handle_vis_mul8x16 (int fncode, tree inner_type, tree elts0, tree elts1)
8795 {
8796 tree n_elts = NULL_TREE;
8797 int scale;
8798
8799 switch (fncode)
8800 {
8801 case CODE_FOR_fmul8x16_vis:
8802 for (; elts0 && elts1;
8803 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8804 {
8805 int val
8806 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8807 TREE_INT_CST_LOW (TREE_VALUE (elts1)));
8808 n_elts = tree_cons (NULL_TREE,
8809 build_int_cst (inner_type, val),
8810 n_elts);
8811 }
8812 break;
8813
8814 case CODE_FOR_fmul8x16au_vis:
8815 scale = TREE_INT_CST_LOW (TREE_VALUE (elts1));
8816
8817 for (; elts0; elts0 = TREE_CHAIN (elts0))
8818 {
8819 int val
8820 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8821 scale);
8822 n_elts = tree_cons (NULL_TREE,
8823 build_int_cst (inner_type, val),
8824 n_elts);
8825 }
8826 break;
8827
8828 case CODE_FOR_fmul8x16al_vis:
8829 scale = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (elts1)));
8830
8831 for (; elts0; elts0 = TREE_CHAIN (elts0))
8832 {
8833 int val
8834 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8835 scale);
8836 n_elts = tree_cons (NULL_TREE,
8837 build_int_cst (inner_type, val),
8838 n_elts);
8839 }
8840 break;
8841
8842 default:
8843 gcc_unreachable ();
8844 }
8845
8846 return nreverse (n_elts);
8847 }
8848 
8849 /* Handle TARGET_FOLD_BUILTIN target hook.
8850 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
8851 result of the function call is ignored. NULL_TREE is returned if the
8852 function could not be folded. */
8853
8854 static tree
8855 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
8856 tree *args, bool ignore)
8857 {
8858 tree arg0, arg1, arg2;
8859 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
8860 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
8861
8862 if (ignore
8863 && icode != CODE_FOR_alignaddrsi_vis
8864 && icode != CODE_FOR_alignaddrdi_vis)
8865 return build_zero_cst (rtype);
8866
8867 switch (icode)
8868 {
8869 case CODE_FOR_fexpand_vis:
8870 arg0 = args[0];
8871 STRIP_NOPS (arg0);
8872
8873 if (TREE_CODE (arg0) == VECTOR_CST)
8874 {
8875 tree inner_type = TREE_TYPE (rtype);
8876 tree elts = TREE_VECTOR_CST_ELTS (arg0);
8877 tree n_elts = NULL_TREE;
8878
8879 for (; elts; elts = TREE_CHAIN (elts))
8880 {
8881 unsigned int val = TREE_INT_CST_LOW (TREE_VALUE (elts)) << 4;
8882 n_elts = tree_cons (NULL_TREE,
8883 build_int_cst (inner_type, val),
8884 n_elts);
8885 }
8886 return build_vector (rtype, nreverse (n_elts));
8887 }
8888 break;
8889
8890 case CODE_FOR_fmul8x16_vis:
8891 case CODE_FOR_fmul8x16au_vis:
8892 case CODE_FOR_fmul8x16al_vis:
8893 arg0 = args[0];
8894 arg1 = args[1];
8895 STRIP_NOPS (arg0);
8896 STRIP_NOPS (arg1);
8897
8898 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
8899 {
8900 tree inner_type = TREE_TYPE (rtype);
8901 tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8902 tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8903 tree n_elts = sparc_handle_vis_mul8x16 (icode, inner_type, elts0,
8904 elts1);
8905
8906 return build_vector (rtype, n_elts);
8907 }
8908 break;
8909
8910 case CODE_FOR_fpmerge_vis:
8911 arg0 = args[0];
8912 arg1 = args[1];
8913 STRIP_NOPS (arg0);
8914 STRIP_NOPS (arg1);
8915
8916 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
8917 {
8918 tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8919 tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8920 tree n_elts = NULL_TREE;
8921
8922 for (; elts0 && elts1;
8923 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8924 {
8925 n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts0), n_elts);
8926 n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts1), n_elts);
8927 }
8928
8929 return build_vector (rtype, nreverse (n_elts));
8930 }
8931 break;
8932
8933 case CODE_FOR_pdist_vis:
8934 arg0 = args[0];
8935 arg1 = args[1];
8936 arg2 = args[2];
8937 STRIP_NOPS (arg0);
8938 STRIP_NOPS (arg1);
8939 STRIP_NOPS (arg2);
8940
8941 if (TREE_CODE (arg0) == VECTOR_CST
8942 && TREE_CODE (arg1) == VECTOR_CST
8943 && TREE_CODE (arg2) == INTEGER_CST)
8944 {
8945 int overflow = 0;
8946 unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg2);
8947 HOST_WIDE_INT high = TREE_INT_CST_HIGH (arg2);
8948 tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8949 tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8950
8951 for (; elts0 && elts1;
8952 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8953 {
8954 unsigned HOST_WIDE_INT
8955 low0 = TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8956 low1 = TREE_INT_CST_LOW (TREE_VALUE (elts1));
8957 HOST_WIDE_INT high0 = TREE_INT_CST_HIGH (TREE_VALUE (elts0));
8958 HOST_WIDE_INT high1 = TREE_INT_CST_HIGH (TREE_VALUE (elts1));
8959
8960 unsigned HOST_WIDE_INT l;
8961 HOST_WIDE_INT h;
8962
8963 overflow |= neg_double (low1, high1, &l, &h);
8964 overflow |= add_double (low0, high0, l, h, &l, &h);
8965 if (h < 0)
8966 overflow |= neg_double (l, h, &l, &h);
8967
8968 overflow |= add_double (low, high, l, h, &low, &high);
8969 }
8970
8971 gcc_assert (overflow == 0);
8972
8973 return build_int_cst_wide (rtype, low, high);
8974 }
8975
8976 default:
8977 break;
8978 }
8979
8980 return NULL_TREE;
8981 }
8982 \f
8983 /* ??? This duplicates information provided to the compiler by the
8984 ??? scheduler description. Some day, teach genautomata to output
8985 ??? the latencies and then CSE will just use that. */
8986
8987 static bool
8988 sparc_rtx_costs (rtx x, int code, int outer_code, int *total,
8989 bool speed ATTRIBUTE_UNUSED)
8990 {
8991 enum machine_mode mode = GET_MODE (x);
8992 bool float_mode_p = FLOAT_MODE_P (mode);
8993
8994 switch (code)
8995 {
8996 case CONST_INT:
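/* A constant that fits in the signed 13-bit immediate field of most
   SPARC instructions is free.  */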
8997 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
8998 {
8999 *total = 0;
9000 return true;
9001 }
9002 /* FALLTHRU */
9003
9004 case HIGH:
9005 *total = 2;
9006 return true;
9007
9008 case CONST:
9009 case LABEL_REF:
9010 case SYMBOL_REF:
9011 *total = 4;
9012 return true;
9013
9014 case CONST_DOUBLE:
9015 if (GET_MODE (x) == VOIDmode
9016 && ((CONST_DOUBLE_HIGH (x) == 0
9017 && CONST_DOUBLE_LOW (x) < 0x1000)
9018 || (CONST_DOUBLE_HIGH (x) == -1
9019 && CONST_DOUBLE_LOW (x) < 0
9020 && CONST_DOUBLE_LOW (x) >= -0x1000)))
9021 *total = 0;
9022 else
9023 *total = 8;
9024 return true;
9025
9026 case MEM:
9027 /* If OUTER_CODE was a sign or zero extension, a cost
9028 of COSTS_N_INSNS (1) was already added in; this is
9029 why we subtract it back out. */
9030 if (outer_code == ZERO_EXTEND)
9031 {
9032 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
9033 }
9034 else if (outer_code == SIGN_EXTEND)
9035 {
9036 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
9037 }
9038 else if (float_mode_p)
9039 {
9040 *total = sparc_costs->float_load;
9041 }
9042 else
9043 {
9044 *total = sparc_costs->int_load;
9045 }
9046
9047 return true;
9048
9049 case PLUS:
9050 case MINUS:
9051 if (float_mode_p)
9052 *total = sparc_costs->float_plusminus;
9053 else
9054 *total = COSTS_N_INSNS (1);
9055 return false;
9056
9057 case MULT:
9058 if (float_mode_p)
9059 *total = sparc_costs->float_mul;
9060 else if (! TARGET_HARD_MUL)
9061 *total = COSTS_N_INSNS (25);
9062 else
9063 {
9064 int bit_cost;
9065
9066 bit_cost = 0;
9067 if (sparc_costs->int_mul_bit_factor)
9068 {
9069 int nbits;
9070
9071 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
9072 {
9073 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
9074 for (nbits = 0; value != 0; value &= value - 1)
9075 nbits++;
9076 }
9077 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
9078 && GET_MODE (XEXP (x, 1)) == VOIDmode)
9079 {
9080 rtx x1 = XEXP (x, 1);
9081 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
9082 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
9083
9084 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
9085 nbits++;
9086 for (; value2 != 0; value2 &= value2 - 1)
9087 nbits++;
9088 }
9089 else
9090 nbits = 7;
9091
9092 if (nbits < 3)
9093 nbits = 3;
9094 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
9095 bit_cost = COSTS_N_INSNS (bit_cost);
9096 }
9097
9098 if (mode == DImode)
9099 *total = sparc_costs->int_mulX + bit_cost;
9100 else
9101 *total = sparc_costs->int_mul + bit_cost;
9102 }
9103 return false;
9104
9105 case ASHIFT:
9106 case ASHIFTRT:
9107 case LSHIFTRT:
9108 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
9109 return false;
9110
9111 case DIV:
9112 case UDIV:
9113 case MOD:
9114 case UMOD:
9115 if (float_mode_p)
9116 {
9117 if (mode == DFmode)
9118 *total = sparc_costs->float_div_df;
9119 else
9120 *total = sparc_costs->float_div_sf;
9121 }
9122 else
9123 {
9124 if (mode == DImode)
9125 *total = sparc_costs->int_divX;
9126 else
9127 *total = sparc_costs->int_div;
9128 }
9129 return false;
9130
9131 case NEG:
9132 if (! float_mode_p)
9133 {
9134 *total = COSTS_N_INSNS (1);
9135 return false;
9136 }
9137 /* FALLTHRU */
9138
9139 case ABS:
9140 case FLOAT:
9141 case UNSIGNED_FLOAT:
9142 case FIX:
9143 case UNSIGNED_FIX:
9144 case FLOAT_EXTEND:
9145 case FLOAT_TRUNCATE:
9146 *total = sparc_costs->float_move;
9147 return false;
9148
9149 case SQRT:
9150 if (mode == DFmode)
9151 *total = sparc_costs->float_sqrt_df;
9152 else
9153 *total = sparc_costs->float_sqrt_sf;
9154 return false;
9155
9156 case COMPARE:
9157 if (float_mode_p)
9158 *total = sparc_costs->float_cmp;
9159 else
9160 *total = COSTS_N_INSNS (1);
9161 return false;
9162
9163 case IF_THEN_ELSE:
9164 if (float_mode_p)
9165 *total = sparc_costs->float_cmove;
9166 else
9167 *total = sparc_costs->int_cmove;
9168 return false;
9169
9170 case IOR:
9171 /* Handle the NAND vector patterns. */
9172 if (sparc_vector_mode_supported_p (GET_MODE (x))
9173 && GET_CODE (XEXP (x, 0)) == NOT
9174 && GET_CODE (XEXP (x, 1)) == NOT)
9175 {
9176 *total = COSTS_N_INSNS (1);
9177 return true;
9178 }
9179 else
9180 return false;
9181
9182 default:
9183 return false;
9184 }
9185 }
9186
9187 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
9188 This is achieved by means of a manual dynamic stack space allocation in
9189 the current frame. We make the assumption that SEQ doesn't contain any
9190 function calls, with the possible exception of calls to the GOT helper. */
9191
9192 static void
9193 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
9194 {
9195 /* We must preserve the lowest 16 words for the register save area. */
9196 HOST_WIDE_INT offset = 16 * UNITS_PER_WORD;
9197 /* We really need only 2 words of fresh stack space. */
9198 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2 * UNITS_PER_WORD);
9199
9200 rtx slot
9201 = gen_rtx_MEM (word_mode, plus_constant (stack_pointer_rtx,
9202 SPARC_STACK_BIAS + offset));
9203
9204 emit_insn (gen_stack_pointer_dec (GEN_INT (size)));
9205 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
9206 if (reg2)
9207 emit_insn (gen_rtx_SET (VOIDmode,
9208 adjust_address (slot, word_mode, UNITS_PER_WORD),
9209 reg2));
9210 emit_insn (seq);
9211 if (reg2)
9212 emit_insn (gen_rtx_SET (VOIDmode,
9213 reg2,
9214 adjust_address (slot, word_mode, UNITS_PER_WORD)));
9215 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
9216 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
9217 }
9218
9219 /* Output the assembler code for a thunk function. THUNK_DECL is the
9220 declaration for the thunk function itself, FUNCTION is the decl for
9221 the target function. DELTA is an immediate constant offset to be
9222 added to THIS. If VCALL_OFFSET is nonzero, the word at address
9223 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
9224
9225 static void
9226 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9227 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9228 tree function)
9229 {
9230 rtx this_rtx, insn, funexp;
9231 unsigned int int_arg_first;
9232
9233 reload_completed = 1;
9234 epilogue_completed = 1;
9235
9236 emit_note (NOTE_INSN_PROLOGUE_END);
9237
9238 if (flag_delayed_branch)
9239 {
9240 /* We will emit a regular sibcall below, so we need to instruct
9241 output_sibcall that we are in a leaf function. */
9242 sparc_leaf_function_p = current_function_uses_only_leaf_regs = 1;
9243
9244 /* This will cause final.c to invoke leaf_renumber_regs so we
9245 must behave as if we were in a not-yet-leafified function. */
9246 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
9247 }
9248 else
9249 {
9250 /* We will emit the sibcall manually below, so we will need to
9251 manually spill non-leaf registers. */
9252 sparc_leaf_function_p = current_function_uses_only_leaf_regs = 0;
9253
9254 /* We really are in a leaf function. */
9255 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
9256 }
9257
9258 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
9259 returns a structure, the structure return pointer is there instead. */
9260 if (TARGET_ARCH64
9261 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9262 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
9263 else
9264 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
9265
9266 /* Add DELTA. When possible use a plain add, otherwise load it into
9267 a register first. */
9268 if (delta)
9269 {
9270 rtx delta_rtx = GEN_INT (delta);
9271
9272 if (! SPARC_SIMM13_P (delta))
9273 {
9274 rtx scratch = gen_rtx_REG (Pmode, 1);
9275 emit_move_insn (scratch, delta_rtx);
9276 delta_rtx = scratch;
9277 }
9278
9279 /* THIS_RTX += DELTA. */
9280 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
9281 }
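/* Illustration: for a typical single-inheritance thunk, DELTA fits in the
   13-bit immediate field and the block above emits the single insn
   "add %o0, DELTA, %o0" (assuming "this" lives in %o0).  */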
9282
9283 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
9284 if (vcall_offset)
9285 {
9286 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
9287 rtx scratch = gen_rtx_REG (Pmode, 1);
9288
9289 gcc_assert (vcall_offset < 0);
9290
9291 /* SCRATCH = *THIS_RTX. */
9292 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
9293
9294 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
9295 may not have any available scratch register at this point. */
9296 if (SPARC_SIMM13_P (vcall_offset))
9297 ;
9298 /* This is the case on ARCH64 (unless -ffixed-g5 is passed). */
9299 else if (! fixed_regs[5]
9300 /* The sequence below is made up of at least 2 insns,
9301 while the default method may need only one. */
9302 && vcall_offset < -8192)
9303 {
9304 rtx scratch2 = gen_rtx_REG (Pmode, 5);
9305 emit_move_insn (scratch2, vcall_offset_rtx);
9306 vcall_offset_rtx = scratch2;
9307 }
9308 else
9309 {
9310 rtx increment = GEN_INT (-4096);
9311
9312 /* VCALL_OFFSET is a negative number whose typical range can be
9313 estimated as -32768..0 in 32-bit mode. In almost all cases
9314 it is therefore cheaper to emit multiple add insns than
9315 spilling and loading the constant into a register (at least
9316 6 insns). */
9317 while (! SPARC_SIMM13_P (vcall_offset))
9318 {
9319 emit_insn (gen_add2_insn (scratch, increment));
9320 vcall_offset += 4096;
9321 }
9322 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
9323 }
9324
9325 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
9326 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
9327 gen_rtx_PLUS (Pmode,
9328 scratch,
9329 vcall_offset_rtx)));
9330
9331 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
9332 emit_insn (gen_add2_insn (this_rtx, scratch));
9333 }
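/* Worked example (illustration only): on ARCH32 with %g5 fixed and
   VCALL_OFFSET == -8192, the block above emits roughly

	ld	[%o0], %g1		! SCRATCH = *THIS
	add	%g1, -4096, %g1		! peel off one 4096 increment
	ld	[%g1-4096], %g1		! SCRATCH = *(*THIS + VCALL_OFFSET)
	add	%o0, %g1, %o0		! THIS += SCRATCH  */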
9334
9335 /* Generate a tail call to the target function. */
9336 if (! TREE_USED (function))
9337 {
9338 assemble_external (function);
9339 TREE_USED (function) = 1;
9340 }
9341 funexp = XEXP (DECL_RTL (function), 0);
9342
9343 if (flag_delayed_branch)
9344 {
9345 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
9346 insn = emit_call_insn (gen_sibcall (funexp));
9347 SIBLING_CALL_P (insn) = 1;
9348 }
9349 else
9350 {
9351 /* The hoops we have to jump through in order to generate a sibcall
9352 without using delay slots... */
9353 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
9354
9355 if (flag_pic)
9356 {
9357 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
9358 start_sequence ();
9359 /* Delay emitting the GOT helper function because it needs to
9360 change the section and we are emitting assembly code. */
9361 load_got_register (); /* clobbers %o7 */
9362 scratch = sparc_legitimize_pic_address (funexp, scratch);
9363 seq = get_insns ();
9364 end_sequence ();
9365 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
9366 }
9367 else if (TARGET_ARCH32)
9368 {
9369 emit_insn (gen_rtx_SET (VOIDmode,
9370 scratch,
9371 gen_rtx_HIGH (SImode, funexp)));
9372 emit_insn (gen_rtx_SET (VOIDmode,
9373 scratch,
9374 gen_rtx_LO_SUM (SImode, scratch, funexp)));
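/* Illustration: the two sets above amount to

	sethi	%hi(FUNCTION), %g1
	or	%g1, %lo(FUNCTION), %g1

   and the indirect jump emitted at the bottom then transfers to it.  */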
9375 }
9376 else /* TARGET_ARCH64 */
9377 {
9378 switch (sparc_cmodel)
9379 {
9380 case CM_MEDLOW:
9381 case CM_MEDMID:
9382 /* The destination can serve as a temporary. */
9383 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
9384 break;
9385
9386 case CM_MEDANY:
9387 case CM_EMBMEDANY:
9388 /* The destination cannot serve as a temporary. */
9389 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
9390 start_sequence ();
9391 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
9392 seq = get_insns ();
9393 end_sequence ();
9394 emit_and_preserve (seq, spill_reg, 0);
9395 break;
9396
9397 default:
9398 gcc_unreachable ();
9399 }
9400 }
9401
9402 emit_jump_insn (gen_indirect_jump (scratch));
9403 }
9404
9405 emit_barrier ();
9406
9407 /* Run just enough of rest_of_compilation to get the insns emitted.
9408 There's not really enough bulk here to make other passes such as
9409 instruction scheduling worthwhile. Note that use_thunk calls
9410 assemble_start_function and assemble_end_function. */
9411 insn = get_insns ();
9412 insn_locators_alloc ();
9413 shorten_branches (insn);
9414 final_start_function (insn, file, 1);
9415 final (insn, file, 1);
9416 final_end_function ();
9417
9418 reload_completed = 0;
9419 epilogue_completed = 0;
9420 }
9421
9422 /* Return true if sparc_output_mi_thunk would be able to output the
9423 assembler code for the thunk function specified by the arguments
9424 it is passed, and false otherwise. */
9425 static bool
9426 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
9427 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
9428 HOST_WIDE_INT vcall_offset,
9429 const_tree function ATTRIBUTE_UNUSED)
9430 {
9431 /* Bound the loop used in the default method above. */
9432 return (vcall_offset >= -32768 || ! fixed_regs[5]);
9433 }
9434
9435 /* How to allocate a 'struct machine_function'. */
9436
9437 static struct machine_function *
9438 sparc_init_machine_status (void)
9439 {
9440 return ggc_alloc_cleared_machine_function ();
9441 }
9442
9443 /* Locate some local-dynamic symbol still in use by this function
9444 so that we can print its name in local-dynamic base patterns. */
9445
9446 static const char *
9447 get_some_local_dynamic_name (void)
9448 {
9449 rtx insn;
9450
9451 if (cfun->machine->some_ld_name)
9452 return cfun->machine->some_ld_name;
9453
9454 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9455 if (INSN_P (insn)
9456 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9457 return cfun->machine->some_ld_name;
9458
9459 gcc_unreachable ();
9460 }
9461
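/* Subroutine of get_some_local_dynamic_name, called via for_each_rtx.
   Record the name of the first local-dynamic TLS symbol found in *PX.  */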
9462 static int
9463 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
9464 {
9465 rtx x = *px;
9466
9467 if (x
9468 && GET_CODE (x) == SYMBOL_REF
9469 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
9470 {
9471 cfun->machine->some_ld_name = XSTR (x, 0);
9472 return 1;
9473 }
9474
9475 return 0;
9476 }
9477
9478 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
9479 This is called from dwarf2out.c to emit call frame instructions
9480 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
9481 static void
9482 sparc_dwarf_handle_frame_unspec (const char *label,
9483 rtx pattern ATTRIBUTE_UNUSED,
9484 int index ATTRIBUTE_UNUSED)
9485 {
9486 gcc_assert (index == UNSPECV_SAVEW);
9487 dwarf2out_window_save (label);
9488 }
9489
9490 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9491 We need to emit DTP-relative relocations. */
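/* For example (illustration): for SIZE == 4 and a symbol X, this prints

	.word	%r_tls_dtpoff32(X)

   which the assembler turns into a 32-bit DTP-relative relocation.  */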
9492
9493 static void
9494 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
9495 {
9496 switch (size)
9497 {
9498 case 4:
9499 fputs ("\t.word\t%r_tls_dtpoff32(", file);
9500 break;
9501 case 8:
9502 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
9503 break;
9504 default:
9505 gcc_unreachable ();
9506 }
9507 output_addr_const (file, x);
9508 fputs (")", file);
9509 }
9510
9511 /* Do whatever processing is required at the end of a file. */
9512
9513 static void
9514 sparc_file_end (void)
9515 {
9516 /* If we need to emit the special GOT helper function, do so now. */
9517 if (got_helper_rtx)
9518 {
9519 const char *name = XSTR (got_helper_rtx, 0);
9520 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
9521 #ifdef DWARF2_UNWIND_INFO
9522 bool do_cfi;
9523 #endif
9524
9525 if (USE_HIDDEN_LINKONCE)
9526 {
9527 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9528 get_identifier (name),
9529 build_function_type (void_type_node,
9530 void_list_node));
9531 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9532 NULL_TREE, void_type_node);
9533 TREE_STATIC (decl) = 1;
9534 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
9535 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
9536 DECL_VISIBILITY_SPECIFIED (decl) = 1;
9537 allocate_struct_function (decl, true);
9538 cfun->is_thunk = 1;
9539 current_function_decl = decl;
9540 init_varasm_status ();
9541 assemble_start_function (decl, name);
9542 }
9543 else
9544 {
9545 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9546 switch_to_section (text_section);
9547 if (align > 0)
9548 ASM_OUTPUT_ALIGN (asm_out_file, align);
9549 ASM_OUTPUT_LABEL (asm_out_file, name);
9550 }
9551
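/* Illustration: with delayed branches and %l7 as the GOT register, the
   body emitted below is

	jmp	%o7+8
	 add	%o7, %l7, %l7

   i.e. return to the caller while adding the call site's PC (left in
   %o7 by the call) to the GOT register.  */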
9552 #ifdef DWARF2_UNWIND_INFO
9553 do_cfi = dwarf2out_do_cfi_asm ();
9554 if (do_cfi)
9555 fprintf (asm_out_file, "\t.cfi_startproc\n");
9556 #endif
9557 if (flag_delayed_branch)
9558 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
9559 reg_name, reg_name);
9560 else
9561 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
9562 reg_name, reg_name);
9563 #ifdef DWARF2_UNWIND_INFO
9564 if (do_cfi)
9565 fprintf (asm_out_file, "\t.cfi_endproc\n");
9566 #endif
9567 }
9568
9569 if (NEED_INDICATE_EXEC_STACK)
9570 file_end_indicate_exec_stack ();
9571 }
9572
9573 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
9574 /* Implement TARGET_MANGLE_TYPE. */
9575
9576 static const char *
9577 sparc_mangle_type (const_tree type)
9578 {
9579 if (!TARGET_64BIT
9580 && TYPE_MAIN_VARIANT (type) == long_double_type_node
9581 && TARGET_LONG_DOUBLE_128)
9582 return "g";
9583
9584 /* For all other types, use normal C++ mangling. */
9585 return NULL;
9586 }
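/* For example (illustration): with the 128-bit long double in use,
   "void f (long double)" mangles as _Z1fg instead of the default _Z1fe.  */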
9587 #endif
9588
9589 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing
9590 a 32-bit compare and swap on the word containing the byte or half-word. */
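/* An illustrative C-level sketch of the expansion below (not the literal
   RTL; "cas" stands for the 32-bit compare-and-swap):

     word_addr = addr & -4;
     shift = ((addr & 3) ^ (QImode ? 3 : 2)) << 3;
     mask = (QImode ? 0xff : 0xffff) << shift;
     background = *word_addr & ~mask;
     do
       {
	 old_word = ((oldval << shift) & mask) | background;
	 new_word = ((newval << shift) & mask) | background;
	 res = cas (word_addr, old_word, new_word);
	 if (res == old_word)
	   break;                        (the CAS succeeded)
	 changed = (res & ~mask) != background;
	 background = res & ~mask;
       }
     while (changed);                   (retry only if unrelated bytes moved)
     result = (res & mask) >> shift;    (on failure, the value just read)  */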
9591
9592 void
9593 sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
9594 {
9595 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
9596 rtx addr = gen_reg_rtx (Pmode);
9597 rtx off = gen_reg_rtx (SImode);
9598 rtx oldv = gen_reg_rtx (SImode);
9599 rtx newv = gen_reg_rtx (SImode);
9600 rtx oldvalue = gen_reg_rtx (SImode);
9601 rtx newvalue = gen_reg_rtx (SImode);
9602 rtx res = gen_reg_rtx (SImode);
9603 rtx resv = gen_reg_rtx (SImode);
9604 rtx memsi, val, mask, end_label, loop_label, cc;
9605
9606 emit_insn (gen_rtx_SET (VOIDmode, addr,
9607 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
9608
9609 if (Pmode != SImode)
9610 addr1 = gen_lowpart (SImode, addr1);
9611 emit_insn (gen_rtx_SET (VOIDmode, off,
9612 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
9613
9614 memsi = gen_rtx_MEM (SImode, addr);
9615 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
9616 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
9617
9618 val = force_reg (SImode, memsi);
9619
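/* SPARC is big-endian, so the byte at offset 0 within the word is the most
   significant one; XOR-ing the offset with 3 (or 2 for half-words) converts
   it into the distance from the least significant end, and the shift by 3
   below then turns that byte distance into a bit count.  */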
9620 emit_insn (gen_rtx_SET (VOIDmode, off,
9621 gen_rtx_XOR (SImode, off,
9622 GEN_INT (GET_MODE (mem) == QImode
9623 ? 3 : 2))));
9624
9625 emit_insn (gen_rtx_SET (VOIDmode, off,
9626 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
9627
9628 if (GET_MODE (mem) == QImode)
9629 mask = force_reg (SImode, GEN_INT (0xff));
9630 else
9631 mask = force_reg (SImode, GEN_INT (0xffff));
9632
9633 emit_insn (gen_rtx_SET (VOIDmode, mask,
9634 gen_rtx_ASHIFT (SImode, mask, off)));
9635
9636 emit_insn (gen_rtx_SET (VOIDmode, val,
9637 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9638 val)));
9639
9640 oldval = gen_lowpart (SImode, oldval);
9641 emit_insn (gen_rtx_SET (VOIDmode, oldv,
9642 gen_rtx_ASHIFT (SImode, oldval, off)));
9643
9644 newval = gen_lowpart_common (SImode, newval);
9645 emit_insn (gen_rtx_SET (VOIDmode, newv,
9646 gen_rtx_ASHIFT (SImode, newval, off)));
9647
9648 emit_insn (gen_rtx_SET (VOIDmode, oldv,
9649 gen_rtx_AND (SImode, oldv, mask)));
9650
9651 emit_insn (gen_rtx_SET (VOIDmode, newv,
9652 gen_rtx_AND (SImode, newv, mask)));
9653
9654 end_label = gen_label_rtx ();
9655 loop_label = gen_label_rtx ();
9656 emit_label (loop_label);
9657
9658 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
9659 gen_rtx_IOR (SImode, oldv, val)));
9660
9661 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
9662 gen_rtx_IOR (SImode, newv, val)));
9663
9664 emit_insn (gen_sync_compare_and_swapsi (res, memsi, oldvalue, newvalue));
9665
9666 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
9667
9668 emit_insn (gen_rtx_SET (VOIDmode, resv,
9669 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9670 res)));
9671
9672 cc = gen_compare_reg_1 (NE, resv, val);
9673 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
9674
9675 /* Use cbranchcc4 to separate the compare and branch! */
9676 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
9677 cc, const0_rtx, loop_label));
9678
9679 emit_label (end_label);
9680
9681 emit_insn (gen_rtx_SET (VOIDmode, res,
9682 gen_rtx_AND (SImode, res, mask)));
9683
9684 emit_insn (gen_rtx_SET (VOIDmode, res,
9685 gen_rtx_LSHIFTRT (SImode, res, off)));
9686
9687 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
9688 }
9689
9690 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
9691
9692 bool
9693 sparc_frame_pointer_required (void)
9694 {
9695 return !(leaf_function_p () && only_leaf_regs_used ());
9696 }
9697
9698 /* The way this is structured, we can't eliminate SFP in favor of SP
9699 if the frame pointer is required: we want to use the SFP->HFP elimination
9700 in that case. But the test in update_eliminables doesn't know that we
9701 assume below that only the SFP->HFP elimination is then performed. */
9702
9703 bool
9704 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
9705 {
9706 return (to == HARD_FRAME_POINTER_REGNUM
9707 || !targetm.frame_pointer_required ());
9708 }
9709
9710 /* Adjust fixed and call-used registers for the selected options; e.g.
9711 if !TARGET_FPU, fix the fp registers and fp cc regs so that they won't be allocated. */
9712
9713 static void
9714 sparc_conditional_register_usage (void)
9715 {
9716 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
9717 {
9718 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
9719 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
9720 }
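/* In the tests below, an entry of 2 is the untouched default from the
   FIXED_REGISTERS initializer, whereas 0 or 1 means the user expressed a
   preference with -ffixed/-fcall-used/-fcall-saved (our reading of the
   convention these tests rely on).  */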
9721 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
9722 then honor it. */
9723 if (TARGET_ARCH32 && fixed_regs[5])
9724 fixed_regs[5] = 1;
9725 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
9726 fixed_regs[5] = 0;
9727 if (! TARGET_V9)
9728 {
9729 int regno;
9730 for (regno = SPARC_FIRST_V9_FP_REG;
9731 regno <= SPARC_LAST_V9_FP_REG;
9732 regno++)
9733 fixed_regs[regno] = 1;
9734 /* %fcc0 is used by v8 and v9. */
9735 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
9736 regno <= SPARC_LAST_V9_FCC_REG;
9737 regno++)
9738 fixed_regs[regno] = 1;
9739 }
9740 if (! TARGET_FPU)
9741 {
9742 int regno;
9743 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
9744 fixed_regs[regno] = 1;
9745 }
9746 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
9747 then honor it. Likewise with g3 and g4. */
9748 if (fixed_regs[2] == 2)
9749 fixed_regs[2] = ! TARGET_APP_REGS;
9750 if (fixed_regs[3] == 2)
9751 fixed_regs[3] = ! TARGET_APP_REGS;
9752 if (TARGET_ARCH32 && fixed_regs[4] == 2)
9753 fixed_regs[4] = ! TARGET_APP_REGS;
9754 else if (TARGET_CM_EMBMEDANY)
9755 fixed_regs[4] = 1;
9756 else if (fixed_regs[4] == 2)
9757 fixed_regs[4] = 0;
9758 }
9759
9760 #include "gt-sparc.h"