1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "diagnostic-core.h"
41 #include "recog.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "df.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "params.h"
54 #include "ggc.h"
55 #include "gimple.h"
56 #include "cfgloop.h"
57 #include "alloc-pool.h"
58 #include "tm-constrs.h"
59 #include "opts.h"
60
61
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
63
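/* Word offsets (in SImode words) of the most and least significant words
   of a multi-word value, taking the target endianness into account. */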
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
66
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
73
74 /* Used to simplify the logic below. Find the attributes wherever
75 they may be. */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
81
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
84
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
87
88 /* Global variables for machine-dependent things. */
89
 90 /* Which cpu we are scheduling for.  */
91 enum processor_type sh_cpu;
92
93 /* Definitions used in ready queue reordering for first scheduling pass. */
94
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
97
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
100
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
103
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
106
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
110
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
113
114 /* Provides the class number of the smallest class containing
115 reg number. */
116
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
118 {
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
158 };
159
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
162
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
166
167 int assembler_dialect;
168
169 static bool shmedia_space_reserved_for_target_registers;
170
171 static void split_branches (rtx);
172 static int branch_dest (rtx);
173 static void force_into (rtx, rtx);
174 static void print_slot (rtx);
175 static rtx add_constant (rtx, enum machine_mode, rtx);
176 static void dump_table (rtx, rtx);
177 static int hi_const (rtx);
178 static int broken_move (rtx);
179 static int mova_p (rtx);
180 static rtx find_barrier (int, rtx, rtx);
181 static int noncall_uses_reg (rtx, rtx, rtx *);
182 static rtx gen_block_redirect (rtx, int, int);
183 static void sh_reorg (void);
184 static void sh_option_override (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET *, int);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static bool sh_frame_pointer_required (void);
193 static rtx mark_constant_pool_use (rtx);
194 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
195 static tree sh_handle_resbank_handler_attribute (tree *, tree,
196 tree, int, bool *);
197 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
198 tree, int, bool *);
199 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
202 static void sh_print_operand (FILE *, rtx, int);
203 static void sh_print_operand_address (FILE *, rtx);
204 static bool sh_print_operand_punct_valid_p (unsigned char code);
205 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
206 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
207 static void sh_insert_attributes (tree, tree *);
208 static const char *sh_check_pch_target_flags (int);
209 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
210 static int sh_adjust_cost (rtx, rtx, rtx, int);
211 static int sh_issue_rate (void);
212 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
213 static short find_set_regmode_weight (rtx, enum machine_mode);
214 static short find_insn_regmode_weight (rtx, enum machine_mode);
215 static void find_regmode_weight (basic_block, enum machine_mode);
216 static int find_r0_life_regions (basic_block);
217 static void sh_md_init_global (FILE *, int, int);
218 static void sh_md_finish_global (FILE *, int);
219 static int rank_for_reorder (const void *, const void *);
220 static void swap_reorder (rtx *, int);
221 static void ready_reorder (rtx *, int);
222 static short high_pressure (enum machine_mode);
223 static int sh_reorder (FILE *, int, rtx *, int *, int);
224 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
225 static void sh_md_init (FILE *, int, int);
226 static int sh_variable_issue (FILE *, int, rtx, int);
227
228 static bool sh_function_ok_for_sibcall (tree, tree);
229
230 static bool sh_cannot_modify_jumps_p (void);
231 static reg_class_t sh_target_reg_class (void);
232 static bool sh_optimize_target_register_callee_saved (bool);
233 static bool sh_ms_bitfield_layout_p (const_tree);
234
235 static void sh_init_builtins (void);
236 static tree sh_builtin_decl (unsigned, bool);
237 static void sh_media_init_builtins (void);
238 static tree sh_media_builtin_decl (unsigned, bool);
239 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
240 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
241 static void sh_file_start (void);
242 static int flow_dependent_p (rtx, rtx);
243 static void flow_dependent_p_1 (rtx, const_rtx, void *);
244 static int shiftcosts (rtx);
245 static int and_xor_ior_costs (rtx, int);
246 static int addsubcosts (rtx);
247 static int multcosts (rtx);
248 static bool unspec_caller_rtx_p (rtx);
249 static bool sh_cannot_copy_insn_p (rtx);
250 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
251 static int sh_address_cost (rtx, bool);
252 static int sh_pr_n_sets (void);
253 static rtx sh_allocate_initial_value (rtx);
254 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
255 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
256 enum machine_mode,
257 struct secondary_reload_info *);
258 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
259 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
260 static rtx sh_delegitimize_address (rtx);
261 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
262 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
263 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
264 static int scavenge_reg (HARD_REG_SET *s);
265 struct save_schedule_s;
266 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
267 struct save_schedule_s *, int);
268
269 static rtx sh_struct_value_rtx (tree, int);
270 static rtx sh_function_value (const_tree, const_tree, bool);
271 static bool sh_function_value_regno_p (const unsigned int);
272 static rtx sh_libcall_value (enum machine_mode, const_rtx);
273 static bool sh_return_in_memory (const_tree, const_tree);
274 static rtx sh_builtin_saveregs (void);
275 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode, tree, int *, int);
276 static bool sh_strict_argument_naming (cumulative_args_t);
277 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
278 static tree sh_build_builtin_va_list (void);
279 static void sh_va_start (tree, rtx);
280 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
281 static bool sh_promote_prototypes (const_tree);
282 static enum machine_mode sh_promote_function_mode (const_tree type,
283 enum machine_mode,
284 int *punsignedp,
285 const_tree funtype,
286 int for_return);
287 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
288 const_tree, bool);
289 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
290 const_tree, bool);
291 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
292 tree, bool);
293 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
294 const_tree, bool);
295 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
296 const_tree, bool);
297 static bool sh_scalar_mode_supported_p (enum machine_mode);
298 static int sh_dwarf_calling_convention (const_tree);
299 static void sh_encode_section_info (tree, rtx, int);
300 static int sh2a_function_vector_p (tree);
301 static void sh_trampoline_init (rtx, tree, rtx);
302 static rtx sh_trampoline_adjust_address (rtx);
303 static void sh_conditional_register_usage (void);
304 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
305
306 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
307 \f
308 static const struct attribute_spec sh_attribute_table[] =
309 {
310 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
311 affects_type_identity } */
312 { "interrupt_handler", 0, 0, true, false, false,
313 sh_handle_interrupt_handler_attribute, false },
314 { "sp_switch", 1, 1, true, false, false,
315 sh_handle_sp_switch_attribute, false },
316 { "trap_exit", 1, 1, true, false, false,
317 sh_handle_trap_exit_attribute, false },
318 { "renesas", 0, 0, false, true, false,
319 sh_handle_renesas_attribute, false },
320 { "trapa_handler", 0, 0, true, false, false,
321 sh_handle_interrupt_handler_attribute, false },
322 { "nosave_low_regs", 0, 0, true, false, false,
323 sh_handle_interrupt_handler_attribute, false },
324 { "resbank", 0, 0, true, false, false,
325 sh_handle_resbank_handler_attribute, false },
326 { "function_vector", 1, 1, true, false, false,
327 sh2a_handle_function_vector_handler_attribute, false },
328 { NULL, 0, 0, false, false, false, NULL, false }
329 };
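/* For reference, the attributes above are applied from user code roughly
   like this (illustrative sketch only; "alt_stack" and the trap number 11
   are made-up example arguments, see the GCC manual for the exact syntax):

     void handler (void) __attribute__ ((interrupt_handler));
     void handler2 (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11)));

   sp_switch names a variable holding the address of an alternate stack and
   trap_exit gives the trap number used to return from the handler.  */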
330 \f
331 /* Initialize the GCC target structure. */
332 #undef TARGET_ATTRIBUTE_TABLE
333 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
334
335 /* The next two are used for debug info when compiling with -gdwarf. */
336 #undef TARGET_ASM_UNALIGNED_HI_OP
337 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
338 #undef TARGET_ASM_UNALIGNED_SI_OP
339 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
340
341 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
342 #undef TARGET_ASM_UNALIGNED_DI_OP
343 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
344 #undef TARGET_ASM_ALIGNED_DI_OP
345 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
346
347 #undef TARGET_OPTION_OVERRIDE
348 #define TARGET_OPTION_OVERRIDE sh_option_override
349
350 #undef TARGET_PRINT_OPERAND
351 #define TARGET_PRINT_OPERAND sh_print_operand
352 #undef TARGET_PRINT_OPERAND_ADDRESS
353 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
354 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
355 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
356 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
357 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
358
359 #undef TARGET_ASM_FUNCTION_EPILOGUE
360 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
361
362 #undef TARGET_ASM_OUTPUT_MI_THUNK
363 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
364
365 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
366 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
367
368 #undef TARGET_ASM_FILE_START
369 #define TARGET_ASM_FILE_START sh_file_start
370 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
371 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
372
373 #undef TARGET_REGISTER_MOVE_COST
374 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
375
376 #undef TARGET_INSERT_ATTRIBUTES
377 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
378
379 #undef TARGET_SCHED_ADJUST_COST
380 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
381
382 #undef TARGET_SCHED_ISSUE_RATE
383 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
384
385 /* The next 5 hooks have been implemented for reenabling sched1. With the
386 help of these macros we are limiting the movement of insns in sched1 to
387 reduce the register pressure. The overall idea is to keep count of SImode
388 and SFmode regs required by already scheduled insns. When these counts
 389    and SFmode regs required by already scheduled insns. When these counts
390 The insn that frees registers is most likely to be the insn with lowest
391 LUID (original insn order); but such an insn might be there in the stalled
392 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
 393    up to a maximum of 8 cycles so that such insns may move from Q -> R.
394
 395    The descriptions of the hooks are as follows:
396
397 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
398 scheduler; it is called inside the sched_init function just after
 399    the find_insn_reg_weights function call. It is used to calculate the SImode
 400    and SFmode weights of insns of basic blocks, much like what
401 find_insn_reg_weights does.
402 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
403
404 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
405 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
406 (Q)->(R).
407
408 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
409 high; reorder the ready queue so that the insn with lowest LUID will be
410 issued next.
411
412 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
413 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
414
415 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
416 can be returned from TARGET_SCHED_REORDER2.
417
418 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
419
420 #undef TARGET_SCHED_DFA_NEW_CYCLE
421 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
422
423 #undef TARGET_SCHED_INIT_GLOBAL
424 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
425
426 #undef TARGET_SCHED_FINISH_GLOBAL
427 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
428
429 #undef TARGET_SCHED_VARIABLE_ISSUE
430 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
431
432 #undef TARGET_SCHED_REORDER
433 #define TARGET_SCHED_REORDER sh_reorder
434
435 #undef TARGET_SCHED_REORDER2
436 #define TARGET_SCHED_REORDER2 sh_reorder2
437
438 #undef TARGET_SCHED_INIT
439 #define TARGET_SCHED_INIT sh_md_init
440
441 #undef TARGET_DELEGITIMIZE_ADDRESS
442 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
443
444 #undef TARGET_LEGITIMIZE_ADDRESS
445 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
446
447 #undef TARGET_CANNOT_MODIFY_JUMPS_P
448 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
449 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
450 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
451 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
452 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
453 sh_optimize_target_register_callee_saved
454
455 #undef TARGET_MS_BITFIELD_LAYOUT_P
456 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
457
458 #undef TARGET_INIT_BUILTINS
459 #define TARGET_INIT_BUILTINS sh_init_builtins
460 #undef TARGET_BUILTIN_DECL
461 #define TARGET_BUILTIN_DECL sh_builtin_decl
462 #undef TARGET_EXPAND_BUILTIN
463 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
464
465 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
466 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
467
468 #undef TARGET_CANNOT_COPY_INSN_P
469 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
470 #undef TARGET_RTX_COSTS
471 #define TARGET_RTX_COSTS sh_rtx_costs
472 #undef TARGET_ADDRESS_COST
473 #define TARGET_ADDRESS_COST sh_address_cost
474 #undef TARGET_ALLOCATE_INITIAL_VALUE
475 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
476
477 #undef TARGET_MACHINE_DEPENDENT_REORG
478 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
479
480 #undef TARGET_DWARF_REGISTER_SPAN
481 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
482
483 #ifdef HAVE_AS_TLS
484 #undef TARGET_HAVE_TLS
485 #define TARGET_HAVE_TLS true
486 #endif
487
488 #undef TARGET_PROMOTE_PROTOTYPES
489 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
490 #undef TARGET_PROMOTE_FUNCTION_MODE
491 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
492
493 #undef TARGET_FUNCTION_VALUE
494 #define TARGET_FUNCTION_VALUE sh_function_value
495 #undef TARGET_FUNCTION_VALUE_REGNO_P
496 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
497 #undef TARGET_LIBCALL_VALUE
498 #define TARGET_LIBCALL_VALUE sh_libcall_value
499 #undef TARGET_STRUCT_VALUE_RTX
500 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
501 #undef TARGET_RETURN_IN_MEMORY
502 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
503
504 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
505 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
506 #undef TARGET_SETUP_INCOMING_VARARGS
507 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
508 #undef TARGET_STRICT_ARGUMENT_NAMING
509 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
510 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
511 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
512 #undef TARGET_MUST_PASS_IN_STACK
513 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
514 #undef TARGET_PASS_BY_REFERENCE
515 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
516 #undef TARGET_CALLEE_COPIES
517 #define TARGET_CALLEE_COPIES sh_callee_copies
518 #undef TARGET_ARG_PARTIAL_BYTES
519 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
520 #undef TARGET_FUNCTION_ARG
521 #define TARGET_FUNCTION_ARG sh_function_arg
522 #undef TARGET_FUNCTION_ARG_ADVANCE
523 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
524
525 #undef TARGET_BUILD_BUILTIN_VA_LIST
526 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
527 #undef TARGET_EXPAND_BUILTIN_VA_START
528 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
529 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
530 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
531
532 #undef TARGET_SCALAR_MODE_SUPPORTED_P
533 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
534 #undef TARGET_VECTOR_MODE_SUPPORTED_P
535 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
536
537 #undef TARGET_CHECK_PCH_TARGET_FLAGS
538 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
539
540 #undef TARGET_DWARF_CALLING_CONVENTION
541 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
542
543 #undef TARGET_FRAME_POINTER_REQUIRED
544 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
545
546 /* Return regmode weight for insn. */
547 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
548
549 /* Return current register pressure for regmode. */
550 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
551
552 #undef TARGET_ENCODE_SECTION_INFO
553 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
554
555 #undef TARGET_SECONDARY_RELOAD
556 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
557
558 #undef TARGET_PREFERRED_RELOAD_CLASS
559 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
560
561 #undef TARGET_CONDITIONAL_REGISTER_USAGE
562 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
563
564 #undef TARGET_LEGITIMATE_ADDRESS_P
565 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
566
567 #undef TARGET_TRAMPOLINE_INIT
568 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
569 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
570 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
571
572 #undef TARGET_LEGITIMATE_CONSTANT_P
573 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
574
575 /* Machine-specific symbol_ref flags. */
576 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
577
578 struct gcc_target targetm = TARGET_INITIALIZER;
579 \f
580 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
581 various options, and do some machine dependent initialization. */
582 static void
583 sh_option_override (void)
584 {
585 int regno;
586
587 SUBTARGET_OVERRIDE_OPTIONS;
588 if (optimize > 1 && !optimize_size)
589 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
590 if (flag_finite_math_only == 2)
591 flag_finite_math_only
592 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
593 if (TARGET_SH2E && !flag_finite_math_only)
594 target_flags |= MASK_IEEE;
595 sh_cpu = PROCESSOR_SH1;
596 assembler_dialect = 0;
597 if (TARGET_SH2)
598 sh_cpu = PROCESSOR_SH2;
599 if (TARGET_SH2E)
600 sh_cpu = PROCESSOR_SH2E;
601 if (TARGET_SH2A)
602 sh_cpu = PROCESSOR_SH2A;
603 if (TARGET_SH3)
604 sh_cpu = PROCESSOR_SH3;
605 if (TARGET_SH3E)
606 sh_cpu = PROCESSOR_SH3E;
607 if (TARGET_SH4)
608 {
609 assembler_dialect = 1;
610 sh_cpu = PROCESSOR_SH4;
611 }
612 if (TARGET_SH4A_ARCH)
613 {
614 assembler_dialect = 1;
615 sh_cpu = PROCESSOR_SH4A;
616 }
617 if (TARGET_SH5)
618 {
619 sh_cpu = PROCESSOR_SH5;
620 target_flags |= MASK_ALIGN_DOUBLE;
621 if (TARGET_SHMEDIA_FPU)
622 target_flags |= MASK_FMOVD;
623 if (TARGET_SHMEDIA)
624 {
625 /* There are no delay slots on SHmedia. */
626 flag_delayed_branch = 0;
627 /* Relaxation isn't yet supported for SHmedia */
628 target_flags &= ~MASK_RELAX;
 629          /* After reload, if-conversion does little good but can cause
630 ICEs:
631 - find_if_block doesn't do anything for SH because we don't
632 have conditional execution patterns. (We use conditional
633 move patterns, which are handled differently, and only
634 before reload).
635 - find_cond_trap doesn't do anything for the SH because we
636 don't have conditional traps.
637 - find_if_case_1 uses redirect_edge_and_branch_force in
638 the only path that does an optimization, and this causes
639 an ICE when branch targets are in registers.
640 - find_if_case_2 doesn't do anything for the SHmedia after
641 reload except when it can redirect a tablejump - and
642 that's rather rare. */
643 flag_if_conversion2 = 0;
644 if (! strcmp (sh_div_str, "call"))
645 sh_div_strategy = SH_DIV_CALL;
646 else if (! strcmp (sh_div_str, "call2"))
647 sh_div_strategy = SH_DIV_CALL2;
648 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
649 sh_div_strategy = SH_DIV_FP;
650 else if (! strcmp (sh_div_str, "inv"))
651 sh_div_strategy = SH_DIV_INV;
652 else if (! strcmp (sh_div_str, "inv:minlat"))
653 sh_div_strategy = SH_DIV_INV_MINLAT;
654 else if (! strcmp (sh_div_str, "inv20u"))
655 sh_div_strategy = SH_DIV_INV20U;
656 else if (! strcmp (sh_div_str, "inv20l"))
657 sh_div_strategy = SH_DIV_INV20L;
658 else if (! strcmp (sh_div_str, "inv:call2"))
659 sh_div_strategy = SH_DIV_INV_CALL2;
660 else if (! strcmp (sh_div_str, "inv:call"))
661 sh_div_strategy = SH_DIV_INV_CALL;
662 else if (! strcmp (sh_div_str, "inv:fp"))
663 {
664 if (TARGET_FPU_ANY)
665 sh_div_strategy = SH_DIV_INV_FP;
666 else
667 sh_div_strategy = SH_DIV_INV;
668 }
669 TARGET_CBRANCHDI4 = 0;
670 /* Assembler CFI isn't yet fully supported for SHmedia. */
671 flag_dwarf2_cfi_asm = 0;
672 }
673 }
674 else
675 {
 676       /* Only the sh64-elf assembler fully supports .quad.  */
677 targetm.asm_out.aligned_op.di = NULL;
678 targetm.asm_out.unaligned_op.di = NULL;
679 }
680 if (TARGET_SH1)
681 {
682 if (! strcmp (sh_div_str, "call-div1"))
683 sh_div_strategy = SH_DIV_CALL_DIV1;
684 else if (! strcmp (sh_div_str, "call-fp")
685 && (TARGET_FPU_DOUBLE
686 || (TARGET_HARD_SH4 && TARGET_SH2E)
687 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
688 sh_div_strategy = SH_DIV_CALL_FP;
689 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
690 sh_div_strategy = SH_DIV_CALL_TABLE;
691 else
692 /* Pick one that makes most sense for the target in general.
693 It is not much good to use different functions depending
694 on -Os, since then we'll end up with two different functions
695 when some of the code is compiled for size, and some for
696 speed. */
697
698 /* SH4 tends to emphasize speed. */
699 if (TARGET_HARD_SH4)
700 sh_div_strategy = SH_DIV_CALL_TABLE;
701 /* These have their own way of doing things. */
702 else if (TARGET_SH2A)
703 sh_div_strategy = SH_DIV_INTRINSIC;
704 /* ??? Should we use the integer SHmedia function instead? */
705 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
706 sh_div_strategy = SH_DIV_CALL_FP;
707 /* SH1 .. SH3 cores often go into small-footprint systems, so
708 default to the smallest implementation available. */
709 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
710 sh_div_strategy = SH_DIV_CALL_TABLE;
711 else
712 sh_div_strategy = SH_DIV_CALL_DIV1;
713 }
714 if (!TARGET_SH1)
715 TARGET_PRETEND_CMOVE = 0;
716 if (sh_divsi3_libfunc[0])
717 ; /* User supplied - leave it alone. */
718 else if (TARGET_DIVIDE_CALL_FP)
719 sh_divsi3_libfunc = "__sdivsi3_i4";
720 else if (TARGET_DIVIDE_CALL_TABLE)
721 sh_divsi3_libfunc = "__sdivsi3_i4i";
722 else if (TARGET_SH5)
723 sh_divsi3_libfunc = "__sdivsi3_1";
724 else
725 sh_divsi3_libfunc = "__sdivsi3";
726 if (sh_branch_cost == -1)
727 {
728 sh_branch_cost = 1;
729
730 /* The SH1 does not have delay slots, hence we get a pipeline stall
731 at every branch. The SH4 is superscalar, so the single delay slot
732 is not sufficient to keep both pipelines filled. */
733 if (! TARGET_SH2 || TARGET_HARD_SH4)
734 sh_branch_cost = 2;
735 }
736
737 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
738 if (! VALID_REGISTER_P (regno))
739 sh_register_names[regno][0] = '\0';
740
741 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
742 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
743 sh_additional_register_names[regno][0] = '\0';
744
745 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
746
747 if ((flag_pic && ! TARGET_PREFERGOT)
748 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
749 flag_no_function_cse = 1;
750
 751   if (targetm.small_register_classes_for_mode_p (VOIDmode))
752 {
753 /* Never run scheduling before reload, since that can
754 break global alloc, and generates slower code anyway due
755 to the pressure on R0. */
756 /* Enable sched1 for SH4 if the user explicitly requests.
757 When sched1 is enabled, the ready queue will be reordered by
 758          the target hooks if pressure is high.  We cannot do this for
759 PIC, SH3 and lower as they give spill failures for R0. */
760 if (!TARGET_HARD_SH4 || flag_pic)
761 flag_schedule_insns = 0;
762 /* ??? Current exception handling places basic block boundaries
 763          after call_insns.  This causes high pressure on R0 and gives
764 spill failures for R0 in reload. See PR 22553 and the thread
765 on gcc-patches
766 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
767 else if (flag_exceptions)
768 {
769 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
770 warning (0, "ignoring -fschedule-insns because of exception handling bug");
771 flag_schedule_insns = 0;
772 }
773 else if (flag_schedule_insns
774 && !global_options_set.x_flag_schedule_insns)
775 flag_schedule_insns = 0;
776 }
777
778 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
779 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
780
781 /* Unwind info is not correct around the CFG unless either a frame
782 pointer is present or M_A_O_A is set. Fixing this requires rewriting
783 unwind info generation to be aware of the CFG and propagating states
784 around edges. */
785 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
786 || flag_exceptions || flag_non_call_exceptions)
787 && flag_omit_frame_pointer
788 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
789 {
790 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
791 warning (0, "unwind tables currently require either a frame pointer "
792 "or -maccumulate-outgoing-args for correctness");
793 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
794 }
795
796 /* Unwinding with -freorder-blocks-and-partition does not work on this
 797      architecture, because it requires far jumps to labels crossing between
 798      hot/cold sections, which are rejected on this architecture.  */
799 if (flag_reorder_blocks_and_partition)
800 {
801 if (flag_exceptions)
802 {
803 inform (input_location,
804 "-freorder-blocks-and-partition does not work with "
805 "exceptions on this architecture");
806 flag_reorder_blocks_and_partition = 0;
807 flag_reorder_blocks = 1;
808 }
809 else if (flag_unwind_tables)
810 {
811 inform (input_location,
812 "-freorder-blocks-and-partition does not support unwind "
813 "info on this architecture");
814 flag_reorder_blocks_and_partition = 0;
815 flag_reorder_blocks = 1;
816 }
817 }
818
819 if (align_loops == 0)
820 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
821 if (align_jumps == 0)
822 align_jumps = 1 << CACHE_LOG;
823 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
824 align_jumps = TARGET_SHMEDIA ? 4 : 2;
825
826 /* Allocation boundary (in *bytes*) for the code of a function.
827 SH1: 32 bit alignment is faster, because instructions are always
828 fetched as a pair from a longword boundary.
829 SH2 .. SH5 : align to cache line start. */
830 if (align_functions == 0)
831 align_functions
832 = optimize_size ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
833 /* The linker relaxation code breaks when a function contains
834 alignments that are larger than that at the start of a
835 compilation unit. */
836 if (TARGET_RELAX)
837 {
838 int min_align
839 = align_loops > align_jumps ? align_loops : align_jumps;
840
 841       /* Also take possible .long constants / mova tables into account.  */
842 if (min_align < 4)
843 min_align = 4;
844 if (align_functions < min_align)
845 align_functions = min_align;
846 }
847
848 if (sh_fixed_range_str)
849 sh_fix_range (sh_fixed_range_str);
850
851 /* This target defaults to strict volatile bitfields. */
852 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
853 flag_strict_volatile_bitfields = 1;
854 }
855 \f
856 /* Print the operand address in x to the stream. */
857
858 static void
859 sh_print_operand_address (FILE *stream, rtx x)
860 {
861 switch (GET_CODE (x))
862 {
863 case REG:
864 case SUBREG:
865 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
866 break;
867
868 case PLUS:
869 {
870 rtx base = XEXP (x, 0);
871 rtx index = XEXP (x, 1);
872
873 switch (GET_CODE (index))
874 {
875 case CONST_INT:
876 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
877 reg_names[true_regnum (base)]);
878 break;
879
880 case REG:
881 case SUBREG:
882 {
883 int base_num = true_regnum (base);
884 int index_num = true_regnum (index);
885
886 fprintf (stream, "@(r0,%s)",
887 reg_names[MAX (base_num, index_num)]);
888 break;
889 }
890
891 default:
892 gcc_unreachable ();
893 }
894 }
895 break;
896
897 case PRE_DEC:
898 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
899 break;
900
901 case POST_INC:
902 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
903 break;
904
905 default:
906 x = mark_constant_pool_use (x);
907 output_addr_const (stream, x);
908 break;
909 }
910 }
911
912 /* Print operand x (an rtx) in assembler syntax to file stream
913 according to modifier code.
914
915 '.' print a .s if insn needs delay slot
916 ',' print LOCAL_LABEL_PREFIX
917 '@' print trap, rte or rts depending upon pragma interruptness
918 '#' output a nop if there is nothing to put in the delay slot
919 ''' print likelihood suffix (/u for unlikely).
920 '>' print branch target if -fverbose-asm
921 'O' print a constant without the #
922 'R' print the LSW of a dp value - changes if in little endian
923 'S' print the MSW of a dp value - changes if in little endian
924 'T' print the next word of a dp value - same as 'R' in big endian mode.
925 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
926 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
927 'N' print 'r63' if the operand is (const_int 0).
928 'd' print a V2SF reg as dN instead of fpN.
929 'm' print a pair `base,offset' or `base,index', for LD and ST.
930 'U' Likewise for {LD,ST}{HI,LO}.
931 'V' print the position of a single bit set.
932 'W' print the position of a single bit cleared.
933 't' print a memory address which is a register.
934 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
935 'o' output an operator. */
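/* A rough illustration (the real templates live in sh.md): an output
   template such as "cmp/eq %1,%0" prints the operands plainly, "%#"
   appends a nop when the delay slot is empty, and "%." prints the
   ".s" / "/s" suffix when the insn needs a delay slot marker.  */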
936
937 static void
938 sh_print_operand (FILE *stream, rtx x, int code)
939 {
940 int regno;
941 enum machine_mode mode;
942
943 switch (code)
944 {
945 tree trapa_attr;
946
947 case '.':
948 if (final_sequence
949 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
950 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
951 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
952 break;
953 case ',':
954 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
955 break;
956 case '@':
957 trapa_attr = lookup_attribute ("trap_exit",
958 DECL_ATTRIBUTES (current_function_decl));
959 if (trapa_attr)
960 fprintf (stream, "trapa #%ld",
961 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
962 else if (sh_cfun_interrupt_handler_p ())
963 {
964 if (sh_cfun_resbank_handler_p ())
965 fprintf (stream, "resbank\n");
966 fprintf (stream, "rte");
967 }
968 else
969 fprintf (stream, "rts");
970 break;
971 case '#':
972 /* Output a nop if there's nothing in the delay slot. */
973 if (dbr_sequence_length () == 0)
974 fprintf (stream, "\n\tnop");
975 break;
976 case '\'':
977 {
978 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
979
980 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
981 fputs ("/u", stream);
982 break;
983 }
984 case '>':
985 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
986 {
987 fputs ("\t! target: ", stream);
988 output_addr_const (stream, JUMP_LABEL (current_output_insn));
989 }
990 break;
991 case 'O':
992 x = mark_constant_pool_use (x);
993 output_addr_const (stream, x);
994 break;
995 /* N.B.: %R / %S / %T adjust memory addresses by four.
996 For SHMEDIA, that means they can be used to access the first and
997 second 32 bit part of a 64 bit (or larger) value that
998 might be held in floating point registers or memory.
999 While they can be used to access 64 bit parts of a larger value
 1000      held in general purpose registers, that won't work with memory,
 1001      nor for fp registers, since the frxx names are used.  */
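  /* For instance, for a DImode value in memory, %S prints the address of the
     word holding the most significant half and %R that of the least
     significant half, with MSW / LSW accounting for endianness.  */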
1002 case 'R':
1003 if (REG_P (x) || GET_CODE (x) == SUBREG)
1004 {
1005 regno = true_regnum (x);
1006 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1007 fputs (reg_names[regno], (stream));
1008 }
1009 else if (MEM_P (x))
1010 {
1011 x = adjust_address (x, SImode, 4 * LSW);
1012 sh_print_operand_address (stream, XEXP (x, 0));
1013 }
1014 else
1015 {
1016 rtx sub = NULL_RTX;
1017
1018 mode = GET_MODE (x);
1019 if (mode == VOIDmode)
1020 mode = DImode;
1021 if (GET_MODE_SIZE (mode) >= 8)
1022 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1023 if (sub)
1024 sh_print_operand (stream, sub, 0);
1025 else
1026 output_operand_lossage ("invalid operand to %%R");
1027 }
1028 break;
1029 case 'S':
1030 if (REG_P (x) || GET_CODE (x) == SUBREG)
1031 {
1032 regno = true_regnum (x);
1033 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1034 fputs (reg_names[regno], (stream));
1035 }
1036 else if (MEM_P (x))
1037 {
1038 x = adjust_address (x, SImode, 4 * MSW);
1039 sh_print_operand_address (stream, XEXP (x, 0));
1040 }
1041 else
1042 {
1043 rtx sub = NULL_RTX;
1044
1045 mode = GET_MODE (x);
1046 if (mode == VOIDmode)
1047 mode = DImode;
1048 if (GET_MODE_SIZE (mode) >= 8)
1049 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1050 if (sub)
1051 sh_print_operand (stream, sub, 0);
1052 else
1053 output_operand_lossage ("invalid operand to %%S");
1054 }
1055 break;
1056 case 'T':
1057 /* Next word of a double. */
1058 switch (GET_CODE (x))
1059 {
1060 case REG:
1061 fputs (reg_names[REGNO (x) + 1], (stream));
1062 break;
1063 case MEM:
1064 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1065 && GET_CODE (XEXP (x, 0)) != POST_INC)
1066 x = adjust_address (x, SImode, 4);
1067 sh_print_operand_address (stream, XEXP (x, 0));
1068 break;
1069 default:
1070 break;
1071 }
1072 break;
1073
1074 case 't':
1075 gcc_assert (MEM_P (x));
1076 x = XEXP (x, 0);
1077 switch (GET_CODE (x))
1078 {
1079 case REG:
1080 case SUBREG:
1081 sh_print_operand (stream, x, 0);
1082 break;
1083 default:
1084 break;
1085 }
1086 break;
1087
1088 case 'o':
1089 switch (GET_CODE (x))
1090 {
1091 case PLUS: fputs ("add", stream); break;
1092 case MINUS: fputs ("sub", stream); break;
1093 case MULT: fputs ("mul", stream); break;
1094 case DIV: fputs ("div", stream); break;
1095 case EQ: fputs ("eq", stream); break;
1096 case NE: fputs ("ne", stream); break;
1097 case GT: case LT: fputs ("gt", stream); break;
1098 case GE: case LE: fputs ("ge", stream); break;
1099 case GTU: case LTU: fputs ("gtu", stream); break;
1100 case GEU: case LEU: fputs ("geu", stream); break;
1101 default:
1102 break;
1103 }
1104 break;
1105 case 'M':
1106 if (TARGET_SHMEDIA)
1107 {
1108 if (MEM_P (x)
1109 && GET_CODE (XEXP (x, 0)) == PLUS
1110 && (REG_P (XEXP (XEXP (x, 0), 1))
1111 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1112 fputc ('x', stream);
1113 }
1114 else
1115 {
1116 if (MEM_P (x))
1117 {
1118 switch (GET_MODE (x))
1119 {
1120 case QImode: fputs (".b", stream); break;
1121 case HImode: fputs (".w", stream); break;
1122 case SImode: fputs (".l", stream); break;
1123 case SFmode: fputs (".s", stream); break;
1124 case DFmode: fputs (".d", stream); break;
1125 default: gcc_unreachable ();
1126 }
1127 }
1128 }
1129 break;
1130
1131 case 'm':
1132 gcc_assert (MEM_P (x));
1133 x = XEXP (x, 0);
1134 /* Fall through. */
1135 case 'U':
1136 switch (GET_CODE (x))
1137 {
1138 case REG:
1139 case SUBREG:
1140 sh_print_operand (stream, x, 0);
1141 fputs (", 0", stream);
1142 break;
1143
1144 case PLUS:
1145 sh_print_operand (stream, XEXP (x, 0), 0);
1146 fputs (", ", stream);
1147 sh_print_operand (stream, XEXP (x, 1), 0);
1148 break;
1149
1150 default:
1151 gcc_unreachable ();
1152 }
1153 break;
1154
1155 case 'V':
1156 {
1157 int num = exact_log2 (INTVAL (x));
1158 gcc_assert (num >= 0);
1159 fprintf (stream, "#%d", num);
1160 }
1161 break;
1162
1163 case 'W':
1164 {
1165 int num = exact_log2 (~INTVAL (x));
1166 gcc_assert (num >= 0);
1167 fprintf (stream, "#%d", num);
1168 }
1169 break;
1170
1171 case 'd':
1172 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1173
1174 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1175 break;
1176
1177 case 'N':
1178 if (x == CONST0_RTX (GET_MODE (x)))
1179 {
1180 fprintf ((stream), "r63");
1181 break;
1182 }
1183 goto default_output;
1184 case 'u':
1185 if (CONST_INT_P (x))
1186 {
1187 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1188 break;
1189 }
1190 /* Fall through. */
1191
1192 default_output:
1193 default:
1194 regno = 0;
1195 mode = GET_MODE (x);
1196
1197 switch (GET_CODE (x))
1198 {
1199 case TRUNCATE:
1200 {
1201 rtx inner = XEXP (x, 0);
1202 int offset = 0;
1203 enum machine_mode inner_mode;
1204
1205 /* We might see SUBREGs with vector mode registers inside. */
1206 if (GET_CODE (inner) == SUBREG
1207 && (GET_MODE_SIZE (GET_MODE (inner))
1208 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1209 && subreg_lowpart_p (inner))
1210 inner = SUBREG_REG (inner);
1211 if (CONST_INT_P (inner))
1212 {
1213 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1214 goto default_output;
1215 }
1216 inner_mode = GET_MODE (inner);
1217 if (GET_CODE (inner) == SUBREG
1218 && (GET_MODE_SIZE (GET_MODE (inner))
1219 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1220 && REG_P (SUBREG_REG (inner)))
1221 {
1222 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1223 GET_MODE (SUBREG_REG (inner)),
1224 SUBREG_BYTE (inner),
1225 GET_MODE (inner));
1226 inner = SUBREG_REG (inner);
1227 }
1228 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1229 abort ();
1230 /* Floating point register pairs are always big endian;
1231 general purpose registers are 64 bit wide. */
1232 regno = REGNO (inner);
1233 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1234 - HARD_REGNO_NREGS (regno, mode))
1235 + offset;
1236 x = inner;
1237 goto reg;
1238 }
1239 case SIGN_EXTEND:
1240 x = XEXP (x, 0);
1241 goto reg;
1242 /* FIXME: We need this on SHmedia32 because reload generates
1243 some sign-extended HI or QI loads into DImode registers
1244 but, because Pmode is SImode, the address ends up with a
1245 subreg:SI of the DImode register. Maybe reload should be
1246 fixed so as to apply alter_subreg to such loads? */
1247 case IF_THEN_ELSE:
1248 gcc_assert (trapping_target_operand (x, VOIDmode));
1249 x = XEXP (XEXP (x, 2), 0);
1250 goto default_output;
1251 case SUBREG:
1252 gcc_assert (SUBREG_BYTE (x) == 0
1253 && REG_P (SUBREG_REG (x)));
1254
1255 x = SUBREG_REG (x);
1256 /* Fall through. */
1257
1258 reg:
1259 case REG:
1260 regno += REGNO (x);
1261 if (FP_REGISTER_P (regno)
1262 && mode == V16SFmode)
1263 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1264 else if (FP_REGISTER_P (REGNO (x))
1265 && mode == V4SFmode)
1266 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1267 else if (REG_P (x)
1268 && mode == V2SFmode)
1269 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1270 else if (FP_REGISTER_P (REGNO (x))
1271 && GET_MODE_SIZE (mode) > 4)
1272 fprintf ((stream), "d%s", reg_names[regno] + 1);
1273 else
1274 fputs (reg_names[regno], (stream));
1275 break;
1276
1277 case MEM:
1278 output_address (XEXP (x, 0));
1279 break;
1280
1281 default:
1282 if (TARGET_SH1)
1283 fputc ('#', stream);
1284 output_addr_const (stream, x);
1285 break;
1286 }
1287 break;
1288 }
1289 }
1290
1291 static bool
1292 sh_print_operand_punct_valid_p (unsigned char code)
1293 {
1294 return (code == '.' || code == '#' || code == '@' || code == ','
1295 || code == '$' || code == '\'' || code == '>');
1296 }
1297
1298 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1299
1300 static bool
1301 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1302 {
1303 if (GET_CODE (x) == UNSPEC)
1304 {
1305 switch (XINT (x, 1))
1306 {
1307 case UNSPEC_DATALABEL:
1308 fputs ("datalabel ", file);
1309 output_addr_const (file, XVECEXP (x, 0, 0));
1310 break;
1311 case UNSPEC_PIC:
1312 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1313 output_addr_const (file, XVECEXP (x, 0, 0));
1314 break;
1315 case UNSPEC_GOT:
1316 output_addr_const (file, XVECEXP (x, 0, 0));
1317 fputs ("@GOT", file);
1318 break;
1319 case UNSPEC_GOTOFF:
1320 output_addr_const (file, XVECEXP (x, 0, 0));
1321 fputs ("@GOTOFF", file);
1322 break;
1323 case UNSPEC_PLT:
1324 output_addr_const (file, XVECEXP (x, 0, 0));
1325 fputs ("@PLT", file);
1326 break;
1327 case UNSPEC_GOTPLT:
1328 output_addr_const (file, XVECEXP (x, 0, 0));
1329 fputs ("@GOTPLT", file);
1330 break;
1331 case UNSPEC_DTPOFF:
1332 output_addr_const (file, XVECEXP (x, 0, 0));
1333 fputs ("@DTPOFF", file);
1334 break;
1335 case UNSPEC_GOTTPOFF:
1336 output_addr_const (file, XVECEXP (x, 0, 0));
1337 fputs ("@GOTTPOFF", file);
1338 break;
1339 case UNSPEC_TPOFF:
1340 output_addr_const (file, XVECEXP (x, 0, 0));
1341 fputs ("@TPOFF", file);
1342 break;
1343 case UNSPEC_CALLER:
1344 {
1345 char name[32];
1346 /* LPCS stands for Label for PIC Call Site. */
1347 targetm.asm_out.generate_internal_label (name, "LPCS",
1348 INTVAL (XVECEXP (x, 0, 0)));
1349 assemble_name (file, name);
1350 }
1351 break;
1352 case UNSPEC_EXTRACT_S16:
1353 case UNSPEC_EXTRACT_U16:
1354 {
1355 rtx val, shift;
1356
1357 val = XVECEXP (x, 0, 0);
1358 shift = XVECEXP (x, 0, 1);
1359 fputc ('(', file);
1360 if (shift != const0_rtx)
1361 fputc ('(', file);
1362 if (GET_CODE (val) == CONST
1363 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1364 {
1365 fputc ('(', file);
1366 output_addr_const (file, val);
1367 fputc (')', file);
1368 }
1369 else
1370 output_addr_const (file, val);
1371 if (shift != const0_rtx)
1372 {
1373 fputs (" >> ", file);
1374 output_addr_const (file, shift);
1375 fputc (')', file);
1376 }
1377 fputs (" & 65535)", file);
1378 }
1379 break;
1380 case UNSPEC_SYMOFF:
1381 output_addr_const (file, XVECEXP (x, 0, 0));
1382 fputc ('-', file);
1383 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1384 {
1385 fputc ('(', file);
1386 output_addr_const (file, XVECEXP (x, 0, 1));
1387 fputc (')', file);
1388 }
1389 else
1390 output_addr_const (file, XVECEXP (x, 0, 1));
1391 break;
1392 case UNSPEC_PCREL_SYMOFF:
1393 output_addr_const (file, XVECEXP (x, 0, 0));
1394 fputs ("-(", file);
1395 output_addr_const (file, XVECEXP (x, 0, 1));
1396 fputs ("-.)", file);
1397 break;
1398 default:
1399 return false;
1400 }
1401 return true;
1402 }
1403 else
1404 return false;
1405 }
1406 \f
1407
1408 /* Encode symbol attributes of a SYMBOL_REF into its
1409 SYMBOL_REF_FLAGS. */
1410 static void
1411 sh_encode_section_info (tree decl, rtx rtl, int first)
1412 {
1413 default_encode_section_info (decl, rtl, first);
1414
1415 if (TREE_CODE (decl) == FUNCTION_DECL
1416 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1417 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1418 }
1419
1420 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1421 static void
1422 force_into (rtx value, rtx target)
1423 {
1424 value = force_operand (value, target);
1425 if (! rtx_equal_p (value, target))
1426 emit_insn (gen_move_insn (target, value));
1427 }
1428
1429 /* Emit code to perform a block move. Choose the best method.
1430
1431 OPERANDS[0] is the destination.
1432 OPERANDS[1] is the source.
1433 OPERANDS[2] is the size.
1434 OPERANDS[3] is the alignment safe to use. */
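/* Return nonzero if a copy sequence was emitted, zero otherwise.  */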
1435
1436 int
1437 expand_block_move (rtx *operands)
1438 {
1439 int align = INTVAL (operands[3]);
1440 int constp = (CONST_INT_P (operands[2]));
1441 int bytes = (constp ? INTVAL (operands[2]) : 0);
1442
1443 if (! constp)
1444 return 0;
1445
1446 /* If we could use mov.l to move words and dest is word-aligned, we
1447 can use movua.l for loads and still generate a relatively short
1448 and efficient sequence. */
1449 if (TARGET_SH4A_ARCH && align < 4
1450 && MEM_ALIGN (operands[0]) >= 32
1451 && can_move_by_pieces (bytes, 32))
1452 {
1453 rtx dest = copy_rtx (operands[0]);
1454 rtx src = copy_rtx (operands[1]);
1455 /* We could use different pseudos for each copied word, but
1456 since movua can only load into r0, it's kind of
1457 pointless. */
1458 rtx temp = gen_reg_rtx (SImode);
1459 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1460 int copied = 0;
1461
1462 while (copied + 4 <= bytes)
1463 {
1464 rtx to = adjust_address (dest, SImode, copied);
1465 rtx from = adjust_automodify_address (src, BLKmode,
1466 src_addr, copied);
1467
1468 set_mem_size (from, 4);
1469 emit_insn (gen_movua (temp, from));
1470 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1471 emit_move_insn (to, temp);
1472 copied += 4;
1473 }
1474
1475 if (copied < bytes)
1476 move_by_pieces (adjust_address (dest, BLKmode, copied),
1477 adjust_automodify_address (src, BLKmode,
1478 src_addr, copied),
1479 bytes - copied, align, 0);
1480
1481 return 1;
1482 }
1483
1484 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1485 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1486 if (align < 4 || (bytes % 4 != 0))
1487 return 0;
1488
1489 if (TARGET_HARD_SH4)
1490 {
1491 if (bytes < 12)
1492 return 0;
1493 else if (bytes == 12)
1494 {
1495 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1496 rtx r4 = gen_rtx_REG (SImode, 4);
1497 rtx r5 = gen_rtx_REG (SImode, 5);
1498
1499 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1500 force_into (XEXP (operands[0], 0), r4);
1501 force_into (XEXP (operands[1], 0), r5);
1502 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1503 return 1;
1504 }
1505 else if (! optimize_size)
1506 {
1507 const char *entry_name;
1508 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1509 int dwords;
1510 rtx r4 = gen_rtx_REG (SImode, 4);
1511 rtx r5 = gen_rtx_REG (SImode, 5);
1512 rtx r6 = gen_rtx_REG (SImode, 6);
1513
1514 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1515 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1516 force_into (XEXP (operands[0], 0), r4);
1517 force_into (XEXP (operands[1], 0), r5);
1518
1519 dwords = bytes >> 3;
1520 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1521 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1522 return 1;
1523 }
1524 else
1525 return 0;
1526 }
1527 if (bytes < 64)
1528 {
1529 char entry[30];
1530 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1531 rtx r4 = gen_rtx_REG (SImode, 4);
1532 rtx r5 = gen_rtx_REG (SImode, 5);
1533
1534 sprintf (entry, "__movmemSI%d", bytes);
1535 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1536 force_into (XEXP (operands[0], 0), r4);
1537 force_into (XEXP (operands[1], 0), r5);
1538 emit_insn (gen_block_move_real (func_addr_rtx));
1539 return 1;
1540 }
1541
1542 /* This is the same number of bytes as a memcpy call, but to a different
1543 less common function name, so this will occasionally use more space. */
1544 if (! optimize_size)
1545 {
1546 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1547 int final_switch, while_loop;
1548 rtx r4 = gen_rtx_REG (SImode, 4);
1549 rtx r5 = gen_rtx_REG (SImode, 5);
1550 rtx r6 = gen_rtx_REG (SImode, 6);
1551
1552 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1553 force_into (XEXP (operands[0], 0), r4);
1554 force_into (XEXP (operands[1], 0), r5);
1555
1556 /* r6 controls the size of the move. 16 is decremented from it
1557 for each 64 bytes moved. Then the negative bit left over is used
1558 as an index into a list of move instructions. e.g., a 72 byte move
1559 would be set up with size(r6) = 14, for one iteration through the
1560 big while loop, and a switch of -2 for the last part. */
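      /* For example, a 72 byte move has bytes / 4 = 18 words, giving
	 while_loop = 0 and final_switch = 16 - 2 = 14, so r6 starts at 14;
	 after the first 64 bytes r6 is -2, which selects the moves for the
	 remaining 8 bytes.  */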
1561
1562 final_switch = 16 - ((bytes / 4) % 16);
1563 while_loop = ((bytes / 4) / 16 - 1) * 16;
1564 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1565 emit_insn (gen_block_lump_real (func_addr_rtx));
1566 return 1;
1567 }
1568
1569 return 0;
1570 }
1571
1572 /* Prepare operands for a move define_expand; specifically, one of the
1573 operands must be in a register. */
1574
1575 int
1576 prepare_move_operands (rtx operands[], enum machine_mode mode)
1577 {
1578 if ((mode == SImode || mode == DImode)
1579 && flag_pic
1580 && ! ((mode == Pmode || mode == ptr_mode)
1581 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1582 {
1583 rtx temp;
1584 if (SYMBOLIC_CONST_P (operands[1]))
1585 {
1586 if (MEM_P (operands[0]))
1587 operands[1] = force_reg (Pmode, operands[1]);
1588 else if (TARGET_SHMEDIA
1589 && GET_CODE (operands[1]) == LABEL_REF
1590 && target_reg_operand (operands[0], mode))
1591 /* It's ok. */;
1592 else
1593 {
1594 temp = (!can_create_pseudo_p ()
1595 ? operands[0]
1596 : gen_reg_rtx (Pmode));
1597 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1598 }
1599 }
1600 else if (GET_CODE (operands[1]) == CONST
1601 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1602 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1603 {
1604 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1605 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1606 mode, temp);
1607 operands[1] = expand_binop (mode, add_optab, temp,
1608 XEXP (XEXP (operands[1], 0), 1),
1609 (!can_create_pseudo_p ()
1610 ? temp
1611 : gen_reg_rtx (Pmode)),
1612 0, OPTAB_LIB_WIDEN);
1613 }
1614 }
1615
1616 if (! reload_in_progress && ! reload_completed)
1617 {
 1618       /* Copy the source to a register if neither operand is a register.  */
1619 if (! register_operand (operands[0], mode)
1620 && ! sh_register_operand (operands[1], mode))
1621 operands[1] = copy_to_mode_reg (mode, operands[1]);
1622
1623 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1624 {
 1625          /* This is like change_address_1 (operands[0], mode, 0, 1),
1626 except that we can't use that function because it is static. */
1627 rtx new_rtx = change_address (operands[0], mode, 0);
1628 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1629 operands[0] = new_rtx;
1630 }
1631
1632 /* This case can happen while generating code to move the result
1633 of a library call to the target. Reject `st r0,@(rX,rY)' because
1634 reload will fail to find a spill register for rX, since r0 is already
1635 being used for the source. */
1636 else if (TARGET_SH1
1637 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1638 && MEM_P (operands[0])
1639 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1640 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1641 operands[1] = copy_to_mode_reg (mode, operands[1]);
1642 }
1643
1644 if (mode == Pmode || mode == ptr_mode)
1645 {
1646 rtx op0, op1, opc;
1647 enum tls_model tls_kind;
1648
1649 op0 = operands[0];
1650 op1 = operands[1];
1651 if (GET_CODE (op1) == CONST
1652 && GET_CODE (XEXP (op1, 0)) == PLUS
1653 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1654 != TLS_MODEL_NONE))
1655 {
1656 opc = XEXP (XEXP (op1, 0), 1);
1657 op1 = XEXP (XEXP (op1, 0), 0);
1658 }
1659 else
1660 opc = NULL_RTX;
1661
1662 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1663 {
1664 rtx tga_op1, tga_ret, tmp, tmp2;
1665
1666 if (! flag_pic
1667 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1668 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1669 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1670 {
1671 /* Don't schedule insns for getting GOT address when
1672 the first scheduling pass is enabled, to avoid spill
1673 failures for R0. */
1674 if (flag_schedule_insns)
1675 emit_insn (gen_blockage ());
1676 emit_insn (gen_GOTaddr2picreg ());
1677 emit_use (gen_rtx_REG (SImode, PIC_REG));
1678 if (flag_schedule_insns)
1679 emit_insn (gen_blockage ());
1680 }
1681
1682 switch (tls_kind)
1683 {
1684 case TLS_MODEL_GLOBAL_DYNAMIC:
1685 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1686 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1687 tmp = gen_reg_rtx (Pmode);
1688 emit_move_insn (tmp, tga_ret);
1689 op1 = tmp;
1690 break;
1691
1692 case TLS_MODEL_LOCAL_DYNAMIC:
1693 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1694 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1695
1696 tmp = gen_reg_rtx (Pmode);
1697 emit_move_insn (tmp, tga_ret);
1698
1699 if (register_operand (op0, Pmode))
1700 tmp2 = op0;
1701 else
1702 tmp2 = gen_reg_rtx (Pmode);
1703
1704 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1705 op1 = tmp2;
1706 break;
1707
1708 case TLS_MODEL_INITIAL_EXEC:
1709 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1710 tmp = gen_sym2GOTTPOFF (op1);
1711 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1712 op1 = tga_op1;
1713 break;
1714
1715 case TLS_MODEL_LOCAL_EXEC:
1716 tmp2 = gen_reg_rtx (Pmode);
1717 emit_insn (gen_load_gbr (tmp2));
1718 tmp = gen_reg_rtx (Pmode);
1719 emit_insn (gen_symTPOFF2reg (tmp, op1));
1720
1721 if (register_operand (op0, Pmode))
1722 op1 = op0;
1723 else
1724 op1 = gen_reg_rtx (Pmode);
1725
1726 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1727 break;
1728
1729 default:
1730 gcc_unreachable ();
1731 }
1732 if (opc)
1733 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1734 operands[1] = op1;
1735 }
1736 }
1737
1738 return 0;
1739 }
1740
1741 enum rtx_code
1742 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1743 enum rtx_code comparison)
1744 {
1745 rtx op1;
1746 rtx scratch = NULL_RTX;
1747
1748 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1749 comparison = GET_CODE (operands[0]);
1750 else
1751 scratch = operands[4];
1752 if (CONST_INT_P (operands[1])
1753 && !CONST_INT_P (operands[2]))
1754 {
1755 rtx tmp = operands[1];
1756
1757 operands[1] = operands[2];
1758 operands[2] = tmp;
1759 comparison = swap_condition (comparison);
1760 }
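/* The rewrites below trade the constant for an adjacent value when that
   allows a cheaper comparison; e.g. (x > -1) becomes (x >= 0) and
   (x <u 1) becomes (x == 0).  (Illustrative summary of the cases handled.)  */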
1761 if (CONST_INT_P (operands[2]))
1762 {
1763 HOST_WIDE_INT val = INTVAL (operands[2]);
1764 if ((val == -1 || val == -0x81)
1765 && (comparison == GT || comparison == LE))
1766 {
1767 comparison = (comparison == GT) ? GE : LT;
1768 operands[2] = gen_int_mode (val + 1, mode);
1769 }
1770 else if ((val == 1 || val == 0x80)
1771 && (comparison == GE || comparison == LT))
1772 {
1773 comparison = (comparison == GE) ? GT : LE;
1774 operands[2] = gen_int_mode (val - 1, mode);
1775 }
1776 else if (val == 1 && (comparison == GEU || comparison == LTU))
1777 {
1778 comparison = (comparison == GEU) ? NE : EQ;
1779 operands[2] = CONST0_RTX (mode);
1780 }
1781 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1782 {
1783 comparison = (comparison == GEU) ? GTU : LEU;
1784 operands[2] = gen_int_mode (val - 1, mode);
1785 }
1786 else if (val == 0 && (comparison == GTU || comparison == LEU))
1787 comparison = (comparison == GTU) ? NE : EQ;
1788 else if (mode == SImode
1789 && ((val == 0x7fffffff
1790 && (comparison == GTU || comparison == LEU))
1791 || ((unsigned HOST_WIDE_INT) val
1792 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1793 && (comparison == GEU || comparison == LTU))))
1794 {
1795 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1796 operands[2] = CONST0_RTX (mode);
1797 }
1798 }
1799 op1 = operands[1];
1800 if (can_create_pseudo_p ())
1801 operands[1] = force_reg (mode, op1);
1802 /* When we are handling DImode comparisons, we want to keep constants so
1803 that we can optimize the component comparisons; however, memory loads
1804 are better issued as a whole so that they can be scheduled well.
1805 SImode equality comparisons allow I08 constants, but only when they
1806 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1807 into a register, that register might as well be r0, and we allow the
1808 constant. If it is already in a register, this is likely to be
1809 allocated to a different hard register, thus we load the constant into
1810 a register unless it is zero. */
1811 if (!REG_P (operands[2])
1812 && (!CONST_INT_P (operands[2])
1813 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1814 && ((comparison != EQ && comparison != NE)
1815 || (REG_P (op1) && REGNO (op1) != R0_REG)
1816 || !satisfies_constraint_I08 (operands[2])))))
1817 {
1818 if (scratch && GET_MODE (scratch) == mode)
1819 {
1820 emit_move_insn (scratch, operands[2]);
1821 operands[2] = scratch;
1822 }
1823 else if (can_create_pseudo_p ())
1824 operands[2] = force_reg (mode, operands[2]);
1825 }
1826 return comparison;
1827 }
1828
1829 void
1830 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1831 {
1832 rtx (*branch_expander) (rtx) = gen_branch_true;
1833 rtx jump;
1834
1835 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1836 switch (comparison)
1837 {
1838 case NE: case LT: case LE: case LTU: case LEU:
1839 comparison = reverse_condition (comparison);
1840 branch_expander = gen_branch_false;
1841 default: ;
1842 }
1843 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1844 gen_rtx_fmt_ee (comparison, SImode,
1845 operands[1], operands[2])));
1846 jump = emit_jump_insn (branch_expander (operands[3]));
1847 if (probability >= 0)
1848 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1849
1850 }
1851
1852 /* ??? How should we distribute probabilities when more than one branch
1853 is generated? So far we only have some ad-hoc observations:
1854 - If the operands are random, they are likely to differ in both parts.
1855 - If comparing items in a hash chain, the operands are random or equal;
1856 operation should be EQ or NE.
1857 - If items are searched in an ordered tree from the root, we can expect
1858 the highpart to be unequal about half of the time; operation should be
1859 an inequality comparison, operands non-constant, and overall probability
1860 about 50%. Likewise for quicksort.
1861 - Range checks will be often made against constants. Even if we assume for
1862 simplicity an even distribution of the non-constant operand over a
1863 sub-range here, the same probability could be generated with differently
1864 wide sub-ranges - as long as the ratio of the part of the subrange that
1865 is before the threshold to the part that comes after the threshold stays
1866 the same. Thus, we can't really tell anything here;
1867 assuming random distribution is at least simple.
1868 */
1869
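/* Expand a DImode conditional branch by splitting it into SImode pieces.
   Illustrative sketch for a plain signed GT: branch to the target if the
   high words compare GT, skip past the low word test if they compare LT,
   and otherwise branch if the low words compare GTU.  */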
1870 bool
1871 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1872 {
1873 enum rtx_code msw_taken, msw_skip, lsw_taken;
1874 rtx skip_label = NULL_RTX;
1875 rtx op1h, op1l, op2h, op2l;
1876 int num_branches;
1877 int prob, rev_prob;
1878 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1879 rtx scratch = operands[4];
1880
1881 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1882 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1883 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1884 op1l = gen_lowpart (SImode, operands[1]);
1885 op2l = gen_lowpart (SImode, operands[2]);
1886 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1887 prob = split_branch_probability;
1888 rev_prob = REG_BR_PROB_BASE - prob;
1889 switch (comparison)
1890 {
1891 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1892 That costs 1 cycle more when the first branch can be predicted taken,
1893 but saves us mispredicts because only one branch needs prediction.
1894 It also enables generating the cmpeqdi_t-1 pattern. */
1895 case EQ:
1896 if (TARGET_CMPEQDI_T)
1897 {
1898 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1899 emit_jump_insn (gen_branch_true (operands[3]));
1900 return true;
1901 }
1902 msw_skip = NE;
1903 lsw_taken = EQ;
1904 if (prob >= 0)
1905 {
1906 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1907 */
1908 msw_skip_prob = rev_prob;
1909 if (REG_BR_PROB_BASE <= 65535)
1910 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1911 else
1912 {
1913 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1914 lsw_taken_prob
1915 = (prob
1916 ? (REG_BR_PROB_BASE
1917 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1918 / ((HOST_WIDEST_INT) prob << 32)))
1919 : 0);
1920 }
1921 }
1922 break;
1923 case NE:
1924 if (TARGET_CMPEQDI_T)
1925 {
1926 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1927 emit_jump_insn (gen_branch_false (operands[3]));
1928 return true;
1929 }
1930 msw_taken = NE;
1931 msw_taken_prob = prob;
1932 lsw_taken = NE;
1933 lsw_taken_prob = 0;
1934 break;
1935 case GTU: case GT:
1936 msw_taken = comparison;
1937 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1938 break;
1939 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1940 msw_skip = swap_condition (msw_taken);
1941 lsw_taken = GTU;
1942 break;
1943 case GEU: case GE:
1944 if (op2l == CONST0_RTX (SImode))
1945 msw_taken = comparison;
1946 else
1947 {
1948 msw_taken = comparison == GE ? GT : GTU;
1949 msw_skip = swap_condition (msw_taken);
1950 lsw_taken = GEU;
1951 }
1952 break;
1953 case LTU: case LT:
1954 msw_taken = comparison;
1955 if (op2l == CONST0_RTX (SImode))
1956 break;
1957 msw_skip = swap_condition (msw_taken);
1958 lsw_taken = LTU;
1959 break;
1960 case LEU: case LE:
1961 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1962 msw_taken = comparison;
1963 else
1964 {
1965 lsw_taken = LEU;
1966 if (comparison == LE)
1967 msw_taken = LT;
1968 else if (op2h != CONST0_RTX (SImode))
1969 msw_taken = LTU;
1970 else
1971 {
1972 msw_skip = swap_condition (LTU);
1973 break;
1974 }
1975 msw_skip = swap_condition (msw_taken);
1976 }
1977 break;
1978 default: return false;
1979 }
1980 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1981 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1982 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1983 if (comparison != EQ && comparison != NE && num_branches > 1)
1984 {
1985 if (!CONSTANT_P (operands[2])
1986 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1987 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1988 {
1989 msw_taken_prob = prob / 2U;
1990 msw_skip_prob
1991 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1992 lsw_taken_prob = prob;
1993 }
1994 else
1995 {
1996 msw_taken_prob = prob;
1997 msw_skip_prob = REG_BR_PROB_BASE;
1998 /* ??? If we have a constant op2h, should we use that when
1999 calculating lsw_taken_prob? */
2000 lsw_taken_prob = prob;
2001 }
2002 }
2003 operands[1] = op1h;
2004 operands[2] = op2h;
2005 operands[4] = NULL_RTX;
2006 if (reload_completed
2007 && ! arith_reg_or_0_operand (op2h, SImode)
2008 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2009 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2010 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2011 {
2012 emit_move_insn (scratch, operands[2]);
2013 operands[2] = scratch;
2014 }
2015 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2016 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2017 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2018 {
2019 rtx taken_label = operands[3];
2020
2021 /* Operands were possibly modified, but msw_skip doesn't expect this.
2022 Always use the original ones. */
2023 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2024 {
2025 operands[1] = op1h;
2026 operands[2] = op2h;
2027 if (reload_completed
2028 && ! arith_reg_or_0_operand (op2h, SImode)
2029 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2030 {
2031 emit_move_insn (scratch, operands[2]);
2032 operands[2] = scratch;
2033 }
2034 }
2035
2036 operands[3] = skip_label = gen_label_rtx ();
2037 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2038 operands[3] = taken_label;
2039 }
2040 operands[1] = op1l;
2041 operands[2] = op2l;
2042 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2043 {
2044 if (reload_completed
2045 && ! arith_reg_or_0_operand (op2l, SImode)
2046 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2047 {
2048 emit_move_insn (scratch, operands[2]);
2049 operands[2] = scratch;
2050 }
2051 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2052 }
2053 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2054 emit_label (skip_label);
2055 return true;
2056 }
2057
2058 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2059
2060 static void
2061 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2062 {
2063 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2064 {
2065 insn = gen_rtx_PARALLEL (VOIDmode,
2066 gen_rtvec (2, insn,
2067 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2068 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2069 }
2070 else
2071 emit_insn (insn);
2072 }
2073
2074 /* Prepare the operands for an scc instruction; make sure that the
2075 compare has been done and the result is in T_REG. */
2076 void
2077 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2078 {
2079 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2080 enum rtx_code oldcode = code;
2081 enum machine_mode mode;
2082
2083 /* First need a compare insn. */
2084 switch (code)
2085 {
2086 case NE:
2087 /* It isn't possible to handle this case. */
2088 gcc_unreachable ();
2089 case LT:
2090 code = GT;
2091 break;
2092 case LE:
2093 code = GE;
2094 break;
2095 case LTU:
2096 code = GTU;
2097 break;
2098 case LEU:
2099 code = GEU;
2100 break;
2101 default:
2102 break;
2103 }
2104 if (code != oldcode)
2105 {
2106 rtx tmp = op0;
2107 op0 = op1;
2108 op1 = tmp;
2109 }
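/* (Only EQ, GT, GE, GTU and GEU compare-and-set-T forms are generated here;
   LT, LE, LTU and LEU were handled above by reversing the code and swapping
   the operands.)  */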
2110
2111 mode = GET_MODE (op0);
2112 if (mode == VOIDmode)
2113 mode = GET_MODE (op1);
2114
2115 op0 = force_reg (mode, op0);
2116 if ((code != EQ && code != NE
2117 && (op1 != const0_rtx
2118 || code == GTU || code == GEU || code == LTU || code == LEU))
2119 || (mode == DImode && op1 != const0_rtx)
2120 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2121 op1 = force_reg (mode, op1);
2122
2123 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2124 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2125 mode);
2126 }
2127
2128 rtx
2129 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2130 rtx op0, rtx op1)
2131 {
2132 rtx target = gen_reg_rtx (SImode);
2133 rtx tmp;
2134
2135 gcc_assert (TARGET_SHMEDIA);
2136 switch (code)
2137 {
2138 case EQ:
2139 case GT:
2140 case LT:
2141 case UNORDERED:
2142 case GTU:
2143 case LTU:
2144 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2145 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2146 code = NE;
2147 break;
2148
2149 case NE:
2150 case GE:
2151 case LE:
2152 case ORDERED:
2153 case GEU:
2154 case LEU:
2155 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2156 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2157 code = EQ;
2158 break;
2159
2160 case UNEQ:
2161 case UNGE:
2162 case UNGT:
2163 case UNLE:
2164 case UNLT:
2165 case LTGT:
2166 return NULL_RTX;
2167
2168 default:
2169 gcc_unreachable ();
2170 }
2171
2172 if (mode == DImode)
2173 {
2174 rtx t2 = gen_reg_rtx (DImode);
2175 emit_insn (gen_extendsidi2 (t2, target));
2176 target = t2;
2177 }
2178
2179 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2180 }
2181
2182 /* Called from the md file, set up the operands of a compare instruction. */
2183
2184 void
2185 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2186 {
2187 enum rtx_code code = GET_CODE (operands[0]);
2188 enum rtx_code branch_code;
2189 rtx op0 = operands[1];
2190 rtx op1 = operands[2];
2191 rtx insn, tem;
2192 bool need_ccmpeq = false;
2193
2194 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2195 {
2196 op0 = force_reg (mode, op0);
2197 op1 = force_reg (mode, op1);
2198 }
2199 else
2200 {
2201 if (code != EQ || mode == DImode)
2202 {
2203 /* Force args into regs, since we can't use constants here. */
2204 op0 = force_reg (mode, op0);
2205 if (op1 != const0_rtx || code == GTU || code == GEU)
2206 op1 = force_reg (mode, op1);
2207 }
2208 }
2209
2210 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2211 {
2212 if (code == LT
2213 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2214 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2215 {
2216 tem = op0, op0 = op1, op1 = tem;
2217 code = swap_condition (code);
2218 }
2219
2220 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2221 if (code == GE)
2222 {
2223 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2224 need_ccmpeq = true;
2225 code = GT;
2226 }
2227
2228 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2229 to EQ/GT respectively. */
2230 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2231 }
2232
2233 switch (code)
2234 {
2235 case EQ:
2236 case GT:
2237 case GE:
2238 case GTU:
2239 case GEU:
2240 branch_code = code;
2241 break;
2242 case NE:
2243 case LT:
2244 case LE:
2245 case LTU:
2246 case LEU:
2247 branch_code = reverse_condition (code);
2248 break;
2249 default:
2250 gcc_unreachable ();
2251 }
2252
2253 insn = gen_rtx_SET (VOIDmode,
2254 gen_rtx_REG (SImode, T_REG),
2255 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2256
2257 sh_emit_set_t_insn (insn, mode);
2258 if (need_ccmpeq)
2259 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2260
2261 if (branch_code == code)
2262 emit_jump_insn (gen_branch_true (operands[3]));
2263 else
2264 emit_jump_insn (gen_branch_false (operands[3]));
2265 }
2266
2267 void
2268 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2269 {
2270 enum rtx_code code = GET_CODE (operands[1]);
2271 rtx op0 = operands[2];
2272 rtx op1 = operands[3];
2273 rtx lab = NULL_RTX;
2274 bool invert = false;
2275 rtx tem;
2276
2277 op0 = force_reg (mode, op0);
2278 if ((code != EQ && code != NE
2279 && (op1 != const0_rtx
2280 || code == GTU || code == GEU || code == LTU || code == LEU))
2281 || (mode == DImode && op1 != const0_rtx)
2282 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2283 op1 = force_reg (mode, op1);
2284
2285 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2286 {
2287 if (code == LT || code == LE)
2288 {
2289 code = swap_condition (code);
2290 tem = op0, op0 = op1, op1 = tem;
2291 }
2292 if (code == GE)
2293 {
2294 if (TARGET_IEEE)
2295 {
2296 lab = gen_label_rtx ();
2297 sh_emit_scc_to_t (EQ, op0, op1);
2298 emit_jump_insn (gen_branch_true (lab));
2299 code = GT;
2300 }
2301 else
2302 {
2303 code = LT;
2304 invert = true;
2305 }
2306 }
2307 }
2308
2309 if (code == NE)
2310 {
2311 code = EQ;
2312 invert = true;
2313 }
2314
2315 sh_emit_scc_to_t (code, op0, op1);
2316 if (lab)
2317 emit_label (lab);
2318 if (invert)
2319 emit_insn (gen_movnegt (operands[0]));
2320 else
2321 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2322 }
2323 \f
2324 /* Functions to output assembly code. */
2325
2326 /* Return a sequence of instructions to perform DI or DF move.
2327
2328 Since the SH cannot move a DI or DF in one instruction, we have
2329 to take care when we see overlapping source and dest registers. */
2330
2331 const char *
2332 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2333 enum machine_mode mode)
2334 {
2335 rtx dst = operands[0];
2336 rtx src = operands[1];
2337
2338 if (MEM_P (dst)
2339 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2340 return "mov.l %T1,%0\n\tmov.l %1,%0";
2341
2342 if (register_operand (dst, mode)
2343 && register_operand (src, mode))
2344 {
2345 if (REGNO (src) == MACH_REG)
2346 return "sts mach,%S0\n\tsts macl,%R0";
2347
2348 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2349 when mov.d r1,r0 do r1->r0 then r2->r1. */
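/* (Illustrative restatement: when REGNO (dst) == REGNO (src) + 1, copying the
   first halves first would clobber the second half of the source, so the
   second halves are copied first; otherwise the first halves go first.)  */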
2350
2351 if (REGNO (src) + 1 == REGNO (dst))
2352 return "mov %T1,%T0\n\tmov %1,%0";
2353 else
2354 return "mov %1,%0\n\tmov %T1,%T0";
2355 }
2356 else if (CONST_INT_P (src))
2357 {
2358 if (INTVAL (src) < 0)
2359 output_asm_insn ("mov #-1,%S0", operands);
2360 else
2361 output_asm_insn ("mov #0,%S0", operands);
2362
2363 return "mov %1,%R0";
2364 }
2365 else if (MEM_P (src))
2366 {
2367 int ptrreg = -1;
2368 int dreg = REGNO (dst);
2369 rtx inside = XEXP (src, 0);
2370
2371 switch (GET_CODE (inside))
2372 {
2373 case REG:
2374 ptrreg = REGNO (inside);
2375 break;
2376
2377 case SUBREG:
2378 ptrreg = subreg_regno (inside);
2379 break;
2380
2381 case PLUS:
2382 ptrreg = REGNO (XEXP (inside, 0));
2383 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2384 an offsettable address. Unfortunately, offsettable addresses use
2385 QImode to check the offset, and a QImode offsettable address
2386 requires r0 for the other operand, which is not currently
2387 supported, so we can't use the 'o' constraint.
2388 Thus we must check for and handle r0+REG addresses here.
2389 We punt for now, since this is likely very rare. */
2390 gcc_assert (!REG_P (XEXP (inside, 1)));
2391 break;
2392
2393 case LABEL_REF:
2394 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2395 case POST_INC:
2396 return "mov.l %1,%0\n\tmov.l %1,%T0";
2397 default:
2398 gcc_unreachable ();
2399 }
2400
2401 /* Work out the safe way to copy. Copy into the second half first. */
2402 if (dreg == ptrreg)
2403 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2404 }
2405
2406 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2407 }
2408
2409 /* Print an instruction which would have gone into a delay slot after
2410 another instruction, but couldn't because the other instruction expanded
2411 into a sequence where putting the slot insn at the end wouldn't work. */
2412
2413 static void
2414 print_slot (rtx insn)
2415 {
2416 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2417
2418 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2419 }
2420
2421 const char *
2422 output_far_jump (rtx insn, rtx op)
2423 {
2424 struct { rtx lab, reg, op; } this_jmp;
2425 rtx braf_base_lab = NULL_RTX;
2426 const char *jump;
2427 int far;
2428 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2429 rtx prev;
2430
2431 this_jmp.lab = gen_label_rtx ();
2432
2433 if (TARGET_SH2
2434 && offset >= -32764
2435 && offset - get_attr_length (insn) <= 32766)
2436 {
2437 far = 0;
2438 jump = "mov.w %O0,%1; braf %1";
2439 }
2440 else
2441 {
2442 far = 1;
2443 if (flag_pic)
2444 {
2445 if (TARGET_SH2)
2446 jump = "mov.l %O0,%1; braf %1";
2447 else
2448 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2449 }
2450 else
2451 jump = "mov.l %O0,%1; jmp @%1";
2452 }
2453 /* If we have a scratch register available, use it. */
2454 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2455 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2456 {
2457 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2458 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2459 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2460 output_asm_insn (jump, &this_jmp.lab);
2461 if (dbr_sequence_length ())
2462 print_slot (final_sequence);
2463 else
2464 output_asm_insn ("nop", 0);
2465 }
2466 else
2467 {
2468 /* Output the delay slot insn first if any. */
2469 if (dbr_sequence_length ())
2470 print_slot (final_sequence);
2471
2472 this_jmp.reg = gen_rtx_REG (SImode, 13);
2473 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2474 Fortunately, MACL is fixed and call-clobbered, and we never
2475 need its value across jumps, so save r13 in it instead of in
2476 the stack. */
2477 if (TARGET_SH5)
2478 output_asm_insn ("lds r13, macl", 0);
2479 else
2480 output_asm_insn ("mov.l r13,@-r15", 0);
2481 output_asm_insn (jump, &this_jmp.lab);
2482 if (TARGET_SH5)
2483 output_asm_insn ("sts macl, r13", 0);
2484 else
2485 output_asm_insn ("mov.l @r15+,r13", 0);
2486 }
2487 if (far && flag_pic && TARGET_SH2)
2488 {
2489 braf_base_lab = gen_label_rtx ();
2490 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2491 CODE_LABEL_NUMBER (braf_base_lab));
2492 }
2493 if (far)
2494 output_asm_insn (".align 2", 0);
2495 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2496 this_jmp.op = op;
2497 if (far && flag_pic)
2498 {
2499 if (TARGET_SH2)
2500 this_jmp.lab = braf_base_lab;
2501 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2502 }
2503 else
2504 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2505 return "";
2506 }
2507
2508 /* Local label counter, used for constants in the pool and inside
2509 pattern branches. */
2510
2511 static int lf = 100;
2512
2513 /* Output code for ordinary branches. */
2514
2515 const char *
2516 output_branch (int logic, rtx insn, rtx *operands)
2517 {
2518 switch (get_attr_length (insn))
2519 {
2520 case 6:
2521 /* This can happen if filling the delay slot has caused a forward
2522 branch to exceed its range (we could reverse it, but only
2523 when we know we won't overextend other branches; this should
2524 best be handled by relaxation).
2525 It can also happen when other condbranches hoist delay slot insn
2526 from their destination, thus leading to code size increase.
2527 But the branch will still be in the range -4092..+4098 bytes. */
2528
2529 if (! TARGET_RELAX)
2530 {
2531 int label = lf++;
2532 /* The call to print_slot will clobber the operands. */
2533 rtx op0 = operands[0];
2534
2535 /* If the instruction in the delay slot is annulled (true), then
2536 there is no delay slot where we can put it now. The only safe
2537 place for it is after the label. final will do that by default. */
2538
2539 if (final_sequence
2540 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2541 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2542 {
2543 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2544 ASSEMBLER_DIALECT ? "/" : ".", label);
2545 print_slot (final_sequence);
2546 }
2547 else
2548 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2549
2550 output_asm_insn ("bra\t%l0", &op0);
2551 fprintf (asm_out_file, "\tnop\n");
2552 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2553
2554 return "";
2555 }
2556 /* When relaxing, handle this like a short branch. The linker
2557 will fix it up if it still doesn't fit after relaxation. */
2558 case 2:
2559 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2560
2561 /* These are for SH2e, in which we have to account for the
2562 extra nop because of the hardware bug in annulled branches. */
2563 case 8:
2564 if (! TARGET_RELAX)
2565 {
2566 int label = lf++;
2567
2568 gcc_assert (!final_sequence
2569 || !(INSN_ANNULLED_BRANCH_P
2570 (XVECEXP (final_sequence, 0, 0))));
2571 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2572 logic ? "f" : "t",
2573 ASSEMBLER_DIALECT ? "/" : ".", label);
2574 fprintf (asm_out_file, "\tnop\n");
2575 output_asm_insn ("bra\t%l0", operands);
2576 fprintf (asm_out_file, "\tnop\n");
2577 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2578
2579 return "";
2580 }
2581 /* When relaxing, fall through. */
2582 case 4:
2583 {
2584 char buffer[10];
2585
2586 sprintf (buffer, "b%s%ss\t%%l0",
2587 logic ? "t" : "f",
2588 ASSEMBLER_DIALECT ? "/" : ".");
2589 output_asm_insn (buffer, &operands[0]);
2590 return "nop";
2591 }
2592
2593 default:
2594 /* There should be no longer branches now - that would
2595 indicate that something has destroyed the branches set
2596 up in machine_dependent_reorg. */
2597 gcc_unreachable ();
2598 }
2599 }
2600
2601 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2602 fill in operand 9 as a label to the successor insn.
2603 We try to use jump threading where possible.
2604 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2605 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2606 follow jmp and bt, if the address is in range. */
2607 const char *
2608 output_branchy_insn (enum rtx_code code, const char *templ,
2609 rtx insn, rtx *operands)
2610 {
2611 rtx next_insn = NEXT_INSN (insn);
2612
2613 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2614 {
2615 rtx src = SET_SRC (PATTERN (next_insn));
2616 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2617 {
2618 /* Following branch not taken */
2619 operands[9] = gen_label_rtx ();
2620 emit_label_after (operands[9], next_insn);
2621 INSN_ADDRESSES_NEW (operands[9],
2622 INSN_ADDRESSES (INSN_UID (next_insn))
2623 + get_attr_length (next_insn));
2624 return templ;
2625 }
2626 else
2627 {
2628 int offset = (branch_dest (next_insn)
2629 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2630 if (offset >= -252 && offset <= 258)
2631 {
2632 if (GET_CODE (src) == IF_THEN_ELSE)
2633 /* branch_true */
2634 src = XEXP (src, 1);
2635 operands[9] = src;
2636 return templ;
2637 }
2638 }
2639 }
2640 operands[9] = gen_label_rtx ();
2641 emit_label_after (operands[9], insn);
2642 INSN_ADDRESSES_NEW (operands[9],
2643 INSN_ADDRESSES (INSN_UID (insn))
2644 + get_attr_length (insn));
2645 return templ;
2646 }
2647
2648 const char *
2649 output_ieee_ccmpeq (rtx insn, rtx *operands)
2650 {
2651 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2652 insn, operands);
2653 }
2654 \f
2655 /* Output the start of the assembler file. */
2656
2657 static void
2658 sh_file_start (void)
2659 {
2660 default_file_start ();
2661
2662 if (TARGET_ELF)
2663 /* We need to show the text section with the proper
2664 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2665 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2666 will complain. We can teach GAS specifically about the
2667 default attributes for our choice of text section, but
2668 then we would have to change GAS again if/when we change
2669 the text section name. */
2670 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2671 else
2672 /* Switch to the data section so that the coffsem symbol
2673 isn't in the text section. */
2674 switch_to_section (data_section);
2675
2676 if (TARGET_LITTLE_ENDIAN)
2677 fputs ("\t.little\n", asm_out_file);
2678
2679 if (!TARGET_ELF)
2680 {
2681 if (TARGET_SHCOMPACT)
2682 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2683 else if (TARGET_SHMEDIA)
2684 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2685 TARGET_SHMEDIA64 ? 64 : 32);
2686 }
2687 }
2688 \f
2689 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2690
2691 static bool
2692 unspec_caller_rtx_p (rtx pat)
2693 {
2694 rtx base, offset;
2695 int i;
2696
2697 split_const (pat, &base, &offset);
2698 if (GET_CODE (base) == UNSPEC)
2699 {
2700 if (XINT (base, 1) == UNSPEC_CALLER)
2701 return true;
2702 for (i = 0; i < XVECLEN (base, 0); i++)
2703 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2704 return true;
2705 }
2706 return false;
2707 }
2708
2709 /* Indicate that INSN cannot be duplicated. This is true for an insn
2710 that generates a unique label. */
2711
2712 static bool
2713 sh_cannot_copy_insn_p (rtx insn)
2714 {
2715 rtx pat;
2716
2717 if (!reload_completed || !flag_pic)
2718 return false;
2719
2720 if (!NONJUMP_INSN_P (insn))
2721 return false;
2722 if (asm_noperands (insn) >= 0)
2723 return false;
2724
2725 pat = PATTERN (insn);
2726 if (GET_CODE (pat) != SET)
2727 return false;
2728 pat = SET_SRC (pat);
2729
2730 if (unspec_caller_rtx_p (pat))
2731 return true;
2732
2733 return false;
2734 }
2735 \f
2736 /* Actual number of instructions used to make a shift by N. */
2737 static const char ashiftrt_insns[] =
2738 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2739
2740 /* Left shift and logical right shift are the same. */
2741 static const char shift_insns[] =
2742 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2743
2744 /* Individual shift amounts needed to get the above length sequences.
2745 One bit right shifts clobber the T bit, so when possible, put one bit
2746 shifts in the middle of the sequence, so the ends are eligible for
2747 branch delay slots. */
2748 static const short shift_amounts[32][5] = {
2749 {0}, {1}, {2}, {2, 1},
2750 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2751 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2752 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2753 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2754 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2755 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2756 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
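/* Illustrative example: shift_amounts[13] = {8, 2, 1, 2}, i.e. a shift by 13
   is emitted as four single-instruction shifts (8 + 2 + 1 + 2 = 13), matching
   shift_insns[13] == 4; the one bit shift sits in the middle so the outer
   insns remain eligible for delay slots.  */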
2757
2758 /* Likewise, but for shift amounts < 16, up to three highmost bits
2759 might be clobbered. This is typically used when combined with some
2760 kind of sign or zero extension. */
2761
2762 static const char ext_shift_insns[] =
2763 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2764
2765 static const short ext_shift_amounts[32][4] = {
2766 {0}, {1}, {2}, {2, 1},
2767 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2768 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2769 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2770 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2771 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2772 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2773 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2774
2775 /* Assuming we have a value that has been sign-extended by at least one bit,
2776 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2777 to shift it by N without data loss, and quicker than by other means? */
2778 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
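/* (Illustrative: ((n) | 8) == 15 holds exactly for n == 7 and n == 15, whose
   ext_shift_amounts sequences {8, -1} and {16, -1} end in a single one bit
   right shift that can be made arithmetic.)  */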
2779
2780 /* This is used in length attributes in sh.md to help compute the length
2781 of arbitrary constant shift instructions. */
2782
2783 int
2784 shift_insns_rtx (rtx insn)
2785 {
2786 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2787 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2788 enum rtx_code shift_code = GET_CODE (set_src);
2789
2790 switch (shift_code)
2791 {
2792 case ASHIFTRT:
2793 return ashiftrt_insns[shift_count];
2794 case LSHIFTRT:
2795 case ASHIFT:
2796 return shift_insns[shift_count];
2797 default:
2798 gcc_unreachable ();
2799 }
2800 }
2801
2802 /* Return the cost of a shift. */
2803
2804 static inline int
2805 shiftcosts (rtx x)
2806 {
2807 int value;
2808
2809 if (TARGET_SHMEDIA)
2810 return 1;
2811
2812 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2813 {
2814 if (GET_MODE (x) == DImode
2815 && CONST_INT_P (XEXP (x, 1))
2816 && INTVAL (XEXP (x, 1)) == 1)
2817 return 2;
2818
2819 /* Everything else is invalid, because there is no pattern for it. */
2820 return MAX_COST;
2821 }
2822 /* If shift by a non constant, then this will be expensive. */
2823 if (!CONST_INT_P (XEXP (x, 1)))
2824 return SH_DYNAMIC_SHIFT_COST;
2825
2826 /* Otherwise, return the true cost in instructions. Cope with out of range
2827 shift counts more or less arbitrarily. */
2828 value = INTVAL (XEXP (x, 1)) & 31;
2829
2830 if (GET_CODE (x) == ASHIFTRT)
2831 {
2832 int cost = ashiftrt_insns[value];
2833 /* If SH3, then we put the constant in a reg and use shad. */
2834 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2835 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2836 return cost;
2837 }
2838 else
2839 return shift_insns[value];
2840 }
2841
2842 /* Return the cost of an AND/XOR/IOR operation. */
2843
2844 static inline int
2845 and_xor_ior_costs (rtx x, int code)
2846 {
2847 int i;
2848
2849 /* A logical operation with two registers is a single cycle
2850 instruction. */
2851 if (!CONST_INT_P (XEXP (x, 1)))
2852 return 1;
2853
2854 i = INTVAL (XEXP (x, 1));
2855
2856 if (TARGET_SHMEDIA)
2857 {
2858 if (satisfies_constraint_I10 (XEXP (x, 1))
2859 || satisfies_constraint_J16 (XEXP (x, 1)))
2860 return 1;
2861 else
2862 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
2863 }
2864
2865 /* These constants are single cycle extu.[bw] instructions. */
2866 if ((i == 0xff || i == 0xffff) && code == AND)
2867 return 1;
2868 /* Constants that can be used in an instruction as an immediate are
2869 a single cycle, but this requires r0, so make it a little more
2870 expensive. */
2871 if (CONST_OK_FOR_K08 (i))
2872 return 2;
2873 /* Constants that can be loaded with a mov immediate need one more cycle.
2874 This case is probably unnecessary. */
2875 if (CONST_OK_FOR_I08 (i))
2876 return 2;
2877 /* Any other constant requires an additional 2 cycle pc-relative load.
2878 This case is probably unnecessary. */
2879 return 3;
2880 }
2881
2882 /* Return the cost of an addition or a subtraction. */
2883
2884 static inline int
2885 addsubcosts (rtx x)
2886 {
2887 /* Adding a register is a single cycle insn. */
2888 if (REG_P (XEXP (x, 1))
2889 || GET_CODE (XEXP (x, 1)) == SUBREG)
2890 return 1;
2891
2892 /* Likewise for small constants. */
2893 if (CONST_INT_P (XEXP (x, 1))
2894 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2895 return 1;
2896
2897 if (TARGET_SHMEDIA)
2898 switch (GET_CODE (XEXP (x, 1)))
2899 {
2900 case CONST:
2901 case LABEL_REF:
2902 case SYMBOL_REF:
2903 return TARGET_SHMEDIA64 ? 5 : 3;
2904
2905 case CONST_INT:
2906 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2907 return 2;
2908 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2909 return 3;
2910 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2911 return 4;
2912
2913 /* Fall through. */
2914 default:
2915 return 5;
2916 }
2917
2918 /* Any other constant requires a 2 cycle pc-relative load plus an
2919 addition. */
2920 return 3;
2921 }
2922
2923 /* Return the cost of a multiply. */
2924 static inline int
2925 multcosts (rtx x ATTRIBUTE_UNUSED)
2926 {
2927 if (sh_multcost >= 0)
2928 return sh_multcost;
2929 if (TARGET_SHMEDIA)
2930 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2931 accept constants. Ideally, we would use a cost of one or two and
2932 add the cost of the operand, but disregard the latter when inside loops
2933 and loop invariant code motion is still to follow.
2934 Using a multiply first and splitting it later if it's a loss
2935 doesn't work because of different sign / zero extension semantics
2936 of multiplies vs. shifts. */
2937 return optimize_size ? 2 : 3;
2938
2939 if (TARGET_SH2)
2940 {
2941 /* We have a mul insn, so we can never take more than the mul and the
2942 read of the mac reg, but count more because of the latency and extra
2943 reg usage. */
2944 if (optimize_size)
2945 return 2;
2946 return 3;
2947 }
2948
2949 /* If we're aiming at small code, then just count the number of
2950 insns in a multiply call sequence. */
2951 if (optimize_size)
2952 return 5;
2953
2954 /* Otherwise count all the insns in the routine we'd be calling too. */
2955 return 20;
2956 }
2957
2958 /* Compute a (partial) cost for rtx X. Return true if the complete
2959 cost has been computed, and false if subexpressions should be
2960 scanned. In either case, *TOTAL contains the cost result. */
2961
2962 static bool
2963 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
2964 int *total, bool speed ATTRIBUTE_UNUSED)
2965 {
2966 switch (code)
2967 {
2968 case CONST_INT:
2969 if (TARGET_SHMEDIA)
2970 {
2971 if (INTVAL (x) == 0)
2972 *total = 0;
2973 else if (outer_code == AND && and_operand ((x), DImode))
2974 *total = 0;
2975 else if ((outer_code == IOR || outer_code == XOR
2976 || outer_code == PLUS)
2977 && CONST_OK_FOR_I10 (INTVAL (x)))
2978 *total = 0;
2979 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2980 *total = COSTS_N_INSNS (outer_code != SET);
2981 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2982 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2983 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2984 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2985 else
2986 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2987 return true;
2988 }
2989 if (CONST_OK_FOR_I08 (INTVAL (x)))
2990 *total = 0;
2991 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2992 && CONST_OK_FOR_K08 (INTVAL (x)))
2993 *total = 1;
2994 /* prepare_cmp_insn will force costly constants into registers before
2995 the cbranch[sd]i4 patterns can see them, so preserve potentially
2996 interesting ones not covered by I08 above. */
2997 else if (outer_code == COMPARE
2998 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2999 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3000 || INTVAL (x) == 0x7fffffff
3001 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3002 *total = 1;
3003 else
3004 *total = 8;
3005 return true;
3006
3007 case EQ:
3008 /* An and with a constant compared against zero is
3009 most likely going to be a TST #imm, R0 instruction.
3010 Notice that this does not catch the zero_extract variants from
3011 the md file. */
3012 if (GET_CODE (XEXP (x, 0)) == AND
3013 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3014 {
3015 *total = 1;
3016 return true;
3017 }
3018 else
3019 return false;
3020
3021 case CONST:
3022 case LABEL_REF:
3023 case SYMBOL_REF:
3024 if (TARGET_SHMEDIA64)
3025 *total = COSTS_N_INSNS (4);
3026 else if (TARGET_SHMEDIA32)
3027 *total = COSTS_N_INSNS (2);
3028 else
3029 *total = 5;
3030 return true;
3031
3032 case CONST_DOUBLE:
3033 if (TARGET_SHMEDIA)
3034 *total = COSTS_N_INSNS (4);
3035 /* prepare_cmp_insn will force costly constants into registers before
3036 the cbranchdi4 pattern can see them, so preserve potentially
3037 interesting ones. */
3038 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3039 *total = 1;
3040 else
3041 *total = 10;
3042 return true;
3043 case CONST_VECTOR:
3044 if (x == CONST0_RTX (GET_MODE (x)))
3045 *total = 0;
3046 else if (sh_1el_vec (x, VOIDmode))
3047 *total = outer_code != SET;
3048 else if (sh_rep_vec (x, VOIDmode))
3049 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3050 + (outer_code != SET));
3051 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
3052 return true;
3053
3054 case PLUS:
3055 case MINUS:
3056 *total = COSTS_N_INSNS (addsubcosts (x));
3057 return true;
3058
3059 case AND:
3060 case XOR:
3061 case IOR:
3062 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3063 return true;
3064
3065 case MULT:
3066 *total = COSTS_N_INSNS (multcosts (x));
3067 return true;
3068
3069 case ASHIFT:
3070 case ASHIFTRT:
3071 case LSHIFTRT:
3072 *total = COSTS_N_INSNS (shiftcosts (x));
3073 return true;
3074
3075 case DIV:
3076 case UDIV:
3077 case MOD:
3078 case UMOD:
3079 *total = COSTS_N_INSNS (20);
3080 return true;
3081
3082 case PARALLEL:
3083 if (sh_1el_vec (x, VOIDmode))
3084 *total = outer_code != SET;
3085 else if (sh_rep_vec (x, VOIDmode))
3086 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3087 + (outer_code != SET));
3088 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
3089 return true;
3090
3091 case FLOAT:
3092 case FIX:
3093 *total = 100;
3094 return true;
3095
3096 default:
3097 return false;
3098 }
3099 }
3100
3101 /* Compute the cost of an address. For the SH, all valid addresses are
3102 the same cost. Use a slightly higher cost for reg + reg addressing,
3103 since it increases pressure on r0. */
3104
3105 static int
3106 sh_address_cost (rtx X,
3107 bool speed ATTRIBUTE_UNUSED)
3108 {
3109 return (GET_CODE (X) == PLUS
3110 && ! CONSTANT_P (XEXP (X, 1))
3111 && ! TARGET_SHMEDIA ? 1 : 0);
3112 }
3113
3114 /* Code to expand a shift. */
3115
3116 void
3117 gen_ashift (int type, int n, rtx reg)
3118 {
3119 /* Negative values here come from the shift_amounts array. */
3120 if (n < 0)
3121 {
3122 if (type == ASHIFT)
3123 type = LSHIFTRT;
3124 else
3125 type = ASHIFT;
3126 n = -n;
3127 }
3128
3129 switch (type)
3130 {
3131 case ASHIFTRT:
3132 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3133 break;
3134 case LSHIFTRT:
3135 if (n == 1)
3136 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3137 else
3138 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3139 break;
3140 case ASHIFT:
3141 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3142 break;
3143 }
3144 }
3145
3146 /* Same for HImode */
3147
3148 void
3149 gen_ashift_hi (int type, int n, rtx reg)
3150 {
3151 /* Negative values here come from the shift_amounts array. */
3152 if (n < 0)
3153 {
3154 if (type == ASHIFT)
3155 type = LSHIFTRT;
3156 else
3157 type = ASHIFT;
3158 n = -n;
3159 }
3160
3161 switch (type)
3162 {
3163 case ASHIFTRT:
3164 case LSHIFTRT:
3165 /* We don't have HImode right shift operations because using the
3166 ordinary 32 bit shift instructions for that doesn't generate proper
3167 zero/sign extension.
3168 gen_ashift_hi is only called in contexts where we know that the
3169 sign extension works out correctly. */
3170 {
3171 int offset = 0;
3172 if (GET_CODE (reg) == SUBREG)
3173 {
3174 offset = SUBREG_BYTE (reg);
3175 reg = SUBREG_REG (reg);
3176 }
3177 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3178 break;
3179 }
3180 case ASHIFT:
3181 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3182 break;
3183 }
3184 }
3185
3186 /* Output RTL to split a constant shift into its component SH constant
3187 shift instructions. */
3188
3189 void
3190 gen_shifty_op (int code, rtx *operands)
3191 {
3192 int value = INTVAL (operands[2]);
3193 int max, i;
3194
3195 /* Truncate the shift count in case it is out of bounds. */
3196 value = value & 31;
3197
3198 if (value == 31)
3199 {
3200 if (code == LSHIFTRT)
3201 {
3202 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3203 emit_insn (gen_movt (operands[0]));
3204 return;
3205 }
3206 else if (code == ASHIFT)
3207 {
3208 /* There is a two instruction sequence for 31 bit left shifts,
3209 but it requires r0. */
3210 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3211 {
3212 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3213 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3214 return;
3215 }
3216 }
3217 }
3218 else if (value == 0)
3219 {
3220 /* This can happen even when optimizing, if there were subregs before
3221 reload. Don't output a nop here, as this is never optimized away;
3222 use a no-op move instead. */
3223 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3224 return;
3225 }
3226
3227 max = shift_insns[value];
3228 for (i = 0; i < max; i++)
3229 gen_ashift (code, shift_amounts[value][i], operands[0]);
3230 }
3231
3232 /* Same as above, but optimized for values where the topmost bits don't
3233 matter. */
3234
3235 void
3236 gen_shifty_hi_op (int code, rtx *operands)
3237 {
3238 int value = INTVAL (operands[2]);
3239 int max, i;
3240 void (*gen_fun) (int, int, rtx);
3241
3242 /* This operation is used by and_shl for SImode values with a few
3243 high bits known to be cleared. */
3244 value &= 31;
3245 if (value == 0)
3246 {
3247 emit_insn (gen_nop ());
3248 return;
3249 }
3250
3251 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3252 if (code == ASHIFT)
3253 {
3254 max = ext_shift_insns[value];
3255 for (i = 0; i < max; i++)
3256 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3257 }
3258 else
3259 /* When shifting right, emit the shifts in reverse order, so that
3260 solitary negative values come first. */
3261 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3262 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3263 }
3264
3265 /* Output RTL for an arithmetic right shift. */
3266
3267 /* ??? Rewrite to use super-optimizer sequences. */
3268
3269 int
3270 expand_ashiftrt (rtx *operands)
3271 {
3272 rtx wrk;
3273 char func[18];
3274 int value;
3275
3276 if (TARGET_SH3 || TARGET_SH2A)
3277 {
3278 if (!CONST_INT_P (operands[2]))
3279 {
3280 rtx count = copy_to_mode_reg (SImode, operands[2]);
3281 emit_insn (gen_negsi2 (count, count));
3282 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3283 return 1;
3284 }
3285 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3286 > 1 + SH_DYNAMIC_SHIFT_COST)
3287 {
3288 rtx count
3289 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3290 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3291 return 1;
3292 }
3293 }
3294 if (!CONST_INT_P (operands[2]))
3295 return 0;
3296
3297 value = INTVAL (operands[2]) & 31;
3298
3299 if (value == 31)
3300 {
3301 /* If we are called from abs expansion, arrange things so that
3302 we can use a single MT instruction that doesn't clobber the source,
3303 if LICM can hoist out the load of the constant zero. */
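/* (An arithmetic right shift by 31 yields 0 or -1, i.e. minus the result of
   the comparison 0 > operand, which is what the cmp/gt plus negated-T move
   sequence below computes.)  */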
3304 if (currently_expanding_to_rtl)
3305 {
3306 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3307 operands[1]));
3308 emit_insn (gen_mov_neg_si_t (operands[0]));
3309 return 1;
3310 }
3311 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3312 return 1;
3313 }
3314 else if (value >= 16 && value <= 19)
3315 {
3316 wrk = gen_reg_rtx (SImode);
3317 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3318 value -= 16;
3319 while (value--)
3320 gen_ashift (ASHIFTRT, 1, wrk);
3321 emit_move_insn (operands[0], wrk);
3322 return 1;
3323 }
3324 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3325 else if (value <= 5)
3326 {
3327 wrk = gen_reg_rtx (SImode);
3328 emit_move_insn (wrk, operands[1]);
3329 while (value--)
3330 gen_ashift (ASHIFTRT, 1, wrk);
3331 emit_move_insn (operands[0], wrk);
3332 return 1;
3333 }
3334
3335 wrk = gen_reg_rtx (Pmode);
3336
3337 /* Load the value into an arg reg and call a helper. */
3338 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3339 sprintf (func, "__ashiftrt_r4_%d", value);
3340 function_symbol (wrk, func, SFUNC_STATIC);
3341 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3342 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3343 return 1;
3344 }
3345
3346 int
3347 sh_dynamicalize_shift_p (rtx count)
3348 {
3349 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3350 }
3351
3352 /* Try to find a good way to implement the combiner pattern
3353 [(set (match_operand:SI 0 "register_operand" "r")
3354 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3355 (match_operand:SI 2 "const_int_operand" "n"))
3356 (match_operand:SI 3 "const_int_operand" "n"))) .
3357 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3358 return 0 for simple right / left or left/right shift combination.
3359 return 1 for a combination of shifts with zero_extend.
3360 return 2 for a combination of shifts with an AND that needs r0.
3361 return 3 for a combination of shifts with an AND that needs an extra
3362 scratch register, when the three highmost bits of the AND mask are clear.
3363 return 4 for a combination of shifts with an AND that needs an extra
3364 scratch register, when any of the three highmost bits of the AND mask
3365 is set.
3366 If ATTRP is set, store an initial right shift width in ATTRP[0],
3367 and the instruction length in ATTRP[1] . These values are not valid
3368 when returning 0.
3369 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3370 shift_amounts for the last shift value that is to be used before the
3371 sign extend. */
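/* Illustrative example, worked by hand: for (and:SI (ashift:SI x 2) 0x3fc)
   the mask shifted right by 2 is 0xff, so the cheapest sequence found is a
   zero extend of the low byte followed by a two bit left shift; the function
   returns 1 and reports a length of two instructions in ATTRP[1].  */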
3372 int
3373 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3374 {
3375 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3376 int left = INTVAL (left_rtx), right;
3377 int best = 0;
3378 int cost, best_cost = 10000;
3379 int best_right = 0, best_len = 0;
3380 int i;
3381 int can_ext;
3382
3383 if (left < 0 || left > 31)
3384 return 0;
3385 if (CONST_INT_P (mask_rtx))
3386 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3387 else
3388 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3389 /* Can this be expressed as a right shift / left shift pair? */
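/* Illustrative: the lsb computation below isolates the lowest set bit of
   mask; e.g. for mask = 0x6, mask ^ (mask - 1) = 0x3, so lsb = 2 and
   right = 1.  */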
3390 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3391 right = exact_log2 (lsb);
3392 mask2 = ~(mask + lsb - 1);
3393 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3394 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3395 if (! mask2)
3396 best_cost = shift_insns[right] + shift_insns[right + left];
3397 /* mask has no trailing zeroes <==> ! right */
3398 else if (! right && mask2 == ~(lsb2 - 1))
3399 {
3400 int late_right = exact_log2 (lsb2);
3401 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3402 }
3403 /* Try to use zero extend. */
3404 if (mask2 == ~(lsb2 - 1))
3405 {
3406 int width, first;
3407
3408 for (width = 8; width <= 16; width += 8)
3409 {
3410 /* Can we zero-extend right away? */
3411 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3412 {
3413 cost
3414 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3415 if (cost < best_cost)
3416 {
3417 best = 1;
3418 best_cost = cost;
3419 best_right = right;
3420 best_len = cost;
3421 if (attrp)
3422 attrp[2] = -1;
3423 }
3424 continue;
3425 }
3426 /* ??? Could try to put zero extend into initial right shift,
3427 or even shift a bit left before the right shift. */
3428 /* Determine value of first part of left shift, to get to the
3429 zero extend cut-off point. */
3430 first = width - exact_log2 (lsb2) + right;
3431 if (first >= 0 && right + left - first >= 0)
3432 {
3433 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3434 + ext_shift_insns[right + left - first];
3435 if (cost < best_cost)
3436 {
3437 best = 1;
3438 best_cost = cost;
3439 best_right = right;
3440 best_len = cost;
3441 if (attrp)
3442 attrp[2] = first;
3443 }
3444 }
3445 }
3446 }
3447 /* Try to use r0 AND pattern */
3448 for (i = 0; i <= 2; i++)
3449 {
3450 if (i > right)
3451 break;
3452 if (! CONST_OK_FOR_K08 (mask >> i))
3453 continue;
3454 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3455 if (cost < best_cost)
3456 {
3457 best = 2;
3458 best_cost = cost;
3459 best_right = i;
3460 best_len = cost - 1;
3461 }
3462 }
3463 /* Try to use a scratch register to hold the AND operand. */
3464 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3465 for (i = 0; i <= 2; i++)
3466 {
3467 if (i > right)
3468 break;
3469 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3470 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3471 if (cost < best_cost)
3472 {
3473 best = 4 - can_ext;
3474 best_cost = cost;
3475 best_right = i;
3476 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3477 }
3478 }
3479
3480 if (attrp)
3481 {
3482 attrp[0] = best_right;
3483 attrp[1] = best_len;
3484 }
3485 return best;
3486 }
3487
3488 /* This is used in length attributes of the unnamed instructions
3489 corresponding to shl_and_kind return values of 1 and 2. */
3490 int
3491 shl_and_length (rtx insn)
3492 {
3493 rtx set_src, left_rtx, mask_rtx;
3494 int attributes[3];
3495
3496 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3497 left_rtx = XEXP (XEXP (set_src, 0), 1);
3498 mask_rtx = XEXP (set_src, 1);
3499 shl_and_kind (left_rtx, mask_rtx, attributes);
3500 return attributes[1];
3501 }
3502
3503 /* This is used in length attribute of the and_shl_scratch instruction. */
3504
3505 int
3506 shl_and_scr_length (rtx insn)
3507 {
3508 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3509 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3510 rtx op = XEXP (set_src, 0);
3511 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3512 op = XEXP (XEXP (op, 0), 0);
3513 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3514 }
3515
3516 /* Generate rtl for instructions for which shl_and_kind advised a particular
3517 method of generating them, i.e. returned zero. */
3518
3519 int
3520 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3521 {
3522 int attributes[3];
3523 unsigned HOST_WIDE_INT mask;
3524 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3525 int right, total_shift;
3526 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3527
3528 right = attributes[0];
3529 total_shift = INTVAL (left_rtx) + right;
3530 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3531 switch (kind)
3532 {
3533 default:
3534 return -1;
3535 case 1:
3536 {
3537 int first = attributes[2];
3538 rtx operands[3];
3539
3540 if (first < 0)
3541 {
3542 emit_insn ((mask << right) <= 0xff
3543 ? gen_zero_extendqisi2 (dest,
3544 gen_lowpart (QImode, source))
3545 : gen_zero_extendhisi2 (dest,
3546 gen_lowpart (HImode, source)));
3547 source = dest;
3548 }
3549 if (source != dest)
3550 emit_insn (gen_movsi (dest, source));
3551 operands[0] = dest;
3552 if (right)
3553 {
3554 operands[2] = GEN_INT (right);
3555 gen_shifty_hi_op (LSHIFTRT, operands);
3556 }
3557 if (first > 0)
3558 {
3559 operands[2] = GEN_INT (first);
3560 gen_shifty_hi_op (ASHIFT, operands);
3561 total_shift -= first;
3562 mask <<= first;
3563 }
3564 if (first >= 0)
3565 emit_insn (mask <= 0xff
3566 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3567 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3568 if (total_shift > 0)
3569 {
3570 operands[2] = GEN_INT (total_shift);
3571 gen_shifty_hi_op (ASHIFT, operands);
3572 }
3573 break;
3574 }
3575 case 4:
3576 shift_gen_fun = gen_shifty_op;
3577 case 3:
3578 /* If the topmost bit that matters is set, set the topmost bits
3579 that don't matter. This way, we might be able to get a shorter
3580 signed constant. */
3581 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3582 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3583 case 2:
3584 /* Don't expand fine-grained when combining, because that will
3585 make the pattern fail. */
3586 if (currently_expanding_to_rtl
3587 || reload_in_progress || reload_completed)
3588 {
3589 rtx operands[3];
3590
3591 /* Cases 3 and 4 should be handled by this split
3592 only while combining. */
3593 gcc_assert (kind <= 2);
3594 if (right)
3595 {
3596 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3597 source = dest;
3598 }
3599 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3600 if (total_shift)
3601 {
3602 operands[0] = dest;
3603 operands[1] = dest;
3604 operands[2] = GEN_INT (total_shift);
3605 shift_gen_fun (ASHIFT, operands);
3606 }
3607 break;
3608 }
3609 else
3610 {
3611 int neg = 0;
3612 if (kind != 4 && total_shift < 16)
3613 {
3614 neg = -ext_shift_amounts[total_shift][1];
3615 if (neg > 0)
3616 neg -= ext_shift_amounts[total_shift][2];
3617 else
3618 neg = 0;
3619 }
3620 emit_insn (gen_and_shl_scratch (dest, source,
3621 GEN_INT (right),
3622 GEN_INT (mask),
3623 GEN_INT (total_shift + neg),
3624 GEN_INT (neg)));
3625 emit_insn (gen_movsi (dest, dest));
3626 break;
3627 }
3628 }
3629 return 0;
3630 }
3631
3632 /* Try to find a good way to implement the combiner pattern
3633 [(set (match_operand:SI 0 "register_operand" "=r")
3634 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3635 (match_operand:SI 2 "const_int_operand" "n")
3636 (match_operand:SI 3 "const_int_operand" "n")
3637 (const_int 0)))
3638 (clobber (reg:SI T_REG))]
3639 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3640 return 0 for simple left / right shift combination.
3641 return 1 for left shift / 8 bit sign extend / left shift.
3642 return 2 for left shift / 16 bit sign extend / left shift.
3643 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3644 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3645 return 5 for left shift / 16 bit sign extend / right shift
3646 return 6 for < 8 bit sign extend / left shift.
3647 return 7 for < 8 bit sign extend / left shift / single right shift.
3648 If COSTP is nonzero, assign the calculated cost to *COSTP. */
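/* For instance, with LEFT == 8 and SIZE == 24 (hence INSIZE == 16), the
   pattern takes the low 16 bits of operand 1, shifts them left by 8 and
   sign-extends the result from bit 23. */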
3649
3650 int
3651 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3652 {
3653 int left, size, insize, ext;
3654 int cost = 0, best_cost;
3655 int kind;
3656
3657 left = INTVAL (left_rtx);
3658 size = INTVAL (size_rtx);
3659 insize = size - left;
3660 gcc_assert (insize > 0);
3661 /* Default to left / right shift. */
3662 kind = 0;
3663 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3664 if (size <= 16)
3665 {
3666 /* 16 bit shift / sign extend / 16 bit shift */
3667 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3668 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3669 below, by alternative 3 or something even better. */
3670 if (cost < best_cost)
3671 {
3672 kind = 5;
3673 best_cost = cost;
3674 }
3675 }
3676 /* Try a plain sign extend between two shifts. */
3677 for (ext = 16; ext >= insize; ext -= 8)
3678 {
3679 if (ext <= size)
3680 {
3681 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3682 if (cost < best_cost)
3683 {
3684 kind = ext / (unsigned) 8;
3685 best_cost = cost;
3686 }
3687 }
3688 /* Check if we can do a sloppy shift with a final signed shift
3689 restoring the sign. */
3690 if (EXT_SHIFT_SIGNED (size - ext))
3691 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3692 /* If not, maybe it's still cheaper to do the second shift sloppy,
3693 and do a final sign extend? */
3694 else if (size <= 16)
3695 cost = ext_shift_insns[ext - insize] + 1
3696 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3697 else
3698 continue;
3699 if (cost < best_cost)
3700 {
3701 kind = ext / (unsigned) 8 + 2;
3702 best_cost = cost;
3703 }
3704 }
3705 /* Check if we can sign extend in r0. */
3706 if (insize < 8)
3707 {
3708 cost = 3 + shift_insns[left];
3709 if (cost < best_cost)
3710 {
3711 kind = 6;
3712 best_cost = cost;
3713 }
3714 /* Try the same with a final signed shift. */
3715 if (left < 31)
3716 {
3717 cost = 3 + ext_shift_insns[left + 1] + 1;
3718 if (cost < best_cost)
3719 {
3720 kind = 7;
3721 best_cost = cost;
3722 }
3723 }
3724 }
3725 if (TARGET_SH3 || TARGET_SH2A)
3726 {
3727 /* Try to use a dynamic shift. */
3728 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3729 if (cost < best_cost)
3730 {
3731 kind = 0;
3732 best_cost = cost;
3733 }
3734 }
3735 if (costp)
3736 *costp = cost;
3737 return kind;
3738 }
3739
3740 /* Function to be used in the length attribute of the instructions
3741 implementing this pattern. */
3742
3743 int
3744 shl_sext_length (rtx insn)
3745 {
3746 rtx set_src, left_rtx, size_rtx;
3747 int cost;
3748
3749 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3750 left_rtx = XEXP (XEXP (set_src, 0), 1);
3751 size_rtx = XEXP (set_src, 1);
3752 shl_sext_kind (left_rtx, size_rtx, &cost);
3753 return cost;
3754 }
3755
3756 /* Generate rtl for this pattern. */
3757
3758 int
3759 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3760 {
3761 int kind;
3762 int left, size, insize, cost;
3763 rtx operands[3];
3764
3765 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3766 left = INTVAL (left_rtx);
3767 size = INTVAL (size_rtx);
3768 insize = size - left;
3769 switch (kind)
3770 {
3771 case 1:
3772 case 2:
3773 case 3:
3774 case 4:
3775 {
3776 int ext = kind & 1 ? 8 : 16;
3777 int shift2 = size - ext;
3778
3779 /* Don't expand fine-grained when combining, because that will
3780 make the pattern fail. */
3781 if (! currently_expanding_to_rtl
3782 && ! reload_in_progress && ! reload_completed)
3783 {
3784 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3785 emit_insn (gen_movsi (dest, source));
3786 break;
3787 }
3788 if (dest != source)
3789 emit_insn (gen_movsi (dest, source));
3790 operands[0] = dest;
3791 if (ext - insize)
3792 {
3793 operands[2] = GEN_INT (ext - insize);
3794 gen_shifty_hi_op (ASHIFT, operands);
3795 }
3796 emit_insn (kind & 1
3797 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3798 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3799 if (kind <= 2)
3800 {
3801 if (shift2)
3802 {
3803 operands[2] = GEN_INT (shift2);
3804 gen_shifty_op (ASHIFT, operands);
3805 }
3806 }
3807 else
3808 {
3809 if (shift2 > 0)
3810 {
3811 if (EXT_SHIFT_SIGNED (shift2))
3812 {
3813 operands[2] = GEN_INT (shift2 + 1);
3814 gen_shifty_op (ASHIFT, operands);
3815 operands[2] = const1_rtx;
3816 gen_shifty_op (ASHIFTRT, operands);
3817 break;
3818 }
3819 operands[2] = GEN_INT (shift2);
3820 gen_shifty_hi_op (ASHIFT, operands);
3821 }
3822 else if (shift2)
3823 {
3824 operands[2] = GEN_INT (-shift2);
3825 gen_shifty_hi_op (LSHIFTRT, operands);
3826 }
3827 emit_insn (size <= 8
3828 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3829 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3830 }
3831 break;
3832 }
3833 case 5:
3834 {
3835 int i = 16 - size;
3836 if (! currently_expanding_to_rtl
3837 && ! reload_in_progress && ! reload_completed)
3838 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3839 else
3840 {
3841 operands[0] = dest;
3842 operands[2] = GEN_INT (16 - insize);
3843 gen_shifty_hi_op (ASHIFT, operands);
3844 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3845 }
3846 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3847 while (--i >= 0)
3848 gen_ashift (ASHIFTRT, 1, dest);
3849 break;
3850 }
3851 case 6:
3852 case 7:
3853 /* Don't expand fine-grained when combining, because that will
3854 make the pattern fail. */
3855 if (! currently_expanding_to_rtl
3856 && ! reload_in_progress && ! reload_completed)
3857 {
3858 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3859 emit_insn (gen_movsi (dest, source));
3860 break;
3861 }
3862 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3863 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3864 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3865 operands[0] = dest;
3866 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3867 gen_shifty_op (ASHIFT, operands);
3868 if (kind == 7)
3869 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3870 break;
3871 default:
3872 return -1;
3873 }
3874 return 0;
3875 }
3876
3877 /* Prefix a symbol_ref name with "datalabel". */
3878
3879 rtx
3880 gen_datalabel_ref (rtx sym)
3881 {
3882 const char *str;
3883
3884 if (GET_CODE (sym) == LABEL_REF)
3885 return gen_rtx_CONST (GET_MODE (sym),
3886 gen_rtx_UNSPEC (GET_MODE (sym),
3887 gen_rtvec (1, sym),
3888 UNSPEC_DATALABEL));
3889
3890 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3891
3892 str = XSTR (sym, 0);
3893 /* Share all SYMBOL_REF strings with the same value - that is important
3894 for cse. */
3895 str = IDENTIFIER_POINTER (get_identifier (str));
3896 XSTR (sym, 0) = str;
3897
3898 return sym;
3899 }
3900
3901 \f
3902 static alloc_pool label_ref_list_pool;
3903
3904 typedef struct label_ref_list_d
3905 {
3906 rtx label;
3907 struct label_ref_list_d *next;
3908 } *label_ref_list_t;
3909
3910 /* The SH cannot load a large constant into a register; constants have to
3911 come from a pc-relative load. The reference of a pc-relative load
3912 instruction must be less than 1k in front of the instruction. This
3913 means that we often have to dump a constant inside a function, and
3914 generate code to branch around it.
3915
3916 It is important to minimize this, since the branches will slow things
3917 down and make things bigger.
3918
3919 Worst case code looks like:
3920
3921 mov.l L1,rn
3922 bra L2
3923 nop
3924 align
3925 L1: .long value
3926 L2:
3927 ..
3928
3929 mov.l L3,rn
3930 bra L4
3931 nop
3932 align
3933 L3: .long value
3934 L4:
3935 ..
3936
3937 We fix this by performing a scan before scheduling, which notices which
3938 instructions need to have their operands fetched from the constant table
3939 and builds the table.
3940
3941 The algorithm is:
3942
3943 Scan until we find an instruction which needs a pcrel move. Look forward
3944 and find the last barrier which is within MAX_COUNT bytes of the requirement.
3945 If there isn't one, make one. Process all the instructions between
3946 the found instruction and the barrier.
3947
3948 In the above example, we can tell that L3 is within 1k of L1, so
3949 the first move can be shrunk from the 3 insn+constant sequence into
3950 just 1 insn, and the constant moved to L3 to make:
3951
3952 mov.l L1,rn
3953 ..
3954 mov.l L3,rn
3955 bra L4
3956 nop
3957 align
3958 L3:.long value
3959 L4:.long value
3960
3961 Then the second move becomes the target for the shortening process. */
3962
3963 typedef struct
3964 {
3965 rtx value; /* Value in table. */
3966 rtx label; /* Label of value. */
3967 label_ref_list_t wend; /* End of window. */
3968 enum machine_mode mode; /* Mode of value. */
3969
3970 /* True if this constant is accessed as part of a post-increment
3971 sequence. Note that HImode constants are never accessed in this way. */
3972 bool part_of_sequence_p;
3973 } pool_node;
3974
3975 /* The maximum number of constants that can fit into one pool: constants
3976 in the range 0..510 are at least 2 bytes long, and those in the range
3977 from there to 1018 are at least 4 bytes. */
3978
3979 #define MAX_POOL_SIZE 372
3980 static pool_node pool_vector[MAX_POOL_SIZE];
3981 static int pool_size;
3982 static rtx pool_window_label;
3983 static int pool_window_last;
3984
3985 static int max_labelno_before_reorg;
3986
3987 /* ??? If we need a constant in HImode which is the truncated value of a
3988 constant we need in SImode, we could combine the two entries thus saving
3989 two bytes. Is this common enough to be worth the effort of implementing
3990 it? */
3991
3992 /* ??? This stuff should be done at the same time that we shorten branches.
3993 As it is now, we must assume that all branches are the maximum size, and
3994 this causes us to almost always output constant pools sooner than
3995 necessary. */
3996
3997 /* Add a constant to the pool and return its label. */
3998
3999 static rtx
4000 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4001 {
4002 int i;
4003 rtx lab, new_rtx;
4004 label_ref_list_t ref, newref;
4005
4006 /* First see if we've already got it. */
4007 for (i = 0; i < pool_size; i++)
4008 {
4009 if (x->code == pool_vector[i].value->code
4010 && mode == pool_vector[i].mode)
4011 {
4012 if (x->code == CODE_LABEL)
4013 {
4014 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4015 continue;
4016 }
4017 if (rtx_equal_p (x, pool_vector[i].value))
4018 {
4019 lab = new_rtx = 0;
4020 if (! last_value
4021 || ! i
4022 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4023 {
4024 new_rtx = gen_label_rtx ();
4025 LABEL_REFS (new_rtx) = pool_vector[i].label;
4026 pool_vector[i].label = lab = new_rtx;
4027 }
4028 if (lab && pool_window_label)
4029 {
4030 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4031 newref->label = pool_window_label;
4032 ref = pool_vector[pool_window_last].wend;
4033 newref->next = ref;
4034 pool_vector[pool_window_last].wend = newref;
4035 }
4036 if (new_rtx)
4037 pool_window_label = new_rtx;
4038 pool_window_last = i;
4039 return lab;
4040 }
4041 }
4042 }
4043
4044 /* Need a new one. */
4045 pool_vector[pool_size].value = x;
4046 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4047 {
4048 lab = 0;
4049 pool_vector[pool_size - 1].part_of_sequence_p = true;
4050 }
4051 else
4052 lab = gen_label_rtx ();
4053 pool_vector[pool_size].mode = mode;
4054 pool_vector[pool_size].label = lab;
4055 pool_vector[pool_size].wend = NULL;
4056 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4057 if (lab && pool_window_label)
4058 {
4059 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4060 newref->label = pool_window_label;
4061 ref = pool_vector[pool_window_last].wend;
4062 newref->next = ref;
4063 pool_vector[pool_window_last].wend = newref;
4064 }
4065 if (lab)
4066 pool_window_label = lab;
4067 pool_window_last = pool_size;
4068 pool_size++;
4069 return lab;
4070 }
4071
4072 /* Output the literal table. START, if nonzero, is the first instruction
4073 this table is needed for, and also indicates that there is at least one
4074 casesi_worker_2 instruction; we have to emit the operand3 labels from
4075 these insns at a 4-byte aligned position. BARRIER is the barrier
4076 after which we are to place the table. */
4077
4078 static void
4079 dump_table (rtx start, rtx barrier)
4080 {
4081 rtx scan = barrier;
4082 int i;
4083 int need_align = 1;
4084 rtx lab;
4085 label_ref_list_t ref;
4086 int have_df = 0;
4087
4088 /* Do two passes; the first time, dump out the HI sized constants. */
4089
4090 for (i = 0; i < pool_size; i++)
4091 {
4092 pool_node *p = &pool_vector[i];
4093
4094 if (p->mode == HImode)
4095 {
4096 if (need_align)
4097 {
4098 scan = emit_insn_after (gen_align_2 (), scan);
4099 need_align = 0;
4100 }
4101 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4102 scan = emit_label_after (lab, scan);
4103 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4104 scan);
4105 for (ref = p->wend; ref; ref = ref->next)
4106 {
4107 lab = ref->label;
4108 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4109 }
4110 }
4111 else if (p->mode == DFmode)
4112 have_df = 1;
4113 }
4114
4115 need_align = 1;
4116
4117 if (start)
4118 {
4119 scan = emit_insn_after (gen_align_4 (), scan);
4120 need_align = 0;
4121 for (; start != barrier; start = NEXT_INSN (start))
4122 if (NONJUMP_INSN_P (start)
4123 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4124 {
4125 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4126 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4127
4128 scan = emit_label_after (lab, scan);
4129 }
4130 }
4131 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4132 {
4133 rtx align_insn = NULL_RTX;
4134
4135 scan = emit_label_after (gen_label_rtx (), scan);
4136 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4137 need_align = 0;
4138
4139 for (i = 0; i < pool_size; i++)
4140 {
4141 pool_node *p = &pool_vector[i];
4142
4143 switch (p->mode)
4144 {
4145 case HImode:
4146 break;
4147 case SImode:
4148 case SFmode:
4149 if (align_insn && !p->part_of_sequence_p)
4150 {
4151 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4152 emit_label_before (lab, align_insn);
4153 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4154 align_insn);
4155 for (ref = p->wend; ref; ref = ref->next)
4156 {
4157 lab = ref->label;
4158 emit_insn_before (gen_consttable_window_end (lab),
4159 align_insn);
4160 }
4161 delete_insn (align_insn);
4162 align_insn = NULL_RTX;
4163 continue;
4164 }
4165 else
4166 {
4167 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4168 scan = emit_label_after (lab, scan);
4169 scan = emit_insn_after (gen_consttable_4 (p->value,
4170 const0_rtx), scan);
4171 need_align = ! need_align;
4172 }
4173 break;
4174 case DFmode:
4175 if (need_align)
4176 {
4177 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4178 align_insn = scan;
4179 need_align = 0;
4180 }
4181 case DImode:
4182 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4183 scan = emit_label_after (lab, scan);
4184 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4185 scan);
4186 break;
4187 default:
4188 gcc_unreachable ();
4189 }
4190
4191 if (p->mode != HImode)
4192 {
4193 for (ref = p->wend; ref; ref = ref->next)
4194 {
4195 lab = ref->label;
4196 scan = emit_insn_after (gen_consttable_window_end (lab),
4197 scan);
4198 }
4199 }
4200 }
4201
4202 pool_size = 0;
4203 }
4204
4205 for (i = 0; i < pool_size; i++)
4206 {
4207 pool_node *p = &pool_vector[i];
4208
4209 switch (p->mode)
4210 {
4211 case HImode:
4212 break;
4213 case SImode:
4214 case SFmode:
4215 if (need_align)
4216 {
4217 need_align = 0;
4218 scan = emit_label_after (gen_label_rtx (), scan);
4219 scan = emit_insn_after (gen_align_4 (), scan);
4220 }
4221 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4222 scan = emit_label_after (lab, scan);
4223 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4224 scan);
4225 break;
4226 case DFmode:
4227 case DImode:
4228 if (need_align)
4229 {
4230 need_align = 0;
4231 scan = emit_label_after (gen_label_rtx (), scan);
4232 scan = emit_insn_after (gen_align_4 (), scan);
4233 }
4234 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4235 scan = emit_label_after (lab, scan);
4236 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4237 scan);
4238 break;
4239 default:
4240 gcc_unreachable ();
4241 }
4242
4243 if (p->mode != HImode)
4244 {
4245 for (ref = p->wend; ref; ref = ref->next)
4246 {
4247 lab = ref->label;
4248 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4249 }
4250 }
4251 }
4252
4253 scan = emit_insn_after (gen_consttable_end (), scan);
4254 scan = emit_barrier_after (scan);
4255 pool_size = 0;
4256 pool_window_label = NULL_RTX;
4257 pool_window_last = 0;
4258 }
4259
4260 /* Return nonzero if the constant would be an ok source for a
4261 mov.w instead of a mov.l. */
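/* mov.w @(disp,PC),Rn loads a 16-bit value and sign-extends it to 32 bits,
   hence the signed 16-bit range check. */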
4262
4263 static int
4264 hi_const (rtx src)
4265 {
4266 return (CONST_INT_P (src)
4267 && INTVAL (src) >= -32768
4268 && INTVAL (src) <= 32767);
4269 }
4270
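/* Extract the label (or, for mova_const, the CONST) wrapped in the
   UNSPEC_MOVA of a mova insn. */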
4271 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4272
4273 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4274
4275 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4276 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4277 need to fix it if the input value is CONST_OK_FOR_I08. */
4278
4279 static int
4280 broken_move (rtx insn)
4281 {
4282 if (NONJUMP_INSN_P (insn))
4283 {
4284 rtx pat = PATTERN (insn);
4285 if (GET_CODE (pat) == PARALLEL)
4286 pat = XVECEXP (pat, 0, 0);
4287 if (GET_CODE (pat) == SET
4288 /* We can load any 8-bit value if we don't care what the high
4289 order bits end up as. */
4290 && GET_MODE (SET_DEST (pat)) != QImode
4291 && (CONSTANT_P (SET_SRC (pat))
4292 /* Match mova_const. */
4293 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4294 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4295 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4296 && ! (TARGET_SH2E
4297 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4298 && (fp_zero_operand (SET_SRC (pat))
4299 || fp_one_operand (SET_SRC (pat)))
4300 /* In general we don't know the current setting of fpscr, so disable fldi.
4301 There is an exception if this was a register-register move
4302 before reload - and hence it was ascertained that we have
4303 single precision setting - and in a post-reload optimization
4304 we changed this to do a constant load. In that case
4305 we don't have an r0 clobber, hence we must use fldi. */
4306 && (TARGET_FMOVD
4307 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4308 == SCRATCH))
4309 && REG_P (SET_DEST (pat))
4310 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4311 && ! (TARGET_SH2A
4312 && GET_MODE (SET_DEST (pat)) == SImode
4313 && (satisfies_constraint_I20 (SET_SRC (pat))
4314 || satisfies_constraint_I28 (SET_SRC (pat))))
4315 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4316 return 1;
4317 }
4318
4319 return 0;
4320 }
4321
4322 static int
4323 mova_p (rtx insn)
4324 {
4325 return (NONJUMP_INSN_P (insn)
4326 && GET_CODE (PATTERN (insn)) == SET
4327 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4328 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4329 /* Don't match mova_const. */
4330 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4331 }
4332
4333 /* Fix up a mova from a switch that went out of range. */
4334 static void
4335 fixup_mova (rtx mova)
4336 {
4337 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4338 if (! flag_pic)
4339 {
4340 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4341 INSN_CODE (mova) = -1;
4342 }
4343 else
4344 {
4345 rtx worker = mova;
4346 rtx lab = gen_label_rtx ();
4347 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4348
4349 do
4350 {
4351 worker = NEXT_INSN (worker);
4352 gcc_assert (worker
4353 && !LABEL_P (worker)
4354 && !JUMP_P (worker));
4355 } while (NOTE_P (worker)
4356 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4357 wpat = PATTERN (worker);
4358 wpat0 = XVECEXP (wpat, 0, 0);
4359 wpat1 = XVECEXP (wpat, 0, 1);
4360 wsrc = SET_SRC (wpat0);
4361 PATTERN (worker) = (gen_casesi_worker_2
4362 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4363 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4364 XEXP (wpat1, 0)));
4365 INSN_CODE (worker) = -1;
4366 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4367 base = gen_rtx_LABEL_REF (Pmode, lab);
4368 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4369 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4370 INSN_CODE (mova) = -1;
4371 }
4372 }
4373
4374 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4375 *num_mova, and check if the new mova is not nested within the first one.
4376 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4377 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4378 static int
4379 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4380 {
4381 int n_addr = 0; /* Initialization to shut up spurious warning. */
4382 int f_target, n_target = 0; /* Likewise. */
4383
4384 if (optimize)
4385 {
4386 /* If NEW_MOVA has no address yet, it will be handled later. */
4387 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4388 return -1;
4389
4390 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4391 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4392 if (n_addr > n_target || n_addr + 1022 < n_target)
4393 {
4394 /* Change the mova into a load.
4395 broken_move will then return true for it. */
4396 fixup_mova (new_mova);
4397 return 1;
4398 }
4399 }
4400 if (!(*num_mova)++)
4401 {
4402 *first_mova = new_mova;
4403 return 2;
4404 }
4405 if (!optimize
4406 || ((f_target
4407 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4408 >= n_target))
4409 return -1;
4410
4411 (*num_mova)--;
4412 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4413 > n_target - n_addr)
4414 {
4415 fixup_mova (*first_mova);
4416 return 0;
4417 }
4418 else
4419 {
4420 fixup_mova (new_mova);
4421 return 1;
4422 }
4423 }
4424
4425 /* Find the last barrier from insn FROM which is close enough to hold the
4426 constant pool. If we can't find one, then create one near the end of
4427 the range. */
4428
4429 static rtx
4430 find_barrier (int num_mova, rtx mova, rtx from)
4431 {
4432 int count_si = 0;
4433 int count_hi = 0;
4434 int found_hi = 0;
4435 int found_si = 0;
4436 int found_di = 0;
4437 int hi_align = 2;
4438 int si_align = 2;
4439 int leading_mova = num_mova;
4440 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4441 int si_limit;
4442 int hi_limit;
4443 rtx orig = from;
4444 rtx last_got = NULL_RTX;
4445 rtx last_symoff = NULL_RTX;
4446
4447 /* For HImode: range is 510, add 4 because pc counts from address of
4448 second instruction after this one, subtract 2 for the jump instruction
4449 that we may need to emit before the table, subtract 2 for the instruction
4450 that fills the jump delay slot (in very rare cases, reorg will take an
4451 instruction from after the constant pool or will leave the delay slot
4452 empty). This gives 510.
4453 For SImode: range is 1020, add 4 because pc counts from address of
4454 second instruction after this one, subtract 2 in case pc is 2 byte
4455 aligned, subtract 2 for the jump instruction that we may need to emit
4456 before the table, subtract 2 for the instruction that fills the jump
4457 delay slot. This gives 1018. */
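/* In other words: hi_limit = 510 + 4 - 2 - 2 = 510 and
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018, matching the values set below. */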
4458
4459 /* The branch will always be shortened now that the reference address for
4460 forward branches is the successor address, so we no longer need to make
4461 adjustments to the [sh]i_limit for -O0. */
4462
4463 si_limit = 1018;
4464 hi_limit = 510;
4465
4466 while (from && count_si < si_limit && count_hi < hi_limit)
4467 {
4468 int inc = get_attr_length (from);
4469 int new_align = 1;
4470
4471 /* If this is a label that existed at the time of the compute_alignments
4472 call, determine the alignment. N.B. When find_barrier recurses for
4473 an out-of-reach mova, we might see labels at the start of previously
4474 inserted constant tables. */
4475 if (LABEL_P (from)
4476 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4477 {
4478 if (optimize)
4479 new_align = 1 << label_to_alignment (from);
4480 else if (BARRIER_P (prev_nonnote_insn (from)))
4481 new_align = 1 << barrier_align (from);
4482 else
4483 new_align = 1;
4484 inc = 0;
4485 }
4486 /* In case we are scanning a constant table because of recursion, check
4487 for explicit alignments. If the table is long, we might be forced
4488 to emit the new table in front of it; the length of the alignment
4489 might be the last straw. */
4490 else if (NONJUMP_INSN_P (from)
4491 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4492 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4493 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4494 /* When we find the end of a constant table, paste the new constant
4495 at the end. That is better than putting it in front because
4496 this way, we don't need extra alignment for adding a 4-byte-aligned
4497 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4498 else if (NONJUMP_INSN_P (from)
4499 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4500 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4501 return from;
4502
4503 if (BARRIER_P (from))
4504 {
4505 rtx next;
4506
4507 found_barrier = from;
4508
4509 /* If we are at the end of the function, or in front of an alignment
4510 instruction, we need not insert an extra alignment. We prefer
4511 this kind of barrier. */
4512 if (barrier_align (from) > 2)
4513 good_barrier = from;
4514
4515 /* If we are at the end of a hot/cold block, dump the constants
4516 here. */
4517 next = NEXT_INSN (from);
4518 if (next
4519 && NOTE_P (next)
4520 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4521 break;
4522 }
4523
4524 if (broken_move (from))
4525 {
4526 rtx pat, src, dst;
4527 enum machine_mode mode;
4528
4529 pat = PATTERN (from);
4530 if (GET_CODE (pat) == PARALLEL)
4531 pat = XVECEXP (pat, 0, 0);
4532 src = SET_SRC (pat);
4533 dst = SET_DEST (pat);
4534 mode = GET_MODE (dst);
4535
4536 /* A GOT pc-relative setting comes as a pair of
4537 mova .L8,r0
4538 mov.l .L8,r12
4539 instructions (plus an add r0,r12).
4540 Remember if we see one without the other. */
4541 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4542 last_got = last_got ? NULL_RTX : from;
4543 else if (PIC_ADDR_P (src))
4544 last_got = last_got ? NULL_RTX : from;
4545
4546 /* We must explicitly check the mode, because sometimes the
4547 front end will generate code to load unsigned constants into
4548 HImode targets without properly sign extending them. */
4549 if (mode == HImode
4550 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4551 {
4552 found_hi += 2;
4553 /* We put the short constants before the long constants, so
4554 we must count the length of short constants in the range
4555 for the long constants. */
4556 /* ??? This isn't optimal, but is easy to do. */
4557 si_limit -= 2;
4558 }
4559 else
4560 {
4561 /* We dump DF/DI constants before SF/SI ones, because
4562 the limit is the same, but the alignment requirements
4563 are higher. We may waste up to 4 additional bytes
4564 for alignment, and the DF/DI constant may have
4565 another SF/SI constant placed before it. */
4566 if (TARGET_SHCOMPACT
4567 && ! found_di
4568 && (mode == DFmode || mode == DImode))
4569 {
4570 found_di = 1;
4571 si_limit -= 8;
4572 }
4573 while (si_align > 2 && found_si + si_align - 2 > count_si)
4574 si_align >>= 1;
4575 if (found_si > count_si)
4576 count_si = found_si;
4577 found_si += GET_MODE_SIZE (mode);
4578 if (num_mova)
4579 si_limit -= GET_MODE_SIZE (mode);
4580 }
4581 }
4582
4583 if (mova_p (from))
4584 {
4585 switch (untangle_mova (&num_mova, &mova, from))
4586 {
4587 case 1:
4588 if (flag_pic)
4589 {
4590 rtx src = SET_SRC (PATTERN (from));
4591 if (GET_CODE (src) == CONST
4592 && GET_CODE (XEXP (src, 0)) == UNSPEC
4593 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4594 last_symoff = from;
4595 }
4596 break;
4597 case 0: return find_barrier (0, 0, mova);
4598 case 2:
4599 {
4600 leading_mova = 0;
4601 barrier_before_mova
4602 = good_barrier ? good_barrier : found_barrier;
4603 }
4604 default: break;
4605 }
4606 if (found_si > count_si)
4607 count_si = found_si;
4608 }
4609 else if (JUMP_TABLE_DATA_P (from))
4610 {
4611 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4612 || (num_mova
4613 && (prev_nonnote_insn (from)
4614 == XEXP (MOVA_LABELREF (mova), 0))))
4615 num_mova--;
4616 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4617 {
4618 /* We have just passed the barrier in front of the
4619 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4620 the ADDR_DIFF_VEC is accessed as data, just like our pool
4621 constants, this is a good opportunity to accommodate what
4622 we have gathered so far.
4623 If we waited any longer, we could end up at a barrier in
4624 front of code, which gives worse cache usage for separated
4625 instruction / data caches. */
4626 good_barrier = found_barrier;
4627 break;
4628 }
4629 else
4630 {
4631 rtx body = PATTERN (from);
4632 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4633 }
4634 }
4635 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4636 else if (JUMP_P (from)
4637 && ! TARGET_SH2
4638 && ! optimize_size)
4639 new_align = 4;
4640
4641 /* There is a possibility that a bf is transformed into a bf/s by the
4642 delay slot scheduler. */
4643 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4644 && get_attr_type (from) == TYPE_CBRANCH
4645 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4646 inc += 2;
4647
4648 if (found_si)
4649 {
4650 count_si += inc;
4651 if (new_align > si_align)
4652 {
4653 si_limit -= (count_si - 1) & (new_align - si_align);
4654 si_align = new_align;
4655 }
4656 count_si = (count_si + new_align - 1) & -new_align;
4657 }
4658 if (found_hi)
4659 {
4660 count_hi += inc;
4661 if (new_align > hi_align)
4662 {
4663 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4664 hi_align = new_align;
4665 }
4666 count_hi = (count_hi + new_align - 1) & -new_align;
4667 }
4668 from = NEXT_INSN (from);
4669 }
4670
4671 if (num_mova)
4672 {
4673 if (leading_mova)
4674 {
4675 /* Try as we might, the leading mova is out of range. Change
4676 it into a load (which will become a pcload) and retry. */
4677 fixup_mova (mova);
4678 return find_barrier (0, 0, mova);
4679 }
4680 else
4681 {
4682 /* Insert the constant pool table before the mova instruction,
4683 to prevent the mova label reference from going out of range. */
4684 from = mova;
4685 good_barrier = found_barrier = barrier_before_mova;
4686 }
4687 }
4688
4689 if (found_barrier)
4690 {
4691 if (good_barrier && next_real_insn (found_barrier))
4692 found_barrier = good_barrier;
4693 }
4694 else
4695 {
4696 /* We didn't find a barrier in time to dump our stuff,
4697 so we'll make one. */
4698 rtx label = gen_label_rtx ();
4699
4700 /* Don't emit a constant table in the middle of insns for
4701 casesi_worker_2. This is a bit overkill but is enough
4702 because casesi_worker_2 doesn't appear very frequently. */
4703 if (last_symoff)
4704 from = last_symoff;
4705
4706 /* If we exceeded the range, then we must back up over the last
4707 instruction we looked at. Otherwise, we just need to undo the
4708 NEXT_INSN at the end of the loop. */
4709 if (PREV_INSN (from) != orig
4710 && (count_hi > hi_limit || count_si > si_limit))
4711 from = PREV_INSN (PREV_INSN (from));
4712 else
4713 from = PREV_INSN (from);
4714
4715 /* Don't emit a constant table in the middle of global pointer setting,
4716 since that would move the addressing base GOT into another table.
4717 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4718 in the pool anyway, so just move up the whole constant pool. */
4719 if (last_got)
4720 from = PREV_INSN (last_got);
4721
4722 /* Don't insert the constant pool table at a position which
4723 may be a landing pad. */
4724 if (flag_exceptions
4725 && CALL_P (from)
4726 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4727 from = PREV_INSN (from);
4728
4729 /* Walk back to be just before any jump or label.
4730 Putting it before a label reduces the number of times the branch
4731 around the constant pool table will be hit. Putting it before
4732 a jump makes it more likely that the bra delay slot will be
4733 filled. */
4734 while (NOTE_P (from) || JUMP_P (from)
4735 || LABEL_P (from))
4736 from = PREV_INSN (from);
4737
4738 /* Make sure we do not split between a call and its corresponding
4739 CALL_ARG_LOCATION note. */
4740 if (CALL_P (from))
4741 {
4742 rtx next = NEXT_INSN (from);
4743 if (next && NOTE_P (next)
4744 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
4745 from = next;
4746 }
4747
4748 from = emit_jump_insn_after (gen_jump (label), from);
4749 JUMP_LABEL (from) = label;
4750 LABEL_NUSES (label) = 1;
4751 found_barrier = emit_barrier_after (from);
4752 emit_label_after (label, found_barrier);
4753 }
4754
4755 return found_barrier;
4756 }
4757
4758 /* If the instruction INSN is implemented by a special function, and we can
4759 positively find the register that is used to call the sfunc, and this
4760 register is not used anywhere else in this instruction (except as the
4761 destination of a set), return this register; else, return 0. */
4762 rtx
4763 sfunc_uses_reg (rtx insn)
4764 {
4765 int i;
4766 rtx pattern, part, reg_part, reg;
4767
4768 if (!NONJUMP_INSN_P (insn))
4769 return 0;
4770 pattern = PATTERN (insn);
4771 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4772 return 0;
4773
4774 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4775 {
4776 part = XVECEXP (pattern, 0, i);
4777 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4778 reg_part = part;
4779 }
4780 if (! reg_part)
4781 return 0;
4782 reg = XEXP (reg_part, 0);
4783 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4784 {
4785 part = XVECEXP (pattern, 0, i);
4786 if (part == reg_part || GET_CODE (part) == CLOBBER)
4787 continue;
4788 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4789 && REG_P (SET_DEST (part)))
4790 ? SET_SRC (part) : part)))
4791 return 0;
4792 }
4793 return reg;
4794 }
4795
4796 /* See if the only way in which INSN uses REG is by calling it, or by
4797 setting it while calling it. Set *SET to a SET rtx if the register
4798 is set by INSN. */
4799
4800 static int
4801 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4802 {
4803 rtx pattern, reg2;
4804
4805 *set = NULL_RTX;
4806
4807 reg2 = sfunc_uses_reg (insn);
4808 if (reg2 && REGNO (reg2) == REGNO (reg))
4809 {
4810 pattern = single_set (insn);
4811 if (pattern
4812 && REG_P (SET_DEST (pattern))
4813 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4814 *set = pattern;
4815 return 0;
4816 }
4817 if (!CALL_P (insn))
4818 {
4819 /* We don't use rtx_equal_p because we don't care if the mode is
4820 different. */
4821 pattern = single_set (insn);
4822 if (pattern
4823 && REG_P (SET_DEST (pattern))
4824 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4825 {
4826 rtx par, part;
4827 int i;
4828
4829 *set = pattern;
4830 par = PATTERN (insn);
4831 if (GET_CODE (par) == PARALLEL)
4832 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4833 {
4834 part = XVECEXP (par, 0, i);
4835 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4836 return 1;
4837 }
4838 return reg_mentioned_p (reg, SET_SRC (pattern));
4839 }
4840
4841 return 1;
4842 }
4843
4844 pattern = PATTERN (insn);
4845
4846 if (GET_CODE (pattern) == PARALLEL)
4847 {
4848 int i;
4849
4850 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4851 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4852 return 1;
4853 pattern = XVECEXP (pattern, 0, 0);
4854 }
4855
4856 if (GET_CODE (pattern) == SET)
4857 {
4858 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4859 {
4860 /* We don't use rtx_equal_p, because we don't care if the
4861 mode is different. */
4862 if (!REG_P (SET_DEST (pattern))
4863 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4864 return 1;
4865
4866 *set = pattern;
4867 }
4868
4869 pattern = SET_SRC (pattern);
4870 }
4871
4872 if (GET_CODE (pattern) != CALL
4873 || !MEM_P (XEXP (pattern, 0))
4874 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4875 return 1;
4876
4877 return 0;
4878 }
4879
4880 /* Given X, a pattern of an insn or a part of it, return a mask of used
4881 general registers. Bits 0..15 mean that the respective registers
4882 are used as inputs in the instruction. Bits 16..31 mean that the
4883 registers 0..15, respectively, are used as outputs, or are clobbered.
4884 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
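/* For example, a hypothetical (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   yields 0x2000c: bits 2 and 3 for the inputs r2 and r3, and bit 17
   (i.e. 1 + 16) for the output r1. */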
4885 int
4886 regs_used (rtx x, int is_dest)
4887 {
4888 enum rtx_code code;
4889 const char *fmt;
4890 int i, used = 0;
4891
4892 if (! x)
4893 return used;
4894 code = GET_CODE (x);
4895 switch (code)
4896 {
4897 case REG:
4898 if (REGNO (x) < 16)
4899 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4900 << (REGNO (x) + is_dest));
4901 return 0;
4902 case SUBREG:
4903 {
4904 rtx y = SUBREG_REG (x);
4905
4906 if (!REG_P (y))
4907 break;
4908 if (REGNO (y) < 16)
4909 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4910 << (REGNO (y) +
4911 subreg_regno_offset (REGNO (y),
4912 GET_MODE (y),
4913 SUBREG_BYTE (x),
4914 GET_MODE (x)) + is_dest));
4915 return 0;
4916 }
4917 case SET:
4918 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4919 case RETURN:
4920 /* If there was a return value, it must have been indicated with USE. */
4921 return 0x00ffff00;
4922 case CLOBBER:
4923 is_dest = 1;
4924 break;
4925 case MEM:
4926 is_dest = 0;
4927 break;
4928 case CALL:
4929 used |= 0x00ff00f0;
4930 break;
4931 default:
4932 break;
4933 }
4934
4935 fmt = GET_RTX_FORMAT (code);
4936
4937 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4938 {
4939 if (fmt[i] == 'E')
4940 {
4941 register int j;
4942 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4943 used |= regs_used (XVECEXP (x, i, j), is_dest);
4944 }
4945 else if (fmt[i] == 'e')
4946 used |= regs_used (XEXP (x, i), is_dest);
4947 }
4948 return used;
4949 }
4950
4951 /* Create an instruction that prevents redirection of a conditional branch
4952 to the destination of the JUMP with address ADDR.
4953 If the branch needs to be implemented as an indirect jump, try to find
4954 a scratch register for it.
4955 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4956 If any preceding insn that doesn't fit into a delay slot is good enough,
4957 pass 1. Pass 2 if a definite blocking insn is needed.
4958 -1 is used internally to avoid deep recursion.
4959 If a blocking instruction is made or recognized, return it. */
4960
4961 static rtx
4962 gen_block_redirect (rtx jump, int addr, int need_block)
4963 {
4964 int dead = 0;
4965 rtx prev = prev_nonnote_insn (jump);
4966 rtx dest;
4967
4968 /* First, check if we already have an instruction that satisfies our need. */
4969 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4970 {
4971 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4972 return prev;
4973 if (GET_CODE (PATTERN (prev)) == USE
4974 || GET_CODE (PATTERN (prev)) == CLOBBER
4975 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4976 prev = jump;
4977 else if ((need_block &= ~1) < 0)
4978 return prev;
4979 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4980 need_block = 0;
4981 }
4982 if (GET_CODE (PATTERN (jump)) == RETURN)
4983 {
4984 if (! need_block)
4985 return prev;
4986 /* Reorg even does nasty things with return insns that cause branches
4987 to go out of range - see find_end_label and callers. */
4988 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4989 }
4990 /* We can't use JUMP_LABEL here because it might be undefined
4991 when not optimizing. */
4992 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4993 /* If the branch is out of range, try to find a scratch register for it. */
4994 if (optimize
4995 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4996 > 4092 + 4098))
4997 {
4998 rtx scan;
4999 /* Don't look for the stack pointer as a scratch register,
5000 it would cause trouble if an interrupt occurred. */
5001 unsigned attempt = 0x7fff, used;
5002 int jump_left = flag_expensive_optimizations + 1;
5003
5004 /* It is likely that the most recent eligible instruction is wanted for
5005 the delay slot. Therefore, find out which registers it uses, and
5006 try to avoid using them. */
5007
5008 for (scan = jump; (scan = PREV_INSN (scan)); )
5009 {
5010 enum rtx_code code;
5011
5012 if (INSN_DELETED_P (scan))
5013 continue;
5014 code = GET_CODE (scan);
5015 if (code == CODE_LABEL || code == JUMP_INSN)
5016 break;
5017 if (code == INSN
5018 && GET_CODE (PATTERN (scan)) != USE
5019 && GET_CODE (PATTERN (scan)) != CLOBBER
5020 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5021 {
5022 attempt &= ~regs_used (PATTERN (scan), 0);
5023 break;
5024 }
5025 }
5026 for (used = dead = 0, scan = JUMP_LABEL (jump);
5027 (scan = NEXT_INSN (scan)); )
5028 {
5029 enum rtx_code code;
5030
5031 if (INSN_DELETED_P (scan))
5032 continue;
5033 code = GET_CODE (scan);
5034 if (INSN_P (scan))
5035 {
5036 used |= regs_used (PATTERN (scan), 0);
5037 if (code == CALL_INSN)
5038 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5039 dead |= (used >> 16) & ~used;
5040 if (dead & attempt)
5041 {
5042 dead &= attempt;
5043 break;
5044 }
5045 if (code == JUMP_INSN)
5046 {
5047 if (jump_left-- && simplejump_p (scan))
5048 scan = JUMP_LABEL (scan);
5049 else
5050 break;
5051 }
5052 }
5053 }
5054 /* Mask out the stack pointer again, in case it was
5055 the only 'free' register we have found. */
5056 dead &= 0x7fff;
5057 }
5058 /* If the immediate destination is still in range, check for possible
5059 threading with a jump beyond the delay slot insn.
5060 Don't check if we are called recursively; the jump has been or will be
5061 checked in a different invocation in that case.
5062
5063 else if (optimize && need_block >= 0)
5064 {
5065 rtx next = next_active_insn (next_active_insn (dest));
5066 if (next && JUMP_P (next)
5067 && GET_CODE (PATTERN (next)) == SET
5068 && recog_memoized (next) == CODE_FOR_jump_compact)
5069 {
5070 dest = JUMP_LABEL (next);
5071 if (dest
5072 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5073 > 4092 + 4098))
5074 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5075 }
5076 }
5077
5078 if (dead)
5079 {
5080 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5081
5082 /* It would be nice if we could convert the jump into an indirect
5083 jump / far branch right now, and thus exposing all constituent
5084 instructions to further optimization. However, reorg uses
5085 simplejump_p to determine if there is an unconditional jump where
5086 it should try to schedule instructions from the target of the
5087 branch; simplejump_p fails for indirect jumps even if they have
5088 a JUMP_LABEL. */
5089 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5090 (reg, GEN_INT (unspec_bbr_uid++)),
5091 jump);
5092 /* ??? We would like this to have the scope of the jump, but that
5093 scope will change when a delay slot insn of an inner scope is added.
5094 Hence, after delay slot scheduling, we'll have to expect
5095 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5096 the jump. */
5097
5098 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5099 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5100 return insn;
5101 }
5102 else if (need_block)
5103 /* We can't use JUMP_LABEL here because it might be undefined
5104 when not optimizing. */
5105 return emit_insn_before (gen_block_branch_redirect
5106 (GEN_INT (unspec_bbr_uid++)),
5107 jump);
5108 return prev;
5109 }
5110
5111 #define CONDJUMP_MIN -252
5112 #define CONDJUMP_MAX 262
5113 struct far_branch
5114 {
5115 /* A label (to be placed) in front of the jump
5116 that jumps to our ultimate destination. */
5117 rtx near_label;
5118 /* Where we are going to insert it if we cannot move the jump any farther,
5119 or the jump itself if we have picked up an existing jump. */
5120 rtx insert_place;
5121 /* The ultimate destination. */
5122 rtx far_label;
5123 struct far_branch *prev;
5124 /* If the branch has already been created, its address;
5125 else the address of its first prospective user. */
5126 int address;
5127 };
5128
5129 static void gen_far_branch (struct far_branch *);
5130 enum mdep_reorg_phase_e mdep_reorg_phase;
5131 static void
5132 gen_far_branch (struct far_branch *bp)
5133 {
5134 rtx insn = bp->insert_place;
5135 rtx jump;
5136 rtx label = gen_label_rtx ();
5137 int ok;
5138
5139 emit_label_after (label, insn);
5140 if (bp->far_label)
5141 {
5142 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5143 LABEL_NUSES (bp->far_label)++;
5144 }
5145 else
5146 jump = emit_jump_insn_after (gen_return (), insn);
5147 /* Emit a barrier so that reorg knows that any following instructions
5148 are not reachable via a fall-through path.
5149 But don't do this when not optimizing, since we wouldn't suppress the
5150 alignment for the barrier then, and could end up with out-of-range
5151 pc-relative loads. */
5152 if (optimize)
5153 emit_barrier_after (jump);
5154 emit_label_after (bp->near_label, insn);
5155 JUMP_LABEL (jump) = bp->far_label;
5156 ok = invert_jump (insn, label, 1);
5157 gcc_assert (ok);
5158
5159 /* If we are branching around a jump (rather than a return), prevent
5160 reorg from using an insn from the jump target as the delay slot insn -
5161 when reorg did this, it pessimized code (we would rather hide the delay slot)
5162 and it could cause branches to go out of range. */
5163 if (bp->far_label)
5164 (emit_insn_after
5165 (gen_stuff_delay_slot
5166 (GEN_INT (unspec_bbr_uid++),
5167 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5168 insn));
5169 /* Prevent reorg from undoing our splits. */
5170 gen_block_redirect (jump, bp->address += 2, 2);
5171 }
5172
5173 /* Fix up ADDR_DIFF_VECs. */
5174 void
5175 fixup_addr_diff_vecs (rtx first)
5176 {
5177 rtx insn;
5178
5179 for (insn = first; insn; insn = NEXT_INSN (insn))
5180 {
5181 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5182
5183 if (!JUMP_P (insn)
5184 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5185 continue;
5186 pat = PATTERN (insn);
5187 vec_lab = XEXP (XEXP (pat, 0), 0);
5188
5189 /* Search the matching casesi_jump_2. */
5190 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5191 {
5192 if (!JUMP_P (prev))
5193 continue;
5194 prevpat = PATTERN (prev);
5195 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5196 continue;
5197 x = XVECEXP (prevpat, 0, 1);
5198 if (GET_CODE (x) != USE)
5199 continue;
5200 x = XEXP (x, 0);
5201 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5202 break;
5203 }
5204 /* FIXME: This is a bug in the optimizer, but it seems harmless
5205 to just avoid panicking. */
5206 if (!prev)
5207 continue;
5208
5209 /* Emit the reference label of the braf where it belongs, right after
5210 the casesi_jump_2 (i.e. braf). */
5211 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5212 emit_label_after (braf_label, prev);
5213
5214 /* Fix up the ADDR_DIF_VEC to be relative
5215 to the reference address of the braf. */
5216 XEXP (XEXP (pat, 0), 0) = braf_label;
5217 }
5218 }
5219
5220 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5221 a barrier. Return the base 2 logarithm of the desired alignment. */
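/* E.g. a return value of 2 requests 1 << 2 = 4-byte alignment. */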
5222 int
5223 barrier_align (rtx barrier_or_label)
5224 {
5225 rtx next = next_real_insn (barrier_or_label), pat, prev;
5226 int slot, credit, jump_to_next = 0;
5227
5228 if (! next)
5229 return 0;
5230
5231 pat = PATTERN (next);
5232
5233 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5234 return 2;
5235
5236 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5237 /* This is a barrier in front of a constant table. */
5238 return 0;
5239
5240 prev = prev_real_insn (barrier_or_label);
5241 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5242 {
5243 pat = PATTERN (prev);
5244 /* If this is a very small table, we want to keep the alignment after
5245 the table to the minimum for proper code alignment. */
5246 return ((optimize_size
5247 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5248 <= (unsigned) 1 << (CACHE_LOG - 2)))
5249 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5250 }
5251
5252 if (optimize_size)
5253 return 0;
5254
5255 if (! TARGET_SH2 || ! optimize)
5256 return align_jumps_log;
5257
5258 /* When fixing up pcloads, a constant table might be inserted just before
5259 the basic block that ends with the barrier. Thus, we can't trust the
5260 instruction lengths before that. */
5261 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5262 {
5263 /* Check if there is an immediately preceding branch to the insn beyond
5264 the barrier. We must weigh the cost of discarding useful information
5265 from the current cache line when executing this branch and there is
5266 an alignment, against that of fetching unneeded insns in front of the
5267 branch target when there is no alignment. */
5268
5269 /* There are two delay_slot cases to consider. One is the simple case
5270 where the preceding branch is to the insn beyond the barrier (simple
5271 delay slot filling), and the other is where the preceding branch has
5272 a delay slot that is a duplicate of the insn after the barrier
5273 (fill_eager_delay_slots) and the branch is to the insn after the insn
5274 after the barrier. */
5275
5276 /* PREV is presumed to be the JUMP_INSN for the barrier under
5277 investigation. Skip to the insn before it. */
5278 prev = prev_real_insn (prev);
5279
5280 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5281 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5282 prev = prev_real_insn (prev))
5283 {
5284 jump_to_next = 0;
5285 if (GET_CODE (PATTERN (prev)) == USE
5286 || GET_CODE (PATTERN (prev)) == CLOBBER)
5287 continue;
5288 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5289 {
5290 prev = XVECEXP (PATTERN (prev), 0, 1);
5291 if (INSN_UID (prev) == INSN_UID (next))
5292 {
5293 /* Delay slot was filled with insn at jump target. */
5294 jump_to_next = 1;
5295 continue;
5296 }
5297 }
5298
5299 if (slot &&
5300 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5301 slot = 0;
5302 credit -= get_attr_length (prev);
5303 }
5304 if (prev && jump_to_label_p (prev))
5305 {
5306 rtx x;
5307 if (jump_to_next
5308 || next_real_insn (JUMP_LABEL (prev)) == next
5309 /* If relax_delay_slots() decides NEXT was redundant
5310 with some previous instruction, it will have
5311 redirected PREV's jump to the following insn. */
5312 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5313 /* There is no upper bound on redundant instructions
5314 that might have been skipped, but we must not put an
5315 alignment where none had been before. */
5316 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5317 (INSN_P (x)
5318 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5319 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5320 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5321 {
5322 rtx pat = PATTERN (prev);
5323 if (GET_CODE (pat) == PARALLEL)
5324 pat = XVECEXP (pat, 0, 0);
5325 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5326 return 0;
5327 }
5328 }
5329 }
5330
5331 return align_jumps_log;
5332 }
5333
5334 /* If we are inside a phony loop, almost any kind of label can turn up as the
5335 first one in the loop. Aligning a braf label causes incorrect switch
5336 destination addresses; we can detect braf labels because they are
5337 followed by a BARRIER.
5338 Applying loop alignment to small constant or switch tables is a waste
5339 of space, so we suppress this too. */
5340 int
5341 sh_loop_align (rtx label)
5342 {
5343 rtx next = label;
5344
5345 do
5346 next = next_nonnote_insn (next);
5347 while (next && LABEL_P (next));
5348
5349 if (! next
5350 || ! INSN_P (next)
5351 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5352 || recog_memoized (next) == CODE_FOR_consttable_2)
5353 return 0;
5354
5355 return align_loops_log;
5356 }
5357
5358 /* Do a final pass over the function, just before delayed branch
5359 scheduling. */
5360
5361 static void
5362 sh_reorg (void)
5363 {
5364 rtx first, insn, mova = NULL_RTX;
5365 int num_mova;
5366 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5367 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5368
5369 first = get_insns ();
5370 max_labelno_before_reorg = max_label_num ();
5371
5372 /* We must split call insns before introducing `mova's. If we're
5373 optimizing, they'll have already been split. Otherwise, make
5374 sure we don't split them too late. */
5375 if (! optimize)
5376 split_all_insns_noflow ();
5377
5378 if (TARGET_SHMEDIA)
5379 return;
5380
5381 /* If relaxing, generate pseudo-ops to associate function calls with
5382 the symbols they call. It does no harm to not generate these
5383 pseudo-ops. However, when we can generate them, it enables the
5384 linker to potentially relax the jsr to a bsr, and eliminate the
5385 register load and, possibly, the constant pool entry. */
5386
5387 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5388 if (TARGET_RELAX)
5389 {
5390 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5391 own purposes. This works because none of the remaining passes
5392 need to look at them.
5393
5394 ??? But it may break in the future. We should use a machine
5395 dependent REG_NOTE, or some other approach entirely. */
5396 for (insn = first; insn; insn = NEXT_INSN (insn))
5397 {
5398 if (INSN_P (insn))
5399 {
5400 rtx note;
5401
5402 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5403 NULL_RTX)) != 0)
5404 remove_note (insn, note);
5405 }
5406 }
5407
5408 for (insn = first; insn; insn = NEXT_INSN (insn))
5409 {
5410 rtx pattern, reg, link, set, scan, dies, label;
5411 int rescan = 0, foundinsn = 0;
5412
5413 if (CALL_P (insn))
5414 {
5415 pattern = PATTERN (insn);
5416
5417 if (GET_CODE (pattern) == PARALLEL)
5418 pattern = XVECEXP (pattern, 0, 0);
5419 if (GET_CODE (pattern) == SET)
5420 pattern = SET_SRC (pattern);
5421
5422 if (GET_CODE (pattern) != CALL
5423 || !MEM_P (XEXP (pattern, 0)))
5424 continue;
5425
5426 reg = XEXP (XEXP (pattern, 0), 0);
5427 }
5428 else
5429 {
5430 reg = sfunc_uses_reg (insn);
5431 if (! reg)
5432 continue;
5433 }
5434
5435 if (!REG_P (reg))
5436 continue;
5437
5438 /* Try scanning backward to find where the register is set. */
5439 link = NULL;
5440 for (scan = PREV_INSN (insn);
5441 scan && !LABEL_P (scan);
5442 scan = PREV_INSN (scan))
5443 {
5444 if (! INSN_P (scan))
5445 continue;
5446
5447 if (! reg_mentioned_p (reg, scan))
5448 continue;
5449
5450 if (noncall_uses_reg (reg, scan, &set))
5451 break;
5452
5453 if (set)
5454 {
5455 link = scan;
5456 break;
5457 }
5458 }
5459
5460 if (! link)
5461 continue;
5462
5463 /* The register is set at LINK. */
5464
5465 /* We can only optimize the function call if the register is
5466 being set to a symbol. In theory, we could sometimes
5467 optimize calls to a constant location, but the assembler
5468 and linker do not support that at present. */
5469 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5470 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5471 continue;
5472
5473 /* Scan forward from LINK to the place where REG dies, and
5474 make sure that the only insns which use REG are
5475 themselves function calls. */
5476
5477 /* ??? This doesn't work for call targets that were allocated
5478 by reload, since there may not be a REG_DEAD note for the
5479 register. */
5480
5481 dies = NULL_RTX;
5482 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5483 {
5484 rtx scanset;
5485
5486 /* Don't try to trace forward past a CODE_LABEL if we haven't
5487 seen INSN yet. Ordinarily, we will only find the setting insn
5488 if it is in the same basic block. However,
5489 cross-jumping can insert code labels in between the load and
5490 the call, and can result in situations where a single call
5491 insn may have two targets depending on where we came from. */
5492
5493 if (LABEL_P (scan) && ! foundinsn)
5494 break;
5495
5496 if (! INSN_P (scan))
5497 continue;
5498
5499 /* Don't try to trace forward past a JUMP. To optimize
5500 safely, we would have to check that all the
5501 instructions at the jump destination did not use REG. */
5502
5503 if (JUMP_P (scan))
5504 break;
5505
5506 if (! reg_mentioned_p (reg, scan))
5507 continue;
5508
5509 if (noncall_uses_reg (reg, scan, &scanset))
5510 break;
5511
5512 if (scan == insn)
5513 foundinsn = 1;
5514
5515 if (scan != insn
5516 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5517 {
5518 /* There is a function call to this register other
5519 than the one we are checking. If we optimize
5520 this call, we need to rescan again below. */
5521 rescan = 1;
5522 }
5523
5524 /* ??? We shouldn't have to worry about SCANSET here.
5525 We should just be able to check for a REG_DEAD note
5526 on a function call. However, the REG_DEAD notes are
5527 apparently not dependable around libcalls; c-torture
5528 execute/920501-2 is a test case. If SCANSET is set,
5529 then this insn sets the register, so it must have
5530 died earlier. Unfortunately, this will only handle
5531 the cases in which the register is, in fact, set in a
5532 later insn. */
5533
5534 /* ??? We shouldn't have to use FOUNDINSN here.
5535 This dates back to when we used LOG_LINKS to find
5536 the most recent insn which sets the register. */
5537
5538 if (foundinsn
5539 && (scanset
5540 || find_reg_note (scan, REG_DEAD, reg)))
5541 {
5542 dies = scan;
5543 break;
5544 }
5545 }
5546
5547 if (! dies)
5548 {
5549 /* Either there was a branch, or some insn used REG
5550 other than as a function call address. */
5551 continue;
5552 }
5553
5554 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5555 on the insn which sets the register, and on each call insn
5556 which uses the register. In final_prescan_insn we look for
5557 the REG_LABEL_OPERAND notes, and output the appropriate label
5558 or pseudo-op. */
5559
5560 label = gen_label_rtx ();
5561 add_reg_note (link, REG_LABEL_OPERAND, label);
5562 add_reg_note (insn, REG_LABEL_OPERAND, label);
5563 if (rescan)
5564 {
5565 scan = link;
5566 do
5567 {
5568 rtx reg2;
5569
5570 scan = NEXT_INSN (scan);
5571 if (scan != insn
5572 && ((CALL_P (scan)
5573 && reg_mentioned_p (reg, scan))
5574 || ((reg2 = sfunc_uses_reg (scan))
5575 && REGNO (reg2) == REGNO (reg))))
5576 add_reg_note (scan, REG_LABEL_OPERAND, label);
5577 }
5578 while (scan != dies);
5579 }
5580 }
5581 }
5582
5583 if (TARGET_SH2)
5584 fixup_addr_diff_vecs (first);
5585
5586 if (optimize)
5587 {
5588 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5589 shorten_branches (first);
5590 }
5591
5592 /* Scan the function looking for move instructions which have to be
5593 changed to pc-relative loads and insert the literal tables. */
5594 label_ref_list_pool = create_alloc_pool ("label references list",
5595 sizeof (struct label_ref_list_d),
5596 30);
5597 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5598 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5599 {
5600 if (mova_p (insn))
5601 {
5602 /* ??? basic block reordering can move a switch table dispatch
5603 below the switch table. Check if that has happened.
5604 We only have the addresses available when optimizing; but then,
5605 this check shouldn't be needed when not optimizing. */
5606 if (!untangle_mova (&num_mova, &mova, insn))
5607 {
5608 insn = mova;
5609 num_mova = 0;
5610 }
5611 }
5612 else if (JUMP_P (insn)
5613 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5614 && num_mova
5615 /* ??? loop invariant motion can also move a mova out of a
5616 loop. Since loop does this code motion anyway, maybe we
5617 should wrap UNSPEC_MOVA into a CONST, so that reload can
5618 move it back. */
5619 && ((num_mova > 1
5620 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5621 || (prev_nonnote_insn (insn)
5622 == XEXP (MOVA_LABELREF (mova), 0))))
5623 {
5624 rtx scan;
5625 int total;
5626
5627 num_mova--;
5628
5629 /* Some code might have been inserted between the mova and
5630 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5631 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5632 total += get_attr_length (scan);
5633
5634 /* The range of mova is 1020; add 4 because pc counts from the address of
5635 the second instruction after this one, and subtract 2 in case pc is
5636 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5637 cancels out with the alignment effects of the mova itself. */
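/* I.e. the limit checked below is 1020 (mova range) + 4 (pc bias) - 2 (possible misalignment) = 1022 bytes.  */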
5638 if (total > 1022)
5639 {
5640 /* Change the mova into a load, and restart scanning
5641 there. broken_move will then return true for mova. */
5642 fixup_mova (mova);
5643 insn = mova;
5644 }
5645 }
5646 if (broken_move (insn)
5647 || (NONJUMP_INSN_P (insn)
5648 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5649 {
5650 rtx scan;
5651 /* Scan ahead looking for a barrier to stick the constant table
5652 behind. */
5653 rtx barrier = find_barrier (num_mova, mova, insn);
5654 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5655 int need_aligned_label = 0;
5656
5657 if (num_mova && ! mova_p (mova))
5658 {
5659 /* find_barrier had to change the first mova into a
5660 pcload; thus, we have to start with this new pcload. */
5661 insn = mova;
5662 num_mova = 0;
5663 }
5664 /* Now find all the moves between the points and modify them. */
5665 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5666 {
5667 if (LABEL_P (scan))
5668 last_float = 0;
5669 if (NONJUMP_INSN_P (scan)
5670 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5671 need_aligned_label = 1;
5672 if (broken_move (scan))
5673 {
5674 rtx *patp = &PATTERN (scan), pat = *patp;
5675 rtx src, dst;
5676 rtx lab;
5677 rtx newsrc;
5678 enum machine_mode mode;
5679
5680 if (GET_CODE (pat) == PARALLEL)
5681 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5682 src = SET_SRC (pat);
5683 dst = SET_DEST (pat);
5684 mode = GET_MODE (dst);
5685
5686 if (mode == SImode && hi_const (src)
5687 && REGNO (dst) != FPUL_REG)
5688 {
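/* The constant fits in 16 bits, so it can be loaded with a pc-relative mov.w from a 2-byte (HImode) constant table entry instead of a 4-byte one.  */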
5689 int offset = 0;
5690
5691 mode = HImode;
5692 while (GET_CODE (dst) == SUBREG)
5693 {
5694 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5695 GET_MODE (SUBREG_REG (dst)),
5696 SUBREG_BYTE (dst),
5697 GET_MODE (dst));
5698 dst = SUBREG_REG (dst);
5699 }
5700 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5701 }
5702 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5703 {
5704 /* This must be an insn that clobbers r0. */
5705 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5706 XVECLEN (PATTERN (scan), 0)
5707 - 1);
5708 rtx clobber = *clobberp;
5709
5710 gcc_assert (GET_CODE (clobber) == CLOBBER
5711 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5712
5713 if (last_float
5714 && reg_set_between_p (r0_rtx, last_float_move, scan))
5715 last_float = 0;
5716 if (last_float
5717 && TARGET_SHCOMPACT
5718 && GET_MODE_SIZE (mode) != 4
5719 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5720 last_float = 0;
5721 lab = add_constant (src, mode, last_float);
5722 if (lab)
5723 emit_insn_before (gen_mova (lab), scan);
5724 else
5725 {
5726 /* There will be a REG_UNUSED note for r0 on
5727 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5728 otherwise reorg:mark_target_live_regs will not
5729 consider r0 to be used, and we would end up with a
5730 delay slot insn in front of SCAN that clobbers r0. */
5731 rtx note
5732 = find_regno_note (last_float_move, REG_UNUSED, 0);
5733
5734 /* If we are not optimizing, then there may not be
5735 a note. */
5736 if (note)
5737 PUT_REG_NOTE_KIND (note, REG_INC);
5738
5739 *last_float_addr = r0_inc_rtx;
5740 }
5741 last_float_move = scan;
5742 last_float = src;
5743 newsrc = gen_const_mem (mode,
5744 (((TARGET_SH4 && ! TARGET_FMOVD)
5745 || REGNO (dst) == FPUL_REG)
5746 ? r0_inc_rtx
5747 : r0_rtx));
5748 last_float_addr = &XEXP (newsrc, 0);
5749
5750 /* Remove the clobber of r0. */
5751 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5752 gen_rtx_SCRATCH (Pmode));
5753 }
5754 /* This is a mova needing a label. Create it. */
5755 else if (GET_CODE (src) == UNSPEC
5756 && XINT (src, 1) == UNSPEC_MOVA
5757 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5758 {
5759 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5760 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5761 newsrc = gen_rtx_UNSPEC (SImode,
5762 gen_rtvec (1, newsrc),
5763 UNSPEC_MOVA);
5764 }
5765 else
5766 {
5767 lab = add_constant (src, mode, 0);
5768 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5769 newsrc = gen_const_mem (mode, newsrc);
5770 }
5771 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5772 INSN_CODE (scan) = -1;
5773 }
5774 }
5775 dump_table (need_aligned_label ? insn : 0, barrier);
5776 insn = barrier;
5777 }
5778 }
5779 free_alloc_pool (label_ref_list_pool);
5780 for (insn = first; insn; insn = NEXT_INSN (insn))
5781 PUT_MODE (insn, VOIDmode);
5782
5783 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5784 INSN_ADDRESSES_FREE ();
5785 split_branches (first);
5786
5787 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5788 also has an effect on the register that holds the address of the sfunc.
5789 Insert an extra dummy insn in front of each sfunc that pretends to
5790 use this register. */
5791 if (flag_delayed_branch)
5792 {
5793 for (insn = first; insn; insn = NEXT_INSN (insn))
5794 {
5795 rtx reg = sfunc_uses_reg (insn);
5796
5797 if (! reg)
5798 continue;
5799 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5800 }
5801 }
5802 #if 0
5803 /* fpscr is not actually a user variable, but we pretend it is for the
5804 sake of the previous optimization passes, since we want it handled like
5805 one. However, we don't have any debugging information for it, so turn
5806 it into a non-user variable now. */
5807 if (TARGET_SH4)
5808 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5809 #endif
5810 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5811 }
5812
5813 int
5814 get_dest_uid (rtx label, int max_uid)
5815 {
5816 rtx dest = next_real_insn (label);
5817 int dest_uid;
5818 if (! dest)
5819 /* This can happen for an undefined label. */
5820 return 0;
5821 dest_uid = INSN_UID (dest);
5822 /* If this is a newly created branch redirection blocking instruction,
5823 we cannot index the branch_uid or insn_addresses arrays with its
5824 uid. But then, we won't need to, because the actual destination is
5825 the following branch. */
5826 while (dest_uid >= max_uid)
5827 {
5828 dest = NEXT_INSN (dest);
5829 dest_uid = INSN_UID (dest);
5830 }
5831 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5832 return 0;
5833 return dest_uid;
5834 }
5835
5836 /* Split condbranches that are out of range. Also add clobbers for
5837 scratch registers that are needed in far jumps.
5838 We do this before delay slot scheduling, so that it can take our
5839 newly created instructions into account. It also allows us to
5840 find branches with common targets more easily. */
5841
5842 static void
5843 split_branches (rtx first)
5844 {
5845 rtx insn;
5846 struct far_branch **uid_branch, *far_branch_list = 0;
5847 int max_uid = get_max_uid ();
5848 int ok;
5849
5850 /* Find out which branches are out of range. */
5851 shorten_branches (first);
5852
5853 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5854 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5855
5856 for (insn = first; insn; insn = NEXT_INSN (insn))
5857 if (! INSN_P (insn))
5858 continue;
5859 else if (INSN_DELETED_P (insn))
5860 {
5861 /* Shorten_branches would split this instruction again,
5862 so transform it into a note. */
5863 SET_INSN_DELETED (insn);
5864 }
5865 else if (JUMP_P (insn)
5866 /* Don't mess with ADDR_DIFF_VEC */
5867 && (GET_CODE (PATTERN (insn)) == SET
5868 || GET_CODE (PATTERN (insn)) == RETURN))
5869 {
5870 enum attr_type type = get_attr_type (insn);
5871 if (type == TYPE_CBRANCH)
5872 {
5873 rtx next, beyond;
5874
5875 if (get_attr_length (insn) > 4)
5876 {
5877 rtx src = SET_SRC (PATTERN (insn));
5878 rtx olabel = XEXP (XEXP (src, 1), 0);
5879 int addr = INSN_ADDRESSES (INSN_UID (insn));
5880 rtx label = 0;
5881 int dest_uid = get_dest_uid (olabel, max_uid);
5882 struct far_branch *bp = uid_branch[dest_uid];
5883
5884 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5885 the label if the LABEL_NUSES count drops to zero. There is
5886 always a jump_optimize pass that sets these values, but it
5887 proceeds to delete unreferenced code, and then if not
5888 optimizing, to un-delete the deleted instructions, thus
5889 leaving labels with too low uses counts. */
5890 if (! optimize)
5891 {
5892 JUMP_LABEL (insn) = olabel;
5893 LABEL_NUSES (olabel)++;
5894 }
5895 if (! bp)
5896 {
5897 bp = (struct far_branch *) alloca (sizeof *bp);
5898 uid_branch[dest_uid] = bp;
5899 bp->prev = far_branch_list;
5900 far_branch_list = bp;
5901 bp->far_label
5902 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5903 LABEL_NUSES (bp->far_label)++;
5904 }
5905 else
5906 {
5907 label = bp->near_label;
5908 if (! label && bp->address - addr >= CONDJUMP_MIN)
5909 {
5910 rtx block = bp->insert_place;
5911
5912 if (GET_CODE (PATTERN (block)) == RETURN)
5913 block = PREV_INSN (block);
5914 else
5915 block = gen_block_redirect (block,
5916 bp->address, 2);
5917 label = emit_label_after (gen_label_rtx (),
5918 PREV_INSN (block));
5919 bp->near_label = label;
5920 }
5921 else if (label && ! NEXT_INSN (label))
5922 {
5923 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5924 bp->insert_place = insn;
5925 else
5926 gen_far_branch (bp);
5927 }
5928 }
5929 if (! label
5930 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5931 {
5932 bp->near_label = label = gen_label_rtx ();
5933 bp->insert_place = insn;
5934 bp->address = addr;
5935 }
5936 ok = redirect_jump (insn, label, 0);
5937 gcc_assert (ok);
5938 }
5939 else
5940 {
5941 /* get_attr_length (insn) == 2 */
5942 /* Check if we have a pattern where reorg wants to redirect
5943 the branch to a label from an unconditional branch that
5944 is too far away. */
5945 /* We can't use JUMP_LABEL here because it might be undefined
5946 when not optimizing. */
5947 /* A syntax error might cause beyond to be NULL_RTX. */
5948 beyond
5949 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5950 0));
5951
5952 if (beyond
5953 && (JUMP_P (beyond)
5954 || ((beyond = next_active_insn (beyond))
5955 && JUMP_P (beyond)))
5956 && GET_CODE (PATTERN (beyond)) == SET
5957 && recog_memoized (beyond) == CODE_FOR_jump_compact
5958 && ((INSN_ADDRESSES
5959 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5960 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5961 > 252 + 258 + 2))
5962 gen_block_redirect (beyond,
5963 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5964 }
5965
5966 next = next_active_insn (insn);
5967
5968 if (next
5969 && (JUMP_P (next)
5970 || ((next = next_active_insn (next))
5971 && JUMP_P (next)))
5972 && GET_CODE (PATTERN (next)) == SET
5973 && recog_memoized (next) == CODE_FOR_jump_compact
5974 && ((INSN_ADDRESSES
5975 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5976 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5977 > 252 + 258 + 2))
5978 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5979 }
5980 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5981 {
5982 int addr = INSN_ADDRESSES (INSN_UID (insn));
5983 rtx far_label = 0;
5984 int dest_uid = 0;
5985 struct far_branch *bp;
5986
5987 if (type == TYPE_JUMP)
5988 {
5989 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5990 dest_uid = get_dest_uid (far_label, max_uid);
5991 if (! dest_uid)
5992 {
5993 /* Parse errors can lead to labels outside
5994 the insn stream. */
5995 if (! NEXT_INSN (far_label))
5996 continue;
5997
5998 if (! optimize)
5999 {
6000 JUMP_LABEL (insn) = far_label;
6001 LABEL_NUSES (far_label)++;
6002 }
6003 redirect_jump (insn, ret_rtx, 1);
6004 far_label = 0;
6005 }
6006 }
6007 bp = uid_branch[dest_uid];
6008 if (! bp)
6009 {
6010 bp = (struct far_branch *) alloca (sizeof *bp);
6011 uid_branch[dest_uid] = bp;
6012 bp->prev = far_branch_list;
6013 far_branch_list = bp;
6014 bp->near_label = 0;
6015 bp->far_label = far_label;
6016 if (far_label)
6017 LABEL_NUSES (far_label)++;
6018 }
6019 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6020 if (addr - bp->address <= CONDJUMP_MAX)
6021 emit_label_after (bp->near_label, PREV_INSN (insn));
6022 else
6023 {
6024 gen_far_branch (bp);
6025 bp->near_label = 0;
6026 }
6027 else
6028 bp->near_label = 0;
6029 bp->address = addr;
6030 bp->insert_place = insn;
6031 if (! far_label)
6032 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6033 else
6034 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6035 }
6036 }
6037 /* Generate all pending far branches,
6038 and free our references to the far labels. */
6039 while (far_branch_list)
6040 {
6041 if (far_branch_list->near_label
6042 && ! NEXT_INSN (far_branch_list->near_label))
6043 gen_far_branch (far_branch_list);
6044 if (optimize
6045 && far_branch_list->far_label
6046 && ! --LABEL_NUSES (far_branch_list->far_label))
6047 delete_insn (far_branch_list->far_label);
6048 far_branch_list = far_branch_list->prev;
6049 }
6050
6051 /* Instruction length information is no longer valid due to the new
6052 instructions that have been generated. */
6053 init_insn_lengths ();
6054 }
6055
6056 /* Dump out instruction addresses, which is useful for debugging the
6057 constant pool table stuff.
6058
6059 If relaxing, output the label and pseudo-ops used to link together
6060 calls and the instruction which set the registers. */
6061
6062 /* ??? The addresses printed by this routine for insns are nonsense for
6063 insns which are inside of a sequence where none of the inner insns have
6064 variable length. This is because the second pass of shorten_branches
6065 does not bother to update them. */
6066
6067 void
6068 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6069 int noperands ATTRIBUTE_UNUSED)
6070 {
6071 if (TARGET_DUMPISIZE)
6072 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6073
6074 if (TARGET_RELAX)
6075 {
6076 rtx note;
6077
6078 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6079 if (note)
6080 {
6081 rtx pattern;
6082
6083 pattern = PATTERN (insn);
6084 if (GET_CODE (pattern) == PARALLEL)
6085 pattern = XVECEXP (pattern, 0, 0);
6086 switch (GET_CODE (pattern))
6087 {
6088 case SET:
6089 if (GET_CODE (SET_SRC (pattern)) != CALL
6090 && get_attr_type (insn) != TYPE_SFUNC)
6091 {
6092 targetm.asm_out.internal_label
6093 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6094 break;
6095 }
6096 /* else FALLTHROUGH */
6097 case CALL:
6098 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6099 CODE_LABEL_NUMBER (XEXP (note, 0)));
6100 break;
6101
6102 default:
6103 gcc_unreachable ();
6104 }
6105 }
6106 }
6107 }
6108
6109 /* Dump out any constants accumulated in the final pass. These will
6110 only be labels. */
6111
6112 const char *
6113 output_jump_label_table (void)
6114 {
6115 int i;
6116
6117 if (pool_size)
6118 {
6119 fprintf (asm_out_file, "\t.align 2\n");
6120 for (i = 0; i < pool_size; i++)
6121 {
6122 pool_node *p = &pool_vector[i];
6123
6124 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6125 CODE_LABEL_NUMBER (p->label));
6126 output_asm_insn (".long %O0", &p->value);
6127 }
6128 pool_size = 0;
6129 }
6130
6131 return "";
6132 }
6133 \f
6134 /* A full frame looks like:
6135
6136 arg-5
6137 arg-4
6138 [ if current_function_anonymous_args
6139 arg-3
6140 arg-2
6141 arg-1
6142 arg-0 ]
6143 saved-fp
6144 saved-r10
6145 saved-r11
6146 saved-r12
6147 saved-pr
6148 local-n
6149 ..
6150 local-1
6151 local-0 <- fp points here. */
6152
6153 /* Number of bytes pushed for anonymous args, used to pass information
6154 between expand_prologue and expand_epilogue. */
6155
6156 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6157 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6158 for an epilogue and a negative value means that it's for a sibcall
6159 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6160 all the registers that are about to be restored, and hence dead. */
6161
6162 static void
6163 output_stack_adjust (int size, rtx reg, int epilogue_p,
6164 HARD_REG_SET *live_regs_mask, bool frame_p)
6165 {
6166 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6167 if (size)
6168 {
6169 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6170
6171 /* This test is bogus, as output_stack_adjust is used to re-align the
6172 stack. */
6173 #if 0
6174 gcc_assert (!(size % align));
6175 #endif
6176
6177 if (CONST_OK_FOR_ADD (size))
6178 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6179 /* Try to do it with two partial adjustments; however, we must make
6180 sure that the stack is properly aligned at all times, in case
6181 an interrupt occurs between the two partial adjustments. */
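/* The first step (SIZE / 2 rounded down to a multiple of ALIGN) is itself a multiple of ALIGN, so the stack stays aligned between the two adjustments.  */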
6182 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6183 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6184 {
6185 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6186 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6187 }
6188 else
6189 {
6190 rtx const_reg;
6191 rtx insn;
6192 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6193 int i;
6194
6195 /* If TEMP is invalid, we could temporarily save a general
6196 register to MACL. However, there is currently no need
6197 to handle this case, so just die when we see it. */
6198 if (epilogue_p < 0
6199 || current_function_interrupt
6200 || ! call_really_used_regs[temp] || fixed_regs[temp])
6201 temp = -1;
6202 if (temp < 0 && ! current_function_interrupt
6203 && (TARGET_SHMEDIA || epilogue_p >= 0))
6204 {
6205 HARD_REG_SET temps;
6206 COPY_HARD_REG_SET (temps, call_used_reg_set);
6207 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6208 if (epilogue_p > 0)
6209 {
6210 int nreg = 0;
6211 if (crtl->return_rtx)
6212 {
6213 enum machine_mode mode;
6214 mode = GET_MODE (crtl->return_rtx);
6215 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6216 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6217 }
6218 for (i = 0; i < nreg; i++)
6219 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6220 if (crtl->calls_eh_return)
6221 {
6222 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6223 for (i = 0; i <= 3; i++)
6224 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6225 }
6226 }
6227 if (TARGET_SHMEDIA && epilogue_p < 0)
6228 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6229 CLEAR_HARD_REG_BIT (temps, i);
6230 if (epilogue_p <= 0)
6231 {
6232 for (i = FIRST_PARM_REG;
6233 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6234 CLEAR_HARD_REG_BIT (temps, i);
6235 if (cfun->static_chain_decl != NULL)
6236 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6237 }
6238 temp = scavenge_reg (&temps);
6239 }
6240 if (temp < 0 && live_regs_mask)
6241 {
6242 HARD_REG_SET temps;
6243
6244 COPY_HARD_REG_SET (temps, *live_regs_mask);
6245 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6246 temp = scavenge_reg (&temps);
6247 }
6248 if (temp < 0)
6249 {
6250 rtx adj_reg, tmp_reg, mem;
6251
6252 /* If we reach here, the most likely case is the (sibcall)
6253 epilogue for non-SHmedia. Use a special push/pop sequence
6254 for such a case as the last resort. This looks lengthy, but
6255 it should not be a problem because it seems to be very
6256 rare. */
6257
6258 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6259
6260
6261 /* ??? There is still the slight possibility that r4 or
6262 r5 have been reserved as fixed registers or assigned
6263 as global registers, and they change during an
6264 interrupt. There are possible ways to handle this:
6265
6266 - If we are adjusting the frame pointer (r14), we can do
6267 with a single temp register and an ordinary push / pop
6268 on the stack.
6269 - Grab any call-used or call-saved registers (i.e. not
6270 fixed or globals) for the temps we need. We might
6271 also grab r14 if we are adjusting the stack pointer.
6272 If we can't find enough available registers, issue
6273 a diagnostic and die - the user must have reserved
6274 way too many registers.
6275 But since all this is rather unlikely to happen and
6276 would require extra testing, we just die if r4 / r5
6277 are not available. */
6278 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6279 && !global_regs[4] && !global_regs[5]);
6280
6281 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6282 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6283 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6284 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6285 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6286 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6287 emit_move_insn (mem, tmp_reg);
6288 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6289 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6290 emit_move_insn (mem, tmp_reg);
6291 emit_move_insn (reg, adj_reg);
6292 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6293 emit_move_insn (adj_reg, mem);
6294 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6295 emit_move_insn (tmp_reg, mem);
6296 /* Tell flow the insns that pop r4/r5 aren't dead. */
6297 emit_use (tmp_reg);
6298 emit_use (adj_reg);
6299 return;
6300 }
6301 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6302
6303 /* If SIZE is negative, subtract the positive value.
6304 This sometimes allows a constant pool entry to be shared
6305 between prologue and epilogue code. */
6306 if (size < 0)
6307 {
6308 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6309 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6310 }
6311 else
6312 {
6313 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6314 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6315 }
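/* For the prologue, describe the overall effect to the DWARF CFI machinery as a single REG := REG + SIZE adjustment, since the constant was materialized in a temporary register first.  */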
6316 if (! epilogue_p)
6317 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6318 gen_rtx_SET (VOIDmode, reg,
6319 gen_rtx_PLUS (SImode, reg,
6320 GEN_INT (size))));
6321 }
6322 }
6323 }
6324
6325 static rtx
6326 frame_insn (rtx x)
6327 {
6328 x = emit_insn (x);
6329 RTX_FRAME_RELATED_P (x) = 1;
6330 return x;
6331 }
6332
6333 /* Output RTL to push register RN onto the stack. */
6334
6335 static rtx
6336 push (int rn)
6337 {
6338 rtx x;
6339 if (rn == FPUL_REG)
6340 x = gen_push_fpul ();
6341 else if (rn == FPSCR_REG)
6342 x = gen_push_fpscr ();
6343 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6344 && FP_OR_XD_REGISTER_P (rn))
6345 {
6346 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6347 return NULL_RTX;
6348 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6349 }
6350 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6351 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6352 else
6353 x = gen_push (gen_rtx_REG (SImode, rn));
6354
6355 x = frame_insn (x);
6356 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6357 return x;
6358 }
6359
6360 /* Output RTL to pop register RN from the stack. */
6361
6362 static void
6363 pop (int rn)
6364 {
6365 rtx x;
6366 if (rn == FPUL_REG)
6367 x = gen_pop_fpul ();
6368 else if (rn == FPSCR_REG)
6369 x = gen_pop_fpscr ();
6370 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6371 && FP_OR_XD_REGISTER_P (rn))
6372 {
6373 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6374 return;
6375 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6376 }
6377 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6378 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6379 else
6380 x = gen_pop (gen_rtx_REG (SImode, rn));
6381
6382 x = emit_insn (x);
6383 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6384 }
6385
6386 /* Generate code to push the regs specified in the mask. */
6387
6388 static void
6389 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6390 {
6391 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6392 int skip_fpscr = 0;
6393
6394 /* Push PR last; this gives better latencies after the prologue, and
6395 candidates for the return delay slot when there are no general
6396 registers pushed. */
6397 for (; i < FIRST_PSEUDO_REGISTER; i++)
6398 {
6399 /* If this is an interrupt handler, and the SZ bit varies,
6400 and we have to push any floating point register, we need
6401 to switch to the correct precision first. */
6402 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6403 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6404 {
6405 HARD_REG_SET unsaved;
6406
6407 push (FPSCR_REG);
6408 COMPL_HARD_REG_SET (unsaved, *mask);
6409 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6410 skip_fpscr = 1;
6411 }
6412 if (i != PR_REG
6413 && (i != FPSCR_REG || ! skip_fpscr)
6414 && TEST_HARD_REG_BIT (*mask, i))
6415 {
6416 /* If the ISR has RESBANK attribute assigned, don't push any of
6417 the following registers - R0-R14, MACH, MACL and GBR. */
6418 if (! (sh_cfun_resbank_handler_p ()
6419 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6420 || i == MACH_REG
6421 || i == MACL_REG
6422 || i == GBR_REG)))
6423 push (i);
6424 }
6425 }
6426
6427 /* Push banked registers last to improve delay slot opportunities. */
6428 if (interrupt_handler)
6429 {
6430 bool use_movml = false;
6431
6432 if (TARGET_SH2A)
6433 {
6434 unsigned int count = 0;
6435
6436 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6437 if (TEST_HARD_REG_BIT (*mask, i))
6438 count++;
6439 else
6440 break;
6441
6442 /* Use movml when all banked registers are pushed. */
6443 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6444 use_movml = true;
6445 }
6446
6447 if (use_movml)
6448 {
6449 rtx x, mem, reg, set;
6450 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6451
6452 /* We must avoid scheduling the multiple-store insn together with
6453 other insns. */
6454 emit_insn (gen_blockage ());
6455 x = gen_movml_push_banked (sp_reg);
6456 x = frame_insn (x);
6457 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6458 {
6459 mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
6460 reg = gen_rtx_REG (SImode, i);
6461 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6462 }
6463
6464 set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
6465 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6466 emit_insn (gen_blockage ());
6467 }
6468 else
6469 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6470 if (TEST_HARD_REG_BIT (*mask, i))
6471 push (i);
6472 }
6473
6474 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6475 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6476 push (PR_REG);
6477 }
6478
6479 /* Calculate how much extra space is needed to save all callee-saved
6480 target registers.
6481 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6482
6483 static int
6484 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6485 {
6486 int reg;
6487 int stack_space = 0;
6488 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6489
6490 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6491 if ((! call_really_used_regs[reg] || interrupt_handler)
6492 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6493 /* Leave space to save this target register on the stack,
6494 in case target register allocation wants to use it. */
6495 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6496 return stack_space;
6497 }
6498
6499 /* Decide whether we should reserve space for callee-save target registers,
6500 in case target register allocation wants to use them. REGS_SAVED is
6501 the space, in bytes, that is already required for register saves.
6502 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6503
6504 static int
6505 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6506 HARD_REG_SET *live_regs_mask)
6507 {
6508 if (optimize_size)
6509 return 0;
6510 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6511 }
6512
6513 /* Decide how much space to reserve for callee-save target registers
6514 in case target register allocation wants to use them.
6515 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6516
6517 static int
6518 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6519 {
6520 if (shmedia_space_reserved_for_target_registers)
6521 return shmedia_target_regs_stack_space (live_regs_mask);
6522 else
6523 return 0;
6524 }
6525
6526 /* Work out the registers which need to be saved, both as a mask and a
6527 count of saved words. Return the count.
6528
6529 If doing a pragma interrupt function, then push all regs used by the
6530 function, and if we call another function (we can tell by looking at PR),
6531 make sure that all the regs it clobbers are safe too. */
6532
6533 static int
6534 calc_live_regs (HARD_REG_SET *live_regs_mask)
6535 {
6536 unsigned int reg;
6537 int count;
6538 tree attrs;
6539 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6540 bool nosave_low_regs;
6541 int pr_live, has_call;
6542
6543 attrs = DECL_ATTRIBUTES (current_function_decl);
6544 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6545 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6546 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6547 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6548
6549 CLEAR_HARD_REG_SET (*live_regs_mask);
6550 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6551 && df_regs_ever_live_p (FPSCR_REG))
6552 target_flags &= ~MASK_FPU_SINGLE;
6553 /* If we can avoid a lot of individual saves by switching to double mode, do that. */
6554 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6555 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6556 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6557 && (! call_really_used_regs[reg]
6558 || interrupt_handler)
6559 && ++count > 2)
6560 {
6561 target_flags &= ~MASK_FPU_SINGLE;
6562 break;
6563 }
6564 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6565 knows how to use it. That means the pseudo originally allocated for
6566 the initial value can become the PR_MEDIA_REG hard register, as seen for
6567 execute/20010122-1.c:test9. */
6568 if (TARGET_SHMEDIA)
6569 /* ??? this function is called from initial_elimination_offset, hence we
6570 can't use the result of sh_media_register_for_return here. */
6571 pr_live = sh_pr_n_sets ();
6572 else
6573 {
6574 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6575 pr_live = (pr_initial
6576 ? (!REG_P (pr_initial)
6577 || REGNO (pr_initial) != (PR_REG))
6578 : df_regs_ever_live_p (PR_REG));
6579 /* For Shcompact, if not optimizing, we end up with a memory reference
6580 using the return address pointer for __builtin_return_address even
6581 though there is no actual need to put the PR register on the stack. */
6582 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6583 }
6584 /* Force PR to be live if the prologue has to call the SHmedia
6585 argument decoder or register saver. */
6586 if (TARGET_SHCOMPACT
6587 && ((crtl->args.info.call_cookie
6588 & ~ CALL_COOKIE_RET_TRAMP (1))
6589 || crtl->saves_all_registers))
6590 pr_live = 1;
6591 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6592 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6593 {
6594 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6595 ? pr_live
6596 : interrupt_handler
6597 ? (/* Need to save all the regs ever live. */
6598 (df_regs_ever_live_p (reg)
6599 || (call_really_used_regs[reg]
6600 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6601 || reg == PIC_OFFSET_TABLE_REGNUM)
6602 && has_call)
6603 || (TARGET_SHMEDIA && has_call
6604 && REGISTER_NATURAL_MODE (reg) == SImode
6605 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6606 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6607 && reg != RETURN_ADDRESS_POINTER_REGNUM
6608 && reg != T_REG && reg != GBR_REG
6609 /* Push fpscr only on targets which have an FPU. */
6610 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6611 : (/* Only push those regs which are used and need to be saved. */
6612 (TARGET_SHCOMPACT
6613 && flag_pic
6614 && crtl->args.info.call_cookie
6615 && reg == PIC_OFFSET_TABLE_REGNUM)
6616 || (df_regs_ever_live_p (reg)
6617 && ((!call_really_used_regs[reg]
6618 && !(reg != PIC_OFFSET_TABLE_REGNUM
6619 && fixed_regs[reg] && call_used_regs[reg]))
6620 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6621 || (crtl->calls_eh_return
6622 && (reg == EH_RETURN_DATA_REGNO (0)
6623 || reg == EH_RETURN_DATA_REGNO (1)
6624 || reg == EH_RETURN_DATA_REGNO (2)
6625 || reg == EH_RETURN_DATA_REGNO (3)))
6626 || ((reg == MACL_REG || reg == MACH_REG)
6627 && df_regs_ever_live_p (reg)
6628 && sh_cfun_attr_renesas_p ())
6629 ))
6630 {
6631 SET_HARD_REG_BIT (*live_regs_mask, reg);
6632 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6633
6634 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6635 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6636 {
6637 if (FP_REGISTER_P (reg))
6638 {
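/* In double mode this register is pushed together with its pair partner, so make sure the partner is marked live and counted as well.  */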
6639 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6640 {
6641 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6642 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6643 }
6644 }
6645 else if (XD_REGISTER_P (reg))
6646 {
6647 /* Must switch to double mode to access these registers. */
6648 target_flags &= ~MASK_FPU_SINGLE;
6649 }
6650 }
6651 }
6652 if (nosave_low_regs && reg == R8_REG)
6653 break;
6654 }
6655 /* If we have a target register optimization pass after prologue / epilogue
6656 threading, we need to assume all target registers will be live even if
6657 they aren't now. */
6658 if (flag_branch_target_load_optimize2
6659 && TARGET_SAVE_ALL_TARGET_REGS
6660 && shmedia_space_reserved_for_target_registers)
6661 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6662 if ((! call_really_used_regs[reg] || interrupt_handler)
6663 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6664 {
6665 SET_HARD_REG_BIT (*live_regs_mask, reg);
6666 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6667 }
6668 /* If this is an interrupt handler, we don't have any call-clobbered
6669 registers we can conveniently use for target register save/restore.
6670 Make sure we save at least one general purpose register when we need
6671 to save target registers. */
6672 if (interrupt_handler
6673 && hard_reg_set_intersect_p (*live_regs_mask,
6674 reg_class_contents[TARGET_REGS])
6675 && ! hard_reg_set_intersect_p (*live_regs_mask,
6676 reg_class_contents[GENERAL_REGS]))
6677 {
6678 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6679 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6680 }
6681
6682 return count;
6683 }
6684
6685 /* Code to generate prologue and epilogue sequences */
6686
6687 /* PUSHED is the number of bytes that are being pushed on the
6688 stack for register saves. Return the frame size, padded
6689 appropriately so that the stack stays properly aligned. */
6690 static HOST_WIDE_INT
6691 rounded_frame_size (int pushed)
6692 {
6693 HOST_WIDE_INT size = get_frame_size ();
6694 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6695
6696 if (ACCUMULATE_OUTGOING_ARGS)
6697 size += crtl->outgoing_args_size;
6698
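/* Round SIZE + PUSHED up to a multiple of ALIGN, then subtract PUSHED again, so that the register saves plus the frame size keep the stack aligned.  */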
6699 return ((size + pushed + align - 1) & -align) - pushed;
6700 }
6701
6702 /* Choose a call-clobbered target-branch register that remains
6703 unchanged along the whole function. We set it up as the return
6704 value in the prologue. */
6705 int
6706 sh_media_register_for_return (void)
6707 {
6708 int regno;
6709 int tr0_used;
6710
6711 if (! current_function_is_leaf)
6712 return -1;
6713 if (lookup_attribute ("interrupt_handler",
6714 DECL_ATTRIBUTES (current_function_decl)))
6715 return -1;
6716 if (sh_cfun_interrupt_handler_p ())
6717 return -1;
6718
6719 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6720
6721 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6722 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6723 return regno;
6724
6725 return -1;
6726 }
6727
6728 /* The maximum registers we need to save are:
6729 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6730 - 32 floating point registers (for each pair, we save none,
6731 one single precision value, or a double precision value).
6732 - 8 target registers
6733 - add 1 entry for a delimiter. */
6734 #define MAX_SAVED_REGS (62+32+8)
6735
6736 typedef struct save_entry_s
6737 {
6738 unsigned char reg;
6739 unsigned char mode;
6740 short offset;
6741 } save_entry;
6742
6743 #define MAX_TEMPS 4
6744
6745 /* There will be a delimiter entry with VOIDmode both at the start and the
6746 end of a filled in schedule. The end delimiter has the offset of the
6747 save with the smallest (i.e. most negative) offset. */
6748 typedef struct save_schedule_s
6749 {
6750 save_entry entries[MAX_SAVED_REGS + 2];
6751 int temps[MAX_TEMPS+1];
6752 } save_schedule;
6753
6754 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6755 use reverse order. Returns the last entry written to (not counting
6756 the delimiter). OFFSET_BASE is a number to be added to all offset
6757 entries. */
6758
6759 static save_entry *
6760 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6761 int offset_base)
6762 {
6763 int align, i;
6764 save_entry *entry = schedule->entries;
6765 int tmpx = 0;
6766 int offset;
6767
6768 if (! current_function_interrupt)
6769 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6770 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6771 && ! FUNCTION_ARG_REGNO_P (i)
6772 && i != FIRST_RET_REG
6773 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6774 && ! (crtl->calls_eh_return
6775 && (i == EH_RETURN_STACKADJ_REGNO
6776 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6777 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6778 schedule->temps[tmpx++] = i;
6779 entry->reg = -1;
6780 entry->mode = VOIDmode;
6781 entry->offset = offset_base;
6782 entry++;
6783 /* We loop twice: first, we save 8-byte aligned registers at the
6784 higher addresses, which are known to be aligned. Then, we
6785 proceed to saving 32-bit registers that don't need 8-byte
6786 alignment.
6787 If this is an interrupt function, all registers that need saving
6788 need to be saved in full. Moreover, we need to postpone saving
6789 target registers until we have saved some general purpose registers
6790 that we can then use as scratch registers. */
6791 offset = offset_base;
6792 for (align = 1; align >= 0; align--)
6793 {
6794 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6795 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6796 {
6797 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6798 int reg = i;
6799
6800 if (current_function_interrupt)
6801 {
6802 if (TARGET_REGISTER_P (i))
6803 continue;
6804 if (GENERAL_REGISTER_P (i))
6805 mode = DImode;
6806 }
6807 if (mode == SFmode && (i % 2) == 1
6808 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6809 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6810 {
6811 mode = DFmode;
6812 i--;
6813 reg--;
6814 }
6815
6816 /* If we're doing the aligned pass and this is not aligned,
6817 or we're doing the unaligned pass and this is aligned,
6818 skip it. */
6819 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6820 != align)
6821 continue;
6822
6823 if (current_function_interrupt
6824 && GENERAL_REGISTER_P (i)
6825 && tmpx < MAX_TEMPS)
6826 schedule->temps[tmpx++] = i;
6827
6828 offset -= GET_MODE_SIZE (mode);
6829 entry->reg = i;
6830 entry->mode = mode;
6831 entry->offset = offset;
6832 entry++;
6833 }
6834 if (align && current_function_interrupt)
6835 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6836 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6837 {
6838 offset -= GET_MODE_SIZE (DImode);
6839 entry->reg = i;
6840 entry->mode = DImode;
6841 entry->offset = offset;
6842 entry++;
6843 }
6844 }
6845 entry->reg = -1;
6846 entry->mode = VOIDmode;
6847 entry->offset = offset;
6848 schedule->temps[tmpx] = -1;
6849 return entry - 1;
6850 }
6851
6852 void
6853 sh_expand_prologue (void)
6854 {
6855 HARD_REG_SET live_regs_mask;
6856 int d, i;
6857 int d_rounding = 0;
6858 int save_flags = target_flags;
6859 int pretend_args;
6860 int stack_usage;
6861 tree sp_switch_attr
6862 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6863
6864 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6865
6866 /* We have pretend args if we had an object sent partially in registers
6867 and partially on the stack, e.g. a large structure. */
6868 pretend_args = crtl->args.pretend_args_size;
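/* When the anonymous argument registers are pushed explicitly by the SETUP_VARARGS code below, no separate pretend-args space is needed here.  */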
6869 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6870 && (NPARM_REGS(SImode)
6871 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6872 pretend_args = 0;
6873
6874 output_stack_adjust (-pretend_args
6875 - crtl->args.info.stack_regs * 8,
6876 stack_pointer_rtx, 0, NULL, true);
6877 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
6878
6879 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6880 /* We're going to use the PIC register to load the address of the
6881 incoming-argument decoder and/or of the return trampoline from
6882 the GOT, so make sure the PIC register is preserved and
6883 initialized. */
6884 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6885
6886 if (TARGET_SHCOMPACT
6887 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6888 {
6889 int reg;
6890
6891 /* First, make all registers with incoming arguments that will
6892 be pushed onto the stack live, so that register renaming
6893 doesn't overwrite them. */
6894 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6895 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6896 >= NPARM_REGS (SImode) - reg)
6897 for (; reg < NPARM_REGS (SImode); reg++)
6898 emit_insn (gen_shcompact_preserve_incoming_args
6899 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6900 else if (CALL_COOKIE_INT_REG_GET
6901 (crtl->args.info.call_cookie, reg) == 1)
6902 emit_insn (gen_shcompact_preserve_incoming_args
6903 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6904
6905 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6906 stack_pointer_rtx);
6907 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6908 GEN_INT (crtl->args.info.call_cookie));
6909 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6910 gen_rtx_REG (SImode, R0_REG));
6911 }
6912 else if (TARGET_SHMEDIA)
6913 {
6914 int tr = sh_media_register_for_return ();
6915
6916 if (tr >= 0)
6917 emit_move_insn (gen_rtx_REG (DImode, tr),
6918 gen_rtx_REG (DImode, PR_MEDIA_REG));
6919 }
6920
6921 /* Emit the code for SETUP_VARARGS. */
6922 if (cfun->stdarg)
6923 {
6924 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6925 {
6926 /* Push arg regs as if they'd been provided by caller in stack. */
6927 for (i = 0; i < NPARM_REGS(SImode); i++)
6928 {
6929 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6930
6931 if (i >= (NPARM_REGS(SImode)
6932 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6933 ))
6934 break;
6935 push (rn);
6936 stack_usage += GET_MODE_SIZE (SImode);
6937 }
6938 }
6939 }
6940
6941 /* If we're supposed to switch stacks at function entry, do so now. */
6942 if (sp_switch_attr)
6943 {
6944 rtx lab, newsrc;
6945 /* The argument specifies a variable holding the address of the
6946 stack the interrupt function should switch to/from at entry/exit. */
6947 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6948 const char *s
6949 = ggc_strdup (TREE_STRING_POINTER (arg));
6950 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6951
6952 lab = add_constant (sp_switch, SImode, 0);
6953 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6954 newsrc = gen_const_mem (SImode, newsrc);
6955
6956 emit_insn (gen_sp_switch_1 (newsrc));
6957 }
6958
6959 d = calc_live_regs (&live_regs_mask);
6960 /* ??? Maybe we could save some switching if we can move a mode switch
6961 that already happens to be at the function start into the prologue. */
6962 if (target_flags != save_flags && ! current_function_interrupt)
6963 emit_insn (gen_toggle_sz ());
6964
6965 if (TARGET_SH5)
6966 {
6967 int offset_base, offset;
6968 rtx r0 = NULL_RTX;
6969 int offset_in_r0 = -1;
6970 int sp_in_r0 = 0;
6971 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6972 int total_size, save_size;
6973 save_schedule schedule;
6974 save_entry *entry;
6975 int *tmp_pnt;
6976
6977 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6978 && ! current_function_interrupt)
6979 r0 = gen_rtx_REG (Pmode, R0_REG);
6980
6981 /* D is the actual number of bytes that we need for saving registers;
6982 however, in initial_elimination_offset we have committed to using
6983 an additional TREGS_SPACE amount of bytes - in order to keep both
6984 addresses to arguments supplied by the caller and local variables
6985 valid, we must keep this gap. Place it between the incoming
6986 arguments and the actually saved registers in a bid to optimize
6987 locality of reference. */
6988 total_size = d + tregs_space;
6989 total_size += rounded_frame_size (total_size);
6990 save_size = total_size - rounded_frame_size (d);
6991 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6992 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6993 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6994
6995 /* If adjusting the stack in a single step costs nothing extra, do so.
6996 I.e. either if a single addi is enough, or we need a movi anyway,
6997 and we don't exceed the maximum offset range (the test for the
6998 latter is conservative for simplicity). */
6999 if (TARGET_SHMEDIA
7000 && (CONST_OK_FOR_I10 (-total_size)
7001 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7002 && total_size <= 2044)))
7003 d_rounding = total_size - save_size;
7004
7005 offset_base = d + d_rounding;
7006
7007 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7008 0, NULL, true);
7009 stack_usage += save_size + d_rounding;
7010
7011 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7012 tmp_pnt = schedule.temps;
7013 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7014 {
7015 enum machine_mode mode = (enum machine_mode) entry->mode;
7016 unsigned int reg = entry->reg;
7017 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7018 rtx orig_reg_rtx;
7019
7020 offset = entry->offset;
7021
7022 reg_rtx = gen_rtx_REG (mode, reg);
7023
7024 mem_rtx = gen_frame_mem (mode,
7025 gen_rtx_PLUS (Pmode,
7026 stack_pointer_rtx,
7027 GEN_INT (offset)));
7028
7029 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7030 {
7031 gcc_assert (r0);
7032 mem_rtx = NULL_RTX;
7033 }
7034
7035 if (HAVE_PRE_DECREMENT
7036 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7037 || mem_rtx == NULL_RTX
7038 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7039 {
7040 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7041
7042 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7043 pre_dec = NULL_RTX;
7044 else
7045 {
7046 mem_rtx = NULL_RTX;
7047 offset += GET_MODE_SIZE (mode);
7048 }
7049 }
7050
7051 if (mem_rtx != NULL_RTX)
7052 goto addr_ok;
7053
7054 if (offset_in_r0 == -1)
7055 {
7056 emit_move_insn (r0, GEN_INT (offset));
7057 offset_in_r0 = offset;
7058 }
7059 else if (offset != offset_in_r0)
7060 {
7061 emit_move_insn (r0,
7062 gen_rtx_PLUS
7063 (Pmode, r0,
7064 GEN_INT (offset - offset_in_r0)));
7065 offset_in_r0 += offset - offset_in_r0;
7066 }
7067
7068 if (pre_dec != NULL_RTX)
7069 {
7070 if (! sp_in_r0)
7071 {
7072 emit_move_insn (r0,
7073 gen_rtx_PLUS
7074 (Pmode, r0, stack_pointer_rtx));
7075 sp_in_r0 = 1;
7076 }
7077
7078 offset -= GET_MODE_SIZE (mode);
7079 offset_in_r0 -= GET_MODE_SIZE (mode);
7080
7081 mem_rtx = pre_dec;
7082 }
7083 else if (sp_in_r0)
7084 mem_rtx = gen_frame_mem (mode, r0);
7085 else
7086 mem_rtx = gen_frame_mem (mode,
7087 gen_rtx_PLUS (Pmode,
7088 stack_pointer_rtx,
7089 r0));
7090
7091 /* We must not use an r0-based address for target-branch
7092 registers or for special registers without pre-dec
7093 memory addresses, since we store their values in r0
7094 first. */
7095 gcc_assert (!TARGET_REGISTER_P (reg)
7096 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7097 || mem_rtx == pre_dec));
7098
7099 addr_ok:
7100 orig_reg_rtx = reg_rtx;
7101 if (TARGET_REGISTER_P (reg)
7102 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7103 && mem_rtx != pre_dec))
7104 {
7105 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7106
7107 emit_move_insn (tmp_reg, reg_rtx);
7108
7109 if (REGNO (tmp_reg) == R0_REG)
7110 {
7111 offset_in_r0 = -1;
7112 sp_in_r0 = 0;
7113 gcc_assert (!refers_to_regno_p
7114 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7115 }
7116
7117 if (*++tmp_pnt <= 0)
7118 tmp_pnt = schedule.temps;
7119
7120 reg_rtx = tmp_reg;
7121 }
7122 {
7123 rtx insn;
7124
7125 /* Mark as interesting for the DWARF CFI generator. */
7126 insn = emit_move_insn (mem_rtx, reg_rtx);
7127 RTX_FRAME_RELATED_P (insn) = 1;
7128 /* If we use an intermediate register for the save, we can't
7129 describe this exactly in cfi as a copy of the to-be-saved
7130 register into the temporary register and then a store of the
7131 temporary register to the stack, because the temporary register can
7132 have a different natural size than the to-be-saved register.
7133 Thus, we gloss over the intermediate copy and pretend we do
7134 a direct save from the to-be-saved register. */
7135 if (REGNO (reg_rtx) != reg)
7136 {
7137 rtx set;
7138
7139 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7140 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7141 }
7142
7143 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7144 {
7145 rtx reg_rtx = gen_rtx_REG (mode, reg);
7146 rtx set;
7147 rtx mem_rtx = gen_frame_mem (mode,
7148 gen_rtx_PLUS (Pmode,
7149 stack_pointer_rtx,
7150 GEN_INT (offset)));
7151
7152 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7153 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7154 }
7155 }
7156 }
7157
7158 gcc_assert (entry->offset == d_rounding);
7159 }
7160 else
7161 {
7162 push_regs (&live_regs_mask, current_function_interrupt);
7163 stack_usage += d;
7164 }
7165
7166 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7167 emit_insn (gen_GOTaddr2picreg ());
7168
7169 if (SHMEDIA_REGS_STACK_ADJUST ())
7170 {
7171 /* This must NOT go through the PLT, otherwise mach and macl
7172 may be clobbered. */
7173 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7174 (TARGET_FPU_ANY
7175 ? "__GCC_push_shmedia_regs"
7176 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7177 emit_insn (gen_shmedia_save_restore_regs_compact
7178 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7179 }
7180
7181 if (target_flags != save_flags && ! current_function_interrupt)
7182 emit_insn (gen_toggle_sz ());
7183
7184 target_flags = save_flags;
7185
7186 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7187 stack_pointer_rtx, 0, NULL, true);
7188 stack_usage += rounded_frame_size (d) - d_rounding;
7189
7190 if (frame_pointer_needed)
7191 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7192
7193 if (TARGET_SHCOMPACT
7194 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7195 {
7196 /* This must NOT go through the PLT, otherwise mach and macl
7197 may be clobbered. */
7198 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7199 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7200 emit_insn (gen_shcompact_incoming_args ());
7201 }
7202
7203 if (flag_stack_usage_info)
7204 current_function_static_stack_size = stack_usage;
7205 }
7206
7207 void
7208 sh_expand_epilogue (bool sibcall_p)
7209 {
7210 HARD_REG_SET live_regs_mask;
7211 int d, i;
7212 int d_rounding = 0;
7213
7214 int save_flags = target_flags;
7215 int frame_size, save_size;
7216 int fpscr_deferred = 0;
7217 int e = sibcall_p ? -1 : 1;
7218
7219 d = calc_live_regs (&live_regs_mask);
7220
7221 save_size = d;
7222 frame_size = rounded_frame_size (d);
7223
7224 if (TARGET_SH5)
7225 {
7226 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7227 int total_size;
7228 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7229 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7230 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7231
7232 total_size = d + tregs_space;
7233 total_size += rounded_frame_size (total_size);
7234 save_size = total_size - frame_size;
7235
7236 /* If adjusting the stack in a single step costs nothing extra, do so.
7237 I.e. either if a single addi is enough, or we need a movi anyway,
7238 and we don't exceed the maximum offset range (the test for the
7239 latter is conservative for simplicity). */
7240 if (TARGET_SHMEDIA
7241 && ! frame_pointer_needed
7242 && (CONST_OK_FOR_I10 (total_size)
7243 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7244 && total_size <= 2044)))
7245 d_rounding = frame_size;
7246
7247 frame_size -= d_rounding;
7248 }
7249
7250 if (frame_pointer_needed)
7251 {
7252 /* We must avoid scheduling the epilogue with previous basic blocks.
7253 See PR/18032 and PR/40313. */
7254 emit_insn (gen_blockage ());
7255 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7256 &live_regs_mask, false);
7257
7258 /* We must avoid moving the stack pointer adjustment past code
7259 which reads from the local frame, else an interrupt could
7260 occur after the SP adjustment and clobber data in the local
7261 frame. */
7262 emit_insn (gen_blockage ());
7263 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7264 }
7265 else if (frame_size)
7266 {
7267 /* We must avoid moving the stack pointer adjustment past code
7268 which reads from the local frame, else an interrupt could
7269 occur after the SP adjustment and clobber data in the local
7270 frame. */
7271 emit_insn (gen_blockage ());
7272 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7273 &live_regs_mask, false);
7274 }
7275
7276 if (SHMEDIA_REGS_STACK_ADJUST ())
7277 {
7278 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7279 (TARGET_FPU_ANY
7280 ? "__GCC_pop_shmedia_regs"
7281 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7282 /* This must NOT go through the PLT, otherwise mach and macl
7283 may be clobbered. */
7284 emit_insn (gen_shmedia_save_restore_regs_compact
7285 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7286 }
7287
7288 /* Pop all the registers. */
7289
7290 if (target_flags != save_flags && ! current_function_interrupt)
7291 emit_insn (gen_toggle_sz ());
7292 if (TARGET_SH5)
7293 {
7294 int offset_base, offset;
7295 int offset_in_r0 = -1;
7296 int sp_in_r0 = 0;
7297 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7298 save_schedule schedule;
7299 save_entry *entry;
7300 int *tmp_pnt;
7301
7302 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7303 offset_base = -entry[1].offset + d_rounding;
7304 tmp_pnt = schedule.temps;
7305 for (; entry->mode != VOIDmode; entry--)
7306 {
7307 enum machine_mode mode = (enum machine_mode) entry->mode;
7308 int reg = entry->reg;
7309 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7310
7311 offset = offset_base + entry->offset;
7312 reg_rtx = gen_rtx_REG (mode, reg);
7313
7314 mem_rtx = gen_frame_mem (mode,
7315 gen_rtx_PLUS (Pmode,
7316 stack_pointer_rtx,
7317 GEN_INT (offset)));
7318
7319 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7320 mem_rtx = NULL_RTX;
7321
7322 if (HAVE_POST_INCREMENT
7323 && (offset == offset_in_r0
7324 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7325 && mem_rtx == NULL_RTX)
7326 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7327 {
7328 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7329
7330 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7331 post_inc = NULL_RTX;
7332 else
7333 mem_rtx = NULL_RTX;
7334 }
7335
7336 if (mem_rtx != NULL_RTX)
7337 goto addr_ok;
7338
7339 if (offset_in_r0 == -1)
7340 {
7341 emit_move_insn (r0, GEN_INT (offset));
7342 offset_in_r0 = offset;
7343 }
7344 else if (offset != offset_in_r0)
7345 {
7346 emit_move_insn (r0,
7347 gen_rtx_PLUS
7348 (Pmode, r0,
7349 GEN_INT (offset - offset_in_r0)));
7350 offset_in_r0 += offset - offset_in_r0;
7351 }
7352
7353 if (post_inc != NULL_RTX)
7354 {
7355 if (! sp_in_r0)
7356 {
7357 emit_move_insn (r0,
7358 gen_rtx_PLUS
7359 (Pmode, r0, stack_pointer_rtx));
7360 sp_in_r0 = 1;
7361 }
7362
7363 mem_rtx = post_inc;
7364
7365 offset_in_r0 += GET_MODE_SIZE (mode);
7366 }
7367 else if (sp_in_r0)
7368 mem_rtx = gen_frame_mem (mode, r0);
7369 else
7370 mem_rtx = gen_frame_mem (mode,
7371 gen_rtx_PLUS (Pmode,
7372 stack_pointer_rtx,
7373 r0));
7374
7375 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7376 || mem_rtx == post_inc);
7377
7378 addr_ok:
7379 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7380 && mem_rtx != post_inc)
7381 {
7382 emit_move_insn (r0, mem_rtx);
7383 mem_rtx = r0;
7384 }
7385 else if (TARGET_REGISTER_P (reg))
7386 {
7387 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7388
7389 /* Give the scheduler a bit of freedom by using up to
7390 MAX_TEMPS registers in a round-robin fashion. */
7391 emit_move_insn (tmp_reg, mem_rtx);
7392 mem_rtx = tmp_reg;
7393 if (*++tmp_pnt < 0)
7394 tmp_pnt = schedule.temps;
7395 }
7396
7397 emit_move_insn (reg_rtx, mem_rtx);
7398 }
7399
7400 gcc_assert (entry->offset + offset_base == d + d_rounding);
7401 }
7402 else /* ! TARGET_SH5 */
7403 {
7404 int last_reg;
7405
7406 save_size = 0;
7407 /* For an ISR with RESBANK attribute assigned, don't pop PR
7408 register. */
7409 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7410 && !sh_cfun_resbank_handler_p ())
7411 {
7412 if (!frame_pointer_needed)
7413 emit_insn (gen_blockage ());
7414 pop (PR_REG);
7415 }
7416
7417 /* Banked registers are popped first to avoid being scheduled in the
7418 delay slot; RTE switches banks before executing its delay-slot instruction. */
7419 if (current_function_interrupt)
7420 {
7421 bool use_movml = false;
7422
7423 if (TARGET_SH2A)
7424 {
7425 unsigned int count = 0;
7426
7427 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7428 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7429 count++;
7430 else
7431 break;
7432
7433 /* Use movml when all banked registers are popped. */
7434 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7435 use_movml = true;
7436 }
7437
7438 if (use_movml)
7439 {
7440 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7441
7442 /* We must avoid scheduling the multiple-register load insn with
7443 other insns. */
7444 emit_insn (gen_blockage ());
7445 emit_insn (gen_movml_pop_banked (sp_reg));
7446 emit_insn (gen_blockage ());
7447 }
7448 else
7449 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7450 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7451 pop (i);
7452
7453 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7454 }
7455 else
7456 last_reg = FIRST_PSEUDO_REGISTER;
7457
7458 for (i = 0; i < last_reg; i++)
7459 {
7460 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7461
7462 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7463 && hard_reg_set_intersect_p (live_regs_mask,
7464 reg_class_contents[DF_REGS]))
7465 fpscr_deferred = 1;
7466 /* For an ISR with RESBANK attribute assigned, don't pop
7467 the following registers: R0-R14, MACH, MACL and GBR. */
7468 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7469 && ! (sh_cfun_resbank_handler_p ()
7470 && ((j >= FIRST_GENERAL_REG
7471 && j < LAST_GENERAL_REG)
7472 || j == MACH_REG
7473 || j == MACL_REG
7474 || j == GBR_REG)))
7475 pop (j);
7476
7477 if (j == FIRST_FP_REG && fpscr_deferred)
7478 pop (FPSCR_REG);
7479 }
7480 }
7481 if (target_flags != save_flags && ! current_function_interrupt)
7482 emit_insn (gen_toggle_sz ());
7483 target_flags = save_flags;
7484
7485 output_stack_adjust (crtl->args.pretend_args_size
7486 + save_size + d_rounding
7487 + crtl->args.info.stack_regs * 8,
7488 stack_pointer_rtx, e, NULL, false);
7489
7490 if (crtl->calls_eh_return)
7491 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7492 EH_RETURN_STACKADJ_RTX));
7493
7494 /* Switch back to the normal stack if necessary. */
7495 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7496 emit_insn (gen_sp_switch_2 ());
7497
7498 /* Tell flow the insn that pops PR isn't dead. */
7499 /* PR_REG will never be live in SHmedia mode, and we don't need to
7500 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7501 by the return pattern. */
7502 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7503 emit_use (gen_rtx_REG (SImode, PR_REG));
7504 }
7505
7506 static int sh_need_epilogue_known = 0;
7507
7508 int
7509 sh_need_epilogue (void)
7510 {
7511 if (! sh_need_epilogue_known)
7512 {
7513 rtx epilogue;
7514
7515 start_sequence ();
7516 sh_expand_epilogue (0);
7517 epilogue = get_insns ();
7518 end_sequence ();
7519 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7520 }
7521 return sh_need_epilogue_known > 0;
7522 }
7523
7524 /* Emit code to change the current function's return address to RA.
7525 TEMP is available as a scratch register, if needed. */
7526
7527 void
7528 sh_set_return_address (rtx ra, rtx tmp)
7529 {
7530 HARD_REG_SET live_regs_mask;
7531 int d;
7532 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7533 int pr_offset;
7534
7535 d = calc_live_regs (&live_regs_mask);
7536
7537 /* If pr_reg isn't live, we can set it (or the register given in
7538 sh_media_register_for_return) directly. */
7539 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7540 {
7541 rtx rr;
7542
7543 if (TARGET_SHMEDIA)
7544 {
7545 int rr_regno = sh_media_register_for_return ();
7546
7547 if (rr_regno < 0)
7548 rr_regno = pr_reg;
7549
7550 rr = gen_rtx_REG (DImode, rr_regno);
7551 }
7552 else
7553 rr = gen_rtx_REG (SImode, pr_reg);
7554
7555 emit_insn (GEN_MOV (rr, ra));
7556 /* Tell flow the register for return isn't dead. */
7557 emit_use (rr);
7558 return;
7559 }
7560
7561 if (TARGET_SH5)
7562 {
7563 int offset;
7564 save_schedule schedule;
7565 save_entry *entry;
7566
7567 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7568 offset = entry[1].offset;
7569 for (; entry->mode != VOIDmode; entry--)
7570 if (entry->reg == pr_reg)
7571 goto found;
7572
7573 /* We can't find the PR register. */
7574 gcc_unreachable ();
7575
7576 found:
7577 offset = entry->offset - offset;
7578 pr_offset = (rounded_frame_size (d) + offset
7579 + SHMEDIA_REGS_STACK_ADJUST ());
7580 }
7581 else
7582 pr_offset = rounded_frame_size (d);
7583
7584 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7585
7586 if (frame_pointer_needed)
7587 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7588 else
7589 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7590
7591 tmp = gen_frame_mem (Pmode, tmp);
7592 emit_insn (GEN_MOV (tmp, ra));
7593 /* Tell flow this store isn't dead. */
7594 emit_use (tmp);
7595 }
7596
7597 /* Clear variables at function end. */
7598
7599 static void
7600 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7601 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7602 {
7603 sh_need_epilogue_known = 0;
7604 }
7605
7606 static rtx
7607 sh_builtin_saveregs (void)
7608 {
7609 /* First unnamed integer register. */
7610 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7611 /* Number of integer registers we need to save. */
7612 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7613 /* First unnamed SFmode float reg */
7614 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7615 /* Number of SFmode float regs to save. */
7616 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7617 rtx regbuf, fpregs;
7618 int bufsize, regno;
7619 alias_set_type alias_set;
7620
7621 if (TARGET_SH5)
7622 {
7623 if (n_intregs)
7624 {
7625 int pushregs = n_intregs;
7626
7627 while (pushregs < NPARM_REGS (SImode) - 1
7628 && (CALL_COOKIE_INT_REG_GET
7629 (crtl->args.info.call_cookie,
7630 NPARM_REGS (SImode) - pushregs)
7631 == 1))
7632 {
7633 crtl->args.info.call_cookie
7634 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7635 - pushregs, 1);
7636 pushregs++;
7637 }
7638
7639 if (pushregs == NPARM_REGS (SImode))
7640 crtl->args.info.call_cookie
7641 |= (CALL_COOKIE_INT_REG (0, 1)
7642 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7643 else
7644 crtl->args.info.call_cookie
7645 |= CALL_COOKIE_STACKSEQ (pushregs);
7646
7647 crtl->args.pretend_args_size += 8 * n_intregs;
7648 }
7649 if (TARGET_SHCOMPACT)
7650 return const0_rtx;
7651 }
7652
7653 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7654 {
7655 error ("__builtin_saveregs not supported by this subtarget");
7656 return const0_rtx;
7657 }
7658
7659 if (TARGET_SHMEDIA)
7660 n_floatregs = 0;
7661
7662 /* Allocate block of memory for the regs. */
7663 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7664 Or can assign_stack_local accept a 0 SIZE argument? */
7665 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7666
7667 if (TARGET_SHMEDIA)
7668 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7669 else if (n_floatregs & 1)
7670 {
7671 rtx addr;
7672
7673 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7674 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7675 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7676 regbuf = change_address (regbuf, BLKmode, addr);
7677 }
7678 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7679 {
7680 rtx addr, mask;
7681
7682 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7683 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7684 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7685 emit_insn (gen_andsi3 (addr, addr, mask));
7686 regbuf = change_address (regbuf, BLKmode, addr);
7687 }
7688 else
7689 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7690 alias_set = get_varargs_alias_set ();
7691 set_mem_alias_set (regbuf, alias_set);
7692
7693 /* Save int args.
7694 This is optimized to only save the regs that are necessary. Explicitly
7695 named args need not be saved. */
7696 if (n_intregs > 0)
7697 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7698 adjust_address (regbuf, BLKmode,
7699 n_floatregs * UNITS_PER_WORD),
7700 n_intregs);
7701
7702 if (TARGET_SHMEDIA)
7703 /* Return the address of the regbuf. */
7704 return XEXP (regbuf, 0);
7705
7706 /* Save float args.
7707 This is optimized to only save the regs that are necessary. Explicitly
7708 named args need not be saved.
7709 We explicitly build a pointer to the buffer because it halves the insn
7710 count when not optimizing (otherwise the pointer is built for each reg
7711 saved).
7712 We emit the moves in reverse order so that we can use predecrement. */
7713
7714 fpregs = copy_to_mode_reg (Pmode,
7715 plus_constant (XEXP (regbuf, 0),
7716 n_floatregs * UNITS_PER_WORD));
7717 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7718 {
7719 rtx mem;
7720 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7721 {
7722 emit_insn (gen_addsi3 (fpregs, fpregs,
7723 GEN_INT (-2 * UNITS_PER_WORD)));
7724 mem = change_address (regbuf, DFmode, fpregs);
7725 emit_move_insn (mem,
7726 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7727 }
7728 regno = first_floatreg;
7729 if (regno & 1)
7730 {
7731 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7732 mem = change_address (regbuf, SFmode, fpregs);
7733 emit_move_insn (mem,
7734 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7735 - (TARGET_LITTLE_ENDIAN != 0)));
7736 }
7737 }
7738 else
7739 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7740 {
7741 rtx mem;
7742
7743 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7744 mem = change_address (regbuf, SFmode, fpregs);
7745 emit_move_insn (mem,
7746 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7747 }
7748
7749 /* Return the address of the regbuf. */
7750 return XEXP (regbuf, 0);
7751 }
7752
7753 /* Define the `__builtin_va_list' type for the ABI. */
7754
7755 static tree
7756 sh_build_builtin_va_list (void)
7757 {
7758 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7759 tree record, type_decl;
7760
7761 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7762 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7763 return ptr_type_node;
7764
7765 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7766 type_decl = build_decl (BUILTINS_LOCATION,
7767 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7768
7769 f_next_o = build_decl (BUILTINS_LOCATION,
7770 FIELD_DECL, get_identifier ("__va_next_o"),
7771 ptr_type_node);
7772 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7773 FIELD_DECL,
7774 get_identifier ("__va_next_o_limit"),
7775 ptr_type_node);
7776 f_next_fp = build_decl (BUILTINS_LOCATION,
7777 FIELD_DECL, get_identifier ("__va_next_fp"),
7778 ptr_type_node);
7779 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7780 FIELD_DECL,
7781 get_identifier ("__va_next_fp_limit"),
7782 ptr_type_node);
7783 f_next_stack = build_decl (BUILTINS_LOCATION,
7784 FIELD_DECL, get_identifier ("__va_next_stack"),
7785 ptr_type_node);
7786
7787 DECL_FIELD_CONTEXT (f_next_o) = record;
7788 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7789 DECL_FIELD_CONTEXT (f_next_fp) = record;
7790 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7791 DECL_FIELD_CONTEXT (f_next_stack) = record;
7792
7793 TYPE_STUB_DECL (record) = type_decl;
7794 TYPE_NAME (record) = type_decl;
7795 TYPE_FIELDS (record) = f_next_o;
7796 DECL_CHAIN (f_next_o) = f_next_o_limit;
7797 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7798 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7799 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7800
7801 layout_type (record);
7802
7803 return record;
7804 }
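/* Illustrative sketch only (not part of the original source): the record
   laid out above corresponds roughly to the following C structure; the
   field meanings are inferred from sh_va_start and
   sh_gimplify_va_arg_expr below.

     struct __va_list_tag
     {
       void *__va_next_o;         // next integer register argument slot
       void *__va_next_o_limit;   // end of the integer register save area
       void *__va_next_fp;        // next FP register argument slot
       void *__va_next_fp_limit;  // end of the FP register save area
       void *__va_next_stack;     // next stack-passed argument
     };  */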
7805
7806 /* Implement `va_start' for varargs and stdarg. */
7807
7808 static void
7809 sh_va_start (tree valist, rtx nextarg)
7810 {
7811 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7812 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7813 tree t, u;
7814 int nfp, nint;
7815
7816 if (TARGET_SH5)
7817 {
7818 expand_builtin_saveregs ();
7819 std_expand_builtin_va_start (valist, nextarg);
7820 return;
7821 }
7822
7823 if ((! TARGET_SH2E && ! TARGET_SH4)
7824 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7825 {
7826 std_expand_builtin_va_start (valist, nextarg);
7827 return;
7828 }
7829
7830 f_next_o = TYPE_FIELDS (va_list_type_node);
7831 f_next_o_limit = DECL_CHAIN (f_next_o);
7832 f_next_fp = DECL_CHAIN (f_next_o_limit);
7833 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7834 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7835
7836 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7837 NULL_TREE);
7838 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7839 valist, f_next_o_limit, NULL_TREE);
7840 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7841 NULL_TREE);
7842 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7843 valist, f_next_fp_limit, NULL_TREE);
7844 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7845 valist, f_next_stack, NULL_TREE);
7846
7847 /* Call __builtin_saveregs. */
7848 u = make_tree (sizetype, expand_builtin_saveregs ());
7849 u = fold_convert (ptr_type_node, u);
7850 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7851 TREE_SIDE_EFFECTS (t) = 1;
7852 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7853
7854 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7855 if (nfp < 8)
7856 nfp = 8 - nfp;
7857 else
7858 nfp = 0;
7859 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
7860 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7861 TREE_SIDE_EFFECTS (t) = 1;
7862 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7863
7864 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7865 TREE_SIDE_EFFECTS (t) = 1;
7866 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7867
7868 nint = crtl->args.info.arg_count[SH_ARG_INT];
7869 if (nint < 4)
7870 nint = 4 - nint;
7871 else
7872 nint = 0;
7873 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
7874 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7875 TREE_SIDE_EFFECTS (t) = 1;
7876 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7877
7878 u = make_tree (ptr_type_node, nextarg);
7879 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7880 TREE_SIDE_EFFECTS (t) = 1;
7881 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7882 }
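/* Illustrative sketch only (not part of the original source): for the
   SH2E/SH4 case handled above, sh_va_start behaves roughly like the
   C-level pseudo code below, where named_fp and named_int are
   hypothetical names for the counts of named FP and integer register
   arguments.

     char *regbuf = __builtin_saveregs ();
     ap->__va_next_fp       = regbuf;
     ap->__va_next_fp_limit = regbuf
                              + UNITS_PER_WORD * MAX (0, 8 - named_fp);
     ap->__va_next_o        = ap->__va_next_fp_limit;
     ap->__va_next_o_limit  = ap->__va_next_o
                              + UNITS_PER_WORD * MAX (0, 4 - named_int);
     ap->__va_next_stack    = nextarg;  */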
7883
7884 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7885 member, return it. */
7886 static tree
7887 find_sole_member (tree type)
7888 {
7889 tree field, member = NULL_TREE;
7890
7891 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7892 {
7893 if (TREE_CODE (field) != FIELD_DECL)
7894 continue;
7895 if (!DECL_SIZE (field))
7896 return NULL_TREE;
7897 if (integer_zerop (DECL_SIZE (field)))
7898 continue;
7899 if (member)
7900 return NULL_TREE;
7901 member = field;
7902 }
7903 return member;
7904 }
7905 /* Implement `va_arg'. */
7906
7907 static tree
7908 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7909 gimple_seq *post_p ATTRIBUTE_UNUSED)
7910 {
7911 HOST_WIDE_INT size, rsize;
7912 tree tmp, pptr_type_node;
7913 tree addr, lab_over = NULL, result = NULL;
7914 bool pass_by_ref;
7915 tree eff_type;
7916
7917 if (!VOID_TYPE_P (type))
7918 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7919 else
7920 pass_by_ref = false;
7921
7922 if (pass_by_ref)
7923 type = build_pointer_type (type);
7924
7925 size = int_size_in_bytes (type);
7926 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7927 pptr_type_node = build_pointer_type (ptr_type_node);
7928
7929 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7930 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7931 {
7932 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7933 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7934 int pass_as_float;
7935 tree lab_false;
7936 tree member;
7937
7938 f_next_o = TYPE_FIELDS (va_list_type_node);
7939 f_next_o_limit = DECL_CHAIN (f_next_o);
7940 f_next_fp = DECL_CHAIN (f_next_o_limit);
7941 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7942 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7943
7944 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7945 NULL_TREE);
7946 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7947 valist, f_next_o_limit, NULL_TREE);
7948 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7949 valist, f_next_fp, NULL_TREE);
7950 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7951 valist, f_next_fp_limit, NULL_TREE);
7952 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7953 valist, f_next_stack, NULL_TREE);
7954
7955 /* Structures with a single member with a distinct mode are passed
7956 like their member. This is relevant if the latter has a REAL_TYPE
7957 or COMPLEX_TYPE type. */
7958 eff_type = type;
7959 while (TREE_CODE (eff_type) == RECORD_TYPE
7960 && (member = find_sole_member (eff_type))
7961 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7962 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7963 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7964 {
7965 tree field_type = TREE_TYPE (member);
7966
7967 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7968 eff_type = field_type;
7969 else
7970 {
7971 gcc_assert ((TYPE_ALIGN (eff_type)
7972 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7973 || (TYPE_ALIGN (eff_type)
7974 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7975 break;
7976 }
7977 }
7978
7979 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7980 {
7981 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7982 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7983 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7984 && size <= 16));
7985 }
7986 else
7987 {
7988 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7989 }
7990
7991 addr = create_tmp_var (pptr_type_node, NULL);
7992 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7993 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7994
7995 valist = build_simple_mem_ref (addr);
7996
7997 if (pass_as_float)
7998 {
7999 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8000 tree cmp;
8001 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8002
8003 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8004 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8005
8006 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8007 tmp = next_fp_limit;
8008 if (size > 4 && !is_double)
8009 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8010 tmp = build2 (GE_EXPR, boolean_type_node,
8011 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8012 cmp = build3 (COND_EXPR, void_type_node, tmp,
8013 build1 (GOTO_EXPR, void_type_node,
8014 unshare_expr (lab_false)), NULL_TREE);
8015 if (!is_double)
8016 gimplify_and_add (cmp, pre_p);
8017
8018 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8019 || (is_double || size == 16))
8020 {
8021 tmp = fold_convert (sizetype, next_fp_tmp);
8022 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8023 size_int (UNITS_PER_WORD));
8024 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8025 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8026 }
8027 if (is_double)
8028 gimplify_and_add (cmp, pre_p);
8029
8030 #ifdef FUNCTION_ARG_SCmode_WART
8031 if (TYPE_MODE (eff_type) == SCmode
8032 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8033 {
8034 tree subtype = TREE_TYPE (eff_type);
8035 tree real, imag;
8036
8037 imag
8038 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8039 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8040
8041 real
8042 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8043 real = get_initialized_tmp_var (real, pre_p, NULL);
8044
8045 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8046 if (type != eff_type)
8047 result = build1 (VIEW_CONVERT_EXPR, type, result);
8048 result = get_initialized_tmp_var (result, pre_p, NULL);
8049 }
8050 #endif /* FUNCTION_ARG_SCmode_WART */
8051
8052 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8053 gimplify_and_add (tmp, pre_p);
8054
8055 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8056 gimplify_and_add (tmp, pre_p);
8057
8058 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8059 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8060 gimplify_assign (unshare_expr (next_fp_tmp),
8061 unshare_expr (valist), pre_p);
8062
8063 gimplify_assign (unshare_expr (valist),
8064 unshare_expr (next_fp_tmp), post_p);
8065 valist = next_fp_tmp;
8066 }
8067 else
8068 {
8069 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8070 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8071 unshare_expr (next_o_limit));
8072 tmp = build3 (COND_EXPR, void_type_node, tmp,
8073 build1 (GOTO_EXPR, void_type_node,
8074 unshare_expr (lab_false)),
8075 NULL_TREE);
8076 gimplify_and_add (tmp, pre_p);
8077
8078 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8079 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8080
8081 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8082 gimplify_and_add (tmp, pre_p);
8083
8084 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8085 gimplify_and_add (tmp, pre_p);
8086
8087 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8088 gimplify_assign (unshare_expr (next_o),
8089 unshare_expr (next_o_limit), pre_p);
8090
8091 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8092 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8093 }
8094
8095 if (!result)
8096 {
8097 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8098 gimplify_and_add (tmp, pre_p);
8099 }
8100 }
8101
8102 /* ??? In va-sh.h, there had been code to make values larger than
8103 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8104
8105 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8106 if (result)
8107 {
8108 gimplify_assign (result, tmp, pre_p);
8109 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8110 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8111 gimplify_and_add (tmp, pre_p);
8112 }
8113 else
8114 result = tmp;
8115
8116 if (pass_by_ref)
8117 result = build_va_arg_indirect_ref (result);
8118
8119 return result;
8120 }
8121
8122 /* 64-bit floating point memory transfers are paired single precision loads
8123 or stores, so DWARF information needs fixing in little endian mode (unless
8124 PR=SZ=1 in FPSCR). */
8125 rtx
8126 sh_dwarf_register_span (rtx reg)
8127 {
8128 unsigned regno = REGNO (reg);
8129
8130 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8131 return NULL_RTX;
8132
8133 return
8134 gen_rtx_PARALLEL (VOIDmode,
8135 gen_rtvec (2,
8136 gen_rtx_REG (SFmode,
8137 DBX_REGISTER_NUMBER (regno+1)),
8138 gen_rtx_REG (SFmode,
8139 DBX_REGISTER_NUMBER (regno))));
8140 }
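/* Illustrative example only: for a little-endian DFmode value in dr0 (the
   fr0/fr1 pair), the span returned above is
   (parallel [(reg:SF fr1) (reg:SF fr0)]), i.e. the two SFmode halves in
   swapped order, which per the comment above is what the DWARF consumer
   expects for the paired single precision transfers.  */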
8141
8142 static enum machine_mode
8143 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8144 int *punsignedp, const_tree funtype,
8145 int for_return)
8146 {
8147 if (sh_promote_prototypes (funtype))
8148 return promote_mode (type, mode, punsignedp);
8149 else
8150 return default_promote_function_mode (type, mode, punsignedp, funtype,
8151 for_return);
8152 }
8153
8154 static bool
8155 sh_promote_prototypes (const_tree type)
8156 {
8157 if (TARGET_HITACHI)
8158 return 0;
8159 if (! type)
8160 return 1;
8161 return ! sh_attr_renesas_p (type);
8162 }
8163
8164 /* Whether an argument must be passed by reference. On SHcompact, we
8165 pretend arguments wider than 32 bits that would have been passed in
8166 registers are passed by reference, so that an SHmedia trampoline
8167 loads them into the full 64-bit registers. */
8168
8169 static int
8170 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8171 const_tree type, bool named)
8172 {
8173 unsigned HOST_WIDE_INT size;
8174
8175 if (type)
8176 size = int_size_in_bytes (type);
8177 else
8178 size = GET_MODE_SIZE (mode);
8179
8180 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8181 && (!named
8182 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8183 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8184 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8185 && size > 4
8186 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8187 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8188 return size;
8189 else
8190 return 0;
8191 }
8192
8193 static bool
8194 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8195 const_tree type, bool named)
8196 {
8197 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8198
8199 if (targetm.calls.must_pass_in_stack (mode, type))
8200 return true;
8201
8202 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8203 wants to know about pass-by-reference semantics for incoming
8204 arguments. */
8205 if (! cum)
8206 return false;
8207
8208 if (TARGET_SHCOMPACT)
8209 {
8210 cum->byref = shcompact_byref (cum, mode, type, named);
8211 return cum->byref != 0;
8212 }
8213
8214 return false;
8215 }
8216
8217 static bool
8218 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8219 const_tree type, bool named ATTRIBUTE_UNUSED)
8220 {
8221 /* ??? How can it possibly be correct to return true only on the
8222 caller side of the equation? Is there someplace else in the
8223 sh backend that's magically producing the copies? */
8224 return (get_cumulative_args (cum)->outgoing
8225 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8226 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8227 }
8228
8229 static int
8230 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8231 tree type, bool named ATTRIBUTE_UNUSED)
8232 {
8233 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8234 int words = 0;
8235
8236 if (!TARGET_SH5
8237 && PASS_IN_REG_P (*cum, mode, type)
8238 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8239 && (ROUND_REG (*cum, mode)
8240 + (mode != BLKmode
8241 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8242 : ROUND_ADVANCE (int_size_in_bytes (type)))
8243 > NPARM_REGS (mode)))
8244 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8245
8246 else if (!TARGET_SHCOMPACT
8247 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8248 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8249
8250 return words * UNITS_PER_WORD;
8251 }
8252
8253
8254 /* Define where to put the arguments to a function.
8255 Value is zero to push the argument on the stack,
8256 or a hard register in which to store the argument.
8257
8258 MODE is the argument's machine mode.
8259 TYPE is the data type of the argument (as a tree).
8260 This is null for libcalls where that information may
8261 not be available.
8262 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8263 the preceding args and about the function being called.
8264 NAMED is nonzero if this argument is a named parameter
8265 (otherwise it is an extra parameter matching an ellipsis).
8266
8267 On SH the first args are normally in registers
8268 and the rest are pushed. Any arg that starts within the first
8269 NPARM_REGS words is at least partially passed in a register unless
8270 its data type forbids. */
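/* Illustrative note only (assumptions about the common configuration):
   with the default, non-Renesas 32-bit SH ABI the first integer
   arguments go in r4..r7 and the first single precision float arguments
   in fr4..fr11; the code below additionally implements the SH4
   little-endian SFmode/SCmode register pairing quirks and the SH5
   variants.  */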
8271
8272 static rtx
8273 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8274 const_tree type, bool named)
8275 {
8276 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8277
8278 if (! TARGET_SH5 && mode == VOIDmode)
8279 return GEN_INT (ca->renesas_abi ? 1 : 0);
8280
8281 if (! TARGET_SH5
8282 && PASS_IN_REG_P (*ca, mode, type)
8283 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8284 {
8285 int regno;
8286
8287 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8288 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8289 {
8290 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8291 gen_rtx_REG (SFmode,
8292 BASE_ARG_REG (mode)
8293 + (ROUND_REG (*ca, mode) ^ 1)),
8294 const0_rtx);
8295 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8296 gen_rtx_REG (SFmode,
8297 BASE_ARG_REG (mode)
8298 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8299 GEN_INT (4));
8300 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8301 }
8302
8303 /* If the alignment of a DF value causes an SF register to be
8304 skipped, we will use that skipped register for the next SF
8305 value. */
8306 if ((TARGET_HITACHI || ca->renesas_abi)
8307 && ca->free_single_fp_reg
8308 && mode == SFmode)
8309 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8310
8311 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8312 ^ (mode == SFmode && TARGET_SH4
8313 && TARGET_LITTLE_ENDIAN != 0
8314 && ! TARGET_HITACHI && ! ca->renesas_abi);
8315 return gen_rtx_REG (mode, regno);
8316
8317 }
8318
8319 if (TARGET_SH5)
8320 {
8321 if (mode == VOIDmode && TARGET_SHCOMPACT)
8322 return GEN_INT (ca->call_cookie);
8323
8324 /* The following test assumes unnamed arguments are promoted to
8325 DFmode. */
8326 if (mode == SFmode && ca->free_single_fp_reg)
8327 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8328
8329 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8330 && (named || ! ca->prototype_p)
8331 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8332 {
8333 if (! ca->prototype_p && TARGET_SHMEDIA)
8334 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8335
8336 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8337 FIRST_FP_PARM_REG
8338 + ca->arg_count[(int) SH_ARG_FLOAT]);
8339 }
8340
8341 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8342 && (! TARGET_SHCOMPACT
8343 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8344 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8345 type, named))))
8346 {
8347 return gen_rtx_REG (mode, (FIRST_PARM_REG
8348 + ca->arg_count[(int) SH_ARG_INT]));
8349 }
8350
8351 return 0;
8352 }
8353
8354 return 0;
8355 }
8356
8357 /* Update the data in CUM to advance over an argument
8358 of mode MODE and data type TYPE.
8359 (TYPE is null for libcalls where that information may not be
8360 available.) */
8361
8362 static void
8363 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
8364 const_tree type, bool named)
8365 {
8366 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8367
8368 if (ca->force_mem)
8369 ca->force_mem = 0;
8370 else if (TARGET_SH5)
8371 {
8372 const_tree type2 = (ca->byref && type
8373 ? TREE_TYPE (type)
8374 : type);
8375 enum machine_mode mode2 = (ca->byref && type
8376 ? TYPE_MODE (type2)
8377 : mode);
8378 int dwords = ((ca->byref
8379 ? ca->byref
8380 : mode2 == BLKmode
8381 ? int_size_in_bytes (type2)
8382 : GET_MODE_SIZE (mode2)) + 7) / 8;
8383 int numregs = MIN (dwords, NPARM_REGS (SImode)
8384 - ca->arg_count[(int) SH_ARG_INT]);
8385
8386 if (numregs)
8387 {
8388 ca->arg_count[(int) SH_ARG_INT] += numregs;
8389 if (TARGET_SHCOMPACT
8390 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8391 {
8392 ca->call_cookie
8393 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8394 - numregs, 1);
8395 /* N.B. We want this also for outgoing. */
8396 ca->stack_regs += numregs;
8397 }
8398 else if (ca->byref)
8399 {
8400 if (! ca->outgoing)
8401 ca->stack_regs += numregs;
8402 ca->byref_regs += numregs;
8403 ca->byref = 0;
8404 do
8405 ca->call_cookie
8406 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8407 - numregs, 2);
8408 while (--numregs);
8409 ca->call_cookie
8410 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8411 - 1, 1);
8412 }
8413 else if (dwords > numregs)
8414 {
8415 int pushregs = numregs;
8416
8417 if (TARGET_SHCOMPACT)
8418 ca->stack_regs += numregs;
8419 while (pushregs < NPARM_REGS (SImode) - 1
8420 && (CALL_COOKIE_INT_REG_GET
8421 (ca->call_cookie,
8422 NPARM_REGS (SImode) - pushregs)
8423 == 1))
8424 {
8425 ca->call_cookie
8426 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8427 - pushregs, 1);
8428 pushregs++;
8429 }
8430 if (numregs == NPARM_REGS (SImode))
8431 ca->call_cookie
8432 |= CALL_COOKIE_INT_REG (0, 1)
8433 | CALL_COOKIE_STACKSEQ (numregs - 1);
8434 else
8435 ca->call_cookie
8436 |= CALL_COOKIE_STACKSEQ (numregs);
8437 }
8438 }
8439 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8440 && (named || ! ca->prototype_p))
8441 {
8442 if (mode2 == SFmode && ca->free_single_fp_reg)
8443 ca->free_single_fp_reg = 0;
8444 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8445 < NPARM_REGS (SFmode))
8446 {
8447 int numfpregs
8448 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8449 NPARM_REGS (SFmode)
8450 - ca->arg_count[(int) SH_ARG_FLOAT]);
8451
8452 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8453
8454 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8455 {
8456 if (ca->outgoing && numregs > 0)
8457 do
8458 {
8459 ca->call_cookie
8460 |= (CALL_COOKIE_INT_REG
8461 (ca->arg_count[(int) SH_ARG_INT]
8462 - numregs + ((numfpregs - 2) / 2),
8463 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8464 - numfpregs) / 2));
8465 }
8466 while (numfpregs -= 2);
8467 }
8468 else if (mode2 == SFmode && (named)
8469 && (ca->arg_count[(int) SH_ARG_FLOAT]
8470 < NPARM_REGS (SFmode)))
8471 ca->free_single_fp_reg
8472 = FIRST_FP_PARM_REG - numfpregs
8473 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8474 }
8475 }
8476 return;
8477 }
8478
8479 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8480 {
8481 /* Note that we've used the skipped register. */
8482 if (mode == SFmode && ca->free_single_fp_reg)
8483 {
8484 ca->free_single_fp_reg = 0;
8485 return;
8486 }
8487 /* When we have a DF after an SF, there's an SF register that gets
8488 skipped in order to align the DF value. We note this skipped
8489 register, because the next SF value will use it, and not the
8490 SF that follows the DF. */
8491 if (mode == DFmode
8492 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8493 {
8494 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8495 + BASE_ARG_REG (mode));
8496 }
8497 }
8498
8499 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8500 || PASS_IN_REG_P (*ca, mode, type))
8501 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8502 = (ROUND_REG (*ca, mode)
8503 + (mode == BLKmode
8504 ? ROUND_ADVANCE (int_size_in_bytes (type))
8505 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8506 }
8507
8508 /* The Renesas calling convention doesn't quite fit into this scheme since
8509 the address is passed like an invisible argument, but one that is always
8510 passed in memory. */
8511 static rtx
8512 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8513 {
8514 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8515 return 0;
8516 return gen_rtx_REG (Pmode, 2);
8517 }
8518
8519 /* Worker function for TARGET_FUNCTION_VALUE.
8520
8521 For the SH, this is like LIBCALL_VALUE, except that we must change the
8522 mode like PROMOTE_MODE does.
8523 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8524 tested here has to be kept in sync with the one in explow.c:promote_mode.
8525 */
8526
8527 static rtx
8528 sh_function_value (const_tree valtype,
8529 const_tree fn_decl_or_type,
8530 bool outgoing ATTRIBUTE_UNUSED)
8531 {
8532 if (fn_decl_or_type
8533 && !DECL_P (fn_decl_or_type))
8534 fn_decl_or_type = NULL;
8535
8536 return gen_rtx_REG (
8537 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8538 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8539 && (TREE_CODE (valtype) == INTEGER_TYPE
8540 || TREE_CODE (valtype) == ENUMERAL_TYPE
8541 || TREE_CODE (valtype) == BOOLEAN_TYPE
8542 || TREE_CODE (valtype) == REAL_TYPE
8543 || TREE_CODE (valtype) == OFFSET_TYPE))
8544 && sh_promote_prototypes (fn_decl_or_type)
8545 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8546 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8547 }
8548
8549 /* Worker function for TARGET_LIBCALL_VALUE. */
8550
8551 static rtx
8552 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8553 {
8554 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8555 }
8556
8557 /* Return true if N is a possible register number of function value. */
8558
8559 static bool
8560 sh_function_value_regno_p (const unsigned int regno)
8561 {
8562 return ((regno) == FIRST_RET_REG
8563 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8564 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8565 }
8566
8567 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8568
8569 static bool
8570 sh_return_in_memory (const_tree type, const_tree fndecl)
8571 {
8572 if (TARGET_SH5)
8573 {
8574 if (TYPE_MODE (type) == BLKmode)
8575 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8576 else
8577 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8578 }
8579 else
8580 {
8581 return (TYPE_MODE (type) == BLKmode
8582 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8583 && TREE_CODE (type) == RECORD_TYPE));
8584 }
8585 }
8586
8587 /* We actually emit the code in sh_expand_prologue. We used to use
8588 a static variable to flag that we need to emit this code, but that
8589 doesn't work when inlining, when functions are deferred and then emitted
8590 later. Fortunately, we already have two flags that are part of struct
8591 function that tell if a function uses varargs or stdarg. */
8592 static void
8593 sh_setup_incoming_varargs (cumulative_args_t ca,
8594 enum machine_mode mode,
8595 tree type,
8596 int *pretend_arg_size,
8597 int second_time ATTRIBUTE_UNUSED)
8598 {
8599 gcc_assert (cfun->stdarg);
8600 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8601 {
8602 int named_parm_regs, anon_parm_regs;
8603
8604 named_parm_regs = (ROUND_REG (*get_cumulative_args (ca), mode)
8605 + (mode == BLKmode
8606 ? ROUND_ADVANCE (int_size_in_bytes (type))
8607 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8608 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8609 if (anon_parm_regs > 0)
8610 *pretend_arg_size = anon_parm_regs * 4;
8611 }
8612 }
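/* Illustrative example only: for a varargs function such as
   int f (int a, ...) the single named integer argument occupies r4, so
   anon_parm_regs above is 3 (covering r5..r7) and *pretend_arg_size is
   set to 12 bytes of pretend argument space.  */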
8613
8614 static bool
8615 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8616 {
8617 return TARGET_SH5;
8618 }
8619
8620 static bool
8621 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8622 {
8623 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8624
8625 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8626 }
8627
8628
8629 /* Define the offset between two registers, one to be eliminated, and
8630 the other its replacement, at the start of a routine. */
8631
8632 int
8633 initial_elimination_offset (int from, int to)
8634 {
8635 int regs_saved;
8636 int regs_saved_rounding = 0;
8637 int total_saved_regs_space;
8638 int total_auto_space;
8639 int save_flags = target_flags;
8640 int copy_flags;
8641 HARD_REG_SET live_regs_mask;
8642
8643 shmedia_space_reserved_for_target_registers = false;
8644 regs_saved = calc_live_regs (&live_regs_mask);
8645 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8646
8647 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8648 {
8649 shmedia_space_reserved_for_target_registers = true;
8650 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8651 }
8652
8653 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8654 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8655 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8656
8657 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8658 copy_flags = target_flags;
8659 target_flags = save_flags;
8660
8661 total_saved_regs_space = regs_saved + regs_saved_rounding;
8662
8663 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8664 return total_saved_regs_space + total_auto_space
8665 + crtl->args.info.byref_regs * 8;
8666
8667 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8668 return total_saved_regs_space + total_auto_space
8669 + crtl->args.info.byref_regs * 8;
8670
8671 /* Initial gap between fp and sp is 0. */
8672 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8673 return 0;
8674
8675 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8676 return rounded_frame_size (0);
8677
8678 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8679 return rounded_frame_size (0);
8680
8681 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8682 && (to == HARD_FRAME_POINTER_REGNUM
8683 || to == STACK_POINTER_REGNUM));
8684 if (TARGET_SH5)
8685 {
8686 int n = total_saved_regs_space;
8687 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8688 save_schedule schedule;
8689 save_entry *entry;
8690
8691 n += total_auto_space;
8692
8693 /* If it wasn't saved, there's not much we can do. */
8694 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8695 return n;
8696
8697 target_flags = copy_flags;
8698
8699 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8700 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8701 if (entry->reg == pr_reg)
8702 {
8703 target_flags = save_flags;
8704 return entry->offset;
8705 }
8706 gcc_unreachable ();
8707 }
8708 else
8709 return total_auto_space;
8710 }
8711
8712 /* Parse the -mfixed-range= option string. */
8713 void
8714 sh_fix_range (const char *const_str)
8715 {
8716 int i, first, last;
8717 char *str, *dash, *comma;
8718
8719 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8720 REG2 are either register names or register numbers. The effect
8721 of this option is to mark the registers in the range from REG1 to
8722 REG2 as ``fixed'' so they won't be used by the compiler. */
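  /* For example, a hypothetical option
       -mfixed-range=r4-r6,r10-r12
     would mark r4..r6 and r10..r12 as fixed and call-used.  */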
8723
8724 i = strlen (const_str);
8725 str = (char *) alloca (i + 1);
8726 memcpy (str, const_str, i + 1);
8727
8728 while (1)
8729 {
8730 dash = strchr (str, '-');
8731 if (!dash)
8732 {
8733 warning (0, "value of -mfixed-range must have form REG1-REG2");
8734 return;
8735 }
8736 *dash = '\0';
8737 comma = strchr (dash + 1, ',');
8738 if (comma)
8739 *comma = '\0';
8740
8741 first = decode_reg_name (str);
8742 if (first < 0)
8743 {
8744 warning (0, "unknown register name: %s", str);
8745 return;
8746 }
8747
8748 last = decode_reg_name (dash + 1);
8749 if (last < 0)
8750 {
8751 warning (0, "unknown register name: %s", dash + 1);
8752 return;
8753 }
8754
8755 *dash = '-';
8756
8757 if (first > last)
8758 {
8759 warning (0, "%s-%s is an empty range", str, dash + 1);
8760 return;
8761 }
8762
8763 for (i = first; i <= last; ++i)
8764 fixed_regs[i] = call_used_regs[i] = 1;
8765
8766 if (!comma)
8767 break;
8768
8769 *comma = ',';
8770 str = comma + 1;
8771 }
8772 }
8773 \f
8774 /* Insert any deferred function attributes from earlier pragmas. */
8775 static void
8776 sh_insert_attributes (tree node, tree *attributes)
8777 {
8778 tree attrs;
8779
8780 if (TREE_CODE (node) != FUNCTION_DECL)
8781 return;
8782
8783 /* We are only interested in declarations. */
8784 if (!DECL_P (node))
8785 return;
8786
8787 /* Append the attributes to the deferred attributes. */
8788 *sh_deferred_function_attributes_tail = *attributes;
8789 attrs = sh_deferred_function_attributes;
8790 if (!attrs)
8791 return;
8792
8793 /* Some attributes imply or require the interrupt attribute. */
8794 if (!lookup_attribute ("interrupt_handler", attrs)
8795 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8796 {
8797 /* If we have a trapa_handler, but no interrupt_handler attribute,
8798 insert an interrupt_handler attribute. */
8799 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8800 /* We can't use sh_pr_interrupt here because that's not in the
8801 java frontend. */
8802 attrs
8803 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8804 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8805 if the interrupt attribute is missing, we ignore the attribute
8806 and warn. */
8807 else if (lookup_attribute ("sp_switch", attrs)
8808 || lookup_attribute ("trap_exit", attrs)
8809 || lookup_attribute ("nosave_low_regs", attrs)
8810 || lookup_attribute ("resbank", attrs))
8811 {
8812 tree *tail;
8813
8814 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8815 {
8816 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8817 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8818 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8819 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8820 warning (OPT_Wattributes,
8821 "%qE attribute only applies to interrupt functions",
8822 TREE_PURPOSE (attrs));
8823 else
8824 {
8825 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8826 NULL_TREE);
8827 tail = &TREE_CHAIN (*tail);
8828 }
8829 }
8830 attrs = *attributes;
8831 }
8832 }
8833
8834 /* Install the processed list. */
8835 *attributes = attrs;
8836
8837 /* Clear deferred attributes. */
8838 sh_deferred_function_attributes = NULL_TREE;
8839 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8840
8841 return;
8842 }
8843
8844 /* Supported attributes:
8845
8846 interrupt_handler -- specifies this function is an interrupt handler.
8847
8848 trapa_handler -- like above, but don't save all registers.
8849
8850 sp_switch -- specifies an alternate stack for an interrupt handler
8851 to run on.
8852
8853 trap_exit -- use a trapa to exit an interrupt function instead of
8854 an rte instruction.
8855
8856 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
8857 This is useful on SH3 and upwards,
8858 which have a separate set of low regs for User and Supervisor modes.
8859 This should only be used for the lowest level of interrupts. Higher levels
8860 of interrupts must save the registers in case they themselves are
8861 interrupted.
8862
8863 renesas -- use Renesas calling/layout conventions (functions and
8864 structures).
8865
8866 resbank -- In case of an ISR, use a register bank to save registers
8867 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8868 */
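/* Illustrative usage sketch only (not part of the original source); the
   attribute names follow the list above, while the variable, trap number
   and vector number are made-up examples:

     extern void *alt_stack;   // global holding the alternate stack pointer

     void __attribute__ ((interrupt_handler,
                          sp_switch ("alt_stack"),
                          trap_exit (11),
                          nosave_low_regs))
     low_level_isr (void);

     void __attribute__ ((interrupt_handler, resbank))
     banked_isr (void);        // resbank is honored on SH2A only

     void __attribute__ ((function_vector (25)))
     vectored_func (void);     // SH2A only; vector number must be 0..255  */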
8869
8870 /* Handle a 'resbank' attribute. */
8871 static tree
8872 sh_handle_resbank_handler_attribute (tree * node, tree name,
8873 tree args ATTRIBUTE_UNUSED,
8874 int flags ATTRIBUTE_UNUSED,
8875 bool * no_add_attrs)
8876 {
8877 if (!TARGET_SH2A)
8878 {
8879 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8880 name);
8881 *no_add_attrs = true;
8882 }
8883 if (TREE_CODE (*node) != FUNCTION_DECL)
8884 {
8885 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8886 name);
8887 *no_add_attrs = true;
8888 }
8889
8890 return NULL_TREE;
8891 }
8892
8893 /* Handle an "interrupt_handler" attribute; arguments as in
8894 struct attribute_spec.handler. */
8895 static tree
8896 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8897 tree args ATTRIBUTE_UNUSED,
8898 int flags ATTRIBUTE_UNUSED,
8899 bool *no_add_attrs)
8900 {
8901 if (TREE_CODE (*node) != FUNCTION_DECL)
8902 {
8903 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8904 name);
8905 *no_add_attrs = true;
8906 }
8907 else if (TARGET_SHCOMPACT)
8908 {
8909 error ("attribute interrupt_handler is not compatible with -m5-compact");
8910 *no_add_attrs = true;
8911 }
8912
8913 return NULL_TREE;
8914 }
8915
8916 /* Handle a 'function_vector' attribute; arguments as in
8917 struct attribute_spec.handler. */
8918 static tree
8919 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8920 tree args ATTRIBUTE_UNUSED,
8921 int flags ATTRIBUTE_UNUSED,
8922 bool * no_add_attrs)
8923 {
8924 if (!TARGET_SH2A)
8925 {
8926 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8927 name);
8928 *no_add_attrs = true;
8929 }
8930 else if (TREE_CODE (*node) != FUNCTION_DECL)
8931 {
8932 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8933 name);
8934 *no_add_attrs = true;
8935 }
8936 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8937 {
8938 /* The argument must be a constant integer. */
8939 warning (OPT_Wattributes,
8940 "%qE attribute argument not an integer constant",
8941 name);
8942 *no_add_attrs = true;
8943 }
8944 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8945 {
8946 /* The argument value must be between 0 and 255. */
8947 warning (OPT_Wattributes,
8948 "%qE attribute argument should be between 0 and 255",
8949 name);
8950 *no_add_attrs = true;
8951 }
8952 return NULL_TREE;
8953 }
8954
8955 /* Returns 1 if the function referenced by the symbol ref X has been
8956 assigned the 'function_vector' attribute. */
8957 int
8958 sh2a_is_function_vector_call (rtx x)
8959 {
8960 if (GET_CODE (x) == SYMBOL_REF
8961 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8962 {
8963 tree tr = SYMBOL_REF_DECL (x);
8964
8965 if (sh2a_function_vector_p (tr))
8966 return 1;
8967 }
8968
8969 return 0;
8970 }
8971
8972 /* Returns the function vector number, if the attribute
8973 'function_vector' is assigned, otherwise returns zero. */
8974 int
8975 sh2a_get_function_vector_number (rtx x)
8976 {
8977 int num;
8978 tree list, t;
8979
8980 if ((GET_CODE (x) == SYMBOL_REF)
8981 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8982 {
8983 t = SYMBOL_REF_DECL (x);
8984
8985 if (TREE_CODE (t) != FUNCTION_DECL)
8986 return 0;
8987
8988 list = SH_ATTRIBUTES (t);
8989 while (list)
8990 {
8991 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8992 {
8993 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8994 return num;
8995 }
8996
8997 list = TREE_CHAIN (list);
8998 }
8999
9000 return 0;
9001 }
9002 else
9003 return 0;
9004 }
9005
9006 /* Handle an "sp_switch" attribute; arguments as in
9007 struct attribute_spec.handler. */
9008 static tree
9009 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9010 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9011 {
9012 if (TREE_CODE (*node) != FUNCTION_DECL)
9013 {
9014 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9015 name);
9016 *no_add_attrs = true;
9017 }
9018 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9019 {
9020 /* The argument must be a constant string. */
9021 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9022 name);
9023 *no_add_attrs = true;
9024 }
9025
9026 return NULL_TREE;
9027 }
9028
9029 /* Handle a "trap_exit" attribute; arguments as in
9030 struct attribute_spec.handler. */
9031 static tree
9032 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9033 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9034 {
9035 if (TREE_CODE (*node) != FUNCTION_DECL)
9036 {
9037 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9038 name);
9039 *no_add_attrs = true;
9040 }
9041 /* The argument specifies a trap number to be used in a trapa instruction
9042 at function exit (instead of an rte instruction). */
9043 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9044 {
9045 /* The argument must be a constant integer. */
9046 warning (OPT_Wattributes, "%qE attribute argument not an "
9047 "integer constant", name);
9048 *no_add_attrs = true;
9049 }
9050
9051 return NULL_TREE;
9052 }
9053
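/* Handle a "renesas" attribute; arguments as in
   struct attribute_spec.handler.  There is nothing to validate here, so the
   attribute is always accepted.  */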
9054 static tree
9055 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9056 tree name ATTRIBUTE_UNUSED,
9057 tree args ATTRIBUTE_UNUSED,
9058 int flags ATTRIBUTE_UNUSED,
9059 bool *no_add_attrs ATTRIBUTE_UNUSED)
9060 {
9061 return NULL_TREE;
9062 }
9063
9064 /* True if __attribute__((renesas)) or -mrenesas. */
9065 int
9066 sh_attr_renesas_p (const_tree td)
9067 {
9068 if (TARGET_HITACHI)
9069 return 1;
9070 if (td == 0)
9071 return 0;
9072 if (DECL_P (td))
9073 td = TREE_TYPE (td);
9074 if (td == error_mark_node)
9075 return 0;
9076 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9077 != NULL_TREE);
9078 }
9079
9080 /* True if __attribute__((renesas)) or -mrenesas, for the current
9081 function. */
9082 int
9083 sh_cfun_attr_renesas_p (void)
9084 {
9085 return sh_attr_renesas_p (current_function_decl);
9086 }
9087
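/* Returns 1 if the current function has the "interrupt_handler" attribute.  */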
9088 int
9089 sh_cfun_interrupt_handler_p (void)
9090 {
9091 return (lookup_attribute ("interrupt_handler",
9092 DECL_ATTRIBUTES (current_function_decl))
9093 != NULL_TREE);
9094 }
9095
9096 /* Returns 1 if FUNC has been assigned the attribute
9097 "function_vector". */
9098 int
9099 sh2a_function_vector_p (tree func)
9100 {
9101 tree list;
9102 if (TREE_CODE (func) != FUNCTION_DECL)
9103 return 0;
9104
9105 list = SH_ATTRIBUTES (func);
9106 while (list)
9107 {
9108 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9109 return 1;
9110
9111 list = TREE_CHAIN (list);
9112 }
9113 return 0;
9114 }
9115
9116 /* Returns TRUE if the current function has the "resbank" attribute. */
9117
9118 int
9119 sh_cfun_resbank_handler_p (void)
9120 {
9121 return ((lookup_attribute ("resbank",
9122 DECL_ATTRIBUTES (current_function_decl))
9123 != NULL_TREE)
9124 && (lookup_attribute ("interrupt_handler",
9125 DECL_ATTRIBUTES (current_function_decl))
9126 != NULL_TREE) && TARGET_SH2A);
9127 }
9128
9129 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9130
9131 static const char *
9132 sh_check_pch_target_flags (int old_flags)
9133 {
9134 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9135 | MASK_SH_E | MASK_HARD_SH4
9136 | MASK_FPU_SINGLE | MASK_SH4))
9137 return _("created and used with different architectures / ABIs");
9138 if ((old_flags ^ target_flags) & MASK_HITACHI)
9139 return _("created and used with different ABIs");
9140 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9141 return _("created and used with different endianness");
9142 return NULL;
9143 }
9144 \f
9145 /* Predicates used by the templates. */
9146
9147 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9148 Used only in general_movsrc_operand. */
9149
9150 int
9151 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9152 {
9153 switch (REGNO (op))
9154 {
9155 case PR_REG:
9156 case MACL_REG:
9157 case MACH_REG:
9158 return 1;
9159 }
9160 return 0;
9161 }
9162
9163 /* Nonzero if OP is a floating point value with value 0.0. */
9164
9165 int
9166 fp_zero_operand (rtx op)
9167 {
9168 REAL_VALUE_TYPE r;
9169
9170 if (GET_MODE (op) != SFmode)
9171 return 0;
9172
9173 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9174 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9175 }
9176
9177 /* Nonzero if OP is a floating point value with value 1.0. */
9178
9179 int
9180 fp_one_operand (rtx op)
9181 {
9182 REAL_VALUE_TYPE r;
9183
9184 if (GET_MODE (op) != SFmode)
9185 return 0;
9186
9187 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9188 return REAL_VALUES_EQUAL (r, dconst1);
9189 }
9190
9191 /* In general, mode switching is used. If we are
9192 compiling without -mfmovd, movsf_ie isn't taken into account for
9193 mode switching. We could check in machine_dependent_reorg for
9194 cases where we know we are in single precision mode, but there is
9195 no interface to find that out during reload, so we must avoid
9196 choosing an fldi alternative during reload and thus failing to
9197 allocate a scratch register for the constant loading. */
9198 int
9199 fldi_ok (void)
9200 {
9201 return 1;
9202 }
9203
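/* Returns 1 if OP is a MEM or, on SH4, a CONST_DOUBLE.  */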
9204 int
9205 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9206 {
9207 enum rtx_code code = GET_CODE (op);
9208 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9209 }
9210
9211 /* Return the TLS type for TLS symbols; return TLS_MODEL_NONE otherwise. */
9212 enum tls_model
9213 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9214 {
9215 if (GET_CODE (op) != SYMBOL_REF)
9216 return TLS_MODEL_NONE;
9217 return SYMBOL_REF_TLS_MODEL (op);
9218 }
9219 \f
9220 /* Return the destination address of a branch. */
9221
9222 static int
9223 branch_dest (rtx branch)
9224 {
9225 rtx dest = SET_SRC (PATTERN (branch));
9226 int dest_uid;
9227
9228 if (GET_CODE (dest) == IF_THEN_ELSE)
9229 dest = XEXP (dest, 1);
9230 dest = XEXP (dest, 0);
9231 dest_uid = INSN_UID (dest);
9232 return INSN_ADDRESSES (dest_uid);
9233 }
9234 \f
9235 /* Return nonzero if REG is not used after INSN.
9236 We assume REG is a reload reg, and therefore does
9237 not live past labels. It may live past calls or jumps though. */
9238 int
9239 reg_unused_after (rtx reg, rtx insn)
9240 {
9241 enum rtx_code code;
9242 rtx set;
9243
9244 /* If the reg is set by this instruction, then it is safe for our
9245 case. Disregard the case where this is a store to memory, since
9246 we are checking a register used in the store address. */
9247 set = single_set (insn);
9248 if (set && !MEM_P (SET_DEST (set))
9249 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9250 return 1;
9251
9252 while ((insn = NEXT_INSN (insn)))
9253 {
9254 rtx set;
9255 if (!INSN_P (insn))
9256 continue;
9257
9258 code = GET_CODE (insn);
9259
9260 #if 0
9261 /* If this is a label that existed before reload, then the register
9262 is dead here. However, if this is a label added by reorg, then
9263 the register may still be live here. We can't tell the difference,
9264 so we just ignore labels completely. */
9265 if (code == CODE_LABEL)
9266 return 1;
9267 /* else */
9268 #endif
9269
9270 if (code == JUMP_INSN)
9271 return 0;
9272
9273 /* If this is a sequence, we must handle them all at once.
9274 We could have for instance a call that sets the target register,
9275 and an insn in a delay slot that uses the register. In this case,
9276 we must return 0. */
9277 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9278 {
9279 int i;
9280 int retval = 0;
9281
9282 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9283 {
9284 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9285 rtx set = single_set (this_insn);
9286
9287 if (CALL_P (this_insn))
9288 code = CALL_INSN;
9289 else if (JUMP_P (this_insn))
9290 {
9291 if (INSN_ANNULLED_BRANCH_P (this_insn))
9292 return 0;
9293 code = JUMP_INSN;
9294 }
9295
9296 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9297 return 0;
9298 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9299 {
9300 if (!MEM_P (SET_DEST (set)))
9301 retval = 1;
9302 else
9303 return 0;
9304 }
9305 if (set == 0
9306 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9307 return 0;
9308 }
9309 if (retval == 1)
9310 return 1;
9311 else if (code == JUMP_INSN)
9312 return 0;
9313 }
9314
9315 set = single_set (insn);
9316 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9317 return 0;
9318 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9319 return !MEM_P (SET_DEST (set));
9320 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9321 return 0;
9322
9323 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9324 return 1;
9325 }
9326 return 1;
9327 }
9328 \f
9329 #include "ggc.h"
9330
9331 static GTY(()) rtx fpscr_rtx;
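/* Return the rtx for the FPSCR register, creating it and marking it as a
   user register on first use.  */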
9332 rtx
9333 get_fpscr_rtx (void)
9334 {
9335 if (! fpscr_rtx)
9336 {
9337 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9338 REG_USERVAR_P (fpscr_rtx) = 1;
9339 mark_user_reg (fpscr_rtx);
9340 }
9341 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9342 mark_user_reg (fpscr_rtx);
9343 return fpscr_rtx;
9344 }
9345
9346 static GTY(()) tree fpscr_values;
9347
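/* Load element INDEX of the __fpscr_values array into the FPSCR.  The
   external declaration of the array is created on first use; SCRATCH is
   used to form the address when no new pseudos may be created.  */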
9348 static void
9349 emit_fpu_switch (rtx scratch, int index)
9350 {
9351 rtx dst, src;
9352
9353 if (fpscr_values == NULL)
9354 {
9355 tree t;
9356
9357 t = build_index_type (integer_one_node);
9358 t = build_array_type (integer_type_node, t);
9359 t = build_decl (BUILTINS_LOCATION,
9360 VAR_DECL, get_identifier ("__fpscr_values"), t);
9361 DECL_ARTIFICIAL (t) = 1;
9362 DECL_IGNORED_P (t) = 1;
9363 DECL_EXTERNAL (t) = 1;
9364 TREE_STATIC (t) = 1;
9365 TREE_PUBLIC (t) = 1;
9366 TREE_USED (t) = 1;
9367
9368 fpscr_values = t;
9369 }
9370
9371 src = DECL_RTL (fpscr_values);
9372 if (!can_create_pseudo_p ())
9373 {
9374 emit_move_insn (scratch, XEXP (src, 0));
9375 if (index != 0)
9376 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9377 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9378 }
9379 else
9380 src = adjust_address (src, PSImode, index * 4);
9381
9382 dst = get_fpscr_rtx ();
9383 emit_move_insn (dst, src);
9384 }
9385
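/* Helpers for emitting floating-point insns.  The expand_sf_* / expand_df_*
   variants pass the FPSCR rtx to the generator FUN as its last operand.  */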
9386 void
9387 emit_sf_insn (rtx pat)
9388 {
9389 emit_insn (pat);
9390 }
9391
9392 void
9393 emit_df_insn (rtx pat)
9394 {
9395 emit_insn (pat);
9396 }
9397
9398 void
9399 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9400 {
9401 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9402 }
9403
9404 void
9405 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9406 {
9407 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9408 get_fpscr_rtx ()));
9409 }
9410
9411 void
9412 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9413 {
9414 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9415 }
9416
9417 void
9418 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9419 {
9420 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9421 get_fpscr_rtx ()));
9422 }
9423 \f
9424 static rtx get_free_reg (HARD_REG_SET);
9425
9426 /* This function returns a register to use for loading the address from
9427 which to load the fpscr. Currently it always returns r1 or r7, but when we are
9428 able to use pseudo registers after combine, or have a better mechanism
9429 for choosing a register, it should be done here. */
9430 /* REGS_LIVE is the liveness information for the point for which we
9431 need this allocation. In some bare-bones exit blocks, r1 is live at the
9432 start. We can even have all of r0..r3 being live:
9433 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9434 The INSN before which new insns are placed will clobber the register
9435 we return. If a basic block consists only of setting the return value
9436 register to a pseudo and using that register, the return value is not
9437 live before or after this block, yet we'll insert our insns right in
9438 the middle. */
9439
9440 static rtx
9441 get_free_reg (HARD_REG_SET regs_live)
9442 {
9443 if (! TEST_HARD_REG_BIT (regs_live, 1))
9444 return gen_rtx_REG (Pmode, 1);
9445
9446 /* Hard reg 1 is live; since this is a small register classes target,
9447 there shouldn't be anything but a jump before the function end. */
9448 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9449 return gen_rtx_REG (Pmode, 7);
9450 }
9451
9452 /* This function will set the fpscr from memory.
9453 MODE is the mode we are setting it to. */
9454 void
9455 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9456 {
9457 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9458 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9459 rtx addr_reg;
9460
9461 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9462 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9463 }
9464
9465 /* Is the given character a logical line separator for the assembler? */
9466 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9467 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9468 #endif
9469
9470 int
9471 sh_insn_length_adjustment (rtx insn)
9472 {
9473 /* Instructions with unfilled delay slots take up an extra two bytes for
9474 the nop in the delay slot. */
9475 if (((NONJUMP_INSN_P (insn)
9476 && GET_CODE (PATTERN (insn)) != USE
9477 && GET_CODE (PATTERN (insn)) != CLOBBER)
9478 || CALL_P (insn)
9479 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9480 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9481 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9482 return 2;
9483
9484 /* SH2e has a bug that prevents the use of annulled branches, so if
9485 the delay slot is not filled, we'll have to put a NOP in it. */
9486 if (sh_cpu_attr == CPU_SH2E
9487 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9488 && get_attr_type (insn) == TYPE_CBRANCH
9489 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9490 return 2;
9491
9492 /* sh-dsp parallel processing insn take four bytes instead of two. */
9493
9494 if (NONJUMP_INSN_P (insn))
9495 {
9496 int sum = 0;
9497 rtx body = PATTERN (insn);
9498 const char *templ;
9499 char c;
9500 int maybe_label = 1;
9501
9502 if (GET_CODE (body) == ASM_INPUT)
9503 templ = XSTR (body, 0);
9504 else if (asm_noperands (body) >= 0)
9505 templ
9506 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9507 else
9508 return 0;
9509 do
9510 {
9511 int ppi_adjust = 0;
9512
9513 do
9514 c = *templ++;
9515 while (c == ' ' || c == '\t');
9516 /* All sh-dsp parallel-processing insns start with p.
9517 The only non-ppi sh insn starting with p is pref.
9518 The only ppi starting with pr is prnd. */
9519 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9520 ppi_adjust = 2;
9521 /* The repeat pseudo-insn expands to three insns, a total of
9522 six bytes in size. */
9523 else if ((c == 'r' || c == 'R')
9524 && ! strncasecmp ("epeat", templ, 5))
9525 ppi_adjust = 4;
9526 while (c && c != '\n'
9527 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9528 {
9529 /* If this is a label, it is obviously not a ppi insn. */
9530 if (c == ':' && maybe_label)
9531 {
9532 ppi_adjust = 0;
9533 break;
9534 }
9535 else if (c == '\'' || c == '"')
9536 maybe_label = 0;
9537 c = *templ++;
9538 }
9539 sum += ppi_adjust;
9540 maybe_label = c != ':';
9541 }
9542 while (c);
9543 return sum;
9544 }
9545 return 0;
9546 }
9547 \f
9548 /* Return TRUE for a valid displacement for the REG+disp addressing
9549 with MODE. */
9550
9551 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9552 into the FRx registers. We implement this by setting the maximum offset
9553 to zero when the value is SFmode. This also restricts loading of SFmode
9554 values into the integer registers, but that can't be helped. */
9555
9556 /* The SH allows a displacement in a QI or HI mode, but only when the
9557 other operand is R0. GCC doesn't handle this very well, so we forgo
9558 all of that.
9559
9560 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9561 DI can be any number 0..60. */
9562
9563 bool
9564 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9565 {
9566 if (CONST_INT_P (op))
9567 {
9568 if (TARGET_SHMEDIA)
9569 {
9570 int size;
9571
9572 /* Check if this is the address of an unaligned load / store. */
9573 if (mode == VOIDmode)
9574 return CONST_OK_FOR_I06 (INTVAL (op));
9575
9576 size = GET_MODE_SIZE (mode);
9577 return (!(INTVAL (op) & (size - 1))
9578 && INTVAL (op) >= -512 * size
9579 && INTVAL (op) < 512 * size);
9580 }
9581
9582 if (TARGET_SH2A)
9583 {
9584 if (GET_MODE_SIZE (mode) == 1
9585 && (unsigned) INTVAL (op) < 4096)
9586 return true;
9587 }
9588
9589 if ((GET_MODE_SIZE (mode) == 4
9590 && (unsigned) INTVAL (op) < 64
9591 && !(INTVAL (op) & 3)
9592 && !(TARGET_SH2E && mode == SFmode))
9593 || (GET_MODE_SIZE (mode) == 4
9594 && (unsigned) INTVAL (op) < 16383
9595 && !(INTVAL (op) & 3) && TARGET_SH2A))
9596 return true;
9597
9598 if ((GET_MODE_SIZE (mode) == 8
9599 && (unsigned) INTVAL (op) < 60
9600 && !(INTVAL (op) & 3)
9601 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9602 || ((GET_MODE_SIZE (mode) == 8)
9603 && (unsigned) INTVAL (op) < 8192
9604 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9605 && (TARGET_SH2A && mode == DFmode)))
9606 return true;
9607 }
9608
9609 return false;
9610 }
9611
9612 /* Recognize an RTL expression that is a valid memory address for
9613 an instruction.
9614 The MODE argument is the machine mode for the MEM expression
9615 that wants to use this address.
9616 Allow REG
9617 REG+disp
9618 REG+r0
9619 REG++
9620 --REG */
9621
9622 static bool
9623 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9624 {
9625 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9626 return true;
9627 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9628 && ! TARGET_SHMEDIA
9629 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9630 return true;
9631 else if (GET_CODE (x) == PLUS
9632 && (mode != PSImode || reload_completed))
9633 {
9634 rtx xop0 = XEXP (x, 0);
9635 rtx xop1 = XEXP (x, 1);
9636
9637 if (GET_MODE_SIZE (mode) <= 8
9638 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9639 && sh_legitimate_index_p (mode, xop1))
9640 return true;
9641
9642 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9643 || ((xop0 == stack_pointer_rtx
9644 || xop0 == hard_frame_pointer_rtx)
9645 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9646 || ((xop1 == stack_pointer_rtx
9647 || xop1 == hard_frame_pointer_rtx)
9648 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9649 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9650 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9651 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9652 && TARGET_FMOVD && mode == DFmode)))
9653 {
9654 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9655 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9656 return true;
9657 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9658 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9659 return true;
9660 }
9661 }
9662
9663 return false;
9664 }
9665 \f
9666 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9667 isn't protected by a PIC unspec. */
9668 int
9669 nonpic_symbol_mentioned_p (rtx x)
9670 {
9671 register const char *fmt;
9672 register int i;
9673
9674 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9675 || GET_CODE (x) == PC)
9676 return 1;
9677
9678 /* We don't want to look into the possible MEM location of a
9679 CONST_DOUBLE, since we're not going to use it, in general. */
9680 if (GET_CODE (x) == CONST_DOUBLE)
9681 return 0;
9682
9683 if (GET_CODE (x) == UNSPEC
9684 && (XINT (x, 1) == UNSPEC_PIC
9685 || XINT (x, 1) == UNSPEC_GOT
9686 || XINT (x, 1) == UNSPEC_GOTOFF
9687 || XINT (x, 1) == UNSPEC_GOTPLT
9688 || XINT (x, 1) == UNSPEC_GOTTPOFF
9689 || XINT (x, 1) == UNSPEC_DTPOFF
9690 || XINT (x, 1) == UNSPEC_TPOFF
9691 || XINT (x, 1) == UNSPEC_PLT
9692 || XINT (x, 1) == UNSPEC_SYMOFF
9693 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9694 return 0;
9695
9696 fmt = GET_RTX_FORMAT (GET_CODE (x));
9697 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9698 {
9699 if (fmt[i] == 'E')
9700 {
9701 register int j;
9702
9703 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9704 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9705 return 1;
9706 }
9707 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9708 return 1;
9709 }
9710
9711 return 0;
9712 }
9713
9714 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9715 @GOTOFF in `reg'. */
9716 rtx
9717 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9718 rtx reg)
9719 {
9720 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9721 return orig;
9722
9723 if (GET_CODE (orig) == LABEL_REF
9724 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9725 {
9726 if (reg == 0)
9727 reg = gen_reg_rtx (Pmode);
9728
9729 emit_insn (gen_symGOTOFF2reg (reg, orig));
9730 return reg;
9731 }
9732 else if (GET_CODE (orig) == SYMBOL_REF)
9733 {
9734 if (reg == 0)
9735 reg = gen_reg_rtx (Pmode);
9736
9737 emit_insn (gen_symGOT2reg (reg, orig));
9738 return reg;
9739 }
9740 return orig;
9741 }
9742
9743 /* Try machine-dependent ways of modifying an illegitimate address
9744 to be legitimate. If we find one, return the new, valid address.
9745 Otherwise, return X.
9746
9747 For the SH, if X is almost suitable for indexing, but the offset is
9748 out of range, convert it into a normal form so that CSE has a chance
9749 of reducing the number of address registers used. */
9750
9751 static rtx
9752 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9753 {
9754 if (flag_pic)
9755 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9756
9757 if (GET_CODE (x) == PLUS
9758 && (GET_MODE_SIZE (mode) == 4
9759 || GET_MODE_SIZE (mode) == 8)
9760 && CONST_INT_P (XEXP (x, 1))
9761 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9762 && ! TARGET_SHMEDIA
9763 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9764 && ! (TARGET_SH2E && mode == SFmode))
9765 {
9766 rtx index_rtx = XEXP (x, 1);
9767 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9768 rtx sum;
9769
9770 /* On rare occasions, we might get an unaligned pointer
9771 that is indexed in a way to give an aligned address.
9772 Therefore, keep the lower two bits in offset_base. */
9773 /* Instead of offset_base 128..131 use 124..127, so that
9774 simple add suffices. */
9775 if (offset > 127)
9776 offset_base = ((offset + 4) & ~60) - 4;
9777 else
9778 offset_base = offset & ~60;
9779
9780 /* Sometimes the normal form does not suit DImode. We
9781 could avoid that by using smaller ranges, but that
9782 would give less optimized code when SImode is
9783 prevalent. */
9784 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9785 {
9786 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9787 GEN_INT (offset_base), NULL_RTX, 0,
9788 OPTAB_LIB_WIDEN);
9789
9790 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9791 }
9792 }
9793
9794 return x;
9795 }
9796
9797 /* Attempt to replace *P, which is an address that needs reloading, with
9798 a valid memory address for an operand of mode MODE.
9799 Like for sh_legitimize_address, for the SH we try to get a normal form
9800 of the address. That will allow inheritance of the address reloads. */
9801
9802 bool
9803 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9804 int itype)
9805 {
9806 enum reload_type type = (enum reload_type) itype;
9807
9808 if (GET_CODE (*p) == PLUS
9809 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9810 && CONST_INT_P (XEXP (*p, 1))
9811 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9812 && ! TARGET_SHMEDIA
9813 && ! (TARGET_SH4 && mode == DFmode)
9814 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9815 && (ALLOW_INDEXED_ADDRESS
9816 || XEXP (*p, 0) == stack_pointer_rtx
9817 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9818 {
9819 rtx index_rtx = XEXP (*p, 1);
9820 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9821 rtx sum;
9822
9823 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9824 {
9825 push_reload (*p, NULL_RTX, p, NULL,
9826 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9827 goto win;
9828 }
9829 if (TARGET_SH2E && mode == SFmode)
9830 {
9831 *p = copy_rtx (*p);
9832 push_reload (*p, NULL_RTX, p, NULL,
9833 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9834 goto win;
9835 }
9836 /* Instead of offset_base 128..131 use 124..127, so that
9837 simple add suffices. */
9838 if (offset > 127)
9839 offset_base = ((offset + 4) & ~60) - 4;
9840 else
9841 offset_base = offset & ~60;
9842 /* Sometimes the normal form does not suit DImode. We could avoid
9843 that by using smaller ranges, but that would give less optimized
9844 code when SImode is prevalent. */
9845 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9846 {
9847 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9848 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9849 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9850 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9851 goto win;
9852 }
9853 }
9854 /* We must re-recognize what we created before. */
9855 else if (GET_CODE (*p) == PLUS
9856 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9857 && GET_CODE (XEXP (*p, 0)) == PLUS
9858 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9859 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9860 && CONST_INT_P (XEXP (*p, 1))
9861 && ! TARGET_SHMEDIA
9862 && ! (TARGET_SH2E && mode == SFmode))
9863 {
9864 /* Because this address is so complex, we know it must have
9865 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9866 it is already unshared, and needs no further unsharing. */
9867 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9868 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9869 goto win;
9870 }
9871
9872 return false;
9873
9874 win:
9875 return true;
9876 }
9877
9878 /* In the name of slightly smaller debug output, and to cater to
9879 general assembler lossage, recognize various UNSPEC sequences
9880 and turn them back into a direct symbol reference. */
9881
9882 static rtx
9883 sh_delegitimize_address (rtx orig_x)
9884 {
9885 rtx x, y;
9886
9887 orig_x = delegitimize_mem_from_attrs (orig_x);
9888
9889 x = orig_x;
9890 if (MEM_P (x))
9891 x = XEXP (x, 0);
9892 if (GET_CODE (x) == CONST)
9893 {
9894 y = XEXP (x, 0);
9895 if (GET_CODE (y) == UNSPEC)
9896 {
9897 if (XINT (y, 1) == UNSPEC_GOT
9898 || XINT (y, 1) == UNSPEC_GOTOFF
9899 || XINT (y, 1) == UNSPEC_SYMOFF)
9900 return XVECEXP (y, 0, 0);
9901 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9902 {
9903 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9904 {
9905 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9906
9907 if (GET_CODE (symplt) == UNSPEC
9908 && XINT (symplt, 1) == UNSPEC_PLT)
9909 return XVECEXP (symplt, 0, 0);
9910 }
9911 }
9912 else if (TARGET_SHMEDIA
9913 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
9914 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
9915 {
9916 rtx offset = XVECEXP (y, 0, 1);
9917
9918 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
9919 if (MEM_P (orig_x))
9920 x = replace_equiv_address_nv (orig_x, x);
9921 return x;
9922 }
9923 }
9924 }
9925
9926 return orig_x;
9927 }
9928
9929 /* Mark the use of a constant in the literal table. If the constant
9930 has multiple labels, make it unique. */
9931 static rtx
9932 mark_constant_pool_use (rtx x)
9933 {
9934 rtx insn, lab, pattern;
9935
9936 if (x == NULL)
9937 return x;
9938
9939 switch (GET_CODE (x))
9940 {
9941 case LABEL_REF:
9942 x = XEXP (x, 0);
9943 case CODE_LABEL:
9944 break;
9945 default:
9946 return x;
9947 }
9948
9949 /* Get the first label in the list of labels for the same constant
9950 and delete the other labels in the list. */
9951 lab = x;
9952 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9953 {
9954 if (!LABEL_P (insn)
9955 || LABEL_REFS (insn) != NEXT_INSN (insn))
9956 break;
9957 lab = insn;
9958 }
9959
9960 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9961 INSN_DELETED_P (insn) = 1;
9962
9963 /* Mark constants in a window. */
9964 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9965 {
9966 if (!NONJUMP_INSN_P (insn))
9967 continue;
9968
9969 pattern = PATTERN (insn);
9970 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9971 continue;
9972
9973 switch (XINT (pattern, 1))
9974 {
9975 case UNSPECV_CONST2:
9976 case UNSPECV_CONST4:
9977 case UNSPECV_CONST8:
9978 XVECEXP (pattern, 0, 1) = const1_rtx;
9979 break;
9980 case UNSPECV_WINDOW_END:
9981 if (XVECEXP (pattern, 0, 0) == x)
9982 return lab;
9983 break;
9984 case UNSPECV_CONST_END:
9985 return lab;
9986 default:
9987 break;
9988 }
9989 }
9990
9991 return lab;
9992 }
9993 \f
9994 /* Return true if it's possible to redirect BRANCH1 to the destination
9995 of an unconditional jump BRANCH2. We only want to do this if the
9996 resulting branch will have a short displacement. */
9997 int
9998 sh_can_redirect_branch (rtx branch1, rtx branch2)
9999 {
10000 if (flag_expensive_optimizations && simplejump_p (branch2))
10001 {
10002 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10003 rtx insn;
10004 int distance;
10005
10006 for (distance = 0, insn = NEXT_INSN (branch1);
10007 insn && distance < 256;
10008 insn = PREV_INSN (insn))
10009 {
10010 if (insn == dest)
10011 return 1;
10012 else
10013 distance += get_attr_length (insn);
10014 }
10015 for (distance = 0, insn = NEXT_INSN (branch1);
10016 insn && distance < 256;
10017 insn = NEXT_INSN (insn))
10018 {
10019 if (insn == dest)
10020 return 1;
10021 else
10022 distance += get_attr_length (insn);
10023 }
10024 }
10025 return 0;
10026 }
10027
10028 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10029 int
10030 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10031 unsigned int new_reg)
10032 {
10033 /* Interrupt functions can only use registers that have already been
10034 saved by the prologue, even if they would normally be
10035 call-clobbered. */
10036
10037 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10038 return 0;
10039
10040 return 1;
10041 }
10042
10043 /* Function to update the integer COST
10044 based on the relationship between INSN that is dependent on
10045 DEP_INSN through the dependence LINK. The default is to make no
10046 adjustment to COST. This can be used for example to specify to
10047 the scheduler that an output- or anti-dependence does not incur
10048 the same cost as a data-dependence. The return value should be
10049 the new value for COST. */
10050 static int
10051 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10052 {
10053 rtx reg, use_pat;
10054
10055 if (TARGET_SHMEDIA)
10056 {
10057 /* On SHmedia, if the dependence is an anti-dependence or
10058 output-dependence, there is no cost. */
10059 if (REG_NOTE_KIND (link) != 0)
10060 {
10061 /* However, dependencies between target register loads and
10062 uses of the register in a subsequent block that are separated
10063 by a conditional branch are not modelled - we have to make do with
10064 the anti-dependency between the target register load and the
10065 conditional branch that ends the current block. */
10066 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10067 && GET_CODE (PATTERN (dep_insn)) == SET
10068 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10069 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10070 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10071 {
10072 int orig_cost = cost;
10073 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10074 rtx target = ((! note
10075 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10076 ? insn : JUMP_LABEL (insn));
10077 /* On the likely path, the branch costs 1, on the unlikely path,
10078 it costs 3. */
10079 cost--;
10080 do
10081 target = next_active_insn (target);
10082 while (target && ! flow_dependent_p (target, dep_insn)
10083 && --cost > 0);
10084 /* If two branches are executed in immediate succession, with the
10085 first branch properly predicted, this causes a stall at the
10086 second branch, hence we won't need the target for the
10087 second branch for two cycles after the launch of the first
10088 branch. */
10089 if (cost > orig_cost - 2)
10090 cost = orig_cost - 2;
10091 }
10092 else
10093 cost = 0;
10094 }
10095
10096 else if (get_attr_is_mac_media (insn)
10097 && get_attr_is_mac_media (dep_insn))
10098 cost = 1;
10099
10100 else if (! reload_completed
10101 && GET_CODE (PATTERN (insn)) == SET
10102 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10103 && GET_CODE (PATTERN (dep_insn)) == SET
10104 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10105 && cost < 4)
10106 cost = 4;
10107 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10108 that is needed at the target. */
10109 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10110 && ! flow_dependent_p (insn, dep_insn))
10111 cost--;
10112 }
10113 else if (REG_NOTE_KIND (link) == 0)
10114 {
10115 enum attr_type type;
10116 rtx dep_set;
10117
10118 if (recog_memoized (insn) < 0
10119 || recog_memoized (dep_insn) < 0)
10120 return cost;
10121
10122 dep_set = single_set (dep_insn);
10123
10124 /* The latency that we specify in the scheduling description refers
10125 to the actual output, not to an auto-increment register; for that,
10126 the latency is one. */
10127 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10128 {
10129 rtx set = single_set (insn);
10130
10131 if (set
10132 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10133 && (!MEM_P (SET_DEST (set))
10134 || !reg_mentioned_p (SET_DEST (dep_set),
10135 XEXP (SET_DEST (set), 0))))
10136 cost = 1;
10137 }
10138 /* The only input for a call that is timing-critical is the
10139 function's address. */
10140 if (CALL_P (insn))
10141 {
10142 rtx call = PATTERN (insn);
10143
10144 if (GET_CODE (call) == PARALLEL)
10145 call = XVECEXP (call, 0, 0);
10146 if (GET_CODE (call) == SET)
10147 call = SET_SRC (call);
10148 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10149 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10150 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10151 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10152 cost -= TARGET_SH4_300 ? 3 : 6;
10153 }
10154 /* Likewise, the most timing critical input for an sfuncs call
10155 is the function address. However, sfuncs typically start
10156 using their arguments pretty quickly.
10157 Assume a four cycle delay for SH4 before they are needed.
10158 Cached ST40-300 calls are quicker, so assume only a one
10159 cycle delay there.
10160 ??? Maybe we should encode the delays till input registers
10161 are needed by sfuncs into the sfunc call insn. */
10162 /* All sfunc calls are parallels with at least four components.
10163 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10164 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10165 && XVECLEN (PATTERN (insn), 0) >= 4
10166 && (reg = sfunc_uses_reg (insn)))
10167 {
10168 if (! reg_set_p (reg, dep_insn))
10169 cost -= TARGET_SH4_300 ? 1 : 4;
10170 }
10171 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10172 {
10173 enum attr_type dep_type = get_attr_type (dep_insn);
10174
10175 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10176 cost--;
10177 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10178 && (type = get_attr_type (insn)) != TYPE_CALL
10179 && type != TYPE_SFUNC)
10180 cost--;
10181 /* When the preceding instruction loads the shift amount of
10182 the following SHAD/SHLD, the latency of the load is increased
10183 by 1 cycle. */
10184 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10185 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10186 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10187 XEXP (SET_SRC (single_set (insn)),
10188 1)))
10189 cost++;
10190 /* When an LS group instruction with a latency of less than
10191 3 cycles is followed by a double-precision floating-point
10192 instruction, FIPR, or FTRV, the latency of the first
10193 instruction is increased to 3 cycles. */
10194 else if (cost < 3
10195 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10196 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10197 cost = 3;
10198 /* The lsw register of a double-precision computation is ready one
10199 cycle earlier. */
10200 else if (reload_completed
10201 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10202 && (use_pat = single_set (insn))
10203 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10204 SET_SRC (use_pat)))
10205 cost -= 1;
10206
10207 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10208 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10209 cost -= 1;
10210 }
10211 else if (TARGET_SH4_300)
10212 {
10213 /* Stores need their input register two cycles later. */
10214 if (dep_set && cost >= 1
10215 && ((type = get_attr_type (insn)) == TYPE_STORE
10216 || type == TYPE_PSTORE
10217 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10218 {
10219 rtx set = single_set (insn);
10220
10221 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10222 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10223 {
10224 cost -= 2;
10225 /* But don't reduce the cost below 1 if the address depends
10226 on a side effect of dep_insn. */
10227 if (cost < 1
10228 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10229 cost = 1;
10230 }
10231 }
10232 }
10233 }
10234 /* An anti-dependence penalty of two applies if the first insn is a double
10235 precision fadd / fsub / fmul. */
10236 else if (!TARGET_SH4_300
10237 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10238 && recog_memoized (dep_insn) >= 0
10239 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10240 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10241 /* A lot of alleged anti-flow dependences are fake,
10242 so check this one is real. */
10243 && flow_dependent_p (dep_insn, insn))
10244 cost = 2;
10245
10246 return cost;
10247 }
10248
10249 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10250 if DEP_INSN is anti-flow dependent on INSN. */
10251 static int
10252 flow_dependent_p (rtx insn, rtx dep_insn)
10253 {
10254 rtx tmp = PATTERN (insn);
10255
10256 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10257 return tmp == NULL_RTX;
10258 }
10259
10260 /* A helper function for flow_dependent_p called through note_stores. */
10261 static void
10262 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10263 {
10264 rtx * pinsn = (rtx *) data;
10265
10266 if (*pinsn && reg_referenced_p (x, *pinsn))
10267 *pinsn = NULL_RTX;
10268 }
10269
10270 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10271 'special function' patterns (type sfunc) that clobber pr, but that
10272 do not look like function calls to leaf_function_p. Hence we must
10273 do this extra check. */
10274 static int
10275 sh_pr_n_sets (void)
10276 {
10277 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10278 }
10279
10280 /* Return where to allocate a pseudo for a given hard register's initial
10281 value. */
10282 static rtx
10283 sh_allocate_initial_value (rtx hard_reg)
10284 {
10285 rtx x;
10286
10287 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10288 {
10289 if (current_function_is_leaf
10290 && ! sh_pr_n_sets ()
10291 && ! (TARGET_SHCOMPACT
10292 && ((crtl->args.info.call_cookie
10293 & ~ CALL_COOKIE_RET_TRAMP (1))
10294 || crtl->saves_all_registers)))
10295 x = hard_reg;
10296 else
10297 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10298 }
10299 else
10300 x = NULL_RTX;
10301
10302 return x;
10303 }
10304
10305 /* Return the issue rate: 2 (dual issue) for superscalar processors such as
10306 the SH4, 1 otherwise. To be used by the DFA pipeline description. */
10307 static int
10308 sh_issue_rate (void)
10309 {
10310 if (TARGET_SUPERSCALAR)
10311 return 2;
10312 else
10313 return 1;
10314 }
10315
10316 /* Functions for ready queue reordering for sched1. */
10317
10318 /* Get weight for mode for a set x. */
10319 static short
10320 find_set_regmode_weight (rtx x, enum machine_mode mode)
10321 {
10322 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10323 return 1;
10324 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10325 {
10326 if (REG_P (SET_DEST (x)))
10327 {
10328 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10329 return 1;
10330 else
10331 return 0;
10332 }
10333 return 1;
10334 }
10335 return 0;
10336 }
10337
10338 /* Get regmode weight for insn. */
10339 static short
10340 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10341 {
10342 short reg_weight = 0;
10343 rtx x;
10344
10345 /* Increment weight for each register born here. */
10346 x = PATTERN (insn);
10347 reg_weight += find_set_regmode_weight (x, mode);
10348 if (GET_CODE (x) == PARALLEL)
10349 {
10350 int j;
10351 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10352 {
10353 x = XVECEXP (PATTERN (insn), 0, j);
10354 reg_weight += find_set_regmode_weight (x, mode);
10355 }
10356 }
10357 /* Decrement weight for each register that dies here. */
10358 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10359 {
10360 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10361 {
10362 rtx note = XEXP (x, 0);
10363 if (REG_P (note) && GET_MODE (note) == mode)
10364 reg_weight--;
10365 }
10366 }
10367 return reg_weight;
10368 }
10369
10370 /* Calculate regmode weights for all insns of a basic block. */
10371 static void
10372 find_regmode_weight (basic_block b, enum machine_mode mode)
10373 {
10374 rtx insn, next_tail, head, tail;
10375
10376 get_ebb_head_tail (b, b, &head, &tail);
10377 next_tail = NEXT_INSN (tail);
10378
10379 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10380 {
10381 /* Handle register life information. */
10382 if (!INSN_P (insn))
10383 continue;
10384
10385 if (mode == SFmode)
10386 INSN_REGMODE_WEIGHT (insn, mode) =
10387 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10388 else if (mode == SImode)
10389 INSN_REGMODE_WEIGHT (insn, mode) =
10390 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10391 }
10392 }
10393
10394 /* Comparison function for ready queue sorting. */
10395 static int
10396 rank_for_reorder (const void *x, const void *y)
10397 {
10398 rtx tmp = *(const rtx *) y;
10399 rtx tmp2 = *(const rtx *) x;
10400
10401 /* The insn in a schedule group should be issued first. */
10402 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10403 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10404
10405 /* If insns are equally good, sort by INSN_LUID (original insn order). This
10406 minimizes instruction movement, thus minimizing sched's effect on
10407 register pressure. */
10408 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10409 }
10410
10411 /* Resort the array A in which only element at index N may be out of order. */
10412 static void
10413 swap_reorder (rtx *a, int n)
10414 {
10415 rtx insn = a[n - 1];
10416 int i = n - 2;
10417
10418 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10419 {
10420 a[i + 1] = a[i];
10421 i -= 1;
10422 }
10423 a[i + 1] = insn;
10424 }
10425
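/* Sort the ready list READY of length N_READY: with exactly two entries a
   single insertion via swap_reorder suffices, otherwise fall back to qsort.  */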
10426 #define SCHED_REORDER(READY, N_READY) \
10427 do \
10428 { \
10429 if ((N_READY) == 2) \
10430 swap_reorder (READY, N_READY); \
10431 else if ((N_READY) > 2) \
10432 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10433 } \
10434 while (0)
10435
10436 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10437 macro. */
10438 static void
10439 ready_reorder (rtx *ready, int nready)
10440 {
10441 SCHED_REORDER (ready, nready);
10442 }
10443
10444 /* Count life regions of r0 for a block. */
10445 static int
10446 find_r0_life_regions (basic_block b)
10447 {
10448 rtx end, insn;
10449 rtx pset;
10450 rtx r0_reg;
10451 int live;
10452 int set;
10453 int death = 0;
10454
10455 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10456 {
10457 set = 1;
10458 live = 1;
10459 }
10460 else
10461 {
10462 set = 0;
10463 live = 0;
10464 }
10465
10466 insn = BB_HEAD (b);
10467 end = BB_END (b);
10468 r0_reg = gen_rtx_REG (SImode, R0_REG);
10469 while (1)
10470 {
10471 if (INSN_P (insn))
10472 {
10473 if (find_regno_note (insn, REG_DEAD, R0_REG))
10474 {
10475 death++;
10476 live = 0;
10477 }
10478 if (!live
10479 && (pset = single_set (insn))
10480 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10481 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10482 {
10483 set++;
10484 live = 1;
10485 }
10486 }
10487 if (insn == end)
10488 break;
10489 insn = NEXT_INSN (insn);
10490 }
10491 return set - death;
10492 }
10493
10494 /* Calculate regmode weights for all insns of all basic blocks. */
10495 static void
10496 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10497 int verbose ATTRIBUTE_UNUSED,
10498 int old_max_uid)
10499 {
10500 basic_block b;
10501
10502 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10503 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10504 r0_life_regions = 0;
10505
10506 FOR_EACH_BB_REVERSE (b)
10507 {
10508 find_regmode_weight (b, SImode);
10509 find_regmode_weight (b, SFmode);
10510 if (!reload_completed)
10511 r0_life_regions += find_r0_life_regions (b);
10512 }
10513
10514 CURR_REGMODE_PRESSURE (SImode) = 0;
10515 CURR_REGMODE_PRESSURE (SFmode) = 0;
10516
10517 }
10518
10519 /* Cleanup. */
10520 static void
10521 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10522 int verbose ATTRIBUTE_UNUSED)
10523 {
10524 if (regmode_weight[0])
10525 {
10526 free (regmode_weight[0]);
10527 regmode_weight[0] = NULL;
10528 }
10529 if (regmode_weight[1])
10530 {
10531 free (regmode_weight[1]);
10532 regmode_weight[1] = NULL;
10533 }
10534 }
10535
10536 /* The set of supported scalar modes differs from the default in that TImode
10537 is not supported for 32-bit SHmedia. */
10538 static bool
10539 sh_scalar_mode_supported_p (enum machine_mode mode)
10540 {
10541 if (TARGET_SHMEDIA32 && mode == TImode)
10542 return false;
10543
10544 return default_scalar_mode_supported_p (mode);
10545 }
10546
10547 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10548 keep count of register pressures on SImode and SFmode. */
10549 static int
10550 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10551 int sched_verbose ATTRIBUTE_UNUSED,
10552 rtx insn,
10553 int can_issue_more)
10554 {
10555 if (GET_CODE (PATTERN (insn)) != USE
10556 && GET_CODE (PATTERN (insn)) != CLOBBER)
10557 cached_can_issue_more = can_issue_more - 1;
10558 else
10559 cached_can_issue_more = can_issue_more;
10560
10561 if (reload_completed)
10562 return cached_can_issue_more;
10563
10564 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10565 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10566
10567 return cached_can_issue_more;
10568 }
10569
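/* Reset the SImode / SFmode register pressure counters before scheduling a
   new block of insns.  */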
10570 static void
10571 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10572 int verbose ATTRIBUTE_UNUSED,
10573 int veclen ATTRIBUTE_UNUSED)
10574 {
10575 CURR_REGMODE_PRESSURE (SImode) = 0;
10576 CURR_REGMODE_PRESSURE (SFmode) = 0;
10577 }
10578
10579 /* Some magic numbers. */
10580 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10581 functions that already have high pressure on r0. */
10582 #define R0_MAX_LIFE_REGIONS 2
10583 /* Register Pressure thresholds for SImode and SFmode registers. */
10584 #define SIMODE_MAX_WEIGHT 5
10585 #define SFMODE_MAX_WEIGHT 10
10586
10587 /* Return true if the pressure is high for MODE. */
10588 static short
10589 high_pressure (enum machine_mode mode)
10590 {
10591 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10592 functions that already have high pressure on r0. */
10593 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10594 return 1;
10595
10596 if (mode == SFmode)
10597 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10598 else
10599 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10600 }
10601
10602 /* Reorder ready queue if register pressure is high. */
10603 static int
10604 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10605 int sched_verbose ATTRIBUTE_UNUSED,
10606 rtx *ready,
10607 int *n_readyp,
10608 int clock_var ATTRIBUTE_UNUSED)
10609 {
10610 if (reload_completed)
10611 return sh_issue_rate ();
10612
10613 if (high_pressure (SFmode) || high_pressure (SImode))
10614 {
10615 ready_reorder (ready, *n_readyp);
10616 }
10617
10618 return sh_issue_rate ();
10619 }
10620
10621 /* Skip cycles if the current register pressure is high. */
10622 static int
10623 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10624 int sched_verbose ATTRIBUTE_UNUSED,
10625 rtx *ready ATTRIBUTE_UNUSED,
10626 int *n_readyp ATTRIBUTE_UNUSED,
10627 int clock_var ATTRIBUTE_UNUSED)
10628 {
10629 if (reload_completed)
10630 return cached_can_issue_more;
10631
10632 if (high_pressure (SFmode) || high_pressure (SImode))
10633 skip_cycles = 1;
10634
10635 return cached_can_issue_more;
10636 }
10637
10638 /* Skip cycles without sorting the ready queue. This will move insns from
10639 Q -> R. If this is the last cycle we are skipping, allow sorting of the ready
10640 queue by sh_reorder. */
10641
10642 /* Generally, skipping this many cycles is sufficient for all insns to move
10643 from Q -> R. */
10644 #define MAX_SKIPS 8
10645
10646 static int
10647 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10648 int sched_verbose ATTRIBUTE_UNUSED,
10649 rtx insn ATTRIBUTE_UNUSED,
10650 int last_clock_var,
10651 int clock_var,
10652 int *sort_p)
10653 {
10654 if (reload_completed)
10655 return 0;
10656
10657 if (skip_cycles)
10658 {
10659 if ((clock_var - last_clock_var) < MAX_SKIPS)
10660 {
10661 *sort_p = 0;
10662 return 1;
10663 }
10664 /* If this is the last cycle we are skipping, allow reordering of R. */
10665 if ((clock_var - last_clock_var) == MAX_SKIPS)
10666 {
10667 *sort_p = 1;
10668 return 1;
10669 }
10670 }
10671
10672 skip_cycles = 0;
10673
10674 return 0;
10675 }
10676
10677 /* SHmedia requires registers for branches, so we can't generate new
10678 branches past reload. */
10679 static bool
10680 sh_cannot_modify_jumps_p (void)
10681 {
10682 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10683 }
10684
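/* Branch-target registers exist only on SHmedia; return TARGET_REGS there
   and NO_REGS elsewhere.  */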
10685 static reg_class_t
10686 sh_target_reg_class (void)
10687 {
10688 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10689 }
10690
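/* Only use callee-saved target registers if space has been reserved for
   them and calc_live_regs reports at least 6 * 8 bytes of registers to
   save.  */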
10691 static bool
10692 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10693 {
10694 HARD_REG_SET dummy;
10695 #if 0
10696 rtx insn;
10697 #endif
10698
10699 if (! shmedia_space_reserved_for_target_registers)
10700 return 0;
10701 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10702 return 0;
10703 if (calc_live_regs (&dummy) >= 6 * 8)
10704 return 1;
10705 return 0;
10706 }
10707
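/* Use the Microsoft-compatible bit-field layout for SH5 and for the
   Renesas / Hitachi ABI.  */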
10708 static bool
10709 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10710 {
10711 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10712 }
10713 \f
10714 /*
10715 On the SH1..SH4, the trampoline looks like
10716 2 0002 D202 mov.l l2,r2
10717 1 0000 D301 mov.l l1,r3
10718 3 0004 422B jmp @r2
10719 4 0006 0009 nop
10720 5 0008 00000000 l1: .long area
10721 6 000c 00000000 l2: .long function
10722
10723 SH5 (compact) uses r1 instead of r3 for the static chain. */
10724
10725
10726 /* Emit RTL insns to initialize the variable parts of a trampoline.
10727 FNADDR is an RTX for the address of the function's pure code.
10728 CXT is an RTX for the static chain value for the function. */
10729
10730 static void
10731 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10732 {
10733 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10734 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10735
10736 if (TARGET_SHMEDIA64)
10737 {
10738 rtx tramp_templ;
10739 int fixed_len;
10740
10741 rtx movi1 = GEN_INT (0xcc000010);
10742 rtx shori1 = GEN_INT (0xc8000010);
10743 rtx src, dst;
10744
10745 /* The following trampoline works within a +- 128 KB range for cxt:
10746 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10747 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10748 gettr tr1,r1; blink tr0,r63 */
10749 /* Address rounding makes it hard to compute the exact bounds of the
10750 offset for this trampoline, but we have a rather generous offset
10751 range, so frame_offset should do fine as an upper bound. */
10752 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10753 {
10754 /* ??? could optimize this trampoline initialization
10755 by writing DImode words with two insns each. */
10756 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10757 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10758 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10759 insn = gen_rtx_AND (DImode, insn, mask);
10760 /* Or in ptb/u .,tr1 pattern */
10761 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10762 insn = force_operand (insn, NULL_RTX);
10763 insn = gen_lowpart (SImode, insn);
10764 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10765 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10766 insn = gen_rtx_AND (DImode, insn, mask);
10767 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10768 insn = gen_lowpart (SImode, insn);
10769 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10770 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10771 insn = gen_rtx_AND (DImode, insn, mask);
10772 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10773 insn = gen_lowpart (SImode, insn);
10774 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10775 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10776 insn = gen_rtx_AND (DImode, insn, mask);
10777 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10778 insn = gen_lowpart (SImode, insn);
10779 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10780 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10781 insn = gen_rtx_AND (DImode, insn, mask);
10782 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10783 insn = gen_lowpart (SImode, insn);
10784 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10785 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10786 GEN_INT (0x6bf10600));
10787 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10788 GEN_INT (0x4415fc10));
10789 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10790 GEN_INT (0x4401fff0));
10791 emit_insn (gen_ic_invalidate_line (tramp));
10792 return;
10793 }
10794 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10795 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10796
10797 tramp_templ = gen_datalabel_ref (tramp_templ);
10798 dst = tramp_mem;
10799 src = gen_const_mem (BLKmode, tramp_templ);
10800 set_mem_align (dst, 256);
10801 set_mem_align (src, 64);
10802 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10803
10804 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10805 emit_move_insn (adjust_address (tramp_mem, Pmode,
10806 fixed_len + GET_MODE_SIZE (Pmode)),
10807 cxt);
10808 emit_insn (gen_ic_invalidate_line (tramp));
10809 return;
10810 }
10811 else if (TARGET_SHMEDIA)
10812 {
10813 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10814 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10815 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10816 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10817 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10818 rotated 10 right, and higher 16 bit of every 32 selected. */
10819 rtx movishori
10820 = force_reg (V2HImode, (simplify_gen_subreg
10821 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10822 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10823 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10824
10825 fnaddr = force_reg (SImode, fnaddr);
10826 cxt = force_reg (SImode, cxt);
10827 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10828 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10829 movishori));
10830 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10831 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10832 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10833 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10834 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10835 gen_rtx_SUBREG (V2HImode, cxt, 0),
10836 movishori));
10837 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10838 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10839 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10840 if (TARGET_LITTLE_ENDIAN)
10841 {
10842 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10843 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10844 }
10845 else
10846 {
10847 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10848 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10849 }
10850 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10851 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10852 emit_insn (gen_ic_invalidate_line (tramp));
10853 return;
10854 }
10855 else if (TARGET_SHCOMPACT)
10856 {
10857 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10858 return;
10859 }
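  /* Plain SH trampoline, as written out below: two SImode words each
     holding a pair of 16-bit instructions (with the halves swapped on
     little-endian targets), followed by the static chain value at
     offset 8 and the function address at offset 12.  */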
10860 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10861 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10862 SImode));
10863 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10864 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10865 SImode));
10866 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10867 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
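  /* On Harvard-architecture targets the instruction cache must be
     invalidated so the freshly written trampoline becomes visible to
     instruction fetch; the __ic_invalidate library routine is used when
     inline invalidation is unavailable or not usable in user mode.  */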
10868 if (TARGET_HARVARD)
10869 {
10870 if (!TARGET_INLINE_IC_INVALIDATE
10871 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10872 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10873 FUNCTION_ORDINARY),
10874 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10875 else
10876 emit_insn (gen_ic_invalidate_line (tramp));
10877 }
10878 }
10879
10880 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10881
10882 static rtx
10883 sh_trampoline_adjust_address (rtx tramp)
10884 {
10885 if (TARGET_SHMEDIA)
10886 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10887 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10888 return tramp;
10889 }
10890
10891 /* FIXME: This is overly conservative. A SHcompact function that
10892 receives arguments ``by reference'' will have them stored in its
10893 own stack frame, so it must not pass pointers or references to
10894 these arguments to other functions by means of sibling calls. */
10895 /* If PIC, we cannot make sibling calls to global functions
10896 because the PLT requires r12 to be live. */
10897 static bool
10898 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10899 {
10900 return (1
10901 && (! TARGET_SHCOMPACT
10902 || crtl->args.info.stack_regs == 0)
10903 && ! sh_cfun_interrupt_handler_p ()
10904 && (! flag_pic
10905 || (decl && ! TREE_PUBLIC (decl))
10906 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10907 }
10908 \f
10909 /* Machine specific built-in functions. */
10910
10911 struct builtin_description
10912 {
10913 const enum insn_code icode;
10914 const char *const name;
10915 int signature;
10916 tree fndecl;
10917 };
10918
10919 /* Describe the number and signedness of arguments; arg[0] == result
10920 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
10921 /* 9: 64-bit pointer, 10: 32-bit pointer */
10922 static const char signature_args[][4] =
10923 {
10924 #define SH_BLTIN_V2SI2 0
10925 { 4, 4 },
10926 #define SH_BLTIN_V4HI2 1
10927 { 4, 4 },
10928 #define SH_BLTIN_V2SI3 2
10929 { 4, 4, 4 },
10930 #define SH_BLTIN_V4HI3 3
10931 { 4, 4, 4 },
10932 #define SH_BLTIN_V8QI3 4
10933 { 4, 4, 4 },
10934 #define SH_BLTIN_MAC_HISI 5
10935 { 1, 4, 4, 1 },
10936 #define SH_BLTIN_SH_HI 6
10937 { 4, 4, 1 },
10938 #define SH_BLTIN_SH_SI 7
10939 { 4, 4, 1 },
10940 #define SH_BLTIN_V4HI2V2SI 8
10941 { 4, 4, 4 },
10942 #define SH_BLTIN_V4HI2V8QI 9
10943 { 4, 4, 4 },
10944 #define SH_BLTIN_SISF 10
10945 { 4, 2 },
10946 #define SH_BLTIN_LDUA_L 11
10947 { 2, 10 },
10948 #define SH_BLTIN_LDUA_Q 12
10949 { 1, 10 },
10950 #define SH_BLTIN_STUA_L 13
10951 { 0, 10, 2 },
10952 #define SH_BLTIN_STUA_Q 14
10953 { 0, 10, 1 },
10954 #define SH_BLTIN_LDUA_L64 15
10955 { 2, 9 },
10956 #define SH_BLTIN_LDUA_Q64 16
10957 { 1, 9 },
10958 #define SH_BLTIN_STUA_L64 17
10959 { 0, 9, 2 },
10960 #define SH_BLTIN_STUA_Q64 18
10961 { 0, 9, 1 },
10962 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10963 #define SH_BLTIN_2 19
10964 #define SH_BLTIN_SU 19
10965 { 1, 2 },
10966 #define SH_BLTIN_3 20
10967 #define SH_BLTIN_SUS 20
10968 { 2, 2, 1 },
10969 #define SH_BLTIN_PSSV 21
10970 { 0, 8, 2, 2 },
10971 #define SH_BLTIN_XXUU 22
10972 #define SH_BLTIN_UUUU 22
10973 { 1, 1, 1, 1 },
10974 #define SH_BLTIN_PV 23
10975 { 0, 8 },
10976 };
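/* For example, the SH_BLTIN_SH_HI entry { 4, 4, 1 } describes builtins whose
   result and first argument take whatever mode the insn pattern dictates
   (signedness "don't care"), while the second argument - the shift or
   control count - is unsigned.  */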
10977 /* mcmv: operands considered unsigned. */
10978 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10979 /* mperm: control value considered unsigned int. */
10980 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10981 /* mshards_q: returns signed short. */
10982 /* nsb: takes long long arg, returns unsigned char. */
10983 static struct builtin_description bdesc[] =
10984 {
10985 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10986 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10987 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10988 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10989 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10990 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10991 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10992 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10993 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10994 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10995 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10996 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10997 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10998 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10999 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11000 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11001 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11002 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11003 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11004 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11005 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11006 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11007 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11008 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11009 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11010 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11011 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11012 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11013 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11014 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11015 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11016 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11017 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11018 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11019 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11020 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11021 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11022 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11023 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11024 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11025 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11026 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11027 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11028 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11029 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11030 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11031 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11032 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11033 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11034 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11035 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11036 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11037 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11038 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11039 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11040 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11041 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11042 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11043 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11044 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11045 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11046 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
11047 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11048 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11049 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11050 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11051 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11052 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11053 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11054 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11055 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11056 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11057 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11058 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11059 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11060 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11061 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11062 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11063 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11064 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11065 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11066 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11067 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11068 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11069 };
11070
11071 static void
11072 sh_media_init_builtins (void)
11073 {
11074 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11075 struct builtin_description *d;
11076
11077 memset (shared, 0, sizeof shared);
11078 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
11079 {
11080 tree type, arg_type = 0;
11081 int signature = d->signature;
11082 int i;
11083
11084 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11085 type = shared[signature];
11086 else
11087 {
11088 int has_result = signature_args[signature][0] != 0;
11089 tree args[3];
11090
11091 if ((signature_args[signature][1] & 8)
11092 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11093 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11094 continue;
11095 if (! TARGET_FPU_ANY
11096 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11097 continue;
11098 for (i = 0; i < (int) ARRAY_SIZE (args); i++)
11099 args[i] = NULL_TREE;
11100 for (i = 3; ; i--)
11101 {
11102 int arg = signature_args[signature][i];
11103 int opno = i - 1 + has_result;
11104
11105 if (arg & 8)
11106 arg_type = ptr_type_node;
11107 else if (arg)
11108 arg_type = (*lang_hooks.types.type_for_mode)
11109 (insn_data[d->icode].operand[opno].mode,
11110 (arg & 1));
11111 else if (i)
11112 continue;
11113 else
11114 arg_type = void_type_node;
11115 if (i == 0)
11116 break;
11117 args[i-1] = arg_type;
11118 }
11119 type = build_function_type_list (arg_type, args[0], args[1],
11120 args[2], NULL_TREE);
11121 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11122 shared[signature] = type;
11123 }
11124 d->fndecl =
11125 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11126 NULL, NULL_TREE);
11127 }
11128 }
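/* As a rough illustration: for an SH_BLTIN_V4HI3 entry such as
   __builtin_addv4hi3 the insn operands are all V4HImode, so the loop above
   effectively registers a declaration of the shape v4hi f (v4hi, v4hi).  */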
11129
11130 /* Returns the shmedia builtin decl for CODE. */
11131
11132 static tree
11133 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11134 {
11135 if (code >= ARRAY_SIZE (bdesc))
11136 return error_mark_node;
11137
11138 return bdesc[code].fndecl;
11139 }
11140
11141 /* Implements target hook vector_mode_supported_p. */
11142 bool
11143 sh_vector_mode_supported_p (enum machine_mode mode)
11144 {
11145 if (TARGET_FPU_ANY
11146 && ((mode == V2SFmode)
11147 || (mode == V4SFmode)
11148 || (mode == V16SFmode)))
11149 return true;
11150
11151 else if (TARGET_SHMEDIA
11152 && ((mode == V8QImode)
11153 || (mode == V2HImode)
11154 || (mode == V4HImode)
11155 || (mode == V2SImode)))
11156 return true;
11157
11158 return false;
11159 }
11160
11161 bool
11162 sh_frame_pointer_required (void)
11163 {
11164 /* If needed, override this in other tm.h files to cope with various OS
11165 lossage requiring a frame pointer. */
11166 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11167 return true;
11168
11169 if (crtl->profile)
11170 return true;
11171
11172 return false;
11173 }
11174
11175 /* Implements target hook dwarf_calling_convention. Return an enum
11176 of dwarf_calling_convention. */
11177 int
11178 sh_dwarf_calling_convention (const_tree func)
11179 {
11180 if (sh_attr_renesas_p (func))
11181 return DW_CC_GNU_renesas_sh;
11182
11183 return DW_CC_normal;
11184 }
11185
11186 static void
11187 sh_init_builtins (void)
11188 {
11189 if (TARGET_SHMEDIA)
11190 sh_media_init_builtins ();
11191 }
11192
11193 /* Returns the sh builtin decl for CODE. */
11194
11195 static tree
11196 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11197 {
11198 if (TARGET_SHMEDIA)
11199 return sh_media_builtin_decl (code, initialize_p);
11200
11201 return error_mark_node;
11202 }
11203
11204 /* Expand an expression EXP that calls a built-in function,
11205 with result going to TARGET if that's convenient
11206 (and in mode MODE if that's convenient).
11207 SUBTARGET may be used as the target for computing one of EXP's operands.
11208 IGNORE is nonzero if the value is to be ignored. */
11209
11210 static rtx
11211 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11212 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11213 {
11214 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11215 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11216 const struct builtin_description *d = &bdesc[fcode];
11217 enum insn_code icode = d->icode;
11218 int signature = d->signature;
11219 enum machine_mode tmode = VOIDmode;
11220 int nop = 0, i;
11221 rtx op[4];
11222 rtx pat = 0;
11223
11224 if (signature_args[signature][0])
11225 {
11226 if (ignore)
11227 return 0;
11228
11229 tmode = insn_data[icode].operand[0].mode;
11230 if (! target
11231 || GET_MODE (target) != tmode
11232 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11233 target = gen_reg_rtx (tmode);
11234 op[nop++] = target;
11235 }
11236 else
11237 target = 0;
11238
11239 for (i = 1; i <= 3; i++, nop++)
11240 {
11241 tree arg;
11242 enum machine_mode opmode, argmode;
11243 tree optype;
11244
11245 if (! signature_args[signature][i])
11246 break;
11247 arg = CALL_EXPR_ARG (exp, i - 1);
11248 if (arg == error_mark_node)
11249 return const0_rtx;
11250 if (signature_args[signature][i] & 8)
11251 {
11252 opmode = ptr_mode;
11253 optype = ptr_type_node;
11254 }
11255 else
11256 {
11257 opmode = insn_data[icode].operand[nop].mode;
11258 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11259 }
11260 argmode = TYPE_MODE (TREE_TYPE (arg));
11261 if (argmode != opmode)
11262 arg = build1 (NOP_EXPR, optype, arg);
11263 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11264 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11265 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11266 }
11267
11268 switch (nop)
11269 {
11270 case 1:
11271 pat = (*insn_data[d->icode].genfun) (op[0]);
11272 break;
11273 case 2:
11274 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11275 break;
11276 case 3:
11277 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11278 break;
11279 case 4:
11280 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11281 break;
11282 default:
11283 gcc_unreachable ();
11284 }
11285 if (! pat)
11286 return 0;
11287 emit_insn (pat);
11288 return target;
11289 }
11290
11291 void
11292 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11293 {
11294 rtx sel0 = const0_rtx;
11295 rtx sel1 = const1_rtx;
11296 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11297 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11298
11299 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11300 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11301 }
11302
11303 void
11304 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11305 {
11306 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11307
11308 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11309 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11310 }
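/* Both expanders above split a V2SFmode operation into two scalar SFmode
   operations, one per vector element, selected via the element indices
   passed to the underlying patterns.  */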
11311
11312 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11313 We can allow any mode in any general register. The special registers
11314 only allow SImode. Don't allow any mode in the PR.
11315
11316 We cannot hold DCmode values in the XD registers because alter_reg
11317 handles subregs of them incorrectly. We could work around this by
11318 spacing the XD registers like the DR registers, but this would require
11319 additional memory in every compilation to hold larger register vectors.
11320 We could hold SFmode / SCmode values in XD registers, but that
11321 would require a tertiary reload when reloading from / to memory,
11322 and a secondary reload to reload from / to general regs; that
11323 seems to be a losing proposition.
11324
11325 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11326 it won't be ferried through GP registers first. */
11327
11328 bool
11329 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11330 {
11331 if (SPECIAL_REGISTER_P (regno))
11332 return mode == SImode;
11333
11334 if (regno == FPUL_REG)
11335 return (mode == SImode || mode == SFmode);
11336
11337 if (FP_REGISTER_P (regno) && mode == SFmode)
11338 return true;
11339
11340 if (mode == V2SFmode)
11341 {
11342 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11343 || GENERAL_REGISTER_P (regno)))
11344 return true;
11345 else
11346 return false;
11347 }
11348
11349 if (mode == V4SFmode)
11350 {
11351 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11352 || GENERAL_REGISTER_P (regno))
11353 return true;
11354 else
11355 return false;
11356 }
11357
11358 if (mode == V16SFmode)
11359 {
11360 if (TARGET_SHMEDIA)
11361 {
11362 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11363 return true;
11364 else
11365 return false;
11366 }
11367 else
11368 return regno == FIRST_XD_REG;
11369 }
11370
11371 if (FP_REGISTER_P (regno))
11372 {
11373 if (mode == SFmode
11374 || mode == SImode
11375 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11376 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11377 || mode == DCmode
11378 || (TARGET_SHMEDIA
11379 && (mode == DFmode || mode == DImode
11380 || mode == V2SFmode || mode == TImode)))
11381 && ((regno - FIRST_FP_REG) & 1) == 0)
11382 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11383 && ((regno - FIRST_FP_REG) & 3) == 0))
11384 return true;
11385 else
11386 return false;
11387 }
11388
11389 if (XD_REGISTER_P (regno))
11390 return mode == DFmode;
11391
11392 if (TARGET_REGISTER_P (regno))
11393 return (mode == DImode || mode == SImode || mode == PDImode);
11394
11395 if (regno == PR_REG)
11396 return mode == SImode;
11397
11398 if (regno == FPSCR_REG)
11399 return mode == PSImode;
11400
11401 /* FIXME. This works around PR target/37633 for -O0. */
11402 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11403 {
11404 unsigned int n = GET_MODE_SIZE (mode) / 8;
11405
11406 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11407 && regno <= FIRST_GENERAL_REG + 14)
11408 return false;
11409 }
11410
11411 return true;
11412 }
11413
11414 /* Return true if a mode change from FROM to TO is invalid for registers
11415 in class RCLASS. */
11416 bool
11417 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11418 enum reg_class rclass)
11419 {
11420 /* We want to enable the use of SUBREGs as a means to
11421 VEC_SELECT a single element of a vector. */
11422 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11423 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11424
11425 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11426 {
11427 if (TARGET_LITTLE_ENDIAN)
11428 {
11429 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11430 return reg_classes_intersect_p (DF_REGS, rclass);
11431 }
11432 else
11433 {
11434 if (GET_MODE_SIZE (from) < 8)
11435 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11436 }
11437 }
11438 return 0;
11439 }
11440
11441 /* Return true if registers in machine mode MODE will likely be
11442 allocated to registers in small register classes. */
11443
11444 bool
11445 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11446 {
11447 return (! TARGET_SHMEDIA);
11448 }
11449
11450 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11451 that label is used. */
11452
11453 void
11454 sh_mark_label (rtx address, int nuses)
11455 {
11456 if (GOTOFF_P (address))
11457 {
11458 /* Extract the label or symbol. */
11459 address = XEXP (address, 0);
11460 if (GET_CODE (address) == PLUS)
11461 address = XEXP (address, 0);
11462 address = XVECEXP (address, 0, 0);
11463 }
11464 if (GET_CODE (address) == LABEL_REF
11465 && LABEL_P (XEXP (address, 0)))
11466 LABEL_NUSES (XEXP (address, 0)) += nuses;
11467 }
11468
11469 /* Compute extra cost of moving data between one register class
11470 and another. */
11471
11472 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11473 uses this information. Hence, the general register <-> floating point
11474 register information here is not used for SFmode. */
11475
11476 static int
11477 sh_register_move_cost (enum machine_mode mode,
11478 reg_class_t srcclass, reg_class_t dstclass)
11479 {
11480 if (dstclass == T_REGS || dstclass == PR_REGS)
11481 return 10;
11482
11483 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11484 return 4;
11485
11486 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11487 && REGCLASS_HAS_FP_REG (srcclass)
11488 && REGCLASS_HAS_FP_REG (dstclass))
11489 return 4;
11490
11491 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11492 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11493
11494 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11495 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11496 return 9;
11497
11498 if ((REGCLASS_HAS_FP_REG (dstclass)
11499 && REGCLASS_HAS_GENERAL_REG (srcclass))
11500 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11501 && REGCLASS_HAS_FP_REG (srcclass)))
11502 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11503 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11504
11505 if ((dstclass == FPUL_REGS
11506 && REGCLASS_HAS_GENERAL_REG (srcclass))
11507 || (srcclass == FPUL_REGS
11508 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11509 return 5;
11510
11511 if ((dstclass == FPUL_REGS
11512 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11513 || (srcclass == FPUL_REGS
11514 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11515 return 7;
11516
11517 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11518 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11519 return 20;
11520
11521 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11522 if (TARGET_SHMEDIA
11523 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11524 {
11525 if (sh_gettrcost >= 0)
11526 return sh_gettrcost;
11527 else if (!TARGET_PT_FIXED)
11528 return 100;
11529 }
11530
11531 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11532 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11533 return 4;
11534
11535 if (TARGET_SHMEDIA
11536 || (TARGET_FMOVD
11537 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11538 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11539 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11540
11541 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11542 }
11543
11544 static rtx emit_load_ptr (rtx, rtx);
11545
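/* Load a ptr_mode value from ADDR into REG, sign-extending it to Pmode
   when Pmode is wider than ptr_mode.  */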
11546 static rtx
11547 emit_load_ptr (rtx reg, rtx addr)
11548 {
11549 rtx mem = gen_const_mem (ptr_mode, addr);
11550
11551 if (Pmode != ptr_mode)
11552 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11553 return emit_move_insn (reg, mem);
11554 }
11555
11556 static void
11557 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11558 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11559 tree function)
11560 {
11561 CUMULATIVE_ARGS cum;
11562 int structure_value_byref = 0;
11563 rtx this_rtx, this_value, sibcall, insns, funexp;
11564 tree funtype = TREE_TYPE (function);
11565 int simple_add = CONST_OK_FOR_ADD (delta);
11566 int did_load = 0;
11567 rtx scratch0, scratch1, scratch2;
11568 unsigned i;
11569
11570 reload_completed = 1;
11571 epilogue_completed = 1;
11572 current_function_uses_only_leaf_regs = 1;
11573
11574 emit_note (NOTE_INSN_PROLOGUE_END);
11575
11576 /* Find the "this" pointer. We have such a wide range of ABIs for the
11577 SH that it's best to do this completely machine independently.
11578 "this" is passed as first argument, unless a structure return pointer
11579 comes first, in which case "this" comes second. */
11580 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11581 #ifndef PCC_STATIC_STRUCT_RETURN
11582 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11583 structure_value_byref = 1;
11584 #endif /* not PCC_STATIC_STRUCT_RETURN */
11585 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11586 {
11587 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11588
11589 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
11590 }
11591 this_rtx
11592 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
11593
11594 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11595 static chain pointer (even if you can't have nested virtual functions
11596 right now, someone might implement them sometime), and the rest of the
11597 registers are used for argument passing, are callee-saved, or reserved. */
11598 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11599 -ffixed-reg has been used. */
11600 if (! call_used_regs[0] || fixed_regs[0])
11601 error ("r0 needs to be available as a call-clobbered register");
11602 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11603 if (! TARGET_SH5)
11604 {
11605 if (call_used_regs[1] && ! fixed_regs[1])
11606 scratch1 = gen_rtx_REG (ptr_mode, 1);
11607 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11608 to the location where struct values are returned. */
11609 if (call_used_regs[3] && ! fixed_regs[3])
11610 scratch2 = gen_rtx_REG (Pmode, 3);
11611 }
11612 else if (TARGET_SHMEDIA)
11613 {
11614 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11615 if (i != REGNO (scratch0) &&
11616 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11617 {
11618 scratch1 = gen_rtx_REG (ptr_mode, i);
11619 break;
11620 }
11621 if (scratch1 == scratch0)
11622 error ("need a second call-clobbered general purpose register");
11623 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11624 if (call_used_regs[i] && ! fixed_regs[i])
11625 {
11626 scratch2 = gen_rtx_REG (Pmode, i);
11627 break;
11628 }
11629 if (scratch2 == scratch0)
11630 error ("need a call-clobbered target register");
11631 }
11632
11633 this_value = plus_constant (this_rtx, delta);
11634 if (vcall_offset
11635 && (simple_add || scratch0 != scratch1)
11636 && strict_memory_address_p (ptr_mode, this_value))
11637 {
11638 emit_load_ptr (scratch0, this_value);
11639 did_load = 1;
11640 }
11641
11642 if (!delta)
11643 ; /* Do nothing. */
11644 else if (simple_add)
11645 emit_move_insn (this_rtx, this_value);
11646 else
11647 {
11648 emit_move_insn (scratch1, GEN_INT (delta));
11649 emit_insn (gen_add2_insn (this_rtx, scratch1));
11650 }
11651
11652 if (vcall_offset)
11653 {
11654 rtx offset_addr;
11655
11656 if (!did_load)
11657 emit_load_ptr (scratch0, this_rtx);
11658
11659 offset_addr = plus_constant (scratch0, vcall_offset);
11660 if (strict_memory_address_p (ptr_mode, offset_addr))
11661 ; /* Do nothing. */
11662 else if (! TARGET_SH5 && scratch0 != scratch1)
11663 {
11664 /* scratch0 != scratch1, and we have indexed loads. Get better
11665 schedule by loading the offset into r1 and using an indexed
11666 load - then the load of r1 can issue before the load from
11667 (this_rtx + delta) finishes. */
11668 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11669 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11670 }
11671 else if (CONST_OK_FOR_ADD (vcall_offset))
11672 {
11673 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11674 offset_addr = scratch0;
11675 }
11676 else if (scratch0 != scratch1)
11677 {
11678 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11679 emit_insn (gen_add2_insn (scratch0, scratch1));
11680 offset_addr = scratch0;
11681 }
11682 else
11683 gcc_unreachable (); /* FIXME */
11684 emit_load_ptr (scratch0, offset_addr);
11685
11686 if (Pmode != ptr_mode)
11687 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11688 emit_insn (gen_add2_insn (this_rtx, scratch0));
11689 }
11690
11691 /* Generate a tail call to the target function. */
11692 if (! TREE_USED (function))
11693 {
11694 assemble_external (function);
11695 TREE_USED (function) = 1;
11696 }
11697 funexp = XEXP (DECL_RTL (function), 0);
11698 /* If the function is overridden, so is the thunk, hence we don't
11699 need GOT addressing even if this is a public symbol. */
11700 #if 0
11701 if (TARGET_SH1 && ! flag_weak)
11702 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11703 else
11704 #endif
11705 if (TARGET_SH2 && flag_pic)
11706 {
11707 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11708 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11709 }
11710 else
11711 {
11712 if (TARGET_SHMEDIA && flag_pic)
11713 {
11714 funexp = gen_sym2PIC (funexp);
11715 PUT_MODE (funexp, Pmode);
11716 }
11717 emit_move_insn (scratch2, funexp);
11718 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11719 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11720 }
11721 sibcall = emit_call_insn (sibcall);
11722 SIBLING_CALL_P (sibcall) = 1;
11723 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11724 emit_barrier ();
11725
11726 /* Run just enough of rest_of_compilation to do scheduling and get
11727 the insns emitted. Note that use_thunk calls
11728 assemble_start_function and assemble_end_function. */
11729
11730 insn_locators_alloc ();
11731 insns = get_insns ();
11732
11733 if (optimize > 0)
11734 {
11735 if (! cfun->cfg)
11736 init_flow (cfun);
11737 split_all_insns_noflow ();
11738 }
11739
11740 sh_reorg ();
11741 shorten_branches (insns);
11742 final_start_function (insns, file, 1);
11743 final (insns, file, 1);
11744 final_end_function ();
11745
11746 reload_completed = 0;
11747 epilogue_completed = 0;
11748 }
11749
11750 rtx
11751 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11752 {
11753 rtx sym;
11754
11755 /* If this is not an ordinary function, the name usually comes from a
11756 string literal or an sprintf buffer. Make sure we use the same
11757 string consistently, so that cse will be able to unify address loads. */
11758 if (kind != FUNCTION_ORDINARY)
11759 name = IDENTIFIER_POINTER (get_identifier (name));
11760 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11761 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11762 if (flag_pic)
11763 switch (kind)
11764 {
11765 case FUNCTION_ORDINARY:
11766 break;
11767 case SFUNC_GOT:
11768 {
11769 rtx reg = target ? target : gen_reg_rtx (Pmode);
11770
11771 emit_insn (gen_symGOT2reg (reg, sym));
11772 sym = reg;
11773 break;
11774 }
11775 case SFUNC_STATIC:
11776 {
11777 /* ??? To allow cse to work, we use GOTOFF relocations.
11778 We could add combiner patterns to transform this into
11779 straight pc-relative calls with sym2PIC / bsrf when
11780 label load and function call are still 1:1 and in the
11781 same basic block during combine. */
11782 rtx reg = target ? target : gen_reg_rtx (Pmode);
11783
11784 emit_insn (gen_symGOTOFF2reg (reg, sym));
11785 sym = reg;
11786 break;
11787 }
11788 }
11789 if (target && sym != target)
11790 {
11791 emit_move_insn (target, sym);
11792 return target;
11793 }
11794 return sym;
11795 }
11796
11797 /* Find the number of a general purpose register in S. */
11798 static int
11799 scavenge_reg (HARD_REG_SET *s)
11800 {
11801 int r;
11802 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11803 if (TEST_HARD_REG_BIT (*s, r))
11804 return r;
11805 return -1;
11806 }
11807
11808 rtx
11809 sh_get_pr_initial_val (void)
11810 {
11811 rtx val;
11812
11813 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11814 PR register on SHcompact, because it might be clobbered by the prologue.
11815 We check first if that is known to be the case. */
11816 if (TARGET_SHCOMPACT
11817 && ((crtl->args.info.call_cookie
11818 & ~ CALL_COOKIE_RET_TRAMP (1))
11819 || crtl->saves_all_registers))
11820 return gen_frame_mem (SImode, return_address_pointer_rtx);
11821
11822 /* If we haven't finished rtl generation, there might be a nonlocal label
11823 that we haven't seen yet.
11824 ??? get_hard_reg_initial_val fails if it is called after register
11825 allocation has started, unless it has been called before for the
11826 same register. And even then, we end up in trouble if we didn't use
11827 the register in the same basic block before. So call
11828 get_hard_reg_initial_val now and wrap it in an unspec if we might
11829 need to replace it. */
11830 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11831 combine can put the pseudo returned by get_hard_reg_initial_val into
11832 instructions that need a general purpose register, which will fail to
11833 be recognized when the pseudo becomes allocated to PR. */
11834 val
11835 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11836 if (TARGET_SH1)
11837 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11838 return val;
11839 }
11840
11841 int
11842 sh_expand_t_scc (rtx operands[])
11843 {
11844 enum rtx_code code = GET_CODE (operands[1]);
11845 rtx target = operands[0];
11846 rtx op0 = operands[2];
11847 rtx op1 = operands[3];
11848 rtx result = target;
11849 HOST_WIDE_INT val;
11850
11851 if (!REG_P (op0) || REGNO (op0) != T_REG
11852 || !CONST_INT_P (op1))
11853 return 0;
11854 if (!REG_P (result))
11855 result = gen_reg_rtx (SImode);
11856 val = INTVAL (op1);
11857 if ((code == EQ && val == 1) || (code == NE && val == 0))
11858 emit_insn (gen_movt (result));
11859 else if (TARGET_SH2A && ((code == EQ && val == 0)
11860 || (code == NE && val == 1)))
11861 emit_insn (gen_xorsi3_movrt (result));
11862 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11863 {
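      /* A sketch of the idea, assuming the usual SH semantics of SUBC
         (Rn = Rn - Rm - T): after clobbering RESULT, subtracting it from
         itself with borrow yields -T, and adding 1 then gives 1 - T,
         i.e. the logical negation of the T bit.  */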
11864 emit_clobber (result);
11865 emit_insn (gen_subc (result, result, result));
11866 emit_insn (gen_addsi3 (result, result, const1_rtx));
11867 }
11868 else if (code == EQ || code == NE)
11869 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11870 else
11871 return 0;
11872 if (result != target)
11873 emit_move_insn (target, result);
11874 return 1;
11875 }
11876
11877 /* INSN is an sfunc; return the rtx that describes the address used. */
11878 static rtx
11879 extract_sfunc_addr (rtx insn)
11880 {
11881 rtx pattern, part = NULL_RTX;
11882 int len, i;
11883
11884 pattern = PATTERN (insn);
11885 len = XVECLEN (pattern, 0);
11886 for (i = 0; i < len; i++)
11887 {
11888 part = XVECEXP (pattern, 0, i);
11889 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11890 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11891 return XEXP (part, 0);
11892 }
11893 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11894 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11895 }
11896
11897 /* Verify that the register in use_sfunc_addr still agrees with the address
11898 used in the sfunc. This prevents fill_slots_from_thread from changing
11899 use_sfunc_addr.
11900 INSN is the use_sfunc_addr instruction, and REG is the register it
11901 guards. */
11902 int
11903 check_use_sfunc_addr (rtx insn, rtx reg)
11904 {
11905 /* Search for the sfunc. It should really come right after INSN. */
11906 while ((insn = NEXT_INSN (insn)))
11907 {
11908 if (LABEL_P (insn) || JUMP_P (insn))
11909 break;
11910 if (! INSN_P (insn))
11911 continue;
11912
11913 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11914 insn = XVECEXP (PATTERN (insn), 0, 0);
11915 if (GET_CODE (PATTERN (insn)) != PARALLEL
11916 || get_attr_type (insn) != TYPE_SFUNC)
11917 continue;
11918 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11919 }
11920 gcc_unreachable ();
11921 }
11922
11923 /* This function returns a constant rtx that represents 2**15 / pi in
11924 SFmode.  It's used to scale SFmode angles, in radians, to a
11925 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11926 maps to 0x10000).  */
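/* As a quick check of the constant below: 2**15 / pi = 32768 / 3.14159...
   = 10430.3783..., so an angle of 2*pi radians scaled by it gives
   2 * 32768 = 65536 = 0x10000, one full circle.  */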
11927
11928 static GTY(()) rtx sh_fsca_sf2int_rtx;
11929
11930 rtx
11931 sh_fsca_sf2int (void)
11932 {
11933 if (! sh_fsca_sf2int_rtx)
11934 {
11935 REAL_VALUE_TYPE rv;
11936
11937 real_from_string (&rv, "10430.378350470453");
11938 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11939 }
11940
11941 return sh_fsca_sf2int_rtx;
11942 }
11943
11944 /* This function returns a constant rtx that represents 2**15 / pi in
11945 DFmode.  It's used to scale DFmode angles, in radians, to a
11946 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11947 maps to 0x10000).  */
11948
11949 static GTY(()) rtx sh_fsca_df2int_rtx;
11950
11951 rtx
11952 sh_fsca_df2int (void)
11953 {
11954 if (! sh_fsca_df2int_rtx)
11955 {
11956 REAL_VALUE_TYPE rv;
11957
11958 real_from_string (&rv, "10430.378350470453");
11959 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11960 }
11961
11962 return sh_fsca_df2int_rtx;
11963 }
11964
11965 /* This function returns a constant rtx that represents pi / 2**15 in
11966 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
11967 of a full circle back to an SFmode value (i.e., 0x10000 maps to
11968 2*pi).  */
11969
11970 static GTY(()) rtx sh_fsca_int2sf_rtx;
11971
11972 rtx
11973 sh_fsca_int2sf (void)
11974 {
11975 if (! sh_fsca_int2sf_rtx)
11976 {
11977 REAL_VALUE_TYPE rv;
11978
11979 real_from_string (&rv, "9.587379924285257e-5");
11980 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11981 }
11982
11983 return sh_fsca_int2sf_rtx;
11984 }
11985
11986 /* Initialize the CUMULATIVE_ARGS structure. */
11987
11988 void
11989 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11990 tree fntype,
11991 rtx libname ATTRIBUTE_UNUSED,
11992 tree fndecl,
11993 signed int n_named_args,
11994 enum machine_mode mode)
11995 {
11996 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11997 pcum->free_single_fp_reg = 0;
11998 pcum->stack_regs = 0;
11999 pcum->byref_regs = 0;
12000 pcum->byref = 0;
12001 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12002
12003 /* XXX - Should we check TARGET_HITACHI here ??? */
12004 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12005
12006 if (fntype)
12007 {
12008 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12009 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12010 pcum->prototype_p = prototype_p (fntype);
12011 pcum->arg_count [(int) SH_ARG_INT]
12012 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12013
12014 pcum->call_cookie
12015 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12016 && pcum->arg_count [(int) SH_ARG_INT] == 0
12017 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12018 ? int_size_in_bytes (TREE_TYPE (fntype))
12019 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12020 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12021 == FIRST_RET_REG));
12022 }
12023 else
12024 {
12025 pcum->arg_count [(int) SH_ARG_INT] = 0;
12026 pcum->prototype_p = FALSE;
12027 if (mode != VOIDmode)
12028 {
12029 pcum->call_cookie =
12030 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12031 && GET_MODE_SIZE (mode) > 4
12032 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12033
12034 /* If the default ABI is the Renesas ABI then all library
12035 calls must assume that the library will be using the
12036 Renesas ABI. So if the function would return its result
12037 in memory then we must force the address of this memory
12038 block onto the stack. Ideally we would like to call
12039 targetm.calls.return_in_memory() here but we do not have
12040 the TYPE or the FNDECL available so we synthesize the
12041 contents of that function as best we can. */
12042 pcum->force_mem =
12043 (TARGET_DEFAULT & MASK_HITACHI)
12044 && (mode == BLKmode
12045 || (GET_MODE_SIZE (mode) > 4
12046 && !(mode == DFmode
12047 && TARGET_FPU_DOUBLE)));
12048 }
12049 else
12050 {
12051 pcum->call_cookie = 0;
12052 pcum->force_mem = FALSE;
12053 }
12054 }
12055 }
12056
12057 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12058 not descend into CONST_DOUBLEs when doing the replacement.
12059
12060 Note that copying is not done so X must not be shared unless all copies
12061 are to be modified.
12062
12063 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12064 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12065 replacements[n*2+1] - and that we take mode changes into account.
12066
12067 If a replacement is ambiguous, return NULL_RTX.
12068
12069 If MODIFY is zero, don't modify any rtl in place,
12070 just return zero or nonzero for failure / success. */
12071
12072 rtx
12073 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12074 {
12075 int i, j;
12076 const char *fmt;
12077
12078 /* The following prevents loops when we change a MEM in a
12079 CONST_DOUBLE into the same CONST_DOUBLE. */
12080 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
12081 return x;
12082
12083 for (i = n_replacements - 1; i >= 0 ; i--)
12084 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12085 return replacements[i*2+1];
12086
12087 /* Allow this function to make replacements in EXPR_LISTs. */
12088 if (x == 0)
12089 return 0;
12090
12091 if (GET_CODE (x) == SUBREG)
12092 {
12093 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12094 n_replacements, modify);
12095
12096 if (CONST_INT_P (new_rtx))
12097 {
12098 x = simplify_subreg (GET_MODE (x), new_rtx,
12099 GET_MODE (SUBREG_REG (x)),
12100 SUBREG_BYTE (x));
12101 if (! x)
12102 abort ();
12103 }
12104 else if (modify)
12105 SUBREG_REG (x) = new_rtx;
12106
12107 return x;
12108 }
12109 else if (REG_P (x))
12110 {
12111 unsigned regno = REGNO (x);
12112 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12113 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12114 rtx result = NULL_RTX;
12115
12116 for (i = n_replacements - 1; i >= 0; i--)
12117 {
12118 rtx from = replacements[i*2];
12119 rtx to = replacements[i*2+1];
12120 unsigned from_regno, from_nregs, to_regno, new_regno;
12121
12122 if (!REG_P (from))
12123 continue;
12124 from_regno = REGNO (from);
12125 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12126 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12127 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12128 {
12129 if (regno < from_regno
12130 || regno + nregs > from_regno + nregs
12131 || !REG_P (to)
12132 || result)
12133 return NULL_RTX;
12134 to_regno = REGNO (to);
12135 if (to_regno < FIRST_PSEUDO_REGISTER)
12136 {
12137 new_regno = regno + to_regno - from_regno;
12138 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12139 != nregs)
12140 return NULL_RTX;
12141 result = gen_rtx_REG (GET_MODE (x), new_regno);
12142 }
12143 else if (GET_MODE (x) <= GET_MODE (to))
12144 result = gen_lowpart_common (GET_MODE (x), to);
12145 else
12146 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12147 }
12148 }
12149 return result ? result : x;
12150 }
12151 else if (GET_CODE (x) == ZERO_EXTEND)
12152 {
12153 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12154 n_replacements, modify);
12155
12156 if (CONST_INT_P (new_rtx))
12157 {
12158 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12159 new_rtx, GET_MODE (XEXP (x, 0)));
12160 if (! x)
12161 abort ();
12162 }
12163 else if (modify)
12164 XEXP (x, 0) = new_rtx;
12165
12166 return x;
12167 }
12168
12169 fmt = GET_RTX_FORMAT (GET_CODE (x));
12170 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12171 {
12172 rtx new_rtx;
12173
12174 if (fmt[i] == 'e')
12175 {
12176 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12177 n_replacements, modify);
12178 if (!new_rtx)
12179 return NULL_RTX;
12180 if (modify)
12181 XEXP (x, i) = new_rtx;
12182 }
12183 else if (fmt[i] == 'E')
12184 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12185 {
12186 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12187 n_replacements, modify);
12188 if (!new_rtx)
12189 return NULL_RTX;
12190 if (modify)
12191 XVECEXP (x, i, j) = new_rtx;
12192 }
12193 }
12194
12195 return x;
12196 }
12197
12198 rtx
12199 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12200 {
12201 enum rtx_code code = TRUNCATE;
12202
12203 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12204 {
12205 rtx inner = XEXP (x, 0);
12206 enum machine_mode inner_mode = GET_MODE (inner);
12207
12208 if (inner_mode == mode)
12209 return inner;
12210 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12211 x = inner;
12212 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12213 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12214 {
12215 code = GET_CODE (x);
12216 x = inner;
12217 }
12218 }
12219 return gen_rtx_fmt_e (code, mode, x);
12220 }
12221
12222 /* Called via for_each_rtx after reload, to clean up truncates of
12223 registers that span multiple actual hard registers. */
12224 int
12225 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12226 {
12227 rtx x = *p, reg;
12228
12229 if (GET_CODE (x) != TRUNCATE)
12230 return 0;
12231 reg = XEXP (x, 0);
12232 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12233 {
12234 enum machine_mode reg_mode = GET_MODE (reg);
12235 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12236 subreg_lowpart_offset (DImode, reg_mode));
12237 *(int*) n_changes += 1;
12238 return -1;
12239 }
12240 return 0;
12241 }
12242
12243 /* Load and store depend on the highpart of the address. However,
12244 set_attr_alternative does not give well-defined results before reload,
12245 so we must look at the rtl ourselves to see if any of the feeding
12246 registers is used in a memref. */
12247
12248 /* Called by sh_contains_memref_p via for_each_rtx. */
12249 static int
12250 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12251 {
12252 return (MEM_P (*loc));
12253 }
12254
12255 /* Return nonzero iff INSN contains a MEM. */
12256 int
12257 sh_contains_memref_p (rtx insn)
12258 {
12259 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12260 }
12261
12262 /* Return nonzero iff INSN loads a banked register. */
12263 int
12264 sh_loads_bankedreg_p (rtx insn)
12265 {
12266 if (GET_CODE (PATTERN (insn)) == SET)
12267 {
12268 rtx op = SET_DEST (PATTERN(insn));
12269 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12270 return 1;
12271 }
12272
12273 return 0;
12274 }
12275
12276 /* FNADDR is the MEM expression from a call expander. Return an address
12277 to use in an SHmedia insn pattern. */
12278 rtx
12279 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12280 {
12281 int is_sym;
12282
12283 fnaddr = XEXP (fnaddr, 0);
12284 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12285 if (flag_pic && is_sym)
12286 {
12287 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12288 {
12289 rtx reg = gen_reg_rtx (Pmode);
12290
12291 /* We must not use GOTPLT for sibcalls, because PIC_REG
12292 must be restored before the PLT code gets to run. */
12293 if (is_sibcall)
12294 emit_insn (gen_symGOT2reg (reg, fnaddr));
12295 else
12296 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12297 fnaddr = reg;
12298 }
12299 else
12300 {
12301 fnaddr = gen_sym2PIC (fnaddr);
12302 PUT_MODE (fnaddr, Pmode);
12303 }
12304 }
12305 /* If ptabs might trap, make this visible to the rest of the compiler.
12306 We generally assume that symbols pertain to valid locations, but
12307 it is possible to generate invalid symbols with asm or linker tricks.
12308 In a list of functions where each returns its successor, an invalid
12309 symbol might denote an empty list. */
12310 if (!TARGET_PT_FIXED
12311 && (!is_sym || TARGET_INVALID_SYMBOLS)
12312 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12313 {
12314 rtx tr = gen_reg_rtx (PDImode);
12315
12316 emit_insn (gen_ptabs (tr, fnaddr));
12317 fnaddr = tr;
12318 }
12319 else if (! target_reg_operand (fnaddr, Pmode))
12320 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12321 return fnaddr;
12322 }
12323
12324 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
12325
12326 static reg_class_t
12327 sh_preferred_reload_class (rtx x, reg_class_t rclass)
12328 {
12329 if (rclass == NO_REGS
12330 && TARGET_SHMEDIA
12331 && (CONST_DOUBLE_P (x)
12332 || GET_CODE (x) == SYMBOL_REF
12333 || PIC_ADDR_P (x)))
12334 return GENERAL_REGS;
12335
12336 return rclass;
12337 }
12338
12339 /* Implement TARGET_SECONDARY_RELOAD. */
12340
12341 static reg_class_t
12342 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12343 enum machine_mode mode, secondary_reload_info *sri)
12344 {
12345 enum reg_class rclass = (enum reg_class) rclass_i;
12346
12347 if (in_p)
12348 {
12349 if (REGCLASS_HAS_FP_REG (rclass)
12350 && ! TARGET_SHMEDIA
12351 && immediate_operand ((x), mode)
12352 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12353 && mode == SFmode && fldi_ok ()))
12354 switch (mode)
12355 {
12356 case SFmode:
12357 sri->icode = CODE_FOR_reload_insf__frn;
12358 return NO_REGS;
12359 case DFmode:
12360 sri->icode = CODE_FOR_reload_indf__frn;
12361 return NO_REGS;
12362 case SImode:
12363 /* ??? If we knew that we are in the appropriate mode -
12364 single precision - we could use a reload pattern directly. */
12365 return FPUL_REGS;
12366 default:
12367 abort ();
12368 }
12369 if (rclass == FPUL_REGS
12370 && ((REG_P (x)
12371 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12372 || REGNO (x) == T_REG))
12373 || GET_CODE (x) == PLUS))
12374 return GENERAL_REGS;
12375 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12376 {
12377 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12378 return GENERAL_REGS;
12379 else if (mode == SFmode)
12380 return FP_REGS;
12381 sri->icode = CODE_FOR_reload_insi__i_fpul;
12382 return NO_REGS;
12383 }
12384 if (rclass == FPSCR_REGS
12385 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12386 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12387 return GENERAL_REGS;
12388 if (REGCLASS_HAS_FP_REG (rclass)
12389 && TARGET_SHMEDIA
12390 && immediate_operand (x, mode)
12391 && x != CONST0_RTX (GET_MODE (x))
12392 && GET_MODE (x) != V4SFmode)
12393 return GENERAL_REGS;
12394 if ((mode == QImode || mode == HImode)
12395 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12396 {
12397 sri->icode = ((mode == QImode)
12398 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12399 return NO_REGS;
12400 }
12401 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12402 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12403 return TARGET_REGS;
12404 } /* end of input-only processing. */
12405
12406 if (((REGCLASS_HAS_FP_REG (rclass)
12407 && (REG_P (x)
12408 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12409 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12410 && TARGET_FMOVD))))
12411 || (REGCLASS_HAS_GENERAL_REG (rclass)
12412 && REG_P (x)
12413 && FP_REGISTER_P (REGNO (x))))
12414 && ! TARGET_SHMEDIA
12415 && (mode == SFmode || mode == SImode))
12416 return FPUL_REGS;
12417 if ((rclass == FPUL_REGS
12418 || (REGCLASS_HAS_FP_REG (rclass)
12419 && ! TARGET_SHMEDIA && mode == SImode))
12420 && (MEM_P (x)
12421 || (REG_P (x)
12422 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12423 || REGNO (x) == T_REG
12424 || system_reg_operand (x, VOIDmode)))))
12425 {
12426 if (rclass == FPUL_REGS)
12427 return GENERAL_REGS;
12428 return FPUL_REGS;
12429 }
12430 if ((rclass == TARGET_REGS
12431 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12432 && !satisfies_constraint_Csy (x)
12433 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12434 return GENERAL_REGS;
12435 if ((rclass == MAC_REGS || rclass == PR_REGS)
12436 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12437 && rclass != REGNO_REG_CLASS (REGNO (x)))
12438 return GENERAL_REGS;
12439 if (rclass != GENERAL_REGS && REG_P (x)
12440 && TARGET_REGISTER_P (REGNO (x)))
12441 return GENERAL_REGS;
12442
12443 /* If we get here, fall back to loading the FPUL register through general registers.
12444 This case can happen when movsi_ie insn is picked initially to
12445 load/store the FPUL register from/to another register, and then the
12446 other register is allocated on the stack. */
12447 if (rclass == FPUL_REGS && true_regnum (x) == -1)
12448 return GENERAL_REGS;
12449
12450 return NO_REGS;
12451 }
12452
12453 static void
12454 sh_conditional_register_usage (void)
12455 {
12456 int regno;
12457 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
12458 if (! VALID_REGISTER_P (regno))
12459 fixed_regs[regno] = call_used_regs[regno] = 1;
12460 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
12461 if (TARGET_SH5)
12462 {
12463 call_used_regs[FIRST_GENERAL_REG + 8]
12464 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
12465 call_really_used_regs[FIRST_GENERAL_REG + 8]
12466 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
12467 }
12468 if (TARGET_SHMEDIA)
12469 {
12470 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
12471 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
12472 regno_reg_class[FIRST_FP_REG] = FP_REGS;
12473 }
12474 if (flag_pic)
12475 {
12476 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12477 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12478 }
12479 /* Renesas saves and restores mac registers on call. */
12480 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
12481 {
12482 call_really_used_regs[MACH_REG] = 0;
12483 call_really_used_regs[MACL_REG] = 0;
12484 }
12485 for (regno = FIRST_FP_REG + (TARGET_LITTLE_ENDIAN != 0);
12486 regno <= LAST_FP_REG; regno += 2)
12487 SET_HARD_REG_BIT (reg_class_contents[DF_HI_REGS], regno);
12488 if (TARGET_SHMEDIA)
12489 {
12490 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
12491 if (! fixed_regs[regno] && call_really_used_regs[regno])
12492 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
12493 }
12494 else
12495 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
12496 if (! fixed_regs[regno] && call_really_used_regs[regno])
12497 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
12498 }
12499
12500 /* Implement TARGET_LEGITIMATE_CONSTANT_P
12501
12502 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
12503
12504 static bool
12505 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
12506 {
12507 return (TARGET_SHMEDIA
12508 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
12509 || x == CONST0_RTX (mode)
12510 || !TARGET_SHMEDIA_FPU
12511 || TARGET_SHMEDIA64)
12512 : (GET_CODE (x) != CONST_DOUBLE
12513 || mode == DFmode || mode == SFmode
12514 || mode == DImode || GET_MODE (x) == VOIDmode));
12515 }
12516
12517 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12518
12519 static void
12520 sh_init_sync_libfuncs (void)
12521 {
12522 init_sync_libfuncs (UNITS_PER_WORD);
12523 }
12524
12525 #include "gt-sh.h"