gcc/config/sh/sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "diagnostic-core.h"
41 #include "recog.h"
42 #include "dwarf2.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "langhooks.h"
47 #include "basic-block.h"
48 #include "df.h"
49 #include "intl.h"
50 #include "sched-int.h"
51 #include "params.h"
52 #include "ggc.h"
53 #include "gimple.h"
54 #include "cfgloop.h"
55 #include "alloc-pool.h"
56 #include "tm-constrs.h"
57 #include "opts.h"
58
59
60 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
61
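/* Word indices of the most and least significant 32-bit word of a
   multi-word value.  On a little-endian target the low word comes first
   (LSW == 0, MSW == 1); on a big-endian target the order is reversed.
   Used below e.g. as adjust_address (x, SImode, 4 * LSW) to address the
   low half of a double-word value.  */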
62 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
63 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
64
65 /* Macros to abstract differences between SHmedia and 32-bit SH: the constant range allowed for add, and the move/add/sub insn generators for the pointer-sized mode. */
66 #define CONST_OK_FOR_ADD(size) \
67 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
68 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
69 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
70 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
71
72 /* Used to simplify the logic below. Find the attributes wherever
73 they may be. */
74 #define SH_ATTRIBUTES(decl) \
75 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
76 : DECL_ATTRIBUTES (decl) \
77 ? (DECL_ATTRIBUTES (decl)) \
78 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
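/* For example, applied to a FUNCTION_DECL this yields its DECL_ATTRIBUTES
   when any are present and otherwise falls back to the attributes of the
   decl's type; applied to a type node it yields TYPE_ATTRIBUTES directly.  */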
79
80 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
81 int current_function_interrupt;
82
83 tree sh_deferred_function_attributes;
84 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
85
86 /* Global variables for machine-dependent things. */
87
88 /* Which CPU we are scheduling for. */
89 enum processor_type sh_cpu;
90
91 /* Definitions used in ready queue reordering for first scheduling pass. */
92
93 /* Reg weight arrays for SImode (index 0) and SFmode (index 1), indexed by insn UID. */
94 static short *regmode_weight[2];
95
96 /* Total SFmode and SImode weights of scheduled insns. */
97 static int curr_regmode_pressure[2];
98
99 /* Number of r0 life regions. */
100 static int r0_life_regions;
101
102 /* If true, skip cycles for Q -> R movement. */
103 static int skip_cycles = 0;
104
105 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
106 and returned from sh_reorder2. */
107 static short cached_can_issue_more;
108
109 /* Unique number for UNSPEC_BBR pattern. */
110 static unsigned int unspec_bbr_uid = 1;
111
112 /* Provides the class number of the smallest class containing
113 each hard register number. */
114
115 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
116 {
117 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
150 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
151 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
152 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
153 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
154 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
155 GENERAL_REGS, GENERAL_REGS,
156 };
157
158 char sh_register_names[FIRST_PSEUDO_REGISTER] \
159 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
160
161 char sh_additional_register_names[ADDREGNAMES_SIZE] \
162 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
163 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
164
165 int assembler_dialect;
166
167 static bool shmedia_space_reserved_for_target_registers;
168
169 static void split_branches (rtx);
170 static int branch_dest (rtx);
171 static void force_into (rtx, rtx);
172 static void print_slot (rtx);
173 static rtx add_constant (rtx, enum machine_mode, rtx);
174 static void dump_table (rtx, rtx);
175 static bool broken_move (rtx);
176 static bool mova_p (rtx);
177 static rtx find_barrier (int, rtx, rtx);
178 static bool noncall_uses_reg (rtx, rtx, rtx *);
179 static rtx gen_block_redirect (rtx, int, int);
180 static void sh_reorg (void);
181 static void sh_option_override (void);
182 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
183 static rtx frame_insn (rtx);
184 static rtx push (int);
185 static void pop (int);
186 static void push_regs (HARD_REG_SET *, int);
187 static int calc_live_regs (HARD_REG_SET *);
188 static HOST_WIDE_INT rounded_frame_size (int);
189 static bool sh_frame_pointer_required (void);
190 static rtx mark_constant_pool_use (rtx);
191 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
192 static tree sh_handle_resbank_handler_attribute (tree *, tree,
193 tree, int, bool *);
194 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
195 tree, int, bool *);
196 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
197 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
198 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
199 static void sh_print_operand (FILE *, rtx, int);
200 static void sh_print_operand_address (FILE *, rtx);
201 static bool sh_print_operand_punct_valid_p (unsigned char code);
202 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
207 static int sh_adjust_cost (rtx, rtx, rtx, int);
208 static int sh_issue_rate (void);
209 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
210 static short find_set_regmode_weight (rtx, enum machine_mode);
211 static short find_insn_regmode_weight (rtx, enum machine_mode);
212 static void find_regmode_weight (basic_block, enum machine_mode);
213 static int find_r0_life_regions (basic_block);
214 static void sh_md_init_global (FILE *, int, int);
215 static void sh_md_finish_global (FILE *, int);
216 static int rank_for_reorder (const void *, const void *);
217 static void swap_reorder (rtx *, int);
218 static void ready_reorder (rtx *, int);
219 static bool high_pressure (enum machine_mode);
220 static int sh_reorder (FILE *, int, rtx *, int *, int);
221 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
222 static void sh_md_init (FILE *, int, int);
223 static int sh_variable_issue (FILE *, int, rtx, int);
224
225 static bool sh_function_ok_for_sibcall (tree, tree);
226
227 static bool sh_cannot_modify_jumps_p (void);
228 static reg_class_t sh_target_reg_class (void);
229 static bool sh_optimize_target_register_callee_saved (bool);
230 static bool sh_ms_bitfield_layout_p (const_tree);
231
232 static void sh_init_builtins (void);
233 static tree sh_builtin_decl (unsigned, bool);
234 static void sh_media_init_builtins (void);
235 static tree sh_media_builtin_decl (unsigned, bool);
236 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
237 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
238 static void sh_file_start (void);
239 static bool flow_dependent_p (rtx, rtx);
240 static void flow_dependent_p_1 (rtx, const_rtx, void *);
241 static int shiftcosts (rtx);
242 static int and_xor_ior_costs (rtx, int);
243 static int addsubcosts (rtx);
244 static int multcosts (rtx);
245 static bool unspec_caller_rtx_p (rtx);
246 static bool sh_cannot_copy_insn_p (rtx);
247 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
248 static int sh_address_cost (rtx, bool);
249 static int sh_pr_n_sets (void);
250 static rtx sh_allocate_initial_value (rtx);
251 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
252 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
253 enum machine_mode,
254 struct secondary_reload_info *);
255 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
256 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
257 static rtx sh_delegitimize_address (rtx);
258 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
259 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
260 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
261 static int scavenge_reg (HARD_REG_SET *s);
262 struct save_schedule_s;
263 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
264 struct save_schedule_s *, int);
265
266 static rtx sh_struct_value_rtx (tree, int);
267 static rtx sh_function_value (const_tree, const_tree, bool);
268 static bool sh_function_value_regno_p (const unsigned int);
269 static rtx sh_libcall_value (enum machine_mode, const_rtx);
270 static bool sh_return_in_memory (const_tree, const_tree);
271 static rtx sh_builtin_saveregs (void);
272 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode, tree, int *, int);
273 static bool sh_strict_argument_naming (cumulative_args_t);
274 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
275 static tree sh_build_builtin_va_list (void);
276 static void sh_va_start (tree, rtx);
277 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
278 static bool sh_promote_prototypes (const_tree);
279 static enum machine_mode sh_promote_function_mode (const_tree type,
280 enum machine_mode,
281 int *punsignedp,
282 const_tree funtype,
283 int for_return);
284 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
285 const_tree, bool);
286 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
287 const_tree, bool);
288 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
289 tree, bool);
290 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
291 const_tree, bool);
292 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
293 const_tree, bool);
294 static bool sh_scalar_mode_supported_p (enum machine_mode);
295 static int sh_dwarf_calling_convention (const_tree);
296 static void sh_encode_section_info (tree, rtx, int);
297 static bool sh2a_function_vector_p (tree);
298 static void sh_trampoline_init (rtx, tree, rtx);
299 static rtx sh_trampoline_adjust_address (rtx);
300 static void sh_conditional_register_usage (void);
301 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
302 static int mov_insn_size (enum machine_mode, bool);
303 static int max_mov_insn_displacement (enum machine_mode, bool);
304 static int mov_insn_alignment_mask (enum machine_mode, bool);
305 static HOST_WIDE_INT disp_addr_displacement (rtx);
306
307 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
308 \f
309 static const struct attribute_spec sh_attribute_table[] =
310 {
311 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
312 affects_type_identity } */
313 { "interrupt_handler", 0, 0, true, false, false,
314 sh_handle_interrupt_handler_attribute, false },
315 { "sp_switch", 1, 1, true, false, false,
316 sh_handle_sp_switch_attribute, false },
317 { "trap_exit", 1, 1, true, false, false,
318 sh_handle_trap_exit_attribute, false },
319 { "renesas", 0, 0, false, true, false,
320 sh_handle_renesas_attribute, false },
321 { "trapa_handler", 0, 0, true, false, false,
322 sh_handle_interrupt_handler_attribute, false },
323 { "nosave_low_regs", 0, 0, true, false, false,
324 sh_handle_interrupt_handler_attribute, false },
325 { "resbank", 0, 0, true, false, false,
326 sh_handle_resbank_handler_attribute, false },
327 { "function_vector", 1, 1, true, false, false,
328 sh2a_handle_function_vector_handler_attribute, false },
329 { NULL, 0, 0, false, false, false, NULL, false }
330 };
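/* A sketch of how these attributes are typically spelled in user code
   (illustrative only, not taken from this file):

     void __attribute__ ((interrupt_handler)) isr (void);
     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                          trap_exit (11))) soft_irq (void);

   interrupt_handler, trapa_handler, nosave_low_regs, resbank and renesas
   take no arguments, while sp_switch, trap_exit and function_vector each
   require exactly one, matching the min_len/max_len fields above.  */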
331 \f
332 /* Initialize the GCC target structure. */
333 #undef TARGET_ATTRIBUTE_TABLE
334 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
335
336 /* The next two are used for debug info when compiling with -gdwarf. */
337 #undef TARGET_ASM_UNALIGNED_HI_OP
338 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
339 #undef TARGET_ASM_UNALIGNED_SI_OP
340 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
341
342 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
343 #undef TARGET_ASM_UNALIGNED_DI_OP
344 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
345 #undef TARGET_ASM_ALIGNED_DI_OP
346 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
347
348 #undef TARGET_OPTION_OVERRIDE
349 #define TARGET_OPTION_OVERRIDE sh_option_override
350
351 #undef TARGET_PRINT_OPERAND
352 #define TARGET_PRINT_OPERAND sh_print_operand
353 #undef TARGET_PRINT_OPERAND_ADDRESS
354 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
355 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
356 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
357 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
358 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
359
360 #undef TARGET_ASM_FUNCTION_EPILOGUE
361 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
362
363 #undef TARGET_ASM_OUTPUT_MI_THUNK
364 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
365
366 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
367 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
368
369 #undef TARGET_ASM_FILE_START
370 #define TARGET_ASM_FILE_START sh_file_start
371 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
372 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
373
374 #undef TARGET_REGISTER_MOVE_COST
375 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
376
377 #undef TARGET_INSERT_ATTRIBUTES
378 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
379
380 #undef TARGET_SCHED_ADJUST_COST
381 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
382
383 #undef TARGET_SCHED_ISSUE_RATE
384 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
385
386 /* The following hooks have been implemented to re-enable sched1. With
387 their help we limit the movement of insns in sched1 in order to reduce
388 register pressure. The overall idea is to keep count of the SImode and
389 SFmode regs required by already scheduled insns. When these counts
390 cross some threshold values, give priority to insns that free registers.
391 The insn that frees registers is most likely to be the insn with the
392 lowest LUID (original insn order); but such an insn might sit in the
393 stalled queue (Q) instead of the ready queue (R). To solve this, we skip
394 cycles, up to a maximum of 8, so that such insns may move from Q -> R.
395 (An illustrative sketch of the reordering idea follows this comment block.)
396 The hooks are described below:
397
398 TARGET_SCHED_INIT_GLOBAL: A target hook in the generic scheduler;
399 it is called inside the sched_init function just after the
400 find_insn_reg_weights call. It is used to calculate the SImode and
401 SFmode weights of the insns of each basic block, much like what
402 find_insn_reg_weights does.
403 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
404
405 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
406 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
407 (Q)->(R).
408
409 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
410 high, reorder the ready queue so that the insn with the lowest LUID will be
411 issued next.
412
413 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
414 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
415
416 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
417 can be returned from TARGET_SCHED_REORDER2.
418
419 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
420
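/* A minimal, self-contained sketch of the pressure-driven reordering idea
   described above.  It is illustrative only: the names below (toy_insn,
   toy_rank, toy_reorder, TOY_PRESSURE_LIMIT) are hypothetical and do not
   correspond to the real scheduler interface, which operates on rtx insns
   and the weights computed by find_regmode_weight.  */
#if 0
#include <stdlib.h>

struct toy_insn
{
  int luid;	/* Original insn order; lower means earlier.  */
};

/* Sort so that the smallest LUID ends up at the end of the array, which is
   the position the next insn is taken from in this toy model.  */
static int
toy_rank (const void *a, const void *b)
{
  return ((const struct toy_insn *) b)->luid
	 - ((const struct toy_insn *) a)->luid;
}

#define TOY_PRESSURE_LIMIT 12	/* Arbitrary threshold for the sketch.  */

/* Reorder the ready queue only when register pressure is high; otherwise
   leave the scheduler's own priority order alone.  */
static void
toy_reorder (struct toy_insn *ready, int n_ready, int pressure)
{
  if (pressure > TOY_PRESSURE_LIMIT && n_ready > 1)
    qsort (ready, n_ready, sizeof *ready, toy_rank);
}
#endif
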
421 #undef TARGET_SCHED_DFA_NEW_CYCLE
422 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
423
424 #undef TARGET_SCHED_INIT_GLOBAL
425 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
426
427 #undef TARGET_SCHED_FINISH_GLOBAL
428 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
429
430 #undef TARGET_SCHED_VARIABLE_ISSUE
431 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
432
433 #undef TARGET_SCHED_REORDER
434 #define TARGET_SCHED_REORDER sh_reorder
435
436 #undef TARGET_SCHED_REORDER2
437 #define TARGET_SCHED_REORDER2 sh_reorder2
438
439 #undef TARGET_SCHED_INIT
440 #define TARGET_SCHED_INIT sh_md_init
441
442 #undef TARGET_DELEGITIMIZE_ADDRESS
443 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
444
445 #undef TARGET_LEGITIMIZE_ADDRESS
446 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
447
448 #undef TARGET_CANNOT_MODIFY_JUMPS_P
449 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
450 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
451 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
452 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
453 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
454 sh_optimize_target_register_callee_saved
455
456 #undef TARGET_MS_BITFIELD_LAYOUT_P
457 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
458
459 #undef TARGET_INIT_BUILTINS
460 #define TARGET_INIT_BUILTINS sh_init_builtins
461 #undef TARGET_BUILTIN_DECL
462 #define TARGET_BUILTIN_DECL sh_builtin_decl
463 #undef TARGET_EXPAND_BUILTIN
464 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
465
466 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
467 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
468
469 #undef TARGET_CANNOT_COPY_INSN_P
470 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
471 #undef TARGET_RTX_COSTS
472 #define TARGET_RTX_COSTS sh_rtx_costs
473 #undef TARGET_ADDRESS_COST
474 #define TARGET_ADDRESS_COST sh_address_cost
475 #undef TARGET_ALLOCATE_INITIAL_VALUE
476 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
477
478 #undef TARGET_MACHINE_DEPENDENT_REORG
479 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
480
481 #undef TARGET_DWARF_REGISTER_SPAN
482 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
483
484 #ifdef HAVE_AS_TLS
485 #undef TARGET_HAVE_TLS
486 #define TARGET_HAVE_TLS true
487 #endif
488
489 #undef TARGET_PROMOTE_PROTOTYPES
490 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
491 #undef TARGET_PROMOTE_FUNCTION_MODE
492 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
493
494 #undef TARGET_FUNCTION_VALUE
495 #define TARGET_FUNCTION_VALUE sh_function_value
496 #undef TARGET_FUNCTION_VALUE_REGNO_P
497 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
498 #undef TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE sh_libcall_value
500 #undef TARGET_STRUCT_VALUE_RTX
501 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
502 #undef TARGET_RETURN_IN_MEMORY
503 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
504
505 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
506 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
507 #undef TARGET_SETUP_INCOMING_VARARGS
508 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
509 #undef TARGET_STRICT_ARGUMENT_NAMING
510 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
511 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
512 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
513 #undef TARGET_MUST_PASS_IN_STACK
514 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
515 #undef TARGET_PASS_BY_REFERENCE
516 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
517 #undef TARGET_CALLEE_COPIES
518 #define TARGET_CALLEE_COPIES sh_callee_copies
519 #undef TARGET_ARG_PARTIAL_BYTES
520 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
521 #undef TARGET_FUNCTION_ARG
522 #define TARGET_FUNCTION_ARG sh_function_arg
523 #undef TARGET_FUNCTION_ARG_ADVANCE
524 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
525
526 #undef TARGET_BUILD_BUILTIN_VA_LIST
527 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
528 #undef TARGET_EXPAND_BUILTIN_VA_START
529 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
530 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
531 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
532
533 #undef TARGET_SCALAR_MODE_SUPPORTED_P
534 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
535 #undef TARGET_VECTOR_MODE_SUPPORTED_P
536 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
537
538 #undef TARGET_CHECK_PCH_TARGET_FLAGS
539 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
540
541 #undef TARGET_DWARF_CALLING_CONVENTION
542 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
543
544 #undef TARGET_FRAME_POINTER_REQUIRED
545 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
546
547 /* Return regmode weight for insn. */
548 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
549 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
550
551 /* Return current register pressure for regmode. */
552 #define CURR_REGMODE_PRESSURE(MODE)\
553 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
554
555 #undef TARGET_ENCODE_SECTION_INFO
556 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
557
558 #undef TARGET_SECONDARY_RELOAD
559 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
560
561 #undef TARGET_PREFERRED_RELOAD_CLASS
562 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
563
564 #undef TARGET_CONDITIONAL_REGISTER_USAGE
565 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
566
567 #undef TARGET_LEGITIMATE_ADDRESS_P
568 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
569
570 #undef TARGET_TRAMPOLINE_INIT
571 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
572 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
573 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
574
575 #undef TARGET_LEGITIMATE_CONSTANT_P
576 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
577
578 /* Machine-specific symbol_ref flags. */
579 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
580
581 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
582 is used by optabs.c atomic op expansion code as well as in sync.md. */
583 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
584 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
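/* That is, "tas.b @Rn" sets T only when the byte at Rn was zero and
   unconditionally ORs 0x80 into that byte, so a flag that was previously
   clear reads back as 0x80 afterwards.  */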
585
586 struct gcc_target targetm = TARGET_INITIALIZER;
587 \f
588 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
589 various options, and do some machine dependent initialization. */
590 static void
591 sh_option_override (void)
592 {
593 int regno;
594
595 SUBTARGET_OVERRIDE_OPTIONS;
596 if (optimize > 1 && !optimize_size)
597 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
598 if (flag_finite_math_only == 2)
599 flag_finite_math_only
600 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
601 if (TARGET_SH2E && !flag_finite_math_only)
602 target_flags |= MASK_IEEE;
603 sh_cpu = PROCESSOR_SH1;
604 assembler_dialect = 0;
605 if (TARGET_SH2)
606 sh_cpu = PROCESSOR_SH2;
607 if (TARGET_SH2E)
608 sh_cpu = PROCESSOR_SH2E;
609 if (TARGET_SH2A)
610 sh_cpu = PROCESSOR_SH2A;
611 if (TARGET_SH3)
612 sh_cpu = PROCESSOR_SH3;
613 if (TARGET_SH3E)
614 sh_cpu = PROCESSOR_SH3E;
615 if (TARGET_SH4)
616 {
617 assembler_dialect = 1;
618 sh_cpu = PROCESSOR_SH4;
619 }
620 if (TARGET_SH4A_ARCH)
621 {
622 assembler_dialect = 1;
623 sh_cpu = PROCESSOR_SH4A;
624 }
625 if (TARGET_SH5)
626 {
627 sh_cpu = PROCESSOR_SH5;
628 target_flags |= MASK_ALIGN_DOUBLE;
629 if (TARGET_SHMEDIA_FPU)
630 target_flags |= MASK_FMOVD;
631 if (TARGET_SHMEDIA)
632 {
633 /* There are no delay slots on SHmedia. */
634 flag_delayed_branch = 0;
635 /* Relaxation isn't yet supported for SHmedia. */
636 target_flags &= ~MASK_RELAX;
637 /* After reload, if-conversion does little good but can cause
638 ICEs:
639 - find_if_block doesn't do anything for SH because we don't
640 have conditional execution patterns. (We use conditional
641 move patterns, which are handled differently, and only
642 before reload).
643 - find_cond_trap doesn't do anything for the SH because we
644 don't have conditional traps.
645 - find_if_case_1 uses redirect_edge_and_branch_force in
646 the only path that does an optimization, and this causes
647 an ICE when branch targets are in registers.
648 - find_if_case_2 doesn't do anything for the SHmedia after
649 reload except when it can redirect a tablejump - and
650 that's rather rare. */
651 flag_if_conversion2 = 0;
652 if (! strcmp (sh_div_str, "call"))
653 sh_div_strategy = SH_DIV_CALL;
654 else if (! strcmp (sh_div_str, "call2"))
655 sh_div_strategy = SH_DIV_CALL2;
656 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
657 sh_div_strategy = SH_DIV_FP;
658 else if (! strcmp (sh_div_str, "inv"))
659 sh_div_strategy = SH_DIV_INV;
660 else if (! strcmp (sh_div_str, "inv:minlat"))
661 sh_div_strategy = SH_DIV_INV_MINLAT;
662 else if (! strcmp (sh_div_str, "inv20u"))
663 sh_div_strategy = SH_DIV_INV20U;
664 else if (! strcmp (sh_div_str, "inv20l"))
665 sh_div_strategy = SH_DIV_INV20L;
666 else if (! strcmp (sh_div_str, "inv:call2"))
667 sh_div_strategy = SH_DIV_INV_CALL2;
668 else if (! strcmp (sh_div_str, "inv:call"))
669 sh_div_strategy = SH_DIV_INV_CALL;
670 else if (! strcmp (sh_div_str, "inv:fp"))
671 {
672 if (TARGET_FPU_ANY)
673 sh_div_strategy = SH_DIV_INV_FP;
674 else
675 sh_div_strategy = SH_DIV_INV;
676 }
677 TARGET_CBRANCHDI4 = 0;
678 /* Assembler CFI isn't yet fully supported for SHmedia. */
679 flag_dwarf2_cfi_asm = 0;
680 }
681 }
682 else
683 {
684 /* Only the sh64-elf assembler fully supports .quad. */
685 targetm.asm_out.aligned_op.di = NULL;
686 targetm.asm_out.unaligned_op.di = NULL;
687 }
688 if (TARGET_SH1)
689 {
690 if (! strcmp (sh_div_str, "call-div1"))
691 sh_div_strategy = SH_DIV_CALL_DIV1;
692 else if (! strcmp (sh_div_str, "call-fp")
693 && (TARGET_FPU_DOUBLE
694 || (TARGET_HARD_SH4 && TARGET_SH2E)
695 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
696 sh_div_strategy = SH_DIV_CALL_FP;
697 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
698 sh_div_strategy = SH_DIV_CALL_TABLE;
699 else
700 /* Pick one that makes the most sense for the target in general.
701 It is not much good to use different functions depending
702 on -Os, since then we'll end up with two different functions
703 when some of the code is compiled for size, and some for
704 speed. */
705
706 /* SH4 tends to emphasize speed. */
707 if (TARGET_HARD_SH4)
708 sh_div_strategy = SH_DIV_CALL_TABLE;
709 /* These have their own way of doing things. */
710 else if (TARGET_SH2A)
711 sh_div_strategy = SH_DIV_INTRINSIC;
712 /* ??? Should we use the integer SHmedia function instead? */
713 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
714 sh_div_strategy = SH_DIV_CALL_FP;
715 /* SH1 .. SH3 cores often go into small-footprint systems, so
716 default to the smallest implementation available. */
717 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
718 sh_div_strategy = SH_DIV_CALL_TABLE;
719 else
720 sh_div_strategy = SH_DIV_CALL_DIV1;
721 }
722 if (!TARGET_SH1)
723 TARGET_PRETEND_CMOVE = 0;
724 if (sh_divsi3_libfunc[0])
725 ; /* User supplied - leave it alone. */
726 else if (TARGET_DIVIDE_CALL_FP)
727 sh_divsi3_libfunc = "__sdivsi3_i4";
728 else if (TARGET_DIVIDE_CALL_TABLE)
729 sh_divsi3_libfunc = "__sdivsi3_i4i";
730 else if (TARGET_SH5)
731 sh_divsi3_libfunc = "__sdivsi3_1";
732 else
733 sh_divsi3_libfunc = "__sdivsi3";
734 if (sh_branch_cost == -1)
735 {
736 sh_branch_cost = 1;
737
738 /* The SH1 has no delay slots for conditional branches, so we get a pipeline
739 stall at every branch. The SH4 is superscalar, so the single delay slot
740 is not sufficient to keep both pipelines filled. */
741 if (! TARGET_SH2 || TARGET_HARD_SH4)
742 sh_branch_cost = 2;
743 }
744
745 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
746 if (! VALID_REGISTER_P (regno))
747 sh_register_names[regno][0] = '\0';
748
749 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
750 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
751 sh_additional_register_names[regno][0] = '\0';
752
753 if ((flag_pic && ! TARGET_PREFERGOT)
754 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
755 flag_no_function_cse = 1;
756
757 if (targetm.small_register_classes_for_mode_p (VOIDmode))
758 {
759 /* Never run scheduling before reload, since that can
760 break global alloc, and generates slower code anyway due
761 to the pressure on R0. */
762 /* Enable sched1 for SH4 if the user explicitly requests it.
763 When sched1 is enabled, the ready queue will be reordered by
764 the target hooks if pressure is high. We cannot do this for
765 PIC, SH3 and lower, as they give spill failures for R0. */
766 if (!TARGET_HARD_SH4 || flag_pic)
767 flag_schedule_insns = 0;
768 /* ??? Current exception handling places basic block boundaries
769 after call_insns. This causes high pressure on R0 and gives
770 spill failures for R0 in reload. See PR 22553 and the thread
771 on gcc-patches
772 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
773 else if (flag_exceptions)
774 {
775 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
776 warning (0, "ignoring -fschedule-insns because of exception handling bug");
777 flag_schedule_insns = 0;
778 }
779 else if (flag_schedule_insns
780 && !global_options_set.x_flag_schedule_insns)
781 flag_schedule_insns = 0;
782 }
783
784 /* Unwind info is not correct around the CFG unless either a frame
785 pointer is present or M_A_O_A is set. Fixing this requires rewriting
786 unwind info generation to be aware of the CFG and propagating states
787 around edges. */
788 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
789 || flag_exceptions || flag_non_call_exceptions)
790 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
791 {
792 warning (0, "unwind tables currently require either a frame pointer "
793 "or -maccumulate-outgoing-args for correctness");
794 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
795 }
796
797 /* Unwinding with -freorder-blocks-and-partition does not work on this
798 architecture, because it requires far jumps to label crossing between
799 hot/cold sections which are rejected on this architecture. */
800 if (flag_reorder_blocks_and_partition)
801 {
802 if (flag_exceptions)
803 {
804 inform (input_location,
805 "-freorder-blocks-and-partition does not work with "
806 "exceptions on this architecture");
807 flag_reorder_blocks_and_partition = 0;
808 flag_reorder_blocks = 1;
809 }
810 else if (flag_unwind_tables)
811 {
812 inform (input_location,
813 "-freorder-blocks-and-partition does not support unwind "
814 "info on this architecture");
815 flag_reorder_blocks_and_partition = 0;
816 flag_reorder_blocks = 1;
817 }
818 }
819
820 /* Adjust loop, jump and function alignment values (in bytes), if those
821 were not specified by the user using -falign-loops, -falign-jumps
822 and -falign-functions options.
823 32 bit alignment is better for speed, because instructions can be
824 fetched as a pair from a longword boundary. For size use 16 bit
825 alignment to get more compact code.
826 Aligning all jumps increases the code size, even if it might
827 result in slightly faster code. Thus, it is set to the smallest
828 alignment possible if not specified by the user. */
829 if (align_loops == 0)
830 {
831 if (TARGET_SH5)
832 align_loops = 8;
833 else
834 align_loops = optimize_size ? 2 : 4;
835 }
836
837 if (align_jumps == 0)
838 {
839 if (TARGET_SHMEDIA)
840 align_jumps = 1 << CACHE_LOG;
841 else
842 align_jumps = 2;
843 }
844 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
845 align_jumps = TARGET_SHMEDIA ? 4 : 2;
846
847 if (align_functions == 0)
848 {
849 if (TARGET_SHMEDIA)
850 align_functions = optimize_size
851 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
852 else
853 align_functions = optimize_size ? 2 : 4;
854 }
855
856 /* The linker relaxation code breaks when a function contains
857 alignments that are larger than that at the start of a
858 compilation unit. */
859 if (TARGET_RELAX)
860 {
861 int min_align
862 = align_loops > align_jumps ? align_loops : align_jumps;
863
864 /* Also take possible .long constants / mova tables into account. */
865 if (min_align < 4)
866 min_align = 4;
867 if (align_functions < min_align)
868 align_functions = min_align;
869 }
870
871 if (flag_unsafe_math_optimizations)
872 {
873 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
874 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
875 TARGET_FSCA = 1;
876
877 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
878 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
879 TARGET_FSRRA = 1;
880 }
881
882 /* Allow fsrra insn only if -funsafe-math-optimizations and
883 -ffinite-math-only are enabled. */
884 TARGET_FSRRA = TARGET_FSRRA
885 && flag_unsafe_math_optimizations
886 && flag_finite_math_only;
887
888 if (sh_fixed_range_str)
889 sh_fix_range (sh_fixed_range_str);
890
891 /* This target defaults to strict volatile bitfields. */
892 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
893 flag_strict_volatile_bitfields = 1;
894
895 /* Make sure that only one atomic mode is selected and that the selection
896 is valid for the current target CPU. */
897 if (TARGET_SOFT_ATOMIC && TARGET_HARD_ATOMIC)
898 error ("-msoft-atomic and -mhard-atomic cannot be used at the same time");
899 if (TARGET_HARD_ATOMIC && ! TARGET_SH4A_ARCH)
900 error ("-mhard-atomic is only available for SH4A targets");
901 }
902 \f
903 /* Print the operand address in x to the stream. */
904
905 static void
906 sh_print_operand_address (FILE *stream, rtx x)
907 {
908 switch (GET_CODE (x))
909 {
910 case REG:
911 case SUBREG:
912 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
913 break;
914
915 case PLUS:
916 {
917 rtx base = XEXP (x, 0);
918 rtx index = XEXP (x, 1);
919
920 switch (GET_CODE (index))
921 {
922 case CONST_INT:
923 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
924 reg_names[true_regnum (base)]);
925 break;
926
927 case REG:
928 case SUBREG:
929 {
930 int base_num = true_regnum (base);
931 int index_num = true_regnum (index);
932
933 fprintf (stream, "@(r0,%s)",
934 reg_names[MAX (base_num, index_num)]);
935 break;
936 }
937
938 default:
939 gcc_unreachable ();
940 }
941 }
942 break;
943
944 case PRE_DEC:
945 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
946 break;
947
948 case POST_INC:
949 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
950 break;
951
952 default:
953 x = mark_constant_pool_use (x);
954 output_addr_const (stream, x);
955 break;
956 }
957 }
958
959 /* Print operand x (an rtx) in assembler syntax to file stream
960 according to modifier code.
961
962 '.' print a .s if insn needs delay slot
963 ',' print LOCAL_LABEL_PREFIX
964 '@' print trap, rte or rts depending upon pragma interruptness
965 '#' output a nop if there is nothing to put in the delay slot
966 ''' print likelihood suffix (/u for unlikely).
967 '>' print branch target if -fverbose-asm
968 'O' print a constant without the #
969 'R' print the LSW of a dp value - changes if in little endian
970 'S' print the MSW of a dp value - changes if in little endian
971 'T' print the next word of a dp value - same as 'R' in big endian mode.
972 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
973 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
974 'N' print 'r63' if the operand is (const_int 0).
975 'd' print a V2SF reg as dN instead of fpN.
976 'm' print a pair `base,offset' or `base,index', for LD and ST.
977 'U' Likewise for {LD,ST}{HI,LO}.
978 'V' print the position of a single bit set.
979 'W' print the position of a single bit cleared.
980 't' print a memory address which is a register.
981 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
982 'o' output an operator. */
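/* A few worked examples of the simpler codes above (illustrative):
   for (const_int 8), '%V' prints "#3" (the position of the single set bit);
   for (const_int -9), whose complement is 8, '%W' likewise prints "#3";
   for (const_int 0x12345), '%u' prints "9029", i.e. 0x2345, the low 16 bits
   as an unsigned value.  On a little-endian target, for a DImode value held
   in r2/r3, '%S' prints "r3" (the most significant word) and '%R' prints
   "r2".  */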
983
984 static void
985 sh_print_operand (FILE *stream, rtx x, int code)
986 {
987 int regno;
988 enum machine_mode mode;
989
990 switch (code)
991 {
992 tree trapa_attr;
993
994 case '.':
995 if (final_sequence
996 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
997 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
998 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
999 break;
1000 case ',':
1001 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1002 break;
1003 case '@':
1004 trapa_attr = lookup_attribute ("trap_exit",
1005 DECL_ATTRIBUTES (current_function_decl));
1006 if (trapa_attr)
1007 fprintf (stream, "trapa #%ld",
1008 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1009 else if (sh_cfun_interrupt_handler_p ())
1010 {
1011 if (sh_cfun_resbank_handler_p ())
1012 fprintf (stream, "resbank\n");
1013 fprintf (stream, "rte");
1014 }
1015 else
1016 fprintf (stream, "rts");
1017 break;
1018 case '#':
1019 /* Output a nop if there's nothing in the delay slot. */
1020 if (dbr_sequence_length () == 0)
1021 fprintf (stream, "\n\tnop");
1022 break;
1023 case '\'':
1024 {
1025 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1026
1027 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1028 fputs ("/u", stream);
1029 break;
1030 }
1031 case '>':
1032 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1033 {
1034 fputs ("\t! target: ", stream);
1035 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1036 }
1037 break;
1038 case 'O':
1039 x = mark_constant_pool_use (x);
1040 output_addr_const (stream, x);
1041 break;
1042 /* N.B.: %R / %S / %T adjust memory addresses by four.
1043 For SHMEDIA, that means they can be used to access the first and
1044 second 32 bit part of a 64 bit (or larger) value that
1045 might be held in floating point registers or memory.
1046 While they can be used to access 64 bit parts of a larger value
1047 held in general purpose registers, that won't work with memory -
1048 neither for fp registers, since the frxx names are used. */
1049 case 'R':
1050 if (REG_P (x) || GET_CODE (x) == SUBREG)
1051 {
1052 regno = true_regnum (x);
1053 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1054 fputs (reg_names[regno], (stream));
1055 }
1056 else if (MEM_P (x))
1057 {
1058 x = adjust_address (x, SImode, 4 * LSW);
1059 sh_print_operand_address (stream, XEXP (x, 0));
1060 }
1061 else
1062 {
1063 rtx sub = NULL_RTX;
1064
1065 mode = GET_MODE (x);
1066 if (mode == VOIDmode)
1067 mode = DImode;
1068 if (GET_MODE_SIZE (mode) >= 8)
1069 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1070 if (sub)
1071 sh_print_operand (stream, sub, 0);
1072 else
1073 output_operand_lossage ("invalid operand to %%R");
1074 }
1075 break;
1076 case 'S':
1077 if (REG_P (x) || GET_CODE (x) == SUBREG)
1078 {
1079 regno = true_regnum (x);
1080 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1081 fputs (reg_names[regno], (stream));
1082 }
1083 else if (MEM_P (x))
1084 {
1085 x = adjust_address (x, SImode, 4 * MSW);
1086 sh_print_operand_address (stream, XEXP (x, 0));
1087 }
1088 else
1089 {
1090 rtx sub = NULL_RTX;
1091
1092 mode = GET_MODE (x);
1093 if (mode == VOIDmode)
1094 mode = DImode;
1095 if (GET_MODE_SIZE (mode) >= 8)
1096 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1097 if (sub)
1098 sh_print_operand (stream, sub, 0);
1099 else
1100 output_operand_lossage ("invalid operand to %%S");
1101 }
1102 break;
1103 case 'T':
1104 /* Next word of a double. */
1105 switch (GET_CODE (x))
1106 {
1107 case REG:
1108 fputs (reg_names[REGNO (x) + 1], (stream));
1109 break;
1110 case MEM:
1111 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1112 && GET_CODE (XEXP (x, 0)) != POST_INC)
1113 x = adjust_address (x, SImode, 4);
1114 sh_print_operand_address (stream, XEXP (x, 0));
1115 break;
1116 default:
1117 break;
1118 }
1119 break;
1120
1121 case 't':
1122 gcc_assert (MEM_P (x));
1123 x = XEXP (x, 0);
1124 switch (GET_CODE (x))
1125 {
1126 case REG:
1127 case SUBREG:
1128 sh_print_operand (stream, x, 0);
1129 break;
1130 default:
1131 break;
1132 }
1133 break;
1134
1135 case 'o':
1136 switch (GET_CODE (x))
1137 {
1138 case PLUS: fputs ("add", stream); break;
1139 case MINUS: fputs ("sub", stream); break;
1140 case MULT: fputs ("mul", stream); break;
1141 case DIV: fputs ("div", stream); break;
1142 case EQ: fputs ("eq", stream); break;
1143 case NE: fputs ("ne", stream); break;
1144 case GT: case LT: fputs ("gt", stream); break;
1145 case GE: case LE: fputs ("ge", stream); break;
1146 case GTU: case LTU: fputs ("gtu", stream); break;
1147 case GEU: case LEU: fputs ("geu", stream); break;
1148 default:
1149 break;
1150 }
1151 break;
1152 case 'M':
1153 if (TARGET_SHMEDIA)
1154 {
1155 if (MEM_P (x)
1156 && GET_CODE (XEXP (x, 0)) == PLUS
1157 && (REG_P (XEXP (XEXP (x, 0), 1))
1158 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1159 fputc ('x', stream);
1160 }
1161 else
1162 {
1163 if (MEM_P (x))
1164 {
1165 switch (GET_MODE (x))
1166 {
1167 case QImode: fputs (".b", stream); break;
1168 case HImode: fputs (".w", stream); break;
1169 case SImode: fputs (".l", stream); break;
1170 case SFmode: fputs (".s", stream); break;
1171 case DFmode: fputs (".d", stream); break;
1172 default: gcc_unreachable ();
1173 }
1174 }
1175 }
1176 break;
1177
1178 case 'm':
1179 gcc_assert (MEM_P (x));
1180 x = XEXP (x, 0);
1181 /* Fall through. */
1182 case 'U':
1183 switch (GET_CODE (x))
1184 {
1185 case REG:
1186 case SUBREG:
1187 sh_print_operand (stream, x, 0);
1188 fputs (", 0", stream);
1189 break;
1190
1191 case PLUS:
1192 sh_print_operand (stream, XEXP (x, 0), 0);
1193 fputs (", ", stream);
1194 sh_print_operand (stream, XEXP (x, 1), 0);
1195 break;
1196
1197 default:
1198 gcc_unreachable ();
1199 }
1200 break;
1201
1202 case 'V':
1203 {
1204 int num = exact_log2 (INTVAL (x));
1205 gcc_assert (num >= 0);
1206 fprintf (stream, "#%d", num);
1207 }
1208 break;
1209
1210 case 'W':
1211 {
1212 int num = exact_log2 (~INTVAL (x));
1213 gcc_assert (num >= 0);
1214 fprintf (stream, "#%d", num);
1215 }
1216 break;
1217
1218 case 'd':
1219 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1220
1221 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1222 break;
1223
1224 case 'N':
1225 if (x == CONST0_RTX (GET_MODE (x)))
1226 {
1227 fprintf ((stream), "r63");
1228 break;
1229 }
1230 goto default_output;
1231 case 'u':
1232 if (CONST_INT_P (x))
1233 {
1234 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1235 break;
1236 }
1237 /* Fall through. */
1238
1239 default_output:
1240 default:
1241 regno = 0;
1242 mode = GET_MODE (x);
1243
1244 switch (GET_CODE (x))
1245 {
1246 case TRUNCATE:
1247 {
1248 rtx inner = XEXP (x, 0);
1249 int offset = 0;
1250 enum machine_mode inner_mode;
1251
1252 /* We might see SUBREGs with vector mode registers inside. */
1253 if (GET_CODE (inner) == SUBREG
1254 && (GET_MODE_SIZE (GET_MODE (inner))
1255 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1256 && subreg_lowpart_p (inner))
1257 inner = SUBREG_REG (inner);
1258 if (CONST_INT_P (inner))
1259 {
1260 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1261 goto default_output;
1262 }
1263 inner_mode = GET_MODE (inner);
1264 if (GET_CODE (inner) == SUBREG
1265 && (GET_MODE_SIZE (GET_MODE (inner))
1266 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1267 && REG_P (SUBREG_REG (inner)))
1268 {
1269 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1270 GET_MODE (SUBREG_REG (inner)),
1271 SUBREG_BYTE (inner),
1272 GET_MODE (inner));
1273 inner = SUBREG_REG (inner);
1274 }
1275 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1276 abort ();
1277 /* Floating point register pairs are always big endian;
1278 general purpose registers are 64 bit wide. */
1279 regno = REGNO (inner);
1280 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1281 - HARD_REGNO_NREGS (regno, mode))
1282 + offset;
1283 x = inner;
1284 goto reg;
1285 }
1286 case SIGN_EXTEND:
1287 x = XEXP (x, 0);
1288 goto reg;
1289 /* FIXME: We need this on SHmedia32 because reload generates
1290 some sign-extended HI or QI loads into DImode registers
1291 but, because Pmode is SImode, the address ends up with a
1292 subreg:SI of the DImode register. Maybe reload should be
1293 fixed so as to apply alter_subreg to such loads? */
1294 case IF_THEN_ELSE:
1295 gcc_assert (trapping_target_operand (x, VOIDmode));
1296 x = XEXP (XEXP (x, 2), 0);
1297 goto default_output;
1298 case SUBREG:
1299 gcc_assert (SUBREG_BYTE (x) == 0
1300 && REG_P (SUBREG_REG (x)));
1301
1302 x = SUBREG_REG (x);
1303 /* Fall through. */
1304
1305 reg:
1306 case REG:
1307 regno += REGNO (x);
1308 if (FP_REGISTER_P (regno)
1309 && mode == V16SFmode)
1310 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1311 else if (FP_REGISTER_P (REGNO (x))
1312 && mode == V4SFmode)
1313 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1314 else if (REG_P (x)
1315 && mode == V2SFmode)
1316 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1317 else if (FP_REGISTER_P (REGNO (x))
1318 && GET_MODE_SIZE (mode) > 4)
1319 fprintf ((stream), "d%s", reg_names[regno] + 1);
1320 else
1321 fputs (reg_names[regno], (stream));
1322 break;
1323
1324 case MEM:
1325 output_address (XEXP (x, 0));
1326 break;
1327
1328 default:
1329 if (TARGET_SH1)
1330 fputc ('#', stream);
1331 output_addr_const (stream, x);
1332 break;
1333 }
1334 break;
1335 }
1336 }
1337
1338 static bool
1339 sh_print_operand_punct_valid_p (unsigned char code)
1340 {
1341 return (code == '.' || code == '#' || code == '@' || code == ','
1342 || code == '$' || code == '\'' || code == '>');
1343 }
1344
1345 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1346
1347 static bool
1348 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1349 {
1350 if (GET_CODE (x) == UNSPEC)
1351 {
1352 switch (XINT (x, 1))
1353 {
1354 case UNSPEC_DATALABEL:
1355 fputs ("datalabel ", file);
1356 output_addr_const (file, XVECEXP (x, 0, 0));
1357 break;
1358 case UNSPEC_PIC:
1359 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1360 output_addr_const (file, XVECEXP (x, 0, 0));
1361 break;
1362 case UNSPEC_GOT:
1363 output_addr_const (file, XVECEXP (x, 0, 0));
1364 fputs ("@GOT", file);
1365 break;
1366 case UNSPEC_GOTOFF:
1367 output_addr_const (file, XVECEXP (x, 0, 0));
1368 fputs ("@GOTOFF", file);
1369 break;
1370 case UNSPEC_PLT:
1371 output_addr_const (file, XVECEXP (x, 0, 0));
1372 fputs ("@PLT", file);
1373 break;
1374 case UNSPEC_GOTPLT:
1375 output_addr_const (file, XVECEXP (x, 0, 0));
1376 fputs ("@GOTPLT", file);
1377 break;
1378 case UNSPEC_DTPOFF:
1379 output_addr_const (file, XVECEXP (x, 0, 0));
1380 fputs ("@DTPOFF", file);
1381 break;
1382 case UNSPEC_GOTTPOFF:
1383 output_addr_const (file, XVECEXP (x, 0, 0));
1384 fputs ("@GOTTPOFF", file);
1385 break;
1386 case UNSPEC_TPOFF:
1387 output_addr_const (file, XVECEXP (x, 0, 0));
1388 fputs ("@TPOFF", file);
1389 break;
1390 case UNSPEC_CALLER:
1391 {
1392 char name[32];
1393 /* LPCS stands for Label for PIC Call Site. */
1394 targetm.asm_out.generate_internal_label (name, "LPCS",
1395 INTVAL (XVECEXP (x, 0, 0)));
1396 assemble_name (file, name);
1397 }
1398 break;
1399 case UNSPEC_EXTRACT_S16:
1400 case UNSPEC_EXTRACT_U16:
1401 {
1402 rtx val, shift;
1403
1404 val = XVECEXP (x, 0, 0);
1405 shift = XVECEXP (x, 0, 1);
1406 fputc ('(', file);
1407 if (shift != const0_rtx)
1408 fputc ('(', file);
1409 if (GET_CODE (val) == CONST
1410 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1411 {
1412 fputc ('(', file);
1413 output_addr_const (file, val);
1414 fputc (')', file);
1415 }
1416 else
1417 output_addr_const (file, val);
1418 if (shift != const0_rtx)
1419 {
1420 fputs (" >> ", file);
1421 output_addr_const (file, shift);
1422 fputc (')', file);
1423 }
1424 fputs (" & 65535)", file);
1425 }
1426 break;
1427 case UNSPEC_SYMOFF:
1428 output_addr_const (file, XVECEXP (x, 0, 0));
1429 fputc ('-', file);
1430 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1431 {
1432 fputc ('(', file);
1433 output_addr_const (file, XVECEXP (x, 0, 1));
1434 fputc (')', file);
1435 }
1436 else
1437 output_addr_const (file, XVECEXP (x, 0, 1));
1438 break;
1439 case UNSPEC_PCREL_SYMOFF:
1440 output_addr_const (file, XVECEXP (x, 0, 0));
1441 fputs ("-(", file);
1442 output_addr_const (file, XVECEXP (x, 0, 1));
1443 fputs ("-.)", file);
1444 break;
1445 default:
1446 return false;
1447 }
1448 return true;
1449 }
1450 else
1451 return false;
1452 }
1453 \f
1454
1455 /* Encode symbol attributes of a SYMBOL_REF into its
1456 SYMBOL_REF_FLAGS. */
1457 static void
1458 sh_encode_section_info (tree decl, rtx rtl, int first)
1459 {
1460 default_encode_section_info (decl, rtl, first);
1461
1462 if (TREE_CODE (decl) == FUNCTION_DECL
1463 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1464 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1465 }
1466
1467 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1468 static void
1469 force_into (rtx value, rtx target)
1470 {
1471 value = force_operand (value, target);
1472 if (! rtx_equal_p (value, target))
1473 emit_insn (gen_move_insn (target, value));
1474 }
1475
1476 /* Emit code to perform a block move. Choose the best method.
1477
1478 OPERANDS[0] is the destination.
1479 OPERANDS[1] is the source.
1480 OPERANDS[2] is the size.
1481 OPERANDS[3] is the alignment safe to use. */
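/* For example (see the code below): a constant, longword-aligned 12-byte
   copy on SH4 expands to a call to __movmemSI12_i4, while on other SH
   parts aligned constant copies of fewer than 64 bytes call the
   size-specific __movmemSI<n> helpers and larger ones use __movmem,
   unless optimizing for size, in which case this function simply
   returns false.  */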
1482
1483 bool
1484 expand_block_move (rtx *operands)
1485 {
1486 int align = INTVAL (operands[3]);
1487 int constp = (CONST_INT_P (operands[2]));
1488 int bytes = (constp ? INTVAL (operands[2]) : 0);
1489
1490 if (! constp)
1491 return false;
1492
1493 /* If we could use mov.l to move words and dest is word-aligned, we
1494 can use movua.l for loads and still generate a relatively short
1495 and efficient sequence. */
1496 if (TARGET_SH4A_ARCH && align < 4
1497 && MEM_ALIGN (operands[0]) >= 32
1498 && can_move_by_pieces (bytes, 32))
1499 {
1500 rtx dest = copy_rtx (operands[0]);
1501 rtx src = copy_rtx (operands[1]);
1502 /* We could use different pseudos for each copied word, but
1503 since movua can only load into r0, it's kind of
1504 pointless. */
1505 rtx temp = gen_reg_rtx (SImode);
1506 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1507 int copied = 0;
1508
1509 while (copied + 4 <= bytes)
1510 {
1511 rtx to = adjust_address (dest, SImode, copied);
1512 rtx from = adjust_automodify_address (src, BLKmode,
1513 src_addr, copied);
1514
1515 set_mem_size (from, 4);
1516 emit_insn (gen_movua (temp, from));
1517 emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
1518 emit_move_insn (to, temp);
1519 copied += 4;
1520 }
1521
1522 if (copied < bytes)
1523 move_by_pieces (adjust_address (dest, BLKmode, copied),
1524 adjust_automodify_address (src, BLKmode,
1525 src_addr, copied),
1526 bytes - copied, align, 0);
1527
1528 return true;
1529 }
1530
1531 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1532 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1533 if (align < 4 || (bytes % 4 != 0))
1534 return false;
1535
1536 if (TARGET_HARD_SH4)
1537 {
1538 if (bytes < 12)
1539 return false;
1540 else if (bytes == 12)
1541 {
1542 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1543 rtx r4 = gen_rtx_REG (SImode, 4);
1544 rtx r5 = gen_rtx_REG (SImode, 5);
1545
1546 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1547 force_into (XEXP (operands[0], 0), r4);
1548 force_into (XEXP (operands[1], 0), r5);
1549 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1550 return true;
1551 }
1552 else if (! optimize_size)
1553 {
1554 const char *entry_name;
1555 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1556 int dwords;
1557 rtx r4 = gen_rtx_REG (SImode, 4);
1558 rtx r5 = gen_rtx_REG (SImode, 5);
1559 rtx r6 = gen_rtx_REG (SImode, 6);
1560
1561 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1562 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1563 force_into (XEXP (operands[0], 0), r4);
1564 force_into (XEXP (operands[1], 0), r5);
1565
1566 dwords = bytes >> 3;
1567 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1568 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1569 return true;
1570 }
1571 else
1572 return false;
1573 }
1574 if (bytes < 64)
1575 {
1576 char entry[30];
1577 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1578 rtx r4 = gen_rtx_REG (SImode, 4);
1579 rtx r5 = gen_rtx_REG (SImode, 5);
1580
1581 sprintf (entry, "__movmemSI%d", bytes);
1582 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1583 force_into (XEXP (operands[0], 0), r4);
1584 force_into (XEXP (operands[1], 0), r5);
1585 emit_insn (gen_block_move_real (func_addr_rtx));
1586 return true;
1587 }
1588
1589 /* This is the same number of bytes as a memcpy call, but to a different,
1590 less common function name, so this will occasionally use more space. */
1591 if (! optimize_size)
1592 {
1593 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1594 int final_switch, while_loop;
1595 rtx r4 = gen_rtx_REG (SImode, 4);
1596 rtx r5 = gen_rtx_REG (SImode, 5);
1597 rtx r6 = gen_rtx_REG (SImode, 6);
1598
1599 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1600 force_into (XEXP (operands[0], 0), r4);
1601 force_into (XEXP (operands[1], 0), r5);
1602
1603 /* r6 controls the size of the move. 16 is decremented from it
1604 for each 64 bytes moved. Then the negative bit left over is used
1605 as an index into a list of move instructions. e.g., a 72 byte move
1606 would be set up with size(r6) = 14, for one iteration through the
1607 big while loop, and a switch of -2 for the last part. */
1608
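/* Worked example (illustrative): for bytes == 72, bytes / 4 == 18, so
   final_switch == 16 - (18 % 16) == 14 and while_loop == 0, giving
   r6 == 14 as described above; after one pass through the loop the
   leftover value of -2 selects the moves for the final 8 bytes.  */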
1609 final_switch = 16 - ((bytes / 4) % 16);
1610 while_loop = ((bytes / 4) / 16 - 1) * 16;
1611 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1612 emit_insn (gen_block_lump_real (func_addr_rtx));
1613 return true;
1614 }
1615
1616 return false;
1617 }
1618
1619 /* Prepare operands for a move define_expand; specifically, one of the
1620 operands must be in a register. */
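/* For instance, a memory-to-memory move has its source copied into a
   pseudo first (via copy_to_mode_reg below), since neither operand is a
   register; PIC and TLS symbolic addresses are likewise legitimized here
   before the move itself is emitted.  */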
1621
1622 void
1623 prepare_move_operands (rtx operands[], enum machine_mode mode)
1624 {
1625 if ((mode == SImode || mode == DImode)
1626 && flag_pic
1627 && ! ((mode == Pmode || mode == ptr_mode)
1628 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1629 {
1630 rtx temp;
1631 if (SYMBOLIC_CONST_P (operands[1]))
1632 {
1633 if (MEM_P (operands[0]))
1634 operands[1] = force_reg (Pmode, operands[1]);
1635 else if (TARGET_SHMEDIA
1636 && GET_CODE (operands[1]) == LABEL_REF
1637 && target_reg_operand (operands[0], mode))
1638 /* It's ok. */;
1639 else
1640 {
1641 temp = (!can_create_pseudo_p ()
1642 ? operands[0]
1643 : gen_reg_rtx (Pmode));
1644 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1645 }
1646 }
1647 else if (GET_CODE (operands[1]) == CONST
1648 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1649 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1650 {
1651 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1652 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1653 mode, temp);
1654 operands[1] = expand_binop (mode, add_optab, temp,
1655 XEXP (XEXP (operands[1], 0), 1),
1656 (!can_create_pseudo_p ()
1657 ? temp
1658 : gen_reg_rtx (Pmode)),
1659 0, OPTAB_LIB_WIDEN);
1660 }
1661 }
1662
1663 if (! reload_in_progress && ! reload_completed)
1664 {
1665 /* Copy the source to a register if both operands aren't registers. */
1666 if (! register_operand (operands[0], mode)
1667 && ! sh_register_operand (operands[1], mode))
1668 operands[1] = copy_to_mode_reg (mode, operands[1]);
1669
1670 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1671 {
1672 /* This is like change_address_1 (operands[0], mode, 0, 1),
1673 except that we can't use that function because it is static. */
1674 rtx new_rtx = change_address (operands[0], mode, 0);
1675 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1676 operands[0] = new_rtx;
1677 }
1678
1679 /* This case can happen while generating code to move the result
1680 of a library call to the target. Reject `st r0,@(rX,rY)' because
1681 reload will fail to find a spill register for rX, since r0 is already
1682 being used for the source. */
1683 else if (TARGET_SH1
1684 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1685 && MEM_P (operands[0])
1686 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1687 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1688 operands[1] = copy_to_mode_reg (mode, operands[1]);
1689 }
1690
1691 if (mode == Pmode || mode == ptr_mode)
1692 {
1693 rtx op0, op1, opc;
1694 enum tls_model tls_kind;
1695
1696 op0 = operands[0];
1697 op1 = operands[1];
1698 if (GET_CODE (op1) == CONST
1699 && GET_CODE (XEXP (op1, 0)) == PLUS
1700 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1701 != TLS_MODEL_NONE))
1702 {
1703 opc = XEXP (XEXP (op1, 0), 1);
1704 op1 = XEXP (XEXP (op1, 0), 0);
1705 }
1706 else
1707 opc = NULL_RTX;
1708
1709 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1710 {
1711 rtx tga_op1, tga_ret, tmp, tmp2;
1712
1713 if (! flag_pic
1714 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1715 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1716 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1717 {
1718 /* Don't schedule insns for getting GOT address when
1719 the first scheduling pass is enabled, to avoid spill
1720 failures for R0. */
1721 if (flag_schedule_insns)
1722 emit_insn (gen_blockage ());
1723 emit_insn (gen_GOTaddr2picreg ());
1724 emit_use (gen_rtx_REG (SImode, PIC_REG));
1725 if (flag_schedule_insns)
1726 emit_insn (gen_blockage ());
1727 }
1728
1729 switch (tls_kind)
1730 {
1731 case TLS_MODEL_GLOBAL_DYNAMIC:
1732 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1733 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1734 tmp = gen_reg_rtx (Pmode);
1735 emit_move_insn (tmp, tga_ret);
1736 op1 = tmp;
1737 break;
1738
1739 case TLS_MODEL_LOCAL_DYNAMIC:
1740 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1741 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1742
1743 tmp = gen_reg_rtx (Pmode);
1744 emit_move_insn (tmp, tga_ret);
1745
1746 if (register_operand (op0, Pmode))
1747 tmp2 = op0;
1748 else
1749 tmp2 = gen_reg_rtx (Pmode);
1750
1751 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1752 op1 = tmp2;
1753 break;
1754
1755 case TLS_MODEL_INITIAL_EXEC:
1756 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1757 tmp = gen_sym2GOTTPOFF (op1);
1758 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1759 op1 = tga_op1;
1760 break;
1761
1762 case TLS_MODEL_LOCAL_EXEC:
1763 tmp2 = gen_reg_rtx (Pmode);
1764 emit_insn (gen_load_gbr (tmp2));
1765 tmp = gen_reg_rtx (Pmode);
1766 emit_insn (gen_symTPOFF2reg (tmp, op1));
1767
1768 if (register_operand (op0, Pmode))
1769 op1 = op0;
1770 else
1771 op1 = gen_reg_rtx (Pmode);
1772
1773 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1774 break;
1775
1776 default:
1777 gcc_unreachable ();
1778 }
1779 if (opc)
1780 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1781 operands[1] = op1;
1782 }
1783 }
1784 }
1785
1786 enum rtx_code
1787 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1788 enum rtx_code comparison)
1789 {
1790 rtx op1;
1791 rtx scratch = NULL_RTX;
1792
1793 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1794 comparison = GET_CODE (operands[0]);
1795 else
1796 scratch = operands[4];
1797 if (CONST_INT_P (operands[1])
1798 && !CONST_INT_P (operands[2]))
1799 {
1800 rtx tmp = operands[1];
1801
1802 operands[1] = operands[2];
1803 operands[2] = tmp;
1804 comparison = swap_condition (comparison);
1805 }
1806 if (CONST_INT_P (operands[2]))
1807 {
1808 HOST_WIDE_INT val = INTVAL (operands[2]);
1809 if ((val == -1 || val == -0x81)
1810 && (comparison == GT || comparison == LE))
1811 {
1812 comparison = (comparison == GT) ? GE : LT;
1813 operands[2] = gen_int_mode (val + 1, mode);
1814 }
1815 else if ((val == 1 || val == 0x80)
1816 && (comparison == GE || comparison == LT))
1817 {
1818 comparison = (comparison == GE) ? GT : LE;
1819 operands[2] = gen_int_mode (val - 1, mode);
1820 }
1821 else if (val == 1 && (comparison == GEU || comparison == LTU))
1822 {
1823 comparison = (comparison == GEU) ? NE : EQ;
1824 operands[2] = CONST0_RTX (mode);
1825 }
1826 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1827 {
1828 comparison = (comparison == GEU) ? GTU : LEU;
1829 operands[2] = gen_int_mode (val - 1, mode);
1830 }
1831 else if (val == 0 && (comparison == GTU || comparison == LEU))
1832 comparison = (comparison == GTU) ? NE : EQ;
1833 else if (mode == SImode
1834 && ((val == 0x7fffffff
1835 && (comparison == GTU || comparison == LEU))
1836 || ((unsigned HOST_WIDE_INT) val
1837 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1838 && (comparison == GEU || comparison == LTU))))
1839 {
1840 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1841 operands[2] = CONST0_RTX (mode);
1842 }
1843 }
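  /* For instance, the adjustments above turn x >= 1 into x > 0 and an
     unsigned x >= 1 into x != 0; comparing against zero means the
     constant never has to be loaded into a register (see below).  */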
1844 op1 = operands[1];
1845 if (can_create_pseudo_p ())
1846 operands[1] = force_reg (mode, op1);
1847 /* When we are handling DImode comparisons, we want to keep constants so
1848 that we can optimize the component comparisons; however, memory loads
1849 are better issued as a whole so that they can be scheduled well.
1850 SImode equality comparisons allow I08 constants, but only when they
1851 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1852 into a register, that register might as well be r0, and we allow the
1853 constant. If it is already in a register, this is likely to be
1854 allocated to a different hard register, thus we load the constant into
1855 a register unless it is zero. */
1856 if (!REG_P (operands[2])
1857 && (!CONST_INT_P (operands[2])
1858 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1859 && ((comparison != EQ && comparison != NE)
1860 || (REG_P (op1) && REGNO (op1) != R0_REG)
1861 || !satisfies_constraint_I08 (operands[2])))))
1862 {
1863 if (scratch && GET_MODE (scratch) == mode)
1864 {
1865 emit_move_insn (scratch, operands[2]);
1866 operands[2] = scratch;
1867 }
1868 else if (can_create_pseudo_p ())
1869 operands[2] = force_reg (mode, operands[2]);
1870 }
1871 return comparison;
1872 }
1873
1874 void
1875 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1876 {
1877 rtx (*branch_expander) (rtx) = gen_branch_true;
1878 rtx jump;
1879
1880 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1881 switch (comparison)
1882 {
1883 case NE: case LT: case LE: case LTU: case LEU:
1884 comparison = reverse_condition (comparison);
1885 branch_expander = gen_branch_false;
1886 default: ;
1887 }
1888 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1889 gen_rtx_fmt_ee (comparison, SImode,
1890 operands[1], operands[2])));
1891 jump = emit_jump_insn (branch_expander (operands[3]));
1892 if (probability >= 0)
1893 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1894
1895 }
1896
1897 /* ??? How should we distribute probabilities when more than one branch
1898 is generated? So far we only have some ad-hoc observations:
1899 - If the operands are random, they are likely to differ in both parts.
1900 - If comparing items in a hash chain, the operands are random or equal;
1901 operation should be EQ or NE.
1902 - If items are searched in an ordered tree from the root, we can expect
1903 the highpart to be unequal about half of the time; operation should be
1904 an inequality comparison, operands non-constant, and overall probability
1905 about 50%. Likewise for quicksort.
1906 - Range checks will often be made against constants. Even if we assume for
1907 simplicity an even distribution of the non-constant operand over a
1908 sub-range here, the same probability could be generated with differently
1909 wide sub-ranges - as long as the ratio of the part of the subrange that
1910 is before the threshold to the part that comes after the threshold stays
1911 the same. Thus, we can't really tell anything here;
1912 assuming random distribution is at least simple.
1913 */
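/* As an illustration of the splitting done below: a signed DImode
   comparison a > b with non-constant operands ends up with
   msw_taken = GT, msw_skip = LT and lsw_taken = GTU, i.e. branch to
   the target if the high words compare greater, branch past the low
   word test if they compare less, and otherwise branch on an unsigned
   comparison of the low words.  */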
1914
1915 bool
1916 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1917 {
1918 enum rtx_code msw_taken, msw_skip, lsw_taken;
1919 rtx skip_label = NULL_RTX;
1920 rtx op1h, op1l, op2h, op2l;
1921 int num_branches;
1922 int prob, rev_prob;
1923 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1924 rtx scratch = operands[4];
1925
1926 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1927 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1928 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1929 op1l = gen_lowpart (SImode, operands[1]);
1930 op2l = gen_lowpart (SImode, operands[2]);
1931 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1932 prob = split_branch_probability;
1933 rev_prob = REG_BR_PROB_BASE - prob;
1934 switch (comparison)
1935 {
1936 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1937 That costs 1 cycle more when the first branch can be predicted taken,
1938 but saves us mispredicts because only one branch needs prediction.
1939 It also enables generating the cmpeqdi_t-1 pattern. */
1940 case EQ:
1941 if (TARGET_CMPEQDI_T)
1942 {
1943 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1944 emit_jump_insn (gen_branch_true (operands[3]));
1945 return true;
1946 }
1947 msw_skip = NE;
1948 lsw_taken = EQ;
1949 if (prob >= 0)
1950 {
1951 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).
1952 */
1953 msw_skip_prob = rev_prob;
1954 if (REG_BR_PROB_BASE <= 65535)
1955 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1956 else
1957 {
1958 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1959 lsw_taken_prob
1960 = (prob
1961 ? (REG_BR_PROB_BASE
1962 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1963 / ((HOST_WIDEST_INT) prob << 32)))
1964 : 0);
1965 }
1966 }
1967 break;
1968 case NE:
1969 if (TARGET_CMPEQDI_T)
1970 {
1971 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1972 emit_jump_insn (gen_branch_false (operands[3]));
1973 return true;
1974 }
1975 msw_taken = NE;
1976 msw_taken_prob = prob;
1977 lsw_taken = NE;
1978 lsw_taken_prob = 0;
1979 break;
1980 case GTU: case GT:
1981 msw_taken = comparison;
1982 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1983 break;
1984 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1985 msw_skip = swap_condition (msw_taken);
1986 lsw_taken = GTU;
1987 break;
1988 case GEU: case GE:
1989 if (op2l == CONST0_RTX (SImode))
1990 msw_taken = comparison;
1991 else
1992 {
1993 msw_taken = comparison == GE ? GT : GTU;
1994 msw_skip = swap_condition (msw_taken);
1995 lsw_taken = GEU;
1996 }
1997 break;
1998 case LTU: case LT:
1999 msw_taken = comparison;
2000 if (op2l == CONST0_RTX (SImode))
2001 break;
2002 msw_skip = swap_condition (msw_taken);
2003 lsw_taken = LTU;
2004 break;
2005 case LEU: case LE:
2006 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2007 msw_taken = comparison;
2008 else
2009 {
2010 lsw_taken = LEU;
2011 if (comparison == LE)
2012 msw_taken = LT;
2013 else if (op2h != CONST0_RTX (SImode))
2014 msw_taken = LTU;
2015 else
2016 {
2017 msw_skip = swap_condition (LTU);
2018 break;
2019 }
2020 msw_skip = swap_condition (msw_taken);
2021 }
2022 break;
2023 default: return false;
2024 }
2025 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2026 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2027 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2028 if (comparison != EQ && comparison != NE && num_branches > 1)
2029 {
2030 if (!CONSTANT_P (operands[2])
2031 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2032 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2033 {
2034 msw_taken_prob = prob / 2U;
2035 msw_skip_prob
2036 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2037 lsw_taken_prob = prob;
2038 }
2039 else
2040 {
2041 msw_taken_prob = prob;
2042 msw_skip_prob = REG_BR_PROB_BASE;
2043 /* ??? If we have a constant op2h, should we use that when
2044 calculating lsw_taken_prob? */
2045 lsw_taken_prob = prob;
2046 }
2047 }
2048 operands[1] = op1h;
2049 operands[2] = op2h;
2050 operands[4] = NULL_RTX;
2051 if (reload_completed
2052 && ! arith_reg_or_0_operand (op2h, SImode)
2053 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2054 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2055 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2056 {
2057 emit_move_insn (scratch, operands[2]);
2058 operands[2] = scratch;
2059 }
2060 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2061 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2062 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2063 {
2064 rtx taken_label = operands[3];
2065
2066 /* Operands were possibly modified, but msw_skip doesn't expect this.
2067 Always use the original ones. */
2068 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2069 {
2070 operands[1] = op1h;
2071 operands[2] = op2h;
2072 if (reload_completed
2073 && ! arith_reg_or_0_operand (op2h, SImode)
2074 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2075 {
2076 emit_move_insn (scratch, operands[2]);
2077 operands[2] = scratch;
2078 }
2079 }
2080
2081 operands[3] = skip_label = gen_label_rtx ();
2082 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2083 operands[3] = taken_label;
2084 }
2085 operands[1] = op1l;
2086 operands[2] = op2l;
2087 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2088 {
2089 if (reload_completed
2090 && ! arith_reg_or_0_operand (op2l, SImode)
2091 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2092 {
2093 emit_move_insn (scratch, operands[2]);
2094 operands[2] = scratch;
2095 }
2096 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2097 }
2098 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2099 emit_label (skip_label);
2100 return true;
2101 }
2102
2103 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2104
2105 static void
2106 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2107 {
2108 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2109 {
2110 insn = gen_rtx_PARALLEL (VOIDmode,
2111 gen_rtvec (2, insn,
2112 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2113 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2114 }
2115 else
2116 emit_insn (insn);
2117 }
2118
2119 /* Prepare the operands for an scc instruction; make sure that the
2120 compare has been done and the result is in T_REG. */
2121 void
2122 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2123 {
2124 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2125 enum rtx_code oldcode = code;
2126 enum machine_mode mode;
2127
2128 /* First need a compare insn. */
2129 switch (code)
2130 {
2131 case NE:
2132 /* It isn't possible to handle this case. */
2133 gcc_unreachable ();
2134 case LT:
2135 code = GT;
2136 break;
2137 case LE:
2138 code = GE;
2139 break;
2140 case LTU:
2141 code = GTU;
2142 break;
2143 case LEU:
2144 code = GEU;
2145 break;
2146 default:
2147 break;
2148 }
2149 if (code != oldcode)
2150 {
2151 rtx tmp = op0;
2152 op0 = op1;
2153 op1 = tmp;
2154 }
2155
2156 mode = GET_MODE (op0);
2157 if (mode == VOIDmode)
2158 mode = GET_MODE (op1);
2159
2160 op0 = force_reg (mode, op0);
2161 if ((code != EQ && code != NE
2162 && (op1 != const0_rtx
2163 || code == GTU || code == GEU || code == LTU || code == LEU))
2164 || (mode == DImode && op1 != const0_rtx)
2165 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2166 op1 = force_reg (mode, op1);
2167
2168 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2169 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2170 mode);
2171 }
2172
2173 rtx
2174 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2175 rtx op0, rtx op1)
2176 {
2177 rtx target = gen_reg_rtx (SImode);
2178 rtx tmp;
2179
2180 gcc_assert (TARGET_SHMEDIA);
2181 switch (code)
2182 {
2183 case EQ:
2184 case GT:
2185 case LT:
2186 case UNORDERED:
2187 case GTU:
2188 case LTU:
2189 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2190 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2191 code = NE;
2192 break;
2193
2194 case NE:
2195 case GE:
2196 case LE:
2197 case ORDERED:
2198 case GEU:
2199 case LEU:
2200 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2201 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2202 code = EQ;
2203 break;
2204
2205 case UNEQ:
2206 case UNGE:
2207 case UNGT:
2208 case UNLE:
2209 case UNLT:
2210 case LTGT:
2211 return NULL_RTX;
2212
2213 default:
2214 gcc_unreachable ();
2215 }
2216
2217 if (mode == DImode)
2218 {
2219 rtx t2 = gen_reg_rtx (DImode);
2220 emit_insn (gen_extendsidi2 (t2, target));
2221 target = t2;
2222 }
2223
2224 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2225 }
2226
2227 /* Called from the md file, set up the operands of a compare instruction. */
2228
2229 void
2230 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2231 {
2232 enum rtx_code code = GET_CODE (operands[0]);
2233 enum rtx_code branch_code;
2234 rtx op0 = operands[1];
2235 rtx op1 = operands[2];
2236 rtx insn, tem;
2237 bool need_ccmpeq = false;
2238
2239 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2240 {
2241 op0 = force_reg (mode, op0);
2242 op1 = force_reg (mode, op1);
2243 }
2244 else
2245 {
2246 if (code != EQ || mode == DImode)
2247 {
2248 /* Force args into regs, since we can't use constants here. */
2249 op0 = force_reg (mode, op0);
2250 if (op1 != const0_rtx || code == GTU || code == GEU)
2251 op1 = force_reg (mode, op1);
2252 }
2253 }
2254
2255 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2256 {
2257 if (code == LT
2258 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2259 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2260 {
2261 tem = op0, op0 = op1, op1 = tem;
2262 code = swap_condition (code);
2263 }
2264
2265 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2266 if (code == GE)
2267 {
2268 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2269 need_ccmpeq = true;
2270 code = GT;
2271 }
2272
2273 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2274 to EQ/GT respectively. */
2275 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2276 }
2277
2278 switch (code)
2279 {
2280 case EQ:
2281 case GT:
2282 case GE:
2283 case GTU:
2284 case GEU:
2285 branch_code = code;
2286 break;
2287 case NE:
2288 case LT:
2289 case LE:
2290 case LTU:
2291 case LEU:
2292 branch_code = reverse_condition (code);
2293 break;
2294 default:
2295 gcc_unreachable ();
2296 }
2297
2298 insn = gen_rtx_SET (VOIDmode,
2299 gen_rtx_REG (SImode, T_REG),
2300 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2301
2302 sh_emit_set_t_insn (insn, mode);
2303 if (need_ccmpeq)
2304 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2305
2306 if (branch_code == code)
2307 emit_jump_insn (gen_branch_true (operands[3]));
2308 else
2309 emit_jump_insn (gen_branch_false (operands[3]));
2310 }
2311
2312 void
2313 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2314 {
2315 enum rtx_code code = GET_CODE (operands[1]);
2316 rtx op0 = operands[2];
2317 rtx op1 = operands[3];
2318 rtx lab = NULL_RTX;
2319 bool invert = false;
2320 rtx tem;
2321
2322 op0 = force_reg (mode, op0);
2323 if ((code != EQ && code != NE
2324 && (op1 != const0_rtx
2325 || code == GTU || code == GEU || code == LTU || code == LEU))
2326 || (mode == DImode && op1 != const0_rtx)
2327 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2328 op1 = force_reg (mode, op1);
2329
2330 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2331 {
2332 if (code == LT || code == LE)
2333 {
2334 code = swap_condition (code);
2335 tem = op0, op0 = op1, op1 = tem;
2336 }
2337 if (code == GE)
2338 {
2339 if (TARGET_IEEE)
2340 {
2341 lab = gen_label_rtx ();
2342 sh_emit_scc_to_t (EQ, op0, op1);
2343 emit_jump_insn (gen_branch_true (lab));
2344 code = GT;
2345 }
2346 else
2347 {
2348 code = LT;
2349 invert = true;
2350 }
2351 }
2352 }
2353
2354 if (code == NE)
2355 {
2356 code = EQ;
2357 invert = true;
2358 }
2359
2360 sh_emit_scc_to_t (code, op0, op1);
2361 if (lab)
2362 emit_label (lab);
2363 if (invert)
2364 emit_insn (gen_movnegt (operands[0]));
2365 else
2366 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2367 }
2368 \f
2369 /* Functions to output assembly code. */
2370
2371 /* Return a sequence of instructions to perform DI or DF move.
2372
2373 Since the SH cannot move a DI or DF in one instruction, we have
2374 to take care when we see overlapping source and dest registers. */
2375
2376 const char *
2377 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2378 enum machine_mode mode)
2379 {
2380 rtx dst = operands[0];
2381 rtx src = operands[1];
2382
2383 if (MEM_P (dst)
2384 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2385 return "mov.l %T1,%0\n\tmov.l %1,%0";
2386
2387 if (register_operand (dst, mode)
2388 && register_operand (src, mode))
2389 {
2390 if (REGNO (src) == MACH_REG)
2391 return "sts mach,%S0\n\tsts macl,%R0";
2392
2393 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2394 when mov.d r1,r0 do r1->r0 then r2->r1. */
2395
2396 if (REGNO (src) + 1 == REGNO (dst))
2397 return "mov %T1,%T0\n\tmov %1,%0";
2398 else
2399 return "mov %1,%0\n\tmov %T1,%T0";
2400 }
2401 else if (CONST_INT_P (src))
2402 {
2403 if (INTVAL (src) < 0)
2404 output_asm_insn ("mov #-1,%S0", operands);
2405 else
2406 output_asm_insn ("mov #0,%S0", operands);
2407
2408 return "mov %1,%R0";
2409 }
2410 else if (MEM_P (src))
2411 {
2412 int ptrreg = -1;
2413 int dreg = REGNO (dst);
2414 rtx inside = XEXP (src, 0);
2415
2416 switch (GET_CODE (inside))
2417 {
2418 case REG:
2419 ptrreg = REGNO (inside);
2420 break;
2421
2422 case SUBREG:
2423 ptrreg = subreg_regno (inside);
2424 break;
2425
2426 case PLUS:
2427 ptrreg = REGNO (XEXP (inside, 0));
2428 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2429 an offsettable address. Unfortunately, offsettable addresses use
2430 QImode to check the offset, and a QImode offsettable address
2431 requires r0 for the other operand, which is not currently
2432 supported, so we can't use the 'o' constraint.
2433 Thus we must check for and handle r0+REG addresses here.
2434 We punt for now, since this is likely very rare. */
2435 gcc_assert (!REG_P (XEXP (inside, 1)));
2436 break;
2437
2438 case LABEL_REF:
2439 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2440 case POST_INC:
2441 return "mov.l %1,%0\n\tmov.l %1,%T0";
2442 default:
2443 gcc_unreachable ();
2444 }
2445
2446 /* Work out the safe way to copy. Copy into the second half first. */
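      /* For instance, loading the pair r2/r3 from @r2 must emit
	 "mov.l @(4,r2),r3" before "mov.l @r2,r2", because the second
	 insn clobbers the pointer register.  */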
2447 if (dreg == ptrreg)
2448 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2449 }
2450
2451 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2452 }
2453
2454 /* Print an instruction which would have gone into a delay slot after
2455 another instruction, but couldn't because the other instruction expanded
2456 into a sequence where putting the slot insn at the end wouldn't work. */
2457
2458 static void
2459 print_slot (rtx insn)
2460 {
2461 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2462
2463 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2464 }
2465
2466 const char *
2467 output_far_jump (rtx insn, rtx op)
2468 {
2469 struct { rtx lab, reg, op; } this_jmp;
2470 rtx braf_base_lab = NULL_RTX;
2471 const char *jump;
2472 int far;
2473 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2474 rtx prev;
2475
2476 this_jmp.lab = gen_label_rtx ();
2477
2478 if (TARGET_SH2
2479 && offset >= -32764
2480 && offset - get_attr_length (insn) <= 32766)
2481 {
2482 far = 0;
2483 jump = "mov.w %O0,%1; braf %1";
2484 }
2485 else
2486 {
2487 far = 1;
2488 if (flag_pic)
2489 {
2490 if (TARGET_SH2)
2491 jump = "mov.l %O0,%1; braf %1";
2492 else
2493 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2494 }
2495 else
2496 jump = "mov.l %O0,%1; jmp @%1";
2497 }
2498 /* If we have a scratch register available, use it. */
2499 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2500 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2501 {
2502 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2503 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2504 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2505 output_asm_insn (jump, &this_jmp.lab);
2506 if (dbr_sequence_length ())
2507 print_slot (final_sequence);
2508 else
2509 output_asm_insn ("nop", 0);
2510 }
2511 else
2512 {
2513 /* Output the delay slot insn first if any. */
2514 if (dbr_sequence_length ())
2515 print_slot (final_sequence);
2516
2517 this_jmp.reg = gen_rtx_REG (SImode, 13);
2518 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2519 Fortunately, MACL is fixed and call-clobbered, and we never
2520 need its value across jumps, so save r13 in it instead of on
2521 the stack. */
2522 if (TARGET_SH5)
2523 output_asm_insn ("lds r13, macl", 0);
2524 else
2525 output_asm_insn ("mov.l r13,@-r15", 0);
2526 output_asm_insn (jump, &this_jmp.lab);
2527 if (TARGET_SH5)
2528 output_asm_insn ("sts macl, r13", 0);
2529 else
2530 output_asm_insn ("mov.l @r15+,r13", 0);
2531 }
2532 if (far && flag_pic && TARGET_SH2)
2533 {
2534 braf_base_lab = gen_label_rtx ();
2535 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2536 CODE_LABEL_NUMBER (braf_base_lab));
2537 }
2538 if (far)
2539 output_asm_insn (".align 2", 0);
2540 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2541 this_jmp.op = op;
2542 if (far && flag_pic)
2543 {
2544 if (TARGET_SH2)
2545 this_jmp.lab = braf_base_lab;
2546 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2547 }
2548 else
2549 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2550 return "";
2551 }
2552
2553 /* Local label counter, used for constants in the pool and inside
2554 pattern branches. */
2555
2556 static int lf = 100;
2557
2558 /* Output code for ordinary branches. */
2559
2560 const char *
2561 output_branch (int logic, rtx insn, rtx *operands)
2562 {
2563 switch (get_attr_length (insn))
2564 {
2565 case 6:
2566 /* This can happen if filling the delay slot has caused a forward
2567 branch to exceed its range (we could reverse it, but only
2568 when we know we won't overextend other branches; this should
2569 best be handled by relaxation).
2570 It can also happen when other condbranches hoist delay slot insns
2571 from their destination, thus leading to code size increase.
2572 But the branch will still be in the range -4092..+4098 bytes. */
2573
2574 if (! TARGET_RELAX)
2575 {
2576 int label = lf++;
2577 /* The call to print_slot will clobber the operands. */
2578 rtx op0 = operands[0];
2579
2580 /* If the instruction in the delay slot is annulled (true), then
2581 there is no delay slot where we can put it now. The only safe
2582 place for it is after the label. final will do that by default. */
2583
2584 if (final_sequence
2585 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2586 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2587 {
2588 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2589 ASSEMBLER_DIALECT ? "/" : ".", label);
2590 print_slot (final_sequence);
2591 }
2592 else
2593 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2594
2595 output_asm_insn ("bra\t%l0", &op0);
2596 fprintf (asm_out_file, "\tnop\n");
2597 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2598
2599 return "";
2600 }
2601 /* When relaxing, handle this like a short branch. The linker
2602 will fix it up if it still doesn't fit after relaxation. */
2603 case 2:
2604 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2605
2606 /* These are for SH2e, in which we have to account for the
2607 extra nop because of the hardware bug in annulled branches. */
2608 case 8:
2609 if (! TARGET_RELAX)
2610 {
2611 int label = lf++;
2612
2613 gcc_assert (!final_sequence
2614 || !(INSN_ANNULLED_BRANCH_P
2615 (XVECEXP (final_sequence, 0, 0))));
2616 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2617 logic ? "f" : "t",
2618 ASSEMBLER_DIALECT ? "/" : ".", label);
2619 fprintf (asm_out_file, "\tnop\n");
2620 output_asm_insn ("bra\t%l0", operands);
2621 fprintf (asm_out_file, "\tnop\n");
2622 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2623
2624 return "";
2625 }
2626 /* When relaxing, fall through. */
2627 case 4:
2628 {
2629 char buffer[10];
2630
2631 sprintf (buffer, "b%s%ss\t%%l0",
2632 logic ? "t" : "f",
2633 ASSEMBLER_DIALECT ? "/" : ".");
2634 output_asm_insn (buffer, &operands[0]);
2635 return "nop";
2636 }
2637
2638 default:
2639 /* There should be no longer branches now - that would
2640 indicate that something has destroyed the branches set
2641 up in machine_dependent_reorg. */
2642 gcc_unreachable ();
2643 }
2644 }
2645
2646 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2647 fill in operand 9 as a label to the successor insn.
2648 We try to use jump threading where possible.
2649 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2650 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2651 follow jmp and bt, if the address is in range. */
2652 const char *
2653 output_branchy_insn (enum rtx_code code, const char *templ,
2654 rtx insn, rtx *operands)
2655 {
2656 rtx next_insn = NEXT_INSN (insn);
2657
2658 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2659 {
2660 rtx src = SET_SRC (PATTERN (next_insn));
2661 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2662 {
2663 /* Following branch not taken */
2664 operands[9] = gen_label_rtx ();
2665 emit_label_after (operands[9], next_insn);
2666 INSN_ADDRESSES_NEW (operands[9],
2667 INSN_ADDRESSES (INSN_UID (next_insn))
2668 + get_attr_length (next_insn));
2669 return templ;
2670 }
2671 else
2672 {
2673 int offset = (branch_dest (next_insn)
2674 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2675 if (offset >= -252 && offset <= 258)
2676 {
2677 if (GET_CODE (src) == IF_THEN_ELSE)
2678 /* branch_true */
2679 src = XEXP (src, 1);
2680 operands[9] = src;
2681 return templ;
2682 }
2683 }
2684 }
2685 operands[9] = gen_label_rtx ();
2686 emit_label_after (operands[9], insn);
2687 INSN_ADDRESSES_NEW (operands[9],
2688 INSN_ADDRESSES (INSN_UID (insn))
2689 + get_attr_length (insn));
2690 return templ;
2691 }
2692
2693 const char *
2694 output_ieee_ccmpeq (rtx insn, rtx *operands)
2695 {
2696 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2697 insn, operands);
2698 }
2699 \f
2700 /* Output the start of the assembler file. */
2701
2702 static void
2703 sh_file_start (void)
2704 {
2705 default_file_start ();
2706
2707 if (TARGET_ELF)
2708 /* We need to show the text section with the proper
2709 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2710 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2711 will complain. We can teach GAS specifically about the
2712 default attributes for our choice of text section, but
2713 then we would have to change GAS again if/when we change
2714 the text section name. */
2715 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2716 else
2717 /* Switch to the data section so that the coffsem symbol
2718 isn't in the text section. */
2719 switch_to_section (data_section);
2720
2721 if (TARGET_LITTLE_ENDIAN)
2722 fputs ("\t.little\n", asm_out_file);
2723
2724 if (!TARGET_ELF)
2725 {
2726 if (TARGET_SHCOMPACT)
2727 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2728 else if (TARGET_SHMEDIA)
2729 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2730 TARGET_SHMEDIA64 ? 64 : 32);
2731 }
2732 }
2733 \f
2734 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2735
2736 static bool
2737 unspec_caller_rtx_p (rtx pat)
2738 {
2739 rtx base, offset;
2740 int i;
2741
2742 split_const (pat, &base, &offset);
2743 if (GET_CODE (base) == UNSPEC)
2744 {
2745 if (XINT (base, 1) == UNSPEC_CALLER)
2746 return true;
2747 for (i = 0; i < XVECLEN (base, 0); i++)
2748 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2749 return true;
2750 }
2751 return false;
2752 }
2753
2754 /* Indicate that INSN cannot be duplicated. This is true for an insn
2755 that generates a unique label. */
2756
2757 static bool
2758 sh_cannot_copy_insn_p (rtx insn)
2759 {
2760 rtx pat;
2761
2762 if (!reload_completed || !flag_pic)
2763 return false;
2764
2765 if (!NONJUMP_INSN_P (insn))
2766 return false;
2767 if (asm_noperands (insn) >= 0)
2768 return false;
2769
2770 pat = PATTERN (insn);
2771 if (GET_CODE (pat) != SET)
2772 return false;
2773 pat = SET_SRC (pat);
2774
2775 if (unspec_caller_rtx_p (pat))
2776 return true;
2777
2778 return false;
2779 }
2780 \f
2781 /* Actual number of instructions used to make a shift by N. */
2782 static const char ashiftrt_insns[] =
2783 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2784
2785 /* Left shift and logical right shift are the same. */
2786 static const char shift_insns[] =
2787 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2788
2789 /* Individual shift amounts needed to get the above length sequences.
2790 One bit right shifts clobber the T bit, so when possible, put one bit
2791 shifts in the middle of the sequence, so the ends are eligible for
2792 branch delay slots. */
2793 static const short shift_amounts[32][5] = {
2794 {0}, {1}, {2}, {2, 1},
2795 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2796 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2797 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2798 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2799 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2800 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2801 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
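/* For example, a constant shift by 7 takes shift_insns[7] == 4 insns,
   using the amounts {2, 2, 1, 2} from the table above
   (2 + 2 + 1 + 2 == 7); gen_shifty_op below walks this table to emit
   the sequence.  */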
2802
2803 /* Likewise, but for shift amounts < 16, up to three highmost bits
2804 might be clobbered. This is typically used when combined with some
2805 kind of sign or zero extension. */
2806
2807 static const char ext_shift_insns[] =
2808 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2809
2810 static const short ext_shift_amounts[32][4] = {
2811 {0}, {1}, {2}, {2, 1},
2812 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2813 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2814 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2815 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2816 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2817 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2818 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2819
2820 /* Assuming we have a value that has been sign-extended by at least one bit,
2821 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2822 to shift it by N without data loss, and quicker than by other means? */
2823 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
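/* The test above holds exactly for N == 7 and N == 15, i.e. the shift
   counts just below the 8 and 16 bit extension widths.  */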
2824
2825 /* This is used in length attributes in sh.md to help compute the length
2826 of arbitrary constant shift instructions. */
2827
2828 int
2829 shift_insns_rtx (rtx insn)
2830 {
2831 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2832 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2833 enum rtx_code shift_code = GET_CODE (set_src);
2834
2835 switch (shift_code)
2836 {
2837 case ASHIFTRT:
2838 return ashiftrt_insns[shift_count];
2839 case LSHIFTRT:
2840 case ASHIFT:
2841 return shift_insns[shift_count];
2842 default:
2843 gcc_unreachable ();
2844 }
2845 }
2846
2847 /* Return the cost of a shift. */
2848
2849 static inline int
2850 shiftcosts (rtx x)
2851 {
2852 int value;
2853
2854 /* There is no pattern for constant first operand. */
2855 if (CONST_INT_P (XEXP (x, 0)))
2856 return MAX_COST;
2857
2858 if (TARGET_SHMEDIA)
2859 return COSTS_N_INSNS (1);
2860
2861 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2862 {
2863 if (GET_MODE (x) == DImode
2864 && CONST_INT_P (XEXP (x, 1))
2865 && INTVAL (XEXP (x, 1)) == 1)
2866 return COSTS_N_INSNS (2);
2867
2868 /* Everything else is invalid, because there is no pattern for it. */
2869 return MAX_COST;
2870 }
2871 /* If shifting by a non-constant, this will be expensive. */
2872 if (!CONST_INT_P (XEXP (x, 1)))
2873 return COSTS_N_INSNS (SH_DYNAMIC_SHIFT_COST);
2874
2875 /* Otherwise, return the true cost in instructions. Cope with out of range
2876 shift counts more or less arbitrarily. */
2877 value = INTVAL (XEXP (x, 1)) & 31;
2878
2879 if (GET_CODE (x) == ASHIFTRT)
2880 {
2881 int cost = ashiftrt_insns[value];
2882 /* If SH3, then we put the constant in a reg and use shad. */
2883 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2884 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2885 return COSTS_N_INSNS (cost);
2886 }
2887 else
2888 return COSTS_N_INSNS (shift_insns[value]);
2889 }
2890
2891 /* Return the cost of an AND/XOR/IOR operation. */
2892
2893 static inline int
2894 and_xor_ior_costs (rtx x, int code)
2895 {
2896 int i;
2897
2898 /* A logical operation with two registers is a single cycle
2899 instruction. */
2900 if (!CONST_INT_P (XEXP (x, 1)))
2901 return 1;
2902
2903 i = INTVAL (XEXP (x, 1));
2904
2905 if (TARGET_SHMEDIA)
2906 {
2907 if (satisfies_constraint_I10 (XEXP (x, 1))
2908 || satisfies_constraint_J16 (XEXP (x, 1)))
2909 return 1;
2910 else
2911 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
2912 }
2913
2914 /* These constants are single cycle extu.[bw] instructions. */
2915 if ((i == 0xff || i == 0xffff) && code == AND)
2916 return 1;
2917 /* Constants that can be used in an instruction as an immediate are
2918 a single cycle, but this requires r0, so make it a little more
2919 expensive. */
2920 if (CONST_OK_FOR_K08 (i))
2921 return 2;
2922 /* Constants that can be loaded with a mov immediate need one more cycle.
2923 This case is probably unnecessary. */
2924 if (CONST_OK_FOR_I08 (i))
2925 return 2;
2926 /* Any other constant requires an additional 2 cycle pc-relative load.
2927 This case is probably unnecessary. */
2928 return 3;
2929 }
2930
2931 /* Return the cost of an addition or a subtraction. */
2932
2933 static inline int
2934 addsubcosts (rtx x)
2935 {
2936 /* Adding a register is a single cycle insn. */
2937 if (REG_P (XEXP (x, 1))
2938 || GET_CODE (XEXP (x, 1)) == SUBREG)
2939 return 1;
2940
2941 /* Likewise for small constants. */
2942 if (CONST_INT_P (XEXP (x, 1))
2943 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2944 return 1;
2945
2946 if (TARGET_SHMEDIA)
2947 switch (GET_CODE (XEXP (x, 1)))
2948 {
2949 case CONST:
2950 case LABEL_REF:
2951 case SYMBOL_REF:
2952 return TARGET_SHMEDIA64 ? 5 : 3;
2953
2954 case CONST_INT:
2955 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2956 return 2;
2957 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2958 return 3;
2959 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2960 return 4;
2961
2962 /* Fall through. */
2963 default:
2964 return 5;
2965 }
2966
2967 /* Any other constant requires a 2 cycle pc-relative load plus an
2968 addition. */
2969 return 3;
2970 }
2971
2972 /* Return the cost of a multiply. */
2973 static inline int
2974 multcosts (rtx x ATTRIBUTE_UNUSED)
2975 {
2976 if (sh_multcost >= 0)
2977 return sh_multcost;
2978 if (TARGET_SHMEDIA)
2979 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2980 accept constants. Ideally, we would use a cost of one or two and
2981 add the cost of the operand, but disregard the latter when inside loops
2982 and loop invariant code motion is still to follow.
2983 Using a multiply first and splitting it later if it's a loss
2984 doesn't work because of different sign / zero extension semantics
2985 of multiplies vs. shifts. */
2986 return optimize_size ? 2 : 3;
2987
2988 if (TARGET_SH2)
2989 {
2990 /* We have a mul insn, so we can never take more than the mul and the
2991 read of the mac reg, but count more because of the latency and extra
2992 reg usage. */
2993 if (optimize_size)
2994 return 2;
2995 return 3;
2996 }
2997
2998 /* If we're aiming at small code, then just count the number of
2999 insns in a multiply call sequence. */
3000 if (optimize_size)
3001 return 5;
3002
3003 /* Otherwise count all the insns in the routine we'd be calling too. */
3004 return 20;
3005 }
3006
3007 /* Compute a (partial) cost for rtx X. Return true if the complete
3008 cost has been computed, and false if subexpressions should be
3009 scanned. In either case, *TOTAL contains the cost result. */
3010
3011 static bool
3012 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3013 int *total, bool speed ATTRIBUTE_UNUSED)
3014 {
3015 switch (code)
3016 {
3017 /* The lower-subreg pass decides whether to split multi-word regs
3018 into individual regs by looking at the cost for a SET of certain
3019 modes with the following patterns:
3020 (set (reg) (reg))
3021 (set (reg) (const_int 0))
3022 On machines that support vector-move operations a multi-word move
3023 is the same cost as individual reg move. On SH there is no
3024 vector-move, so we have to provide the correct cost in the number
3025 of move insns to load/store the reg of the mode in question. */
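    /* E.g. on non-SH2A targets without FMOVD, a DImode or DFmode
       register-register move is costed as 2 insns: 8 bytes moved in
       SImode sized pieces, as computed by mov_insn_size below.  */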
3026 case SET:
3027 if (register_operand (SET_DEST (x), VOIDmode)
3028 && (register_operand (SET_SRC (x), VOIDmode)
3029 || satisfies_constraint_Z (SET_SRC (x))))
3030 {
3031 const enum machine_mode mode = GET_MODE (SET_DEST (x));
3032 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3033 / mov_insn_size (mode, TARGET_SH2A));
3034 return true;
3035 }
3036 return false;
3037
3038 case CONST_INT:
3039 if (TARGET_SHMEDIA)
3040 {
3041 if (INTVAL (x) == 0)
3042 *total = 0;
3043 else if (outer_code == AND && and_operand ((x), DImode))
3044 *total = 0;
3045 else if ((outer_code == IOR || outer_code == XOR
3046 || outer_code == PLUS)
3047 && CONST_OK_FOR_I10 (INTVAL (x)))
3048 *total = 0;
3049 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3050 *total = COSTS_N_INSNS (outer_code != SET);
3051 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3052 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3053 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3054 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3055 else
3056 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3057 return true;
3058 }
3059 if (CONST_OK_FOR_I08 (INTVAL (x)))
3060 *total = 0;
3061 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3062 && CONST_OK_FOR_K08 (INTVAL (x)))
3063 *total = 1;
3064 /* prepare_cmp_insn will force costly constants into registers before
3065 the cbranch[sd]i4 patterns can see them, so preserve potentially
3066 interesting ones not covered by I08 above. */
3067 else if (outer_code == COMPARE
3068 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3069 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3070 || INTVAL (x) == 0x7fffffff
3071 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3072 *total = 1;
3073 else
3074 *total = 8;
3075 return true;
3076
3077 case EQ:
3078 /* An and with a constant compared against zero is
3079 most likely going to be a TST #imm, R0 instruction.
3080 Notice that this does not catch the zero_extract variants from
3081 the md file. */
3082 if (GET_CODE (XEXP (x, 0)) == AND
3083 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3084 {
3085 *total = 1;
3086 return true;
3087 }
3088 else
3089 return false;
3090
3091 case CONST:
3092 case LABEL_REF:
3093 case SYMBOL_REF:
3094 if (TARGET_SHMEDIA64)
3095 *total = COSTS_N_INSNS (4);
3096 else if (TARGET_SHMEDIA32)
3097 *total = COSTS_N_INSNS (2);
3098 else
3099 *total = 5;
3100 return true;
3101
3102 case CONST_DOUBLE:
3103 if (TARGET_SHMEDIA)
3104 *total = COSTS_N_INSNS (4);
3105 /* prepare_cmp_insn will force costly constants into registers before
3106 the cbranchdi4 pattern can see them, so preserve potentially
3107 interesting ones. */
3108 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3109 *total = 1;
3110 else
3111 *total = 10;
3112 return true;
3113 case CONST_VECTOR:
3114 if (x == CONST0_RTX (GET_MODE (x)))
3115 *total = 0;
3116 else if (sh_1el_vec (x, VOIDmode))
3117 *total = outer_code != SET;
3118 else if (sh_rep_vec (x, VOIDmode))
3119 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3120 + (outer_code != SET));
3121 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
3122 return true;
3123
3124 case PLUS:
3125 case MINUS:
3126 *total = COSTS_N_INSNS (addsubcosts (x));
3127 return true;
3128
3129 case AND:
3130 case XOR:
3131 case IOR:
3132 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3133 return true;
3134
3135 case MULT:
3136 *total = COSTS_N_INSNS (multcosts (x));
3137 return true;
3138
3139 case ASHIFT:
3140 case ASHIFTRT:
3141 case LSHIFTRT:
3142 *total = shiftcosts (x);
3143 return true;
3144
3145 case DIV:
3146 case UDIV:
3147 case MOD:
3148 case UMOD:
3149 *total = COSTS_N_INSNS (20);
3150 return true;
3151
3152 case PARALLEL:
3153 if (sh_1el_vec (x, VOIDmode))
3154 *total = outer_code != SET;
3155 else if (sh_rep_vec (x, VOIDmode))
3156 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3157 + (outer_code != SET));
3158 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
3159 return true;
3160
3161 case FLOAT:
3162 case FIX:
3163 *total = 100;
3164 return true;
3165
3166 default:
3167 return false;
3168 }
3169 }
3170
3171 /* Determine the size of the fundamental move insn that will be used
3172 for the specified mode. */
3173
3174 static inline int
3175 mov_insn_size (enum machine_mode mode, bool consider_sh2a)
3176 {
3177 const int mode_sz = GET_MODE_SIZE (mode);
3178
3179 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3180 || (TARGET_FMOVD && mode == DFmode))
3181 return mode_sz;
3182 else
3183 {
3184 /* The max. available mode for actual move insns is SImode.
3185 Larger accesses will be split into multiple loads/stores. */
3186 const int max_mov_sz = GET_MODE_SIZE (SImode);
3187 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3188 }
3189 }
3190
3191 /* Determine the maximum possible displacement for a move insn for the
3192 specified mode. */
3193
3194 static int
3195 max_mov_insn_displacement (enum machine_mode mode, bool consider_sh2a)
3196 {
3197 /* The 4 byte displacement move insns are the same as the 2 byte
3198 versions but take a 12 bit displacement. All we need to do is to
3199 scale the max. displacement value accordingly. */
3200 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3201
3202 /* SH2A supports FPU move insns with 12 bit displacements.
3203 Other variants do not support any kind of displacements for
3204 FPU move insns. */
3205 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3206 return 0;
3207 else
3208 {
3209 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3210 const int mode_sz = GET_MODE_SIZE (mode);
3211 int r = 15 * mov_insn_sz * disp_scale;
3212
3213 /* If the mov insn will be split into multiple loads/stores, the
3214 maximum possible displacement is a bit smaller. */
3215 if (mode_sz > mov_insn_sz)
3216 r -= mode_sz - mov_insn_sz;
3217 return r;
3218 }
3219 }
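/* For example, an SImode move insn gets a maximum displacement of
   15 * 4 == 60 bytes here, or 4095 * 4 == 16380 bytes with the 4 byte
   SH2A variants (disp_scale == 4095 / 15); a DImode access, which is
   split into two SImode moves, gets 4 bytes less than that.  */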
3220
3221 /* Determine the alignment mask for a move insn of the
3222 specified mode. */
3223
3224 static inline int
3225 mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
3226 {
3227 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3228 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3229 }
3230
3231 /* Return the displacement value of a displacement address. */
3232
3233 static inline HOST_WIDE_INT
3234 disp_addr_displacement (rtx x)
3235 {
3236 gcc_assert (satisfies_constraint_Sdd (x));
3237 return INTVAL (XEXP (XEXP (x, 0), 1));
3238 }
3239
3240 /* Compute the cost of an address. */
3241
3242 static int
3243 sh_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
3244 {
3245 /* 'reg + disp' addressing. */
3246 if (satisfies_constraint_Sdd (x))
3247 {
3248 const HOST_WIDE_INT offset = disp_addr_displacement (x);
3249 const enum machine_mode mode = GET_MODE (x);
3250
3251 /* The displacement would fit into a 2 byte move insn. */
3252 if (offset > 0 && offset <= max_mov_insn_displacement (mode, false))
3253 return 0;
3254
3255 /* The displacement would fit into a 4 byte move insn (SH2A). */
3256 if (TARGET_SH2A
3257 && offset > 0 && offset <= max_mov_insn_displacement (mode, true))
3258 return 1;
3259
3260 /* The displacement is probably out of range and will require extra
3261 calculations. */
3262 return 2;
3263 }
3264
3265 /* 'reg + reg' addressing. Account for a slightly higher cost because of
3266 increased pressure on R0. */
3267 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3268 && ! TARGET_SHMEDIA)
3269 return 1;
3270
3271 return 0;
3272 }
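/* For instance, an SImode access @(44,Rn) fits a 2 byte move insn
   (44 <= 60) and is costed 0 above, while @(64,Rn) costs 1 on SH2A
   (it still fits a 4 byte move insn) and 2 otherwise.  */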
3273
3274 /* Code to expand a shift. */
3275
3276 void
3277 gen_ashift (int type, int n, rtx reg)
3278 {
3279 /* Negative values here come from the shift_amounts array. */
3280 if (n < 0)
3281 {
3282 if (type == ASHIFT)
3283 type = LSHIFTRT;
3284 else
3285 type = ASHIFT;
3286 n = -n;
3287 }
3288
3289 switch (type)
3290 {
3291 case ASHIFTRT:
3292 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3293 break;
3294 case LSHIFTRT:
3295 if (n == 1)
3296 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3297 else
3298 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3299 break;
3300 case ASHIFT:
3301 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3302 break;
3303 }
3304 }
3305
3306 /* Same for HImode */
3307
3308 void
3309 gen_ashift_hi (int type, int n, rtx reg)
3310 {
3311 /* Negative values here come from the shift_amounts array. */
3312 if (n < 0)
3313 {
3314 if (type == ASHIFT)
3315 type = LSHIFTRT;
3316 else
3317 type = ASHIFT;
3318 n = -n;
3319 }
3320
3321 switch (type)
3322 {
3323 case ASHIFTRT:
3324 case LSHIFTRT:
3325 /* We don't have HImode right shift operations because using the
3326 ordinary 32 bit shift instructions for that doesn't generate proper
3327 zero/sign extension.
3328 gen_ashift_hi is only called in contexts where we know that the
3329 sign extension works out correctly. */
3330 {
3331 int offset = 0;
3332 if (GET_CODE (reg) == SUBREG)
3333 {
3334 offset = SUBREG_BYTE (reg);
3335 reg = SUBREG_REG (reg);
3336 }
3337 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3338 break;
3339 }
3340 case ASHIFT:
3341 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3342 break;
3343 }
3344 }
3345
3346 /* Output RTL to split a constant shift into its component SH constant
3347 shift instructions. */
3348
3349 void
3350 gen_shifty_op (int code, rtx *operands)
3351 {
3352 int value = INTVAL (operands[2]);
3353 int max, i;
3354
3355 /* Truncate the shift count in case it is out of bounds. */
3356 value = value & 31;
3357
3358 if (value == 31)
3359 {
3360 if (code == LSHIFTRT)
3361 {
3362 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3363 emit_insn (gen_movt (operands[0]));
3364 return;
3365 }
3366 else if (code == ASHIFT)
3367 {
3368 /* There is a two instruction sequence for 31 bit left shifts,
3369 but it requires r0. */
3370 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3371 {
3372 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3373 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3374 return;
3375 }
3376 }
3377 }
3378 else if (value == 0)
3379 {
3380 /* This can happen even when optimizing, if there were subregs before
3381 reload. Don't output a nop here, as this is never optimized away;
3382 use a no-op move instead. */
3383 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3384 return;
3385 }
3386
3387 max = shift_insns[value];
3388 for (i = 0; i < max; i++)
3389 gen_ashift (code, shift_amounts[value][i], operands[0]);
3390 }
3391
3392 /* Same as above, but optimized for values where the topmost bits don't
3393 matter. */
3394
3395 void
3396 gen_shifty_hi_op (int code, rtx *operands)
3397 {
3398 int value = INTVAL (operands[2]);
3399 int max, i;
3400 void (*gen_fun) (int, int, rtx);
3401
3402 /* This operation is used by and_shl for SImode values with a few
3403 high bits known to be cleared. */
3404 value &= 31;
3405 if (value == 0)
3406 {
3407 emit_insn (gen_nop ());
3408 return;
3409 }
3410
3411 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3412 if (code == ASHIFT)
3413 {
3414 max = ext_shift_insns[value];
3415 for (i = 0; i < max; i++)
3416 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3417 }
3418 else
3419 /* When shifting right, emit the shifts in reverse order, so that
3420 solitary negative values come first. */
3421 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3422 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3423 }
3424
3425 /* Output RTL for an arithmetic right shift. */
3426
3427 /* ??? Rewrite to use super-optimizer sequences. */
3428
3429 bool
3430 expand_ashiftrt (rtx *operands)
3431 {
3432 rtx wrk;
3433 char func[18];
3434 int value;
3435
3436 if (TARGET_SH3 || TARGET_SH2A)
3437 {
3438 if (!CONST_INT_P (operands[2]))
3439 {
3440 rtx count = copy_to_mode_reg (SImode, operands[2]);
3441 emit_insn (gen_negsi2 (count, count));
3442 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3443 return true;
3444 }
3445 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3446 > 1 + SH_DYNAMIC_SHIFT_COST)
3447 {
3448 rtx count
3449 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3450 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3451 return true;
3452 }
3453 }
3454 if (!CONST_INT_P (operands[2]))
3455 return false;
3456
3457 value = INTVAL (operands[2]) & 31;
3458
3459 if (value == 31)
3460 {
3461 /* If we are called from abs expansion, arrange things so that we
3462 can use a single MT instruction that doesn't clobber the source,
3463 if LICM can hoist out the load of the constant zero. */
3464 if (currently_expanding_to_rtl)
3465 {
3466 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3467 operands[1]));
3468 emit_insn (gen_mov_neg_si_t (operands[0]));
3469 return true;
3470 }
3471 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3472 return true;
3473 }
3474 else if (value >= 16 && value <= 19)
3475 {
3476 wrk = gen_reg_rtx (SImode);
3477 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3478 value -= 16;
3479 while (value--)
3480 gen_ashift (ASHIFTRT, 1, wrk);
3481 emit_move_insn (operands[0], wrk);
3482 return true;
3483 }
3484 /* Expand a short sequence inline; for longer ones call a helper routine. */
3485 else if (value <= 5)
3486 {
3487 wrk = gen_reg_rtx (SImode);
3488 emit_move_insn (wrk, operands[1]);
3489 while (value--)
3490 gen_ashift (ASHIFTRT, 1, wrk);
3491 emit_move_insn (operands[0], wrk);
3492 return true;
3493 }
3494
3495 wrk = gen_reg_rtx (Pmode);
3496
3497 /* Load the value into an arg reg and call a helper. */
3498 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3499 sprintf (func, "__ashiftrt_r4_%d", value);
3500 function_symbol (wrk, func, SFUNC_STATIC);
3501 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3502 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3503 return true;
3504 }
3505
3506 bool
3507 sh_dynamicalize_shift_p (rtx count)
3508 {
3509 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3510 }
3511
3512 /* Try to find a good way to implement the combiner pattern
3513 [(set (match_operand:SI 0 "register_operand" "r")
3514 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3515 (match_operand:SI 2 "const_int_operand" "n"))
3516 (match_operand:SI 3 "const_int_operand" "n"))) .
3517 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3518 return 0 for simple right / left or left/right shift combination.
3519 return 1 for a combination of shifts with zero_extend.
3520 return 2 for a combination of shifts with an AND that needs r0.
3521 return 3 for a combination of shifts with an AND that needs an extra
3522 scratch register, when the three highmost bits of the AND mask are clear.
3523 return 4 for a combination of shifts with an AND that needs an extra
3524 scratch register, when any of the three highmost bits of the AND mask
3525 is set.
3526 If ATTRP is set, store an initial right shift width in ATTRP[0],
3527 and the instruction length in ATTRP[1]. These values are not valid
3528 when returning 0.
3529 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3530 shift_amounts for the last shift value that is to be used before the
3531 sign extend. */
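/* For example, for (x << 8) & 0xff00, LEFT_RTX is 8 and MASK_RTX is 0xff00;
   the mask shifted right by LEFT is 0xff, so the operation is also
   expressible as a zero-extension of the low byte of x followed by a left
   shift of 8. That sequence is one of the candidates whose insn counts
   are compared below. */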
3532 int
3533 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3534 {
3535 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3536 int left = INTVAL (left_rtx), right;
3537 int best = 0;
3538 int cost, best_cost = 10000;
3539 int best_right = 0, best_len = 0;
3540 int i;
3541 int can_ext;
3542
3543 if (left < 0 || left > 31)
3544 return 0;
3545 if (CONST_INT_P (mask_rtx))
3546 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3547 else
3548 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3549 /* Can this be expressed as a right shift / left shift pair? */
3550 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3551 right = exact_log2 (lsb);
3552 mask2 = ~(mask + lsb - 1);
3553 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3554 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
3555 if (! mask2)
3556 best_cost = shift_insns[right] + shift_insns[right + left];
3557 /* mask has no trailing zeroes <==> ! right */
3558 else if (! right && mask2 == ~(lsb2 - 1))
3559 {
3560 int late_right = exact_log2 (lsb2);
3561 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3562 }
3563 /* Try to use zero extend. */
3564 if (mask2 == ~(lsb2 - 1))
3565 {
3566 int width, first;
3567
3568 for (width = 8; width <= 16; width += 8)
3569 {
3570 /* Can we zero-extend right away? */
3571 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3572 {
3573 cost
3574 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3575 if (cost < best_cost)
3576 {
3577 best = 1;
3578 best_cost = cost;
3579 best_right = right;
3580 best_len = cost;
3581 if (attrp)
3582 attrp[2] = -1;
3583 }
3584 continue;
3585 }
3586 /* ??? Could try to put zero extend into initial right shift,
3587 or even shift a bit left before the right shift. */
3588 /* Determine value of first part of left shift, to get to the
3589 zero extend cut-off point. */
3590 first = width - exact_log2 (lsb2) + right;
3591 if (first >= 0 && right + left - first >= 0)
3592 {
3593 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3594 + ext_shift_insns[right + left - first];
3595 if (cost < best_cost)
3596 {
3597 best = 1;
3598 best_cost = cost;
3599 best_right = right;
3600 best_len = cost;
3601 if (attrp)
3602 attrp[2] = first;
3603 }
3604 }
3605 }
3606 }
3607 /* Try to use the r0 AND pattern. */
3608 for (i = 0; i <= 2; i++)
3609 {
3610 if (i > right)
3611 break;
3612 if (! CONST_OK_FOR_K08 (mask >> i))
3613 continue;
3614 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3615 if (cost < best_cost)
3616 {
3617 best = 2;
3618 best_cost = cost;
3619 best_right = i;
3620 best_len = cost - 1;
3621 }
3622 }
3623 /* Try to use a scratch register to hold the AND operand. */
3624 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3625 for (i = 0; i <= 2; i++)
3626 {
3627 if (i > right)
3628 break;
3629 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3630 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3631 if (cost < best_cost)
3632 {
3633 best = 4 - can_ext;
3634 best_cost = cost;
3635 best_right = i;
3636 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3637 }
3638 }
3639
3640 if (attrp)
3641 {
3642 attrp[0] = best_right;
3643 attrp[1] = best_len;
3644 }
3645 return best;
3646 }
3647
3648 /* This is used in length attributes of the unnamed instructions
3649 corresponding to shl_and_kind return values of 1 and 2. */
3650 int
3651 shl_and_length (rtx insn)
3652 {
3653 rtx set_src, left_rtx, mask_rtx;
3654 int attributes[3];
3655
3656 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3657 left_rtx = XEXP (XEXP (set_src, 0), 1);
3658 mask_rtx = XEXP (set_src, 1);
3659 shl_and_kind (left_rtx, mask_rtx, attributes);
3660 return attributes[1];
3661 }
3662
3663 /* This is used in length attribute of the and_shl_scratch instruction. */
3664
3665 int
3666 shl_and_scr_length (rtx insn)
3667 {
3668 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3669 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3670 rtx op = XEXP (set_src, 0);
3671 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3672 op = XEXP (XEXP (op, 0), 0);
3673 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3674 }
3675
3676 /* Generate rtl for instructions for which shl_and_kind advised a particular
3677 method of generating them, i.e. returned nonzero. */
3678
3679 bool
3680 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3681 {
3682 int attributes[3];
3683 unsigned HOST_WIDE_INT mask;
3684 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3685 int right, total_shift;
3686 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3687
3688 right = attributes[0];
3689 total_shift = INTVAL (left_rtx) + right;
3690 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3691 switch (kind)
3692 {
3693 default:
3694 return true;
3695 case 1:
3696 {
3697 int first = attributes[2];
3698 rtx operands[3];
3699
3700 if (first < 0)
3701 {
3702 emit_insn ((mask << right) <= 0xff
3703 ? gen_zero_extendqisi2 (dest,
3704 gen_lowpart (QImode, source))
3705 : gen_zero_extendhisi2 (dest,
3706 gen_lowpart (HImode, source)));
3707 source = dest;
3708 }
3709 if (source != dest)
3710 emit_insn (gen_movsi (dest, source));
3711 operands[0] = dest;
3712 if (right)
3713 {
3714 operands[2] = GEN_INT (right);
3715 gen_shifty_hi_op (LSHIFTRT, operands);
3716 }
3717 if (first > 0)
3718 {
3719 operands[2] = GEN_INT (first);
3720 gen_shifty_hi_op (ASHIFT, operands);
3721 total_shift -= first;
3722 mask <<= first;
3723 }
3724 if (first >= 0)
3725 emit_insn (mask <= 0xff
3726 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3727 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3728 if (total_shift > 0)
3729 {
3730 operands[2] = GEN_INT (total_shift);
3731 gen_shifty_hi_op (ASHIFT, operands);
3732 }
3733 break;
3734 }
3735 case 4:
3736 shift_gen_fun = gen_shifty_op;
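/* Fall through. */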
3737 case 3:
3738 /* If the topmost bit that matters is set, set the topmost bits
3739 that don't matter. This way, we might be able to get a shorter
3740 signed constant. */
3741 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3742 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
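/* Fall through. */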
3743 case 2:
3744 /* Don't expand fine-grained when combining, because that will
3745 make the pattern fail. */
3746 if (currently_expanding_to_rtl
3747 || reload_in_progress || reload_completed)
3748 {
3749 rtx operands[3];
3750
3751 /* Cases 3 and 4 should be handled by this split
3752 only while combining */
3753 gcc_assert (kind <= 2);
3754 if (right)
3755 {
3756 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3757 source = dest;
3758 }
3759 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3760 if (total_shift)
3761 {
3762 operands[0] = dest;
3763 operands[1] = dest;
3764 operands[2] = GEN_INT (total_shift);
3765 shift_gen_fun (ASHIFT, operands);
3766 }
3767 break;
3768 }
3769 else
3770 {
3771 int neg = 0;
3772 if (kind != 4 && total_shift < 16)
3773 {
3774 neg = -ext_shift_amounts[total_shift][1];
3775 if (neg > 0)
3776 neg -= ext_shift_amounts[total_shift][2];
3777 else
3778 neg = 0;
3779 }
3780 emit_insn (gen_and_shl_scratch (dest, source,
3781 GEN_INT (right),
3782 GEN_INT (mask),
3783 GEN_INT (total_shift + neg),
3784 GEN_INT (neg)));
3785 emit_insn (gen_movsi (dest, dest));
3786 break;
3787 }
3788 }
3789 return false;
3790 }
3791
3792 /* Try to find a good way to implement the combiner pattern
3793 [(set (match_operand:SI 0 "register_operand" "=r")
3794 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3795 (match_operand:SI 2 "const_int_operand" "n")
3796 (match_operand:SI 3 "const_int_operand" "n")
3797 (const_int 0)))
3798 (clobber (reg:SI T_REG))]
3799 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3800 return 0 for simple left / right shift combination.
3801 return 1 for left shift / 8 bit sign extend / left shift.
3802 return 2 for left shift / 16 bit sign extend / left shift.
3803 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3804 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3805 return 5 for left shift / 16 bit sign extend / right shift
3806 return 6 for < 8 bit sign extend / left shift.
3807 return 7 for < 8 bit sign extend / left shift / single right shift.
3808 If COSTP is nonzero, assign the calculated cost to *COSTP. */
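/* For example, with LEFT_RTX == 2 and SIZE_RTX == 8 (i.e. INSIZE == 6),
   one candidate is kind 1: shift left by 8 - 6 == 2 and sign-extend the
   low byte, which yields the low 8 bits of (X << 2) sign-extended; the
   code below picks whichever enumerated alternative has the lowest insn
   count. */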
3809
3810 int
3811 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3812 {
3813 int left, size, insize, ext;
3814 int cost = 0, best_cost;
3815 int kind;
3816
3817 left = INTVAL (left_rtx);
3818 size = INTVAL (size_rtx);
3819 insize = size - left;
3820 gcc_assert (insize > 0);
3821 /* Default to left / right shift. */
3822 kind = 0;
3823 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3824 if (size <= 16)
3825 {
3826 /* 16 bit shift / sign extend / 16 bit shift */
3827 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3828 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3829 below, by alternative 3 or something even better. */
3830 if (cost < best_cost)
3831 {
3832 kind = 5;
3833 best_cost = cost;
3834 }
3835 }
3836 /* Try a plain sign extend between two shifts. */
3837 for (ext = 16; ext >= insize; ext -= 8)
3838 {
3839 if (ext <= size)
3840 {
3841 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3842 if (cost < best_cost)
3843 {
3844 kind = ext / (unsigned) 8;
3845 best_cost = cost;
3846 }
3847 }
3848 /* Check if we can do a sloppy shift with a final signed shift
3849 restoring the sign. */
3850 if (EXT_SHIFT_SIGNED (size - ext))
3851 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3852 /* If not, maybe it's still cheaper to do the second shift sloppy,
3853 and do a final sign extend? */
3854 else if (size <= 16)
3855 cost = ext_shift_insns[ext - insize] + 1
3856 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3857 else
3858 continue;
3859 if (cost < best_cost)
3860 {
3861 kind = ext / (unsigned) 8 + 2;
3862 best_cost = cost;
3863 }
3864 }
3865 /* Check if we can sign extend in r0 */
3866 if (insize < 8)
3867 {
3868 cost = 3 + shift_insns[left];
3869 if (cost < best_cost)
3870 {
3871 kind = 6;
3872 best_cost = cost;
3873 }
3874 /* Try the same with a final signed shift. */
3875 if (left < 31)
3876 {
3877 cost = 3 + ext_shift_insns[left + 1] + 1;
3878 if (cost < best_cost)
3879 {
3880 kind = 7;
3881 best_cost = cost;
3882 }
3883 }
3884 }
3885 if (TARGET_SH3 || TARGET_SH2A)
3886 {
3887 /* Try to use a dynamic shift. */
3888 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3889 if (cost < best_cost)
3890 {
3891 kind = 0;
3892 best_cost = cost;
3893 }
3894 }
3895 if (costp)
3896 *costp = cost;
3897 return kind;
3898 }
3899
3900 /* Function to be used in the length attribute of the instructions
3901 implementing this pattern. */
3902
3903 int
3904 shl_sext_length (rtx insn)
3905 {
3906 rtx set_src, left_rtx, size_rtx;
3907 int cost;
3908
3909 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3910 left_rtx = XEXP (XEXP (set_src, 0), 1);
3911 size_rtx = XEXP (set_src, 1);
3912 shl_sext_kind (left_rtx, size_rtx, &cost);
3913 return cost;
3914 }
3915
3916 /* Generate rtl for this pattern */
3917
3918 bool
3919 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3920 {
3921 int kind;
3922 int left, size, insize, cost;
3923 rtx operands[3];
3924
3925 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3926 left = INTVAL (left_rtx);
3927 size = INTVAL (size_rtx);
3928 insize = size - left;
3929 switch (kind)
3930 {
3931 case 1:
3932 case 2:
3933 case 3:
3934 case 4:
3935 {
3936 int ext = kind & 1 ? 8 : 16;
3937 int shift2 = size - ext;
3938
3939 /* Don't expand fine-grained when combining, because that will
3940 make the pattern fail. */
3941 if (! currently_expanding_to_rtl
3942 && ! reload_in_progress && ! reload_completed)
3943 {
3944 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3945 emit_insn (gen_movsi (dest, source));
3946 break;
3947 }
3948 if (dest != source)
3949 emit_insn (gen_movsi (dest, source));
3950 operands[0] = dest;
3951 if (ext - insize)
3952 {
3953 operands[2] = GEN_INT (ext - insize);
3954 gen_shifty_hi_op (ASHIFT, operands);
3955 }
3956 emit_insn (kind & 1
3957 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3958 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3959 if (kind <= 2)
3960 {
3961 if (shift2)
3962 {
3963 operands[2] = GEN_INT (shift2);
3964 gen_shifty_op (ASHIFT, operands);
3965 }
3966 }
3967 else
3968 {
3969 if (shift2 > 0)
3970 {
3971 if (EXT_SHIFT_SIGNED (shift2))
3972 {
3973 operands[2] = GEN_INT (shift2 + 1);
3974 gen_shifty_op (ASHIFT, operands);
3975 operands[2] = const1_rtx;
3976 gen_shifty_op (ASHIFTRT, operands);
3977 break;
3978 }
3979 operands[2] = GEN_INT (shift2);
3980 gen_shifty_hi_op (ASHIFT, operands);
3981 }
3982 else if (shift2)
3983 {
3984 operands[2] = GEN_INT (-shift2);
3985 gen_shifty_hi_op (LSHIFTRT, operands);
3986 }
3987 emit_insn (size <= 8
3988 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3989 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3990 }
3991 break;
3992 }
3993 case 5:
3994 {
3995 int i = 16 - size;
3996 if (! currently_expanding_to_rtl
3997 && ! reload_in_progress && ! reload_completed)
3998 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3999 else
4000 {
4001 operands[0] = dest;
4002 operands[2] = GEN_INT (16 - insize);
4003 gen_shifty_hi_op (ASHIFT, operands);
4004 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4005 }
4006 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4007 while (--i >= 0)
4008 gen_ashift (ASHIFTRT, 1, dest);
4009 break;
4010 }
4011 case 6:
4012 case 7:
4013 /* Don't expand fine-grained when combining, because that will
4014 make the pattern fail. */
4015 if (! currently_expanding_to_rtl
4016 && ! reload_in_progress && ! reload_completed)
4017 {
4018 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4019 emit_insn (gen_movsi (dest, source));
4020 break;
4021 }
4022 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4023 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4024 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4025 operands[0] = dest;
4026 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4027 gen_shifty_op (ASHIFT, operands);
4028 if (kind == 7)
4029 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4030 break;
4031 default:
4032 return true;
4033 }
4034 return false;
4035 }
4036
4037 /* Prefix a symbol_ref name with "datalabel". */
4038
4039 rtx
4040 gen_datalabel_ref (rtx sym)
4041 {
4042 const char *str;
4043
4044 if (GET_CODE (sym) == LABEL_REF)
4045 return gen_rtx_CONST (GET_MODE (sym),
4046 gen_rtx_UNSPEC (GET_MODE (sym),
4047 gen_rtvec (1, sym),
4048 UNSPEC_DATALABEL));
4049
4050 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4051
4052 str = XSTR (sym, 0);
4053 /* Share all SYMBOL_REF strings with the same value - that is important
4054 for cse. */
4055 str = IDENTIFIER_POINTER (get_identifier (str));
4056 XSTR (sym, 0) = str;
4057
4058 return sym;
4059 }
4060
4061 \f
4062 static alloc_pool label_ref_list_pool;
4063
4064 typedef struct label_ref_list_d
4065 {
4066 rtx label;
4067 struct label_ref_list_d *next;
4068 } *label_ref_list_t;
4069
4070 /* The SH cannot load a large constant into a register; constants have to
4071 come from a pc relative load. The reference of a pc relative load
4072 instruction must be less than 1k in front of the instruction. This
4073 means that we often have to dump a constant inside a function, and
4074 generate code to branch around it.
4075
4076 It is important to minimize this, since the branches will slow things
4077 down and make things bigger.
4078
4079 Worst case code looks like:
4080
4081 mov.l L1,rn
4082 bra L2
4083 nop
4084 align
4085 L1: .long value
4086 L2:
4087 ..
4088
4089 mov.l L3,rn
4090 bra L4
4091 nop
4092 align
4093 L3: .long value
4094 L4:
4095 ..
4096
4097 We fix this by performing a scan before scheduling, which notices which
4098 instructions need to have their operands fetched from the constant table
4099 and builds the table.
4100
4101 The algorithm is:
4102
4103 scan, find an instruction which needs a pcrel move. Look forward, find the
4104 last barrier which is within MAX_COUNT bytes of the requirement.
4105 If there isn't one, make one. Process all the instructions between
4106 the found instruction and the barrier.
4107
4108 In the above example, we can tell that L3 is within 1k of L1, so
4109 the first move can be shrunk from the 3 insn+constant sequence into
4110 just 1 insn, and the constant moved to L3 to make:
4111
4112 mov.l L1,rn
4113 ..
4114 mov.l L3,rn
4115 bra L4
4116 nop
4117 align
4118 L3:.long value
4119 L4:.long value
4120
4121 Then the second move becomes the target for the shortening process. */
4122
4123 typedef struct
4124 {
4125 rtx value; /* Value in table. */
4126 rtx label; /* Label of value. */
4127 label_ref_list_t wend; /* End of window. */
4128 enum machine_mode mode; /* Mode of value. */
4129
4130 /* True if this constant is accessed as part of a post-increment
4131 sequence. Note that HImode constants are never accessed in this way. */
4132 bool part_of_sequence_p;
4133 } pool_node;
4134
4135 /* The maximum number of constants that can fit into one pool, since
4136 constants in the range 0..510 are at least 2 bytes long, and in the
4137 range from there to 1018 at least 4 bytes. */
4138
4139 #define MAX_POOL_SIZE 372
4140 static pool_node pool_vector[MAX_POOL_SIZE];
4141 static int pool_size;
4142 static rtx pool_window_label;
4143 static int pool_window_last;
4144
4145 static int max_labelno_before_reorg;
4146
4147 /* ??? If we need a constant in HImode which is the truncated value of a
4148 constant we need in SImode, we could combine the two entries thus saving
4149 two bytes. Is this common enough to be worth the effort of implementing
4150 it? */
4151
4152 /* ??? This stuff should be done at the same time that we shorten branches.
4153 As it is now, we must assume that all branches are the maximum size, and
4154 this causes us to almost always output constant pools sooner than
4155 necessary. */
4156
4157 /* Add a constant to the pool and return its label. */
4158
4159 static rtx
4160 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4161 {
4162 int i;
4163 rtx lab, new_rtx;
4164 label_ref_list_t ref, newref;
4165
4166 /* First see if we've already got it. */
4167 for (i = 0; i < pool_size; i++)
4168 {
4169 if (x->code == pool_vector[i].value->code
4170 && mode == pool_vector[i].mode)
4171 {
4172 if (x->code == CODE_LABEL)
4173 {
4174 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4175 continue;
4176 }
4177 if (rtx_equal_p (x, pool_vector[i].value))
4178 {
4179 lab = new_rtx = 0;
4180 if (! last_value
4181 || ! i
4182 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4183 {
4184 new_rtx = gen_label_rtx ();
4185 LABEL_REFS (new_rtx) = pool_vector[i].label;
4186 pool_vector[i].label = lab = new_rtx;
4187 }
4188 if (lab && pool_window_label)
4189 {
4190 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4191 newref->label = pool_window_label;
4192 ref = pool_vector[pool_window_last].wend;
4193 newref->next = ref;
4194 pool_vector[pool_window_last].wend = newref;
4195 }
4196 if (new_rtx)
4197 pool_window_label = new_rtx;
4198 pool_window_last = i;
4199 return lab;
4200 }
4201 }
4202 }
4203
4204 /* Need a new one. */
4205 pool_vector[pool_size].value = x;
4206 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4207 {
4208 lab = 0;
4209 pool_vector[pool_size - 1].part_of_sequence_p = true;
4210 }
4211 else
4212 lab = gen_label_rtx ();
4213 pool_vector[pool_size].mode = mode;
4214 pool_vector[pool_size].label = lab;
4215 pool_vector[pool_size].wend = NULL;
4216 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4217 if (lab && pool_window_label)
4218 {
4219 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4220 newref->label = pool_window_label;
4221 ref = pool_vector[pool_window_last].wend;
4222 newref->next = ref;
4223 pool_vector[pool_window_last].wend = newref;
4224 }
4225 if (lab)
4226 pool_window_label = lab;
4227 pool_window_last = pool_size;
4228 pool_size++;
4229 return lab;
4230 }
4231
4232 /* Output the literal table. START, if nonzero, is the first instruction
4233 this table is needed for, and also indicates that there is at least one
4234 casesi_worker_2 instruction; we have to emit the operand3 labels from
4235 these insns at a 4-byte aligned position. BARRIER is the barrier
4236 after which we are to place the table. */
4237
4238 static void
4239 dump_table (rtx start, rtx barrier)
4240 {
4241 rtx scan = barrier;
4242 int i;
4243 bool need_align = true;
4244 rtx lab;
4245 label_ref_list_t ref;
4246 bool have_df = false;
4247
4248 /* Do two passes, first time dump out the HI sized constants. */
4249
4250 for (i = 0; i < pool_size; i++)
4251 {
4252 pool_node *p = &pool_vector[i];
4253
4254 if (p->mode == HImode)
4255 {
4256 if (need_align)
4257 {
4258 scan = emit_insn_after (gen_align_2 (), scan);
4259 need_align = false;
4260 }
4261 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4262 scan = emit_label_after (lab, scan);
4263 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4264 scan);
4265 for (ref = p->wend; ref; ref = ref->next)
4266 {
4267 lab = ref->label;
4268 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4269 }
4270 }
4271 else if (p->mode == DFmode)
4272 have_df = true;
4273 }
4274
4275 need_align = true;
4276
4277 if (start)
4278 {
4279 scan = emit_insn_after (gen_align_4 (), scan);
4280 need_align = false;
4281 for (; start != barrier; start = NEXT_INSN (start))
4282 if (NONJUMP_INSN_P (start)
4283 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4284 {
4285 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4286 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4287
4288 scan = emit_label_after (lab, scan);
4289 }
4290 }
4291 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4292 {
4293 rtx align_insn = NULL_RTX;
4294
4295 scan = emit_label_after (gen_label_rtx (), scan);
4296 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4297 need_align = false;
4298
4299 for (i = 0; i < pool_size; i++)
4300 {
4301 pool_node *p = &pool_vector[i];
4302
4303 switch (p->mode)
4304 {
4305 case HImode:
4306 break;
4307 case SImode:
4308 case SFmode:
4309 if (align_insn && !p->part_of_sequence_p)
4310 {
4311 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4312 emit_label_before (lab, align_insn);
4313 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4314 align_insn);
4315 for (ref = p->wend; ref; ref = ref->next)
4316 {
4317 lab = ref->label;
4318 emit_insn_before (gen_consttable_window_end (lab),
4319 align_insn);
4320 }
4321 delete_insn (align_insn);
4322 align_insn = NULL_RTX;
4323 continue;
4324 }
4325 else
4326 {
4327 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4328 scan = emit_label_after (lab, scan);
4329 scan = emit_insn_after (gen_consttable_4 (p->value,
4330 const0_rtx), scan);
4331 need_align = ! need_align;
4332 }
4333 break;
4334 case DFmode:
4335 if (need_align)
4336 {
4337 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4338 align_insn = scan;
4339 need_align = false;
4340 }
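/* Fall through. */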
4341 case DImode:
4342 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4343 scan = emit_label_after (lab, scan);
4344 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4345 scan);
4346 break;
4347 default:
4348 gcc_unreachable ();
4349 }
4350
4351 if (p->mode != HImode)
4352 {
4353 for (ref = p->wend; ref; ref = ref->next)
4354 {
4355 lab = ref->label;
4356 scan = emit_insn_after (gen_consttable_window_end (lab),
4357 scan);
4358 }
4359 }
4360 }
4361
4362 pool_size = 0;
4363 }
4364
4365 for (i = 0; i < pool_size; i++)
4366 {
4367 pool_node *p = &pool_vector[i];
4368
4369 switch (p->mode)
4370 {
4371 case HImode:
4372 break;
4373 case SImode:
4374 case SFmode:
4375 if (need_align)
4376 {
4377 need_align = false;
4378 scan = emit_label_after (gen_label_rtx (), scan);
4379 scan = emit_insn_after (gen_align_4 (), scan);
4380 }
4381 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4382 scan = emit_label_after (lab, scan);
4383 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4384 scan);
4385 break;
4386 case DFmode:
4387 case DImode:
4388 if (need_align)
4389 {
4390 need_align = false;
4391 scan = emit_label_after (gen_label_rtx (), scan);
4392 scan = emit_insn_after (gen_align_4 (), scan);
4393 }
4394 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4395 scan = emit_label_after (lab, scan);
4396 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4397 scan);
4398 break;
4399 default:
4400 gcc_unreachable ();
4401 }
4402
4403 if (p->mode != HImode)
4404 {
4405 for (ref = p->wend; ref; ref = ref->next)
4406 {
4407 lab = ref->label;
4408 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4409 }
4410 }
4411 }
4412
4413 scan = emit_insn_after (gen_consttable_end (), scan);
4414 scan = emit_barrier_after (scan);
4415 pool_size = 0;
4416 pool_window_label = NULL_RTX;
4417 pool_window_last = 0;
4418 }
4419
4420 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4421
4422 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4423
4424 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4425 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4426 need to fix it if the input value is CONST_OK_FOR_I08. */
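/* For example, without SH2A, (set (reg:SI ...) (const_int 4)) is not
   broken because the constant satisfies I08 and fits a mov #imm,Rn,
   whereas (set (reg:SI ...) (const_int 0x12345)) is broken and has to be
   loaded from the constant pool. */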
4427
4428 static bool
4429 broken_move (rtx insn)
4430 {
4431 if (NONJUMP_INSN_P (insn))
4432 {
4433 rtx pat = PATTERN (insn);
4434 if (GET_CODE (pat) == PARALLEL)
4435 pat = XVECEXP (pat, 0, 0);
4436 if (GET_CODE (pat) == SET
4437 /* We can load any 8-bit value if we don't care what the high
4438 order bits end up as. */
4439 && GET_MODE (SET_DEST (pat)) != QImode
4440 && (CONSTANT_P (SET_SRC (pat))
4441 /* Match mova_const. */
4442 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4443 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4444 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4445 && ! (TARGET_SH2E
4446 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4447 && (fp_zero_operand (SET_SRC (pat))
4448 || fp_one_operand (SET_SRC (pat)))
4449 /* In general we don't know the current setting of fpscr, so disable fldi.
4450 There is an exception if this was a register-register move
4451 before reload - and hence it was ascertained that we have
4452 single precision setting - and in a post-reload optimization
4453 we changed this to do a constant load. In that case
4454 we don't have an r0 clobber, hence we must use fldi. */
4455 && (TARGET_FMOVD
4456 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4457 == SCRATCH))
4458 && REG_P (SET_DEST (pat))
4459 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4460 && ! (TARGET_SH2A
4461 && GET_MODE (SET_DEST (pat)) == SImode
4462 && (satisfies_constraint_I20 (SET_SRC (pat))
4463 || satisfies_constraint_I28 (SET_SRC (pat))))
4464 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4465 return true;
4466 }
4467
4468 return false;
4469 }
4470
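/* Return true if INSN is a mova whose source is still an UNSPEC_MOVA of a
   label reference, i.e. it has not been turned into a mova_const. */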
4471 static bool
4472 mova_p (rtx insn)
4473 {
4474 return (NONJUMP_INSN_P (insn)
4475 && GET_CODE (PATTERN (insn)) == SET
4476 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4477 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4478 /* Don't match mova_const. */
4479 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4480 }
4481
4482 /* Fix up a mova from a switch that went out of range. */
4483 static void
4484 fixup_mova (rtx mova)
4485 {
4486 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4487 if (! flag_pic)
4488 {
4489 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4490 INSN_CODE (mova) = -1;
4491 }
4492 else
4493 {
4494 rtx worker = mova;
4495 rtx lab = gen_label_rtx ();
4496 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4497
4498 do
4499 {
4500 worker = NEXT_INSN (worker);
4501 gcc_assert (worker
4502 && !LABEL_P (worker)
4503 && !JUMP_P (worker));
4504 } while (NOTE_P (worker)
4505 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4506 wpat = PATTERN (worker);
4507 wpat0 = XVECEXP (wpat, 0, 0);
4508 wpat1 = XVECEXP (wpat, 0, 1);
4509 wsrc = SET_SRC (wpat0);
4510 PATTERN (worker) = (gen_casesi_worker_2
4511 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4512 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4513 XEXP (wpat1, 0)));
4514 INSN_CODE (worker) = -1;
4515 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4516 base = gen_rtx_LABEL_REF (Pmode, lab);
4517 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4518 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4519 INSN_CODE (mova) = -1;
4520 }
4521 }
4522
4523 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4524 *num_mova, and check if the new mova is not nested within the first one.
4525 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4526 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4527 static int
4528 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4529 {
4530 int n_addr = 0; /* Initialization to shut up spurious warning. */
4531 int f_target, n_target = 0; /* Likewise. */
4532
4533 if (optimize)
4534 {
4535 /* If NEW_MOVA has no address yet, it will be handled later. */
4536 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4537 return -1;
4538
4539 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4540 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4541 if (n_addr > n_target || n_addr + 1022 < n_target)
4542 {
4543 /* Change the mova into a load.
4544 broken_move will then return true for it. */
4545 fixup_mova (new_mova);
4546 return 1;
4547 }
4548 }
4549 if (!(*num_mova)++)
4550 {
4551 *first_mova = new_mova;
4552 return 2;
4553 }
4554 if (!optimize
4555 || ((f_target
4556 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4557 >= n_target))
4558 return -1;
4559
4560 (*num_mova)--;
4561 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4562 > n_target - n_addr)
4563 {
4564 fixup_mova (*first_mova);
4565 return 0;
4566 }
4567 else
4568 {
4569 fixup_mova (new_mova);
4570 return 1;
4571 }
4572 }
4573
4574 /* Find the last barrier from insn FROM which is close enough to hold the
4575 constant pool. If we can't find one, then create one near the end of
4576 the range. */
4577
4578 static rtx
4579 find_barrier (int num_mova, rtx mova, rtx from)
4580 {
4581 int count_si = 0;
4582 int count_hi = 0;
4583 int found_hi = 0;
4584 int found_si = 0;
4585 int found_di = 0;
4586 int hi_align = 2;
4587 int si_align = 2;
4588 int leading_mova = num_mova;
4589 rtx barrier_before_mova = NULL_RTX;
4590 rtx found_barrier = NULL_RTX;
4591 rtx good_barrier = NULL_RTX;
4592 int si_limit;
4593 int hi_limit;
4594 rtx orig = from;
4595 rtx last_got = NULL_RTX;
4596 rtx last_symoff = NULL_RTX;
4597
4598 /* For HImode: range is 510, add 4 because pc counts from address of
4599 second instruction after this one, subtract 2 for the jump instruction
4600 that we may need to emit before the table, subtract 2 for the instruction
4601 that fills the jump delay slot (in very rare cases, reorg will take an
4602 instruction from after the constant pool or will leave the delay slot
4603 empty). This gives 510.
4604 For SImode: range is 1020, add 4 because pc counts from address of
4605 second instruction after this one, subtract 2 in case pc is 2 byte
4606 aligned, subtract 2 for the jump instruction that we may need to emit
4607 before the table, subtract 2 for the instruction that fills the jump
4608 delay slot. This gives 1018. */
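/* That is, hi_limit = 510 + 4 - 2 - 2 = 510 and
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018, as assigned below. */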
4609
4610 /* The branch will always be shortened now that the reference address for
4611 forward branches is the successor address, thus we need no longer make
4612 adjustments to the [sh]i_limit for -O0. */
4613
4614 si_limit = 1018;
4615 hi_limit = 510;
4616
4617 while (from && count_si < si_limit && count_hi < hi_limit)
4618 {
4619 int inc = get_attr_length (from);
4620 int new_align = 1;
4621
4622 /* If this is a label that existed at the time of the compute_alignments
4623 call, determine the alignment. N.B. When find_barrier recurses for
4624 an out-of-reach mova, we might see labels at the start of previously
4625 inserted constant tables. */
4626 if (LABEL_P (from)
4627 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4628 {
4629 if (optimize)
4630 new_align = 1 << label_to_alignment (from);
4631 else if (BARRIER_P (prev_nonnote_insn (from)))
4632 new_align = 1 << barrier_align (from);
4633 else
4634 new_align = 1;
4635 inc = 0;
4636 }
4637 /* In case we are scanning a constant table because of recursion, check
4638 for explicit alignments. If the table is long, we might be forced
4639 to emit the new table in front of it; the length of the alignment
4640 might be the last straw. */
4641 else if (NONJUMP_INSN_P (from)
4642 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4643 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4644 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4645 /* When we find the end of a constant table, paste the new constant
4646 at the end. That is better than putting it in front because
4647 this way, we don't need extra alignment for adding a 4-byte-aligned
4648 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4649 else if (NONJUMP_INSN_P (from)
4650 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4651 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4652 return from;
4653
4654 if (BARRIER_P (from))
4655 {
4656 rtx next;
4657
4658 found_barrier = from;
4659
4660 /* If we are at the end of the function, or in front of an alignment
4661 instruction, we need not insert an extra alignment. We prefer
4662 this kind of barrier. */
4663 if (barrier_align (from) > 2)
4664 good_barrier = from;
4665
4666 /* If we are at the end of a hot/cold block, dump the constants
4667 here. */
4668 next = NEXT_INSN (from);
4669 if (next
4670 && NOTE_P (next)
4671 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4672 break;
4673 }
4674
4675 if (broken_move (from))
4676 {
4677 rtx pat, src, dst;
4678 enum machine_mode mode;
4679
4680 pat = PATTERN (from);
4681 if (GET_CODE (pat) == PARALLEL)
4682 pat = XVECEXP (pat, 0, 0);
4683 src = SET_SRC (pat);
4684 dst = SET_DEST (pat);
4685 mode = GET_MODE (dst);
4686
4687 /* A GOT pc-relative setting comes in a pair of
4688 mova .L8,r0
4689 mov.l .L8,r12
4690 instructions (plus add r0,r12).
4691 Remember if we see one without the other. */
4692 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4693 last_got = last_got ? NULL_RTX : from;
4694 else if (PIC_ADDR_P (src))
4695 last_got = last_got ? NULL_RTX : from;
4696
4697 /* We must explicitly check the mode, because sometimes the
4698 front end will generate code to load unsigned constants into
4699 HImode targets without properly sign extending them. */
4700 if (mode == HImode
4701 || (mode == SImode && satisfies_constraint_I16 (src)
4702 && REGNO (dst) != FPUL_REG))
4703 {
4704 found_hi += 2;
4705 /* We put the short constants before the long constants, so
4706 we must count the length of short constants in the range
4707 for the long constants. */
4708 /* ??? This isn't optimal, but is easy to do. */
4709 si_limit -= 2;
4710 }
4711 else
4712 {
4713 /* We dump DF/DI constants before SF/SI ones, because
4714 the limit is the same, but the alignment requirements
4715 are higher. We may waste up to 4 additional bytes
4716 for alignment, and the DF/DI constant may have
4717 another SF/SI constant placed before it. */
4718 if (TARGET_SHCOMPACT
4719 && ! found_di
4720 && (mode == DFmode || mode == DImode))
4721 {
4722 found_di = 1;
4723 si_limit -= 8;
4724 }
4725 while (si_align > 2 && found_si + si_align - 2 > count_si)
4726 si_align >>= 1;
4727 if (found_si > count_si)
4728 count_si = found_si;
4729 found_si += GET_MODE_SIZE (mode);
4730 if (num_mova)
4731 si_limit -= GET_MODE_SIZE (mode);
4732 }
4733 }
4734
4735 if (mova_p (from))
4736 {
4737 switch (untangle_mova (&num_mova, &mova, from))
4738 {
4739 case 1:
4740 if (flag_pic)
4741 {
4742 rtx src = SET_SRC (PATTERN (from));
4743 if (GET_CODE (src) == CONST
4744 && GET_CODE (XEXP (src, 0)) == UNSPEC
4745 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4746 last_symoff = from;
4747 }
4748 break;
4749 case 0: return find_barrier (0, 0, mova);
4750 case 2:
4751 {
4752 leading_mova = 0;
4753 barrier_before_mova
4754 = good_barrier ? good_barrier : found_barrier;
4755 }
4756 default: break;
4757 }
4758 if (found_si > count_si)
4759 count_si = found_si;
4760 }
4761 else if (JUMP_TABLE_DATA_P (from))
4762 {
4763 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4764 || (num_mova
4765 && (prev_nonnote_insn (from)
4766 == XEXP (MOVA_LABELREF (mova), 0))))
4767 num_mova--;
4768 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4769 {
4770 /* We have just passed the barrier in front of the
4771 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4772 the ADDR_DIFF_VEC is accessed as data, just like our pool
4773 constants, this is a good opportunity to accommodate what
4774 we have gathered so far.
4775 If we waited any longer, we could end up at a barrier in
4776 front of code, which gives worse cache usage for separated
4777 instruction / data caches. */
4778 good_barrier = found_barrier;
4779 break;
4780 }
4781 else
4782 {
4783 rtx body = PATTERN (from);
4784 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4785 }
4786 }
4787 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4788 else if (JUMP_P (from)
4789 && ! TARGET_SH2
4790 && ! optimize_size)
4791 new_align = 4;
4792
4793 /* There is a possibility that a bf is transformed into a bf/s by the
4794 delay slot scheduler. */
4795 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4796 && get_attr_type (from) == TYPE_CBRANCH
4797 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4798 inc += 2;
4799
4800 if (found_si)
4801 {
4802 count_si += inc;
4803 if (new_align > si_align)
4804 {
4805 si_limit -= (count_si - 1) & (new_align - si_align);
4806 si_align = new_align;
4807 }
4808 count_si = (count_si + new_align - 1) & -new_align;
4809 }
4810 if (found_hi)
4811 {
4812 count_hi += inc;
4813 if (new_align > hi_align)
4814 {
4815 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4816 hi_align = new_align;
4817 }
4818 count_hi = (count_hi + new_align - 1) & -new_align;
4819 }
4820 from = NEXT_INSN (from);
4821 }
4822
4823 if (num_mova)
4824 {
4825 if (leading_mova)
4826 {
4827 /* Try as we might, the leading mova is out of range. Change
4828 it into a load (which will become a pcload) and retry. */
4829 fixup_mova (mova);
4830 return find_barrier (0, 0, mova);
4831 }
4832 else
4833 {
4834 /* Insert the constant pool table before the mova instruction,
4835 to prevent the mova label reference from going out of range. */
4836 from = mova;
4837 good_barrier = found_barrier = barrier_before_mova;
4838 }
4839 }
4840
4841 if (found_barrier)
4842 {
4843 if (good_barrier && next_real_insn (found_barrier))
4844 found_barrier = good_barrier;
4845 }
4846 else
4847 {
4848 /* We didn't find a barrier in time to dump our stuff,
4849 so we'll make one. */
4850 rtx label = gen_label_rtx ();
4851
4852 /* Don't emit a constant table in the middle of insns for
4853 casesi_worker_2. This is a bit overkill but is enough
4854 because casesi_worker_2 doesn't appear very frequently. */
4855 if (last_symoff)
4856 from = last_symoff;
4857
4858 /* If we exceeded the range, then we must back up over the last
4859 instruction we looked at. Otherwise, we just need to undo the
4860 NEXT_INSN at the end of the loop. */
4861 if (PREV_INSN (from) != orig
4862 && (count_hi > hi_limit || count_si > si_limit))
4863 from = PREV_INSN (PREV_INSN (from));
4864 else
4865 from = PREV_INSN (from);
4866
4867 /* Don't emit a constant table in the middle of global pointer setting,
4868 since that would move the addressing base GOT into another table.
4869 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4870 in the pool anyway, so just move up the whole constant pool.
4871
4872 However, avoid doing so when the last single GOT mov is the starting
4873 insn itself. Going above the start insn would create a negative
4874 offset, causing errors. */
4875 if (last_got && last_got != orig)
4876 from = PREV_INSN (last_got);
4877
4878 /* Don't insert the constant pool table at the position which
4879 may be the landing pad. */
4880 if (flag_exceptions
4881 && CALL_P (from)
4882 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4883 from = PREV_INSN (from);
4884
4885 /* Walk back to be just before any jump or label.
4886 Putting it before a label reduces the number of times the branch
4887 around the constant pool table will be hit. Putting it before
4888 a jump makes it more likely that the bra delay slot will be
4889 filled. */
4890 while (NOTE_P (from) || JUMP_P (from)
4891 || LABEL_P (from))
4892 from = PREV_INSN (from);
4893
4894 /* Make sure we do not split between a call and its corresponding
4895 CALL_ARG_LOCATION note. */
4896 if (CALL_P (from))
4897 {
4898 rtx next = NEXT_INSN (from);
4899 if (next && NOTE_P (next)
4900 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
4901 from = next;
4902 }
4903
4904 from = emit_jump_insn_after (gen_jump (label), from);
4905 JUMP_LABEL (from) = label;
4906 LABEL_NUSES (label) = 1;
4907 found_barrier = emit_barrier_after (from);
4908 emit_label_after (label, found_barrier);
4909 }
4910
4911 return found_barrier;
4912 }
4913
4914 /* If the instruction INSN is implemented by a special function, and we can
4915 positively find the register that is used to call the sfunc, and this
4916 register is not used anywhere else in this instruction - except as the
4917 destination of a set - return this register; else, return NULL_RTX. */
4918 rtx
4919 sfunc_uses_reg (rtx insn)
4920 {
4921 int i;
4922 rtx pattern, part, reg_part, reg;
4923
4924 if (!NONJUMP_INSN_P (insn))
4925 return NULL_RTX;
4926 pattern = PATTERN (insn);
4927 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4928 return NULL_RTX;
4929
4930 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4931 {
4932 part = XVECEXP (pattern, 0, i);
4933 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4934 reg_part = part;
4935 }
4936 if (! reg_part)
4937 return NULL_RTX;
4938 reg = XEXP (reg_part, 0);
4939 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4940 {
4941 part = XVECEXP (pattern, 0, i);
4942 if (part == reg_part || GET_CODE (part) == CLOBBER)
4943 continue;
4944 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4945 && REG_P (SET_DEST (part)))
4946 ? SET_SRC (part) : part)))
4947 return NULL_RTX;
4948 }
4949 return reg;
4950 }
4951
4952 /* See if the only way in which INSN uses REG is by calling it, or by
4953 setting it while calling it. Set *SET to a SET rtx if the register
4954 is set by INSN. */
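/* For example, a plain call through REG, i.e. (call (mem REG) ...), is
   not a "noncall" use and yields false, whereas an insn that stores REG
   to memory or uses it in an address computation makes this return
   true. */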
4955
4956 static bool
4957 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4958 {
4959 rtx pattern, reg2;
4960
4961 *set = NULL_RTX;
4962
4963 reg2 = sfunc_uses_reg (insn);
4964 if (reg2 && REGNO (reg2) == REGNO (reg))
4965 {
4966 pattern = single_set (insn);
4967 if (pattern
4968 && REG_P (SET_DEST (pattern))
4969 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4970 *set = pattern;
4971 return false;
4972 }
4973 if (!CALL_P (insn))
4974 {
4975 /* We don't use rtx_equal_p because we don't care if the mode is
4976 different. */
4977 pattern = single_set (insn);
4978 if (pattern
4979 && REG_P (SET_DEST (pattern))
4980 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4981 {
4982 rtx par, part;
4983 int i;
4984
4985 *set = pattern;
4986 par = PATTERN (insn);
4987 if (GET_CODE (par) == PARALLEL)
4988 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4989 {
4990 part = XVECEXP (par, 0, i);
4991 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4992 return true;
4993 }
4994 return reg_mentioned_p (reg, SET_SRC (pattern));
4995 }
4996
4997 return true;
4998 }
4999
5000 pattern = PATTERN (insn);
5001
5002 if (GET_CODE (pattern) == PARALLEL)
5003 {
5004 int i;
5005
5006 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5007 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5008 return true;
5009 pattern = XVECEXP (pattern, 0, 0);
5010 }
5011
5012 if (GET_CODE (pattern) == SET)
5013 {
5014 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5015 {
5016 /* We don't use rtx_equal_p, because we don't care if the
5017 mode is different. */
5018 if (!REG_P (SET_DEST (pattern))
5019 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5020 return true;
5021
5022 *set = pattern;
5023 }
5024
5025 pattern = SET_SRC (pattern);
5026 }
5027
5028 if (GET_CODE (pattern) != CALL
5029 || !MEM_P (XEXP (pattern, 0))
5030 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5031 return true;
5032
5033 return false;
5034 }
5035
5036 /* Given X, a pattern of an insn or a part of it, return a mask of used
5037 general registers. Bits 0..15 mean that the respective registers
5038 are used as inputs in the instruction. Bits 16..31 mean that the
5039 registers 0..15, respectively, are used as outputs, or are clobbered.
5040 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
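/* For example, assuming an SImode value occupies a single hard register,
   (set (reg:SI 2) (plus:SI (reg:SI 3) (reg:SI 4))) yields
   0x8 | 0x10 | (1 << (2 + 16)) == 0x40018: r3 and r4 used as inputs and
   r2 as an output. */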
5041 int
5042 regs_used (rtx x, int is_dest)
5043 {
5044 enum rtx_code code;
5045 const char *fmt;
5046 int i, used = 0;
5047
5048 if (! x)
5049 return used;
5050 code = GET_CODE (x);
5051 switch (code)
5052 {
5053 case REG:
5054 if (REGNO (x) < 16)
5055 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5056 << (REGNO (x) + is_dest));
5057 return 0;
5058 case SUBREG:
5059 {
5060 rtx y = SUBREG_REG (x);
5061
5062 if (!REG_P (y))
5063 break;
5064 if (REGNO (y) < 16)
5065 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5066 << (REGNO (y) +
5067 subreg_regno_offset (REGNO (y),
5068 GET_MODE (y),
5069 SUBREG_BYTE (x),
5070 GET_MODE (x)) + is_dest));
5071 return 0;
5072 }
5073 case SET:
5074 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5075 case RETURN:
5076 /* If there was a return value, it must have been indicated with USE. */
5077 return 0x00ffff00;
5078 case CLOBBER:
5079 is_dest = 1;
5080 break;
5081 case MEM:
5082 is_dest = 0;
5083 break;
5084 case CALL:
5085 used |= 0x00ff00f0;
5086 break;
5087 default:
5088 break;
5089 }
5090
5091 fmt = GET_RTX_FORMAT (code);
5092
5093 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5094 {
5095 if (fmt[i] == 'E')
5096 {
5097 int j;
5098 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5099 used |= regs_used (XVECEXP (x, i, j), is_dest);
5100 }
5101 else if (fmt[i] == 'e')
5102 used |= regs_used (XEXP (x, i), is_dest);
5103 }
5104 return used;
5105 }
5106
5107 /* Create an instruction that prevents redirection of a conditional branch
5108 to the destination of the JUMP with address ADDR.
5109 If the branch needs to be implemented as an indirect jump, try to find
5110 a scratch register for it.
5111 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5112 If any preceding insn that doesn't fit into a delay slot is good enough,
5113 pass 1. Pass 2 if a definite blocking insn is needed.
5114 -1 is used internally to avoid deep recursion.
5115 If a blocking instruction is made or recognized, return it. */
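/* For example, gen_far_branch below calls this with NEED_BLOCK == 2 after
   splitting an out-of-range branch, so that reorg cannot undo the
   split. */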
5116
5117 static rtx
5118 gen_block_redirect (rtx jump, int addr, int need_block)
5119 {
5120 int dead = 0;
5121 rtx prev = prev_nonnote_insn (jump);
5122 rtx dest;
5123
5124 /* First, check if we already have an instruction that satisfies our need. */
5125 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5126 {
5127 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5128 return prev;
5129 if (GET_CODE (PATTERN (prev)) == USE
5130 || GET_CODE (PATTERN (prev)) == CLOBBER
5131 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5132 prev = jump;
5133 else if ((need_block &= ~1) < 0)
5134 return prev;
5135 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5136 need_block = 0;
5137 }
5138 if (GET_CODE (PATTERN (jump)) == RETURN)
5139 {
5140 if (! need_block)
5141 return prev;
5142 /* Reorg even does nasty things with return insns that cause branches
5143 to go out of range - see find_end_label and callers. */
5144 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5145 }
5146 /* We can't use JUMP_LABEL here because it might be undefined
5147 when not optimizing. */
5148 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5149 /* If the branch is out of range, try to find a scratch register for it. */
5150 if (optimize
5151 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5152 > 4092 + 4098))
5153 {
5154 rtx scan;
5155 /* Don't look for the stack pointer as a scratch register;
5156 it would cause trouble if an interrupt occurred. */
5157 unsigned attempt = 0x7fff, used;
5158 int jump_left = flag_expensive_optimizations + 1;
5159
5160 /* It is likely that the most recent eligible instruction is wanted for
5161 the delay slot. Therefore, find out which registers it uses, and
5162 try to avoid using them. */
5163
5164 for (scan = jump; (scan = PREV_INSN (scan)); )
5165 {
5166 enum rtx_code code;
5167
5168 if (INSN_DELETED_P (scan))
5169 continue;
5170 code = GET_CODE (scan);
5171 if (code == CODE_LABEL || code == JUMP_INSN)
5172 break;
5173 if (code == INSN
5174 && GET_CODE (PATTERN (scan)) != USE
5175 && GET_CODE (PATTERN (scan)) != CLOBBER
5176 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5177 {
5178 attempt &= ~regs_used (PATTERN (scan), 0);
5179 break;
5180 }
5181 }
5182 for (used = dead = 0, scan = JUMP_LABEL (jump);
5183 (scan = NEXT_INSN (scan)); )
5184 {
5185 enum rtx_code code;
5186
5187 if (INSN_DELETED_P (scan))
5188 continue;
5189 code = GET_CODE (scan);
5190 if (INSN_P (scan))
5191 {
5192 used |= regs_used (PATTERN (scan), 0);
5193 if (code == CALL_INSN)
5194 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5195 dead |= (used >> 16) & ~used;
5196 if (dead & attempt)
5197 {
5198 dead &= attempt;
5199 break;
5200 }
5201 if (code == JUMP_INSN)
5202 {
5203 if (jump_left-- && simplejump_p (scan))
5204 scan = JUMP_LABEL (scan);
5205 else
5206 break;
5207 }
5208 }
5209 }
5210 /* Mask out the stack pointer again, in case it was
5211 the only 'free' register we have found. */
5212 dead &= 0x7fff;
5213 }
5214 /* If the immediate destination is still in range, check for possible
5215 threading with a jump beyond the delay slot insn.
5216 Don't check if we are called recursively; the jump has been or will be
5217 checked in a different invocation then. */
5218
5219 else if (optimize && need_block >= 0)
5220 {
5221 rtx next = next_active_insn (next_active_insn (dest));
5222 if (next && JUMP_P (next)
5223 && GET_CODE (PATTERN (next)) == SET
5224 && recog_memoized (next) == CODE_FOR_jump_compact)
5225 {
5226 dest = JUMP_LABEL (next);
5227 if (dest
5228 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5229 > 4092 + 4098))
5230 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5231 }
5232 }
5233
5234 if (dead)
5235 {
5236 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5237
5238 /* It would be nice if we could convert the jump into an indirect
5239 jump / far branch right now, thus exposing all constituent
5240 instructions to further optimization. However, reorg uses
5241 simplejump_p to determine if there is an unconditional jump where
5242 it should try to schedule instructions from the target of the
5243 branch; simplejump_p fails for indirect jumps even if they have
5244 a JUMP_LABEL. */
5245 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5246 (reg, GEN_INT (unspec_bbr_uid++)),
5247 jump);
5248 /* ??? We would like this to have the scope of the jump, but that
5249 scope will change when a delay slot insn of an inner scope is added.
5250 Hence, after delay slot scheduling, we'll have to expect
5251 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5252 the jump. */
5253
5254 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5255 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5256 return insn;
5257 }
5258 else if (need_block)
5259 /* We can't use JUMP_LABEL here because it might be undefined
5260 when not optimizing. */
5261 return emit_insn_before (gen_block_branch_redirect
5262 (GEN_INT (unspec_bbr_uid++)),
5263 jump);
5264 return prev;
5265 }
5266
5267 #define CONDJUMP_MIN -252
5268 #define CONDJUMP_MAX 262
5269 struct far_branch
5270 {
5271 /* A label (to be placed) in front of the jump
5272 that jumps to our ultimate destination. */
5273 rtx near_label;
5274 /* Where we are going to insert it if we cannot move the jump any farther,
5275 or the jump itself if we have picked up an existing jump. */
5276 rtx insert_place;
5277 /* The ultimate destination. */
5278 rtx far_label;
5279 struct far_branch *prev;
5280 /* If the branch has already been created, its address;
5281 else the address of its first prospective user. */
5282 int address;
5283 };
5284
5285 static void gen_far_branch (struct far_branch *);
5286 enum mdep_reorg_phase_e mdep_reorg_phase;
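/* Emit the far branch sequence for BP: place BP->near_label and an
   unconditional jump to BP->far_label (or a return when there is no far
   label) after BP->insert_place, and invert the conditional branch at
   BP->insert_place so that it branches around that jump to a new label
   emitted after it. */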
5287 static void
5288 gen_far_branch (struct far_branch *bp)
5289 {
5290 rtx insn = bp->insert_place;
5291 rtx jump;
5292 rtx label = gen_label_rtx ();
5293 int ok;
5294
5295 emit_label_after (label, insn);
5296 if (bp->far_label)
5297 {
5298 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5299 LABEL_NUSES (bp->far_label)++;
5300 }
5301 else
5302 jump = emit_jump_insn_after (gen_return (), insn);
5303 /* Emit a barrier so that reorg knows that any following instructions
5304 are not reachable via a fall-through path.
5305 But don't do this when not optimizing, since we wouldn't suppress the
5306 alignment for the barrier then, and could end up with out-of-range
5307 pc-relative loads. */
5308 if (optimize)
5309 emit_barrier_after (jump);
5310 emit_label_after (bp->near_label, insn);
5311 JUMP_LABEL (jump) = bp->far_label;
5312 ok = invert_jump (insn, label, 1);
5313 gcc_assert (ok);
5314
5315 /* If we are branching around a jump (rather than a return), prevent
5316 reorg from using an insn from the jump target as the delay slot insn -
5317 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5318 and it could cause branches to go out of range. */
5319 if (bp->far_label)
5320 (emit_insn_after
5321 (gen_stuff_delay_slot
5322 (GEN_INT (unspec_bbr_uid++),
5323 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5324 insn));
5325 /* Prevent reorg from undoing our splits. */
5326 gen_block_redirect (jump, bp->address += 2, 2);
5327 }
5328
5329 /* Fix up ADDR_DIFF_VECs. */
5330 void
5331 fixup_addr_diff_vecs (rtx first)
5332 {
5333 rtx insn;
5334
5335 for (insn = first; insn; insn = NEXT_INSN (insn))
5336 {
5337 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5338
5339 if (!JUMP_P (insn)
5340 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5341 continue;
5342 pat = PATTERN (insn);
5343 vec_lab = XEXP (XEXP (pat, 0), 0);
5344
5345 /* Search for the matching casesi_jump_2. */
5346 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5347 {
5348 if (!JUMP_P (prev))
5349 continue;
5350 prevpat = PATTERN (prev);
5351 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5352 continue;
5353 x = XVECEXP (prevpat, 0, 1);
5354 if (GET_CODE (x) != USE)
5355 continue;
5356 x = XEXP (x, 0);
5357 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5358 break;
5359 }
5360 /* FIXME: This is a bug in the optimizer, but it seems harmless
5361 to just avoid panicking. */
5362 if (!prev)
5363 continue;
5364
5365 /* Emit the reference label of the braf where it belongs, right after
5366 the casesi_jump_2 (i.e. braf). */
5367 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5368 emit_label_after (braf_label, prev);
5369
5370 /* Fix up the ADDR_DIFF_VEC to be relative
5371 to the reference address of the braf. */
5372 XEXP (XEXP (pat, 0), 0) = braf_label;
5373 }
5374 }
5375
5376 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5377 a barrier. Return the base 2 logarithm of the desired alignment. */
5378 int
5379 barrier_align (rtx barrier_or_label)
5380 {
5381 rtx next = next_real_insn (barrier_or_label), pat, prev;
5382
5383 if (! next)
5384 return 0;
5385
5386 pat = PATTERN (next);
5387
5388 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5389 return 2;
5390
5391 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5392 /* This is a barrier in front of a constant table. */
5393 return 0;
5394
5395 prev = prev_real_insn (barrier_or_label);
5396 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5397 {
5398 pat = PATTERN (prev);
5399 /* If this is a very small table, we want to keep the alignment after
5400 the table to the minimum for proper code alignment. */
5401 return ((optimize_size
5402 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5403 <= (unsigned) 1 << (CACHE_LOG - 2)))
5404 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5405 }
5406
5407 if (optimize_size)
5408 return 0;
5409
5410 if (! TARGET_SH2 || ! optimize)
5411 return align_jumps_log;
5412
5413 /* When fixing up pcloads, a constant table might be inserted just before
5414 the basic block that ends with the barrier. Thus, we can't trust the
5415 instruction lengths before that. */
5416 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5417 {
5418 /* Check if there is an immediately preceding branch to the insn beyond
5419 the barrier.  We must weigh the cost of discarding useful information
5420 from the current cache line when executing this branch and there is
5421 an alignment, against that of fetching unneeded insns in front of the
5422 branch target when there is no alignment. */
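/* A rough worked example of the credit bookkeeping below (illustrative
   numbers; e.g. if CACHE_LOG were 5, the initial credit would be
   (1 << 3) + 2 = 10 bytes): the length of each preceding non-jump insn
   is subtracted from the credit, and if a suitable branch to the insn
   beyond the barrier is found while the credit is still non-negative,
   returning 0 (no alignment) is considered the cheaper choice.  */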
5423
5424 /* There are two delay_slot cases to consider. One is the simple case
5425 where the preceding branch is to the insn beyond the barrier (simple
5426 delay slot filling), and the other is where the preceding branch has
5427 a delay slot that is a duplicate of the insn after the barrier
5428 (fill_eager_delay_slots) and the branch is to the insn after the insn
5429 after the barrier. */
5430
5431 /* PREV is presumed to be the JUMP_INSN for the barrier under
5432 investigation. Skip to the insn before it. */
5433
5434 int slot, credit;
5435 bool jump_to_next = false;
5436
5437 prev = prev_real_insn (prev);
5438
5439 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5440 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5441 prev = prev_real_insn (prev))
5442 {
5443 jump_to_next = false;
5444 if (GET_CODE (PATTERN (prev)) == USE
5445 || GET_CODE (PATTERN (prev)) == CLOBBER)
5446 continue;
5447 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5448 {
5449 prev = XVECEXP (PATTERN (prev), 0, 1);
5450 if (INSN_UID (prev) == INSN_UID (next))
5451 {
5452 /* Delay slot was filled with insn at jump target. */
5453 jump_to_next = true;
5454 continue;
5455 }
5456 }
5457
5458 if (slot
5459     && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5460 slot = 0;
5461 credit -= get_attr_length (prev);
5462 }
5463 if (prev && jump_to_label_p (prev))
5464 {
5465 rtx x;
5466 if (jump_to_next
5467 || next_real_insn (JUMP_LABEL (prev)) == next
5468 /* If relax_delay_slots() decides NEXT was redundant
5469 with some previous instruction, it will have
5470 redirected PREV's jump to the following insn. */
5471 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5472 /* There is no upper bound on redundant instructions
5473 that might have been skipped, but we must not put an
5474 alignment where none had been before. */
5475 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5476 (INSN_P (x)
5477 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5478 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5479 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5480 {
5481 rtx pat = PATTERN (prev);
5482 if (GET_CODE (pat) == PARALLEL)
5483 pat = XVECEXP (pat, 0, 0);
5484 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5485 return 0;
5486 }
5487 }
5488 }
5489
5490 return align_jumps_log;
5491 }
5492
5493 /* If we are inside a phony loop, almost any kind of label can turn up as the
5494 first one in the loop. Aligning a braf label causes incorrect switch
5495 destination addresses; we can detect braf labels because they are
5496 followed by a BARRIER.
5497 Applying loop alignment to small constant or switch tables is a waste
5498 of space, so we suppress this too. */
5499 int
5500 sh_loop_align (rtx label)
5501 {
5502 rtx next = label;
5503
5504 if (! optimize || optimize_size)
5505 return 0;
5506
5507 do
5508 next = next_nonnote_insn (next);
5509 while (next && LABEL_P (next));
5510
5511 if (! next
5512 || ! INSN_P (next)
5513 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5514 || recog_memoized (next) == CODE_FOR_consttable_2)
5515 return 0;
5516
5517 return align_loops_log;
5518 }
5519
5520 /* Do a final pass over the function, just before delayed branch
5521 scheduling. */
5522
5523 static void
5524 sh_reorg (void)
5525 {
5526 rtx first, insn, mova = NULL_RTX;
5527 int num_mova;
5528 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5529 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5530
5531 first = get_insns ();
5532 max_labelno_before_reorg = max_label_num ();
5533
5534 /* We must split call insns before introducing `mova's. If we're
5535 optimizing, they'll have already been split. Otherwise, make
5536 sure we don't split them too late. */
5537 if (! optimize)
5538 split_all_insns_noflow ();
5539
5540 if (TARGET_SHMEDIA)
5541 return;
5542
5543 /* If relaxing, generate pseudo-ops to associate function calls with
5544 the symbols they call. It does no harm to not generate these
5545 pseudo-ops. However, when we can generate them, it enables the
5546 linker to potentially relax the jsr to a bsr, and eliminate the
5547 register load and, possibly, the constant pool entry. */
5548
5549 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5550 if (TARGET_RELAX)
5551 {
5552 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5553 own purposes. This works because none of the remaining passes
5554 need to look at them.
5555
5556 ??? But it may break in the future. We should use a machine
5557 dependent REG_NOTE, or some other approach entirely. */
5558 for (insn = first; insn; insn = NEXT_INSN (insn))
5559 {
5560 if (INSN_P (insn))
5561 {
5562 rtx note;
5563
5564 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5565 NULL_RTX)) != 0)
5566 remove_note (insn, note);
5567 }
5568 }
5569
5570 for (insn = first; insn; insn = NEXT_INSN (insn))
5571 {
5572 rtx pattern, reg, link, set, scan, dies, label;
5573 int rescan = 0, foundinsn = 0;
5574
5575 if (CALL_P (insn))
5576 {
5577 pattern = PATTERN (insn);
5578
5579 if (GET_CODE (pattern) == PARALLEL)
5580 pattern = XVECEXP (pattern, 0, 0);
5581 if (GET_CODE (pattern) == SET)
5582 pattern = SET_SRC (pattern);
5583
5584 if (GET_CODE (pattern) != CALL
5585 || !MEM_P (XEXP (pattern, 0)))
5586 continue;
5587
5588 reg = XEXP (XEXP (pattern, 0), 0);
5589 }
5590 else
5591 {
5592 reg = sfunc_uses_reg (insn);
5593 if (! reg)
5594 continue;
5595 }
5596
5597 if (!REG_P (reg))
5598 continue;
5599
5600 /* Try scanning backward to find where the register is set. */
5601 link = NULL;
5602 for (scan = PREV_INSN (insn);
5603 scan && !LABEL_P (scan);
5604 scan = PREV_INSN (scan))
5605 {
5606 if (! INSN_P (scan))
5607 continue;
5608
5609 if (! reg_mentioned_p (reg, scan))
5610 continue;
5611
5612 if (noncall_uses_reg (reg, scan, &set))
5613 break;
5614
5615 if (set)
5616 {
5617 link = scan;
5618 break;
5619 }
5620 }
5621
5622 if (! link)
5623 continue;
5624
5625 /* The register is set at LINK. */
5626
5627 /* We can only optimize the function call if the register is
5628 being set to a symbol. In theory, we could sometimes
5629 optimize calls to a constant location, but the assembler
5630 and linker do not support that at present. */
5631 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5632 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5633 continue;
5634
5635 /* Scan forward from LINK to the place where REG dies, and
5636 make sure that the only insns which use REG are
5637 themselves function calls. */
5638
5639 /* ??? This doesn't work for call targets that were allocated
5640 by reload, since there may not be a REG_DEAD note for the
5641 register. */
5642
5643 dies = NULL_RTX;
5644 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5645 {
5646 rtx scanset;
5647
5648 /* Don't try to trace forward past a CODE_LABEL if we haven't
5649 seen INSN yet. Ordinarily, we will only find the setting insn
5650 if it is in the same basic block. However,
5651 cross-jumping can insert code labels in between the load and
5652 the call, and can result in situations where a single call
5653 insn may have two targets depending on where we came from. */
5654
5655 if (LABEL_P (scan) && ! foundinsn)
5656 break;
5657
5658 if (! INSN_P (scan))
5659 continue;
5660
5661 /* Don't try to trace forward past a JUMP. To optimize
5662 safely, we would have to check that all the
5663 instructions at the jump destination did not use REG. */
5664
5665 if (JUMP_P (scan))
5666 break;
5667
5668 if (! reg_mentioned_p (reg, scan))
5669 continue;
5670
5671 if (noncall_uses_reg (reg, scan, &scanset))
5672 break;
5673
5674 if (scan == insn)
5675 foundinsn = 1;
5676
5677 if (scan != insn
5678 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5679 {
5680 /* There is a function call to this register other
5681 than the one we are checking. If we optimize
5682 this call, we need to rescan again below. */
5683 rescan = 1;
5684 }
5685
5686 /* ??? We shouldn't have to worry about SCANSET here.
5687 We should just be able to check for a REG_DEAD note
5688 on a function call. However, the REG_DEAD notes are
5689 apparently not dependable around libcalls; c-torture
5690 execute/920501-2 is a test case. If SCANSET is set,
5691 then this insn sets the register, so it must have
5692 died earlier. Unfortunately, this will only handle
5693 the cases in which the register is, in fact, set in a
5694 later insn. */
5695
5696 /* ??? We shouldn't have to use FOUNDINSN here.
5697 This dates back to when we used LOG_LINKS to find
5698 the most recent insn which sets the register. */
5699
5700 if (foundinsn
5701 && (scanset
5702 || find_reg_note (scan, REG_DEAD, reg)))
5703 {
5704 dies = scan;
5705 break;
5706 }
5707 }
5708
5709 if (! dies)
5710 {
5711 /* Either there was a branch, or some insn used REG
5712 other than as a function call address. */
5713 continue;
5714 }
5715
5716 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5717 on the insn which sets the register, and on each call insn
5718 which uses the register. In final_prescan_insn we look for
5719 the REG_LABEL_OPERAND notes, and output the appropriate label
5720 or pseudo-op. */
5721
5722 label = gen_label_rtx ();
5723 add_reg_note (link, REG_LABEL_OPERAND, label);
5724 add_reg_note (insn, REG_LABEL_OPERAND, label);
5725 if (rescan)
5726 {
5727 scan = link;
5728 do
5729 {
5730 rtx reg2;
5731
5732 scan = NEXT_INSN (scan);
5733 if (scan != insn
5734 && ((CALL_P (scan)
5735 && reg_mentioned_p (reg, scan))
5736 || ((reg2 = sfunc_uses_reg (scan))
5737 && REGNO (reg2) == REGNO (reg))))
5738 add_reg_note (scan, REG_LABEL_OPERAND, label);
5739 }
5740 while (scan != dies);
5741 }
5742 }
5743 }
5744
5745 if (TARGET_SH2)
5746 fixup_addr_diff_vecs (first);
5747
5748 if (optimize)
5749 {
5750 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5751 shorten_branches (first);
5752 }
5753
5754 /* Scan the function looking for move instructions which have to be
5755 changed to pc-relative loads and insert the literal tables. */
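/* An illustrative sketch (hypothetical label and constant): a move such as
   "r1 = 0x12345678", whose constant does not fit in an immediate field,
   is rewritten as a pc-relative load from a literal table placed after a
   nearby barrier:

	mov.l	.L100,r1
	...
	.align 2
   .L100:
	.long	0x12345678
  */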
5756 label_ref_list_pool = create_alloc_pool ("label references list",
5757 sizeof (struct label_ref_list_d),
5758 30);
5759 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5760 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5761 {
5762 if (mova_p (insn))
5763 {
5764 /* ??? basic block reordering can move a switch table dispatch
5765 below the switch table. Check if that has happened.
5766 We only have the addresses available when optimizing; but then,
5767 this check shouldn't be needed when not optimizing. */
5768 if (!untangle_mova (&num_mova, &mova, insn))
5769 {
5770 insn = mova;
5771 num_mova = 0;
5772 }
5773 }
5774 else if (JUMP_P (insn)
5775 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5776 && num_mova
5777 /* ??? loop invariant motion can also move a mova out of a
5778 loop. Since loop does this code motion anyway, maybe we
5779 should wrap UNSPEC_MOVA into a CONST, so that reload can
5780 move it back. */
5781 && ((num_mova > 1
5782 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5783 || (prev_nonnote_insn (insn)
5784 == XEXP (MOVA_LABELREF (mova), 0))))
5785 {
5786 rtx scan;
5787 int total;
5788
5789 num_mova--;
5790
5791 /* Some code might have been inserted between the mova and
5792 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5793 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5794 total += get_attr_length (scan);
5795
5796 /* The range of mova is 1020 bytes; add 4 because the pc counts from the
5797 address of the second instruction after this one, and subtract 2 in
5798 case the pc is 2 byte aligned (1020 + 4 - 2 = 1022).  Possible alignment
5799 needed for the ADDR_DIFF_VEC cancels out with alignment effects of the mova itself. */
5800 if (total > 1022)
5801 {
5802 /* Change the mova into a load, and restart scanning
5803 there. broken_move will then return true for mova. */
5804 fixup_mova (mova);
5805 insn = mova;
5806 }
5807 }
5808 if (broken_move (insn)
5809 || (NONJUMP_INSN_P (insn)
5810 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5811 {
5812 rtx scan;
5813 /* Scan ahead looking for a barrier to stick the constant table
5814 behind. */
5815 rtx barrier = find_barrier (num_mova, mova, insn);
5816 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5817 int need_aligned_label = 0;
5818
5819 if (num_mova && ! mova_p (mova))
5820 {
5821 /* find_barrier had to change the first mova into a
5822 pcload; thus, we have to start with this new pcload. */
5823 insn = mova;
5824 num_mova = 0;
5825 }
5826 /* Now find all the moves between the points and modify them. */
5827 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5828 {
5829 if (LABEL_P (scan))
5830 last_float = 0;
5831 if (NONJUMP_INSN_P (scan)
5832 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5833 need_aligned_label = 1;
5834 if (broken_move (scan))
5835 {
5836 rtx *patp = &PATTERN (scan), pat = *patp;
5837 rtx src, dst;
5838 rtx lab;
5839 rtx newsrc;
5840 enum machine_mode mode;
5841
5842 if (GET_CODE (pat) == PARALLEL)
5843 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5844 src = SET_SRC (pat);
5845 dst = SET_DEST (pat);
5846 mode = GET_MODE (dst);
5847
5848 if (mode == SImode && satisfies_constraint_I16 (src)
5849 && REGNO (dst) != FPUL_REG)
5850 {
5851 int offset = 0;
5852
5853 mode = HImode;
5854 while (GET_CODE (dst) == SUBREG)
5855 {
5856 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5857 GET_MODE (SUBREG_REG (dst)),
5858 SUBREG_BYTE (dst),
5859 GET_MODE (dst));
5860 dst = SUBREG_REG (dst);
5861 }
5862 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5863 }
5864 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5865 {
5866 /* This must be an insn that clobbers r0. */
5867 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5868 XVECLEN (PATTERN (scan), 0)
5869 - 1);
5870 rtx clobber = *clobberp;
5871
5872 gcc_assert (GET_CODE (clobber) == CLOBBER
5873 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5874
5875 if (last_float
5876 && reg_set_between_p (r0_rtx, last_float_move, scan))
5877 last_float = 0;
5878 if (last_float
5879 && TARGET_SHCOMPACT
5880 && GET_MODE_SIZE (mode) != 4
5881 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5882 last_float = 0;
5883 lab = add_constant (src, mode, last_float);
5884 if (lab)
5885 emit_insn_before (gen_mova (lab), scan);
5886 else
5887 {
5888 /* There will be a REG_UNUSED note for r0 on
5889 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5890 otherwise reorg's mark_target_live_regs will not
5891 consider r0 to be used, and we could end up with a delay
5892 slot insn in front of SCAN that clobbers r0. */
5893 rtx note
5894 = find_regno_note (last_float_move, REG_UNUSED, 0);
5895
5896 /* If we are not optimizing, then there may not be
5897 a note. */
5898 if (note)
5899 PUT_REG_NOTE_KIND (note, REG_INC);
5900
5901 *last_float_addr = r0_inc_rtx;
5902 }
5903 last_float_move = scan;
5904 last_float = src;
5905 newsrc = gen_const_mem (mode,
5906 (((TARGET_SH4 && ! TARGET_FMOVD)
5907 || REGNO (dst) == FPUL_REG)
5908 ? r0_inc_rtx
5909 : r0_rtx));
5910 last_float_addr = &XEXP (newsrc, 0);
5911
5912 /* Remove the clobber of r0. */
5913 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5914 gen_rtx_SCRATCH (Pmode));
5915 }
5916 /* This is a mova needing a label. Create it. */
5917 else if (GET_CODE (src) == UNSPEC
5918 && XINT (src, 1) == UNSPEC_MOVA
5919 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5920 {
5921 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5922 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5923 newsrc = gen_rtx_UNSPEC (SImode,
5924 gen_rtvec (1, newsrc),
5925 UNSPEC_MOVA);
5926 }
5927 else
5928 {
5929 lab = add_constant (src, mode, 0);
5930 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5931 newsrc = gen_const_mem (mode, newsrc);
5932 }
5933 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5934 INSN_CODE (scan) = -1;
5935 }
5936 }
5937 dump_table (need_aligned_label ? insn : 0, barrier);
5938 insn = barrier;
5939 }
5940 }
5941 free_alloc_pool (label_ref_list_pool);
5942 for (insn = first; insn; insn = NEXT_INSN (insn))
5943 PUT_MODE (insn, VOIDmode);
5944
5945 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5946 INSN_ADDRESSES_FREE ();
5947 split_branches (first);
5948
5949 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5950 also has an effect on the register that holds the address of the sfunc.
5951 Insert an extra dummy insn in front of each sfunc that pretends to
5952 use this register. */
5953 if (flag_delayed_branch)
5954 {
5955 for (insn = first; insn; insn = NEXT_INSN (insn))
5956 {
5957 rtx reg = sfunc_uses_reg (insn);
5958
5959 if (! reg)
5960 continue;
5961 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5962 }
5963 }
5964 #if 0
5965 /* fpscr is not actually a user variable, but we pretend it is for the
5966 sake of the previous optimization passes, since we want it handled like
5967 one. However, we don't have any debugging information for it, so turn
5968 it into a non-user variable now. */
5969 if (TARGET_SH4)
5970 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5971 #endif
5972 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5973 }
5974
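/* Return the INSN_UID of the branch destination that LABEL refers to,
   skipping past newly created branch-redirection insns whose UIDs cannot
   be used to index the branch_uid / insn_addresses arrays.  Return 0 if
   the label is undefined or if the destination is a return.  */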
5975 int
5976 get_dest_uid (rtx label, int max_uid)
5977 {
5978 rtx dest = next_real_insn (label);
5979 int dest_uid;
5980 if (! dest)
5981 /* This can happen for an undefined label. */
5982 return 0;
5983 dest_uid = INSN_UID (dest);
5984 /* If this is a newly created branch redirection blocking instruction,
5985 we cannot index the branch_uid or insn_addresses arrays with its
5986 uid. But then, we won't need to, because the actual destination is
5987 the following branch. */
5988 while (dest_uid >= max_uid)
5989 {
5990 dest = NEXT_INSN (dest);
5991 dest_uid = INSN_UID (dest);
5992 }
5993 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5994 return 0;
5995 return dest_uid;
5996 }
5997
5998 /* Split condbranches that are out of range. Also add clobbers for
5999 scratch registers that are needed in far jumps.
6000 We do this before delay slot scheduling, so that it can take our
6001 newly created instructions into account. It also allows us to
6002 find branches with common targets more easily. */
6003
6004 static void
6005 split_branches (rtx first)
6006 {
6007 rtx insn;
6008 struct far_branch **uid_branch, *far_branch_list = 0;
6009 int max_uid = get_max_uid ();
6010 int ok;
6011
6012 /* Find out which branches are out of range. */
6013 shorten_branches (first);
6014
6015 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6016 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6017
6018 for (insn = first; insn; insn = NEXT_INSN (insn))
6019 if (! INSN_P (insn))
6020 continue;
6021 else if (INSN_DELETED_P (insn))
6022 {
6023 /* Shorten_branches would split this instruction again,
6024 so transform it into a note. */
6025 SET_INSN_DELETED (insn);
6026 }
6027 else if (JUMP_P (insn)
6028 /* Don't mess with ADDR_DIFF_VEC */
6029 && (GET_CODE (PATTERN (insn)) == SET
6030 || GET_CODE (PATTERN (insn)) == RETURN))
6031 {
6032 enum attr_type type = get_attr_type (insn);
6033 if (type == TYPE_CBRANCH)
6034 {
6035 rtx next, beyond;
6036
6037 if (get_attr_length (insn) > 4)
6038 {
6039 rtx src = SET_SRC (PATTERN (insn));
6040 rtx olabel = XEXP (XEXP (src, 1), 0);
6041 int addr = INSN_ADDRESSES (INSN_UID (insn));
6042 rtx label = 0;
6043 int dest_uid = get_dest_uid (olabel, max_uid);
6044 struct far_branch *bp = uid_branch[dest_uid];
6045
6046 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6047 the label if the LABEL_NUSES count drops to zero. There is
6048 always a jump_optimize pass that sets these values, but it
6049 proceeds to delete unreferenced code, and then if not
6050 optimizing, to un-delete the deleted instructions, thus
6051 leaving labels with too low uses counts. */
6052 if (! optimize)
6053 {
6054 JUMP_LABEL (insn) = olabel;
6055 LABEL_NUSES (olabel)++;
6056 }
6057 if (! bp)
6058 {
6059 bp = (struct far_branch *) alloca (sizeof *bp);
6060 uid_branch[dest_uid] = bp;
6061 bp->prev = far_branch_list;
6062 far_branch_list = bp;
6063 bp->far_label
6064 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6065 LABEL_NUSES (bp->far_label)++;
6066 }
6067 else
6068 {
6069 label = bp->near_label;
6070 if (! label && bp->address - addr >= CONDJUMP_MIN)
6071 {
6072 rtx block = bp->insert_place;
6073
6074 if (GET_CODE (PATTERN (block)) == RETURN)
6075 block = PREV_INSN (block);
6076 else
6077 block = gen_block_redirect (block,
6078 bp->address, 2);
6079 label = emit_label_after (gen_label_rtx (),
6080 PREV_INSN (block));
6081 bp->near_label = label;
6082 }
6083 else if (label && ! NEXT_INSN (label))
6084 {
6085 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6086 bp->insert_place = insn;
6087 else
6088 gen_far_branch (bp);
6089 }
6090 }
6091 if (! label
6092 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6093 {
6094 bp->near_label = label = gen_label_rtx ();
6095 bp->insert_place = insn;
6096 bp->address = addr;
6097 }
6098 ok = redirect_jump (insn, label, 0);
6099 gcc_assert (ok);
6100 }
6101 else
6102 {
6103 /* get_attr_length (insn) == 2 */
6104 /* Check if we have a pattern where reorg wants to redirect
6105 the branch to a label from an unconditional branch that
6106 is too far away. */
6107 /* We can't use JUMP_LABEL here because it might be undefined
6108 when not optimizing. */
6109 /* A syntax error might cause beyond to be NULL_RTX. */
6110 beyond
6111 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6112 0));
6113
6114 if (beyond
6115 && (JUMP_P (beyond)
6116 || ((beyond = next_active_insn (beyond))
6117 && JUMP_P (beyond)))
6118 && GET_CODE (PATTERN (beyond)) == SET
6119 && recog_memoized (beyond) == CODE_FOR_jump_compact
6120 && ((INSN_ADDRESSES
6121 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6122 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6123 > 252 + 258 + 2))
6124 gen_block_redirect (beyond,
6125 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6126 }
6127
6128 next = next_active_insn (insn);
6129
6130 if (next
6131 && (JUMP_P (next)
6132 || ((next = next_active_insn (next))
6133 && JUMP_P (next)))
6134 && GET_CODE (PATTERN (next)) == SET
6135 && recog_memoized (next) == CODE_FOR_jump_compact
6136 && ((INSN_ADDRESSES
6137 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6138 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6139 > 252 + 258 + 2))
6140 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6141 }
6142 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6143 {
6144 int addr = INSN_ADDRESSES (INSN_UID (insn));
6145 rtx far_label = 0;
6146 int dest_uid = 0;
6147 struct far_branch *bp;
6148
6149 if (type == TYPE_JUMP)
6150 {
6151 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6152 dest_uid = get_dest_uid (far_label, max_uid);
6153 if (! dest_uid)
6154 {
6155 /* Parse errors can lead to labels outside
6156 the insn stream. */
6157 if (! NEXT_INSN (far_label))
6158 continue;
6159
6160 if (! optimize)
6161 {
6162 JUMP_LABEL (insn) = far_label;
6163 LABEL_NUSES (far_label)++;
6164 }
6165 redirect_jump (insn, ret_rtx, 1);
6166 far_label = 0;
6167 }
6168 }
6169 bp = uid_branch[dest_uid];
6170 if (! bp)
6171 {
6172 bp = (struct far_branch *) alloca (sizeof *bp);
6173 uid_branch[dest_uid] = bp;
6174 bp->prev = far_branch_list;
6175 far_branch_list = bp;
6176 bp->near_label = 0;
6177 bp->far_label = far_label;
6178 if (far_label)
6179 LABEL_NUSES (far_label)++;
6180 }
6181 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6182 if (addr - bp->address <= CONDJUMP_MAX)
6183 emit_label_after (bp->near_label, PREV_INSN (insn));
6184 else
6185 {
6186 gen_far_branch (bp);
6187 bp->near_label = 0;
6188 }
6189 else
6190 bp->near_label = 0;
6191 bp->address = addr;
6192 bp->insert_place = insn;
6193 if (! far_label)
6194 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6195 else
6196 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6197 }
6198 }
6199 /* Generate all pending far branches,
6200 and free our references to the far labels. */
6201 while (far_branch_list)
6202 {
6203 if (far_branch_list->near_label
6204 && ! NEXT_INSN (far_branch_list->near_label))
6205 gen_far_branch (far_branch_list);
6206 if (optimize
6207 && far_branch_list->far_label
6208 && ! --LABEL_NUSES (far_branch_list->far_label))
6209 delete_insn (far_branch_list->far_label);
6210 far_branch_list = far_branch_list->prev;
6211 }
6212
6213 /* Instruction length information is no longer valid due to the new
6214 instructions that have been generated. */
6215 init_insn_lengths ();
6216 }
6217
6218 /* Dump out instruction addresses, which is useful for debugging the
6219 constant pool table stuff.
6220
6221 If relaxing, output the label and pseudo-ops used to link together
6222 calls and the instruction which set the registers. */
6223
6224 /* ??? The addresses printed by this routine for insns are nonsense for
6225 insns which are inside of a sequence where none of the inner insns have
6226 variable length. This is because the second pass of shorten_branches
6227 does not bother to update them. */
6228
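/* An illustrative sketch (label number hypothetical) of the -mrelax
   annotations emitted below: the insn that sets up a call target gets an
   internal label

   .L42:
	mov.l	.LC0,r1

   and each call through that register is preceded by a .uses pseudo-op

	.uses	.L42
	jsr	@r1

   which gives the linker what it needs to relax the jsr into a bsr when
   the target turns out to be in range.  */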
6229 void
6230 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6231 int noperands ATTRIBUTE_UNUSED)
6232 {
6233 if (TARGET_DUMPISIZE)
6234 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6235
6236 if (TARGET_RELAX)
6237 {
6238 rtx note;
6239
6240 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6241 if (note)
6242 {
6243 rtx pattern;
6244
6245 pattern = PATTERN (insn);
6246 if (GET_CODE (pattern) == PARALLEL)
6247 pattern = XVECEXP (pattern, 0, 0);
6248 switch (GET_CODE (pattern))
6249 {
6250 case SET:
6251 if (GET_CODE (SET_SRC (pattern)) != CALL
6252 && get_attr_type (insn) != TYPE_SFUNC)
6253 {
6254 targetm.asm_out.internal_label
6255 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6256 break;
6257 }
6258 /* else FALLTHROUGH */
6259 case CALL:
6260 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6261 CODE_LABEL_NUMBER (XEXP (note, 0)));
6262 break;
6263
6264 default:
6265 gcc_unreachable ();
6266 }
6267 }
6268 }
6269 }
6270
6271 /* Dump out any constants accumulated in the final pass. These will
6272 only be labels. */
6273
6274 const char *
6275 output_jump_label_table (void)
6276 {
6277 int i;
6278
6279 if (pool_size)
6280 {
6281 fprintf (asm_out_file, "\t.align 2\n");
6282 for (i = 0; i < pool_size; i++)
6283 {
6284 pool_node *p = &pool_vector[i];
6285
6286 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6287 CODE_LABEL_NUMBER (p->label));
6288 output_asm_insn (".long %O0", &p->value);
6289 }
6290 pool_size = 0;
6291 }
6292
6293 return "";
6294 }
6295 \f
6296 /* A full frame looks like:
6297
6298 arg-5
6299 arg-4
6300 [ if current_function_anonymous_args
6301 arg-3
6302 arg-2
6303 arg-1
6304 arg-0 ]
6305 saved-fp
6306 saved-r10
6307 saved-r11
6308 saved-r12
6309 saved-pr
6310 local-n
6311 ..
6312 local-1
6313 local-0 <- fp points here. */
6314
6315 /* Number of bytes pushed for anonymous args, used to pass information
6316 between expand_prologue and expand_epilogue. */
6317
6318 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6319 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6320 for an epilogue and a negative value means that it's for a sibcall
6321 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6322 all the registers that are about to be restored, and hence dead.  FRAME_P
says whether the adjustment insns are emitted as frame-related (via
frame_insn) or as ordinary insns. */
6323
6324 static void
6325 output_stack_adjust (int size, rtx reg, int epilogue_p,
6326 HARD_REG_SET *live_regs_mask, bool frame_p)
6327 {
6328 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6329 if (size)
6330 {
6331 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6332
6333 /* This test is bogus, as output_stack_adjust is used to re-align the
6334 stack. */
6335 #if 0
6336 gcc_assert (!(size % align));
6337 #endif
6338
6339 if (CONST_OK_FOR_ADD (size))
6340 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6341 /* Try to do it with two partial adjustments; however, we must make
6342 sure that the stack is properly aligned at all times, in case
6343 an interrupt occurs between the two partial adjustments. */
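/* For example (hypothetical numbers, assuming an 8 byte alignment and an
   8-bit signed add immediate): an adjustment of 200 is split into
   96 + 104, since 200 / 2 & -8 == 96; both halves fit the immediate
   field, and the intermediate stack pointer stays 8 byte aligned.  */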
6344 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6345 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6346 {
6347 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6348 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6349 }
6350 else
6351 {
6352 rtx const_reg;
6353 rtx insn;
6354 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6355 int i;
6356
6357 /* If TEMP is invalid, we could temporarily save a general
6358 register to MACL. However, there is currently no need
6359 to handle this case, so just die when we see it. */
6360 if (epilogue_p < 0
6361 || current_function_interrupt
6362 || ! call_really_used_regs[temp] || fixed_regs[temp])
6363 temp = -1;
6364 if (temp < 0 && ! current_function_interrupt
6365 && (TARGET_SHMEDIA || epilogue_p >= 0))
6366 {
6367 HARD_REG_SET temps;
6368 COPY_HARD_REG_SET (temps, call_used_reg_set);
6369 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6370 if (epilogue_p > 0)
6371 {
6372 int nreg = 0;
6373 if (crtl->return_rtx)
6374 {
6375 enum machine_mode mode;
6376 mode = GET_MODE (crtl->return_rtx);
6377 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6378 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6379 }
6380 for (i = 0; i < nreg; i++)
6381 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6382 if (crtl->calls_eh_return)
6383 {
6384 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6385 for (i = 0; i <= 3; i++)
6386 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6387 }
6388 }
6389 if (TARGET_SHMEDIA && epilogue_p < 0)
6390 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6391 CLEAR_HARD_REG_BIT (temps, i);
6392 if (epilogue_p <= 0)
6393 {
6394 for (i = FIRST_PARM_REG;
6395 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6396 CLEAR_HARD_REG_BIT (temps, i);
6397 if (cfun->static_chain_decl != NULL)
6398 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6399 }
6400 temp = scavenge_reg (&temps);
6401 }
6402 if (temp < 0 && live_regs_mask)
6403 {
6404 HARD_REG_SET temps;
6405
6406 COPY_HARD_REG_SET (temps, *live_regs_mask);
6407 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6408 temp = scavenge_reg (&temps);
6409 }
6410 if (temp < 0)
6411 {
6412 rtx adj_reg, tmp_reg, mem;
6413
6414 /* If we reached here, the most likely case is the (sibcall)
6415 epilogue for non-SHmedia.  Put a special push/pop sequence
6416 for such a case as the last resort.  This looks lengthy but
6417 would not be a problem because it seems to be very
6418 rare. */
6419
6420 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6421
6422
6423 /* ??? There is still the slight possibility that r4 or
6424 r5 have been reserved as fixed registers or assigned
6425 as global registers, and they change during an
6426 interrupt. There are possible ways to handle this:
6427
6428 - If we are adjusting the frame pointer (r14), we can do
6429 with a single temp register and an ordinary push / pop
6430 on the stack.
6431 - Grab any call-used or call-saved registers (i.e. not
6432 fixed or globals) for the temps we need. We might
6433 also grab r14 if we are adjusting the stack pointer.
6434 If we can't find enough available registers, issue
6435 a diagnostic and die - the user must have reserved
6436 way too many registers.
6437 But since all this is rather unlikely to happen and
6438 would require extra testing, we just die if r4 / r5
6439 are not available. */
6440 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6441 && !global_regs[4] && !global_regs[5]);
6442
6443 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6444 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6445 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6446 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6447 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6448 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6449 emit_move_insn (mem, tmp_reg);
6450 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6451 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6452 emit_move_insn (mem, tmp_reg);
6453 emit_move_insn (reg, adj_reg);
6454 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6455 emit_move_insn (adj_reg, mem);
6456 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6457 emit_move_insn (tmp_reg, mem);
6458 /* Tell flow the insns that pop r4/r5 aren't dead. */
6459 emit_use (tmp_reg);
6460 emit_use (adj_reg);
6461 return;
6462 }
6463 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6464
6465 /* If SIZE is negative, subtract the positive value.
6466 This sometimes allows a constant pool entry to be shared
6467 between prologue and epilogue code. */
6468 if (size < 0)
6469 {
6470 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6471 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6472 }
6473 else
6474 {
6475 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6476 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6477 }
6478 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6479 gen_rtx_SET (VOIDmode, reg,
6480 gen_rtx_PLUS (SImode, reg,
6481 GEN_INT (size))));
6482 }
6483 }
6484 }
6485
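/* Emit X as an insn and mark it frame related, so that it is picked up
   by the DWARF CFI machinery.  */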
6486 static rtx
6487 frame_insn (rtx x)
6488 {
6489 x = emit_insn (x);
6490 RTX_FRAME_RELATED_P (x) = 1;
6491 return x;
6492 }
6493
6494 /* Output RTL to push register RN onto the stack. */
6495
6496 static rtx
6497 push (int rn)
6498 {
6499 rtx x;
6500 if (rn == FPUL_REG)
6501 x = gen_push_fpul ();
6502 else if (rn == FPSCR_REG)
6503 x = gen_push_fpscr ();
6504 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6505 && FP_OR_XD_REGISTER_P (rn))
6506 {
6507 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6508 return NULL_RTX;
6509 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6510 }
6511 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6512 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6513 else
6514 x = gen_push (gen_rtx_REG (SImode, rn));
6515
6516 x = frame_insn (x);
6517 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6518 return x;
6519 }
6520
6521 /* Output RTL to pop register RN from the stack. */
6522
6523 static void
6524 pop (int rn)
6525 {
6526 rtx x, sp_reg, reg;
6527 if (rn == FPUL_REG)
6528 x = gen_pop_fpul ();
6529 else if (rn == FPSCR_REG)
6530 x = gen_pop_fpscr ();
6531 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6532 && FP_OR_XD_REGISTER_P (rn))
6533 {
6534 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6535 return;
6536 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6537 }
6538 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6539 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6540 else
6541 x = gen_pop (gen_rtx_REG (SImode, rn));
6542
6543 x = emit_insn (x);
6544
6545 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6546 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6547 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6548 : SET_DEST (PATTERN (x)));
6549 add_reg_note (x, REG_CFA_RESTORE, reg);
6550 add_reg_note (x, REG_CFA_ADJUST_CFA,
6551 gen_rtx_SET (SImode, sp_reg,
6552 plus_constant (SImode, sp_reg,
6553 GET_MODE_SIZE (GET_MODE (reg)))));
6554 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6555 RTX_FRAME_RELATED_P (x) = 1;
6556 }
6557
6558 /* Generate code to push the regs specified in the mask. */
6559
6560 static void
6561 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6562 {
6563 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6564 int skip_fpscr = 0;
6565
6566 /* Push PR last; this gives better latencies after the prologue, and
6567 candidates for the return delay slot when there are no general
6568 registers pushed. */
6569 for (; i < FIRST_PSEUDO_REGISTER; i++)
6570 {
6571 /* If this is an interrupt handler, and the SZ bit varies,
6572 and we have to push any floating point register, we need
6573 to switch to the correct precision first. */
6574 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6575 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6576 {
6577 HARD_REG_SET unsaved;
6578
6579 push (FPSCR_REG);
6580 COMPL_HARD_REG_SET (unsaved, *mask);
6581 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6582 skip_fpscr = 1;
6583 }
6584 if (i != PR_REG
6585 && (i != FPSCR_REG || ! skip_fpscr)
6586 && TEST_HARD_REG_BIT (*mask, i))
6587 {
6588 /* If the ISR has RESBANK attribute assigned, don't push any of
6589 the following registers - R0-R14, MACH, MACL and GBR. */
6590 if (! (sh_cfun_resbank_handler_p ()
6591 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6592 || i == MACH_REG
6593 || i == MACL_REG
6594 || i == GBR_REG)))
6595 push (i);
6596 }
6597 }
6598
6599 /* Push banked registers last to improve delay slot opportunities. */
6600 if (interrupt_handler)
6601 {
6602 bool use_movml = false;
6603
6604 if (TARGET_SH2A)
6605 {
6606 unsigned int count = 0;
6607
6608 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6609 if (TEST_HARD_REG_BIT (*mask, i))
6610 count++;
6611 else
6612 break;
6613
6614 /* Use movml when all banked registers are pushed. */
6615 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6616 use_movml = true;
6617 }
6618
6619 if (sh_cfun_resbank_handler_p ())
6620 ; /* Do nothing. */
6621 else if (use_movml)
6622 {
6623 rtx x, mem, reg, set;
6624 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6625
6626 /* We must avoid scheduling the multiple-store insn together with
6627 other insns. */
6628 emit_insn (gen_blockage ());
6629 x = gen_movml_push_banked (sp_reg);
6630 x = frame_insn (x);
6631 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6632 {
6633 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
6634 reg = gen_rtx_REG (SImode, i);
6635 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6636 }
6637
6638 set = gen_rtx_SET (SImode, sp_reg,
6639 plus_constant (Pmode, sp_reg, - 32));
6640 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6641 emit_insn (gen_blockage ());
6642 }
6643 else
6644 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6645 if (TEST_HARD_REG_BIT (*mask, i))
6646 push (i);
6647 }
6648
6649 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6650 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6651 push (PR_REG);
6652 }
6653
6654 /* Calculate how much extra space is needed to save all callee-saved
6655 target registers.
6656 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6657
6658 static int
6659 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6660 {
6661 int reg;
6662 int stack_space = 0;
6663 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6664
6665 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6666 if ((! call_really_used_regs[reg] || interrupt_handler)
6667 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6668 /* Leave space to save this target register on the stack,
6669 in case target register allocation wants to use it. */
6670 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6671 return stack_space;
6672 }
6673
6674 /* Decide whether we should reserve space for callee-save target registers,
6675 in case target register allocation wants to use them. REGS_SAVED is
6676 the space, in bytes, that is already required for register saves.
6677 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6678
6679 static int
6680 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6681 HARD_REG_SET *live_regs_mask)
6682 {
6683 if (optimize_size)
6684 return 0;
6685 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6686 }
6687
6688 /* Decide how much space to reserve for callee-save target registers
6689 in case target register allocation wants to use them.
6690 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6691
6692 static int
6693 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6694 {
6695 if (shmedia_space_reserved_for_target_registers)
6696 return shmedia_target_regs_stack_space (live_regs_mask);
6697 else
6698 return 0;
6699 }
6700
6701 /* Work out the registers which need to be saved, both as a mask and a
6702 count of the bytes needed to save them. Return the count.
6703
6704 If doing a pragma interrupt function, then push all regs used by the
6705 function, and if we call another function (we can tell by looking at PR),
6706 make sure that all the regs it clobbers are safe too. */
6707
6708 static int
6709 calc_live_regs (HARD_REG_SET *live_regs_mask)
6710 {
6711 unsigned int reg;
6712 int count;
6713 tree attrs;
6714 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6715 bool nosave_low_regs;
6716 int pr_live, has_call;
6717
6718 attrs = DECL_ATTRIBUTES (current_function_decl);
6719 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6720 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6721 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6722 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6723
6724 CLEAR_HARD_REG_SET (*live_regs_mask);
6725 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6726 && df_regs_ever_live_p (FPSCR_REG))
6727 target_flags &= ~MASK_FPU_SINGLE;
6728 /* If we can save a lot of saves by switching to double mode, do that. */
6729 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6730 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6731 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6732 && (! call_really_used_regs[reg]
6733 || interrupt_handler)
6734 && ++count > 2)
6735 {
6736 target_flags &= ~MASK_FPU_SINGLE;
6737 break;
6738 }
6739 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6740 knows how to use it. That means the pseudo originally allocated for
6741 the initial value can become the PR_MEDIA_REG hard register, as seen for
6742 execute/20010122-1.c:test9. */
6743 if (TARGET_SHMEDIA)
6744 /* ??? this function is called from initial_elimination_offset, hence we
6745 can't use the result of sh_media_register_for_return here. */
6746 pr_live = sh_pr_n_sets ();
6747 else
6748 {
6749 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6750 pr_live = (pr_initial
6751 ? (!REG_P (pr_initial)
6752 || REGNO (pr_initial) != (PR_REG))
6753 : df_regs_ever_live_p (PR_REG));
6754 /* For SHcompact, if not optimizing, we end up with a memory reference
6755 using the return address pointer for __builtin_return_address even
6756 though there is no actual need to put the PR register on the stack. */
6757 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6758 }
6759 /* Force PR to be live if the prologue has to call the SHmedia
6760 argument decoder or register saver. */
6761 if (TARGET_SHCOMPACT
6762 && ((crtl->args.info.call_cookie
6763 & ~ CALL_COOKIE_RET_TRAMP (1))
6764 || crtl->saves_all_registers))
6765 pr_live = 1;
6766 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6767 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6768 {
6769 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6770 ? pr_live
6771 : interrupt_handler
6772 ? (/* Need to save all the regs ever live. */
6773 (df_regs_ever_live_p (reg)
6774 || (call_really_used_regs[reg]
6775 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6776 || reg == PIC_OFFSET_TABLE_REGNUM)
6777 && has_call)
6778 || (TARGET_SHMEDIA && has_call
6779 && REGISTER_NATURAL_MODE (reg) == SImode
6780 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6781 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6782 && reg != RETURN_ADDRESS_POINTER_REGNUM
6783 && reg != T_REG && reg != GBR_REG
6784 /* Push fpscr only on targets which have an FPU.  */
6785 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6786 : (/* Only push those regs which are used and need to be saved. */
6787 (TARGET_SHCOMPACT
6788 && flag_pic
6789 && crtl->args.info.call_cookie
6790 && reg == PIC_OFFSET_TABLE_REGNUM)
6791 || (df_regs_ever_live_p (reg)
6792 && ((!call_really_used_regs[reg]
6793 && !(reg != PIC_OFFSET_TABLE_REGNUM
6794 && fixed_regs[reg] && call_used_regs[reg]))
6795 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6796 || (crtl->calls_eh_return
6797 && (reg == EH_RETURN_DATA_REGNO (0)
6798 || reg == EH_RETURN_DATA_REGNO (1)
6799 || reg == EH_RETURN_DATA_REGNO (2)
6800 || reg == EH_RETURN_DATA_REGNO (3)))
6801 || ((reg == MACL_REG || reg == MACH_REG)
6802 && df_regs_ever_live_p (reg)
6803 && sh_cfun_attr_renesas_p ())
6804 ))
6805 {
6806 SET_HARD_REG_BIT (*live_regs_mask, reg);
6807 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6808
6809 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6810 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6811 {
6812 if (FP_REGISTER_P (reg))
6813 {
6814 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6815 {
6816 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6817 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6818 }
6819 }
6820 else if (XD_REGISTER_P (reg))
6821 {
6822 /* Must switch to double mode to access these registers. */
6823 target_flags &= ~MASK_FPU_SINGLE;
6824 }
6825 }
6826 }
6827 if (nosave_low_regs && reg == R8_REG)
6828 break;
6829 }
6830 /* If we have a target register optimization pass after prologue / epilogue
6831 threading, we need to assume all target registers will be live even if
6832 they aren't now. */
6833 if (flag_branch_target_load_optimize2
6834 && TARGET_SAVE_ALL_TARGET_REGS
6835 && shmedia_space_reserved_for_target_registers)
6836 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6837 if ((! call_really_used_regs[reg] || interrupt_handler)
6838 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6839 {
6840 SET_HARD_REG_BIT (*live_regs_mask, reg);
6841 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6842 }
6843 /* If this is an interrupt handler, we don't have any call-clobbered
6844 registers we can conveniently use for target register save/restore.
6845 Make sure we save at least one general purpose register when we need
6846 to save target registers. */
6847 if (interrupt_handler
6848 && hard_reg_set_intersect_p (*live_regs_mask,
6849 reg_class_contents[TARGET_REGS])
6850 && ! hard_reg_set_intersect_p (*live_regs_mask,
6851 reg_class_contents[GENERAL_REGS]))
6852 {
6853 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6854 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6855 }
6856
6857 return count;
6858 }
6859
6860 /* Code to generate prologue and epilogue sequences */
6861
6862 /* PUSHED is the number of bytes that are being pushed on the
6863 stack for register saves. Return the frame size, padded
6864 appropriately so that the stack stays properly aligned. */
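/* For example (hypothetical numbers, 8 byte STACK_BOUNDARY): with a frame
   size of 20 and 12 bytes already pushed, ((20 + 12 + 7) & -8) - 12
   = 32 - 12 = 20; with 24 bytes pushed, ((20 + 24 + 7) & -8) - 24
   = 48 - 24 = 24, so the extra 4 bytes of padding end up in the frame.  */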
6865 static HOST_WIDE_INT
6866 rounded_frame_size (int pushed)
6867 {
6868 HOST_WIDE_INT size = get_frame_size ();
6869 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6870
6871 if (ACCUMULATE_OUTGOING_ARGS)
6872 size += crtl->outgoing_args_size;
6873
6874 return ((size + pushed + align - 1) & -align) - pushed;
6875 }
6876
6877 /* Choose a call-clobbered target-branch register that remains
6878 unchanged along the whole function.  The prologue copies the return
6879 address (PR_MEDIA_REG) into it. */
6880 int
6881 sh_media_register_for_return (void)
6882 {
6883 int regno;
6884 int tr0_used;
6885
6886 if (! crtl->is_leaf)
6887 return -1;
6888 if (lookup_attribute ("interrupt_handler",
6889 DECL_ATTRIBUTES (current_function_decl)))
6890 return -1;
6891 if (sh_cfun_interrupt_handler_p ())
6892 return -1;
6893
6894 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6895
6896 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6897 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6898 return regno;
6899
6900 return -1;
6901 }
6902
6903 /* The maximum registers we need to save are:
6904 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6905 - 32 floating point registers (for each pair, we save none,
6906 one single precision value, or a double precision value).
6907 - 8 target registers
6908 - add 1 entry for a delimiter. */
6909 #define MAX_SAVED_REGS (62+32+8)
6910
6911 typedef struct save_entry_s
6912 {
6913 unsigned char reg;
6914 unsigned char mode;
6915 short offset;
6916 } save_entry;
6917
6918 #define MAX_TEMPS 4
6919
6920 /* There will be a delimiter entry with VOIDmode both at the start and the
6921 end of a filled in schedule. The end delimiter has the offset of the
6922 save with the smallest (i.e. most negative) offset. */
6923 typedef struct save_schedule_s
6924 {
6925 save_entry entries[MAX_SAVED_REGS + 2];
6926 int temps[MAX_TEMPS+1];
6927 } save_schedule;
6928
6929 /* Fill in SCHEDULE according to LIVE_REGS_MASK.  Returns the last entry
6930 written to (not counting the delimiter).  OFFSET_BASE is a number to be
6931 added to all offset entries. */
6933
6934 static save_entry *
6935 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6936 int offset_base)
6937 {
6938 int align, i;
6939 save_entry *entry = schedule->entries;
6940 int tmpx = 0;
6941 int offset;
6942
6943 if (! current_function_interrupt)
6944 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6945 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6946 && ! FUNCTION_ARG_REGNO_P (i)
6947 && i != FIRST_RET_REG
6948 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6949 && ! (crtl->calls_eh_return
6950 && (i == EH_RETURN_STACKADJ_REGNO
6951 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6952 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6953 schedule->temps[tmpx++] = i;
6954 entry->reg = -1;
6955 entry->mode = VOIDmode;
6956 entry->offset = offset_base;
6957 entry++;
6958 /* We loop twice: first, we save 8-byte aligned registers in the
6959 higher addresses, which are known to be aligned.  Then, we
6960 proceed to saving 32-bit registers that don't need 8-byte
6961 alignment.
6962 If this is an interrupt function, all registers that need saving
6963 need to be saved in full.  Moreover, we need to postpone saving
6964 target registers till we have saved some general purpose registers
6965 we can then use as scratch registers. */
6966 offset = offset_base;
6967 for (align = 1; align >= 0; align--)
6968 {
6969 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6970 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6971 {
6972 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6973 int reg = i;
6974
6975 if (current_function_interrupt)
6976 {
6977 if (TARGET_REGISTER_P (i))
6978 continue;
6979 if (GENERAL_REGISTER_P (i))
6980 mode = DImode;
6981 }
6982 if (mode == SFmode && (i % 2) == 1
6983 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6984 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6985 {
6986 mode = DFmode;
6987 i--;
6988 reg--;
6989 }
6990
6991 /* If we're doing the aligned pass and this is not aligned,
6992 or we're doing the unaligned pass and this is aligned,
6993 skip it. */
6994 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6995 != align)
6996 continue;
6997
6998 if (current_function_interrupt
6999 && GENERAL_REGISTER_P (i)
7000 && tmpx < MAX_TEMPS)
7001 schedule->temps[tmpx++] = i;
7002
7003 offset -= GET_MODE_SIZE (mode);
7004 entry->reg = i;
7005 entry->mode = mode;
7006 entry->offset = offset;
7007 entry++;
7008 }
7009 if (align && current_function_interrupt)
7010 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7011 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7012 {
7013 offset -= GET_MODE_SIZE (DImode);
7014 entry->reg = i;
7015 entry->mode = DImode;
7016 entry->offset = offset;
7017 entry++;
7018 }
7019 }
7020 entry->reg = -1;
7021 entry->mode = VOIDmode;
7022 entry->offset = offset;
7023 schedule->temps[tmpx] = -1;
7024 return entry - 1;
7025 }
7026
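/* Expand the function prologue.  This adjusts the stack for pretend and
   stack-passed arguments, handles the SHcompact argument decoder and the
   SHmedia return register, pushes varargs registers if needed, performs
   the stack switch requested by an sp_switch attribute, and saves the
   registers found live by calc_live_regs (using the SH5 save schedule
   when TARGET_SH5).  */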
7027 void
7028 sh_expand_prologue (void)
7029 {
7030 HARD_REG_SET live_regs_mask;
7031 int d, i;
7032 int d_rounding = 0;
7033 int save_flags = target_flags;
7034 int pretend_args;
7035 int stack_usage;
7036 tree sp_switch_attr
7037 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7038
7039 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7040
7041 /* We have pretend args if we had an object sent partially in registers
7042 and partially on the stack, e.g. a large structure. */
7043 pretend_args = crtl->args.pretend_args_size;
7044 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7045 && (NPARM_REGS(SImode)
7046 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7047 pretend_args = 0;
7048
7049 output_stack_adjust (-pretend_args
7050 - crtl->args.info.stack_regs * 8,
7051 stack_pointer_rtx, 0, NULL, true);
7052 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7053
7054 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7055 /* We're going to use the PIC register to load the address of the
7056 incoming-argument decoder and/or of the return trampoline from
7057 the GOT, so make sure the PIC register is preserved and
7058 initialized. */
7059 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7060
7061 if (TARGET_SHCOMPACT
7062 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7063 {
7064 int reg;
7065
7066 /* First, make all registers with incoming arguments that will
7067 be pushed onto the stack live, so that register renaming
7068 doesn't overwrite them. */
7069 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7070 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7071 >= NPARM_REGS (SImode) - reg)
7072 for (; reg < NPARM_REGS (SImode); reg++)
7073 emit_insn (gen_shcompact_preserve_incoming_args
7074 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7075 else if (CALL_COOKIE_INT_REG_GET
7076 (crtl->args.info.call_cookie, reg) == 1)
7077 emit_insn (gen_shcompact_preserve_incoming_args
7078 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7079
7080 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7081 stack_pointer_rtx);
7082 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7083 GEN_INT (crtl->args.info.call_cookie));
7084 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7085 gen_rtx_REG (SImode, R0_REG));
7086 }
7087 else if (TARGET_SHMEDIA)
7088 {
7089 int tr = sh_media_register_for_return ();
7090
7091 if (tr >= 0)
7092 emit_move_insn (gen_rtx_REG (DImode, tr),
7093 gen_rtx_REG (DImode, PR_MEDIA_REG));
7094 }
7095
7096 /* Emit the code for SETUP_VARARGS. */
7097 if (cfun->stdarg)
7098 {
7099 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7100 {
7101 /* Push arg regs as if they'd been provided by the caller on the stack. */
7102 for (i = 0; i < NPARM_REGS(SImode); i++)
7103 {
7104 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7105
7106 if (i >= (NPARM_REGS(SImode)
7107 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7108 ))
7109 break;
7110 push (rn);
7111 stack_usage += GET_MODE_SIZE (SImode);
7112 }
7113 }
7114 }
7115
7116 /* If we're supposed to switch stacks at function entry, do so now. */
7117 if (sp_switch_attr)
7118 {
7119 rtx lab, newsrc;
7120 /* The argument specifies a variable holding the address of the
7121 stack the interrupt function should switch to/from at entry/exit. */
7122 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7123 const char *s
7124 = ggc_strdup (TREE_STRING_POINTER (arg));
7125 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7126
7127 lab = add_constant (sp_switch, SImode, 0);
7128 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7129 newsrc = gen_const_mem (SImode, newsrc);
7130
7131 emit_insn (gen_sp_switch_1 (newsrc));
7132 }
7133
7134 d = calc_live_regs (&live_regs_mask);
7135 /* ??? Maybe we could save some switching if we can move a mode switch
7136 that already happens to be at the function start into the prologue. */
7137 if (target_flags != save_flags && ! current_function_interrupt)
7138 emit_insn (gen_toggle_sz ());
7139
7140 if (TARGET_SH5)
7141 {
7142 int offset_base, offset;
7143 rtx r0 = NULL_RTX;
7144 int offset_in_r0 = -1;
7145 int sp_in_r0 = 0;
7146 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7147 int total_size, save_size;
7148 save_schedule schedule;
7149 save_entry *entry;
7150 int *tmp_pnt;
7151
7152 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7153 && ! current_function_interrupt)
7154 r0 = gen_rtx_REG (Pmode, R0_REG);
7155
7156 /* D is the actual number of bytes that we need for saving registers;
7157 however, in initial_elimination_offset we have committed to using
7158 an additional TREGS_SPACE amount of bytes. In order to keep both
7159 addresses to arguments supplied by the caller and local variables
7160 valid, we must keep this gap. Place it between the incoming
7161 arguments and the actually saved registers in a bid to optimize
7162 locality of reference. */
7163 total_size = d + tregs_space;
7164 total_size += rounded_frame_size (total_size);
7165 save_size = total_size - rounded_frame_size (d);
7166 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7167 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7168 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7169
7170 /* If adjusting the stack in a single step costs nothing extra, do so.
7171 I.e. either if a single addi is enough, or we need a movi anyway,
7172 and we don't exceed the maximum offset range (the test for the
7173 latter is conservative for simplicity). */
7174 if (TARGET_SHMEDIA
7175 && (CONST_OK_FOR_I10 (-total_size)
7176 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7177 && total_size <= 2044)))
7178 d_rounding = total_size - save_size;
7179
7180 offset_base = d + d_rounding;
7181
7182 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7183 0, NULL, true);
7184 stack_usage += save_size + d_rounding;
7185
7186 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7187 tmp_pnt = schedule.temps;
7188 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7189 {
7190 enum machine_mode mode = (enum machine_mode) entry->mode;
7191 unsigned int reg = entry->reg;
7192 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7193 rtx orig_reg_rtx;
7194
7195 offset = entry->offset;
7196
7197 reg_rtx = gen_rtx_REG (mode, reg);
7198
7199 mem_rtx = gen_frame_mem (mode,
7200 gen_rtx_PLUS (Pmode,
7201 stack_pointer_rtx,
7202 GEN_INT (offset)));
7203
7204 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7205 {
7206 gcc_assert (r0);
7207 mem_rtx = NULL_RTX;
7208 }
7209
7210 if (HAVE_PRE_DECREMENT
7211 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7212 || mem_rtx == NULL_RTX
7213 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7214 {
7215 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7216
7217 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7218 pre_dec = NULL_RTX;
7219 else
7220 {
7221 mem_rtx = NULL_RTX;
7222 offset += GET_MODE_SIZE (mode);
7223 }
7224 }
7225
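/* At this point MEM_RTX is either a valid sp+offset address for this
   save, or NULL_RTX; in the latter case we fall through and build an
   r0-based (and, if PRE_DEC was chosen above, pre-decrement) address
   instead.  */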
7226 if (mem_rtx != NULL_RTX)
7227 goto addr_ok;
7228
7229 if (offset_in_r0 == -1)
7230 {
7231 emit_move_insn (r0, GEN_INT (offset));
7232 offset_in_r0 = offset;
7233 }
7234 else if (offset != offset_in_r0)
7235 {
7236 emit_move_insn (r0,
7237 gen_rtx_PLUS
7238 (Pmode, r0,
7239 GEN_INT (offset - offset_in_r0)));
7240 offset_in_r0 += offset - offset_in_r0;
7241 }
7242
7243 if (pre_dec != NULL_RTX)
7244 {
7245 if (! sp_in_r0)
7246 {
7247 emit_move_insn (r0,
7248 gen_rtx_PLUS
7249 (Pmode, r0, stack_pointer_rtx));
7250 sp_in_r0 = 1;
7251 }
7252
7253 offset -= GET_MODE_SIZE (mode);
7254 offset_in_r0 -= GET_MODE_SIZE (mode);
7255
7256 mem_rtx = pre_dec;
7257 }
7258 else if (sp_in_r0)
7259 mem_rtx = gen_frame_mem (mode, r0);
7260 else
7261 mem_rtx = gen_frame_mem (mode,
7262 gen_rtx_PLUS (Pmode,
7263 stack_pointer_rtx,
7264 r0));
7265
7266 /* We must not use an r0-based address for target-branch
7267 registers or for special registers without pre-dec
7268 memory addresses, since we store their values in r0
7269 first. */
7270 gcc_assert (!TARGET_REGISTER_P (reg)
7271 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7272 || mem_rtx == pre_dec));
7273
7274 addr_ok:
7275 orig_reg_rtx = reg_rtx;
7276 if (TARGET_REGISTER_P (reg)
7277 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7278 && mem_rtx != pre_dec))
7279 {
7280 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7281
7282 emit_move_insn (tmp_reg, reg_rtx);
7283
7284 if (REGNO (tmp_reg) == R0_REG)
7285 {
7286 offset_in_r0 = -1;
7287 sp_in_r0 = 0;
7288 gcc_assert (!refers_to_regno_p
7289 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7290 }
7291
7292 if (*++tmp_pnt <= 0)
7293 tmp_pnt = schedule.temps;
7294
7295 reg_rtx = tmp_reg;
7296 }
7297 {
7298 rtx insn;
7299
7300 /* Mark as interesting for the DWARF CFI generator. */
7301 insn = emit_move_insn (mem_rtx, reg_rtx);
7302 RTX_FRAME_RELATED_P (insn) = 1;
7303 /* If we use an intermediate register for the save, we can't
7304 describe this exactly in cfi as a copy of the to-be-saved
7305 register into the temporary register and then the temporary
7306 register on the stack, because the temporary register can
7307 have a different natural size than the to-be-saved register.
7308 Thus, we gloss over the intermediate copy and pretend we do
7309 a direct save from the to-be-saved register. */
7310 if (REGNO (reg_rtx) != reg)
7311 {
7312 rtx set;
7313
7314 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7315 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7316 }
7317
7318 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7319 {
7320 rtx reg_rtx = gen_rtx_REG (mode, reg);
7321 rtx set;
7322 rtx mem_rtx = gen_frame_mem (mode,
7323 gen_rtx_PLUS (Pmode,
7324 stack_pointer_rtx,
7325 GEN_INT (offset)));
7326
7327 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7328 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7329 }
7330 }
7331 }
7332
7333 gcc_assert (entry->offset == d_rounding);
7334 }
7335 else
7336 {
7337 push_regs (&live_regs_mask, current_function_interrupt);
7338 stack_usage += d;
7339 }
7340
7341 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7342 emit_insn (gen_GOTaddr2picreg ());
7343
7344 if (SHMEDIA_REGS_STACK_ADJUST ())
7345 {
7346 /* This must NOT go through the PLT, otherwise mach and macl
7347 may be clobbered. */
7348 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7349 (TARGET_FPU_ANY
7350 ? "__GCC_push_shmedia_regs"
7351 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7352 emit_insn (gen_shmedia_save_restore_regs_compact
7353 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7354 }
7355
7356 if (target_flags != save_flags && ! current_function_interrupt)
7357 emit_insn (gen_toggle_sz ());
7358
7359 target_flags = save_flags;
7360
7361 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7362 stack_pointer_rtx, 0, NULL, true);
7363 stack_usage += rounded_frame_size (d) - d_rounding;
7364
7365 if (frame_pointer_needed)
7366 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7367
7368 if (TARGET_SHCOMPACT
7369 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7370 {
7371 /* This must NOT go through the PLT, otherwise mach and macl
7372 may be clobbered. */
7373 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7374 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7375 emit_insn (gen_shcompact_incoming_args ());
7376 }
7377
7378 /* If we are profiling, make sure no instructions are scheduled before
7379 the call to mcount. Similarly if some call instructions are swapped
7380 before frame related insns, it'll confuse the unwinder because
7381 currently SH has no unwind info for function epilogues. */
7382 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7383 emit_insn (gen_blockage ());
7384
7385 if (flag_stack_usage_info)
7386 current_function_static_stack_size = stack_usage;
7387 }
7388
7389 void
7390 sh_expand_epilogue (bool sibcall_p)
7391 {
7392 HARD_REG_SET live_regs_mask;
7393 int d, i;
7394 int d_rounding = 0;
7395
7396 int save_flags = target_flags;
7397 int frame_size, save_size;
7398 int fpscr_deferred = 0;
7399 int e = sibcall_p ? -1 : 1;
7400
7401 d = calc_live_regs (&live_regs_mask);
7402
7403 save_size = d;
7404 frame_size = rounded_frame_size (d);
7405
7406 if (TARGET_SH5)
7407 {
7408 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7409 int total_size;
7410 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7411 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7412 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7413
7414 total_size = d + tregs_space;
7415 total_size += rounded_frame_size (total_size);
7416 save_size = total_size - frame_size;
7417
7418 /* If adjusting the stack in a single step costs nothing extra, do so.
7419 I.e. either if a single addi is enough, or we need a movi anyway,
7420 and we don't exceed the maximum offset range (the test for the
7421 latter is conservative for simplicity). */
7422 if (TARGET_SHMEDIA
7423 && ! frame_pointer_needed
7424 && (CONST_OK_FOR_I10 (total_size)
7425 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7426 && total_size <= 2044)))
7427 d_rounding = frame_size;
7428
7429 frame_size -= d_rounding;
7430 }
7431
7432 if (frame_pointer_needed)
7433 {
7434 /* We must avoid scheduling the epilogue with previous basic blocks.
7435 See PR/18032 and PR/40313. */
7436 emit_insn (gen_blockage ());
7437 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7438 &live_regs_mask, true);
7439
7440 /* We must avoid moving the stack pointer adjustment past code
7441 which reads from the local frame, else an interrupt could
7442 occur after the SP adjustment and clobber data in the local
7443 frame. */
7444 emit_insn (gen_blockage ());
7445 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7446 }
7447 else if (frame_size)
7448 {
7449 /* We must avoid moving the stack pointer adjustment past code
7450 which reads from the local frame, else an interrupt could
7451 occur after the SP adjustment and clobber data in the local
7452 frame. */
7453 emit_insn (gen_blockage ());
7454 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7455 &live_regs_mask, true);
7456 }
7457
7458 if (SHMEDIA_REGS_STACK_ADJUST ())
7459 {
7460 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7461 (TARGET_FPU_ANY
7462 ? "__GCC_pop_shmedia_regs"
7463 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7464 /* This must NOT go through the PLT, otherwise mach and macl
7465 may be clobbered. */
7466 emit_insn (gen_shmedia_save_restore_regs_compact
7467 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7468 }
7469
7470 /* Pop all the registers. */
7471
7472 if (target_flags != save_flags && ! current_function_interrupt)
7473 emit_insn (gen_toggle_sz ());
7474 if (TARGET_SH5)
7475 {
7476 int offset_base, offset;
7477 int offset_in_r0 = -1;
7478 int sp_in_r0 = 0;
7479 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7480 save_schedule schedule;
7481 save_entry *entry;
7482 int *tmp_pnt;
7483
7484 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7485 offset_base = -entry[1].offset + d_rounding;
7486 tmp_pnt = schedule.temps;
7487 for (; entry->mode != VOIDmode; entry--)
7488 {
7489 enum machine_mode mode = (enum machine_mode) entry->mode;
7490 int reg = entry->reg;
7491 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7492
7493 offset = offset_base + entry->offset;
7494 reg_rtx = gen_rtx_REG (mode, reg);
7495
7496 mem_rtx = gen_frame_mem (mode,
7497 gen_rtx_PLUS (Pmode,
7498 stack_pointer_rtx,
7499 GEN_INT (offset)));
7500
7501 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7502 mem_rtx = NULL_RTX;
7503
7504 if (HAVE_POST_INCREMENT
7505 && (offset == offset_in_r0
7506 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7507 && mem_rtx == NULL_RTX)
7508 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7509 {
7510 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7511
7512 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7513 post_inc = NULL_RTX;
7514 else
7515 mem_rtx = NULL_RTX;
7516 }
7517
7518 if (mem_rtx != NULL_RTX)
7519 goto addr_ok;
7520
7521 if (offset_in_r0 == -1)
7522 {
7523 emit_move_insn (r0, GEN_INT (offset));
7524 offset_in_r0 = offset;
7525 }
7526 else if (offset != offset_in_r0)
7527 {
7528 emit_move_insn (r0,
7529 gen_rtx_PLUS
7530 (Pmode, r0,
7531 GEN_INT (offset - offset_in_r0)));
7532 offset_in_r0 += offset - offset_in_r0;
7533 }
7534
7535 if (post_inc != NULL_RTX)
7536 {
7537 if (! sp_in_r0)
7538 {
7539 emit_move_insn (r0,
7540 gen_rtx_PLUS
7541 (Pmode, r0, stack_pointer_rtx));
7542 sp_in_r0 = 1;
7543 }
7544
7545 mem_rtx = post_inc;
7546
7547 offset_in_r0 += GET_MODE_SIZE (mode);
7548 }
7549 else if (sp_in_r0)
7550 mem_rtx = gen_frame_mem (mode, r0);
7551 else
7552 mem_rtx = gen_frame_mem (mode,
7553 gen_rtx_PLUS (Pmode,
7554 stack_pointer_rtx,
7555 r0));
7556
7557 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7558 || mem_rtx == post_inc);
7559
7560 addr_ok:
7561 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7562 && mem_rtx != post_inc)
7563 {
7564 emit_move_insn (r0, mem_rtx);
7565 mem_rtx = r0;
7566 }
7567 else if (TARGET_REGISTER_P (reg))
7568 {
7569 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7570
7571 /* Give the scheduler a bit of freedom by using up to
7572 MAX_TEMPS registers in a round-robin fashion. */
7573 emit_move_insn (tmp_reg, mem_rtx);
7574 mem_rtx = tmp_reg;
7575 if (*++tmp_pnt < 0)
7576 tmp_pnt = schedule.temps;
7577 }
7578
7579 emit_move_insn (reg_rtx, mem_rtx);
7580 }
7581
7582 gcc_assert (entry->offset + offset_base == d + d_rounding);
7583 }
7584 else /* ! TARGET_SH5 */
7585 {
7586 int last_reg;
7587
7588 save_size = 0;
7589 /* For an ISR with RESBANK attribute assigned, don't pop PR
7590 register. */
7591 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7592 && !sh_cfun_resbank_handler_p ())
7593 {
7594 if (!frame_pointer_needed)
7595 emit_insn (gen_blockage ());
7596 pop (PR_REG);
7597 }
7598
7599 /* Banked registers are popped first to avoid being scheduled in the
7600 delay slot. RTE switches banks before the delay-slot instruction. */
7601 if (current_function_interrupt)
7602 {
7603 bool use_movml = false;
7604
7605 if (TARGET_SH2A)
7606 {
7607 unsigned int count = 0;
7608
7609 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7610 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7611 count++;
7612 else
7613 break;
7614
7615 /* Use movml when all banked registers are popped. */
7616 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7617 use_movml = true;
7618 }
7619
7620 if (sh_cfun_resbank_handler_p ())
7621 ; /* Do nothing. */
7622 else if (use_movml)
7623 {
7624 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7625
7626 /* We must avoid scheduling the multiple-register load insn together
7627 with other insns. */
7628 emit_insn (gen_blockage ());
7629 emit_insn (gen_movml_pop_banked (sp_reg));
7630 emit_insn (gen_blockage ());
7631 }
7632 else
7633 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7634 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7635 pop (i);
7636
7637 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7638 }
7639 else
7640 last_reg = FIRST_PSEUDO_REGISTER;
7641
7642 for (i = 0; i < last_reg; i++)
7643 {
7644 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7645
7646 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7647 && hard_reg_set_intersect_p (live_regs_mask,
7648 reg_class_contents[DF_REGS]))
7649 fpscr_deferred = 1;
7650 /* For an ISR with RESBANK attribute assigned, don't pop
7651 following registers, R0-R14, MACH, MACL and GBR. */
7652 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7653 && ! (sh_cfun_resbank_handler_p ()
7654 && ((j >= FIRST_GENERAL_REG
7655 && j < LAST_GENERAL_REG)
7656 || j == MACH_REG
7657 || j == MACL_REG
7658 || j == GBR_REG)))
7659 pop (j);
7660
7661 if (j == FIRST_FP_REG && fpscr_deferred)
7662 pop (FPSCR_REG);
7663 }
7664 }
7665 if (target_flags != save_flags && ! current_function_interrupt)
7666 emit_insn (gen_toggle_sz ());
7667 target_flags = save_flags;
7668
7669 output_stack_adjust (crtl->args.pretend_args_size
7670 + save_size + d_rounding
7671 + crtl->args.info.stack_regs * 8,
7672 stack_pointer_rtx, e, NULL, true);
7673
7674 if (crtl->calls_eh_return)
7675 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7676 EH_RETURN_STACKADJ_RTX));
7677
7678 /* Switch back to the normal stack if necessary. */
7679 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7680 emit_insn (gen_sp_switch_2 ());
7681
7682 /* Tell flow the insn that pops PR isn't dead. */
7683 /* PR_REG will never be live in SHmedia mode, and we don't need to
7684 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7685 by the return pattern. */
7686 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7687 emit_use (gen_rtx_REG (SImode, PR_REG));
7688 }
7689
7690 static int sh_need_epilogue_known = 0;
7691
7692 bool
7693 sh_need_epilogue (void)
7694 {
7695 if (! sh_need_epilogue_known)
7696 {
7697 rtx epilogue;
7698
7699 start_sequence ();
7700 sh_expand_epilogue (0);
7701 epilogue = get_insns ();
7702 end_sequence ();
7703 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7704 }
7705 return sh_need_epilogue_known > 0;
7706 }
7707
7708 /* Emit code to change the current function's return address to RA.
7709 TEMP is available as a scratch register, if needed. */
7710
7711 void
7712 sh_set_return_address (rtx ra, rtx tmp)
7713 {
7714 HARD_REG_SET live_regs_mask;
7715 int d;
7716 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7717 int pr_offset;
7718
7719 d = calc_live_regs (&live_regs_mask);
7720
7721 /* If pr_reg isn't live, we can set it (or the register given in
7722 sh_media_register_for_return) directly. */
7723 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7724 {
7725 rtx rr;
7726
7727 if (TARGET_SHMEDIA)
7728 {
7729 int rr_regno = sh_media_register_for_return ();
7730
7731 if (rr_regno < 0)
7732 rr_regno = pr_reg;
7733
7734 rr = gen_rtx_REG (DImode, rr_regno);
7735 }
7736 else
7737 rr = gen_rtx_REG (SImode, pr_reg);
7738
7739 emit_insn (GEN_MOV (rr, ra));
7740 /* Tell flow the register for return isn't dead. */
7741 emit_use (rr);
7742 return;
7743 }
7744
7745 if (TARGET_SH5)
7746 {
7747 int offset;
7748 save_schedule schedule;
7749 save_entry *entry;
7750
7751 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7752 offset = entry[1].offset;
7753 for (; entry->mode != VOIDmode; entry--)
7754 if (entry->reg == pr_reg)
7755 goto found;
7756
7757 /* We couldn't find the PR register. */
7758 gcc_unreachable ();
7759
7760 found:
7761 offset = entry->offset - offset;
7762 pr_offset = (rounded_frame_size (d) + offset
7763 + SHMEDIA_REGS_STACK_ADJUST ());
7764 }
7765 else
7766 pr_offset = rounded_frame_size (d);
7767
7768 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7769
7770 if (frame_pointer_needed)
7771 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7772 else
7773 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7774
7775 tmp = gen_frame_mem (Pmode, tmp);
7776 emit_insn (GEN_MOV (tmp, ra));
7777 /* Tell flow this store isn't dead. */
7778 emit_use (tmp);
7779 }
7780
7781 /* Clear variables at function end. */
7782
7783 static void
7784 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7785 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7786 {
7787 sh_need_epilogue_known = 0;
7788 }
7789
7790 static rtx
7791 sh_builtin_saveregs (void)
7792 {
7793 /* First unnamed integer register. */
7794 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7795 /* Number of integer registers we need to save. */
7796 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7797 /* First unnamed SFmode float reg. */
7798 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7799 /* Number of SFmode float regs to save. */
7800 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7801 rtx regbuf, fpregs;
7802 int bufsize, regno;
7803 alias_set_type alias_set;
7804
7805 if (TARGET_SH5)
7806 {
7807 if (n_intregs)
7808 {
7809 int pushregs = n_intregs;
7810
7811 while (pushregs < NPARM_REGS (SImode) - 1
7812 && (CALL_COOKIE_INT_REG_GET
7813 (crtl->args.info.call_cookie,
7814 NPARM_REGS (SImode) - pushregs)
7815 == 1))
7816 {
7817 crtl->args.info.call_cookie
7818 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7819 - pushregs, 1);
7820 pushregs++;
7821 }
7822
7823 if (pushregs == NPARM_REGS (SImode))
7824 crtl->args.info.call_cookie
7825 |= (CALL_COOKIE_INT_REG (0, 1)
7826 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7827 else
7828 crtl->args.info.call_cookie
7829 |= CALL_COOKIE_STACKSEQ (pushregs);
7830
7831 crtl->args.pretend_args_size += 8 * n_intregs;
7832 }
7833 if (TARGET_SHCOMPACT)
7834 return const0_rtx;
7835 }
7836
7837 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7838 {
7839 error ("__builtin_saveregs not supported by this subtarget");
7840 return const0_rtx;
7841 }
7842
7843 if (TARGET_SHMEDIA)
7844 n_floatregs = 0;
7845
7846 /* Allocate block of memory for the regs. */
7847 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7848 Or can assign_stack_local accept a 0 SIZE argument? */
7849 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7850
7851 if (TARGET_SHMEDIA)
7852 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7853 else if (n_floatregs & 1)
7854 {
7855 rtx addr;
7856
7857 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7858 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7859 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7860 regbuf = change_address (regbuf, BLKmode, addr);
7861 }
7862 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7863 {
7864 rtx addr, mask;
7865
7866 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7867 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
7868 XEXP (regbuf, 0), 4));
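/* (ADDR + 4) & -8 rounds the buffer address up to the next 8-byte
   boundary; the extra UNITS_PER_WORD allocated above guarantees the
   aligned buffer still fits inside the stack slot.  */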
7869 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7870 emit_insn (gen_andsi3 (addr, addr, mask));
7871 regbuf = change_address (regbuf, BLKmode, addr);
7872 }
7873 else
7874 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7875 alias_set = get_varargs_alias_set ();
7876 set_mem_alias_set (regbuf, alias_set);
7877
7878 /* Save int args.
7879 This is optimized to only save the regs that are necessary. Explicitly
7880 named args need not be saved. */
7881 if (n_intregs > 0)
7882 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7883 adjust_address (regbuf, BLKmode,
7884 n_floatregs * UNITS_PER_WORD),
7885 n_intregs);
7886
7887 if (TARGET_SHMEDIA)
7888 /* Return the address of the regbuf. */
7889 return XEXP (regbuf, 0);
7890
7891 /* Save float args.
7892 This is optimized to only save the regs that are necessary. Explicitly
7893 named args need not be saved.
7894 We explicitly build a pointer to the buffer because it halves the insn
7895 count when not optimizing (otherwise the pointer is built for each reg
7896 saved).
7897 We emit the moves in reverse order so that we can use predecrement. */
7898
7899 fpregs = copy_to_mode_reg (Pmode,
7900 plus_constant (Pmode, XEXP (regbuf, 0),
7901 n_floatregs * UNITS_PER_WORD));
7902 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7903 {
7904 rtx mem;
7905 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7906 {
7907 emit_insn (gen_addsi3 (fpregs, fpregs,
7908 GEN_INT (-2 * UNITS_PER_WORD)));
7909 mem = change_address (regbuf, DFmode, fpregs);
7910 emit_move_insn (mem,
7911 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7912 }
7913 regno = first_floatreg;
7914 if (regno & 1)
7915 {
7916 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7917 mem = change_address (regbuf, SFmode, fpregs);
7918 emit_move_insn (mem,
7919 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7920 - (TARGET_LITTLE_ENDIAN != 0)));
7921 }
7922 }
7923 else
7924 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7925 {
7926 rtx mem;
7927
7928 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7929 mem = change_address (regbuf, SFmode, fpregs);
7930 emit_move_insn (mem,
7931 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7932 }
7933
7934 /* Return the address of the regbuf. */
7935 return XEXP (regbuf, 0);
7936 }
7937
7938 /* Define the `__builtin_va_list' type for the ABI. */
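/* An equivalent C declaration of the record built below (a sketch for the
   SH2E/SH4 GCC ABI; the field meanings follow sh_va_start further down):

     struct __va_list_tag
     {
       void *__va_next_o;          next integer argument in the reg save area
       void *__va_next_o_limit;    end of the integer part of the save area
       void *__va_next_fp;         next FP argument in the reg save area
       void *__va_next_fp_limit;   end of the FP part of the save area
       void *__va_next_stack;      next argument passed on the stack
     };  */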
7939
7940 static tree
7941 sh_build_builtin_va_list (void)
7942 {
7943 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7944 tree record, type_decl;
7945
7946 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7947 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7948 return ptr_type_node;
7949
7950 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7951 type_decl = build_decl (BUILTINS_LOCATION,
7952 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7953
7954 f_next_o = build_decl (BUILTINS_LOCATION,
7955 FIELD_DECL, get_identifier ("__va_next_o"),
7956 ptr_type_node);
7957 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7958 FIELD_DECL,
7959 get_identifier ("__va_next_o_limit"),
7960 ptr_type_node);
7961 f_next_fp = build_decl (BUILTINS_LOCATION,
7962 FIELD_DECL, get_identifier ("__va_next_fp"),
7963 ptr_type_node);
7964 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7965 FIELD_DECL,
7966 get_identifier ("__va_next_fp_limit"),
7967 ptr_type_node);
7968 f_next_stack = build_decl (BUILTINS_LOCATION,
7969 FIELD_DECL, get_identifier ("__va_next_stack"),
7970 ptr_type_node);
7971
7972 DECL_FIELD_CONTEXT (f_next_o) = record;
7973 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7974 DECL_FIELD_CONTEXT (f_next_fp) = record;
7975 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7976 DECL_FIELD_CONTEXT (f_next_stack) = record;
7977
7978 TYPE_STUB_DECL (record) = type_decl;
7979 TYPE_NAME (record) = type_decl;
7980 TYPE_FIELDS (record) = f_next_o;
7981 DECL_CHAIN (f_next_o) = f_next_o_limit;
7982 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7983 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7984 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7985
7986 layout_type (record);
7987
7988 return record;
7989 }
7990
7991 /* Implement `va_start' for varargs and stdarg. */
7992
7993 static void
7994 sh_va_start (tree valist, rtx nextarg)
7995 {
7996 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7997 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7998 tree t, u;
7999 int nfp, nint;
8000
8001 if (TARGET_SH5)
8002 {
8003 expand_builtin_saveregs ();
8004 std_expand_builtin_va_start (valist, nextarg);
8005 return;
8006 }
8007
8008 if ((! TARGET_SH2E && ! TARGET_SH4)
8009 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8010 {
8011 std_expand_builtin_va_start (valist, nextarg);
8012 return;
8013 }
8014
8015 f_next_o = TYPE_FIELDS (va_list_type_node);
8016 f_next_o_limit = DECL_CHAIN (f_next_o);
8017 f_next_fp = DECL_CHAIN (f_next_o_limit);
8018 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8019 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8020
8021 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8022 NULL_TREE);
8023 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8024 valist, f_next_o_limit, NULL_TREE);
8025 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8026 NULL_TREE);
8027 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8028 valist, f_next_fp_limit, NULL_TREE);
8029 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8030 valist, f_next_stack, NULL_TREE);
8031
8032 /* Call __builtin_saveregs. */
8033 u = make_tree (sizetype, expand_builtin_saveregs ());
8034 u = fold_convert (ptr_type_node, u);
8035 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8036 TREE_SIDE_EFFECTS (t) = 1;
8037 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8038
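/* The literal 8 below is presumably NPARM_REGS (SFmode) and the literal 4
   further down is NPARM_REGS (SImode) for the targets that reach this code;
   NFP and NINT end up as the number of anonymous FP resp. integer register
   slots that __builtin_saveregs saved above.  */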
8039 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8040 if (nfp < 8)
8041 nfp = 8 - nfp;
8042 else
8043 nfp = 0;
8044 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8045 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8046 TREE_SIDE_EFFECTS (t) = 1;
8047 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8048
8049 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8050 TREE_SIDE_EFFECTS (t) = 1;
8051 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8052
8053 nint = crtl->args.info.arg_count[SH_ARG_INT];
8054 if (nint < 4)
8055 nint = 4 - nint;
8056 else
8057 nint = 0;
8058 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8059 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8060 TREE_SIDE_EFFECTS (t) = 1;
8061 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8062
8063 u = make_tree (ptr_type_node, nextarg);
8064 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8065 TREE_SIDE_EFFECTS (t) = 1;
8066 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8067 }
8068
8069 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8070 member, return it. */
8071 static tree
8072 find_sole_member (tree type)
8073 {
8074 tree field, member = NULL_TREE;
8075
8076 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8077 {
8078 if (TREE_CODE (field) != FIELD_DECL)
8079 continue;
8080 if (!DECL_SIZE (field))
8081 return NULL_TREE;
8082 if (integer_zerop (DECL_SIZE (field)))
8083 continue;
8084 if (member)
8085 return NULL_TREE;
8086 member = field;
8087 }
8088 return member;
8089 }
8090 /* Implement `va_arg'. */
8091
8092 static tree
8093 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8094 gimple_seq *post_p ATTRIBUTE_UNUSED)
8095 {
8096 HOST_WIDE_INT size, rsize;
8097 tree tmp, pptr_type_node;
8098 tree addr, lab_over = NULL, result = NULL;
8099 bool pass_by_ref;
8100 tree eff_type;
8101
8102 if (!VOID_TYPE_P (type))
8103 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8104 else
8105 pass_by_ref = false;
8106
8107 if (pass_by_ref)
8108 type = build_pointer_type (type);
8109
8110 size = int_size_in_bytes (type);
8111 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8112 pptr_type_node = build_pointer_type (ptr_type_node);
8113
8114 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8115 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8116 {
8117 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8118 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8119 int pass_as_float;
8120 tree lab_false;
8121 tree member;
8122
8123 f_next_o = TYPE_FIELDS (va_list_type_node);
8124 f_next_o_limit = DECL_CHAIN (f_next_o);
8125 f_next_fp = DECL_CHAIN (f_next_o_limit);
8126 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8127 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8128
8129 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8130 NULL_TREE);
8131 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8132 valist, f_next_o_limit, NULL_TREE);
8133 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8134 valist, f_next_fp, NULL_TREE);
8135 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8136 valist, f_next_fp_limit, NULL_TREE);
8137 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8138 valist, f_next_stack, NULL_TREE);
8139
8140 /* Structures with a single member with a distinct mode are passed
8141 like their member. This is relevant if the latter has a REAL_TYPE
8142 or COMPLEX_TYPE type. */
8143 eff_type = type;
8144 while (TREE_CODE (eff_type) == RECORD_TYPE
8145 && (member = find_sole_member (eff_type))
8146 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8147 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8148 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8149 {
8150 tree field_type = TREE_TYPE (member);
8151
8152 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8153 eff_type = field_type;
8154 else
8155 {
8156 gcc_assert ((TYPE_ALIGN (eff_type)
8157 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8158 || (TYPE_ALIGN (eff_type)
8159 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8160 break;
8161 }
8162 }
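/* For example, struct { float f; } ends up with EFF_TYPE == float here,
   and a struct that merely wraps such a struct should be unwrapped again,
   so both take the floating-point path chosen below.  */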
8163
8164 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8165 {
8166 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8167 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8168 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8169 && size <= 16));
8170 }
8171 else
8172 {
8173 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8174 }
8175
8176 addr = create_tmp_var (pptr_type_node, NULL);
8177 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8178 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8179
8180 valist = build_simple_mem_ref (addr);
8181
8182 if (pass_as_float)
8183 {
8184 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8185 tree cmp;
8186 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8187
8188 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8189 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8190
8191 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8192 tmp = next_fp_limit;
8193 if (size > 4 && !is_double)
8194 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8195 tmp = build2 (GE_EXPR, boolean_type_node,
8196 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8197 cmp = build3 (COND_EXPR, void_type_node, tmp,
8198 build1 (GOTO_EXPR, void_type_node,
8199 unshare_expr (lab_false)), NULL_TREE);
8200 if (!is_double)
8201 gimplify_and_add (cmp, pre_p);
8202
8203 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8204 || (is_double || size == 16))
8205 {
8206 tmp = fold_convert (sizetype, next_fp_tmp);
8207 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8208 size_int (UNITS_PER_WORD));
8209 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8210 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8211 }
8212 if (is_double)
8213 gimplify_and_add (cmp, pre_p);
8214
8215 #ifdef FUNCTION_ARG_SCmode_WART
8216 if (TYPE_MODE (eff_type) == SCmode
8217 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8218 {
8219 tree subtype = TREE_TYPE (eff_type);
8220 tree real, imag;
8221
8222 imag
8223 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8224 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8225
8226 real
8227 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8228 real = get_initialized_tmp_var (real, pre_p, NULL);
8229
8230 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8231 if (type != eff_type)
8232 result = build1 (VIEW_CONVERT_EXPR, type, result);
8233 result = get_initialized_tmp_var (result, pre_p, NULL);
8234 }
8235 #endif /* FUNCTION_ARG_SCmode_WART */
8236
8237 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8238 gimplify_and_add (tmp, pre_p);
8239
8240 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8241 gimplify_and_add (tmp, pre_p);
8242
8243 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8244 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8245 gimplify_assign (unshare_expr (next_fp_tmp),
8246 unshare_expr (valist), pre_p);
8247
8248 gimplify_assign (unshare_expr (valist),
8249 unshare_expr (next_fp_tmp), post_p);
8250 valist = next_fp_tmp;
8251 }
8252 else
8253 {
8254 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8255 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8256 unshare_expr (next_o_limit));
8257 tmp = build3 (COND_EXPR, void_type_node, tmp,
8258 build1 (GOTO_EXPR, void_type_node,
8259 unshare_expr (lab_false)),
8260 NULL_TREE);
8261 gimplify_and_add (tmp, pre_p);
8262
8263 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8264 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8265
8266 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8267 gimplify_and_add (tmp, pre_p);
8268
8269 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8270 gimplify_and_add (tmp, pre_p);
8271
8272 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8273 gimplify_assign (unshare_expr (next_o),
8274 unshare_expr (next_o_limit), pre_p);
8275
8276 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8277 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8278 }
8279
8280 if (!result)
8281 {
8282 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8283 gimplify_and_add (tmp, pre_p);
8284 }
8285 }
8286
8287 /* ??? In va-sh.h, there had been code to make values larger than
8288 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8289
8290 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8291 if (result)
8292 {
8293 gimplify_assign (result, tmp, pre_p);
8294 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8295 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8296 gimplify_and_add (tmp, pre_p);
8297 }
8298 else
8299 result = tmp;
8300
8301 if (pass_by_ref)
8302 result = build_va_arg_indirect_ref (result);
8303
8304 return result;
8305 }
8306
8307 /* 64-bit floating point memory transfers are paired single-precision loads
8308 or stores, so the DWARF information needs fixing in little endian (unless
8309 PR=SZ=1 in FPSCR). */
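/* For example, a DFmode value held in the FP register pair N/N+1 is
   described by the span returned below as the two SFmode halves with the
   odd-numbered half first, i.e. (N+1, N).  */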
8310 rtx
8311 sh_dwarf_register_span (rtx reg)
8312 {
8313 unsigned regno = REGNO (reg);
8314
8315 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8316 return NULL_RTX;
8317
8318 return
8319 gen_rtx_PARALLEL (VOIDmode,
8320 gen_rtvec (2,
8321 gen_rtx_REG (SFmode, regno + 1),
8322 gen_rtx_REG (SFmode, regno)));
8323 }
8324
8325 static enum machine_mode
8326 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8327 int *punsignedp, const_tree funtype,
8328 int for_return)
8329 {
8330 if (sh_promote_prototypes (funtype))
8331 return promote_mode (type, mode, punsignedp);
8332 else
8333 return default_promote_function_mode (type, mode, punsignedp, funtype,
8334 for_return);
8335 }
8336
8337 static bool
8338 sh_promote_prototypes (const_tree type)
8339 {
8340 if (TARGET_HITACHI)
8341 return false;
8342 if (! type)
8343 return true;
8344 return ! sh_attr_renesas_p (type);
8345 }
8346
8347 /* Whether an argument must be passed by reference. On SHcompact, we
8348 pretend arguments wider than 32 bits that would have been passed in
8349 registers are passed by reference, so that an SHmedia trampoline
8350 loads them into the full 64-bit registers. */
8351
8352 static int
8353 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8354 const_tree type, bool named)
8355 {
8356 unsigned HOST_WIDE_INT size;
8357
8358 if (type)
8359 size = int_size_in_bytes (type);
8360 else
8361 size = GET_MODE_SIZE (mode);
8362
8363 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8364 && (!named
8365 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8366 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8367 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8368 && size > 4
8369 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8370 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8371 return size;
8372 else
8373 return 0;
8374 }
8375
8376 static bool
8377 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8378 const_tree type, bool named)
8379 {
8380 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8381
8382 if (targetm.calls.must_pass_in_stack (mode, type))
8383 return true;
8384
8385 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8386 wants to know about pass-by-reference semantics for incoming
8387 arguments. */
8388 if (! cum)
8389 return false;
8390
8391 if (TARGET_SHCOMPACT)
8392 {
8393 cum->byref = shcompact_byref (cum, mode, type, named);
8394 return cum->byref != 0;
8395 }
8396
8397 return false;
8398 }
8399
8400 static bool
8401 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8402 const_tree type, bool named ATTRIBUTE_UNUSED)
8403 {
8404 /* ??? How can it possibly be correct to return true only on the
8405 caller side of the equation? Is there someplace else in the
8406 sh backend that's magically producing the copies? */
8407 return (get_cumulative_args (cum)->outgoing
8408 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8409 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8410 }
8411
8412 static int
8413 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8414 tree type, bool named ATTRIBUTE_UNUSED)
8415 {
8416 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8417 int words = 0;
8418
8419 if (!TARGET_SH5
8420 && PASS_IN_REG_P (*cum, mode, type)
8421 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8422 && (ROUND_REG (*cum, mode)
8423 + (mode != BLKmode
8424 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8425 : ROUND_ADVANCE (int_size_in_bytes (type)))
8426 > NPARM_REGS (mode)))
8427 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8428
8429 else if (!TARGET_SHCOMPACT
8430 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8431 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8432
8433 return words * UNITS_PER_WORD;
8434 }
8435
8436
8437 /* Define where to put the arguments to a function.
8438 Value is zero to push the argument on the stack,
8439 or a hard register in which to store the argument.
8440
8441 MODE is the argument's machine mode.
8442 TYPE is the data type of the argument (as a tree).
8443 This is null for libcalls where that information may
8444 not be available.
8445 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8446 the preceding args and about the function being called.
8447 NAMED is nonzero if this argument is a named parameter
8448 (otherwise it is an extra parameter matching an ellipsis).
8449
8450 On SH the first args are normally in registers
8451 and the rest are pushed. Any arg that starts within the first
8452 NPARM_REGS words is at least partially passed in a register unless
8453 its data type forbids. */
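/* Note: for the default (non-Renesas, non-SH5) ABI this typically means
   r4..r7 for integer arguments and fr4..fr11 for single-precision
   floating-point arguments; BASE_ARG_REG and NPARM_REGS are the
   authoritative sources for these numbers.  */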
8454
8455 static rtx
8456 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8457 const_tree type, bool named)
8458 {
8459 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8460
8461 if (! TARGET_SH5 && mode == VOIDmode)
8462 return GEN_INT (ca->renesas_abi ? 1 : 0);
8463
8464 if (! TARGET_SH5
8465 && PASS_IN_REG_P (*ca, mode, type)
8466 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8467 {
8468 int regno;
8469
8470 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8471 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8472 {
8473 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8474 gen_rtx_REG (SFmode,
8475 BASE_ARG_REG (mode)
8476 + (ROUND_REG (*ca, mode) ^ 1)),
8477 const0_rtx);
8478 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8479 gen_rtx_REG (SFmode,
8480 BASE_ARG_REG (mode)
8481 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8482 GEN_INT (4));
8483 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8484 }
8485
8486 /* If the alignment of a DF value causes an SF register to be
8487 skipped, we will use that skipped register for the next SF
8488 value. */
8489 if ((TARGET_HITACHI || ca->renesas_abi)
8490 && ca->free_single_fp_reg
8491 && mode == SFmode)
8492 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8493
8494 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8495 ^ (mode == SFmode && TARGET_SH4
8496 && TARGET_LITTLE_ENDIAN
8497 && ! TARGET_HITACHI && ! ca->renesas_abi);
8498 return gen_rtx_REG (mode, regno);
8499
8500 }
8501
8502 if (TARGET_SH5)
8503 {
8504 if (mode == VOIDmode && TARGET_SHCOMPACT)
8505 return GEN_INT (ca->call_cookie);
8506
8507 /* The following test assumes unnamed arguments are promoted to
8508 DFmode. */
8509 if (mode == SFmode && ca->free_single_fp_reg)
8510 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8511
8512 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8513 && (named || ! ca->prototype_p)
8514 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8515 {
8516 if (! ca->prototype_p && TARGET_SHMEDIA)
8517 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8518
8519 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8520 FIRST_FP_PARM_REG
8521 + ca->arg_count[(int) SH_ARG_FLOAT]);
8522 }
8523
8524 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8525 && (! TARGET_SHCOMPACT
8526 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8527 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8528 type, named))))
8529 {
8530 return gen_rtx_REG (mode, (FIRST_PARM_REG
8531 + ca->arg_count[(int) SH_ARG_INT]));
8532 }
8533
8534 return NULL_RTX;
8535 }
8536
8537 return NULL_RTX;
8538 }
8539
8540 /* Update the data in CUM to advance over an argument
8541 of mode MODE and data type TYPE.
8542 (TYPE is null for libcalls where that information may not be
8543 available.) */
8544
8545 static void
8546 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
8547 const_tree type, bool named)
8548 {
8549 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8550
8551 if (ca->force_mem)
8552 ca->force_mem = 0;
8553 else if (TARGET_SH5)
8554 {
8555 const_tree type2 = (ca->byref && type
8556 ? TREE_TYPE (type)
8557 : type);
8558 enum machine_mode mode2 = (ca->byref && type
8559 ? TYPE_MODE (type2)
8560 : mode);
8561 int dwords = ((ca->byref
8562 ? ca->byref
8563 : mode2 == BLKmode
8564 ? int_size_in_bytes (type2)
8565 : GET_MODE_SIZE (mode2)) + 7) / 8;
8566 int numregs = MIN (dwords, NPARM_REGS (SImode)
8567 - ca->arg_count[(int) SH_ARG_INT]);
8568
8569 if (numregs)
8570 {
8571 ca->arg_count[(int) SH_ARG_INT] += numregs;
8572 if (TARGET_SHCOMPACT
8573 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8574 {
8575 ca->call_cookie
8576 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8577 - numregs, 1);
8578 /* N.B. We want this also for outgoing. */
8579 ca->stack_regs += numregs;
8580 }
8581 else if (ca->byref)
8582 {
8583 if (! ca->outgoing)
8584 ca->stack_regs += numregs;
8585 ca->byref_regs += numregs;
8586 ca->byref = 0;
8587 do
8588 ca->call_cookie
8589 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8590 - numregs, 2);
8591 while (--numregs);
8592 ca->call_cookie
8593 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8594 - 1, 1);
8595 }
8596 else if (dwords > numregs)
8597 {
8598 int pushregs = numregs;
8599
8600 if (TARGET_SHCOMPACT)
8601 ca->stack_regs += numregs;
8602 while (pushregs < NPARM_REGS (SImode) - 1
8603 && (CALL_COOKIE_INT_REG_GET
8604 (ca->call_cookie,
8605 NPARM_REGS (SImode) - pushregs)
8606 == 1))
8607 {
8608 ca->call_cookie
8609 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8610 - pushregs, 1);
8611 pushregs++;
8612 }
8613 if (numregs == NPARM_REGS (SImode))
8614 ca->call_cookie
8615 |= CALL_COOKIE_INT_REG (0, 1)
8616 | CALL_COOKIE_STACKSEQ (numregs - 1);
8617 else
8618 ca->call_cookie
8619 |= CALL_COOKIE_STACKSEQ (numregs);
8620 }
8621 }
8622 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8623 && (named || ! ca->prototype_p))
8624 {
8625 if (mode2 == SFmode && ca->free_single_fp_reg)
8626 ca->free_single_fp_reg = 0;
8627 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8628 < NPARM_REGS (SFmode))
8629 {
8630 int numfpregs
8631 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8632 NPARM_REGS (SFmode)
8633 - ca->arg_count[(int) SH_ARG_FLOAT]);
8634
8635 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8636
8637 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8638 {
8639 if (ca->outgoing && numregs > 0)
8640 do
8641 {
8642 ca->call_cookie
8643 |= (CALL_COOKIE_INT_REG
8644 (ca->arg_count[(int) SH_ARG_INT]
8645 - numregs + ((numfpregs - 2) / 2),
8646 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8647 - numfpregs) / 2));
8648 }
8649 while (numfpregs -= 2);
8650 }
8651 else if (mode2 == SFmode && (named)
8652 && (ca->arg_count[(int) SH_ARG_FLOAT]
8653 < NPARM_REGS (SFmode)))
8654 ca->free_single_fp_reg
8655 = FIRST_FP_PARM_REG - numfpregs
8656 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8657 }
8658 }
8659 return;
8660 }
8661
8662 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8663 {
8664 /* Note that we've used the skipped register. */
8665 if (mode == SFmode && ca->free_single_fp_reg)
8666 {
8667 ca->free_single_fp_reg = 0;
8668 return;
8669 }
8670 /* When we have a DF after an SF, there's an SF register that gets
8671 skipped in order to align the DF value. We note this skipped
8672 register, because the next SF value will use it, and not the
8673 SF that follows the DF. */
8674 if (mode == DFmode
8675 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8676 {
8677 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8678 + BASE_ARG_REG (mode));
8679 }
8680 }
8681
8682 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8683 || PASS_IN_REG_P (*ca, mode, type))
8684 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8685 = (ROUND_REG (*ca, mode)
8686 + (mode == BLKmode
8687 ? ROUND_ADVANCE (int_size_in_bytes (type))
8688 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8689 }
8690
8691 /* The Renesas calling convention doesn't quite fit into this scheme since
8692 the address is passed like an invisible argument, but one that is always
8693 passed in memory. */
8694 static rtx
8695 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8696 {
8697 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8698 return NULL_RTX;
8699 return gen_rtx_REG (Pmode, 2);
8700 }
8701
8702 /* Worker function for TARGET_FUNCTION_VALUE.
8703
8704 For the SH, this is like LIBCALL_VALUE, except that we must change the
8705 mode like PROMOTE_MODE does.
8706 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8707 tested here has to be kept in sync with the one in explow.c:promote_mode.
8708 */
8709
8710 static rtx
8711 sh_function_value (const_tree valtype,
8712 const_tree fn_decl_or_type,
8713 bool outgoing ATTRIBUTE_UNUSED)
8714 {
8715 if (fn_decl_or_type
8716 && !DECL_P (fn_decl_or_type))
8717 fn_decl_or_type = NULL;
8718
8719 return gen_rtx_REG (
8720 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8721 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8722 && (TREE_CODE (valtype) == INTEGER_TYPE
8723 || TREE_CODE (valtype) == ENUMERAL_TYPE
8724 || TREE_CODE (valtype) == BOOLEAN_TYPE
8725 || TREE_CODE (valtype) == REAL_TYPE
8726 || TREE_CODE (valtype) == OFFSET_TYPE))
8727 && sh_promote_prototypes (fn_decl_or_type)
8728 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8729 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8730 }
8731
8732 /* Worker function for TARGET_LIBCALL_VALUE. */
8733
8734 static rtx
8735 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8736 {
8737 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8738 }
8739
8740 /* Return true if N is a possible register number of function value. */
8741
8742 static bool
8743 sh_function_value_regno_p (const unsigned int regno)
8744 {
8745 return ((regno) == FIRST_RET_REG
8746 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8747 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8748 }
8749
8750 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8751
8752 static bool
8753 sh_return_in_memory (const_tree type, const_tree fndecl)
8754 {
8755 if (TARGET_SH5)
8756 {
8757 if (TYPE_MODE (type) == BLKmode)
8758 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8759 else
8760 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8761 }
8762 else
8763 {
8764 return (TYPE_MODE (type) == BLKmode
8765 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8766 && TREE_CODE (type) == RECORD_TYPE));
8767 }
8768 }
8769
8770 /* We actually emit the code in sh_expand_prologue. We used to use
8771 a static variable to flag that we need to emit this code, but that
8772 doesn't work with inlining, where functions are deferred and then
8773 emitted later. Fortunately, we already have two flags that are part
8774 of struct function that tell if a function uses varargs or stdarg. */
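/* A worked example (assuming NPARM_REGS (SImode) == 4): for
   void f (int a, ...) one integer argument register is consumed by the
   named argument, so ANON_PARM_REGS is 3 and *PRETEND_ARG_SIZE becomes
   3 * 4 = 12 bytes, matching the three registers pushed by
   sh_expand_prologue above.  */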
8775 static void
8776 sh_setup_incoming_varargs (cumulative_args_t ca,
8777 enum machine_mode mode,
8778 tree type,
8779 int *pretend_arg_size,
8780 int second_time ATTRIBUTE_UNUSED)
8781 {
8782 gcc_assert (cfun->stdarg);
8783 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8784 {
8785 int named_parm_regs, anon_parm_regs;
8786
8787 named_parm_regs = (ROUND_REG (*get_cumulative_args (ca), mode)
8788 + (mode == BLKmode
8789 ? ROUND_ADVANCE (int_size_in_bytes (type))
8790 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8791 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8792 if (anon_parm_regs > 0)
8793 *pretend_arg_size = anon_parm_regs * 4;
8794 }
8795 }
8796
8797 static bool
8798 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8799 {
8800 return TARGET_SH5;
8801 }
8802
8803 static bool
8804 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8805 {
8806 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8807
8808 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8809 }
8810
8811
8812 /* Define the offset between two registers, one to be eliminated, and
8813 the other its replacement, at the start of a routine. */
8814
8815 int
8816 initial_elimination_offset (int from, int to)
8817 {
8818 int regs_saved;
8819 int regs_saved_rounding = 0;
8820 int total_saved_regs_space;
8821 int total_auto_space;
8822 int save_flags = target_flags;
8823 int copy_flags;
8824 HARD_REG_SET live_regs_mask;
8825
8826 shmedia_space_reserved_for_target_registers = false;
8827 regs_saved = calc_live_regs (&live_regs_mask);
8828 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8829
8830 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8831 {
8832 shmedia_space_reserved_for_target_registers = true;
8833 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8834 }
8835
8836 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8837 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8838 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8839
8840 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8841 copy_flags = target_flags;
8842 target_flags = save_flags;
8843
8844 total_saved_regs_space = regs_saved + regs_saved_rounding;
8845
8846 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8847 return total_saved_regs_space + total_auto_space
8848 + crtl->args.info.byref_regs * 8;
8849
8850 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8851 return total_saved_regs_space + total_auto_space
8852 + crtl->args.info.byref_regs * 8;
8853
8854 /* Initial gap between fp and sp is 0. */
8855 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8856 return 0;
8857
8858 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8859 return rounded_frame_size (0);
8860
8861 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8862 return rounded_frame_size (0);
8863
8864 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8865 && (to == HARD_FRAME_POINTER_REGNUM
8866 || to == STACK_POINTER_REGNUM));
8867 if (TARGET_SH5)
8868 {
8869 int n = total_saved_regs_space;
8870 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8871 save_schedule schedule;
8872 save_entry *entry;
8873
8874 n += total_auto_space;
8875
8876 /* If it wasn't saved, there's not much we can do. */
8877 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8878 return n;
8879
8880 target_flags = copy_flags;
8881
8882 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8883 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8884 if (entry->reg == pr_reg)
8885 {
8886 target_flags = save_flags;
8887 return entry->offset;
8888 }
8889 gcc_unreachable ();
8890 }
8891 else
8892 return total_auto_space;
8893 }
8894
8895 /* Parse the -mfixed-range= option string. */
8896 void
8897 sh_fix_range (const char *const_str)
8898 {
8899 int i, first, last;
8900 char *str, *dash, *comma;
8901
8902 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8903 REG2 are either register names or register numbers. The effect
8904 of this option is to mark the registers in the range from REG1 to
8905 REG2 as ``fixed'' so they won't be used by the compiler. */
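/* For instance, an option string such as "r10-r12" (a hypothetical
   example) marks r10, r11 and r12 as fixed; multiple ranges may be
   separated by commas.  */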
8906
8907 i = strlen (const_str);
8908 str = (char *) alloca (i + 1);
8909 memcpy (str, const_str, i + 1);
8910
8911 while (1)
8912 {
8913 dash = strchr (str, '-');
8914 if (!dash)
8915 {
8916 warning (0, "value of -mfixed-range must have form REG1-REG2");
8917 return;
8918 }
8919 *dash = '\0';
8920 comma = strchr (dash + 1, ',');
8921 if (comma)
8922 *comma = '\0';
8923
8924 first = decode_reg_name (str);
8925 if (first < 0)
8926 {
8927 warning (0, "unknown register name: %s", str);
8928 return;
8929 }
8930
8931 last = decode_reg_name (dash + 1);
8932 if (last < 0)
8933 {
8934 warning (0, "unknown register name: %s", dash + 1);
8935 return;
8936 }
8937
8938 *dash = '-';
8939
8940 if (first > last)
8941 {
8942 warning (0, "%s-%s is an empty range", str, dash + 1);
8943 return;
8944 }
8945
8946 for (i = first; i <= last; ++i)
8947 fixed_regs[i] = call_used_regs[i] = 1;
8948
8949 if (!comma)
8950 break;
8951
8952 *comma = ',';
8953 str = comma + 1;
8954 }
8955 }
8956 \f
8957 /* Insert any deferred function attributes from earlier pragmas. */
8958 static void
8959 sh_insert_attributes (tree node, tree *attributes)
8960 {
8961 tree attrs;
8962
8963 if (TREE_CODE (node) != FUNCTION_DECL)
8964 return;
8965
8966 /* We are only interested in declarations. */
8967 if (!DECL_P (node))
8968 return;
8969
8970 /* Append the attributes to the deferred attributes. */
8971 *sh_deferred_function_attributes_tail = *attributes;
8972 attrs = sh_deferred_function_attributes;
8973 if (!attrs)
8974 return;
8975
8976 /* Some attributes imply or require the interrupt attribute. */
8977 if (!lookup_attribute ("interrupt_handler", attrs)
8978 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8979 {
8980 /* If we have a trapa_handler, but no interrupt_handler attribute,
8981 insert an interrupt_handler attribute. */
8982 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8983 /* We can't use sh_pr_interrupt here because that's not in the
8984 java frontend. */
8985 attrs
8986 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8987 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8988 if the interrupt attribute is missing, we ignore the attribute
8989 and warn. */
8990 else if (lookup_attribute ("sp_switch", attrs)
8991 || lookup_attribute ("trap_exit", attrs)
8992 || lookup_attribute ("nosave_low_regs", attrs)
8993 || lookup_attribute ("resbank", attrs))
8994 {
8995 tree *tail;
8996
8997 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8998 {
8999 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9000 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9001 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9002 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9003 warning (OPT_Wattributes,
9004 "%qE attribute only applies to interrupt functions",
9005 TREE_PURPOSE (attrs));
9006 else
9007 {
9008 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9009 NULL_TREE);
9010 tail = &TREE_CHAIN (*tail);
9011 }
9012 }
9013 attrs = *attributes;
9014 }
9015 }
9016
9017 /* Install the processed list. */
9018 *attributes = attrs;
9019
9020 /* Clear deferred attributes. */
9021 sh_deferred_function_attributes = NULL_TREE;
9022 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9023
9024 return;
9025 }
9026
9027 /* Supported attributes:
9028
9029 interrupt_handler -- specifies this function is an interrupt handler.
9030
9031 trapa_handler -- like above, but don't save all registers.
9032
9033 sp_switch -- specifies an alternate stack for an interrupt handler
9034 to run on.
9035
9036 trap_exit -- use a trapa to exit an interrupt function instead of
9037 an rte instruction.
9038
9039 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
9040 This is useful on SH3 and upwards, which have a separate set of
9041 low regs for User and Supervisor modes.
9042 This should only be used for the lowest level of interrupts. Higher levels
9043 of interrupts must save the registers in case they themselves are
9044 interrupted.
9045
9046 renesas -- use Renesas calling/layout conventions (functions and
9047 structures).
9048
9049 resbank -- In case of an ISR, use a register bank to save registers
9050 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
9051 */
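/* Illustrative use of some of the attributes above (hypothetical
handler name, trap number and stack variable; the sensible
combination depends on the target and the interrupt level):
void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
trap_exit (11)))
my_isr (void);  */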
9052
9053 /* Handle a 'resbank' attribute. */
9054 static tree
9055 sh_handle_resbank_handler_attribute (tree * node, tree name,
9056 tree args ATTRIBUTE_UNUSED,
9057 int flags ATTRIBUTE_UNUSED,
9058 bool * no_add_attrs)
9059 {
9060 if (!TARGET_SH2A)
9061 {
9062 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9063 name);
9064 *no_add_attrs = true;
9065 }
9066 if (TREE_CODE (*node) != FUNCTION_DECL)
9067 {
9068 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9069 name);
9070 *no_add_attrs = true;
9071 }
9072
9073 return NULL_TREE;
9074 }
9075
9076 /* Handle an "interrupt_handler" attribute; arguments as in
9077 struct attribute_spec.handler. */
9078 static tree
9079 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9080 tree args ATTRIBUTE_UNUSED,
9081 int flags ATTRIBUTE_UNUSED,
9082 bool *no_add_attrs)
9083 {
9084 if (TREE_CODE (*node) != FUNCTION_DECL)
9085 {
9086 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9087 name);
9088 *no_add_attrs = true;
9089 }
9090 else if (TARGET_SHCOMPACT)
9091 {
9092 error ("attribute interrupt_handler is not compatible with -m5-compact");
9093 *no_add_attrs = true;
9094 }
9095
9096 return NULL_TREE;
9097 }
9098
9099 /* Handle a 'function_vector' attribute; arguments as in
9100 struct attribute_spec.handler. */
9101 static tree
9102 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9103 tree args ATTRIBUTE_UNUSED,
9104 int flags ATTRIBUTE_UNUSED,
9105 bool * no_add_attrs)
9106 {
9107 if (!TARGET_SH2A)
9108 {
9109 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9110 name);
9111 *no_add_attrs = true;
9112 }
9113 else if (TREE_CODE (*node) != FUNCTION_DECL)
9114 {
9115 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9116 name);
9117 *no_add_attrs = true;
9118 }
9119 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9120 {
9121 /* The argument must be a constant integer. */
9122 warning (OPT_Wattributes,
9123 "%qE attribute argument not an integer constant",
9124 name);
9125 *no_add_attrs = true;
9126 }
9127 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9128 {
9129 /* The argument value must be between 0 and 255. */
9130 warning (OPT_Wattributes,
9131 "%qE attribute argument should be between 0 to 255",
9132 name);
9133 *no_add_attrs = true;
9134 }
9135 return NULL_TREE;
9136 }
9137
9138 /* Returns true if X is a SYMBOL_REF to a function that has been
9139 assigned the attribute 'function_vector'. */
9140 bool
9141 sh2a_is_function_vector_call (rtx x)
9142 {
9143 if (GET_CODE (x) == SYMBOL_REF
9144 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9145 {
9146 tree tr = SYMBOL_REF_DECL (x);
9147
9148 if (sh2a_function_vector_p (tr))
9149 return true;
9150 }
9151
9152 return false;
9153 }
9154
9155 /* Returns the function vector number, if the attribute
9156 'function_vector' is assigned, otherwise returns zero. */
9157 int
9158 sh2a_get_function_vector_number (rtx x)
9159 {
9160 int num;
9161 tree list, t;
9162
9163 if ((GET_CODE (x) == SYMBOL_REF)
9164 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9165 {
9166 t = SYMBOL_REF_DECL (x);
9167
9168 if (TREE_CODE (t) != FUNCTION_DECL)
9169 return 0;
9170
9171 list = SH_ATTRIBUTES (t);
9172 while (list)
9173 {
9174 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9175 {
9176 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9177 return num;
9178 }
9179
9180 list = TREE_CHAIN (list);
9181 }
9182
9183 return 0;
9184 }
9185 else
9186 return 0;
9187 }
9188
9189 /* Handle an "sp_switch" attribute; arguments as in
9190 struct attribute_spec.handler. */
9191 static tree
9192 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9193 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9194 {
9195 if (TREE_CODE (*node) != FUNCTION_DECL)
9196 {
9197 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9198 name);
9199 *no_add_attrs = true;
9200 }
9201 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9202 {
9203 /* The argument must be a constant string. */
9204 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9205 name);
9206 *no_add_attrs = true;
9207 }
9208
9209 return NULL_TREE;
9210 }
9211
9212 /* Handle a "trap_exit" attribute; arguments as in
9213 struct attribute_spec.handler. */
9214 static tree
9215 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9216 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9217 {
9218 if (TREE_CODE (*node) != FUNCTION_DECL)
9219 {
9220 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9221 name);
9222 *no_add_attrs = true;
9223 }
9224 /* The argument specifies a trap number to be used in a trapa instruction
9225 at function exit (instead of an rte instruction). */
9226 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9227 {
9228 /* The argument must be a constant integer. */
9229 warning (OPT_Wattributes, "%qE attribute argument not an "
9230 "integer constant", name);
9231 *no_add_attrs = true;
9232 }
9233
9234 return NULL_TREE;
9235 }
9236
9237 static tree
9238 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9239 tree name ATTRIBUTE_UNUSED,
9240 tree args ATTRIBUTE_UNUSED,
9241 int flags ATTRIBUTE_UNUSED,
9242 bool *no_add_attrs ATTRIBUTE_UNUSED)
9243 {
9244 return NULL_TREE;
9245 }
9246
9247 /* True if __attribute__((renesas)) or -mrenesas. */
9248 bool
9249 sh_attr_renesas_p (const_tree td)
9250 {
9251 if (TARGET_HITACHI)
9252 return true;
9253 if (td == NULL_TREE)
9254 return false;
9255 if (DECL_P (td))
9256 td = TREE_TYPE (td);
9257 if (td == error_mark_node)
9258 return false;
9259 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9260 != NULL_TREE);
9261 }
9262
9263 /* True if __attribute__((renesas)) or -mrenesas, for the current
9264 function. */
9265 bool
9266 sh_cfun_attr_renesas_p (void)
9267 {
9268 return sh_attr_renesas_p (current_function_decl);
9269 }
9270
9271 bool
9272 sh_cfun_interrupt_handler_p (void)
9273 {
9274 return (lookup_attribute ("interrupt_handler",
9275 DECL_ATTRIBUTES (current_function_decl))
9276 != NULL_TREE);
9277 }
9278
9279 /* Returns true if FUNC has been assigned the attribute
9280 "function_vector". */
9281 bool
9282 sh2a_function_vector_p (tree func)
9283 {
9284 tree list;
9285 if (TREE_CODE (func) != FUNCTION_DECL)
9286 return false;
9287
9288 list = SH_ATTRIBUTES (func);
9289 while (list)
9290 {
9291 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9292 return true;
9293
9294 list = TREE_CHAIN (list);
9295 }
9296 return false;
9297 }
9298
9299 /* Returns TRUE if the current function has both the "resbank" and
"interrupt_handler" attributes and is compiled for SH2A. */
9300
9301 bool
9302 sh_cfun_resbank_handler_p (void)
9303 {
9304 return ((lookup_attribute ("resbank",
9305 DECL_ATTRIBUTES (current_function_decl))
9306 != NULL_TREE)
9307 && (lookup_attribute ("interrupt_handler",
9308 DECL_ATTRIBUTES (current_function_decl))
9309 != NULL_TREE) && TARGET_SH2A);
9310 }
9311
9312 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9313
9314 static const char *
9315 sh_check_pch_target_flags (int old_flags)
9316 {
9317 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9318 | MASK_SH_E | MASK_HARD_SH4
9319 | MASK_FPU_SINGLE | MASK_SH4))
9320 return _("created and used with different architectures / ABIs");
9321 if ((old_flags ^ target_flags) & MASK_HITACHI)
9322 return _("created and used with different ABIs");
9323 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9324 return _("created and used with different endianness");
9325 return NULL;
9326 }
9327 \f
9328 /* Predicates used by the templates. */
9329
9330 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9331 Used only in general_movsrc_operand. */
9332
9333 bool
9334 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9335 {
9336 switch (REGNO (op))
9337 {
9338 case PR_REG:
9339 case MACL_REG:
9340 case MACH_REG:
9341 return true;
9342 }
9343 return false;
9344 }
9345
9346 /* Returns true if OP is a floating point value with value 0.0. */
9347
9348 bool
9349 fp_zero_operand (rtx op)
9350 {
9351 REAL_VALUE_TYPE r;
9352
9353 if (GET_MODE (op) != SFmode)
9354 return false;
9355
9356 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9357 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9358 }
9359
9360 /* Returns true if OP is a floating point value with value 1.0. */
9361
9362 bool
9363 fp_one_operand (rtx op)
9364 {
9365 REAL_VALUE_TYPE r;
9366
9367 if (GET_MODE (op) != SFmode)
9368 return false;
9369
9370 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9371 return REAL_VALUES_EQUAL (r, dconst1);
9372 }
9373
9374 /* In general, mode switching is used. If we are
9375 compiling without -mfmovd, movsf_ie isn't taken into account for
9376 mode switching. We could check in machine_dependent_reorg for
9377 cases where we know we are in single precision mode, but there is
9378 no interface to find that out during reload, so we must avoid
9379 choosing an fldi alternative during reload and thus failing to
9380 allocate a scratch register for the constant loading. */
9381 bool
9382 fldi_ok (void)
9383 {
9384 return true;
9385 }
9386
9387 /* Return the TLS type for TLS symbols, TLS_MODEL_NONE otherwise. */
9388 enum tls_model
9389 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9390 {
9391 if (GET_CODE (op) != SYMBOL_REF)
9392 return TLS_MODEL_NONE;
9393 return SYMBOL_REF_TLS_MODEL (op);
9394 }
9395 \f
9396 /* Return the destination address of a branch. */
9397
9398 static int
9399 branch_dest (rtx branch)
9400 {
9401 rtx dest = SET_SRC (PATTERN (branch));
9402 int dest_uid;
9403
9404 if (GET_CODE (dest) == IF_THEN_ELSE)
9405 dest = XEXP (dest, 1);
9406 dest = XEXP (dest, 0);
9407 dest_uid = INSN_UID (dest);
9408 return INSN_ADDRESSES (dest_uid);
9409 }
9410 \f
9411 /* Return nonzero if REG is not used after INSN.
9412 We assume REG is a reload reg, and therefore does
9413 not live past labels. It may live past calls or jumps though. */
9414 bool
9415 reg_unused_after (rtx reg, rtx insn)
9416 {
9417 enum rtx_code code;
9418 rtx set;
9419
9420 /* If the reg is set by this instruction, then it is safe for our
9421 case. Disregard the case where this is a store to memory, since
9422 we are checking a register used in the store address. */
9423 set = single_set (insn);
9424 if (set && !MEM_P (SET_DEST (set))
9425 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9426 return true;
9427
9428 while ((insn = NEXT_INSN (insn)))
9429 {
9430 rtx set;
9431 if (!INSN_P (insn))
9432 continue;
9433
9434 code = GET_CODE (insn);
9435
9436 #if 0
9437 /* If this is a label that existed before reload, then the register
9438 is dead here. However, if this is a label added by reorg, then
9439 the register may still be live here. We can't tell the difference,
9440 so we just ignore labels completely. */
9441 if (code == CODE_LABEL)
9442 return 1;
9443 /* else */
9444 #endif
9445
9446 if (code == JUMP_INSN)
9447 return false;
9448
9449 /* If this is a sequence, we must handle them all at once.
9450 We could have for instance a call that sets the target register,
9451 and an insn in a delay slot that uses the register. In this case,
9452 we must return 0. */
9453 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9454 {
9455 int i;
9456 int retval = 0;
9457
9458 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9459 {
9460 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9461 rtx set = single_set (this_insn);
9462
9463 if (CALL_P (this_insn))
9464 code = CALL_INSN;
9465 else if (JUMP_P (this_insn))
9466 {
9467 if (INSN_ANNULLED_BRANCH_P (this_insn))
9468 return false;
9469 code = JUMP_INSN;
9470 }
9471
9472 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9473 return false;
9474 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9475 {
9476 if (!MEM_P (SET_DEST (set)))
9477 retval = true;
9478 else
9479 return false;
9480 }
9481 if (set == NULL_RTX
9482 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9483 return false;
9484 }
9485 if (retval == 1)
9486 return true;
9487 else if (code == JUMP_INSN)
9488 return false;
9489 }
9490
9491 set = single_set (insn);
9492 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9493 return false;
9494 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9495 return !MEM_P (SET_DEST (set));
9496 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9497 return false;
9498
9499 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9500 return true;
9501 }
9502 return true;
9503 }
9504 \f
9505 #include "ggc.h"
9506
9507 static GTY(()) rtx fpscr_rtx;
9508 rtx
9509 get_fpscr_rtx (void)
9510 {
9511 if (! fpscr_rtx)
9512 {
9513 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9514 REG_USERVAR_P (fpscr_rtx) = 1;
9515 mark_user_reg (fpscr_rtx);
9516 }
9517 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9518 mark_user_reg (fpscr_rtx);
9519 return fpscr_rtx;
9520 }
9521
9522 static GTY(()) tree fpscr_values;
9523
9524 static void
9525 emit_fpu_switch (rtx scratch, int index)
9526 {
9527 rtx dst, src;
9528
9529 if (fpscr_values == NULL)
9530 {
9531 tree t;
9532
9533 t = build_index_type (integer_one_node);
9534 t = build_array_type (integer_type_node, t);
9535 t = build_decl (BUILTINS_LOCATION,
9536 VAR_DECL, get_identifier ("__fpscr_values"), t);
9537 DECL_ARTIFICIAL (t) = 1;
9538 DECL_IGNORED_P (t) = 1;
9539 DECL_EXTERNAL (t) = 1;
9540 TREE_STATIC (t) = 1;
9541 TREE_PUBLIC (t) = 1;
9542 TREE_USED (t) = 1;
9543
9544 fpscr_values = t;
9545 }
9546
9547 src = DECL_RTL (fpscr_values);
9548 if (!can_create_pseudo_p ())
9549 {
9550 emit_move_insn (scratch, XEXP (src, 0));
9551 if (index != 0)
9552 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9553 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9554 }
9555 else
9556 src = adjust_address (src, PSImode, index * 4);
9557
9558 dst = get_fpscr_rtx ();
9559 emit_move_insn (dst, src);
9560 }
9561
9562 void
9563 emit_sf_insn (rtx pat)
9564 {
9565 emit_insn (pat);
9566 }
9567
9568 void
9569 emit_df_insn (rtx pat)
9570 {
9571 emit_insn (pat);
9572 }
9573
9574 void
9575 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9576 {
9577 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9578 }
9579
9580 void
9581 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9582 {
9583 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9584 get_fpscr_rtx ()));
9585 }
9586
9587 void
9588 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9589 {
9590 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9591 }
9592
9593 void
9594 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9595 {
9596 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9597 get_fpscr_rtx ()));
9598 }
9599 \f
9600 static rtx get_free_reg (HARD_REG_SET);
9601
9602 /* This function returns a register to use to load the address to load
9603 the fpscr from. Currently it always returns r1 or r7, but when we are
9604 able to use pseudo registers after combine, or have a better mechanism
9605 for choosing a register, it should be done here. */
9606 /* REGS_LIVE is the liveness information for the point for which we
9607 need this allocation. In some bare-bones exit blocks, r1 is live at the
9608 start. We can even have all of r0..r3 being live:
9609 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9610 The insn before which the new insns are placed will clobber the register
9611 we return. If a basic block consists only of setting the return value
9612 register to a pseudo and using that register, the return value is not
9613 live before or after this block, yet we'll insert our insns right in
9614 the middle. */
9615
9616 static rtx
9617 get_free_reg (HARD_REG_SET regs_live)
9618 {
9619 if (! TEST_HARD_REG_BIT (regs_live, 1))
9620 return gen_rtx_REG (Pmode, 1);
9621
9622 /* Hard reg 1 is live; since this is a small register classes target,
9623 there shouldn't be anything but a jump before the function end. */
9624 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9625 return gen_rtx_REG (Pmode, 7);
9626 }
9627
9628 /* This function will set the fpscr from memory.
9629 MODE is the mode we are setting it to. */
9630 void
9631 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9632 {
9633 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9634 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9635 rtx addr_reg;
9636
9637 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9638 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9639 }
9640
9641 /* Is the given character a logical line separator for the assembler? */
9642 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9643 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9644 #endif
9645
9646 int
9647 sh_insn_length_adjustment (rtx insn)
9648 {
9649 /* Instructions with unfilled delay slots take up an extra two bytes for
9650 the nop in the delay slot. */
9651 if (((NONJUMP_INSN_P (insn)
9652 && GET_CODE (PATTERN (insn)) != USE
9653 && GET_CODE (PATTERN (insn)) != CLOBBER)
9654 || CALL_P (insn)
9655 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9656 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9657 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9658 return 2;
9659
9660 /* SH2e has a bug that prevents the use of annulled branches, so if
9661 the delay slot is not filled, we'll have to put a NOP in it. */
9662 if (sh_cpu_attr == CPU_SH2E
9663 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9664 && get_attr_type (insn) == TYPE_CBRANCH
9665 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9666 return 2;
9667
9668 /* sh-dsp parallel processing insns take four bytes instead of two. */
9669
9670 if (NONJUMP_INSN_P (insn))
9671 {
9672 int sum = 0;
9673 rtx body = PATTERN (insn);
9674 const char *templ;
9675 char c;
9676 bool maybe_label = true;
9677
9678 if (GET_CODE (body) == ASM_INPUT)
9679 templ = XSTR (body, 0);
9680 else if (asm_noperands (body) >= 0)
9681 templ
9682 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9683 else
9684 return 0;
9685 do
9686 {
9687 int ppi_adjust = 0;
9688
9689 do
9690 c = *templ++;
9691 while (c == ' ' || c == '\t');
9692 /* all sh-dsp parallel-processing insns start with p.
9693 The only non-ppi sh insn starting with p is pref.
9694 The only ppi starting with pr is prnd. */
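/* For instance (illustrative asm text only), "padd x0,y0,a0" counts
as a 4-byte ppi insn here, while "pref @r1" stays at 2 bytes even
though it starts with 'p'. */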
9695 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9696 ppi_adjust = 2;
9697 /* The repeat pseudo-insn expands to three insns, a total of
9698 six bytes in size. */
9699 else if ((c == 'r' || c == 'R')
9700 && ! strncasecmp ("epeat", templ, 5))
9701 ppi_adjust = 4;
9702 while (c && c != '\n'
9703 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9704 {
9705 /* If this is a label, it is obviously not a ppi insn. */
9706 if (c == ':' && maybe_label)
9707 {
9708 ppi_adjust = 0;
9709 break;
9710 }
9711 else if (c == '\'' || c == '"')
9712 maybe_label = false;
9713 c = *templ++;
9714 }
9715 sum += ppi_adjust;
9716 maybe_label = c != ':';
9717 }
9718 while (c);
9719 return sum;
9720 }
9721 return 0;
9722 }
9723 \f
9724 /* Return TRUE for a valid displacement for the REG+disp addressing
9725 with MODE. */
9726
9727 bool
9728 sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
9729 bool allow_zero)
9730 {
9731 if (! CONST_INT_P (op))
9732 return false;
9733
9734 if (TARGET_SHMEDIA)
9735 {
9736 int size;
9737
9738 /* Check if this is the address of an unaligned load / store. */
9739 if (mode == VOIDmode)
9740 return CONST_OK_FOR_I06 (INTVAL (op));
9741
9742 size = GET_MODE_SIZE (mode);
9743 return (!(INTVAL (op) & (size - 1))
9744 && INTVAL (op) >= -512 * size
9745 && INTVAL (op) < 512 * size);
9746 }
9747 else
9748 {
9749 const HOST_WIDE_INT offset = INTVAL (op);
9750 const int max_disp = max_mov_insn_displacement (mode, consider_sh2a);
9751 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
9752
9753 /* If the mode does not support any displacement always return false.
9754 Even though an index of '0' is actually always valid, it will cause
9755 troubles when e.g. a DFmode move is split into two SFmode moves,
9756 where one SFmode move will have index '0' and the other move will
9757 have index '4'. */
9758 if (!allow_zero && max_disp < 1)
9759 return false;
9760
9761 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
9762 }
9763 }
9764
9765 /* Recognize an RTL expression that is a valid memory address for
9766 an instruction.
9767 The MODE argument is the machine mode for the MEM expression
9768 that wants to use this address.
9769 Allow REG
9770 REG+disp
9771 REG+r0
9772 REG++
9773 --REG */
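/* Illustrative asm forms of the cases listed above (operands are
placeholders):
mov.l @r4,r1        REG
mov.l @(4,r4),r1    REG+disp
mov.l @(r0,r4),r1   REG+r0
mov.l @r4+,r1       REG++
mov.l r1,@-r4       --REG */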
9774
9775 static bool
9776 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9777 {
9778 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9779 return true;
9780 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9781 && ! TARGET_SHMEDIA
9782 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9783 return true;
9784 else if (GET_CODE (x) == PLUS
9785 && (mode != PSImode || reload_completed))
9786 {
9787 rtx xop0 = XEXP (x, 0);
9788 rtx xop1 = XEXP (x, 1);
9789
9790 if (GET_MODE_SIZE (mode) <= 8
9791 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9792 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
9793 return true;
9794
9795 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9796 || ((xop0 == stack_pointer_rtx
9797 || xop0 == hard_frame_pointer_rtx)
9798 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9799 || ((xop1 == stack_pointer_rtx
9800 || xop1 == hard_frame_pointer_rtx)
9801 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9802 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9803 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9804 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9805 && TARGET_FMOVD && mode == DFmode)))
9806 {
9807 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9808 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9809 return true;
9810 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9811 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9812 return true;
9813 }
9814 }
9815
9816 return false;
9817 }
9818 \f
9819 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9820 isn't protected by a PIC unspec. */
9821 bool
9822 nonpic_symbol_mentioned_p (rtx x)
9823 {
9824 const char *fmt;
9825 int i;
9826
9827 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9828 || GET_CODE (x) == PC)
9829 return true;
9830
9831 /* We don't want to look into the possible MEM location of a
9832 CONST_DOUBLE, since we're not going to use it, in general. */
9833 if (GET_CODE (x) == CONST_DOUBLE)
9834 return false;
9835
9836 if (GET_CODE (x) == UNSPEC
9837 && (XINT (x, 1) == UNSPEC_PIC
9838 || XINT (x, 1) == UNSPEC_GOT
9839 || XINT (x, 1) == UNSPEC_GOTOFF
9840 || XINT (x, 1) == UNSPEC_GOTPLT
9841 || XINT (x, 1) == UNSPEC_GOTTPOFF
9842 || XINT (x, 1) == UNSPEC_DTPOFF
9843 || XINT (x, 1) == UNSPEC_TPOFF
9844 || XINT (x, 1) == UNSPEC_PLT
9845 || XINT (x, 1) == UNSPEC_SYMOFF
9846 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9847 return false;
9848
9849 fmt = GET_RTX_FORMAT (GET_CODE (x));
9850 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9851 {
9852 if (fmt[i] == 'E')
9853 {
9854 int j;
9855 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9856 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9857 return true;
9858 }
9859 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9860 return true;
9861 }
9862
9863 return false;
9864 }
9865
9866 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9867 @GOTOFF in `reg'. */
9868 rtx
9869 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9870 rtx reg)
9871 {
9872 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9873 return orig;
9874
9875 if (GET_CODE (orig) == LABEL_REF
9876 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9877 {
9878 if (reg == NULL_RTX)
9879 reg = gen_reg_rtx (Pmode);
9880
9881 emit_insn (gen_symGOTOFF2reg (reg, orig));
9882 return reg;
9883 }
9884 else if (GET_CODE (orig) == SYMBOL_REF)
9885 {
9886 if (reg == NULL_RTX)
9887 reg = gen_reg_rtx (Pmode);
9888
9889 emit_insn (gen_symGOT2reg (reg, orig));
9890 return reg;
9891 }
9892 return orig;
9893 }
9894
9895 /* Given a (logical) mode size and an offset in bytes, try to find the
9896 appropriate displacement value for a mov insn. On SH the displacements
9897 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
9898 15 bytes in QImode. To compensate for this we create a new base address by
9899 adding an adjustment value to it.
9900
9901 If the originally requested offset is greater than 127 we prefer using
9902 values 124..127 over 128..131 to increase opportunities to use the
9903 add #imm, Rn insn.
9904
9905 In some cases it is possible that a requested offset might seem unaligned
9906 or inappropriate for the mode size, like offset = 2 and mode size = 4.
9907 This is compensated by adjusting the base address so that the effective
9908 address of the displacement move insn will be aligned.
9909
9910 This is not the best possible way of rebasing the base address, as it
9911 does not look at other present displacement addressings around it.
9912 In some cases this can create more base address adjustments than would
9913 actually be necessary. */
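/* A worked example (illustrative, using the SImode limit of 60 bytes
mentioned above): a requested offset of 132 is re-based to an
adjustment of 124 plus a mov displacement of 8, which typically ends
up as something like
add #124,rX
mov.l @(8,rX),rY
where rX and rY are placeholder registers. */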
9914
9915 struct disp_adjust
9916 {
9917 rtx offset_adjust;
9918 rtx mov_disp;
9919 };
9920
9921 static struct disp_adjust
9922 sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
9923 {
9924 struct disp_adjust res = { NULL_RTX, NULL_RTX };
9925
9926 /* Do not try to use SH2A's large displacements here, because this would
9927 effectively disable the small displacement insns. */
9928 const int mode_sz = GET_MODE_SIZE (mode);
9929 const int mov_insn_sz = mov_insn_size (mode, false);
9930 const int max_disp = max_mov_insn_displacement (mode, false);
9931 const int max_disp_next = max_disp + mov_insn_sz;
9932 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
9933 HOST_WIDE_INT offset_adjust;
9934
9935 /* In some cases this actually does happen and we must check for it. */
9936 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
9937 return res;
9938
9939 /* Keeps the previous behavior for QImode displacement addressing.
9940 This just decides how the offset is re-based. Removing this special
9941 case will result in slightly bigger code on average, but it's not that
9942 bad actually. */
9943 if (mov_insn_sz == 1)
9944 align_modifier = 0;
9945
9946 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
9947
9948 if (mode_sz + offset - offset_adjust <= max_disp_next)
9949 {
9950 res.offset_adjust = GEN_INT (offset_adjust);
9951 res.mov_disp = GEN_INT (offset - offset_adjust);
9952 }
9953
9954 return res;
9955 }
9956
9957 /* Try to modify an illegitimate address and make it legitimate.
9958 If we find one, return the new, valid address.
9959 Otherwise, return the original address. */
9960
9961 static rtx
9962 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9963 {
9964 if (flag_pic)
9965 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9966
9967 if (TARGET_SHMEDIA)
9968 return x;
9969
9970 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9971 || (TARGET_SH2E && mode == SFmode))
9972 return x;
9973
9974 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
9975 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
9976 {
9977 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
9978 INTVAL (XEXP (x, 1)));
9979
9980 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9981 {
9982 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9983 adj.offset_adjust, NULL_RTX, 0,
9984 OPTAB_LIB_WIDEN);
9985 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9986 }
9987 }
9988
9989 return x;
9990 }
9991
9992 /* Attempt to replace *p, which is an address that needs reloading, with
9993 a valid memory address for an operand of mode MODE.
9994 Like for sh_legitimize_address, for the SH we try to get a normal form
9995 of the address. That will allow inheritance of the address reloads. */
9996
9997 bool
9998 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9999 int itype)
10000 {
10001 enum reload_type type = (enum reload_type) itype;
10002 const int mode_sz = GET_MODE_SIZE (mode);
10003
10004 if (TARGET_SHMEDIA)
10005 return false;
10006
10007 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10008 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10009 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
10010 && (ALLOW_INDEXED_ADDRESS
10011 || XEXP (*p, 0) == stack_pointer_rtx
10012 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10013 {
10014 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10015 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10016
10017 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10018 {
10019 push_reload (*p, NULL_RTX, p, NULL,
10020 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10021 return true;
10022 }
10023
10024 if (TARGET_SH2E && mode == SFmode)
10025 {
10026 *p = copy_rtx (*p);
10027 push_reload (*p, NULL_RTX, p, NULL,
10028 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10029 return true;
10030 }
10031
10032 /* FIXME: Do not allow legitimizing QImode and HImode displacement
10033 moves, because reload then has a problem figuring out the constraint
10034 that the move insn target/source reg must be R0.
10035 Or maybe some handling is wrong in sh_secondary_reload for this
10036 to work properly? */
10037 if ((mode_sz == 4 || mode_sz == 8)
10038 && ! (TARGET_SH4 && mode == DFmode)
10039 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10040 {
10041 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10042 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10043 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10044 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10045 return true;
10046 }
10047 }
10048
10049 /* We must re-recognize what we created before. */
10050 if (GET_CODE (*p) == PLUS
10051 && (mode_sz == 4 || mode_sz == 8)
10052 && GET_CODE (XEXP (*p, 0)) == PLUS
10053 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10054 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10055 && CONST_INT_P (XEXP (*p, 1))
10056 && ! (TARGET_SH2E && mode == SFmode))
10057 {
10058 /* Because this address is so complex, we know it must have
10059 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10060 it is already unshared, and needs no further unsharing. */
10061 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10062 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10063 return true;
10064 }
10065
10066 return false;
10067 }
10068
10069 /* In the name of slightly smaller debug output, and to cater to
10070 general assembler lossage, recognize various UNSPEC sequences
10071 and turn them back into a direct symbol reference. */
10072
10073 static rtx
10074 sh_delegitimize_address (rtx orig_x)
10075 {
10076 rtx x, y;
10077
10078 orig_x = delegitimize_mem_from_attrs (orig_x);
10079
10080 x = orig_x;
10081 if (MEM_P (x))
10082 x = XEXP (x, 0);
10083 if (GET_CODE (x) == CONST)
10084 {
10085 y = XEXP (x, 0);
10086 if (GET_CODE (y) == UNSPEC)
10087 {
10088 if (XINT (y, 1) == UNSPEC_GOT
10089 || XINT (y, 1) == UNSPEC_GOTOFF
10090 || XINT (y, 1) == UNSPEC_SYMOFF)
10091 return XVECEXP (y, 0, 0);
10092 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10093 {
10094 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10095 {
10096 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10097
10098 if (GET_CODE (symplt) == UNSPEC
10099 && XINT (symplt, 1) == UNSPEC_PLT)
10100 return XVECEXP (symplt, 0, 0);
10101 }
10102 }
10103 else if (TARGET_SHMEDIA
10104 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10105 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10106 {
10107 rtx offset = XVECEXP (y, 0, 1);
10108
10109 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10110 if (MEM_P (orig_x))
10111 x = replace_equiv_address_nv (orig_x, x);
10112 return x;
10113 }
10114 }
10115 }
10116
10117 return orig_x;
10118 }
10119
10120 /* Mark the use of a constant in the literal table. If the constant
10121 has multiple labels, make it unique. */
10122 static rtx
10123 mark_constant_pool_use (rtx x)
10124 {
10125 rtx insn, lab, pattern;
10126
10127 if (x == NULL_RTX)
10128 return x;
10129
10130 switch (GET_CODE (x))
10131 {
10132 case LABEL_REF:
10133 x = XEXP (x, 0);
10134 case CODE_LABEL:
10135 break;
10136 default:
10137 return x;
10138 }
10139
10140 /* Get the first label in the list of labels for the same constant
10141 and delete the other labels in the list. */
10142 lab = x;
10143 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
10144 {
10145 if (!LABEL_P (insn)
10146 || LABEL_REFS (insn) != NEXT_INSN (insn))
10147 break;
10148 lab = insn;
10149 }
10150
10151 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10152 INSN_DELETED_P (insn) = 1;
10153
10154 /* Mark constants in a window. */
10155 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
10156 {
10157 if (!NONJUMP_INSN_P (insn))
10158 continue;
10159
10160 pattern = PATTERN (insn);
10161 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10162 continue;
10163
10164 switch (XINT (pattern, 1))
10165 {
10166 case UNSPECV_CONST2:
10167 case UNSPECV_CONST4:
10168 case UNSPECV_CONST8:
10169 XVECEXP (pattern, 0, 1) = const1_rtx;
10170 break;
10171 case UNSPECV_WINDOW_END:
10172 if (XVECEXP (pattern, 0, 0) == x)
10173 return lab;
10174 break;
10175 case UNSPECV_CONST_END:
10176 return lab;
10177 default:
10178 break;
10179 }
10180 }
10181
10182 return lab;
10183 }
10184 \f
10185 /* Return true if it's possible to redirect BRANCH1 to the destination
10186 of an unconditional jump BRANCH2. We only want to do this if the
10187 resulting branch will have a short displacement. */
10188 bool
10189 sh_can_redirect_branch (rtx branch1, rtx branch2)
10190 {
10191 if (flag_expensive_optimizations && simplejump_p (branch2))
10192 {
10193 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10194 rtx insn;
10195 int distance;
10196
10197 for (distance = 0, insn = NEXT_INSN (branch1);
10198 insn && distance < 256;
10199 insn = PREV_INSN (insn))
10200 {
10201 if (insn == dest)
10202 return true;
10203 else
10204 distance += get_attr_length (insn);
10205 }
10206 for (distance = 0, insn = NEXT_INSN (branch1);
10207 insn && distance < 256;
10208 insn = NEXT_INSN (insn))
10209 {
10210 if (insn == dest)
10211 return true;
10212 else
10213 distance += get_attr_length (insn);
10214 }
10215 }
10216 return false;
10217 }
10218
10219 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10220 bool
10221 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10222 unsigned int new_reg)
10223 {
10224 /* Interrupt functions can only use registers that have already been
10225 saved by the prologue, even if they would normally be
10226 call-clobbered. */
10227
10228 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10229 return false;
10230
10231 return true;
10232 }
10233
10234 /* Function to update the integer COST
10235 based on the relationship between INSN that is dependent on
10236 DEP_INSN through the dependence LINK. The default is to make no
10237 adjustment to COST. This can be used for example to specify to
10238 the scheduler that an output- or anti-dependence does not incur
10239 the same cost as a data-dependence. The return value should be
10240 the new value for COST. */
10241 static int
10242 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10243 {
10244 rtx reg, use_pat;
10245
10246 if (TARGET_SHMEDIA)
10247 {
10248 /* On SHmedia, if the dependence is an anti-dependence or
10249 output-dependence, there is no cost. */
10250 if (REG_NOTE_KIND (link) != 0)
10251 {
10252 /* However, dependencies between target register loads and
10253 uses of the register in a subsequent block that are separated
10254 by a conditional branch are not modelled - we have to make do with
10255 the anti-dependency between the target register load and the
10256 conditional branch that ends the current block. */
10257 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10258 && GET_CODE (PATTERN (dep_insn)) == SET
10259 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10260 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10261 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10262 {
10263 int orig_cost = cost;
10264 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10265 rtx target = ((! note
10266 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10267 ? insn : JUMP_LABEL (insn));
10268 /* On the likely path, the branch costs 1, on the unlikely path,
10269 it costs 3. */
10270 cost--;
10271 do
10272 target = next_active_insn (target);
10273 while (target && ! flow_dependent_p (target, dep_insn)
10274 && --cost > 0);
10275 /* If two branches are executed in immediate succession, with the
10276 first branch properly predicted, this causes a stall at the
10277 second branch, hence we won't need the target for the
10278 second branch for two cycles after the launch of the first
10279 branch. */
10280 if (cost > orig_cost - 2)
10281 cost = orig_cost - 2;
10282 }
10283 else
10284 cost = 0;
10285 }
10286
10287 else if (get_attr_is_mac_media (insn)
10288 && get_attr_is_mac_media (dep_insn))
10289 cost = 1;
10290
10291 else if (! reload_completed
10292 && GET_CODE (PATTERN (insn)) == SET
10293 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10294 && GET_CODE (PATTERN (dep_insn)) == SET
10295 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10296 && cost < 4)
10297 cost = 4;
10298 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10299 that is needed at the target. */
10300 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10301 && ! flow_dependent_p (insn, dep_insn))
10302 cost--;
10303 }
10304 else if (REG_NOTE_KIND (link) == 0)
10305 {
10306 enum attr_type type;
10307 rtx dep_set;
10308
10309 if (recog_memoized (insn) < 0
10310 || recog_memoized (dep_insn) < 0)
10311 return cost;
10312
10313 dep_set = single_set (dep_insn);
10314
10315 /* The latency that we specify in the scheduling description refers
10316 to the actual output, not to an auto-increment register; for that,
10317 the latency is one. */
10318 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10319 {
10320 rtx set = single_set (insn);
10321
10322 if (set
10323 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10324 && (!MEM_P (SET_DEST (set))
10325 || !reg_mentioned_p (SET_DEST (dep_set),
10326 XEXP (SET_DEST (set), 0))))
10327 cost = 1;
10328 }
10329 /* The only input for a call that is timing-critical is the
10330 function's address. */
10331 if (CALL_P (insn))
10332 {
10333 rtx call = PATTERN (insn);
10334
10335 if (GET_CODE (call) == PARALLEL)
10336 call = XVECEXP (call, 0, 0);
10337 if (GET_CODE (call) == SET)
10338 call = SET_SRC (call);
10339 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10340 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10341 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10342 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10343 cost -= TARGET_SH4_300 ? 3 : 6;
10344 }
10345 /* Likewise, the most timing critical input for an sfuncs call
10346 is the function address. However, sfuncs typically start
10347 using their arguments pretty quickly.
10348 Assume a four cycle delay for SH4 before they are needed.
10349 Cached ST40-300 calls are quicker, so assume only a one
10350 cycle delay there.
10351 ??? Maybe we should encode the delays till input registers
10352 are needed by sfuncs into the sfunc call insn. */
10353 /* All sfunc calls are parallels with at least four components.
10354 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10355 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10356 && XVECLEN (PATTERN (insn), 0) >= 4
10357 && (reg = sfunc_uses_reg (insn)))
10358 {
10359 if (! reg_set_p (reg, dep_insn))
10360 cost -= TARGET_SH4_300 ? 1 : 4;
10361 }
10362 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10363 {
10364 enum attr_type dep_type = get_attr_type (dep_insn);
10365
10366 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10367 cost--;
10368 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10369 && (type = get_attr_type (insn)) != TYPE_CALL
10370 && type != TYPE_SFUNC)
10371 cost--;
10372 /* When the preceding instruction loads the shift amount of
10373 the following SHAD/SHLD, the latency of the load is increased
10374 by 1 cycle. */
10375 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10376 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10377 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10378 XEXP (SET_SRC (single_set (insn)),
10379 1)))
10380 cost++;
10381 /* When an LS group instruction with a latency of less than
10382 3 cycles is followed by a double-precision floating-point
10383 instruction, FIPR, or FTRV, the latency of the first
10384 instruction is increased to 3 cycles. */
10385 else if (cost < 3
10386 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10387 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10388 cost = 3;
10389 /* The lsw register of a double-precision computation is ready one
10390 cycle earlier. */
10391 else if (reload_completed
10392 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10393 && (use_pat = single_set (insn))
10394 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10395 SET_SRC (use_pat)))
10396 cost -= 1;
10397
10398 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10399 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10400 cost -= 1;
10401 }
10402 else if (TARGET_SH4_300)
10403 {
10404 /* Stores need their input register two cycles later. */
10405 if (dep_set && cost >= 1
10406 && ((type = get_attr_type (insn)) == TYPE_STORE
10407 || type == TYPE_PSTORE
10408 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10409 {
10410 rtx set = single_set (insn);
10411
10412 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10413 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10414 {
10415 cost -= 2;
10416 /* But don't reduce the cost below 1 if the address depends
10417 on a side effect of dep_insn. */
10418 if (cost < 1
10419 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10420 cost = 1;
10421 }
10422 }
10423 }
10424 }
10425 /* An anti-dependence penalty of two applies if the first insn is a double
10426 precision fadd / fsub / fmul. */
10427 else if (!TARGET_SH4_300
10428 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10429 && recog_memoized (dep_insn) >= 0
10430 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10431 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10432 /* A lot of alleged anti-flow dependences are fake,
10433 so check this one is real. */
10434 && flow_dependent_p (dep_insn, insn))
10435 cost = 2;
10436
10437 return cost;
10438 }
10439
10440 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10441 if DEP_INSN is anti-flow dependent on INSN. */
10442 static bool
10443 flow_dependent_p (rtx insn, rtx dep_insn)
10444 {
10445 rtx tmp = PATTERN (insn);
10446
10447 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10448 return tmp == NULL_RTX;
10449 }
10450
10451 /* A helper function for flow_dependent_p called through note_stores. */
10452 static void
10453 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10454 {
10455 rtx * pinsn = (rtx *) data;
10456
10457 if (*pinsn && reg_referenced_p (x, *pinsn))
10458 *pinsn = NULL_RTX;
10459 }
10460
10461 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10462 'special function' patterns (type sfunc) that clobber pr, but that
10463 do not look like function calls to leaf_function_p. Hence we must
10464 do this extra check. */
10465 static int
10466 sh_pr_n_sets (void)
10467 {
10468 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10469 }
10470
10471 /* Return where to allocate pseudo for a given hard register initial
10472 value. */
10473 static rtx
10474 sh_allocate_initial_value (rtx hard_reg)
10475 {
10476 rtx x;
10477
10478 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10479 {
10480 if (crtl->is_leaf
10481 && ! sh_pr_n_sets ()
10482 && ! (TARGET_SHCOMPACT
10483 && ((crtl->args.info.call_cookie
10484 & ~ CALL_COOKIE_RET_TRAMP (1))
10485 || crtl->saves_all_registers)))
10486 x = hard_reg;
10487 else
10488 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10489 }
10490 else
10491 x = NULL_RTX;
10492
10493 return x;
10494 }
10495
10496 /* This function returns "2" to indicate dual issue for the SH4
10497 processor. To be used by the DFA pipeline description. */
10498 static int
10499 sh_issue_rate (void)
10500 {
10501 if (TARGET_SUPERSCALAR)
10502 return 2;
10503 else
10504 return 1;
10505 }
10506
10507 /* Functions for ready queue reordering for sched1. */
10508
10509 /* Get weight for mode for a set x. */
10510 static short
10511 find_set_regmode_weight (rtx x, enum machine_mode mode)
10512 {
10513 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10514 return 1;
10515 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10516 {
10517 if (REG_P (SET_DEST (x)))
10518 {
10519 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10520 return 1;
10521 else
10522 return 0;
10523 }
10524 return 1;
10525 }
10526 return 0;
10527 }
10528
10529 /* Get regmode weight for insn. */
10530 static short
10531 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10532 {
10533 short reg_weight = 0;
10534 rtx x;
10535
10536 /* Increment weight for each register born here. */
10537 x = PATTERN (insn);
10538 reg_weight += find_set_regmode_weight (x, mode);
10539 if (GET_CODE (x) == PARALLEL)
10540 {
10541 int j;
10542 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10543 {
10544 x = XVECEXP (PATTERN (insn), 0, j);
10545 reg_weight += find_set_regmode_weight (x, mode);
10546 }
10547 }
10548 /* Decrement weight for each register that dies here. */
10549 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10550 {
10551 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10552 {
10553 rtx note = XEXP (x, 0);
10554 if (REG_P (note) && GET_MODE (note) == mode)
10555 reg_weight--;
10556 }
10557 }
10558 return reg_weight;
10559 }
10560
10561 /* Calculate regmode weights for all insns of a basic block. */
10562 static void
10563 find_regmode_weight (basic_block b, enum machine_mode mode)
10564 {
10565 rtx insn, next_tail, head, tail;
10566
10567 get_ebb_head_tail (b, b, &head, &tail);
10568 next_tail = NEXT_INSN (tail);
10569
10570 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10571 {
10572 /* Handle register life information. */
10573 if (!INSN_P (insn))
10574 continue;
10575
10576 if (mode == SFmode)
10577 INSN_REGMODE_WEIGHT (insn, mode) =
10578 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10579 else if (mode == SImode)
10580 INSN_REGMODE_WEIGHT (insn, mode) =
10581 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10582 }
10583 }
10584
10585 /* Comparison function for ready queue sorting. */
10586 static int
10587 rank_for_reorder (const void *x, const void *y)
10588 {
10589 rtx tmp = *(const rtx *) y;
10590 rtx tmp2 = *(const rtx *) x;
10591
10592 /* The insn in a schedule group should be issued first. */
10593 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10594 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10595
10596 /* If insns are equally good, sort by INSN_LUID (original insn order). This
10597 minimizes instruction movement, thus minimizing sched's effect on
10598 register pressure. */
10599 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10600 }
10601
10602 /* Resort the array A in which only element at index N may be out of order. */
10603 static void
10604 swap_reorder (rtx *a, int n)
10605 {
10606 rtx insn = a[n - 1];
10607 int i = n - 2;
10608
10609 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10610 {
10611 a[i + 1] = a[i];
10612 i -= 1;
10613 }
10614 a[i + 1] = insn;
10615 }
10616
10617 /* Sort the ready list by ascending priority. */
10618 static void
10619 ready_reorder (rtx *ready, int nready)
10620 {
10621 if (nready == 2)
10622 swap_reorder (ready, nready);
10623 else if (nready > 2)
10624 qsort (ready, nready, sizeof (rtx), rank_for_reorder);
10625 }
10626
10627 /* Count life regions of r0 for a block. */
10628 static int
10629 find_r0_life_regions (basic_block b)
10630 {
10631 rtx end, insn;
10632 rtx pset;
10633 rtx r0_reg;
10634 int live;
10635 int set;
10636 int death = 0;
10637
10638 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10639 {
10640 set = 1;
10641 live = 1;
10642 }
10643 else
10644 {
10645 set = 0;
10646 live = 0;
10647 }
10648
10649 insn = BB_HEAD (b);
10650 end = BB_END (b);
10651 r0_reg = gen_rtx_REG (SImode, R0_REG);
10652 while (1)
10653 {
10654 if (INSN_P (insn))
10655 {
10656 if (find_regno_note (insn, REG_DEAD, R0_REG))
10657 {
10658 death++;
10659 live = 0;
10660 }
10661 if (!live
10662 && (pset = single_set (insn))
10663 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10664 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10665 {
10666 set++;
10667 live = 1;
10668 }
10669 }
10670 if (insn == end)
10671 break;
10672 insn = NEXT_INSN (insn);
10673 }
10674 return set - death;
10675 }
10676
10677 /* Calculate regmode weights for all insns of all basic blocks. */
10678 static void
10679 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10680 int verbose ATTRIBUTE_UNUSED,
10681 int old_max_uid)
10682 {
10683 basic_block b;
10684
10685 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10686 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10687 r0_life_regions = 0;
10688
10689 FOR_EACH_BB_REVERSE (b)
10690 {
10691 find_regmode_weight (b, SImode);
10692 find_regmode_weight (b, SFmode);
10693 if (!reload_completed)
10694 r0_life_regions += find_r0_life_regions (b);
10695 }
10696
10697 CURR_REGMODE_PRESSURE (SImode) = 0;
10698 CURR_REGMODE_PRESSURE (SFmode) = 0;
10699
10700 }
10701
10702 /* Cleanup. */
10703 static void
10704 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10705 int verbose ATTRIBUTE_UNUSED)
10706 {
10707 if (regmode_weight[0])
10708 {
10709 free (regmode_weight[0]);
10710 regmode_weight[0] = NULL;
10711 }
10712 if (regmode_weight[1])
10713 {
10714 free (regmode_weight[1]);
10715 regmode_weight[1] = NULL;
10716 }
10717 }
10718
10719 /* The set of supported scalar modes differs from the default version:
10720 TImode is not supported for 32-bit SHmedia. */
10721 static bool
10722 sh_scalar_mode_supported_p (enum machine_mode mode)
10723 {
10724 if (TARGET_SHMEDIA32 && mode == TImode)
10725 return false;
10726
10727 return default_scalar_mode_supported_p (mode);
10728 }
10729
10730 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10731 keep count of register pressures on SImode and SFmode. */
10732 static int
10733 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10734 int sched_verbose ATTRIBUTE_UNUSED,
10735 rtx insn,
10736 int can_issue_more)
10737 {
10738 if (GET_CODE (PATTERN (insn)) != USE
10739 && GET_CODE (PATTERN (insn)) != CLOBBER)
10740 cached_can_issue_more = can_issue_more - 1;
10741 else
10742 cached_can_issue_more = can_issue_more;
10743
10744 if (reload_completed)
10745 return cached_can_issue_more;
10746
10747 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10748 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10749
10750 return cached_can_issue_more;
10751 }
10752
10753 static void
10754 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10755 int verbose ATTRIBUTE_UNUSED,
10756 int veclen ATTRIBUTE_UNUSED)
10757 {
10758 CURR_REGMODE_PRESSURE (SImode) = 0;
10759 CURR_REGMODE_PRESSURE (SFmode) = 0;
10760 }
10761
10762 /* Some magic numbers. */
10763 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10764 functions that already have high pressure on r0. */
10765 #define R0_MAX_LIFE_REGIONS 2
10766 /* Register Pressure thresholds for SImode and SFmode registers. */
10767 #define SIMODE_MAX_WEIGHT 5
10768 #define SFMODE_MAX_WEIGHT 10
10769
10770 /* Return true if the pressure is high for MODE. */
10771 static bool
10772 high_pressure (enum machine_mode mode)
10773 {
10774 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10775 functions that already have high pressure on r0. */
10776 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10777 return true;
10778
10779 if (mode == SFmode)
10780 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10781 else
10782 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10783 }
10784
10785 /* Reorder ready queue if register pressure is high. */
10786 static int
10787 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10788 int sched_verbose ATTRIBUTE_UNUSED,
10789 rtx *ready,
10790 int *n_readyp,
10791 int clock_var ATTRIBUTE_UNUSED)
10792 {
10793 if (reload_completed)
10794 return sh_issue_rate ();
10795
10796 if (high_pressure (SFmode) || high_pressure (SImode))
10797 {
10798 ready_reorder (ready, *n_readyp);
10799 }
10800
10801 return sh_issue_rate ();
10802 }
10803
10804 /* Skip cycles if the current register pressure is high. */
10805 static int
10806 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10807 int sched_verbose ATTRIBUTE_UNUSED,
10808 rtx *ready ATTRIBUTE_UNUSED,
10809 int *n_readyp ATTRIBUTE_UNUSED,
10810 int clock_var ATTRIBUTE_UNUSED)
10811 {
10812 if (reload_completed)
10813 return cached_can_issue_more;
10814
10815 if (high_pressure(SFmode) || high_pressure (SImode))
10816 skip_cycles = 1;
10817
10818 return cached_can_issue_more;
10819 }
10820
10821 /* Skip cycles without sorting the ready queue. This will move insns from
10822 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
10823 queue by sh_reorder. */
10824
10825 /* Generally, skipping this many cycles is sufficient for all insns to move
10826 from Q -> R. */
10827 #define MAX_SKIPS 8
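/* Roughly, once sh_reorder2 () has set skip_cycles, sh_dfa_new_cycle () below
   keeps *sort_p zero (no ready-queue sorting) while fewer than MAX_SKIPS cycles
   have elapsed since the last issue, re-enables sorting on the MAX_SKIPS-th
   cycle, and clears skip_cycles afterwards.  */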
10828
10829 static int
10830 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10831 int sched_verbose ATTRIBUTE_UNUSED,
10832 rtx insn ATTRIBUTE_UNUSED,
10833 int last_clock_var,
10834 int clock_var,
10835 int *sort_p)
10836 {
10837 if (reload_completed)
10838 return 0;
10839
10840 if (skip_cycles)
10841 {
10842 if ((clock_var - last_clock_var) < MAX_SKIPS)
10843 {
10844 *sort_p = 0;
10845 return 1;
10846 }
10847 /* If this is the last cycle we are skipping, allow reordering of R. */
10848 if ((clock_var - last_clock_var) == MAX_SKIPS)
10849 {
10850 *sort_p = 1;
10851 return 1;
10852 }
10853 }
10854
10855 skip_cycles = 0;
10856
10857 return 0;
10858 }
10859
10860 /* SHmedia requires registers for branches, so we can't generate new
10861 branches past reload. */
10862 static bool
10863 sh_cannot_modify_jumps_p (void)
10864 {
10865 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10866 }
10867
10868 static reg_class_t
10869 sh_target_reg_class (void)
10870 {
10871 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10872 }
10873
10874 static bool
10875 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10876 {
10877 HARD_REG_SET dummy;
10878 #if 0
10879 rtx insn;
10880 #endif
10881
10882 if (! shmedia_space_reserved_for_target_registers)
10883 return 0;
10884 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10885 return 0;
10886 if (calc_live_regs (&dummy) >= 6 * 8)
10887 return 1;
10888 return 0;
10889 }
10890
10891 static bool
10892 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10893 {
10894 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10895 }
10896 \f
10897 /*
10898 On the SH1..SH4, the trampoline looks like
10899 2 0002 D202 mov.l l2,r2
10900 1 0000 D301 mov.l l1,r3
10901 3 0004 422B jmp @r2
10902 4 0006 0009 nop
10903 5 0008 00000000 l1: .long area
10904 6 000c 00000000 l2: .long function
10905
10906 SH5 (compact) uses r1 instead of r3 for the static chain. */
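/* In sh_trampoline_init below, the four 16-bit opcodes above are written as
   two SImode words whose halfword order depends on TARGET_LITTLE_ENDIAN, the
   static chain value CXT is stored into the l1 slot at offset 8 and the
   function address FNADDR into the l2 slot at offset 12; on Harvard variants
   the affected instruction cache line is then invalidated.  */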
10907
10908
10909 /* Emit RTL insns to initialize the variable parts of a trampoline.
10910 FNADDR is an RTX for the address of the function's pure code.
10911 CXT is an RTX for the static chain value for the function. */
10912
10913 static void
10914 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10915 {
10916 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10917 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10918
10919 if (TARGET_SHMEDIA64)
10920 {
10921 rtx tramp_templ;
10922 int fixed_len;
10923
10924 rtx movi1 = GEN_INT (0xcc000010);
10925 rtx shori1 = GEN_INT (0xc8000010);
10926 rtx src, dst;
10927
10928 /* The following trampoline works within a +- 128 KB range for cxt:
10929 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10930 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10931 gettr tr1,r1; blink tr0,r63 */
10932 /* Address rounding makes it hard to compute the exact bounds of the
10933 offset for this trampoline, but we have a rather generous offset
10934 range, so frame_offset should do fine as an upper bound. */
10935 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10936 {
10937 /* ??? could optimize this trampoline initialization
10938 by writing DImode words with two insns each. */
10939 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10940 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10941 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10942 insn = gen_rtx_AND (DImode, insn, mask);
10943 /* Or in ptb/u .,tr1 pattern */
10944 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10945 insn = force_operand (insn, NULL_RTX);
10946 insn = gen_lowpart (SImode, insn);
10947 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10948 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10949 insn = gen_rtx_AND (DImode, insn, mask);
10950 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10951 insn = gen_lowpart (SImode, insn);
10952 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10953 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10954 insn = gen_rtx_AND (DImode, insn, mask);
10955 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10956 insn = gen_lowpart (SImode, insn);
10957 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10958 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10959 insn = gen_rtx_AND (DImode, insn, mask);
10960 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10961 insn = gen_lowpart (SImode, insn);
10962 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10963 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10964 insn = gen_rtx_AND (DImode, insn, mask);
10965 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10966 insn = gen_lowpart (SImode, insn);
10967 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10968 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10969 GEN_INT (0x6bf10600));
10970 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10971 GEN_INT (0x4415fc10));
10972 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10973 GEN_INT (0x4401fff0));
10974 emit_insn (gen_ic_invalidate_line (tramp));
10975 return;
10976 }
10977 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10978 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10979
10980 tramp_templ = gen_datalabel_ref (tramp_templ);
10981 dst = tramp_mem;
10982 src = gen_const_mem (BLKmode, tramp_templ);
10983 set_mem_align (dst, 256);
10984 set_mem_align (src, 64);
10985 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10986
10987 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10988 emit_move_insn (adjust_address (tramp_mem, Pmode,
10989 fixed_len + GET_MODE_SIZE (Pmode)),
10990 cxt);
10991 emit_insn (gen_ic_invalidate_line (tramp));
10992 return;
10993 }
10994 else if (TARGET_SHMEDIA)
10995 {
10996 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10997 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10998 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10999 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11000 /* movi 0,r1: 0xcc000010 shori 0,r1: 0xc8000010 concatenated,
11001 rotated right by 10, with the upper 16 bits of each 32-bit half selected. */
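/* Worked out: 0xcc000010_c8000010 rotated right by 10 bits is
   0x04330000_04320000; taking the upper 16 bits of each 32-bit half gives
   0x0433 and 0x0432, i.e. the 0x4330432 constant used below.  */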
11002 rtx movishori
11003 = force_reg (V2HImode, (simplify_gen_subreg
11004 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11005 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11006 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11007
11008 fnaddr = force_reg (SImode, fnaddr);
11009 cxt = force_reg (SImode, cxt);
11010 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11011 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11012 movishori));
11013 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11014 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11015 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11016 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11017 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11018 gen_rtx_SUBREG (V2HImode, cxt, 0),
11019 movishori));
11020 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11021 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11022 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11023 if (TARGET_LITTLE_ENDIAN)
11024 {
11025 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11026 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11027 }
11028 else
11029 {
11030 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11031 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11032 }
11033 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11034 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11035 emit_insn (gen_ic_invalidate_line (tramp));
11036 return;
11037 }
11038 else if (TARGET_SHCOMPACT)
11039 {
11040 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11041 return;
11042 }
11043 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11044 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11045 SImode));
11046 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11047 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11048 SImode));
11049 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11050 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11051 if (TARGET_HARVARD)
11052 {
11053 if (!TARGET_INLINE_IC_INVALIDATE
11054 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
11055 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11056 FUNCTION_ORDINARY),
11057 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11058 else
11059 emit_insn (gen_ic_invalidate_line (tramp));
11060 }
11061 }
11062
11063 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11064
11065 static rtx
11066 sh_trampoline_adjust_address (rtx tramp)
11067 {
11068 if (TARGET_SHMEDIA)
11069 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11070 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11071 return tramp;
11072 }
11073
11074 /* FIXME: This is overly conservative. A SHcompact function that
11075 receives arguments ``by reference'' will have them stored in its
11076 own stack frame, so it must not pass pointers or references to
11077 these arguments to other functions by means of sibling calls. */
11078 /* If PIC, we cannot make sibling calls to global functions
11079 because the PLT requires r12 to be live. */
11080 static bool
11081 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11082 {
11083 return (1
11084 && (! TARGET_SHCOMPACT
11085 || crtl->args.info.stack_regs == 0)
11086 && ! sh_cfun_interrupt_handler_p ()
11087 && (! flag_pic
11088 || (decl && ! TREE_PUBLIC (decl))
11089 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11090 }
11091 \f
11092 /* Machine specific built-in functions. */
11093
11094 struct builtin_description
11095 {
11096 const enum insn_code icode;
11097 const char *const name;
11098 int signature;
11099 tree fndecl;
11100 };
11101
11102 /* Describes the number and signedness of arguments; arg[0] == result
11103 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11104 /* 9: 64-bit pointer, 10: 32-bit pointer. */
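/* For example, SH_BLTIN_PV below is { 0, 8 }: no result (void) and a single
   pointer argument, as used for __builtin_sh_media_ALLOCO.  SH_BLTIN_MAC_HISI
   is { 1, 4, 4, 1 }: an unsigned result, two "don't care" arguments and one
   unsigned argument; the actual machine modes are taken from the insn
   pattern's operands in sh_media_init_builtins.  */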
11105 static const char signature_args[][4] =
11106 {
11107 #define SH_BLTIN_V2SI2 0
11108 { 4, 4 },
11109 #define SH_BLTIN_V4HI2 1
11110 { 4, 4 },
11111 #define SH_BLTIN_V2SI3 2
11112 { 4, 4, 4 },
11113 #define SH_BLTIN_V4HI3 3
11114 { 4, 4, 4 },
11115 #define SH_BLTIN_V8QI3 4
11116 { 4, 4, 4 },
11117 #define SH_BLTIN_MAC_HISI 5
11118 { 1, 4, 4, 1 },
11119 #define SH_BLTIN_SH_HI 6
11120 { 4, 4, 1 },
11121 #define SH_BLTIN_SH_SI 7
11122 { 4, 4, 1 },
11123 #define SH_BLTIN_V4HI2V2SI 8
11124 { 4, 4, 4 },
11125 #define SH_BLTIN_V4HI2V8QI 9
11126 { 4, 4, 4 },
11127 #define SH_BLTIN_SISF 10
11128 { 4, 2 },
11129 #define SH_BLTIN_LDUA_L 11
11130 { 2, 10 },
11131 #define SH_BLTIN_LDUA_Q 12
11132 { 1, 10 },
11133 #define SH_BLTIN_STUA_L 13
11134 { 0, 10, 2 },
11135 #define SH_BLTIN_STUA_Q 14
11136 { 0, 10, 1 },
11137 #define SH_BLTIN_LDUA_L64 15
11138 { 2, 9 },
11139 #define SH_BLTIN_LDUA_Q64 16
11140 { 1, 9 },
11141 #define SH_BLTIN_STUA_L64 17
11142 { 0, 9, 2 },
11143 #define SH_BLTIN_STUA_Q64 18
11144 { 0, 9, 1 },
11145 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11146 #define SH_BLTIN_2 19
11147 #define SH_BLTIN_SU 19
11148 { 1, 2 },
11149 #define SH_BLTIN_3 20
11150 #define SH_BLTIN_SUS 20
11151 { 2, 2, 1 },
11152 #define SH_BLTIN_PSSV 21
11153 { 0, 8, 2, 2 },
11154 #define SH_BLTIN_XXUU 22
11155 #define SH_BLTIN_UUUU 22
11156 { 1, 1, 1, 1 },
11157 #define SH_BLTIN_PV 23
11158 { 0, 8 },
11159 };
11160 /* mcmv: operands considered unsigned. */
11161 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11162 /* mperm: control value considered unsigned int. */
11163 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11164 /* mshards_q: returns signed short. */
11165 /* nsb: takes long long arg, returns unsigned char. */
11166 static struct builtin_description bdesc[] =
11167 {
11168 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11169 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11170 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11171 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11172 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11173 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11174 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11175 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11176 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11177 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11178 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11179 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11180 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11181 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11182 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11183 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11184 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11185 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11186 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11187 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11188 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11189 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11190 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11191 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11192 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11193 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11194 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11195 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11196 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11197 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11198 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11199 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11200 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11201 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11202 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11203 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11204 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11205 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11206 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11207 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11208 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11209 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11210 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11211 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11212 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11213 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11214 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11215 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11216 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11217 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11218 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11219 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11220 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11221 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11222 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11223 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11224 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11225 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11226 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11227 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11228 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11229 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11230 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11231 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11232 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11233 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11234 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11235 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11236 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11237 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11238 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11239 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11240 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11241 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11242 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11243 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11244 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11245 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11246 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11247 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11248 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11249 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11250 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11251 };
11252
11253 static void
11254 sh_media_init_builtins (void)
11255 {
11256 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11257 struct builtin_description *d;
11258
11259 memset (shared, 0, sizeof shared);
11260 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
11261 {
11262 tree type, arg_type = 0;
11263 int signature = d->signature;
11264 int i;
11265
11266 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11267 type = shared[signature];
11268 else
11269 {
11270 int has_result = signature_args[signature][0] != 0;
11271 tree args[3];
11272
11273 if ((signature_args[signature][1] & 8)
11274 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11275 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11276 continue;
11277 if (! TARGET_FPU_ANY
11278 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11279 continue;
11280 for (i = 0; i < (int) ARRAY_SIZE (args); i++)
11281 args[i] = NULL_TREE;
11282 for (i = 3; ; i--)
11283 {
11284 int arg = signature_args[signature][i];
11285 int opno = i - 1 + has_result;
11286
11287 if (arg & 8)
11288 arg_type = ptr_type_node;
11289 else if (arg)
11290 arg_type = (*lang_hooks.types.type_for_mode)
11291 (insn_data[d->icode].operand[opno].mode,
11292 (arg & 1));
11293 else if (i)
11294 continue;
11295 else
11296 arg_type = void_type_node;
11297 if (i == 0)
11298 break;
11299 args[i-1] = arg_type;
11300 }
11301 type = build_function_type_list (arg_type, args[0], args[1],
11302 args[2], NULL_TREE);
11303 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11304 shared[signature] = type;
11305 }
11306 d->fndecl =
11307 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11308 NULL, NULL_TREE);
11309 }
11310 }
11311
11312 /* Returns the shmedia builtin decl for CODE. */
11313
11314 static tree
11315 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11316 {
11317 if (code >= ARRAY_SIZE (bdesc))
11318 return error_mark_node;
11319
11320 return bdesc[code].fndecl;
11321 }
11322
11323 /* Implements target hook vector_mode_supported_p. */
11324 bool
11325 sh_vector_mode_supported_p (enum machine_mode mode)
11326 {
11327 if (TARGET_FPU_ANY
11328 && ((mode == V2SFmode)
11329 || (mode == V4SFmode)
11330 || (mode == V16SFmode)))
11331 return true;
11332
11333 else if (TARGET_SHMEDIA
11334 && ((mode == V8QImode)
11335 || (mode == V2HImode)
11336 || (mode == V4HImode)
11337 || (mode == V2SImode)))
11338 return true;
11339
11340 return false;
11341 }
11342
11343 bool
11344 sh_frame_pointer_required (void)
11345 {
11346 /* If needed, override this in other tm.h files to cope with various OS
11347 lossage requiring a frame pointer. */
11348 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11349 return true;
11350
11351 if (crtl->profile)
11352 return true;
11353
11354 return false;
11355 }
11356
11357 /* Implements target hook dwarf_calling_convention. Return an enum
11358 of dwarf_calling_convention. */
11359 int
11360 sh_dwarf_calling_convention (const_tree func)
11361 {
11362 if (sh_attr_renesas_p (func))
11363 return DW_CC_GNU_renesas_sh;
11364
11365 return DW_CC_normal;
11366 }
11367
11368 static void
11369 sh_init_builtins (void)
11370 {
11371 if (TARGET_SHMEDIA)
11372 sh_media_init_builtins ();
11373 }
11374
11375 /* Returns the sh builtin decl for CODE. */
11376
11377 static tree
11378 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11379 {
11380 if (TARGET_SHMEDIA)
11381 return sh_media_builtin_decl (code, initialize_p);
11382
11383 return error_mark_node;
11384 }
11385
11386 /* Expand an expression EXP that calls a built-in function,
11387 with result going to TARGET if that's convenient
11388 (and in mode MODE if that's convenient).
11389 SUBTARGET may be used as the target for computing one of EXP's operands.
11390 IGNORE is nonzero if the value is to be ignored. */
11391
11392 static rtx
11393 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11394 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11395 {
11396 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11397 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11398 const struct builtin_description *d = &bdesc[fcode];
11399 enum insn_code icode = d->icode;
11400 int signature = d->signature;
11401 enum machine_mode tmode = VOIDmode;
11402 int nop = 0, i;
11403 rtx op[4];
11404 rtx pat = NULL_RTX;
11405
11406 if (signature_args[signature][0])
11407 {
11408 if (ignore)
11409 return NULL_RTX;
11410
11411 tmode = insn_data[icode].operand[0].mode;
11412 if (! target
11413 || GET_MODE (target) != tmode
11414 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11415 target = gen_reg_rtx (tmode);
11416 op[nop++] = target;
11417 }
11418 else
11419 target = 0;
11420
11421 for (i = 1; i <= 3; i++, nop++)
11422 {
11423 tree arg;
11424 enum machine_mode opmode, argmode;
11425 tree optype;
11426
11427 if (! signature_args[signature][i])
11428 break;
11429 arg = CALL_EXPR_ARG (exp, i - 1);
11430 if (arg == error_mark_node)
11431 return const0_rtx;
11432 if (signature_args[signature][i] & 8)
11433 {
11434 opmode = ptr_mode;
11435 optype = ptr_type_node;
11436 }
11437 else
11438 {
11439 opmode = insn_data[icode].operand[nop].mode;
11440 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11441 }
11442 argmode = TYPE_MODE (TREE_TYPE (arg));
11443 if (argmode != opmode)
11444 arg = build1 (NOP_EXPR, optype, arg);
11445 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11446 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11447 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11448 }
11449
11450 switch (nop)
11451 {
11452 case 1:
11453 pat = (*insn_data[d->icode].genfun) (op[0]);
11454 break;
11455 case 2:
11456 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11457 break;
11458 case 3:
11459 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11460 break;
11461 case 4:
11462 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11463 break;
11464 default:
11465 gcc_unreachable ();
11466 }
11467 if (! pat)
11468 return NULL_RTX;
11469 emit_insn (pat);
11470 return target;
11471 }
11472
11473 void
11474 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11475 {
11476 rtx sel0 = const0_rtx;
11477 rtx sel1 = const1_rtx;
11478 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11479 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11480
11481 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11482 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11483 }
11484
11485 void
11486 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11487 {
11488 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11489
11490 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11491 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11492 }
11493
11494 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11495 We can allow any mode in any general register. The special registers
11496 only allow SImode. Don't allow any mode in the PR.
11497
11498 We cannot hold DCmode values in the XD registers because alter_reg
11499 handles subregs of them incorrectly. We could work around this by
11500 spacing the XD registers like the DR registers, but this would require
11501 additional memory in every compilation to hold larger register vectors.
11502 We could hold SFmode / SCmode values in XD registers, but that
11503 would require a tertiary reload when reloading from / to memory,
11504 and a secondary reload to reload from / to general regs; that
11505 seems to be a losing proposition.
11506
11507 We want to allow TImode in FP regs so that when V4SFmode is loaded as TImode,
11508 it won't be ferried through GP registers first. */
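/* For instance, with the rules below an SFmode value fits in any single FP
   register, DFmode (and DCmode) values require an even-numbered FP register
   pair on SH4 / SH2A, and TImode in FP registers must start on a
   four-register boundary.  */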
11509
11510 bool
11511 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11512 {
11513 if (SPECIAL_REGISTER_P (regno))
11514 return mode == SImode;
11515
11516 if (regno == FPUL_REG)
11517 return (mode == SImode || mode == SFmode);
11518
11519 if (FP_REGISTER_P (regno) && mode == SFmode)
11520 return true;
11521
11522 if (mode == V2SFmode)
11523 {
11524 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11525 || GENERAL_REGISTER_P (regno)))
11526 return true;
11527 else
11528 return false;
11529 }
11530
11531 if (mode == V4SFmode)
11532 {
11533 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11534 || GENERAL_REGISTER_P (regno))
11535 return true;
11536 else
11537 return false;
11538 }
11539
11540 if (mode == V16SFmode)
11541 {
11542 if (TARGET_SHMEDIA)
11543 {
11544 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11545 return true;
11546 else
11547 return false;
11548 }
11549 else
11550 return regno == FIRST_XD_REG;
11551 }
11552
11553 if (FP_REGISTER_P (regno))
11554 {
11555 if (mode == SFmode
11556 || mode == SImode
11557 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11558 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11559 || mode == DCmode
11560 || (TARGET_SHMEDIA
11561 && (mode == DFmode || mode == DImode
11562 || mode == V2SFmode || mode == TImode)))
11563 && ((regno - FIRST_FP_REG) & 1) == 0)
11564 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11565 && ((regno - FIRST_FP_REG) & 3) == 0))
11566 return true;
11567 else
11568 return false;
11569 }
11570
11571 if (XD_REGISTER_P (regno))
11572 return mode == DFmode;
11573
11574 if (TARGET_REGISTER_P (regno))
11575 return (mode == DImode || mode == SImode || mode == PDImode);
11576
11577 if (regno == PR_REG)
11578 return mode == SImode;
11579
11580 if (regno == FPSCR_REG)
11581 return mode == PSImode;
11582
11583 /* FIXME. This works around PR target/37633 for -O0. */
11584 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11585 {
11586 unsigned int n = GET_MODE_SIZE (mode) / 8;
11587
11588 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11589 && regno <= FIRST_GENERAL_REG + 14)
11590 return false;
11591 }
11592
11593 return true;
11594 }
11595
11596 /* Return true if a mode change from FROM to TO is invalid for registers
11597 in class RCLASS. */
11598 bool
11599 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11600 enum reg_class rclass)
11601 {
11602 /* We want to enable the use of SUBREGs as a means to
11603 VEC_SELECT a single element of a vector. */
11604
11605 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
11606 This can be problematic when SFmode vector subregs need to be accessed
11607 on the stack with displacement addressing, as it happens with -O0.
11608 Thus we disallow the mode change for -O0. */
11609 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11610 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
11611
11612 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11613 {
11614 if (TARGET_LITTLE_ENDIAN)
11615 {
11616 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11617 return reg_classes_intersect_p (DF_REGS, rclass);
11618 }
11619 else
11620 {
11621 if (GET_MODE_SIZE (from) < 8)
11622 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11623 }
11624 }
11625 return false;
11626 }
11627
11628 /* Return true if registers in machine mode MODE will likely be
11629 allocated to registers in small register classes. */
11630
11631 bool
11632 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11633 {
11634 return (! TARGET_SHMEDIA);
11635 }
11636
11637 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11638 that label is used. */
11639
11640 void
11641 sh_mark_label (rtx address, int nuses)
11642 {
11643 if (GOTOFF_P (address))
11644 {
11645 /* Extract the label or symbol. */
11646 address = XEXP (address, 0);
11647 if (GET_CODE (address) == PLUS)
11648 address = XEXP (address, 0);
11649 address = XVECEXP (address, 0, 0);
11650 }
11651 if (GET_CODE (address) == LABEL_REF
11652 && LABEL_P (XEXP (address, 0)))
11653 LABEL_NUSES (XEXP (address, 0)) += nuses;
11654 }
11655
11656 /* Compute extra cost of moving data between one register class
11657 and another. */
11658
11659 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11660 uses this information. Hence, the general register <-> floating point
11661 register information here is not used for SFmode. */
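/* A worked example of the general <-> FP register cost computed below
   (illustrative only): without TARGET_FMOVD the base cost is 12, and Pmode
   (an alias of SImode here) adds 40, giving (12 + 40) * 1 = 52 for moving a
   pointer, whereas a DFmode move costs 12 * ((8 + 7) / 8) = 12.  */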
11662
11663 static int
11664 sh_register_move_cost (enum machine_mode mode,
11665 reg_class_t srcclass, reg_class_t dstclass)
11666 {
11667 if (dstclass == T_REGS || dstclass == PR_REGS)
11668 return 10;
11669
11670 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11671 return 4;
11672
11673 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11674 && REGCLASS_HAS_FP_REG (srcclass)
11675 && REGCLASS_HAS_FP_REG (dstclass))
11676 return 4;
11677
11678 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11679 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11680
11681 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11682 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11683 return 9;
11684
11685 if ((REGCLASS_HAS_FP_REG (dstclass)
11686 && REGCLASS_HAS_GENERAL_REG (srcclass))
11687 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11688 && REGCLASS_HAS_FP_REG (srcclass)))
11689 {
11690 /* Discourage trying to use fp regs for a pointer. This also
11691 discourages fp regs with SImode because Pmode is an alias
11692 of SImode on this target. See PR target/48596. */
11693 int addend = (mode == Pmode) ? 40 : 0;
11694
11695 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
11696 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11697 }
11698
11699 if ((dstclass == FPUL_REGS
11700 && REGCLASS_HAS_GENERAL_REG (srcclass))
11701 || (srcclass == FPUL_REGS
11702 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11703 return 5;
11704
11705 if ((dstclass == FPUL_REGS
11706 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11707 || (srcclass == FPUL_REGS
11708 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11709 return 7;
11710
11711 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11712 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11713 return 20;
11714
11715 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11716 if (TARGET_SHMEDIA
11717 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11718 {
11719 if (sh_gettrcost >= 0)
11720 return sh_gettrcost;
11721 else if (!TARGET_PT_FIXED)
11722 return 100;
11723 }
11724
11725 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11726 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11727 return 4;
11728
11729 if (TARGET_SHMEDIA
11730 || (TARGET_FMOVD
11731 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11732 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11733 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11734
11735 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11736 }
11737
11738 static rtx emit_load_ptr (rtx, rtx);
11739
11740 static rtx
11741 emit_load_ptr (rtx reg, rtx addr)
11742 {
11743 rtx mem = gen_const_mem (ptr_mode, addr);
11744
11745 if (Pmode != ptr_mode)
11746 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11747 return emit_move_insn (reg, mem);
11748 }
11749
11750 static void
11751 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11752 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11753 tree function)
11754 {
11755 CUMULATIVE_ARGS cum;
11756 int structure_value_byref = 0;
11757 rtx this_rtx, this_value, sibcall, insns, funexp;
11758 tree funtype = TREE_TYPE (function);
11759 int simple_add = CONST_OK_FOR_ADD (delta);
11760 int did_load = 0;
11761 rtx scratch0, scratch1, scratch2;
11762 unsigned i;
11763
11764 reload_completed = 1;
11765 epilogue_completed = 1;
11766 crtl->uses_only_leaf_regs = 1;
11767
11768 emit_note (NOTE_INSN_PROLOGUE_END);
11769
11770 /* Find the "this" pointer. We have such a wide range of ABIs for the
11771 SH that it's best to do this completely machine independently.
11772 "this" is passed as first argument, unless a structure return pointer
11773 comes first, in which case "this" comes second. */
11774 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11775 #ifndef PCC_STATIC_STRUCT_RETURN
11776 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11777 structure_value_byref = 1;
11778 #endif /* not PCC_STATIC_STRUCT_RETURN */
11779 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11780 {
11781 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11782
11783 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
11784 }
11785 this_rtx
11786 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
11787
11788 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11789 static chain pointer (even if you can't have nested virtual functions
11790 right now, someone might implement them sometime), and the rest of the
11791 registers are used for argument passing, are callee-saved, or reserved. */
11792 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11793 -ffixed-reg has been used. */
11794 if (! call_used_regs[0] || fixed_regs[0])
11795 error ("r0 needs to be available as a call-clobbered register");
11796 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11797 if (! TARGET_SH5)
11798 {
11799 if (call_used_regs[1] && ! fixed_regs[1])
11800 scratch1 = gen_rtx_REG (ptr_mode, 1);
11801 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11802 to the location where struct values are returned. */
11803 if (call_used_regs[3] && ! fixed_regs[3])
11804 scratch2 = gen_rtx_REG (Pmode, 3);
11805 }
11806 else if (TARGET_SHMEDIA)
11807 {
11808 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11809 if (i != REGNO (scratch0) &&
11810 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11811 {
11812 scratch1 = gen_rtx_REG (ptr_mode, i);
11813 break;
11814 }
11815 if (scratch1 == scratch0)
11816 error ("need a second call-clobbered general purpose register");
11817 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11818 if (call_used_regs[i] && ! fixed_regs[i])
11819 {
11820 scratch2 = gen_rtx_REG (Pmode, i);
11821 break;
11822 }
11823 if (scratch2 == scratch0)
11824 error ("need a call-clobbered target register");
11825 }
11826
11827 this_value = plus_constant (Pmode, this_rtx, delta);
11828 if (vcall_offset
11829 && (simple_add || scratch0 != scratch1)
11830 && strict_memory_address_p (ptr_mode, this_value))
11831 {
11832 emit_load_ptr (scratch0, this_value);
11833 did_load = 1;
11834 }
11835
11836 if (!delta)
11837 ; /* Do nothing. */
11838 else if (simple_add)
11839 emit_move_insn (this_rtx, this_value);
11840 else
11841 {
11842 emit_move_insn (scratch1, GEN_INT (delta));
11843 emit_insn (gen_add2_insn (this_rtx, scratch1));
11844 }
11845
11846 if (vcall_offset)
11847 {
11848 rtx offset_addr;
11849
11850 if (!did_load)
11851 emit_load_ptr (scratch0, this_rtx);
11852
11853 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
11854 if (strict_memory_address_p (ptr_mode, offset_addr))
11855 ; /* Do nothing. */
11856 else if (! TARGET_SH5 && scratch0 != scratch1)
11857 {
11858 /* scratch0 != scratch1, and we have indexed loads. Get better
11859 schedule by loading the offset into r1 and using an indexed
11860 load - then the load of r1 can issue before the load from
11861 (this_rtx + delta) finishes. */
11862 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11863 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11864 }
11865 else if (CONST_OK_FOR_ADD (vcall_offset))
11866 {
11867 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11868 offset_addr = scratch0;
11869 }
11870 else if (scratch0 != scratch1)
11871 {
11872 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11873 emit_insn (gen_add2_insn (scratch0, scratch1));
11874 offset_addr = scratch0;
11875 }
11876 else
11877 gcc_unreachable (); /* FIXME */
11878 emit_load_ptr (scratch0, offset_addr);
11879
11880 if (Pmode != ptr_mode)
11881 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11882 emit_insn (gen_add2_insn (this_rtx, scratch0));
11883 }
11884
11885 /* Generate a tail call to the target function. */
11886 if (! TREE_USED (function))
11887 {
11888 assemble_external (function);
11889 TREE_USED (function) = 1;
11890 }
11891 funexp = XEXP (DECL_RTL (function), 0);
11892 /* If the function is overridden, so is the thunk, hence we don't
11893 need GOT addressing even if this is a public symbol. */
11894 #if 0
11895 if (TARGET_SH1 && ! flag_weak)
11896 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11897 else
11898 #endif
11899 if (TARGET_SH2 && flag_pic)
11900 {
11901 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11902 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11903 }
11904 else
11905 {
11906 if (TARGET_SHMEDIA && flag_pic)
11907 {
11908 funexp = gen_sym2PIC (funexp);
11909 PUT_MODE (funexp, Pmode);
11910 }
11911 emit_move_insn (scratch2, funexp);
11912 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11913 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11914 }
11915 sibcall = emit_call_insn (sibcall);
11916 SIBLING_CALL_P (sibcall) = 1;
11917 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11918 emit_barrier ();
11919
11920 /* Run just enough of rest_of_compilation to do scheduling and get
11921 the insns emitted. Note that use_thunk calls
11922 assemble_start_function and assemble_end_function. */
11923
11924 insn_locators_alloc ();
11925 insns = get_insns ();
11926
11927 if (optimize > 0)
11928 {
11929 if (! cfun->cfg)
11930 init_flow (cfun);
11931 split_all_insns_noflow ();
11932 }
11933
11934 sh_reorg ();
11935 shorten_branches (insns);
11936 final_start_function (insns, file, 1);
11937 final (insns, file, 1);
11938 final_end_function ();
11939
11940 reload_completed = 0;
11941 epilogue_completed = 0;
11942 }
11943
11944 rtx
11945 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11946 {
11947 rtx sym;
11948
11949 /* If this is not an ordinary function, the name usually comes from a
11950 string literal or an sprintf buffer. Make sure we use the same
11951 string consistently, so that cse will be able to unify address loads. */
11952 if (kind != FUNCTION_ORDINARY)
11953 name = IDENTIFIER_POINTER (get_identifier (name));
11954 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11955 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11956 if (flag_pic)
11957 switch (kind)
11958 {
11959 case FUNCTION_ORDINARY:
11960 break;
11961 case SFUNC_GOT:
11962 {
11963 rtx reg = target ? target : gen_reg_rtx (Pmode);
11964
11965 emit_insn (gen_symGOT2reg (reg, sym));
11966 sym = reg;
11967 break;
11968 }
11969 case SFUNC_STATIC:
11970 {
11971 /* ??? To allow cse to work, we use GOTOFF relocations.
11972 We could add combiner patterns to transform this into
11973 straight pc-relative calls with sym2PIC / bsrf when
11974 label load and function call are still 1:1 and in the
11975 same basic block during combine. */
11976 rtx reg = target ? target : gen_reg_rtx (Pmode);
11977
11978 emit_insn (gen_symGOTOFF2reg (reg, sym));
11979 sym = reg;
11980 break;
11981 }
11982 }
11983 if (target && sym != target)
11984 {
11985 emit_move_insn (target, sym);
11986 return target;
11987 }
11988 return sym;
11989 }
11990
11991 /* Find the number of a general purpose register in S. */
11992 static int
11993 scavenge_reg (HARD_REG_SET *s)
11994 {
11995 int r;
11996 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11997 if (TEST_HARD_REG_BIT (*s, r))
11998 return r;
11999 return -1;
12000 }
12001
12002 rtx
12003 sh_get_pr_initial_val (void)
12004 {
12005 rtx val;
12006
12007 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12008 PR register on SHcompact, because it might be clobbered by the prologue.
12009 We check first if that is known to be the case. */
12010 if (TARGET_SHCOMPACT
12011 && ((crtl->args.info.call_cookie
12012 & ~ CALL_COOKIE_RET_TRAMP (1))
12013 || crtl->saves_all_registers))
12014 return gen_frame_mem (SImode, return_address_pointer_rtx);
12015
12016 /* If we haven't finished rtl generation, there might be a nonlocal label
12017 that we haven't seen yet.
12018 ??? get_hard_reg_initial_val fails if it is called after register
12019 allocation has started, unless it has been called before for the
12020 same register. And even then, we end up in trouble if we didn't use
12021 the register in the same basic block before. So call
12022 get_hard_reg_initial_val now and wrap it in an unspec if we might
12023 need to replace it. */
12024 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12025 combine can put the pseudo returned by get_hard_reg_initial_val into
12026 instructions that need a general purpose register, which will fail to
12027 be recognized when the pseudo becomes allocated to PR. */
12028 val
12029 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12030 if (TARGET_SH1)
12031 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12032 return val;
12033 }
12034
12035 bool
12036 sh_expand_t_scc (rtx operands[])
12037 {
12038 enum rtx_code code = GET_CODE (operands[1]);
12039 rtx target = operands[0];
12040 rtx op0 = operands[2];
12041 rtx op1 = operands[3];
12042 rtx result = target;
12043 HOST_WIDE_INT val;
12044
12045 if (!REG_P (op0) || REGNO (op0) != T_REG
12046 || !CONST_INT_P (op1))
12047 return false;
12048 if (!REG_P (result))
12049 result = gen_reg_rtx (SImode);
12050 val = INTVAL (op1);
12051 if ((code == EQ && val == 1) || (code == NE && val == 0))
12052 emit_insn (gen_movt (result));
12053 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12054 emit_insn (gen_movnegt (result));
12055 else if (code == EQ || code == NE)
12056 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12057 else
12058 return false;
12059 if (result != target)
12060 emit_move_insn (target, result);
12061 return true;
12062 }
12063
12064 /* INSN is an sfunc; return the rtx that describes the address used. */
12065 static rtx
12066 extract_sfunc_addr (rtx insn)
12067 {
12068 rtx pattern, part = NULL_RTX;
12069 int len, i;
12070
12071 pattern = PATTERN (insn);
12072 len = XVECLEN (pattern, 0);
12073 for (i = 0; i < len; i++)
12074 {
12075 part = XVECEXP (pattern, 0, i);
12076 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12077 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12078 return XEXP (part, 0);
12079 }
12080 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12081 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12082 }
12083
12084 /* Verify that the register in use_sfunc_addr still agrees with the address
12085 used in the sfunc. This prevents fill_slots_from_thread from changing
12086 use_sfunc_addr.
12087 INSN is the use_sfunc_addr instruction, and REG is the register it
12088 guards. */
12089 bool
12090 check_use_sfunc_addr (rtx insn, rtx reg)
12091 {
12092 /* Search for the sfunc. It should really come right after INSN. */
12093 while ((insn = NEXT_INSN (insn)))
12094 {
12095 if (LABEL_P (insn) || JUMP_P (insn))
12096 break;
12097 if (! INSN_P (insn))
12098 continue;
12099
12100 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
12101 insn = XVECEXP (PATTERN (insn), 0, 0);
12102 if (GET_CODE (PATTERN (insn)) != PARALLEL
12103 || get_attr_type (insn) != TYPE_SFUNC)
12104 continue;
12105 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12106 }
12107 gcc_unreachable ();
12108 }
12109
12110 /* This function returns a constant rtx that represents 2**15 / pi in
12111 SFmode. It's used to scale SFmode angles, in radians, to a
12112 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
12113 maps to 0x10000. */
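/* That is, 2**15 / pi = 32768 / 3.14159...  ~= 10430.378350470453, so
   multiplying an angle in radians by this constant maps 2*pi onto 0x10000.  */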
12114
12115 static GTY(()) rtx sh_fsca_sf2int_rtx;
12116
12117 rtx
12118 sh_fsca_sf2int (void)
12119 {
12120 if (! sh_fsca_sf2int_rtx)
12121 {
12122 REAL_VALUE_TYPE rv;
12123
12124 real_from_string (&rv, "10430.378350470453");
12125 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12126 }
12127
12128 return sh_fsca_sf2int_rtx;
12129 }
12130
12131 /* This function returns a constant rtx that represents pi / 2**15 in
12132 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12133 of a full circle back to an SFmode value, i.e., 0x10000 maps to
12134 2*pi. */
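/* That is, pi / 2**15 = 3.14159... / 32768 ~= 9.587379924285257e-5, the
   reciprocal of the sh_fsca_sf2int constant above.  */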
12135
12136 static GTY(()) rtx sh_fsca_int2sf_rtx;
12137
12138 rtx
12139 sh_fsca_int2sf (void)
12140 {
12141 if (! sh_fsca_int2sf_rtx)
12142 {
12143 REAL_VALUE_TYPE rv;
12144
12145 real_from_string (&rv, "9.587379924285257e-5");
12146 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12147 }
12148
12149 return sh_fsca_int2sf_rtx;
12150 }
12151
12152 /* Initialize the CUMULATIVE_ARGS structure. */
12153
12154 void
12155 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12156 tree fntype,
12157 rtx libname ATTRIBUTE_UNUSED,
12158 tree fndecl,
12159 signed int n_named_args,
12160 enum machine_mode mode)
12161 {
12162 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12163 pcum->free_single_fp_reg = 0;
12164 pcum->stack_regs = 0;
12165 pcum->byref_regs = 0;
12166 pcum->byref = 0;
12167 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12168
12169 /* XXX - Should we check TARGET_HITACHI here ??? */
12170 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12171
12172 if (fntype)
12173 {
12174 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12175 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12176 pcum->prototype_p = prototype_p (fntype);
12177 pcum->arg_count [(int) SH_ARG_INT]
12178 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12179
12180 pcum->call_cookie
12181 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12182 && pcum->arg_count [(int) SH_ARG_INT] == 0
12183 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12184 ? int_size_in_bytes (TREE_TYPE (fntype))
12185 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12186 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12187 == FIRST_RET_REG));
12188 }
12189 else
12190 {
12191 pcum->arg_count [(int) SH_ARG_INT] = 0;
12192 pcum->prototype_p = FALSE;
12193 if (mode != VOIDmode)
12194 {
12195 pcum->call_cookie =
12196 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12197 && GET_MODE_SIZE (mode) > 4
12198 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12199
12200 /* If the default ABI is the Renesas ABI then all library
12201 calls must assume that the library will be using the
12202 Renesas ABI. So if the function would return its result
12203 in memory then we must force the address of this memory
12204 block onto the stack. Ideally we would like to call
12205 targetm.calls.return_in_memory() here but we do not have
12206 the TYPE or the FNDECL available so we synthesize the
12207 contents of that function as best we can. */
12208 pcum->force_mem =
12209 (TARGET_DEFAULT & MASK_HITACHI)
12210 && (mode == BLKmode
12211 || (GET_MODE_SIZE (mode) > 4
12212 && !(mode == DFmode
12213 && TARGET_FPU_DOUBLE)));
12214 }
12215 else
12216 {
12217 pcum->call_cookie = 0;
12218 pcum->force_mem = FALSE;
12219 }
12220 }
12221 }
12222
12223 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12224 not descend into CONST_DOUBLE when replacing.
12225
12226 Note that copying is not done so X must not be shared unless all copies
12227 are to be modified.
12228
12229 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12230 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12231 replacements[n*2+1] - and that we take mode changes into account.
12232
12233 If a replacement is ambiguous, return NULL_RTX.
12234
12235 If MODIFY is zero, don't modify any rtl in place,
12236 just return zero or nonzero for failure / success. */
12237
12238 rtx
12239 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12240 {
12241 int i, j;
12242 const char *fmt;
12243
12244 /* The following prevents infinite loops when we change a MEM inside a
12245 CONST_DOUBLE into the same CONST_DOUBLE. */
12246 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
12247 return x;
12248
12249 for (i = n_replacements - 1; i >= 0 ; i--)
12250 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12251 return replacements[i*2+1];
12252
12253 /* Allow this function to make replacements in EXPR_LISTs. */
12254 if (x == NULL_RTX)
12255 return NULL_RTX;
12256
12257 if (GET_CODE (x) == SUBREG)
12258 {
12259 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12260 n_replacements, modify);
12261
12262 if (CONST_INT_P (new_rtx))
12263 {
12264 x = simplify_subreg (GET_MODE (x), new_rtx,
12265 GET_MODE (SUBREG_REG (x)),
12266 SUBREG_BYTE (x));
12267 if (! x)
12268 abort ();
12269 }
12270 else if (modify)
12271 SUBREG_REG (x) = new_rtx;
12272
12273 return x;
12274 }
12275 else if (REG_P (x))
12276 {
12277 unsigned regno = REGNO (x);
12278 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12279 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12280 rtx result = NULL_RTX;
12281
12282 for (i = n_replacements - 1; i >= 0; i--)
12283 {
12284 rtx from = replacements[i*2];
12285 rtx to = replacements[i*2+1];
12286 unsigned from_regno, from_nregs, to_regno, new_regno;
12287
12288 if (!REG_P (from))
12289 continue;
12290 from_regno = REGNO (from);
12291 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12292 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12293 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12294 {
12295 if (regno < from_regno
12296 || regno + nregs > from_regno + nregs
12297 || !REG_P (to)
12298 || result)
12299 return NULL_RTX;
12300 to_regno = REGNO (to);
12301 if (to_regno < FIRST_PSEUDO_REGISTER)
12302 {
12303 new_regno = regno + to_regno - from_regno;
12304 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12305 != nregs)
12306 return NULL_RTX;
12307 result = gen_rtx_REG (GET_MODE (x), new_regno);
12308 }
12309 else if (GET_MODE (x) <= GET_MODE (to))
12310 result = gen_lowpart_common (GET_MODE (x), to);
12311 else
12312 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12313 }
12314 }
12315 return result ? result : x;
12316 }
12317 else if (GET_CODE (x) == ZERO_EXTEND)
12318 {
12319 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12320 n_replacements, modify);
12321
12322 if (CONST_INT_P (new_rtx))
12323 {
12324 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12325 new_rtx, GET_MODE (XEXP (x, 0)));
12326 if (! x)
12327 abort ();
12328 }
12329 else if (modify)
12330 XEXP (x, 0) = new_rtx;
12331
12332 return x;
12333 }
12334
12335 fmt = GET_RTX_FORMAT (GET_CODE (x));
12336 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12337 {
12338 rtx new_rtx;
12339
12340 if (fmt[i] == 'e')
12341 {
12342 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12343 n_replacements, modify);
12344 if (!new_rtx)
12345 return NULL_RTX;
12346 if (modify)
12347 XEXP (x, i) = new_rtx;
12348 }
12349 else if (fmt[i] == 'E')
12350 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12351 {
12352 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12353 n_replacements, modify);
12354 if (!new_rtx)
12355 return NULL_RTX;
12356 if (modify)
12357 XVECEXP (x, i, j) = new_rtx;
12358 }
12359 }
12360
12361 return x;
12362 }
12363
12364 rtx
12365 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12366 {
12367 enum rtx_code code = TRUNCATE;
12368
12369 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12370 {
12371 rtx inner = XEXP (x, 0);
12372 enum machine_mode inner_mode = GET_MODE (inner);
12373
12374 if (inner_mode == mode)
12375 return inner;
12376 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12377 x = inner;
12378 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12379 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12380 {
12381 code = GET_CODE (x);
12382 x = inner;
12383 }
12384 }
12385 return gen_rtx_fmt_e (code, mode, x);
12386 }
12387
12388 /* Called via for_each_rtx after reload, to clean up truncates of
12389 registers that span multiple actual hard registers. */
12390 int
12391 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12392 {
12393 rtx x = *p, reg;
12394
12395 if (GET_CODE (x) != TRUNCATE)
12396 return 0;
12397 reg = XEXP (x, 0);
12398 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12399 {
12400 enum machine_mode reg_mode = GET_MODE (reg);
12401 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12402 subreg_lowpart_offset (DImode, reg_mode));
12403 *(int*) n_changes += 1;
12404 return -1;
12405 }
12406 return 0;
12407 }
12408
12409 /* Load and store depend on the highpart of the address. However,
12410 set_attr_alternative does not give well-defined results before reload,
12411 so we must look at the rtl ourselves to see if any of the feeding
12412 registers is used in a memref. */
12413
12414 /* Called by sh_contains_memref_p via for_each_rtx. */
12415 static int
12416 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12417 {
12418 return (MEM_P (*loc));
12419 }
12420
12421 /* Return true iff INSN contains a MEM. */
12422 bool
12423 sh_contains_memref_p (rtx insn)
12424 {
12425 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12426 }
12427
12428 /* Return true iff INSN loads a banked register. */
12429 bool
12430 sh_loads_bankedreg_p (rtx insn)
12431 {
12432 if (GET_CODE (PATTERN (insn)) == SET)
12433 {
12434 rtx op = SET_DEST (PATTERN(insn));
12435 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12436 return true;
12437 }
12438
12439 return false;
12440 }
12441
12442 /* FNADDR is the MEM expression from a call expander. Return an address
12443 to use in an SHmedia insn pattern. */
12444 rtx
12445 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12446 {
12447 int is_sym;
12448
12449 fnaddr = XEXP (fnaddr, 0);
12450 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12451 if (flag_pic && is_sym)
12452 {
12453 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12454 {
12455 rtx reg = gen_reg_rtx (Pmode);
12456
12457 /* We must not use GOTPLT for sibcalls, because PIC_REG
12458 must be restored before the PLT code gets to run. */
12459 if (is_sibcall)
12460 emit_insn (gen_symGOT2reg (reg, fnaddr));
12461 else
12462 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12463 fnaddr = reg;
12464 }
12465 else
12466 {
12467 fnaddr = gen_sym2PIC (fnaddr);
12468 PUT_MODE (fnaddr, Pmode);
12469 }
12470 }
12471 /* If ptabs might trap, make this visible to the rest of the compiler.
12472 We generally assume that symbols pertain to valid locations, but
12473 it is possible to generate invalid symbols with asm or linker tricks.
12474 In a list of functions where each returns its successor, an invalid
12475 symbol might denote an empty list. */
12476 if (!TARGET_PT_FIXED
12477 && (!is_sym || TARGET_INVALID_SYMBOLS)
12478 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12479 {
12480 rtx tr = gen_reg_rtx (PDImode);
12481
12482 emit_insn (gen_ptabs (tr, fnaddr));
12483 fnaddr = tr;
12484 }
12485 else if (! target_reg_operand (fnaddr, Pmode))
12486 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12487 return fnaddr;
12488 }
12489
12490 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
12491
12492 static reg_class_t
12493 sh_preferred_reload_class (rtx x, reg_class_t rclass)
12494 {
12495 if (rclass == NO_REGS
12496 && TARGET_SHMEDIA
12497 && (CONST_DOUBLE_P (x)
12498 || GET_CODE (x) == SYMBOL_REF
12499 || PIC_ADDR_P (x)))
12500 return GENERAL_REGS;
12501
12502 return rclass;
12503 }
12504
12505 /* Implement TARGET_SECONDARY_RELOAD. */
12506
12507 static reg_class_t
12508 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12509 enum machine_mode mode, secondary_reload_info *sri)
12510 {
12511 enum reg_class rclass = (enum reg_class) rclass_i;
12512
12513 if (in_p)
12514 {
12515 if (REGCLASS_HAS_FP_REG (rclass)
12516 && ! TARGET_SHMEDIA
12517 && immediate_operand ((x), mode)
12518 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12519 && mode == SFmode && fldi_ok ()))
12520 switch (mode)
12521 {
12522 case SFmode:
12523 sri->icode = CODE_FOR_reload_insf__frn;
12524 return NO_REGS;
12525 case DFmode:
12526 sri->icode = CODE_FOR_reload_indf__frn;
12527 return NO_REGS;
12528 case SImode:
12529 /* ??? If we knew that we were in the appropriate mode -
12530 single precision - we could use a reload pattern directly. */
12531 return FPUL_REGS;
12532 default:
12533 abort ();
12534 }
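/* Moves between FPUL and MACH / MACL / T, or an address calculation
   (PLUS), need to go through a general register first.  */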
12535 if (rclass == FPUL_REGS
12536 && ((REG_P (x)
12537 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12538 || REGNO (x) == T_REG))
12539 || GET_CODE (x) == PLUS))
12540 return GENERAL_REGS;
12541 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12542 {
12543 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12544 return GENERAL_REGS;
12545 else if (mode == SFmode)
12546 return FP_REGS;
12547 sri->icode = CODE_FOR_reload_insi__i_fpul;
12548 return NO_REGS;
12549 }
12550 if (rclass == FPSCR_REGS
12551 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12552 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12553 return GENERAL_REGS;
12554 if (REGCLASS_HAS_FP_REG (rclass)
12555 && TARGET_SHMEDIA
12556 && immediate_operand (x, mode)
12557 && x != CONST0_RTX (GET_MODE (x))
12558 && GET_MODE (x) != V4SFmode)
12559 return GENERAL_REGS;
12560 if ((mode == QImode || mode == HImode)
12561 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12562 {
12563 sri->icode = ((mode == QImode)
12564 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12565 return NO_REGS;
12566 }
12567 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12568 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12569 return TARGET_REGS;
12570 } /* end of input-only processing. */
12571
12572 if (((REGCLASS_HAS_FP_REG (rclass)
12573 && (REG_P (x)
12574 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12575 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12576 && TARGET_FMOVD))))
12577 || (REGCLASS_HAS_GENERAL_REG (rclass)
12578 && REG_P (x)
12579 && FP_REGISTER_P (REGNO (x))))
12580 && ! TARGET_SHMEDIA
12581 && (mode == SFmode || mode == SImode))
12582 return FPUL_REGS;
12583 if ((rclass == FPUL_REGS
12584 || (REGCLASS_HAS_FP_REG (rclass)
12585 && ! TARGET_SHMEDIA && mode == SImode))
12586 && (MEM_P (x)
12587 || (REG_P (x)
12588 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12589 || REGNO (x) == T_REG
12590 || system_reg_operand (x, VOIDmode)))))
12591 {
12592 if (rclass == FPUL_REGS)
12593 return GENERAL_REGS;
12594 return FPUL_REGS;
12595 }
12596 if ((rclass == TARGET_REGS
12597 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12598 && !satisfies_constraint_Csy (x)
12599 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12600 return GENERAL_REGS;
12601 if ((rclass == MAC_REGS || rclass == PR_REGS)
12602 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12603 && rclass != REGNO_REG_CLASS (REGNO (x)))
12604 return GENERAL_REGS;
12605 if (rclass != GENERAL_REGS && REG_P (x)
12606 && TARGET_REGISTER_P (REGNO (x)))
12607 return GENERAL_REGS;
12608
12609 /* If we get here, fall back to loading the FPUL register through general
12610 registers.  This case can happen when the movsi_ie insn is picked initially
12611 to load/store the FPUL register from/to another register, and then the
12612 other register is allocated on the stack. */
12613 if (rclass == FPUL_REGS && true_regnum (x) == -1)
12614 return GENERAL_REGS;
12615
12616 /* Force mov.b / mov.w displacement addressing insn to use R0 as
12617 the other operand.
12618 On SH2A we could also just leave it alone here, which would result in a
12619 4 byte move insn being generated instead. However, for this to work
12620 the insns must have the appropriate alternatives. */
12621 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
12622 && satisfies_constraint_Sdd (x)
12623 && disp_addr_displacement (x) <= max_mov_insn_displacement (mode, false))
12624 return R0_REGS;
12625
12626 /* When reload is trying to address a QImode or HImode subreg on the stack,
12627 force any subreg byte into R0_REGS, as this is going to become a
12628 displacement address.
12629 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
12630 is on the stack, the memref to it might already require a displacement
12631 and that has to be added to the final address. At this point we don't
12632 know the cumulative displacement so we assume the worst case. */
12633 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
12634 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
12635 return R0_REGS;
12636
12637 return NO_REGS;
12638 }
12639
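/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */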
12640 static void
12641 sh_conditional_register_usage (void)
12642 {
12643 int regno;
12644 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
12645 if (! VALID_REGISTER_P (regno))
12646 fixed_regs[regno] = call_used_regs[regno] = 1;
12647 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
12648 if (TARGET_SH5)
12649 {
12650 call_used_regs[FIRST_GENERAL_REG + 8]
12651 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
12652 call_really_used_regs[FIRST_GENERAL_REG + 8]
12653 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
12654 }
12655 if (TARGET_SHMEDIA)
12656 {
12657 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
12658 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
12659 regno_reg_class[FIRST_FP_REG] = FP_REGS;
12660 }
12661 if (flag_pic)
12662 {
12663 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12664 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12665 }
12666 /* Renesas saves and restores mac registers on call. */
12667 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
12668 {
12669 call_really_used_regs[MACH_REG] = 0;
12670 call_really_used_regs[MACL_REG] = 0;
12671 }
12672 for (regno = FIRST_FP_REG + (TARGET_LITTLE_ENDIAN != 0);
12673 regno <= LAST_FP_REG; regno += 2)
12674 SET_HARD_REG_BIT (reg_class_contents[DF_HI_REGS], regno);
12675 if (TARGET_SHMEDIA)
12676 {
12677 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
12678 if (! fixed_regs[regno] && call_really_used_regs[regno])
12679 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
12680 }
12681 else
12682 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
12683 if (! fixed_regs[regno] && call_really_used_regs[regno])
12684 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
12685 }
12686
12687 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
12688
12689 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
12690
12691 static bool
12692 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
12693 {
12694 return (TARGET_SHMEDIA
12695 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
12696 || x == CONST0_RTX (mode)
12697 || !TARGET_SHMEDIA_FPU
12698 || TARGET_SHMEDIA64)
12699 : (GET_CODE (x) != CONST_DOUBLE
12700 || mode == DFmode || mode == SFmode
12701 || mode == DImode || GET_MODE (x) == VOIDmode));
12702 }
12703
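/* The integer division strategy currently in effect (selected by the
   -mdiv= option handling).  */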
12704 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12705
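/* Set up the __sync_* out-of-line libcall names for operations up to
   word size.  */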
12706 static void
12707 sh_init_sync_libfuncs (void)
12708 {
12709 init_sync_libfuncs (UNITS_PER_WORD);
12710 }
12711
12712 #include "gt-sh.h"