re PR target/52479 (SH Target: SH4A DFmode fsca tests failing)
[gcc.git] / gcc / config / sh / sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "diagnostic-core.h"
41 #include "recog.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "df.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "params.h"
54 #include "ggc.h"
55 #include "gimple.h"
56 #include "cfgloop.h"
57 #include "alloc-pool.h"
58 #include "tm-constrs.h"
59 #include "opts.h"
60
61
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
63
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
66
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
73
74 /* Used to simplify the logic below. Find the attributes wherever
75 they may be. */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
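/* In other words: for a type, SH_ATTRIBUTES yields TYPE_ATTRIBUTES (decl);
   for a decl, it yields DECL_ATTRIBUTES (decl) when that list is non-empty
   and otherwise falls back to TYPE_ATTRIBUTES (TREE_TYPE (decl)).  */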
81
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
84
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
87
88 /* Global variables for machine-dependent things. */
89
 90 /* Which CPU we are scheduling for. */
91 enum processor_type sh_cpu;
92
93 /* Definitions used in ready queue reordering for first scheduling pass. */
94
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
97
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
100
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
103
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
106
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
110
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
113
114 /* Provides the class number of the smallest class containing
 115 a given register number. */
116
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
118 {
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
158 };
159
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
162
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
166
167 int assembler_dialect;
168
169 static bool shmedia_space_reserved_for_target_registers;
170
171 static void split_branches (rtx);
172 static int branch_dest (rtx);
173 static void force_into (rtx, rtx);
174 static void print_slot (rtx);
175 static rtx add_constant (rtx, enum machine_mode, rtx);
176 static void dump_table (rtx, rtx);
177 static int hi_const (rtx);
178 static int broken_move (rtx);
179 static int mova_p (rtx);
180 static rtx find_barrier (int, rtx, rtx);
181 static int noncall_uses_reg (rtx, rtx, rtx *);
182 static rtx gen_block_redirect (rtx, int, int);
183 static void sh_reorg (void);
184 static void sh_option_override (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET *, int);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static bool sh_frame_pointer_required (void);
193 static rtx mark_constant_pool_use (rtx);
194 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
195 static tree sh_handle_resbank_handler_attribute (tree *, tree,
196 tree, int, bool *);
197 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
198 tree, int, bool *);
199 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
202 static void sh_print_operand (FILE *, rtx, int);
203 static void sh_print_operand_address (FILE *, rtx);
204 static bool sh_print_operand_punct_valid_p (unsigned char code);
205 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
206 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
207 static void sh_insert_attributes (tree, tree *);
208 static const char *sh_check_pch_target_flags (int);
209 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
210 static int sh_adjust_cost (rtx, rtx, rtx, int);
211 static int sh_issue_rate (void);
212 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
213 static short find_set_regmode_weight (rtx, enum machine_mode);
214 static short find_insn_regmode_weight (rtx, enum machine_mode);
215 static void find_regmode_weight (basic_block, enum machine_mode);
216 static int find_r0_life_regions (basic_block);
217 static void sh_md_init_global (FILE *, int, int);
218 static void sh_md_finish_global (FILE *, int);
219 static int rank_for_reorder (const void *, const void *);
220 static void swap_reorder (rtx *, int);
221 static void ready_reorder (rtx *, int);
222 static short high_pressure (enum machine_mode);
223 static int sh_reorder (FILE *, int, rtx *, int *, int);
224 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
225 static void sh_md_init (FILE *, int, int);
226 static int sh_variable_issue (FILE *, int, rtx, int);
227
228 static bool sh_function_ok_for_sibcall (tree, tree);
229
230 static bool sh_cannot_modify_jumps_p (void);
231 static reg_class_t sh_target_reg_class (void);
232 static bool sh_optimize_target_register_callee_saved (bool);
233 static bool sh_ms_bitfield_layout_p (const_tree);
234
235 static void sh_init_builtins (void);
236 static tree sh_builtin_decl (unsigned, bool);
237 static void sh_media_init_builtins (void);
238 static tree sh_media_builtin_decl (unsigned, bool);
239 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
240 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
241 static void sh_file_start (void);
242 static int flow_dependent_p (rtx, rtx);
243 static void flow_dependent_p_1 (rtx, const_rtx, void *);
244 static int shiftcosts (rtx);
245 static int and_xor_ior_costs (rtx, int);
246 static int addsubcosts (rtx);
247 static int multcosts (rtx);
248 static bool unspec_caller_rtx_p (rtx);
249 static bool sh_cannot_copy_insn_p (rtx);
250 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
251 static int sh_address_cost (rtx, bool);
252 static int sh_pr_n_sets (void);
253 static rtx sh_allocate_initial_value (rtx);
254 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
255 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
256 enum machine_mode,
257 struct secondary_reload_info *);
258 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
259 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
260 static rtx sh_delegitimize_address (rtx);
261 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
262 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
263 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
264 static int scavenge_reg (HARD_REG_SET *s);
265 struct save_schedule_s;
266 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
267 struct save_schedule_s *, int);
268
269 static rtx sh_struct_value_rtx (tree, int);
270 static rtx sh_function_value (const_tree, const_tree, bool);
271 static bool sh_function_value_regno_p (const unsigned int);
272 static rtx sh_libcall_value (enum machine_mode, const_rtx);
273 static bool sh_return_in_memory (const_tree, const_tree);
274 static rtx sh_builtin_saveregs (void);
275 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode, tree, int *, int);
276 static bool sh_strict_argument_naming (cumulative_args_t);
277 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
278 static tree sh_build_builtin_va_list (void);
279 static void sh_va_start (tree, rtx);
280 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
281 static bool sh_promote_prototypes (const_tree);
282 static enum machine_mode sh_promote_function_mode (const_tree type,
283 enum machine_mode,
284 int *punsignedp,
285 const_tree funtype,
286 int for_return);
287 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
288 const_tree, bool);
289 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
290 const_tree, bool);
291 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
292 tree, bool);
293 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
294 const_tree, bool);
295 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
296 const_tree, bool);
297 static bool sh_scalar_mode_supported_p (enum machine_mode);
298 static int sh_dwarf_calling_convention (const_tree);
299 static void sh_encode_section_info (tree, rtx, int);
300 static int sh2a_function_vector_p (tree);
301 static void sh_trampoline_init (rtx, tree, rtx);
302 static rtx sh_trampoline_adjust_address (rtx);
303 static void sh_conditional_register_usage (void);
304 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
305
306 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
307 \f
308 static const struct attribute_spec sh_attribute_table[] =
309 {
310 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
311 affects_type_identity } */
312 { "interrupt_handler", 0, 0, true, false, false,
313 sh_handle_interrupt_handler_attribute, false },
314 { "sp_switch", 1, 1, true, false, false,
315 sh_handle_sp_switch_attribute, false },
316 { "trap_exit", 1, 1, true, false, false,
317 sh_handle_trap_exit_attribute, false },
318 { "renesas", 0, 0, false, true, false,
319 sh_handle_renesas_attribute, false },
320 { "trapa_handler", 0, 0, true, false, false,
321 sh_handle_interrupt_handler_attribute, false },
322 { "nosave_low_regs", 0, 0, true, false, false,
323 sh_handle_interrupt_handler_attribute, false },
324 { "resbank", 0, 0, true, false, false,
325 sh_handle_resbank_handler_attribute, false },
326 { "function_vector", 1, 1, true, false, false,
327 sh2a_handle_function_vector_handler_attribute, false },
328 { NULL, 0, 0, false, false, false, NULL, false }
329 };
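/* For illustration, typical source-level uses of these attributes in
   (hypothetical) user code look like:

     void isr (void) __attribute__ ((interrupt_handler));
     void isr_alt_stack (void) __attribute__ ((interrupt_handler,
                                               sp_switch ("alt_stack")));
     void isr_trap (void) __attribute__ ((interrupt_handler, trap_exit (4)));

   where "alt_stack" names a global variable holding the address of the
   alternate stack, and 4 is the trap number used when returning.  */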
330 \f
331 /* Initialize the GCC target structure. */
332 #undef TARGET_ATTRIBUTE_TABLE
333 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
334
335 /* The next two are used for debug info when compiling with -gdwarf. */
336 #undef TARGET_ASM_UNALIGNED_HI_OP
337 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
338 #undef TARGET_ASM_UNALIGNED_SI_OP
339 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
340
341 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
342 #undef TARGET_ASM_UNALIGNED_DI_OP
343 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
344 #undef TARGET_ASM_ALIGNED_DI_OP
345 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
346
347 #undef TARGET_OPTION_OVERRIDE
348 #define TARGET_OPTION_OVERRIDE sh_option_override
349
350 #undef TARGET_PRINT_OPERAND
351 #define TARGET_PRINT_OPERAND sh_print_operand
352 #undef TARGET_PRINT_OPERAND_ADDRESS
353 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
354 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
355 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
356 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
357 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
358
359 #undef TARGET_ASM_FUNCTION_EPILOGUE
360 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
361
362 #undef TARGET_ASM_OUTPUT_MI_THUNK
363 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
364
365 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
366 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
367
368 #undef TARGET_ASM_FILE_START
369 #define TARGET_ASM_FILE_START sh_file_start
370 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
371 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
372
373 #undef TARGET_REGISTER_MOVE_COST
374 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
375
376 #undef TARGET_INSERT_ATTRIBUTES
377 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
378
379 #undef TARGET_SCHED_ADJUST_COST
380 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
381
382 #undef TARGET_SCHED_ISSUE_RATE
383 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
384
 385 /* The following hooks have been implemented to re-enable sched1. With the
 386 help of these hooks we limit the movement of insns in sched1 in order to
 387 reduce register pressure. The overall idea is to keep count of the SImode
 388 and SFmode regs required by already scheduled insns. When these counts
 389 cross certain threshold values, give priority to insns that free registers.
 390 The insn that frees registers is most likely to be the insn with the lowest
 391 LUID (original insn order); but such an insn might be sitting in the stalled
 392 queue (Q) instead of the ready queue (R). To solve this, we skip cycles,
 393 up to a maximum of 8, so that such insns may move from Q -> R.
 394
 395 The hooks are described below:
 396
 397 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
 398 scheduler; it is called inside sched_init, just after the call to
 399 find_insn_reg_weights. It is used to calculate the SImode and SFmode
 400 weights of the insns of basic blocks, much like what
 401 find_insn_reg_weights does.
402 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
403
404 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
405 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
406 (Q)->(R).
407
408 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
 409 high, reorder the ready queue so that the insn with the lowest LUID will be
410 issued next.
411
412 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
413 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
414
415 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
416 can be returned from TARGET_SCHED_REORDER2.
417
418 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
419
420 #undef TARGET_SCHED_DFA_NEW_CYCLE
421 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
422
423 #undef TARGET_SCHED_INIT_GLOBAL
424 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
425
426 #undef TARGET_SCHED_FINISH_GLOBAL
427 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
428
429 #undef TARGET_SCHED_VARIABLE_ISSUE
430 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
431
432 #undef TARGET_SCHED_REORDER
433 #define TARGET_SCHED_REORDER sh_reorder
434
435 #undef TARGET_SCHED_REORDER2
436 #define TARGET_SCHED_REORDER2 sh_reorder2
437
438 #undef TARGET_SCHED_INIT
439 #define TARGET_SCHED_INIT sh_md_init
440
441 #undef TARGET_DELEGITIMIZE_ADDRESS
442 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
443
444 #undef TARGET_LEGITIMIZE_ADDRESS
445 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
446
447 #undef TARGET_CANNOT_MODIFY_JUMPS_P
448 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
449 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
450 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
451 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
452 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
453 sh_optimize_target_register_callee_saved
454
455 #undef TARGET_MS_BITFIELD_LAYOUT_P
456 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
457
458 #undef TARGET_INIT_BUILTINS
459 #define TARGET_INIT_BUILTINS sh_init_builtins
460 #undef TARGET_BUILTIN_DECL
461 #define TARGET_BUILTIN_DECL sh_builtin_decl
462 #undef TARGET_EXPAND_BUILTIN
463 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
464
465 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
466 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
467
468 #undef TARGET_CANNOT_COPY_INSN_P
469 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
470 #undef TARGET_RTX_COSTS
471 #define TARGET_RTX_COSTS sh_rtx_costs
472 #undef TARGET_ADDRESS_COST
473 #define TARGET_ADDRESS_COST sh_address_cost
474 #undef TARGET_ALLOCATE_INITIAL_VALUE
475 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
476
477 #undef TARGET_MACHINE_DEPENDENT_REORG
478 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
479
480 #undef TARGET_DWARF_REGISTER_SPAN
481 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
482
483 #ifdef HAVE_AS_TLS
484 #undef TARGET_HAVE_TLS
485 #define TARGET_HAVE_TLS true
486 #endif
487
488 #undef TARGET_PROMOTE_PROTOTYPES
489 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
490 #undef TARGET_PROMOTE_FUNCTION_MODE
491 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
492
493 #undef TARGET_FUNCTION_VALUE
494 #define TARGET_FUNCTION_VALUE sh_function_value
495 #undef TARGET_FUNCTION_VALUE_REGNO_P
496 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
497 #undef TARGET_LIBCALL_VALUE
498 #define TARGET_LIBCALL_VALUE sh_libcall_value
499 #undef TARGET_STRUCT_VALUE_RTX
500 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
501 #undef TARGET_RETURN_IN_MEMORY
502 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
503
504 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
505 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
506 #undef TARGET_SETUP_INCOMING_VARARGS
507 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
508 #undef TARGET_STRICT_ARGUMENT_NAMING
509 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
510 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
511 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
512 #undef TARGET_MUST_PASS_IN_STACK
513 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
514 #undef TARGET_PASS_BY_REFERENCE
515 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
516 #undef TARGET_CALLEE_COPIES
517 #define TARGET_CALLEE_COPIES sh_callee_copies
518 #undef TARGET_ARG_PARTIAL_BYTES
519 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
520 #undef TARGET_FUNCTION_ARG
521 #define TARGET_FUNCTION_ARG sh_function_arg
522 #undef TARGET_FUNCTION_ARG_ADVANCE
523 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
524
525 #undef TARGET_BUILD_BUILTIN_VA_LIST
526 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
527 #undef TARGET_EXPAND_BUILTIN_VA_START
528 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
529 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
530 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
531
532 #undef TARGET_SCALAR_MODE_SUPPORTED_P
533 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
534 #undef TARGET_VECTOR_MODE_SUPPORTED_P
535 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
536
537 #undef TARGET_CHECK_PCH_TARGET_FLAGS
538 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
539
540 #undef TARGET_DWARF_CALLING_CONVENTION
541 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
542
543 #undef TARGET_FRAME_POINTER_REQUIRED
544 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
545
546 /* Return regmode weight for insn. */
547 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
548
549 /* Return current register pressure for regmode. */
550 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
551
552 #undef TARGET_ENCODE_SECTION_INFO
553 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
554
555 #undef TARGET_SECONDARY_RELOAD
556 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
557
558 #undef TARGET_PREFERRED_RELOAD_CLASS
559 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
560
561 #undef TARGET_CONDITIONAL_REGISTER_USAGE
562 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
563
564 #undef TARGET_LEGITIMATE_ADDRESS_P
565 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
566
567 #undef TARGET_TRAMPOLINE_INIT
568 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
569 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
570 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
571
572 #undef TARGET_LEGITIMATE_CONSTANT_P
573 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
574
575 /* Machine-specific symbol_ref flags. */
576 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
577
 578 /* The tas.b instruction sets bit 7 in the byte, i.e. 0x80. This value
579 is used by optabs.c atomic op expansion code as well as in sync.md. */
580 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
581 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
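/* For reference: "tas.b @Rn" sets T = 1 if the byte at (Rn) was zero and
   T = 0 otherwise, then unconditionally ORs 0x80 into that byte.  The "set"
   value the atomic test-and-set expansion reads back is therefore 0x80
   rather than 1, which is what the definition above communicates.  */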
582
583 struct gcc_target targetm = TARGET_INITIALIZER;
584 \f
 585 /* Implement the TARGET_OPTION_OVERRIDE hook. Validate and override
 586 various options, and do some machine-dependent initialization. */
587 static void
588 sh_option_override (void)
589 {
590 int regno;
591
592 SUBTARGET_OVERRIDE_OPTIONS;
593 if (optimize > 1 && !optimize_size)
594 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
595 if (flag_finite_math_only == 2)
596 flag_finite_math_only
597 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
598 if (TARGET_SH2E && !flag_finite_math_only)
599 target_flags |= MASK_IEEE;
600 sh_cpu = PROCESSOR_SH1;
601 assembler_dialect = 0;
602 if (TARGET_SH2)
603 sh_cpu = PROCESSOR_SH2;
604 if (TARGET_SH2E)
605 sh_cpu = PROCESSOR_SH2E;
606 if (TARGET_SH2A)
607 sh_cpu = PROCESSOR_SH2A;
608 if (TARGET_SH3)
609 sh_cpu = PROCESSOR_SH3;
610 if (TARGET_SH3E)
611 sh_cpu = PROCESSOR_SH3E;
612 if (TARGET_SH4)
613 {
614 assembler_dialect = 1;
615 sh_cpu = PROCESSOR_SH4;
616 }
617 if (TARGET_SH4A_ARCH)
618 {
619 assembler_dialect = 1;
620 sh_cpu = PROCESSOR_SH4A;
621 }
622 if (TARGET_SH5)
623 {
624 sh_cpu = PROCESSOR_SH5;
625 target_flags |= MASK_ALIGN_DOUBLE;
626 if (TARGET_SHMEDIA_FPU)
627 target_flags |= MASK_FMOVD;
628 if (TARGET_SHMEDIA)
629 {
630 /* There are no delay slots on SHmedia. */
631 flag_delayed_branch = 0;
 632 /* Relaxation isn't yet supported for SHmedia. */
633 target_flags &= ~MASK_RELAX;
 634 /* After reload, if-conversion does little good but can cause
635 ICEs:
636 - find_if_block doesn't do anything for SH because we don't
637 have conditional execution patterns. (We use conditional
638 move patterns, which are handled differently, and only
639 before reload).
640 - find_cond_trap doesn't do anything for the SH because we
641 don't have conditional traps.
642 - find_if_case_1 uses redirect_edge_and_branch_force in
643 the only path that does an optimization, and this causes
644 an ICE when branch targets are in registers.
645 - find_if_case_2 doesn't do anything for the SHmedia after
646 reload except when it can redirect a tablejump - and
647 that's rather rare. */
648 flag_if_conversion2 = 0;
649 if (! strcmp (sh_div_str, "call"))
650 sh_div_strategy = SH_DIV_CALL;
651 else if (! strcmp (sh_div_str, "call2"))
652 sh_div_strategy = SH_DIV_CALL2;
653 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
654 sh_div_strategy = SH_DIV_FP;
655 else if (! strcmp (sh_div_str, "inv"))
656 sh_div_strategy = SH_DIV_INV;
657 else if (! strcmp (sh_div_str, "inv:minlat"))
658 sh_div_strategy = SH_DIV_INV_MINLAT;
659 else if (! strcmp (sh_div_str, "inv20u"))
660 sh_div_strategy = SH_DIV_INV20U;
661 else if (! strcmp (sh_div_str, "inv20l"))
662 sh_div_strategy = SH_DIV_INV20L;
663 else if (! strcmp (sh_div_str, "inv:call2"))
664 sh_div_strategy = SH_DIV_INV_CALL2;
665 else if (! strcmp (sh_div_str, "inv:call"))
666 sh_div_strategy = SH_DIV_INV_CALL;
667 else if (! strcmp (sh_div_str, "inv:fp"))
668 {
669 if (TARGET_FPU_ANY)
670 sh_div_strategy = SH_DIV_INV_FP;
671 else
672 sh_div_strategy = SH_DIV_INV;
673 }
674 TARGET_CBRANCHDI4 = 0;
675 /* Assembler CFI isn't yet fully supported for SHmedia. */
676 flag_dwarf2_cfi_asm = 0;
677 }
678 }
679 else
680 {
 681 /* Only the sh64-elf assembler supports .quad properly. */
682 targetm.asm_out.aligned_op.di = NULL;
683 targetm.asm_out.unaligned_op.di = NULL;
684 }
685 if (TARGET_SH1)
686 {
687 if (! strcmp (sh_div_str, "call-div1"))
688 sh_div_strategy = SH_DIV_CALL_DIV1;
689 else if (! strcmp (sh_div_str, "call-fp")
690 && (TARGET_FPU_DOUBLE
691 || (TARGET_HARD_SH4 && TARGET_SH2E)
692 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
693 sh_div_strategy = SH_DIV_CALL_FP;
694 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
695 sh_div_strategy = SH_DIV_CALL_TABLE;
696 else
 697 /* Pick one that makes the most sense for the target in general.
698 It is not much good to use different functions depending
699 on -Os, since then we'll end up with two different functions
700 when some of the code is compiled for size, and some for
701 speed. */
702
703 /* SH4 tends to emphasize speed. */
704 if (TARGET_HARD_SH4)
705 sh_div_strategy = SH_DIV_CALL_TABLE;
706 /* These have their own way of doing things. */
707 else if (TARGET_SH2A)
708 sh_div_strategy = SH_DIV_INTRINSIC;
709 /* ??? Should we use the integer SHmedia function instead? */
710 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
711 sh_div_strategy = SH_DIV_CALL_FP;
712 /* SH1 .. SH3 cores often go into small-footprint systems, so
713 default to the smallest implementation available. */
714 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
715 sh_div_strategy = SH_DIV_CALL_TABLE;
716 else
717 sh_div_strategy = SH_DIV_CALL_DIV1;
718 }
719 if (!TARGET_SH1)
720 TARGET_PRETEND_CMOVE = 0;
721 if (sh_divsi3_libfunc[0])
722 ; /* User supplied - leave it alone. */
723 else if (TARGET_DIVIDE_CALL_FP)
724 sh_divsi3_libfunc = "__sdivsi3_i4";
725 else if (TARGET_DIVIDE_CALL_TABLE)
726 sh_divsi3_libfunc = "__sdivsi3_i4i";
727 else if (TARGET_SH5)
728 sh_divsi3_libfunc = "__sdivsi3_1";
729 else
730 sh_divsi3_libfunc = "__sdivsi3";
731 if (sh_branch_cost == -1)
732 {
733 sh_branch_cost = 1;
734
735 /* The SH1 does not have delay slots, hence we get a pipeline stall
736 at every branch. The SH4 is superscalar, so the single delay slot
737 is not sufficient to keep both pipelines filled. */
738 if (! TARGET_SH2 || TARGET_HARD_SH4)
739 sh_branch_cost = 2;
740 }
741
742 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
743 if (! VALID_REGISTER_P (regno))
744 sh_register_names[regno][0] = '\0';
745
746 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
747 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
748 sh_additional_register_names[regno][0] = '\0';
749
750 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
751
752 if ((flag_pic && ! TARGET_PREFERGOT)
753 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
754 flag_no_function_cse = 1;
755
 756 if (targetm.small_register_classes_for_mode_p (VOIDmode))
757 {
 758 /* Never run scheduling before reload, since that can
 759 break global alloc, and generate slower code anyway due
 760 to the pressure on R0. */
 761 /* Enable sched1 for SH4 only if the user explicitly requests it.
 762 When sched1 is enabled, the ready queue will be reordered by
 763 the target hooks if pressure is high. We cannot do this for
764 PIC, SH3 and lower as they give spill failures for R0. */
765 if (!TARGET_HARD_SH4 || flag_pic)
766 flag_schedule_insns = 0;
767 /* ??? Current exception handling places basic block boundaries
 768 after call_insns. This causes high pressure on R0 and gives
769 spill failures for R0 in reload. See PR 22553 and the thread
770 on gcc-patches
771 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
772 else if (flag_exceptions)
773 {
774 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
775 warning (0, "ignoring -fschedule-insns because of exception handling bug");
776 flag_schedule_insns = 0;
777 }
778 else if (flag_schedule_insns
779 && !global_options_set.x_flag_schedule_insns)
780 flag_schedule_insns = 0;
781 }
782
783 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
784 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
785
786 /* Unwind info is not correct around the CFG unless either a frame
787 pointer is present or M_A_O_A is set. Fixing this requires rewriting
788 unwind info generation to be aware of the CFG and propagating states
789 around edges. */
790 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
791 || flag_exceptions || flag_non_call_exceptions)
792 && flag_omit_frame_pointer
793 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
794 {
795 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
796 warning (0, "unwind tables currently require either a frame pointer "
797 "or -maccumulate-outgoing-args for correctness");
798 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
799 }
800
801 /* Unwinding with -freorder-blocks-and-partition does not work on this
802 architecture, because it requires far jumps to label crossing between
803 hot/cold sections which are rejected on this architecture. */
804 if (flag_reorder_blocks_and_partition)
805 {
806 if (flag_exceptions)
807 {
808 inform (input_location,
809 "-freorder-blocks-and-partition does not work with "
810 "exceptions on this architecture");
811 flag_reorder_blocks_and_partition = 0;
812 flag_reorder_blocks = 1;
813 }
814 else if (flag_unwind_tables)
815 {
816 inform (input_location,
817 "-freorder-blocks-and-partition does not support unwind "
818 "info on this architecture");
819 flag_reorder_blocks_and_partition = 0;
820 flag_reorder_blocks = 1;
821 }
822 }
823
824 /* Adjust loop, jump and function alignment values (in bytes), if those
825 were not specified by the user using -falign-loops, -falign-jumps
826 and -falign-functions options.
827 32 bit alignment is better for speed, because instructions can be
828 fetched as a pair from a longword boundary. For size use 16 bit
829 alignment to get more compact code.
830 Aligning all jumps increases the code size, even if it might
831 result in slightly faster code. Thus, it is set to the smallest
832 alignment possible if not specified by the user. */
833 if (align_loops == 0)
834 {
835 if (TARGET_SH5)
836 align_loops = 8;
837 else
838 align_loops = optimize_size ? 2 : 4;
839 }
840
841 if (align_jumps == 0)
842 {
843 if (TARGET_SHMEDIA)
844 align_jumps = 1 << CACHE_LOG;
845 else
846 align_jumps = 2;
847 }
848 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
849 align_jumps = TARGET_SHMEDIA ? 4 : 2;
850
851 if (align_functions == 0)
852 {
853 if (TARGET_SHMEDIA)
854 align_functions = optimize_size
855 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
856 else
857 align_functions = optimize_size ? 2 : 4;
858 }
859
860 /* The linker relaxation code breaks when a function contains
861 alignments that are larger than that at the start of a
862 compilation unit. */
863 if (TARGET_RELAX)
864 {
865 int min_align
866 = align_loops > align_jumps ? align_loops : align_jumps;
867
 868 /* Also take possible .long constants / mova tables into account. */
869 if (min_align < 4)
870 min_align = 4;
871 if (align_functions < min_align)
872 align_functions = min_align;
873 }
874
875 if (sh_fixed_range_str)
876 sh_fix_range (sh_fixed_range_str);
877
878 /* This target defaults to strict volatile bitfields. */
879 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
880 flag_strict_volatile_bitfields = 1;
881 }
882 \f
 883 /* Print the operand address in X to STREAM. */
884
885 static void
886 sh_print_operand_address (FILE *stream, rtx x)
887 {
888 switch (GET_CODE (x))
889 {
890 case REG:
891 case SUBREG:
892 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
893 break;
894
895 case PLUS:
896 {
897 rtx base = XEXP (x, 0);
898 rtx index = XEXP (x, 1);
899
900 switch (GET_CODE (index))
901 {
902 case CONST_INT:
903 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
904 reg_names[true_regnum (base)]);
905 break;
906
907 case REG:
908 case SUBREG:
909 {
910 int base_num = true_regnum (base);
911 int index_num = true_regnum (index);
912
913 fprintf (stream, "@(r0,%s)",
914 reg_names[MAX (base_num, index_num)]);
915 break;
916 }
917
918 default:
919 gcc_unreachable ();
920 }
921 }
922 break;
923
924 case PRE_DEC:
925 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
926 break;
927
928 case POST_INC:
929 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
930 break;
931
932 default:
933 x = mark_constant_pool_use (x);
934 output_addr_const (stream, x);
935 break;
936 }
937 }
938
939 /* Print operand x (an rtx) in assembler syntax to file stream
940 according to modifier code.
941
942 '.' print a .s if insn needs delay slot
943 ',' print LOCAL_LABEL_PREFIX
 944 '@' print trapa, rte or rts depending upon the interrupt-related function attributes
945 '#' output a nop if there is nothing to put in the delay slot
946 ''' print likelihood suffix (/u for unlikely).
947 '>' print branch target if -fverbose-asm
948 'O' print a constant without the #
949 'R' print the LSW of a dp value - changes if in little endian
950 'S' print the MSW of a dp value - changes if in little endian
951 'T' print the next word of a dp value - same as 'R' in big endian mode.
952 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
953 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
954 'N' print 'r63' if the operand is (const_int 0).
955 'd' print a V2SF reg as dN instead of fpN.
956 'm' print a pair `base,offset' or `base,index', for LD and ST.
957 'U' Likewise for {LD,ST}{HI,LO}.
958 'V' print the position of a single bit set.
959 'W' print the position of a single bit cleared.
960 't' print a memory address which is a register.
961 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
962 'o' output an operator. */
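/* A small worked example (hypothetical operands): on a little-endian target,
   a 64-bit value held in the general register pair r4/r5 has its least
   significant word in r4, so '%R' prints "r4" and '%S' prints "r5"; on a
   big-endian target the two are swapped.  */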
963
964 static void
965 sh_print_operand (FILE *stream, rtx x, int code)
966 {
967 int regno;
968 enum machine_mode mode;
969
970 switch (code)
971 {
972 tree trapa_attr;
973
974 case '.':
975 if (final_sequence
976 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
977 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
978 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
979 break;
980 case ',':
981 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
982 break;
983 case '@':
984 trapa_attr = lookup_attribute ("trap_exit",
985 DECL_ATTRIBUTES (current_function_decl));
986 if (trapa_attr)
987 fprintf (stream, "trapa #%ld",
988 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
989 else if (sh_cfun_interrupt_handler_p ())
990 {
991 if (sh_cfun_resbank_handler_p ())
992 fprintf (stream, "resbank\n");
993 fprintf (stream, "rte");
994 }
995 else
996 fprintf (stream, "rts");
997 break;
998 case '#':
999 /* Output a nop if there's nothing in the delay slot. */
1000 if (dbr_sequence_length () == 0)
1001 fprintf (stream, "\n\tnop");
1002 break;
1003 case '\'':
1004 {
1005 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1006
1007 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1008 fputs ("/u", stream);
1009 break;
1010 }
1011 case '>':
1012 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1013 {
1014 fputs ("\t! target: ", stream);
1015 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1016 }
1017 break;
1018 case 'O':
1019 x = mark_constant_pool_use (x);
1020 output_addr_const (stream, x);
1021 break;
1022 /* N.B.: %R / %S / %T adjust memory addresses by four.
1023 For SHMEDIA, that means they can be used to access the first and
 1024 second 32 bit parts of a 64 bit (or larger) value that
 1025 might be held in floating point registers or memory.
 1026 While they can be used to access 64 bit parts of a larger value
 1027 held in general purpose registers, that won't work with memory,
 1028 nor with fp registers, since the frxx names are used. */
1029 case 'R':
1030 if (REG_P (x) || GET_CODE (x) == SUBREG)
1031 {
1032 regno = true_regnum (x);
1033 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1034 fputs (reg_names[regno], (stream));
1035 }
1036 else if (MEM_P (x))
1037 {
1038 x = adjust_address (x, SImode, 4 * LSW);
1039 sh_print_operand_address (stream, XEXP (x, 0));
1040 }
1041 else
1042 {
1043 rtx sub = NULL_RTX;
1044
1045 mode = GET_MODE (x);
1046 if (mode == VOIDmode)
1047 mode = DImode;
1048 if (GET_MODE_SIZE (mode) >= 8)
1049 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1050 if (sub)
1051 sh_print_operand (stream, sub, 0);
1052 else
1053 output_operand_lossage ("invalid operand to %%R");
1054 }
1055 break;
1056 case 'S':
1057 if (REG_P (x) || GET_CODE (x) == SUBREG)
1058 {
1059 regno = true_regnum (x);
1060 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1061 fputs (reg_names[regno], (stream));
1062 }
1063 else if (MEM_P (x))
1064 {
1065 x = adjust_address (x, SImode, 4 * MSW);
1066 sh_print_operand_address (stream, XEXP (x, 0));
1067 }
1068 else
1069 {
1070 rtx sub = NULL_RTX;
1071
1072 mode = GET_MODE (x);
1073 if (mode == VOIDmode)
1074 mode = DImode;
1075 if (GET_MODE_SIZE (mode) >= 8)
1076 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1077 if (sub)
1078 sh_print_operand (stream, sub, 0);
1079 else
1080 output_operand_lossage ("invalid operand to %%S");
1081 }
1082 break;
1083 case 'T':
1084 /* Next word of a double. */
1085 switch (GET_CODE (x))
1086 {
1087 case REG:
1088 fputs (reg_names[REGNO (x) + 1], (stream));
1089 break;
1090 case MEM:
1091 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1092 && GET_CODE (XEXP (x, 0)) != POST_INC)
1093 x = adjust_address (x, SImode, 4);
1094 sh_print_operand_address (stream, XEXP (x, 0));
1095 break;
1096 default:
1097 break;
1098 }
1099 break;
1100
1101 case 't':
1102 gcc_assert (MEM_P (x));
1103 x = XEXP (x, 0);
1104 switch (GET_CODE (x))
1105 {
1106 case REG:
1107 case SUBREG:
1108 sh_print_operand (stream, x, 0);
1109 break;
1110 default:
1111 break;
1112 }
1113 break;
1114
1115 case 'o':
1116 switch (GET_CODE (x))
1117 {
1118 case PLUS: fputs ("add", stream); break;
1119 case MINUS: fputs ("sub", stream); break;
1120 case MULT: fputs ("mul", stream); break;
1121 case DIV: fputs ("div", stream); break;
1122 case EQ: fputs ("eq", stream); break;
1123 case NE: fputs ("ne", stream); break;
1124 case GT: case LT: fputs ("gt", stream); break;
1125 case GE: case LE: fputs ("ge", stream); break;
1126 case GTU: case LTU: fputs ("gtu", stream); break;
1127 case GEU: case LEU: fputs ("geu", stream); break;
1128 default:
1129 break;
1130 }
1131 break;
1132 case 'M':
1133 if (TARGET_SHMEDIA)
1134 {
1135 if (MEM_P (x)
1136 && GET_CODE (XEXP (x, 0)) == PLUS
1137 && (REG_P (XEXP (XEXP (x, 0), 1))
1138 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1139 fputc ('x', stream);
1140 }
1141 else
1142 {
1143 if (MEM_P (x))
1144 {
1145 switch (GET_MODE (x))
1146 {
1147 case QImode: fputs (".b", stream); break;
1148 case HImode: fputs (".w", stream); break;
1149 case SImode: fputs (".l", stream); break;
1150 case SFmode: fputs (".s", stream); break;
1151 case DFmode: fputs (".d", stream); break;
1152 default: gcc_unreachable ();
1153 }
1154 }
1155 }
1156 break;
1157
1158 case 'm':
1159 gcc_assert (MEM_P (x));
1160 x = XEXP (x, 0);
1161 /* Fall through. */
1162 case 'U':
1163 switch (GET_CODE (x))
1164 {
1165 case REG:
1166 case SUBREG:
1167 sh_print_operand (stream, x, 0);
1168 fputs (", 0", stream);
1169 break;
1170
1171 case PLUS:
1172 sh_print_operand (stream, XEXP (x, 0), 0);
1173 fputs (", ", stream);
1174 sh_print_operand (stream, XEXP (x, 1), 0);
1175 break;
1176
1177 default:
1178 gcc_unreachable ();
1179 }
1180 break;
1181
1182 case 'V':
1183 {
1184 int num = exact_log2 (INTVAL (x));
1185 gcc_assert (num >= 0);
1186 fprintf (stream, "#%d", num);
1187 }
1188 break;
1189
1190 case 'W':
1191 {
1192 int num = exact_log2 (~INTVAL (x));
1193 gcc_assert (num >= 0);
1194 fprintf (stream, "#%d", num);
1195 }
1196 break;
1197
1198 case 'd':
1199 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1200
1201 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1202 break;
1203
1204 case 'N':
1205 if (x == CONST0_RTX (GET_MODE (x)))
1206 {
1207 fprintf ((stream), "r63");
1208 break;
1209 }
1210 goto default_output;
1211 case 'u':
1212 if (CONST_INT_P (x))
1213 {
1214 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1215 break;
1216 }
1217 /* Fall through. */
1218
1219 default_output:
1220 default:
1221 regno = 0;
1222 mode = GET_MODE (x);
1223
1224 switch (GET_CODE (x))
1225 {
1226 case TRUNCATE:
1227 {
1228 rtx inner = XEXP (x, 0);
1229 int offset = 0;
1230 enum machine_mode inner_mode;
1231
1232 /* We might see SUBREGs with vector mode registers inside. */
1233 if (GET_CODE (inner) == SUBREG
1234 && (GET_MODE_SIZE (GET_MODE (inner))
1235 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1236 && subreg_lowpart_p (inner))
1237 inner = SUBREG_REG (inner);
1238 if (CONST_INT_P (inner))
1239 {
1240 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1241 goto default_output;
1242 }
1243 inner_mode = GET_MODE (inner);
1244 if (GET_CODE (inner) == SUBREG
1245 && (GET_MODE_SIZE (GET_MODE (inner))
1246 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1247 && REG_P (SUBREG_REG (inner)))
1248 {
1249 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1250 GET_MODE (SUBREG_REG (inner)),
1251 SUBREG_BYTE (inner),
1252 GET_MODE (inner));
1253 inner = SUBREG_REG (inner);
1254 }
1255 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1256 abort ();
1257 /* Floating point register pairs are always big endian;
1258 general purpose registers are 64 bit wide. */
1259 regno = REGNO (inner);
1260 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1261 - HARD_REGNO_NREGS (regno, mode))
1262 + offset;
1263 x = inner;
1264 goto reg;
1265 }
1266 case SIGN_EXTEND:
1267 x = XEXP (x, 0);
1268 goto reg;
1269 /* FIXME: We need this on SHmedia32 because reload generates
1270 some sign-extended HI or QI loads into DImode registers
1271 but, because Pmode is SImode, the address ends up with a
1272 subreg:SI of the DImode register. Maybe reload should be
1273 fixed so as to apply alter_subreg to such loads? */
1274 case IF_THEN_ELSE:
1275 gcc_assert (trapping_target_operand (x, VOIDmode));
1276 x = XEXP (XEXP (x, 2), 0);
1277 goto default_output;
1278 case SUBREG:
1279 gcc_assert (SUBREG_BYTE (x) == 0
1280 && REG_P (SUBREG_REG (x)));
1281
1282 x = SUBREG_REG (x);
1283 /* Fall through. */
1284
1285 reg:
1286 case REG:
1287 regno += REGNO (x);
1288 if (FP_REGISTER_P (regno)
1289 && mode == V16SFmode)
1290 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1291 else if (FP_REGISTER_P (REGNO (x))
1292 && mode == V4SFmode)
1293 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1294 else if (REG_P (x)
1295 && mode == V2SFmode)
1296 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1297 else if (FP_REGISTER_P (REGNO (x))
1298 && GET_MODE_SIZE (mode) > 4)
1299 fprintf ((stream), "d%s", reg_names[regno] + 1);
1300 else
1301 fputs (reg_names[regno], (stream));
1302 break;
1303
1304 case MEM:
1305 output_address (XEXP (x, 0));
1306 break;
1307
1308 default:
1309 if (TARGET_SH1)
1310 fputc ('#', stream);
1311 output_addr_const (stream, x);
1312 break;
1313 }
1314 break;
1315 }
1316 }
1317
1318 static bool
1319 sh_print_operand_punct_valid_p (unsigned char code)
1320 {
1321 return (code == '.' || code == '#' || code == '@' || code == ','
1322 || code == '$' || code == '\'' || code == '>');
1323 }
1324
1325 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1326
1327 static bool
1328 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1329 {
1330 if (GET_CODE (x) == UNSPEC)
1331 {
1332 switch (XINT (x, 1))
1333 {
1334 case UNSPEC_DATALABEL:
1335 fputs ("datalabel ", file);
1336 output_addr_const (file, XVECEXP (x, 0, 0));
1337 break;
1338 case UNSPEC_PIC:
1339 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1340 output_addr_const (file, XVECEXP (x, 0, 0));
1341 break;
1342 case UNSPEC_GOT:
1343 output_addr_const (file, XVECEXP (x, 0, 0));
1344 fputs ("@GOT", file);
1345 break;
1346 case UNSPEC_GOTOFF:
1347 output_addr_const (file, XVECEXP (x, 0, 0));
1348 fputs ("@GOTOFF", file);
1349 break;
1350 case UNSPEC_PLT:
1351 output_addr_const (file, XVECEXP (x, 0, 0));
1352 fputs ("@PLT", file);
1353 break;
1354 case UNSPEC_GOTPLT:
1355 output_addr_const (file, XVECEXP (x, 0, 0));
1356 fputs ("@GOTPLT", file);
1357 break;
1358 case UNSPEC_DTPOFF:
1359 output_addr_const (file, XVECEXP (x, 0, 0));
1360 fputs ("@DTPOFF", file);
1361 break;
1362 case UNSPEC_GOTTPOFF:
1363 output_addr_const (file, XVECEXP (x, 0, 0));
1364 fputs ("@GOTTPOFF", file);
1365 break;
1366 case UNSPEC_TPOFF:
1367 output_addr_const (file, XVECEXP (x, 0, 0));
1368 fputs ("@TPOFF", file);
1369 break;
1370 case UNSPEC_CALLER:
1371 {
1372 char name[32];
1373 /* LPCS stands for Label for PIC Call Site. */
1374 targetm.asm_out.generate_internal_label (name, "LPCS",
1375 INTVAL (XVECEXP (x, 0, 0)));
1376 assemble_name (file, name);
1377 }
1378 break;
1379 case UNSPEC_EXTRACT_S16:
1380 case UNSPEC_EXTRACT_U16:
1381 {
1382 rtx val, shift;
1383
1384 val = XVECEXP (x, 0, 0);
1385 shift = XVECEXP (x, 0, 1);
1386 fputc ('(', file);
1387 if (shift != const0_rtx)
1388 fputc ('(', file);
1389 if (GET_CODE (val) == CONST
1390 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1391 {
1392 fputc ('(', file);
1393 output_addr_const (file, val);
1394 fputc (')', file);
1395 }
1396 else
1397 output_addr_const (file, val);
1398 if (shift != const0_rtx)
1399 {
1400 fputs (" >> ", file);
1401 output_addr_const (file, shift);
1402 fputc (')', file);
1403 }
1404 fputs (" & 65535)", file);
1405 }
1406 break;
1407 case UNSPEC_SYMOFF:
1408 output_addr_const (file, XVECEXP (x, 0, 0));
1409 fputc ('-', file);
1410 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1411 {
1412 fputc ('(', file);
1413 output_addr_const (file, XVECEXP (x, 0, 1));
1414 fputc (')', file);
1415 }
1416 else
1417 output_addr_const (file, XVECEXP (x, 0, 1));
1418 break;
1419 case UNSPEC_PCREL_SYMOFF:
1420 output_addr_const (file, XVECEXP (x, 0, 0));
1421 fputs ("-(", file);
1422 output_addr_const (file, XVECEXP (x, 0, 1));
1423 fputs ("-.)", file);
1424 break;
1425 default:
1426 return false;
1427 }
1428 return true;
1429 }
1430 else
1431 return false;
1432 }
1433 \f
1434
1435 /* Encode symbol attributes of a SYMBOL_REF into its
1436 SYMBOL_REF_FLAGS. */
1437 static void
1438 sh_encode_section_info (tree decl, rtx rtl, int first)
1439 {
1440 default_encode_section_info (decl, rtl, first);
1441
1442 if (TREE_CODE (decl) == FUNCTION_DECL
1443 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1444 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1445 }
1446
1447 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1448 static void
1449 force_into (rtx value, rtx target)
1450 {
1451 value = force_operand (value, target);
1452 if (! rtx_equal_p (value, target))
1453 emit_insn (gen_move_insn (target, value));
1454 }
1455
1456 /* Emit code to perform a block move. Choose the best method.
1457
1458 OPERANDS[0] is the destination.
1459 OPERANDS[1] is the source.
1460 OPERANDS[2] is the size.
1461 OPERANDS[3] is the alignment safe to use. */
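/* A rough sketch of one of the cases handled below (assuming TARGET_HARD_SH4
   and a constant, word-aligned 12 byte copy): the expansion forces the
   destination address into r4 and the source address into r5, then emits a
   call to the __movmemSI12_i4 library routine which performs the copy.  */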
1462
1463 int
1464 expand_block_move (rtx *operands)
1465 {
1466 int align = INTVAL (operands[3]);
1467 int constp = (CONST_INT_P (operands[2]));
1468 int bytes = (constp ? INTVAL (operands[2]) : 0);
1469
1470 if (! constp)
1471 return 0;
1472
 1473 /* If the source isn't known to be word-aligned but the destination is,
 1474 we can still move words: use movua.l for the loads and mov.l for the
 1475 stores, and generate a relatively short and efficient sequence. */
1476 if (TARGET_SH4A_ARCH && align < 4
1477 && MEM_ALIGN (operands[0]) >= 32
1478 && can_move_by_pieces (bytes, 32))
1479 {
1480 rtx dest = copy_rtx (operands[0]);
1481 rtx src = copy_rtx (operands[1]);
1482 /* We could use different pseudos for each copied word, but
1483 since movua can only load into r0, it's kind of
1484 pointless. */
1485 rtx temp = gen_reg_rtx (SImode);
1486 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1487 int copied = 0;
1488
1489 while (copied + 4 <= bytes)
1490 {
1491 rtx to = adjust_address (dest, SImode, copied);
1492 rtx from = adjust_automodify_address (src, BLKmode,
1493 src_addr, copied);
1494
1495 set_mem_size (from, 4);
1496 emit_insn (gen_movua (temp, from));
1497 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1498 emit_move_insn (to, temp);
1499 copied += 4;
1500 }
1501
1502 if (copied < bytes)
1503 move_by_pieces (adjust_address (dest, BLKmode, copied),
1504 adjust_automodify_address (src, BLKmode,
1505 src_addr, copied),
1506 bytes - copied, align, 0);
1507
1508 return 1;
1509 }
1510
 1511 /* If it doesn't have 4 byte alignment, or if it isn't a multiple of
 1512 4 bytes, then fail (a non-constant size was already rejected above). */
1513 if (align < 4 || (bytes % 4 != 0))
1514 return 0;
1515
1516 if (TARGET_HARD_SH4)
1517 {
1518 if (bytes < 12)
1519 return 0;
1520 else if (bytes == 12)
1521 {
1522 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1523 rtx r4 = gen_rtx_REG (SImode, 4);
1524 rtx r5 = gen_rtx_REG (SImode, 5);
1525
1526 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1527 force_into (XEXP (operands[0], 0), r4);
1528 force_into (XEXP (operands[1], 0), r5);
1529 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1530 return 1;
1531 }
1532 else if (! optimize_size)
1533 {
1534 const char *entry_name;
1535 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1536 int dwords;
1537 rtx r4 = gen_rtx_REG (SImode, 4);
1538 rtx r5 = gen_rtx_REG (SImode, 5);
1539 rtx r6 = gen_rtx_REG (SImode, 6);
1540
1541 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1542 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1543 force_into (XEXP (operands[0], 0), r4);
1544 force_into (XEXP (operands[1], 0), r5);
1545
1546 dwords = bytes >> 3;
1547 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1548 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1549 return 1;
1550 }
1551 else
1552 return 0;
1553 }
1554 if (bytes < 64)
1555 {
1556 char entry[30];
1557 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1558 rtx r4 = gen_rtx_REG (SImode, 4);
1559 rtx r5 = gen_rtx_REG (SImode, 5);
1560
1561 sprintf (entry, "__movmemSI%d", bytes);
1562 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1563 force_into (XEXP (operands[0], 0), r4);
1564 force_into (XEXP (operands[1], 0), r5);
1565 emit_insn (gen_block_move_real (func_addr_rtx));
1566 return 1;
1567 }
1568
 1569 /* This is the same number of bytes as a memcpy call, but to a different,
1570 less common function name, so this will occasionally use more space. */
1571 if (! optimize_size)
1572 {
1573 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1574 int final_switch, while_loop;
1575 rtx r4 = gen_rtx_REG (SImode, 4);
1576 rtx r5 = gen_rtx_REG (SImode, 5);
1577 rtx r6 = gen_rtx_REG (SImode, 6);
1578
1579 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1580 force_into (XEXP (operands[0], 0), r4);
1581 force_into (XEXP (operands[1], 0), r5);
1582
 1583 /* r6 controls the size of the move. It is decremented by 16 for
 1584 each 64 bytes moved. Then the negative bit left over is used
1585 as an index into a list of move instructions. e.g., a 72 byte move
1586 would be set up with size(r6) = 14, for one iteration through the
1587 big while loop, and a switch of -2 for the last part. */
1588
1589 final_switch = 16 - ((bytes / 4) % 16);
1590 while_loop = ((bytes / 4) / 16 - 1) * 16;
1591 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1592 emit_insn (gen_block_lump_real (func_addr_rtx));
1593 return 1;
1594 }
1595
1596 return 0;
1597 }
1598
1599 /* Prepare operands for a move define_expand; specifically, one of the
1600 operands must be in a register. */
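/* A rough outline of the cases handled below: with -fpic, symbolic source
   operands are first legitimized through legitimize_pic_address; outside
   reload, when neither operand is a register, the source is copied into a
   pseudo; and TLS symbol operands are expanded into the sequence required
   by their model (global-dynamic, local-dynamic, initial-exec or
   local-exec).  */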
1601
1602 int
1603 prepare_move_operands (rtx operands[], enum machine_mode mode)
1604 {
1605 if ((mode == SImode || mode == DImode)
1606 && flag_pic
1607 && ! ((mode == Pmode || mode == ptr_mode)
1608 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1609 {
1610 rtx temp;
1611 if (SYMBOLIC_CONST_P (operands[1]))
1612 {
1613 if (MEM_P (operands[0]))
1614 operands[1] = force_reg (Pmode, operands[1]);
1615 else if (TARGET_SHMEDIA
1616 && GET_CODE (operands[1]) == LABEL_REF
1617 && target_reg_operand (operands[0], mode))
1618 /* It's ok. */;
1619 else
1620 {
1621 temp = (!can_create_pseudo_p ()
1622 ? operands[0]
1623 : gen_reg_rtx (Pmode));
1624 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1625 }
1626 }
1627 else if (GET_CODE (operands[1]) == CONST
1628 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1629 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1630 {
1631 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1632 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1633 mode, temp);
1634 operands[1] = expand_binop (mode, add_optab, temp,
1635 XEXP (XEXP (operands[1], 0), 1),
1636 (!can_create_pseudo_p ()
1637 ? temp
1638 : gen_reg_rtx (Pmode)),
1639 0, OPTAB_LIB_WIDEN);
1640 }
1641 }
1642
1643 if (! reload_in_progress && ! reload_completed)
1644 {
1645 /* Copy the source to a register if both operands aren't registers. */
1646 if (! register_operand (operands[0], mode)
1647 && ! sh_register_operand (operands[1], mode))
1648 operands[1] = copy_to_mode_reg (mode, operands[1]);
1649
1650 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1651 {
1652 /* This is like change_address_1 (operands[0], mode, 0, 1),
1653 except that we can't use that function because it is static. */
1654 rtx new_rtx = change_address (operands[0], mode, 0);
1655 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1656 operands[0] = new_rtx;
1657 }
1658
1659 /* This case can happen while generating code to move the result
1660 of a library call to the target. Reject `st r0,@(rX,rY)' because
1661 reload will fail to find a spill register for rX, since r0 is already
1662 being used for the source. */
1663 else if (TARGET_SH1
1664 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1665 && MEM_P (operands[0])
1666 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1667 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1668 operands[1] = copy_to_mode_reg (mode, operands[1]);
1669 }
1670
1671 if (mode == Pmode || mode == ptr_mode)
1672 {
1673 rtx op0, op1, opc;
1674 enum tls_model tls_kind;
1675
1676 op0 = operands[0];
1677 op1 = operands[1];
1678 if (GET_CODE (op1) == CONST
1679 && GET_CODE (XEXP (op1, 0)) == PLUS
1680 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1681 != TLS_MODEL_NONE))
1682 {
1683 opc = XEXP (XEXP (op1, 0), 1);
1684 op1 = XEXP (XEXP (op1, 0), 0);
1685 }
1686 else
1687 opc = NULL_RTX;
1688
1689 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1690 {
1691 rtx tga_op1, tga_ret, tmp, tmp2;
1692
1693 if (! flag_pic
1694 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1695 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1696 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1697 {
1698 /* Don't schedule insns for getting GOT address when
1699 the first scheduling is enabled, to avoid spill
1700 failures for R0. */
1701 if (flag_schedule_insns)
1702 emit_insn (gen_blockage ());
1703 emit_insn (gen_GOTaddr2picreg ());
1704 emit_use (gen_rtx_REG (SImode, PIC_REG));
1705 if (flag_schedule_insns)
1706 emit_insn (gen_blockage ());
1707 }
1708
1709 switch (tls_kind)
1710 {
1711 case TLS_MODEL_GLOBAL_DYNAMIC:
1712 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1713 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1714 tmp = gen_reg_rtx (Pmode);
1715 emit_move_insn (tmp, tga_ret);
1716 op1 = tmp;
1717 break;
1718
1719 case TLS_MODEL_LOCAL_DYNAMIC:
1720 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1721 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1722
1723 tmp = gen_reg_rtx (Pmode);
1724 emit_move_insn (tmp, tga_ret);
1725
1726 if (register_operand (op0, Pmode))
1727 tmp2 = op0;
1728 else
1729 tmp2 = gen_reg_rtx (Pmode);
1730
1731 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1732 op1 = tmp2;
1733 break;
1734
1735 case TLS_MODEL_INITIAL_EXEC:
1736 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1737 tmp = gen_sym2GOTTPOFF (op1);
1738 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1739 op1 = tga_op1;
1740 break;
1741
1742 case TLS_MODEL_LOCAL_EXEC:
1743 tmp2 = gen_reg_rtx (Pmode);
1744 emit_insn (gen_load_gbr (tmp2));
1745 tmp = gen_reg_rtx (Pmode);
1746 emit_insn (gen_symTPOFF2reg (tmp, op1));
1747
1748 if (register_operand (op0, Pmode))
1749 op1 = op0;
1750 else
1751 op1 = gen_reg_rtx (Pmode);
1752
1753 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1754 break;
1755
1756 default:
1757 gcc_unreachable ();
1758 }
1759 if (opc)
1760 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1761 operands[1] = op1;
1762 }
1763 }
1764
1765 return 0;
1766 }
1767
1768 enum rtx_code
1769 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1770 enum rtx_code comparison)
1771 {
1772 rtx op1;
1773 rtx scratch = NULL_RTX;
1774
1775 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1776 comparison = GET_CODE (operands[0]);
1777 else
1778 scratch = operands[4];
1779 if (CONST_INT_P (operands[1])
1780 && !CONST_INT_P (operands[2]))
1781 {
1782 rtx tmp = operands[1];
1783
1784 operands[1] = operands[2];
1785 operands[2] = tmp;
1786 comparison = swap_condition (comparison);
1787 }
1788 if (CONST_INT_P (operands[2]))
1789 {
1790 HOST_WIDE_INT val = INTVAL (operands[2]);
1791 if ((val == -1 || val == -0x81)
1792 && (comparison == GT || comparison == LE))
1793 {
1794 comparison = (comparison == GT) ? GE : LT;
1795 operands[2] = gen_int_mode (val + 1, mode);
1796 }
1797 else if ((val == 1 || val == 0x80)
1798 && (comparison == GE || comparison == LT))
1799 {
1800 comparison = (comparison == GE) ? GT : LE;
1801 operands[2] = gen_int_mode (val - 1, mode);
1802 }
1803 else if (val == 1 && (comparison == GEU || comparison == LTU))
1804 {
1805 comparison = (comparison == GEU) ? NE : EQ;
1806 operands[2] = CONST0_RTX (mode);
1807 }
1808 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1809 {
1810 comparison = (comparison == GEU) ? GTU : LEU;
1811 operands[2] = gen_int_mode (val - 1, mode);
1812 }
1813 else if (val == 0 && (comparison == GTU || comparison == LEU))
1814 comparison = (comparison == GTU) ? NE : EQ;
1815 else if (mode == SImode
1816 && ((val == 0x7fffffff
1817 && (comparison == GTU || comparison == LEU))
1818 || ((unsigned HOST_WIDE_INT) val
1819 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1820 && (comparison == GEU || comparison == LTU))))
1821 {
1822 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1823 operands[2] = CONST0_RTX (mode);
1824 }
1825 }
1826 op1 = operands[1];
1827 if (can_create_pseudo_p ())
1828 operands[1] = force_reg (mode, op1);
1829 /* When we are handling DImode comparisons, we want to keep constants so
1830 that we can optimize the component comparisons; however, memory loads
1831 are better issued as a whole so that they can be scheduled well.
1832 SImode equality comparisons allow I08 constants, but only when they
1833 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1834 into a register, that register might as well be r0, and we allow the
1835 constant. If it is already in a register, this is likely to be
1836 allocated to a different hard register, thus we load the constant into
1837 a register unless it is zero. */
1838 if (!REG_P (operands[2])
1839 && (!CONST_INT_P (operands[2])
1840 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1841 && ((comparison != EQ && comparison != NE)
1842 || (REG_P (op1) && REGNO (op1) != R0_REG)
1843 || !satisfies_constraint_I08 (operands[2])))))
1844 {
1845 if (scratch && GET_MODE (scratch) == mode)
1846 {
1847 emit_move_insn (scratch, operands[2]);
1848 operands[2] = scratch;
1849 }
1850 else if (can_create_pseudo_p ())
1851 operands[2] = force_reg (mode, operands[2]);
1852 }
1853 return comparison;
1854 }
1855
1856 void
1857 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1858 {
1859 rtx (*branch_expander) (rtx) = gen_branch_true;
1860 rtx jump;
1861
1862 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1863 switch (comparison)
1864 {
1865 case NE: case LT: case LE: case LTU: case LEU:
1866 comparison = reverse_condition (comparison);
1867 branch_expander = gen_branch_false;
1868 default: ;
1869 }
1870 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1871 gen_rtx_fmt_ee (comparison, SImode,
1872 operands[1], operands[2])));
1873 jump = emit_jump_insn (branch_expander (operands[3]));
1874 if (probability >= 0)
1875 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1876
1877 }
1878
1879 /* ??? How should we distribute probabilities when more than one branch
1880 is generated? So far we only have some ad-hoc observations:
1881 - If the operands are random, they are likely to differ in both parts.
1882 - If comparing items in a hash chain, the operands are random or equal;
1883 operation should be EQ or NE.
1884 - If items are searched in an ordered tree from the root, we can expect
1885 the highpart to be unequal about half of the time; operation should be
1886 an inequality comparison, operands non-constant, and overall probability
1887 about 50%. Likewise for quicksort.
1888 - Range checks will be often made against constants. Even if we assume for
1889 simplicity an even distribution of the non-constant operand over a
1890 sub-range here, the same probability could be generated with differently
1891 wide sub-ranges - as long as the ratio of the part of the subrange that
1892 is before the threshold to the part that comes after the threshold stays
1893 the same. Thus, we can't really tell anything here;
1894 assuming random distribution is at least simple.
1895 */
1896
1897 bool
1898 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1899 {
1900 enum rtx_code msw_taken, msw_skip, lsw_taken;
1901 rtx skip_label = NULL_RTX;
1902 rtx op1h, op1l, op2h, op2l;
1903 int num_branches;
1904 int prob, rev_prob;
1905 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1906 rtx scratch = operands[4];
1907
1908 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1909 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1910 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1911 op1l = gen_lowpart (SImode, operands[1]);
1912 op2l = gen_lowpart (SImode, operands[2]);
1913 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1914 prob = split_branch_probability;
1915 rev_prob = REG_BR_PROB_BASE - prob;
1916 switch (comparison)
1917 {
1918 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1919 That costs 1 cycle more when the first branch can be predicted taken,
1920 but saves us mispredicts because only one branch needs prediction.
1921 It also enables generating the cmpeqdi_t-1 pattern. */
1922 case EQ:
1923 if (TARGET_CMPEQDI_T)
1924 {
1925 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1926 emit_jump_insn (gen_branch_true (operands[3]));
1927 return true;
1928 }
1929 msw_skip = NE;
1930 lsw_taken = EQ;
1931 if (prob >= 0)
1932 {
1933 /* If we had more precision, we'd use
1934 rev_prob - (rev_prob >> 32). */
1935 msw_skip_prob = rev_prob;
1936 if (REG_BR_PROB_BASE <= 65535)
1937 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1938 else
1939 {
1940 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1941 lsw_taken_prob
1942 = (prob
1943 ? (REG_BR_PROB_BASE
1944 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1945 / ((HOST_WIDEST_INT) prob << 32)))
1946 : 0);
1947 }
1948 }
1949 break;
1950 case NE:
1951 if (TARGET_CMPEQDI_T)
1952 {
1953 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1954 emit_jump_insn (gen_branch_false (operands[3]));
1955 return true;
1956 }
1957 msw_taken = NE;
1958 msw_taken_prob = prob;
1959 lsw_taken = NE;
1960 lsw_taken_prob = 0;
1961 break;
1962 case GTU: case GT:
1963 msw_taken = comparison;
1964 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1965 break;
1966 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1967 msw_skip = swap_condition (msw_taken);
1968 lsw_taken = GTU;
1969 break;
1970 case GEU: case GE:
1971 if (op2l == CONST0_RTX (SImode))
1972 msw_taken = comparison;
1973 else
1974 {
1975 msw_taken = comparison == GE ? GT : GTU;
1976 msw_skip = swap_condition (msw_taken);
1977 lsw_taken = GEU;
1978 }
1979 break;
1980 case LTU: case LT:
1981 msw_taken = comparison;
1982 if (op2l == CONST0_RTX (SImode))
1983 break;
1984 msw_skip = swap_condition (msw_taken);
1985 lsw_taken = LTU;
1986 break;
1987 case LEU: case LE:
1988 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1989 msw_taken = comparison;
1990 else
1991 {
1992 lsw_taken = LEU;
1993 if (comparison == LE)
1994 msw_taken = LT;
1995 else if (op2h != CONST0_RTX (SImode))
1996 msw_taken = LTU;
1997 else
1998 {
1999 msw_skip = swap_condition (LTU);
2000 break;
2001 }
2002 msw_skip = swap_condition (msw_taken);
2003 }
2004 break;
2005 default: return false;
2006 }
2007 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2008 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2009 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2010 if (comparison != EQ && comparison != NE && num_branches > 1)
2011 {
2012 if (!CONSTANT_P (operands[2])
2013 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2014 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2015 {
2016 msw_taken_prob = prob / 2U;
2017 msw_skip_prob
2018 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2019 lsw_taken_prob = prob;
2020 }
2021 else
2022 {
2023 msw_taken_prob = prob;
2024 msw_skip_prob = REG_BR_PROB_BASE;
2025 /* ??? If we have a constant op2h, should we use that when
2026 calculating lsw_taken_prob? */
2027 lsw_taken_prob = prob;
2028 }
2029 }
2030 operands[1] = op1h;
2031 operands[2] = op2h;
2032 operands[4] = NULL_RTX;
2033 if (reload_completed
2034 && ! arith_reg_or_0_operand (op2h, SImode)
2035 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2036 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2037 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2038 {
2039 emit_move_insn (scratch, operands[2]);
2040 operands[2] = scratch;
2041 }
2042 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2043 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2044 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2045 {
2046 rtx taken_label = operands[3];
2047
2048 /* Operands were possibly modified, but msw_skip doesn't expect this.
2049 Always use the original ones. */
2050 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2051 {
2052 operands[1] = op1h;
2053 operands[2] = op2h;
2054 if (reload_completed
2055 && ! arith_reg_or_0_operand (op2h, SImode)
2056 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2057 {
2058 emit_move_insn (scratch, operands[2]);
2059 operands[2] = scratch;
2060 }
2061 }
2062
2063 operands[3] = skip_label = gen_label_rtx ();
2064 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2065 operands[3] = taken_label;
2066 }
2067 operands[1] = op1l;
2068 operands[2] = op2l;
2069 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2070 {
2071 if (reload_completed
2072 && ! arith_reg_or_0_operand (op2l, SImode)
2073 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2074 {
2075 emit_move_insn (scratch, operands[2]);
2076 operands[2] = scratch;
2077 }
2078 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2079 }
2080 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2081 emit_label (skip_label);
2082 return true;
2083 }
2084
2085 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2086
2087 static void
2088 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2089 {
2090 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2091 {
2092 insn = gen_rtx_PARALLEL (VOIDmode,
2093 gen_rtvec (2, insn,
2094 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2095 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2096 }
2097 else
2098 emit_insn (insn);
2099 }
2100
2101 /* Prepare the operands for an scc instruction; make sure that the
2102 compare has been done and the result is in T_REG. */
2103 void
2104 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2105 {
2106 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2107 enum rtx_code oldcode = code;
2108 enum machine_mode mode;
2109
2110 /* First need a compare insn. */
2111 switch (code)
2112 {
2113 case NE:
2114 /* It isn't possible to handle this case. */
2115 gcc_unreachable ();
2116 case LT:
2117 code = GT;
2118 break;
2119 case LE:
2120 code = GE;
2121 break;
2122 case LTU:
2123 code = GTU;
2124 break;
2125 case LEU:
2126 code = GEU;
2127 break;
2128 default:
2129 break;
2130 }
2131 if (code != oldcode)
2132 {
2133 rtx tmp = op0;
2134 op0 = op1;
2135 op1 = tmp;
2136 }
2137
2138 mode = GET_MODE (op0);
2139 if (mode == VOIDmode)
2140 mode = GET_MODE (op1);
2141
2142 op0 = force_reg (mode, op0);
2143 if ((code != EQ && code != NE
2144 && (op1 != const0_rtx
2145 || code == GTU || code == GEU || code == LTU || code == LEU))
2146 || (mode == DImode && op1 != const0_rtx)
2147 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2148 op1 = force_reg (mode, op1);
2149
2150 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2151 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2152 mode);
2153 }
2154
2155 rtx
2156 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2157 rtx op0, rtx op1)
2158 {
2159 rtx target = gen_reg_rtx (SImode);
2160 rtx tmp;
2161
2162 gcc_assert (TARGET_SHMEDIA);
2163 switch (code)
2164 {
2165 case EQ:
2166 case GT:
2167 case LT:
2168 case UNORDERED:
2169 case GTU:
2170 case LTU:
2171 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2172 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2173 code = NE;
2174 break;
2175
2176 case NE:
2177 case GE:
2178 case LE:
2179 case ORDERED:
2180 case GEU:
2181 case LEU:
2182 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2183 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2184 code = EQ;
2185 break;
2186
2187 case UNEQ:
2188 case UNGE:
2189 case UNGT:
2190 case UNLE:
2191 case UNLT:
2192 case LTGT:
2193 return NULL_RTX;
2194
2195 default:
2196 gcc_unreachable ();
2197 }
2198
2199 if (mode == DImode)
2200 {
2201 rtx t2 = gen_reg_rtx (DImode);
2202 emit_insn (gen_extendsidi2 (t2, target));
2203 target = t2;
2204 }
2205
2206 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2207 }
2208
2209 /* Called from the md file, set up the operands of a compare instruction. */
2210
2211 void
2212 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2213 {
2214 enum rtx_code code = GET_CODE (operands[0]);
2215 enum rtx_code branch_code;
2216 rtx op0 = operands[1];
2217 rtx op1 = operands[2];
2218 rtx insn, tem;
2219 bool need_ccmpeq = false;
2220
2221 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2222 {
2223 op0 = force_reg (mode, op0);
2224 op1 = force_reg (mode, op1);
2225 }
2226 else
2227 {
2228 if (code != EQ || mode == DImode)
2229 {
2230 /* Force args into regs, since we can't use constants here. */
2231 op0 = force_reg (mode, op0);
2232 if (op1 != const0_rtx || code == GTU || code == GEU)
2233 op1 = force_reg (mode, op1);
2234 }
2235 }
2236
2237 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2238 {
2239 if (code == LT
2240 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2241 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2242 {
2243 tem = op0, op0 = op1, op1 = tem;
2244 code = swap_condition (code);
2245 }
2246
2247 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2248 if (code == GE)
2249 {
2250 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2251 need_ccmpeq = true;
2252 code = GT;
2253 }
2254
2255 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2256 to EQ/GT respectively. */
2257 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2258 }
2259
2260 switch (code)
2261 {
2262 case EQ:
2263 case GT:
2264 case GE:
2265 case GTU:
2266 case GEU:
2267 branch_code = code;
2268 break;
2269 case NE:
2270 case LT:
2271 case LE:
2272 case LTU:
2273 case LEU:
2274 branch_code = reverse_condition (code);
2275 break;
2276 default:
2277 gcc_unreachable ();
2278 }
2279
2280 insn = gen_rtx_SET (VOIDmode,
2281 gen_rtx_REG (SImode, T_REG),
2282 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2283
2284 sh_emit_set_t_insn (insn, mode);
2285 if (need_ccmpeq)
2286 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2287
2288 if (branch_code == code)
2289 emit_jump_insn (gen_branch_true (operands[3]));
2290 else
2291 emit_jump_insn (gen_branch_false (operands[3]));
2292 }
2293
2294 void
2295 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2296 {
2297 enum rtx_code code = GET_CODE (operands[1]);
2298 rtx op0 = operands[2];
2299 rtx op1 = operands[3];
2300 rtx lab = NULL_RTX;
2301 bool invert = false;
2302 rtx tem;
2303
2304 op0 = force_reg (mode, op0);
2305 if ((code != EQ && code != NE
2306 && (op1 != const0_rtx
2307 || code == GTU || code == GEU || code == LTU || code == LEU))
2308 || (mode == DImode && op1 != const0_rtx)
2309 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2310 op1 = force_reg (mode, op1);
2311
2312 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2313 {
2314 if (code == LT || code == LE)
2315 {
2316 code = swap_condition (code);
2317 tem = op0, op0 = op1, op1 = tem;
2318 }
2319 if (code == GE)
2320 {
2321 if (TARGET_IEEE)
2322 {
2323 lab = gen_label_rtx ();
2324 sh_emit_scc_to_t (EQ, op0, op1);
2325 emit_jump_insn (gen_branch_true (lab));
2326 code = GT;
2327 }
2328 else
2329 {
2330 code = LT;
2331 invert = true;
2332 }
2333 }
2334 }
2335
2336 if (code == NE)
2337 {
2338 code = EQ;
2339 invert = true;
2340 }
2341
2342 sh_emit_scc_to_t (code, op0, op1);
2343 if (lab)
2344 emit_label (lab);
2345 if (invert)
2346 emit_insn (gen_movnegt (operands[0]));
2347 else
2348 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2349 }
2350 \f
2351 /* Functions to output assembly code. */
2352
2353 /* Return a sequence of instructions to perform DI or DF move.
2354
2355 Since the SH cannot move a DI or DF in one instruction, we have
2356 to take care when we see overlapping source and dest registers. */
2357
2358 const char *
2359 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2360 enum machine_mode mode)
2361 {
2362 rtx dst = operands[0];
2363 rtx src = operands[1];
2364
2365 if (MEM_P (dst)
2366 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2367 return "mov.l %T1,%0\n\tmov.l %1,%0";
2368
2369 if (register_operand (dst, mode)
2370 && register_operand (src, mode))
2371 {
2372 if (REGNO (src) == MACH_REG)
2373 return "sts mach,%S0\n\tsts macl,%R0";
2374
2375 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2376 when mov.d r1,r0 do r1->r0 then r2->r1. */
2377
2378 if (REGNO (src) + 1 == REGNO (dst))
2379 return "mov %T1,%T0\n\tmov %1,%0";
2380 else
2381 return "mov %1,%0\n\tmov %T1,%T0";
2382 }
2383 else if (CONST_INT_P (src))
2384 {
2385 if (INTVAL (src) < 0)
2386 output_asm_insn ("mov #-1,%S0", operands);
2387 else
2388 output_asm_insn ("mov #0,%S0", operands);
2389
2390 return "mov %1,%R0";
2391 }
2392 else if (MEM_P (src))
2393 {
2394 int ptrreg = -1;
2395 int dreg = REGNO (dst);
2396 rtx inside = XEXP (src, 0);
2397
2398 switch (GET_CODE (inside))
2399 {
2400 case REG:
2401 ptrreg = REGNO (inside);
2402 break;
2403
2404 case SUBREG:
2405 ptrreg = subreg_regno (inside);
2406 break;
2407
2408 case PLUS:
2409 ptrreg = REGNO (XEXP (inside, 0));
2410 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2411 an offsettable address. Unfortunately, offsettable addresses use
2412 QImode to check the offset, and a QImode offsettable address
2413 requires r0 for the other operand, which is not currently
2414 supported, so we can't use the 'o' constraint.
2415 Thus we must check for and handle r0+REG addresses here.
2416 We punt for now, since this is likely very rare. */
2417 gcc_assert (!REG_P (XEXP (inside, 1)));
2418 break;
2419
2420 case LABEL_REF:
2421 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2422 case POST_INC:
2423 return "mov.l %1,%0\n\tmov.l %1,%T0";
2424 default:
2425 gcc_unreachable ();
2426 }
2427
2428 /* Work out the safe way to copy. Copy into the second half first. */
2429 if (dreg == ptrreg)
2430 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2431 }
2432
2433 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2434 }
2435
2436 /* Print an instruction which would have gone into a delay slot after
2437 another instruction, but couldn't because the other instruction expanded
2438 into a sequence where putting the slot insn at the end wouldn't work. */
2439
2440 static void
2441 print_slot (rtx insn)
2442 {
2443 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2444
2445 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2446 }
2447
2448 const char *
2449 output_far_jump (rtx insn, rtx op)
2450 {
2451 struct { rtx lab, reg, op; } this_jmp;
2452 rtx braf_base_lab = NULL_RTX;
2453 const char *jump;
2454 int far;
2455 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2456 rtx prev;
2457
2458 this_jmp.lab = gen_label_rtx ();
2459
2460 if (TARGET_SH2
2461 && offset >= -32764
2462 && offset - get_attr_length (insn) <= 32766)
2463 {
2464 far = 0;
2465 jump = "mov.w %O0,%1; braf %1";
2466 }
2467 else
2468 {
2469 far = 1;
2470 if (flag_pic)
2471 {
2472 if (TARGET_SH2)
2473 jump = "mov.l %O0,%1; braf %1";
2474 else
2475 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2476 }
2477 else
2478 jump = "mov.l %O0,%1; jmp @%1";
2479 }
2480 /* If we have a scratch register available, use it. */
2481 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2482 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2483 {
2484 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2485 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2486 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2487 output_asm_insn (jump, &this_jmp.lab);
2488 if (dbr_sequence_length ())
2489 print_slot (final_sequence);
2490 else
2491 output_asm_insn ("nop", 0);
2492 }
2493 else
2494 {
2495 /* Output the delay slot insn first if any. */
2496 if (dbr_sequence_length ())
2497 print_slot (final_sequence);
2498
2499 this_jmp.reg = gen_rtx_REG (SImode, 13);
2500 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2501 Fortunately, MACL is fixed and call-clobbered, and we never
2502 need its value across jumps, so save r13 in it instead of in
2503 the stack. */
2504 if (TARGET_SH5)
2505 output_asm_insn ("lds r13, macl", 0);
2506 else
2507 output_asm_insn ("mov.l r13,@-r15", 0);
2508 output_asm_insn (jump, &this_jmp.lab);
2509 if (TARGET_SH5)
2510 output_asm_insn ("sts macl, r13", 0);
2511 else
2512 output_asm_insn ("mov.l @r15+,r13", 0);
2513 }
2514 if (far && flag_pic && TARGET_SH2)
2515 {
2516 braf_base_lab = gen_label_rtx ();
2517 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2518 CODE_LABEL_NUMBER (braf_base_lab));
2519 }
2520 if (far)
2521 output_asm_insn (".align 2", 0);
2522 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2523 this_jmp.op = op;
2524 if (far && flag_pic)
2525 {
2526 if (TARGET_SH2)
2527 this_jmp.lab = braf_base_lab;
2528 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2529 }
2530 else
2531 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2532 return "";
2533 }
2534
2535 /* Local label counter, used for constants in the pool and inside
2536 pattern branches. */
2537
2538 static int lf = 100;
2539
2540 /* Output code for ordinary branches. */
2541
2542 const char *
2543 output_branch (int logic, rtx insn, rtx *operands)
2544 {
2545 switch (get_attr_length (insn))
2546 {
2547 case 6:
2548 /* This can happen if filling the delay slot has caused a forward
2549 branch to exceed its range (we could reverse it, but only
2550 when we know we won't overextend other branches; this should
2551 best be handled by relaxation).
2552 It can also happen when other condbranches hoist a delay slot insn
2553 from their destination, thus leading to a code size increase.
2554 But the branch will still be in the range -4092..+4098 bytes. */
2555
2556 if (! TARGET_RELAX)
2557 {
2558 int label = lf++;
2559 /* The call to print_slot will clobber the operands. */
2560 rtx op0 = operands[0];
2561
2562 /* If the instruction in the delay slot is annulled (true), then
2563 there is no delay slot where we can put it now. The only safe
2564 place for it is after the label. final will do that by default. */
2565
2566 if (final_sequence
2567 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2568 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2569 {
2570 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2571 ASSEMBLER_DIALECT ? "/" : ".", label);
2572 print_slot (final_sequence);
2573 }
2574 else
2575 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2576
2577 output_asm_insn ("bra\t%l0", &op0);
2578 fprintf (asm_out_file, "\tnop\n");
2579 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2580
2581 return "";
2582 }
2583 /* When relaxing, handle this like a short branch. The linker
2584 will fix it up if it still doesn't fit after relaxation. */
2585 case 2:
2586 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2587
2588 /* These are for SH2e, in which we have to account for the
2589 extra nop because of the hardware bug in annulled branches. */
2590 case 8:
2591 if (! TARGET_RELAX)
2592 {
2593 int label = lf++;
2594
2595 gcc_assert (!final_sequence
2596 || !(INSN_ANNULLED_BRANCH_P
2597 (XVECEXP (final_sequence, 0, 0))));
2598 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2599 logic ? "f" : "t",
2600 ASSEMBLER_DIALECT ? "/" : ".", label);
2601 fprintf (asm_out_file, "\tnop\n");
2602 output_asm_insn ("bra\t%l0", operands);
2603 fprintf (asm_out_file, "\tnop\n");
2604 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2605
2606 return "";
2607 }
2608 /* When relaxing, fall through. */
2609 case 4:
2610 {
2611 char buffer[10];
2612
2613 sprintf (buffer, "b%s%ss\t%%l0",
2614 logic ? "t" : "f",
2615 ASSEMBLER_DIALECT ? "/" : ".");
2616 output_asm_insn (buffer, &operands[0]);
2617 return "nop";
2618 }
2619
2620 default:
2621 /* There should be no longer branches now - that would
2622 indicate that something has destroyed the branches set
2623 up in machine_dependent_reorg. */
2624 gcc_unreachable ();
2625 }
2626 }
2627
2628 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2629 fill in operand 9 as a label to the successor insn.
2630 We try to use jump threading where possible.
2631 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2632 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2633 follow jmp and bt, if the address is in range. */
2634 const char *
2635 output_branchy_insn (enum rtx_code code, const char *templ,
2636 rtx insn, rtx *operands)
2637 {
2638 rtx next_insn = NEXT_INSN (insn);
2639
2640 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2641 {
2642 rtx src = SET_SRC (PATTERN (next_insn));
2643 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2644 {
2645 /* Following branch not taken */
2646 operands[9] = gen_label_rtx ();
2647 emit_label_after (operands[9], next_insn);
2648 INSN_ADDRESSES_NEW (operands[9],
2649 INSN_ADDRESSES (INSN_UID (next_insn))
2650 + get_attr_length (next_insn));
2651 return templ;
2652 }
2653 else
2654 {
2655 int offset = (branch_dest (next_insn)
2656 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2657 if (offset >= -252 && offset <= 258)
2658 {
2659 if (GET_CODE (src) == IF_THEN_ELSE)
2660 /* branch_true */
2661 src = XEXP (src, 1);
2662 operands[9] = src;
2663 return templ;
2664 }
2665 }
2666 }
2667 operands[9] = gen_label_rtx ();
2668 emit_label_after (operands[9], insn);
2669 INSN_ADDRESSES_NEW (operands[9],
2670 INSN_ADDRESSES (INSN_UID (insn))
2671 + get_attr_length (insn));
2672 return templ;
2673 }
2674
2675 const char *
2676 output_ieee_ccmpeq (rtx insn, rtx *operands)
2677 {
2678 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2679 insn, operands);
2680 }
2681 \f
2682 /* Output the start of the assembler file. */
2683
2684 static void
2685 sh_file_start (void)
2686 {
2687 default_file_start ();
2688
2689 if (TARGET_ELF)
2690 /* We need to show the text section with the proper
2691 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2692 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2693 will complain. We can teach GAS specifically about the
2694 default attributes for our choice of text section, but
2695 then we would have to change GAS again if/when we change
2696 the text section name. */
2697 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2698 else
2699 /* Switch to the data section so that the coffsem symbol
2700 isn't in the text section. */
2701 switch_to_section (data_section);
2702
2703 if (TARGET_LITTLE_ENDIAN)
2704 fputs ("\t.little\n", asm_out_file);
2705
2706 if (!TARGET_ELF)
2707 {
2708 if (TARGET_SHCOMPACT)
2709 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2710 else if (TARGET_SHMEDIA)
2711 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2712 TARGET_SHMEDIA64 ? 64 : 32);
2713 }
2714 }
2715 \f
2716 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2717
2718 static bool
2719 unspec_caller_rtx_p (rtx pat)
2720 {
2721 rtx base, offset;
2722 int i;
2723
2724 split_const (pat, &base, &offset);
2725 if (GET_CODE (base) == UNSPEC)
2726 {
2727 if (XINT (base, 1) == UNSPEC_CALLER)
2728 return true;
2729 for (i = 0; i < XVECLEN (base, 0); i++)
2730 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2731 return true;
2732 }
2733 return false;
2734 }
2735
2736 /* Indicate that INSN cannot be duplicated. This is true for any insn
2737 that generates a unique label. */
2738
2739 static bool
2740 sh_cannot_copy_insn_p (rtx insn)
2741 {
2742 rtx pat;
2743
2744 if (!reload_completed || !flag_pic)
2745 return false;
2746
2747 if (!NONJUMP_INSN_P (insn))
2748 return false;
2749 if (asm_noperands (insn) >= 0)
2750 return false;
2751
2752 pat = PATTERN (insn);
2753 if (GET_CODE (pat) != SET)
2754 return false;
2755 pat = SET_SRC (pat);
2756
2757 if (unspec_caller_rtx_p (pat))
2758 return true;
2759
2760 return false;
2761 }
2762 \f
2763 /* Actual number of instructions used to make a shift by N. */
2764 static const char ashiftrt_insns[] =
2765 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2766
2767 /* Left shift and logical right shift are the same. */
2768 static const char shift_insns[] =
2769 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2770
2771 /* Individual shift amounts needed to get the above length sequences.
2772 One bit right shifts clobber the T bit, so when possible, put one bit
2773 shifts in the middle of the sequence, so the ends are eligible for
2774 branch delay slots. */
2775 static const short shift_amounts[32][5] = {
2776 {0}, {1}, {2}, {2, 1},
2777 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2778 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2779 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2780 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2781 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2782 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2783 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
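/* For example, a constant left shift by 6 is emitted as shift_amounts[6]
   = {2, 2, 2}, i.e. three two-bit left shifts (shll2), in agreement with
   shift_insns[6] == 3; gen_shifty_op below simply walks these entries.  */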
2784
2785 /* Likewise, but for shift amounts < 16, up to three highmost bits
2786 might be clobbered. This is typically used when combined with some
2787 kind of sign or zero extension. */
2788
2789 static const char ext_shift_insns[] =
2790 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2791
2792 static const short ext_shift_amounts[32][4] = {
2793 {0}, {1}, {2}, {2, 1},
2794 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2795 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2796 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2797 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2798 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2799 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2800 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
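/* For example, ext_shift_amounts[6] = {8, -2} is a left shift by 8 followed
   by a logical right shift by 2: a net left shift by 6 that clears the two
   highmost bits, taking ext_shift_insns[6] == 2 insns instead of the
   shift_insns[6] == 3 needed for the exact result.  */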
2801
2802 /* Assuming we have a value that has been sign-extended by at least one bit,
2803 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2804 to shift it by N without data loss, and quicker than by other means? */
2805 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
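/* I.e. the test holds only for N == 7 and N == 15, where the corresponding
   ext_shift_amounts sequences ({8, -1} and {16, -1}) end in a single right
   shift by one that can be made arithmetic.  */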
2806
2807 /* This is used in length attributes in sh.md to help compute the length
2808 of arbitrary constant shift instructions. */
2809
2810 int
2811 shift_insns_rtx (rtx insn)
2812 {
2813 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2814 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2815 enum rtx_code shift_code = GET_CODE (set_src);
2816
2817 switch (shift_code)
2818 {
2819 case ASHIFTRT:
2820 return ashiftrt_insns[shift_count];
2821 case LSHIFTRT:
2822 case ASHIFT:
2823 return shift_insns[shift_count];
2824 default:
2825 gcc_unreachable ();
2826 }
2827 }
2828
2829 /* Return the cost of a shift. */
2830
2831 static inline int
2832 shiftcosts (rtx x)
2833 {
2834 int value;
2835
2836 /* There is no pattern for constant first operand. */
2837 if (CONST_INT_P (XEXP (x, 0)))
2838 return MAX_COST;
2839
2840 if (TARGET_SHMEDIA)
2841 return COSTS_N_INSNS (1);
2842
2843 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2844 {
2845 if (GET_MODE (x) == DImode
2846 && CONST_INT_P (XEXP (x, 1))
2847 && INTVAL (XEXP (x, 1)) == 1)
2848 return COSTS_N_INSNS (2);
2849
2850 /* Everything else is invalid, because there is no pattern for it. */
2851 return MAX_COST;
2852 }
2853 /* If shift by a non constant, then this will be expensive. */
2854 if (!CONST_INT_P (XEXP (x, 1)))
2855 return COSTS_N_INSNS (SH_DYNAMIC_SHIFT_COST);
2856
2857 /* Otherwise, return the true cost in instructions. Cope with out of range
2858 shift counts more or less arbitrarily. */
2859 value = INTVAL (XEXP (x, 1)) & 31;
2860
2861 if (GET_CODE (x) == ASHIFTRT)
2862 {
2863 int cost = ashiftrt_insns[value];
2864 /* If SH3, then we put the constant in a reg and use shad. */
2865 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2866 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2867 return COSTS_N_INSNS (cost);
2868 }
2869 else
2870 return COSTS_N_INSNS (shift_insns[value]);
2871 }
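/* For example, an arithmetic right shift by 6 has ashiftrt_insns[6] == 8,
   so on targets where dynamic shifts are cheap (i.e. shad is available and
   SH_DYNAMIC_SHIFT_COST is small) the cost above is capped at
   1 + SH_DYNAMIC_SHIFT_COST: the constant load plus the dynamic shift.  */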
2872
2873 /* Return the cost of an AND/XOR/IOR operation. */
2874
2875 static inline int
2876 and_xor_ior_costs (rtx x, int code)
2877 {
2878 int i;
2879
2880 /* A logical operation with two registers is a single cycle
2881 instruction. */
2882 if (!CONST_INT_P (XEXP (x, 1)))
2883 return 1;
2884
2885 i = INTVAL (XEXP (x, 1));
2886
2887 if (TARGET_SHMEDIA)
2888 {
2889 if (satisfies_constraint_I10 (XEXP (x, 1))
2890 || satisfies_constraint_J16 (XEXP (x, 1)))
2891 return 1;
2892 else
2893 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
2894 }
2895
2896 /* These constants are single cycle extu.[bw] instructions. */
2897 if ((i == 0xff || i == 0xffff) && code == AND)
2898 return 1;
2899 /* Constants that can be used in an instruction as an immediate are
2900 a single cycle, but this requires r0, so make it a little more
2901 expensive. */
2902 if (CONST_OK_FOR_K08 (i))
2903 return 2;
2904 /* Constants that can be loaded with a mov immediate need one more cycle.
2905 This case is probably unnecessary. */
2906 if (CONST_OK_FOR_I08 (i))
2907 return 2;
2908 /* Any other constant requires an additional 2 cycle pc-relative load.
2909 This case is probably unnecessary. */
2910 return 3;
2911 }
2912
2913 /* Return the cost of an addition or a subtraction. */
2914
2915 static inline int
2916 addsubcosts (rtx x)
2917 {
2918 /* Adding a register is a single cycle insn. */
2919 if (REG_P (XEXP (x, 1))
2920 || GET_CODE (XEXP (x, 1)) == SUBREG)
2921 return 1;
2922
2923 /* Likewise for small constants. */
2924 if (CONST_INT_P (XEXP (x, 1))
2925 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2926 return 1;
2927
2928 if (TARGET_SHMEDIA)
2929 switch (GET_CODE (XEXP (x, 1)))
2930 {
2931 case CONST:
2932 case LABEL_REF:
2933 case SYMBOL_REF:
2934 return TARGET_SHMEDIA64 ? 5 : 3;
2935
2936 case CONST_INT:
2937 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2938 return 2;
2939 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2940 return 3;
2941 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2942 return 4;
2943
2944 /* Fall through. */
2945 default:
2946 return 5;
2947 }
2948
2949 /* Any other constant requires a 2 cycle pc-relative load plus an
2950 addition. */
2951 return 3;
2952 }
2953
2954 /* Return the cost of a multiply. */
2955 static inline int
2956 multcosts (rtx x ATTRIBUTE_UNUSED)
2957 {
2958 if (sh_multcost >= 0)
2959 return sh_multcost;
2960 if (TARGET_SHMEDIA)
2961 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2962 accept constants. Ideally, we would use a cost of one or two and
2963 add the cost of the operand, but disregard the latter when inside loops
2964 and loop invariant code motion is still to follow.
2965 Using a multiply first and splitting it later if it's a loss
2966 doesn't work because of different sign / zero extension semantics
2967 of multiplies vs. shifts. */
2968 return optimize_size ? 2 : 3;
2969
2970 if (TARGET_SH2)
2971 {
2972 /* We have a mul insn, so we can never take more than the mul and the
2973 read of the mac reg, but count more because of the latency and extra
2974 reg usage. */
2975 if (optimize_size)
2976 return 2;
2977 return 3;
2978 }
2979
2980 /* If we're aiming at small code, then just count the number of
2981 insns in a multiply call sequence. */
2982 if (optimize_size)
2983 return 5;
2984
2985 /* Otherwise count all the insns in the routine we'd be calling too. */
2986 return 20;
2987 }
2988
2989 /* Compute a (partial) cost for rtx X. Return true if the complete
2990 cost has been computed, and false if subexpressions should be
2991 scanned. In either case, *TOTAL contains the cost result. */
2992
2993 static bool
2994 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
2995 int *total, bool speed ATTRIBUTE_UNUSED)
2996 {
2997 switch (code)
2998 {
2999 case CONST_INT:
3000 if (TARGET_SHMEDIA)
3001 {
3002 if (INTVAL (x) == 0)
3003 *total = 0;
3004 else if (outer_code == AND && and_operand ((x), DImode))
3005 *total = 0;
3006 else if ((outer_code == IOR || outer_code == XOR
3007 || outer_code == PLUS)
3008 && CONST_OK_FOR_I10 (INTVAL (x)))
3009 *total = 0;
3010 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3011 *total = COSTS_N_INSNS (outer_code != SET);
3012 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3013 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3014 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3015 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3016 else
3017 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3018 return true;
3019 }
3020 if (CONST_OK_FOR_I08 (INTVAL (x)))
3021 *total = 0;
3022 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3023 && CONST_OK_FOR_K08 (INTVAL (x)))
3024 *total = 1;
3025 /* prepare_cmp_insn will force costly constants into registers before
3026 the cbranch[sd]i4 patterns can see them, so preserve potentially
3027 interesting ones not covered by I08 above. */
3028 else if (outer_code == COMPARE
3029 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3030 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3031 || INTVAL (x) == 0x7fffffff
3032 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3033 *total = 1;
3034 else
3035 *total = 8;
3036 return true;
3037
3038 case EQ:
3039 /* An and with a constant compared against zero is
3040 most likely going to be a TST #imm, R0 instruction.
3041 Notice that this does not catch the zero_extract variants from
3042 the md file. */
3043 if (GET_CODE (XEXP (x, 0)) == AND
3044 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3045 {
3046 *total = 1;
3047 return true;
3048 }
3049 else
3050 return false;
3051
3052 case CONST:
3053 case LABEL_REF:
3054 case SYMBOL_REF:
3055 if (TARGET_SHMEDIA64)
3056 *total = COSTS_N_INSNS (4);
3057 else if (TARGET_SHMEDIA32)
3058 *total = COSTS_N_INSNS (2);
3059 else
3060 *total = 5;
3061 return true;
3062
3063 case CONST_DOUBLE:
3064 if (TARGET_SHMEDIA)
3065 *total = COSTS_N_INSNS (4);
3066 /* prepare_cmp_insn will force costly constants into registers before
3067 the cbranchdi4 pattern can see them, so preserve potentially
3068 interesting ones. */
3069 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3070 *total = 1;
3071 else
3072 *total = 10;
3073 return true;
3074 case CONST_VECTOR:
3075 if (x == CONST0_RTX (GET_MODE (x)))
3076 *total = 0;
3077 else if (sh_1el_vec (x, VOIDmode))
3078 *total = outer_code != SET;
3079 else if (sh_rep_vec (x, VOIDmode))
3080 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3081 + (outer_code != SET));
3082 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
3083 return true;
3084
3085 case PLUS:
3086 case MINUS:
3087 *total = COSTS_N_INSNS (addsubcosts (x));
3088 return true;
3089
3090 case AND:
3091 case XOR:
3092 case IOR:
3093 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3094 return true;
3095
3096 case MULT:
3097 *total = COSTS_N_INSNS (multcosts (x));
3098 return true;
3099
3100 case ASHIFT:
3101 case ASHIFTRT:
3102 case LSHIFTRT:
3103 *total = shiftcosts (x);
3104 return true;
3105
3106 case DIV:
3107 case UDIV:
3108 case MOD:
3109 case UMOD:
3110 *total = COSTS_N_INSNS (20);
3111 return true;
3112
3113 case PARALLEL:
3114 if (sh_1el_vec (x, VOIDmode))
3115 *total = outer_code != SET;
3116 else if (sh_rep_vec (x, VOIDmode))
3117 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3118 + (outer_code != SET));
3119 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
3120 return true;
3121
3122 case FLOAT:
3123 case FIX:
3124 *total = 100;
3125 return true;
3126
3127 default:
3128 return false;
3129 }
3130 }
3131
3132 /* Compute the cost of an address. For the SH, all valid addresses are
3133 the same cost. Use a slightly higher cost for reg + reg addressing,
3134 since it increases pressure on r0. */
3135
3136 static int
3137 sh_address_cost (rtx X,
3138 bool speed ATTRIBUTE_UNUSED)
3139 {
3140 /* SH2A supports 4 byte displacement mov insns with higher offsets.
3141 Consider those as more expensive than 2 byte insns. */
3142 if (DISP_ADDR_P (X) && GET_MODE (X) == QImode)
3143 return DISP_ADDR_OFFSET (X) < 16 ? 0 : 1;
3144
3145 return (GET_CODE (X) == PLUS
3146 && ! CONSTANT_P (XEXP (X, 1))
3147 && ! TARGET_SHMEDIA ? 1 : 0);
3148 }
3149
3150 /* Code to expand a shift. */
3151
3152 void
3153 gen_ashift (int type, int n, rtx reg)
3154 {
3155 /* Negative values here come from the shift_amounts array. */
3156 if (n < 0)
3157 {
3158 if (type == ASHIFT)
3159 type = LSHIFTRT;
3160 else
3161 type = ASHIFT;
3162 n = -n;
3163 }
3164
3165 switch (type)
3166 {
3167 case ASHIFTRT:
3168 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3169 break;
3170 case LSHIFTRT:
3171 if (n == 1)
3172 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3173 else
3174 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3175 break;
3176 case ASHIFT:
3177 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3178 break;
3179 }
3180 }
3181
3182 /* Same for HImode */
3183
3184 void
3185 gen_ashift_hi (int type, int n, rtx reg)
3186 {
3187 /* Negative values here come from the shift_amounts array. */
3188 if (n < 0)
3189 {
3190 if (type == ASHIFT)
3191 type = LSHIFTRT;
3192 else
3193 type = ASHIFT;
3194 n = -n;
3195 }
3196
3197 switch (type)
3198 {
3199 case ASHIFTRT:
3200 case LSHIFTRT:
3201 /* We don't have HImode right shift operations because using the
3202 ordinary 32 bit shift instructions for that doesn't generate proper
3203 zero/sign extension.
3204 gen_ashift_hi is only called in contexts where we know that the
3205 sign extension works out correctly. */
3206 {
3207 int offset = 0;
3208 if (GET_CODE (reg) == SUBREG)
3209 {
3210 offset = SUBREG_BYTE (reg);
3211 reg = SUBREG_REG (reg);
3212 }
3213 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3214 break;
3215 }
3216 case ASHIFT:
3217 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3218 break;
3219 }
3220 }
3221
3222 /* Output RTL to split a constant shift into its component SH constant
3223 shift instructions. */
3224
3225 void
3226 gen_shifty_op (int code, rtx *operands)
3227 {
3228 int value = INTVAL (operands[2]);
3229 int max, i;
3230
3231 /* Truncate the shift count in case it is out of bounds. */
3232 value = value & 31;
3233
3234 if (value == 31)
3235 {
3236 if (code == LSHIFTRT)
3237 {
3238 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3239 emit_insn (gen_movt (operands[0]));
3240 return;
3241 }
3242 else if (code == ASHIFT)
3243 {
3244 /* There is a two instruction sequence for 31 bit left shifts,
3245 but it requires r0. */
3246 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3247 {
3248 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3249 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3250 return;
3251 }
3252 }
3253 }
3254 else if (value == 0)
3255 {
3256 /* This can happen even when optimizing, if there were subregs before
3257 reload. Don't output a nop here, as this is never optimized away;
3258 use a no-op move instead. */
3259 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3260 return;
3261 }
3262
3263 max = shift_insns[value];
3264 for (i = 0; i < max; i++)
3265 gen_ashift (code, shift_amounts[value][i], operands[0]);
3266 }
3267
3268 /* Same as above, but optimized for values where the topmost bits don't
3269 matter. */
3270
3271 void
3272 gen_shifty_hi_op (int code, rtx *operands)
3273 {
3274 int value = INTVAL (operands[2]);
3275 int max, i;
3276 void (*gen_fun) (int, int, rtx);
3277
3278 /* This operation is used by and_shl for SImode values with a few
3279 high bits known to be cleared. */
3280 value &= 31;
3281 if (value == 0)
3282 {
3283 emit_insn (gen_nop ());
3284 return;
3285 }
3286
3287 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3288 if (code == ASHIFT)
3289 {
3290 max = ext_shift_insns[value];
3291 for (i = 0; i < max; i++)
3292 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3293 }
3294 else
3295 /* When shifting right, emit the shifts in reverse order, so that
3296 solitary negative values come first. */
3297 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3298 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3299 }
3300
3301 /* Output RTL for an arithmetic right shift. */
3302
3303 /* ??? Rewrite to use super-optimizer sequences. */
3304
3305 int
3306 expand_ashiftrt (rtx *operands)
3307 {
3308 rtx wrk;
3309 char func[18];
3310 int value;
3311
3312 if (TARGET_SH3 || TARGET_SH2A)
3313 {
3314 if (!CONST_INT_P (operands[2]))
3315 {
3316 rtx count = copy_to_mode_reg (SImode, operands[2]);
3317 emit_insn (gen_negsi2 (count, count));
3318 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3319 return 1;
3320 }
3321 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3322 > 1 + SH_DYNAMIC_SHIFT_COST)
3323 {
3324 rtx count
3325 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3326 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3327 return 1;
3328 }
3329 }
3330 if (!CONST_INT_P (operands[2]))
3331 return 0;
3332
3333 value = INTVAL (operands[2]) & 31;
3334
3335 if (value == 31)
3336 {
3337 /* If we are called from abs expansion, arrange things so that we
3338 can use a single MT instruction that doesn't clobber the source,
3339 if LICM can hoist out the load of the constant zero. */
3340 if (currently_expanding_to_rtl)
3341 {
3342 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3343 operands[1]));
3344 emit_insn (gen_mov_neg_si_t (operands[0]));
3345 return 1;
3346 }
3347 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3348 return 1;
3349 }
3350 else if (value >= 16 && value <= 19)
3351 {
3352 wrk = gen_reg_rtx (SImode);
3353 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3354 value -= 16;
3355 while (value--)
3356 gen_ashift (ASHIFTRT, 1, wrk);
3357 emit_move_insn (operands[0], wrk);
3358 return 1;
3359 }
3360 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3361 else if (value <= 5)
3362 {
3363 wrk = gen_reg_rtx (SImode);
3364 emit_move_insn (wrk, operands[1]);
3365 while (value--)
3366 gen_ashift (ASHIFTRT, 1, wrk);
3367 emit_move_insn (operands[0], wrk);
3368 return 1;
3369 }
3370
3371 wrk = gen_reg_rtx (Pmode);
3372
3373 /* Load the value into an arg reg and call a helper. */
3374 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3375 sprintf (func, "__ashiftrt_r4_%d", value);
3376 function_symbol (wrk, func, SFUNC_STATIC);
3377 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3378 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3379 return 1;
3380 }
3381
3382 int
3383 sh_dynamicalize_shift_p (rtx count)
3384 {
3385 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3386 }
3387
3388 /* Try to find a good way to implement the combiner pattern
3389 [(set (match_operand:SI 0 "register_operand" "r")
3390 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3391 (match_operand:SI 2 "const_int_operand" "n"))
3392 (match_operand:SI 3 "const_int_operand" "n"))) .
3393 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3394 return 0 for simple right / left or left/right shift combination.
3395 return 1 for a combination of shifts with zero_extend.
3396 return 2 for a combination of shifts with an AND that needs r0.
3397 return 3 for a combination of shifts with an AND that needs an extra
3398 scratch register, when the three highmost bits of the AND mask are clear.
3399 return 4 for a combination of shifts with an AND that needs an extra
3400 scratch register, when any of the three highmost bits of the AND mask
3401 is set.
3402 If ATTRP is set, store an initial right shift width in ATTRP[0],
3403 and the instruction length in ATTRP[1]. These values are not valid
3404 when returning 0.
3405 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3406 shift_amounts for the last shift value that is to be used before the
3407 sign extend. */
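/* For instance, LEFT == 0 with a mask of 0xff is recognized as a plain zero
   extension: the function returns 1, with ATTRP[0] == 0, ATTRP[1] == 1 (a
   single extu.b) and ATTRP[2] == -1.  */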
3408 int
3409 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3410 {
3411 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3412 int left = INTVAL (left_rtx), right;
3413 int best = 0;
3414 int cost, best_cost = 10000;
3415 int best_right = 0, best_len = 0;
3416 int i;
3417 int can_ext;
3418
3419 if (left < 0 || left > 31)
3420 return 0;
3421 if (CONST_INT_P (mask_rtx))
3422 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3423 else
3424 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3425 /* Can this be expressed as a right shift / left shift pair? */
3426 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3427 right = exact_log2 (lsb);
3428 mask2 = ~(mask + lsb - 1);
3429 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3430 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
3431 if (! mask2)
3432 best_cost = shift_insns[right] + shift_insns[right + left];
3433 /* mask has no trailing zeroes <==> ! right */
3434 else if (! right && mask2 == ~(lsb2 - 1))
3435 {
3436 int late_right = exact_log2 (lsb2);
3437 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3438 }
3439 /* Try to use zero extend. */
3440 if (mask2 == ~(lsb2 - 1))
3441 {
3442 int width, first;
3443
3444 for (width = 8; width <= 16; width += 8)
3445 {
3446 /* Can we zero-extend right away? */
3447 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3448 {
3449 cost
3450 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3451 if (cost < best_cost)
3452 {
3453 best = 1;
3454 best_cost = cost;
3455 best_right = right;
3456 best_len = cost;
3457 if (attrp)
3458 attrp[2] = -1;
3459 }
3460 continue;
3461 }
3462 /* ??? Could try to put zero extend into initial right shift,
3463 or even shift a bit left before the right shift. */
3464 /* Determine value of first part of left shift, to get to the
3465 zero extend cut-off point. */
3466 first = width - exact_log2 (lsb2) + right;
3467 if (first >= 0 && right + left - first >= 0)
3468 {
3469 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3470 + ext_shift_insns[right + left - first];
3471 if (cost < best_cost)
3472 {
3473 best = 1;
3474 best_cost = cost;
3475 best_right = right;
3476 best_len = cost;
3477 if (attrp)
3478 attrp[2] = first;
3479 }
3480 }
3481 }
3482 }
3483 /* Try to use r0 AND pattern */
3484 for (i = 0; i <= 2; i++)
3485 {
3486 if (i > right)
3487 break;
3488 if (! CONST_OK_FOR_K08 (mask >> i))
3489 continue;
3490 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3491 if (cost < best_cost)
3492 {
3493 best = 2;
3494 best_cost = cost;
3495 best_right = i;
3496 best_len = cost - 1;
3497 }
3498 }
3499 /* Try to use a scratch register to hold the AND operand. */
3500 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3501 for (i = 0; i <= 2; i++)
3502 {
3503 if (i > right)
3504 break;
3505 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3506 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3507 if (cost < best_cost)
3508 {
3509 best = 4 - can_ext;
3510 best_cost = cost;
3511 best_right = i;
3512 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3513 }
3514 }
3515
3516 if (attrp)
3517 {
3518 attrp[0] = best_right;
3519 attrp[1] = best_len;
3520 }
3521 return best;
3522 }
3523
3524 /* This is used in length attributes of the unnamed instructions
3525 corresponding to shl_and_kind return values of 1 and 2. */
3526 int
3527 shl_and_length (rtx insn)
3528 {
3529 rtx set_src, left_rtx, mask_rtx;
3530 int attributes[3];
3531
3532 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3533 left_rtx = XEXP (XEXP (set_src, 0), 1);
3534 mask_rtx = XEXP (set_src, 1);
3535 shl_and_kind (left_rtx, mask_rtx, attributes);
3536 return attributes[1];
3537 }
3538
3539 /* This is used in length attribute of the and_shl_scratch instruction. */
3540
3541 int
3542 shl_and_scr_length (rtx insn)
3543 {
3544 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3545 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3546 rtx op = XEXP (set_src, 0);
3547 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3548 op = XEXP (XEXP (op, 0), 0);
3549 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3550 }
3551
3552 /* Generate rtl for instructions for which shl_and_kind advised a particular
3553 method of generating them, i.e. returned nonzero. */
3554
3555 int
3556 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3557 {
3558 int attributes[3];
3559 unsigned HOST_WIDE_INT mask;
3560 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3561 int right, total_shift;
3562 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3563
3564 right = attributes[0];
3565 total_shift = INTVAL (left_rtx) + right;
3566 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3567 switch (kind)
3568 {
3569 default:
3570 return -1;
3571 case 1:
3572 {
3573 int first = attributes[2];
3574 rtx operands[3];
3575
3576 if (first < 0)
3577 {
3578 emit_insn ((mask << right) <= 0xff
3579 ? gen_zero_extendqisi2 (dest,
3580 gen_lowpart (QImode, source))
3581 : gen_zero_extendhisi2 (dest,
3582 gen_lowpart (HImode, source)));
3583 source = dest;
3584 }
3585 if (source != dest)
3586 emit_insn (gen_movsi (dest, source));
3587 operands[0] = dest;
3588 if (right)
3589 {
3590 operands[2] = GEN_INT (right);
3591 gen_shifty_hi_op (LSHIFTRT, operands);
3592 }
3593 if (first > 0)
3594 {
3595 operands[2] = GEN_INT (first);
3596 gen_shifty_hi_op (ASHIFT, operands);
3597 total_shift -= first;
3598 mask <<= first;
3599 }
3600 if (first >= 0)
3601 emit_insn (mask <= 0xff
3602 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3603 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3604 if (total_shift > 0)
3605 {
3606 operands[2] = GEN_INT (total_shift);
3607 gen_shifty_hi_op (ASHIFT, operands);
3608 }
3609 break;
3610 }
3611 case 4:
3612 shift_gen_fun = gen_shifty_op;
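	  /* Fall through to case 3.  */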
3613 case 3:
3614 /* If the topmost bit that matters is set, set the topmost bits
3615 that don't matter. This way, we might be able to get a shorter
3616 signed constant. */
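	  /* Illustrative example (not from the original sources): with
	     total_shift == 24, only mask bits 0..7 survive the final left
	     shift.  For mask == 0x80 (128, not a valid signed 8-bit
	     immediate), OR-ing in ~0 << 7 gives 0xffffff80 == -128, which
	     is.  */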
3617 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3618 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
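	  /* Fall through to case 2.  */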
3619 case 2:
3620 /* Don't expand fine-grained when combining, because that will
3621 make the pattern fail. */
3622 if (currently_expanding_to_rtl
3623 || reload_in_progress || reload_completed)
3624 {
3625 rtx operands[3];
3626
3627 /* Cases 3 and 4 should be handled by this split
3628 	     only while combining.  */
3629 gcc_assert (kind <= 2);
3630 if (right)
3631 {
3632 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3633 source = dest;
3634 }
3635 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3636 if (total_shift)
3637 {
3638 operands[0] = dest;
3639 operands[1] = dest;
3640 operands[2] = GEN_INT (total_shift);
3641 shift_gen_fun (ASHIFT, operands);
3642 }
3643 break;
3644 }
3645 else
3646 {
3647 int neg = 0;
3648 if (kind != 4 && total_shift < 16)
3649 {
3650 neg = -ext_shift_amounts[total_shift][1];
3651 if (neg > 0)
3652 neg -= ext_shift_amounts[total_shift][2];
3653 else
3654 neg = 0;
3655 }
3656 emit_insn (gen_and_shl_scratch (dest, source,
3657 GEN_INT (right),
3658 GEN_INT (mask),
3659 GEN_INT (total_shift + neg),
3660 GEN_INT (neg)));
3661 emit_insn (gen_movsi (dest, dest));
3662 break;
3663 }
3664 }
3665 return 0;
3666 }
3667
3668 /* Try to find a good way to implement the combiner pattern
3669 [(set (match_operand:SI 0 "register_operand" "=r")
3670 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3671 (match_operand:SI 2 "const_int_operand" "n")
3672 (match_operand:SI 3 "const_int_operand" "n")
3673 (const_int 0)))
3674 (clobber (reg:SI T_REG))]
3675 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3676    Return 0 for simple left / right shift combination.
3677    Return 1 for left shift / 8 bit sign extend / left shift.
3678    Return 2 for left shift / 16 bit sign extend / left shift.
3679    Return 3 for left shift / 8 bit sign extend / shift / sign extend.
3680    Return 4 for left shift / 16 bit sign extend / shift / sign extend.
3681    Return 5 for left shift / 16 bit sign extend / right shift.
3682    Return 6 for < 8 bit sign extend / left shift.
3683    Return 7 for < 8 bit sign extend / left shift / single right shift.
3684 If COSTP is nonzero, assign the calculated cost to *COSTP. */
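/* Illustrative note (not from the original sources): the pattern above
   computes the sign extension of the low SIZE bits of (operand 1 << LEFT).
   E.g. for LEFT == 8 and SIZE == 24 (so INSIZE == 16), the default kind 0
   sequence is a left shift by 32 - INSIZE == 16 followed by an arithmetic
   right shift by 32 - SIZE == 8.  */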
3685
3686 int
3687 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3688 {
3689 int left, size, insize, ext;
3690 int cost = 0, best_cost;
3691 int kind;
3692
3693 left = INTVAL (left_rtx);
3694 size = INTVAL (size_rtx);
3695 insize = size - left;
3696 gcc_assert (insize > 0);
3697 /* Default to left / right shift. */
3698 kind = 0;
3699 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3700 if (size <= 16)
3701 {
3702 /* 16 bit shift / sign extend / 16 bit shift */
3703 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3704 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3705 below, by alternative 3 or something even better. */
3706 if (cost < best_cost)
3707 {
3708 kind = 5;
3709 best_cost = cost;
3710 }
3711 }
3712 /* Try a plain sign extend between two shifts. */
3713 for (ext = 16; ext >= insize; ext -= 8)
3714 {
3715 if (ext <= size)
3716 {
3717 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3718 if (cost < best_cost)
3719 {
3720 kind = ext / (unsigned) 8;
3721 best_cost = cost;
3722 }
3723 }
3724 /* Check if we can do a sloppy shift with a final signed shift
3725 restoring the sign. */
3726 if (EXT_SHIFT_SIGNED (size - ext))
3727 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3728 /* If not, maybe it's still cheaper to do the second shift sloppy,
3729 and do a final sign extend? */
3730 else if (size <= 16)
3731 cost = ext_shift_insns[ext - insize] + 1
3732 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3733 else
3734 continue;
3735 if (cost < best_cost)
3736 {
3737 kind = ext / (unsigned) 8 + 2;
3738 best_cost = cost;
3739 }
3740 }
3741   /* Check if we can sign extend in r0.  */
3742 if (insize < 8)
3743 {
3744 cost = 3 + shift_insns[left];
3745 if (cost < best_cost)
3746 {
3747 kind = 6;
3748 best_cost = cost;
3749 }
3750 /* Try the same with a final signed shift. */
3751 if (left < 31)
3752 {
3753 cost = 3 + ext_shift_insns[left + 1] + 1;
3754 if (cost < best_cost)
3755 {
3756 kind = 7;
3757 best_cost = cost;
3758 }
3759 }
3760 }
3761 if (TARGET_SH3 || TARGET_SH2A)
3762 {
3763 /* Try to use a dynamic shift. */
3764 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3765 if (cost < best_cost)
3766 {
3767 kind = 0;
3768 best_cost = cost;
3769 }
3770 }
3771 if (costp)
3772 *costp = cost;
3773 return kind;
3774 }
3775
3776 /* Function to be used in the length attribute of the instructions
3777 implementing this pattern. */
3778
3779 int
3780 shl_sext_length (rtx insn)
3781 {
3782 rtx set_src, left_rtx, size_rtx;
3783 int cost;
3784
3785 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3786 left_rtx = XEXP (XEXP (set_src, 0), 1);
3787 size_rtx = XEXP (set_src, 1);
3788 shl_sext_kind (left_rtx, size_rtx, &cost);
3789 return cost;
3790 }
3791
3792 /* Generate rtl for this pattern.  */
3793
3794 int
3795 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3796 {
3797 int kind;
3798 int left, size, insize, cost;
3799 rtx operands[3];
3800
3801 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3802 left = INTVAL (left_rtx);
3803 size = INTVAL (size_rtx);
3804 insize = size - left;
3805 switch (kind)
3806 {
3807 case 1:
3808 case 2:
3809 case 3:
3810 case 4:
3811 {
3812 int ext = kind & 1 ? 8 : 16;
3813 int shift2 = size - ext;
3814
3815 /* Don't expand fine-grained when combining, because that will
3816 make the pattern fail. */
3817 if (! currently_expanding_to_rtl
3818 && ! reload_in_progress && ! reload_completed)
3819 {
3820 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3821 emit_insn (gen_movsi (dest, source));
3822 break;
3823 }
3824 if (dest != source)
3825 emit_insn (gen_movsi (dest, source));
3826 operands[0] = dest;
3827 if (ext - insize)
3828 {
3829 operands[2] = GEN_INT (ext - insize);
3830 gen_shifty_hi_op (ASHIFT, operands);
3831 }
3832 emit_insn (kind & 1
3833 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3834 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3835 if (kind <= 2)
3836 {
3837 if (shift2)
3838 {
3839 operands[2] = GEN_INT (shift2);
3840 gen_shifty_op (ASHIFT, operands);
3841 }
3842 }
3843 else
3844 {
3845 if (shift2 > 0)
3846 {
3847 if (EXT_SHIFT_SIGNED (shift2))
3848 {
3849 operands[2] = GEN_INT (shift2 + 1);
3850 gen_shifty_op (ASHIFT, operands);
3851 operands[2] = const1_rtx;
3852 gen_shifty_op (ASHIFTRT, operands);
3853 break;
3854 }
3855 operands[2] = GEN_INT (shift2);
3856 gen_shifty_hi_op (ASHIFT, operands);
3857 }
3858 else if (shift2)
3859 {
3860 operands[2] = GEN_INT (-shift2);
3861 gen_shifty_hi_op (LSHIFTRT, operands);
3862 }
3863 emit_insn (size <= 8
3864 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3865 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3866 }
3867 break;
3868 }
3869 case 5:
3870 {
3871 int i = 16 - size;
3872 if (! currently_expanding_to_rtl
3873 && ! reload_in_progress && ! reload_completed)
3874 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3875 else
3876 {
3877 operands[0] = dest;
3878 operands[2] = GEN_INT (16 - insize);
3879 gen_shifty_hi_op (ASHIFT, operands);
3880 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3881 }
3882 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3883 while (--i >= 0)
3884 gen_ashift (ASHIFTRT, 1, dest);
3885 break;
3886 }
3887 case 6:
3888 case 7:
3889 /* Don't expand fine-grained when combining, because that will
3890 make the pattern fail. */
3891 if (! currently_expanding_to_rtl
3892 && ! reload_in_progress && ! reload_completed)
3893 {
3894 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3895 emit_insn (gen_movsi (dest, source));
3896 break;
3897 }
3898 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3899 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3900 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3901 operands[0] = dest;
3902 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3903 gen_shifty_op (ASHIFT, operands);
3904 if (kind == 7)
3905 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3906 break;
3907 default:
3908 return -1;
3909 }
3910 return 0;
3911 }
3912
3913 /* Prefix a symbol_ref name with "datalabel". */
3914
3915 rtx
3916 gen_datalabel_ref (rtx sym)
3917 {
3918 const char *str;
3919
3920 if (GET_CODE (sym) == LABEL_REF)
3921 return gen_rtx_CONST (GET_MODE (sym),
3922 gen_rtx_UNSPEC (GET_MODE (sym),
3923 gen_rtvec (1, sym),
3924 UNSPEC_DATALABEL));
3925
3926 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3927
3928 str = XSTR (sym, 0);
3929 /* Share all SYMBOL_REF strings with the same value - that is important
3930 for cse. */
3931 str = IDENTIFIER_POINTER (get_identifier (str));
3932 XSTR (sym, 0) = str;
3933
3934 return sym;
3935 }
3936
3937 \f
3938 static alloc_pool label_ref_list_pool;
3939
3940 typedef struct label_ref_list_d
3941 {
3942 rtx label;
3943 struct label_ref_list_d *next;
3944 } *label_ref_list_t;
3945
3946 /* The SH cannot load a large constant into a register, constants have to
3947 come from a pc relative load. The reference of a pc relative load
3948 instruction must be less than 1k in front of the instruction. This
3949 means that we often have to dump a constant inside a function, and
3950 generate code to branch around it.
3951
3952 It is important to minimize this, since the branches will slow things
3953 down and make things bigger.
3954
3955 Worst case code looks like:
3956
3957 mov.l L1,rn
3958 bra L2
3959 nop
3960 align
3961 L1: .long value
3962 L2:
3963 ..
3964
3965 mov.l L3,rn
3966 bra L4
3967 nop
3968 align
3969 L3: .long value
3970 L4:
3971 ..
3972
3973 We fix this by performing a scan before scheduling, which notices which
3974 instructions need to have their operands fetched from the constant table
3975 and builds the table.
3976
3977 The algorithm is:
3978
3979 scan, find an instruction which needs a pcrel move. Look forward, find the
3980 last barrier which is within MAX_COUNT bytes of the requirement.
3981 If there isn't one, make one. Process all the instructions between
3982 the find and the barrier.
3983
3984 In the above example, we can tell that L3 is within 1k of L1, so
3985 the first move can be shrunk from the 3 insn+constant sequence into
3986 just 1 insn, and the constant moved to L3 to make:
3987
3988 mov.l L1,rn
3989 ..
3990 mov.l L3,rn
3991 bra L4
3992 nop
3993 align
3994 L3:.long value
3995 L4:.long value
3996
3997 Then the second move becomes the target for the shortening process. */
3998
3999 typedef struct
4000 {
4001 rtx value; /* Value in table. */
4002 rtx label; /* Label of value. */
4003 label_ref_list_t wend; /* End of window. */
4004 enum machine_mode mode; /* Mode of value. */
4005
4006 /* True if this constant is accessed as part of a post-increment
4007 sequence. Note that HImode constants are never accessed in this way. */
4008 bool part_of_sequence_p;
4009 } pool_node;
4010
4011 /* The maximum number of constants that can fit into one pool, since
4012 constants in the range 0..510 are at least 2 bytes long, and in the
4013 range from there to 1018 at least 4 bytes. */
4014
4015 #define MAX_POOL_SIZE 372
4016 static pool_node pool_vector[MAX_POOL_SIZE];
4017 static int pool_size;
4018 static rtx pool_window_label;
4019 static int pool_window_last;
4020
4021 static int max_labelno_before_reorg;
4022
4023 /* ??? If we need a constant in HImode which is the truncated value of a
4024 constant we need in SImode, we could combine the two entries thus saving
4025 two bytes. Is this common enough to be worth the effort of implementing
4026 it? */
4027
4028 /* ??? This stuff should be done at the same time that we shorten branches.
4029 As it is now, we must assume that all branches are the maximum size, and
4030 this causes us to almost always output constant pools sooner than
4031 necessary. */
4032
4033 /* Add a constant to the pool and return its label. */
4034
4035 static rtx
4036 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4037 {
4038 int i;
4039 rtx lab, new_rtx;
4040 label_ref_list_t ref, newref;
4041
4042 /* First see if we've already got it. */
4043 for (i = 0; i < pool_size; i++)
4044 {
4045 if (x->code == pool_vector[i].value->code
4046 && mode == pool_vector[i].mode)
4047 {
4048 if (x->code == CODE_LABEL)
4049 {
4050 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4051 continue;
4052 }
4053 if (rtx_equal_p (x, pool_vector[i].value))
4054 {
4055 lab = new_rtx = 0;
4056 if (! last_value
4057 || ! i
4058 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4059 {
4060 new_rtx = gen_label_rtx ();
4061 LABEL_REFS (new_rtx) = pool_vector[i].label;
4062 pool_vector[i].label = lab = new_rtx;
4063 }
4064 if (lab && pool_window_label)
4065 {
4066 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4067 newref->label = pool_window_label;
4068 ref = pool_vector[pool_window_last].wend;
4069 newref->next = ref;
4070 pool_vector[pool_window_last].wend = newref;
4071 }
4072 if (new_rtx)
4073 pool_window_label = new_rtx;
4074 pool_window_last = i;
4075 return lab;
4076 }
4077 }
4078 }
4079
4080 /* Need a new one. */
4081 pool_vector[pool_size].value = x;
4082 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4083 {
4084 lab = 0;
4085 pool_vector[pool_size - 1].part_of_sequence_p = true;
4086 }
4087 else
4088 lab = gen_label_rtx ();
4089 pool_vector[pool_size].mode = mode;
4090 pool_vector[pool_size].label = lab;
4091 pool_vector[pool_size].wend = NULL;
4092 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4093 if (lab && pool_window_label)
4094 {
4095 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4096 newref->label = pool_window_label;
4097 ref = pool_vector[pool_window_last].wend;
4098 newref->next = ref;
4099 pool_vector[pool_window_last].wend = newref;
4100 }
4101 if (lab)
4102 pool_window_label = lab;
4103 pool_window_last = pool_size;
4104 pool_size++;
4105 return lab;
4106 }
4107
4108 /* Output the literal table. START, if nonzero, is the first instruction
4109 this table is needed for, and also indicates that there is at least one
4110    casesi_worker_2 instruction; we have to emit the operand3 labels from
4111 these insns at a 4-byte aligned position. BARRIER is the barrier
4112 after which we are to place the table. */
4113
4114 static void
4115 dump_table (rtx start, rtx barrier)
4116 {
4117 rtx scan = barrier;
4118 int i;
4119 int need_align = 1;
4120 rtx lab;
4121 label_ref_list_t ref;
4122 int have_df = 0;
4123
4124 /* Do two passes, first time dump out the HI sized constants. */
4125
4126 for (i = 0; i < pool_size; i++)
4127 {
4128 pool_node *p = &pool_vector[i];
4129
4130 if (p->mode == HImode)
4131 {
4132 if (need_align)
4133 {
4134 scan = emit_insn_after (gen_align_2 (), scan);
4135 need_align = 0;
4136 }
4137 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4138 scan = emit_label_after (lab, scan);
4139 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4140 scan);
4141 for (ref = p->wend; ref; ref = ref->next)
4142 {
4143 lab = ref->label;
4144 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4145 }
4146 }
4147 else if (p->mode == DFmode)
4148 have_df = 1;
4149 }
4150
4151 need_align = 1;
4152
4153 if (start)
4154 {
4155 scan = emit_insn_after (gen_align_4 (), scan);
4156 need_align = 0;
4157 for (; start != barrier; start = NEXT_INSN (start))
4158 if (NONJUMP_INSN_P (start)
4159 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4160 {
4161 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4162 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4163
4164 scan = emit_label_after (lab, scan);
4165 }
4166 }
4167 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4168 {
4169 rtx align_insn = NULL_RTX;
4170
4171 scan = emit_label_after (gen_label_rtx (), scan);
4172 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4173 need_align = 0;
4174
4175 for (i = 0; i < pool_size; i++)
4176 {
4177 pool_node *p = &pool_vector[i];
4178
4179 switch (p->mode)
4180 {
4181 case HImode:
4182 break;
4183 case SImode:
4184 case SFmode:
4185 if (align_insn && !p->part_of_sequence_p)
4186 {
4187 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4188 emit_label_before (lab, align_insn);
4189 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4190 align_insn);
4191 for (ref = p->wend; ref; ref = ref->next)
4192 {
4193 lab = ref->label;
4194 emit_insn_before (gen_consttable_window_end (lab),
4195 align_insn);
4196 }
4197 delete_insn (align_insn);
4198 align_insn = NULL_RTX;
4199 continue;
4200 }
4201 else
4202 {
4203 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4204 scan = emit_label_after (lab, scan);
4205 scan = emit_insn_after (gen_consttable_4 (p->value,
4206 const0_rtx), scan);
4207 need_align = ! need_align;
4208 }
4209 break;
4210 case DFmode:
4211 if (need_align)
4212 {
4213 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4214 align_insn = scan;
4215 need_align = 0;
4216 }
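	    /* Fall through.  */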
4217 case DImode:
4218 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4219 scan = emit_label_after (lab, scan);
4220 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4221 scan);
4222 break;
4223 default:
4224 gcc_unreachable ();
4225 }
4226
4227 if (p->mode != HImode)
4228 {
4229 for (ref = p->wend; ref; ref = ref->next)
4230 {
4231 lab = ref->label;
4232 scan = emit_insn_after (gen_consttable_window_end (lab),
4233 scan);
4234 }
4235 }
4236 }
4237
4238 pool_size = 0;
4239 }
4240
4241 for (i = 0; i < pool_size; i++)
4242 {
4243 pool_node *p = &pool_vector[i];
4244
4245 switch (p->mode)
4246 {
4247 case HImode:
4248 break;
4249 case SImode:
4250 case SFmode:
4251 if (need_align)
4252 {
4253 need_align = 0;
4254 scan = emit_label_after (gen_label_rtx (), scan);
4255 scan = emit_insn_after (gen_align_4 (), scan);
4256 }
4257 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4258 scan = emit_label_after (lab, scan);
4259 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4260 scan);
4261 break;
4262 case DFmode:
4263 case DImode:
4264 if (need_align)
4265 {
4266 need_align = 0;
4267 scan = emit_label_after (gen_label_rtx (), scan);
4268 scan = emit_insn_after (gen_align_4 (), scan);
4269 }
4270 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4271 scan = emit_label_after (lab, scan);
4272 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4273 scan);
4274 break;
4275 default:
4276 gcc_unreachable ();
4277 }
4278
4279 if (p->mode != HImode)
4280 {
4281 for (ref = p->wend; ref; ref = ref->next)
4282 {
4283 lab = ref->label;
4284 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4285 }
4286 }
4287 }
4288
4289 scan = emit_insn_after (gen_consttable_end (), scan);
4290 scan = emit_barrier_after (scan);
4291 pool_size = 0;
4292 pool_window_label = NULL_RTX;
4293 pool_window_last = 0;
4294 }
4295
4296 /* Return nonzero if constant would be an ok source for a
4297 mov.w instead of a mov.l. */
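/* (A pc-relative mov.w loads a sign-extended 16-bit value, hence the
   signed 16-bit range check below.)  */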
4298
4299 static int
4300 hi_const (rtx src)
4301 {
4302 return (CONST_INT_P (src)
4303 && INTVAL (src) >= -32768
4304 && INTVAL (src) <= 32767);
4305 }
4306
4307 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4308
4309 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4310
4311 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4312    CONST_DOUBLE input value is CONST_OK_FOR_I08.  For an SFmode move, we don't
4313 need to fix it if the input value is CONST_OK_FOR_I08. */
4314
4315 static int
4316 broken_move (rtx insn)
4317 {
4318 if (NONJUMP_INSN_P (insn))
4319 {
4320 rtx pat = PATTERN (insn);
4321 if (GET_CODE (pat) == PARALLEL)
4322 pat = XVECEXP (pat, 0, 0);
4323 if (GET_CODE (pat) == SET
4324 /* We can load any 8-bit value if we don't care what the high
4325 order bits end up as. */
4326 && GET_MODE (SET_DEST (pat)) != QImode
4327 && (CONSTANT_P (SET_SRC (pat))
4328 /* Match mova_const. */
4329 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4330 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4331 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4332 && ! (TARGET_SH2E
4333 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4334 && (fp_zero_operand (SET_SRC (pat))
4335 || fp_one_operand (SET_SRC (pat)))
4336 /* In general we don't know the current setting of fpscr, so disable fldi.
4337 There is an exception if this was a register-register move
4338 before reload - and hence it was ascertained that we have
4339 single precision setting - and in a post-reload optimization
4340 we changed this to do a constant load. In that case
4341 we don't have an r0 clobber, hence we must use fldi. */
4342 && (TARGET_FMOVD
4343 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4344 == SCRATCH))
4345 && REG_P (SET_DEST (pat))
4346 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4347 && ! (TARGET_SH2A
4348 && GET_MODE (SET_DEST (pat)) == SImode
4349 && (satisfies_constraint_I20 (SET_SRC (pat))
4350 || satisfies_constraint_I28 (SET_SRC (pat))))
4351 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4352 return 1;
4353 }
4354
4355 return 0;
4356 }
4357
4358 static int
4359 mova_p (rtx insn)
4360 {
4361 return (NONJUMP_INSN_P (insn)
4362 && GET_CODE (PATTERN (insn)) == SET
4363 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4364 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4365 /* Don't match mova_const. */
4366 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4367 }
4368
4369 /* Fix up a mova from a switch that went out of range. */
4370 static void
4371 fixup_mova (rtx mova)
4372 {
4373 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4374 if (! flag_pic)
4375 {
4376 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4377 INSN_CODE (mova) = -1;
4378 }
4379 else
4380 {
4381 rtx worker = mova;
4382 rtx lab = gen_label_rtx ();
4383 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4384
4385 do
4386 {
4387 worker = NEXT_INSN (worker);
4388 gcc_assert (worker
4389 && !LABEL_P (worker)
4390 && !JUMP_P (worker));
4391 } while (NOTE_P (worker)
4392 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4393 wpat = PATTERN (worker);
4394 wpat0 = XVECEXP (wpat, 0, 0);
4395 wpat1 = XVECEXP (wpat, 0, 1);
4396 wsrc = SET_SRC (wpat0);
4397 PATTERN (worker) = (gen_casesi_worker_2
4398 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4399 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4400 XEXP (wpat1, 0)));
4401 INSN_CODE (worker) = -1;
4402 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4403 base = gen_rtx_LABEL_REF (Pmode, lab);
4404 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4405 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4406 INSN_CODE (mova) = -1;
4407 }
4408 }
4409
4410 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4411 *num_mova, and check if the new mova is not nested within the first one.
4412    Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4413    2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
4414 static int
4415 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4416 {
4417 int n_addr = 0; /* Initialization to shut up spurious warning. */
4418 int f_target, n_target = 0; /* Likewise. */
4419
4420 if (optimize)
4421 {
4422 /* If NEW_MOVA has no address yet, it will be handled later. */
4423 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4424 return -1;
4425
4426 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4427 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4428 if (n_addr > n_target || n_addr + 1022 < n_target)
4429 {
4430 /* Change the mova into a load.
4431 broken_move will then return true for it. */
4432 fixup_mova (new_mova);
4433 return 1;
4434 }
4435 }
4436 if (!(*num_mova)++)
4437 {
4438 *first_mova = new_mova;
4439 return 2;
4440 }
4441 if (!optimize
4442 || ((f_target
4443 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4444 >= n_target))
4445 return -1;
4446
4447 (*num_mova)--;
4448 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4449 > n_target - n_addr)
4450 {
4451 fixup_mova (*first_mova);
4452 return 0;
4453 }
4454 else
4455 {
4456 fixup_mova (new_mova);
4457 return 1;
4458 }
4459 }
4460
4461 /* Find the last barrier from insn FROM which is close enough to hold the
4462 constant pool. If we can't find one, then create one near the end of
4463 the range. */
4464
4465 static rtx
4466 find_barrier (int num_mova, rtx mova, rtx from)
4467 {
4468 int count_si = 0;
4469 int count_hi = 0;
4470 int found_hi = 0;
4471 int found_si = 0;
4472 int found_di = 0;
4473 int hi_align = 2;
4474 int si_align = 2;
4475 int leading_mova = num_mova;
4476 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4477 int si_limit;
4478 int hi_limit;
4479 rtx orig = from;
4480 rtx last_got = NULL_RTX;
4481 rtx last_symoff = NULL_RTX;
4482
4483 /* For HImode: range is 510, add 4 because pc counts from address of
4484 second instruction after this one, subtract 2 for the jump instruction
4485 that we may need to emit before the table, subtract 2 for the instruction
4486 that fills the jump delay slot (in very rare cases, reorg will take an
4487 instruction from after the constant pool or will leave the delay slot
4488 empty). This gives 510.
4489 For SImode: range is 1020, add 4 because pc counts from address of
4490 second instruction after this one, subtract 2 in case pc is 2 byte
4491 aligned, subtract 2 for the jump instruction that we may need to emit
4492 before the table, subtract 2 for the instruction that fills the jump
4493 delay slot. This gives 1018. */
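  /* Arithmetic recap of the above: 510 + 4 - 2 - 2 = 510 for HImode and
     1020 + 4 - 2 - 2 - 2 = 1018 for SImode, matching the hi_limit and
     si_limit values set below.  */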
4494
4495 /* The branch will always be shortened now that the reference address for
4496 forward branches is the successor address, thus we need no longer make
4497 adjustments to the [sh]i_limit for -O0. */
4498
4499 si_limit = 1018;
4500 hi_limit = 510;
4501
4502 while (from && count_si < si_limit && count_hi < hi_limit)
4503 {
4504 int inc = get_attr_length (from);
4505 int new_align = 1;
4506
4507 /* If this is a label that existed at the time of the compute_alignments
4508 call, determine the alignment. N.B. When find_barrier recurses for
4509 an out-of-reach mova, we might see labels at the start of previously
4510 inserted constant tables. */
4511 if (LABEL_P (from)
4512 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4513 {
4514 if (optimize)
4515 new_align = 1 << label_to_alignment (from);
4516 else if (BARRIER_P (prev_nonnote_insn (from)))
4517 new_align = 1 << barrier_align (from);
4518 else
4519 new_align = 1;
4520 inc = 0;
4521 }
4522 /* In case we are scanning a constant table because of recursion, check
4523 for explicit alignments. If the table is long, we might be forced
4524 to emit the new table in front of it; the length of the alignment
4525 might be the last straw. */
4526 else if (NONJUMP_INSN_P (from)
4527 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4528 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4529 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4530 /* When we find the end of a constant table, paste the new constant
4531 at the end. That is better than putting it in front because
4532 this way, we don't need extra alignment for adding a 4-byte-aligned
4533 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4534 else if (NONJUMP_INSN_P (from)
4535 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4536 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4537 return from;
4538
4539 if (BARRIER_P (from))
4540 {
4541 rtx next;
4542
4543 found_barrier = from;
4544
4545 /* If we are at the end of the function, or in front of an alignment
4546 instruction, we need not insert an extra alignment. We prefer
4547 this kind of barrier. */
4548 if (barrier_align (from) > 2)
4549 good_barrier = from;
4550
4551 /* If we are at the end of a hot/cold block, dump the constants
4552 here. */
4553 next = NEXT_INSN (from);
4554 if (next
4555 && NOTE_P (next)
4556 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4557 break;
4558 }
4559
4560 if (broken_move (from))
4561 {
4562 rtx pat, src, dst;
4563 enum machine_mode mode;
4564
4565 pat = PATTERN (from);
4566 if (GET_CODE (pat) == PARALLEL)
4567 pat = XVECEXP (pat, 0, 0);
4568 src = SET_SRC (pat);
4569 dst = SET_DEST (pat);
4570 mode = GET_MODE (dst);
4571
4572 	  /* GOT pc-relative setting comes in a pair of
4573 mova .L8,r0
4574 mov.l .L8,r12
4575 instructions. (plus add r0,r12).
4576 Remember if we see one without the other. */
4577 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4578 last_got = last_got ? NULL_RTX : from;
4579 else if (PIC_ADDR_P (src))
4580 last_got = last_got ? NULL_RTX : from;
4581
4582 /* We must explicitly check the mode, because sometimes the
4583 front end will generate code to load unsigned constants into
4584 HImode targets without properly sign extending them. */
4585 if (mode == HImode
4586 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4587 {
4588 found_hi += 2;
4589 /* We put the short constants before the long constants, so
4590 we must count the length of short constants in the range
4591 for the long constants. */
4592 /* ??? This isn't optimal, but is easy to do. */
4593 si_limit -= 2;
4594 }
4595 else
4596 {
4597 /* We dump DF/DI constants before SF/SI ones, because
4598 the limit is the same, but the alignment requirements
4599 are higher. We may waste up to 4 additional bytes
4600 for alignment, and the DF/DI constant may have
4601 another SF/SI constant placed before it. */
4602 if (TARGET_SHCOMPACT
4603 && ! found_di
4604 && (mode == DFmode || mode == DImode))
4605 {
4606 found_di = 1;
4607 si_limit -= 8;
4608 }
4609 while (si_align > 2 && found_si + si_align - 2 > count_si)
4610 si_align >>= 1;
4611 if (found_si > count_si)
4612 count_si = found_si;
4613 found_si += GET_MODE_SIZE (mode);
4614 if (num_mova)
4615 si_limit -= GET_MODE_SIZE (mode);
4616 }
4617 }
4618
4619 if (mova_p (from))
4620 {
4621 switch (untangle_mova (&num_mova, &mova, from))
4622 {
4623 case 1:
4624 if (flag_pic)
4625 {
4626 rtx src = SET_SRC (PATTERN (from));
4627 if (GET_CODE (src) == CONST
4628 && GET_CODE (XEXP (src, 0)) == UNSPEC
4629 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4630 last_symoff = from;
4631 }
4632 break;
4633 case 0: return find_barrier (0, 0, mova);
4634 case 2:
4635 {
4636 leading_mova = 0;
4637 barrier_before_mova
4638 = good_barrier ? good_barrier : found_barrier;
4639 }
4640 default: break;
4641 }
4642 if (found_si > count_si)
4643 count_si = found_si;
4644 }
4645 else if (JUMP_TABLE_DATA_P (from))
4646 {
4647 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4648 || (num_mova
4649 && (prev_nonnote_insn (from)
4650 == XEXP (MOVA_LABELREF (mova), 0))))
4651 num_mova--;
4652 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4653 {
4654 /* We have just passed the barrier in front of the
4655 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4656 the ADDR_DIFF_VEC is accessed as data, just like our pool
4657 constants, this is a good opportunity to accommodate what
4658 we have gathered so far.
4659 If we waited any longer, we could end up at a barrier in
4660 front of code, which gives worse cache usage for separated
4661 instruction / data caches. */
4662 good_barrier = found_barrier;
4663 break;
4664 }
4665 else
4666 {
4667 rtx body = PATTERN (from);
4668 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4669 }
4670 }
4671 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4672 else if (JUMP_P (from)
4673 && ! TARGET_SH2
4674 && ! optimize_size)
4675 new_align = 4;
4676
4677 /* There is a possibility that a bf is transformed into a bf/s by the
4678 delay slot scheduler. */
4679 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4680 && get_attr_type (from) == TYPE_CBRANCH
4681 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4682 inc += 2;
4683
4684 if (found_si)
4685 {
4686 count_si += inc;
4687 if (new_align > si_align)
4688 {
4689 si_limit -= (count_si - 1) & (new_align - si_align);
4690 si_align = new_align;
4691 }
4692 count_si = (count_si + new_align - 1) & -new_align;
4693 }
4694 if (found_hi)
4695 {
4696 count_hi += inc;
4697 if (new_align > hi_align)
4698 {
4699 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4700 hi_align = new_align;
4701 }
4702 count_hi = (count_hi + new_align - 1) & -new_align;
4703 }
4704 from = NEXT_INSN (from);
4705 }
4706
4707 if (num_mova)
4708 {
4709 if (leading_mova)
4710 {
4711 /* Try as we might, the leading mova is out of range. Change
4712 it into a load (which will become a pcload) and retry. */
4713 fixup_mova (mova);
4714 return find_barrier (0, 0, mova);
4715 }
4716 else
4717 {
4718 /* Insert the constant pool table before the mova instruction,
4719 to prevent the mova label reference from going out of range. */
4720 from = mova;
4721 good_barrier = found_barrier = barrier_before_mova;
4722 }
4723 }
4724
4725 if (found_barrier)
4726 {
4727 if (good_barrier && next_real_insn (found_barrier))
4728 found_barrier = good_barrier;
4729 }
4730 else
4731 {
4732 /* We didn't find a barrier in time to dump our stuff,
4733 so we'll make one. */
4734 rtx label = gen_label_rtx ();
4735
4736 /* Don't emit a constant table in the middle of insns for
4737 	 casesi_worker_2.  This is a bit of overkill, but it is enough
4738 	 because casesi_worker_2 does not appear very frequently.  */
4739 if (last_symoff)
4740 from = last_symoff;
4741
4742 /* If we exceeded the range, then we must back up over the last
4743 instruction we looked at. Otherwise, we just need to undo the
4744 NEXT_INSN at the end of the loop. */
4745 if (PREV_INSN (from) != orig
4746 && (count_hi > hi_limit || count_si > si_limit))
4747 from = PREV_INSN (PREV_INSN (from));
4748 else
4749 from = PREV_INSN (from);
4750
4751       /* Don't emit a constant table in the middle of global pointer setting,
4752 	 since that would move the addressing base GOT into another table.
4753 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4754 in the pool anyway, so just move up the whole constant pool. */
4755 if (last_got)
4756 from = PREV_INSN (last_got);
4757
4758 /* Don't insert the constant pool table at the position which
4759 may be the landing pad. */
4760 if (flag_exceptions
4761 && CALL_P (from)
4762 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4763 from = PREV_INSN (from);
4764
4765 /* Walk back to be just before any jump or label.
4766 Putting it before a label reduces the number of times the branch
4767 around the constant pool table will be hit. Putting it before
4768 a jump makes it more likely that the bra delay slot will be
4769 filled. */
4770 while (NOTE_P (from) || JUMP_P (from)
4771 || LABEL_P (from))
4772 from = PREV_INSN (from);
4773
4774 /* Make sure we do not split between a call and its corresponding
4775 CALL_ARG_LOCATION note. */
4776 if (CALL_P (from))
4777 {
4778 rtx next = NEXT_INSN (from);
4779 if (next && NOTE_P (next)
4780 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
4781 from = next;
4782 }
4783
4784 from = emit_jump_insn_after (gen_jump (label), from);
4785 JUMP_LABEL (from) = label;
4786 LABEL_NUSES (label) = 1;
4787 found_barrier = emit_barrier_after (from);
4788 emit_label_after (label, found_barrier);
4789 }
4790
4791 return found_barrier;
4792 }
4793
4794 /* If the instruction INSN is implemented by a special function, and we can
4795 positively find the register that is used to call the sfunc, and this
4796 register is not used anywhere else in this instruction - except as the
4797 destination of a set, return this register; else, return 0. */
4798 rtx
4799 sfunc_uses_reg (rtx insn)
4800 {
4801 int i;
4802 rtx pattern, part, reg_part, reg;
4803
4804 if (!NONJUMP_INSN_P (insn))
4805 return 0;
4806 pattern = PATTERN (insn);
4807 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4808 return 0;
4809
4810 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4811 {
4812 part = XVECEXP (pattern, 0, i);
4813 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4814 reg_part = part;
4815 }
4816 if (! reg_part)
4817 return 0;
4818 reg = XEXP (reg_part, 0);
4819 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4820 {
4821 part = XVECEXP (pattern, 0, i);
4822 if (part == reg_part || GET_CODE (part) == CLOBBER)
4823 continue;
4824 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4825 && REG_P (SET_DEST (part)))
4826 ? SET_SRC (part) : part)))
4827 return 0;
4828 }
4829 return reg;
4830 }
4831
4832 /* See if the only way in which INSN uses REG is by calling it, or by
4833 setting it while calling it. Set *SET to a SET rtx if the register
4834 is set by INSN. */
4835
4836 static int
4837 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4838 {
4839 rtx pattern, reg2;
4840
4841 *set = NULL_RTX;
4842
4843 reg2 = sfunc_uses_reg (insn);
4844 if (reg2 && REGNO (reg2) == REGNO (reg))
4845 {
4846 pattern = single_set (insn);
4847 if (pattern
4848 && REG_P (SET_DEST (pattern))
4849 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4850 *set = pattern;
4851 return 0;
4852 }
4853 if (!CALL_P (insn))
4854 {
4855 /* We don't use rtx_equal_p because we don't care if the mode is
4856 different. */
4857 pattern = single_set (insn);
4858 if (pattern
4859 && REG_P (SET_DEST (pattern))
4860 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4861 {
4862 rtx par, part;
4863 int i;
4864
4865 *set = pattern;
4866 par = PATTERN (insn);
4867 if (GET_CODE (par) == PARALLEL)
4868 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4869 {
4870 part = XVECEXP (par, 0, i);
4871 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4872 return 1;
4873 }
4874 return reg_mentioned_p (reg, SET_SRC (pattern));
4875 }
4876
4877 return 1;
4878 }
4879
4880 pattern = PATTERN (insn);
4881
4882 if (GET_CODE (pattern) == PARALLEL)
4883 {
4884 int i;
4885
4886 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4887 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4888 return 1;
4889 pattern = XVECEXP (pattern, 0, 0);
4890 }
4891
4892 if (GET_CODE (pattern) == SET)
4893 {
4894 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4895 {
4896 /* We don't use rtx_equal_p, because we don't care if the
4897 mode is different. */
4898 if (!REG_P (SET_DEST (pattern))
4899 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4900 return 1;
4901
4902 *set = pattern;
4903 }
4904
4905 pattern = SET_SRC (pattern);
4906 }
4907
4908 if (GET_CODE (pattern) != CALL
4909 || !MEM_P (XEXP (pattern, 0))
4910 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4911 return 1;
4912
4913 return 0;
4914 }
4915
4916 /* Given X, a pattern of an insn or a part of it, return a mask of used
4917 general registers. Bits 0..15 mean that the respective registers
4918 are used as inputs in the instruction. Bits 16..31 mean that the
4919 registers 0..15, respectively, are used as outputs, or are clobbered.
4920 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
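/* Illustrative example (not from the original sources): for the pattern
   (set (reg:SI 1) (reg:SI 2)) the result is
   (1 << 2) | (1 << (1 + 16)), i.e. r2 is marked as an input and r1 as an
   output.  */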
4921 int
4922 regs_used (rtx x, int is_dest)
4923 {
4924 enum rtx_code code;
4925 const char *fmt;
4926 int i, used = 0;
4927
4928 if (! x)
4929 return used;
4930 code = GET_CODE (x);
4931 switch (code)
4932 {
4933 case REG:
4934 if (REGNO (x) < 16)
4935 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4936 << (REGNO (x) + is_dest));
4937 return 0;
4938 case SUBREG:
4939 {
4940 rtx y = SUBREG_REG (x);
4941
4942 if (!REG_P (y))
4943 break;
4944 if (REGNO (y) < 16)
4945 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4946 << (REGNO (y) +
4947 subreg_regno_offset (REGNO (y),
4948 GET_MODE (y),
4949 SUBREG_BYTE (x),
4950 GET_MODE (x)) + is_dest));
4951 return 0;
4952 }
4953 case SET:
4954 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4955 case RETURN:
4956 /* If there was a return value, it must have been indicated with USE. */
4957 return 0x00ffff00;
4958 case CLOBBER:
4959 is_dest = 1;
4960 break;
4961 case MEM:
4962 is_dest = 0;
4963 break;
4964 case CALL:
4965 used |= 0x00ff00f0;
4966 break;
4967 default:
4968 break;
4969 }
4970
4971 fmt = GET_RTX_FORMAT (code);
4972
4973 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4974 {
4975 if (fmt[i] == 'E')
4976 {
4977 register int j;
4978 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4979 used |= regs_used (XVECEXP (x, i, j), is_dest);
4980 }
4981 else if (fmt[i] == 'e')
4982 used |= regs_used (XEXP (x, i), is_dest);
4983 }
4984 return used;
4985 }
4986
4987 /* Create an instruction that prevents redirection of a conditional branch
4988 to the destination of the JUMP with address ADDR.
4989 If the branch needs to be implemented as an indirect jump, try to find
4990 a scratch register for it.
4991 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4992 If any preceding insn that doesn't fit into a delay slot is good enough,
4993 pass 1. Pass 2 if a definite blocking insn is needed.
4994 -1 is used internally to avoid deep recursion.
4995 If a blocking instruction is made or recognized, return it. */
4996
4997 static rtx
4998 gen_block_redirect (rtx jump, int addr, int need_block)
4999 {
5000 int dead = 0;
5001 rtx prev = prev_nonnote_insn (jump);
5002 rtx dest;
5003
5004 /* First, check if we already have an instruction that satisfies our need. */
5005 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5006 {
5007 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5008 return prev;
5009 if (GET_CODE (PATTERN (prev)) == USE
5010 || GET_CODE (PATTERN (prev)) == CLOBBER
5011 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5012 prev = jump;
5013 else if ((need_block &= ~1) < 0)
5014 return prev;
5015 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5016 need_block = 0;
5017 }
5018 if (GET_CODE (PATTERN (jump)) == RETURN)
5019 {
5020 if (! need_block)
5021 return prev;
5022 /* Reorg even does nasty things with return insns that cause branches
5023 to go out of range - see find_end_label and callers. */
5024 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5025 }
5026 /* We can't use JUMP_LABEL here because it might be undefined
5027 when not optimizing. */
5028 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5029 /* If the branch is out of range, try to find a scratch register for it. */
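  /* Descriptive note: the unsigned comparison below tests whether the
     displacement dest - addr falls outside the interval [-4092, 4098].  */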
5030 if (optimize
5031 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5032 > 4092 + 4098))
5033 {
5034 rtx scan;
5035 /* Don't look for the stack pointer as a scratch register,
5036 it would cause trouble if an interrupt occurred. */
5037 unsigned attempt = 0x7fff, used;
5038 int jump_left = flag_expensive_optimizations + 1;
5039
5040 /* It is likely that the most recent eligible instruction is wanted for
5041 the delay slot. Therefore, find out which registers it uses, and
5042 try to avoid using them. */
5043
5044 for (scan = jump; (scan = PREV_INSN (scan)); )
5045 {
5046 enum rtx_code code;
5047
5048 if (INSN_DELETED_P (scan))
5049 continue;
5050 code = GET_CODE (scan);
5051 if (code == CODE_LABEL || code == JUMP_INSN)
5052 break;
5053 if (code == INSN
5054 && GET_CODE (PATTERN (scan)) != USE
5055 && GET_CODE (PATTERN (scan)) != CLOBBER
5056 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5057 {
5058 attempt &= ~regs_used (PATTERN (scan), 0);
5059 break;
5060 }
5061 }
5062 for (used = dead = 0, scan = JUMP_LABEL (jump);
5063 (scan = NEXT_INSN (scan)); )
5064 {
5065 enum rtx_code code;
5066
5067 if (INSN_DELETED_P (scan))
5068 continue;
5069 code = GET_CODE (scan);
5070 if (INSN_P (scan))
5071 {
5072 used |= regs_used (PATTERN (scan), 0);
5073 if (code == CALL_INSN)
5074 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5075 dead |= (used >> 16) & ~used;
5076 if (dead & attempt)
5077 {
5078 dead &= attempt;
5079 break;
5080 }
5081 if (code == JUMP_INSN)
5082 {
5083 if (jump_left-- && simplejump_p (scan))
5084 scan = JUMP_LABEL (scan);
5085 else
5086 break;
5087 }
5088 }
5089 }
5090 /* Mask out the stack pointer again, in case it was
5091 the only 'free' register we have found. */
5092 dead &= 0x7fff;
5093 }
5094 /* If the immediate destination is still in range, check for possible
5095 threading with a jump beyond the delay slot insn.
5096 Don't check if we are called recursively; the jump has been or will be
5097 checked in a different invocation then. */
5098
5099 else if (optimize && need_block >= 0)
5100 {
5101 rtx next = next_active_insn (next_active_insn (dest));
5102 if (next && JUMP_P (next)
5103 && GET_CODE (PATTERN (next)) == SET
5104 && recog_memoized (next) == CODE_FOR_jump_compact)
5105 {
5106 dest = JUMP_LABEL (next);
5107 if (dest
5108 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5109 > 4092 + 4098))
5110 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5111 }
5112 }
5113
5114 if (dead)
5115 {
5116 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5117
5118 /* It would be nice if we could convert the jump into an indirect
5119 	 jump / far branch right now, thus exposing all constituent
5120 instructions to further optimization. However, reorg uses
5121 simplejump_p to determine if there is an unconditional jump where
5122 it should try to schedule instructions from the target of the
5123 branch; simplejump_p fails for indirect jumps even if they have
5124 a JUMP_LABEL. */
5125 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5126 (reg, GEN_INT (unspec_bbr_uid++)),
5127 jump);
5128 /* ??? We would like this to have the scope of the jump, but that
5129 scope will change when a delay slot insn of an inner scope is added.
5130 Hence, after delay slot scheduling, we'll have to expect
5131 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5132 the jump. */
5133
5134 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5135 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5136 return insn;
5137 }
5138 else if (need_block)
5139 /* We can't use JUMP_LABEL here because it might be undefined
5140 when not optimizing. */
5141 return emit_insn_before (gen_block_branch_redirect
5142 (GEN_INT (unspec_bbr_uid++)),
5143 jump);
5144 return prev;
5145 }
5146
5147 #define CONDJUMP_MIN -252
5148 #define CONDJUMP_MAX 262
5149 struct far_branch
5150 {
5151 /* A label (to be placed) in front of the jump
5152 that jumps to our ultimate destination. */
5153 rtx near_label;
5154 /* Where we are going to insert it if we cannot move the jump any farther,
5155 or the jump itself if we have picked up an existing jump. */
5156 rtx insert_place;
5157 /* The ultimate destination. */
5158 rtx far_label;
5159 struct far_branch *prev;
5160 /* If the branch has already been created, its address;
5161 else the address of its first prospective user. */
5162 int address;
5163 };
5164
5165 static void gen_far_branch (struct far_branch *);
5166 enum mdep_reorg_phase_e mdep_reorg_phase;
5167 static void
5168 gen_far_branch (struct far_branch *bp)
5169 {
5170 rtx insn = bp->insert_place;
5171 rtx jump;
5172 rtx label = gen_label_rtx ();
5173 int ok;
5174
5175 emit_label_after (label, insn);
5176 if (bp->far_label)
5177 {
5178 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5179 LABEL_NUSES (bp->far_label)++;
5180 }
5181 else
5182 jump = emit_jump_insn_after (gen_return (), insn);
5183 /* Emit a barrier so that reorg knows that any following instructions
5184 are not reachable via a fall-through path.
5185 But don't do this when not optimizing, since we wouldn't suppress the
5186 alignment for the barrier then, and could end up with out-of-range
5187 pc-relative loads. */
5188 if (optimize)
5189 emit_barrier_after (jump);
5190 emit_label_after (bp->near_label, insn);
5191 JUMP_LABEL (jump) = bp->far_label;
5192 ok = invert_jump (insn, label, 1);
5193 gcc_assert (ok);
5194
5195 /* If we are branching around a jump (rather than a return), prevent
5196 reorg from using an insn from the jump target as the delay slot insn -
5197      when reorg did this, it pessimized code (we'd rather hide the delay slot)
5198 and it could cause branches to go out of range. */
5199 if (bp->far_label)
5200 (emit_insn_after
5201 (gen_stuff_delay_slot
5202 (GEN_INT (unspec_bbr_uid++),
5203 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5204 insn));
5205 /* Prevent reorg from undoing our splits. */
5206 gen_block_redirect (jump, bp->address += 2, 2);
5207 }
5208
5209 /* Fix up ADDR_DIFF_VECs. */
5210 void
5211 fixup_addr_diff_vecs (rtx first)
5212 {
5213 rtx insn;
5214
5215 for (insn = first; insn; insn = NEXT_INSN (insn))
5216 {
5217 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5218
5219 if (!JUMP_P (insn)
5220 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5221 continue;
5222 pat = PATTERN (insn);
5223 vec_lab = XEXP (XEXP (pat, 0), 0);
5224
5225 /* Search the matching casesi_jump_2. */
5226 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5227 {
5228 if (!JUMP_P (prev))
5229 continue;
5230 prevpat = PATTERN (prev);
5231 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5232 continue;
5233 x = XVECEXP (prevpat, 0, 1);
5234 if (GET_CODE (x) != USE)
5235 continue;
5236 x = XEXP (x, 0);
5237 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5238 break;
5239 }
5240 /* FIXME: This is a bug in the optimizer, but it seems harmless
5241 	 to just avoid panicking.  */
5242 if (!prev)
5243 continue;
5244
5245 /* Emit the reference label of the braf where it belongs, right after
5246 the casesi_jump_2 (i.e. braf). */
5247 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5248 emit_label_after (braf_label, prev);
5249
5250       /* Fix up the ADDR_DIFF_VEC to be relative
5251 to the reference address of the braf. */
5252 XEXP (XEXP (pat, 0), 0) = braf_label;
5253 }
5254 }
5255
5256 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5257 a barrier. Return the base 2 logarithm of the desired alignment. */
5258 int
5259 barrier_align (rtx barrier_or_label)
5260 {
5261 rtx next = next_real_insn (barrier_or_label), pat, prev;
5262 int slot, credit, jump_to_next = 0;
5263
5264 if (! next)
5265 return 0;
5266
5267 pat = PATTERN (next);
5268
5269 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5270 return 2;
5271
5272 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5273 /* This is a barrier in front of a constant table. */
5274 return 0;
5275
5276 prev = prev_real_insn (barrier_or_label);
5277 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5278 {
5279 pat = PATTERN (prev);
5280 /* If this is a very small table, we want to keep the alignment after
5281 the table to the minimum for proper code alignment. */
5282 return ((optimize_size
5283 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5284 <= (unsigned) 1 << (CACHE_LOG - 2)))
5285 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5286 }
5287
5288 if (optimize_size)
5289 return 0;
5290
5291 if (! TARGET_SH2 || ! optimize)
5292 return align_jumps_log;
5293
5294 /* When fixing up pcloads, a constant table might be inserted just before
5295 the basic block that ends with the barrier. Thus, we can't trust the
5296 instruction lengths before that. */
5297 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5298 {
5299 /* Check if there is an immediately preceding branch to the insn beyond
5300 	 the barrier.  We must weigh the cost of discarding useful information
5301 	 from the current cache line when executing this branch and there is
5302 	 an alignment, against that of fetching unneeded insns in front of the
5303 branch target when there is no alignment. */
5304
5305 /* There are two delay_slot cases to consider. One is the simple case
5306 where the preceding branch is to the insn beyond the barrier (simple
5307 delay slot filling), and the other is where the preceding branch has
5308 a delay slot that is a duplicate of the insn after the barrier
5309 (fill_eager_delay_slots) and the branch is to the insn after the insn
5310 after the barrier. */
5311
5312 /* PREV is presumed to be the JUMP_INSN for the barrier under
5313 investigation. Skip to the insn before it. */
5314 prev = prev_real_insn (prev);
5315
5316 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5317 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5318 prev = prev_real_insn (prev))
5319 {
5320 jump_to_next = 0;
5321 if (GET_CODE (PATTERN (prev)) == USE
5322 || GET_CODE (PATTERN (prev)) == CLOBBER)
5323 continue;
5324 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5325 {
5326 prev = XVECEXP (PATTERN (prev), 0, 1);
5327 if (INSN_UID (prev) == INSN_UID (next))
5328 {
5329 /* Delay slot was filled with insn at jump target. */
5330 jump_to_next = 1;
5331 continue;
5332 }
5333 }
5334
5335 	  if (slot
5336 	      && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5337 slot = 0;
5338 credit -= get_attr_length (prev);
5339 }
5340 if (prev && jump_to_label_p (prev))
5341 {
5342 rtx x;
5343 if (jump_to_next
5344 || next_real_insn (JUMP_LABEL (prev)) == next
5345 /* If relax_delay_slots() decides NEXT was redundant
5346 with some previous instruction, it will have
5347 redirected PREV's jump to the following insn. */
5348 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5349 /* There is no upper bound on redundant instructions
5350 that might have been skipped, but we must not put an
5351 alignment where none had been before. */
5352 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5353 (INSN_P (x)
5354 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5355 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5356 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5357 {
5358 rtx pat = PATTERN (prev);
5359 if (GET_CODE (pat) == PARALLEL)
5360 pat = XVECEXP (pat, 0, 0);
5361 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5362 return 0;
5363 }
5364 }
5365 }
5366
5367 return align_jumps_log;
5368 }
5369
5370 /* If we are inside a phony loop, almost any kind of label can turn up as the
5371 first one in the loop. Aligning a braf label causes incorrect switch
5372 destination addresses; we can detect braf labels because they are
5373 followed by a BARRIER.
5374 Applying loop alignment to small constant or switch tables is a waste
5375 of space, so we suppress this too. */
5376 int
5377 sh_loop_align (rtx label)
5378 {
5379 rtx next = label;
5380
5381 if (! optimize || optimize_size)
5382 return 0;
5383
5384 do
5385 next = next_nonnote_insn (next);
5386 while (next && LABEL_P (next));
5387
5388 if (! next
5389 || ! INSN_P (next)
5390 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5391 || recog_memoized (next) == CODE_FOR_consttable_2)
5392 return 0;
5393
5394 return align_loops_log;
5395 }
5396
5397 /* Do a final pass over the function, just before delayed branch
5398 scheduling. */
5399
5400 static void
5401 sh_reorg (void)
5402 {
5403 rtx first, insn, mova = NULL_RTX;
5404 int num_mova;
5405 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5406 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5407
5408 first = get_insns ();
5409 max_labelno_before_reorg = max_label_num ();
5410
5411 /* We must split call insns before introducing `mova's. If we're
5412 optimizing, they'll have already been split. Otherwise, make
5413 sure we don't split them too late. */
5414 if (! optimize)
5415 split_all_insns_noflow ();
5416
5417 if (TARGET_SHMEDIA)
5418 return;
5419
5420 /* If relaxing, generate pseudo-ops to associate function calls with
5421 the symbols they call. It does no harm to not generate these
5422 pseudo-ops. However, when we can generate them, it enables the
5423 linker to potentially relax the jsr to a bsr, and eliminate the
5424 register load and, possibly, the constant pool entry. */
5425
5426 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5427 if (TARGET_RELAX)
5428 {
5429 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5430 own purposes. This works because none of the remaining passes
5431 need to look at them.
5432
5433 ??? But it may break in the future. We should use a machine
5434 dependent REG_NOTE, or some other approach entirely. */
5435 for (insn = first; insn; insn = NEXT_INSN (insn))
5436 {
5437 if (INSN_P (insn))
5438 {
5439 rtx note;
5440
5441 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5442 NULL_RTX)) != 0)
5443 remove_note (insn, note);
5444 }
5445 }
5446
5447 for (insn = first; insn; insn = NEXT_INSN (insn))
5448 {
5449 rtx pattern, reg, link, set, scan, dies, label;
5450 int rescan = 0, foundinsn = 0;
5451
5452 if (CALL_P (insn))
5453 {
5454 pattern = PATTERN (insn);
5455
5456 if (GET_CODE (pattern) == PARALLEL)
5457 pattern = XVECEXP (pattern, 0, 0);
5458 if (GET_CODE (pattern) == SET)
5459 pattern = SET_SRC (pattern);
5460
5461 if (GET_CODE (pattern) != CALL
5462 || !MEM_P (XEXP (pattern, 0)))
5463 continue;
5464
5465 reg = XEXP (XEXP (pattern, 0), 0);
5466 }
5467 else
5468 {
5469 reg = sfunc_uses_reg (insn);
5470 if (! reg)
5471 continue;
5472 }
5473
5474 if (!REG_P (reg))
5475 continue;
5476
5477 /* Try scanning backward to find where the register is set. */
5478 link = NULL;
5479 for (scan = PREV_INSN (insn);
5480 scan && !LABEL_P (scan);
5481 scan = PREV_INSN (scan))
5482 {
5483 if (! INSN_P (scan))
5484 continue;
5485
5486 if (! reg_mentioned_p (reg, scan))
5487 continue;
5488
5489 if (noncall_uses_reg (reg, scan, &set))
5490 break;
5491
5492 if (set)
5493 {
5494 link = scan;
5495 break;
5496 }
5497 }
5498
5499 if (! link)
5500 continue;
5501
5502 /* The register is set at LINK. */
5503
5504 /* We can only optimize the function call if the register is
5505 being set to a symbol. In theory, we could sometimes
5506 optimize calls to a constant location, but the assembler
5507 and linker do not support that at present. */
5508 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5509 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5510 continue;
5511
5512 /* Scan forward from LINK to the place where REG dies, and
5513 make sure that the only insns which use REG are
5514 themselves function calls. */
5515
5516 /* ??? This doesn't work for call targets that were allocated
5517 by reload, since there may not be a REG_DEAD note for the
5518 register. */
5519
5520 dies = NULL_RTX;
5521 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5522 {
5523 rtx scanset;
5524
5525 /* Don't try to trace forward past a CODE_LABEL if we haven't
5526 seen INSN yet. Ordinarily, we will only find the setting insn
5527 if it is in the same basic block. However,
5528 cross-jumping can insert code labels in between the load and
5529 the call, and can result in situations where a single call
5530 insn may have two targets depending on where we came from. */
5531
5532 if (LABEL_P (scan) && ! foundinsn)
5533 break;
5534
5535 if (! INSN_P (scan))
5536 continue;
5537
5538 /* Don't try to trace forward past a JUMP. To optimize
5539 safely, we would have to check that all the
5540 instructions at the jump destination did not use REG. */
5541
5542 if (JUMP_P (scan))
5543 break;
5544
5545 if (! reg_mentioned_p (reg, scan))
5546 continue;
5547
5548 if (noncall_uses_reg (reg, scan, &scanset))
5549 break;
5550
5551 if (scan == insn)
5552 foundinsn = 1;
5553
5554 if (scan != insn
5555 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5556 {
5557 /* There is a function call to this register other
5558 than the one we are checking. If we optimize
5559 this call, we need to rescan again below. */
5560 rescan = 1;
5561 }
5562
5563 /* ??? We shouldn't have to worry about SCANSET here.
5564 We should just be able to check for a REG_DEAD note
5565 on a function call. However, the REG_DEAD notes are
5566 apparently not dependable around libcalls; c-torture
5567 execute/920501-2 is a test case. If SCANSET is set,
5568 then this insn sets the register, so it must have
5569 died earlier. Unfortunately, this will only handle
5570 the cases in which the register is, in fact, set in a
5571 later insn. */
5572
5573 /* ??? We shouldn't have to use FOUNDINSN here.
5574 This dates back to when we used LOG_LINKS to find
5575 the most recent insn which sets the register. */
5576
5577 if (foundinsn
5578 && (scanset
5579 || find_reg_note (scan, REG_DEAD, reg)))
5580 {
5581 dies = scan;
5582 break;
5583 }
5584 }
5585
5586 if (! dies)
5587 {
5588 /* Either there was a branch, or some insn used REG
5589 other than as a function call address. */
5590 continue;
5591 }
5592
5593 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5594 on the insn which sets the register, and on each call insn
5595 which uses the register. In final_prescan_insn we look for
5596 the REG_LABEL_OPERAND notes, and output the appropriate label
5597 or pseudo-op. */
5598
5599 label = gen_label_rtx ();
5600 add_reg_note (link, REG_LABEL_OPERAND, label);
5601 add_reg_note (insn, REG_LABEL_OPERAND, label);
5602 if (rescan)
5603 {
5604 scan = link;
5605 do
5606 {
5607 rtx reg2;
5608
5609 scan = NEXT_INSN (scan);
5610 if (scan != insn
5611 && ((CALL_P (scan)
5612 && reg_mentioned_p (reg, scan))
5613 || ((reg2 = sfunc_uses_reg (scan))
5614 && REGNO (reg2) == REGNO (reg))))
5615 add_reg_note (scan, REG_LABEL_OPERAND, label);
5616 }
5617 while (scan != dies);
5618 }
5619 }
5620 }
5621
5622 if (TARGET_SH2)
5623 fixup_addr_diff_vecs (first);
5624
5625 if (optimize)
5626 {
5627 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5628 shorten_branches (first);
5629 }
5630
5631 /* Scan the function looking for move instructions which have to be
5632 changed to pc-relative loads and insert the literal tables. */
5633 label_ref_list_pool = create_alloc_pool ("label references list",
5634 sizeof (struct label_ref_list_d),
5635 30);
5636 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5637 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5638 {
5639 if (mova_p (insn))
5640 {
5641 /* ??? basic block reordering can move a switch table dispatch
5642 below the switch table. Check if that has happened.
5643 We only have the addresses available when optimizing; but then,
5644 this check shouldn't be needed when not optimizing. */
5645 if (!untangle_mova (&num_mova, &mova, insn))
5646 {
5647 insn = mova;
5648 num_mova = 0;
5649 }
5650 }
5651 else if (JUMP_P (insn)
5652 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5653 && num_mova
5654 /* ??? Loop invariant motion can also move a mova out of a
5655 loop. Since the loop pass does this code motion anyway, maybe
5656 we should wrap UNSPEC_MOVA into a CONST, so that reload can
5657 move it back. */
5658 && ((num_mova > 1
5659 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5660 || (prev_nonnote_insn (insn)
5661 == XEXP (MOVA_LABELREF (mova), 0))))
5662 {
5663 rtx scan;
5664 int total;
5665
5666 num_mova--;
5667
5668 /* Some code might have been inserted between the mova and
5669 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5670 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5671 total += get_attr_length (scan);
5672
5673 /* The range of mova is 1020; add 4 because the PC counts from the
5674 address of the second insn after this one, and subtract 2 in case
5675 the PC is 2-byte aligned. Possible alignment needed for the
5676 ADDR_DIFF_VEC cancels out with the alignment effects of the mova itself. */
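/* Illustrative arithmetic (editorial note): 1020 + 4 - 2 = 1022, so with
   TOTAL == 1024 the ADDR_DIFF_VEC would be out of the mova's reach and
   the mova is converted into a pcload below.  */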
5677 if (total > 1022)
5678 {
5679 /* Change the mova into a load, and restart scanning
5680 there. broken_move will then return true for mova. */
5681 fixup_mova (mova);
5682 insn = mova;
5683 }
5684 }
5685 if (broken_move (insn)
5686 || (NONJUMP_INSN_P (insn)
5687 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5688 {
5689 rtx scan;
5690 /* Scan ahead looking for a barrier to stick the constant table
5691 behind. */
5692 rtx barrier = find_barrier (num_mova, mova, insn);
5693 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5694 int need_aligned_label = 0;
5695
5696 if (num_mova && ! mova_p (mova))
5697 {
5698 /* find_barrier had to change the first mova into a
5699 pcload; thus, we have to start with this new pcload. */
5700 insn = mova;
5701 num_mova = 0;
5702 }
5703 /* Now find all the moves between the points and modify them. */
5704 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5705 {
5706 if (LABEL_P (scan))
5707 last_float = 0;
5708 if (NONJUMP_INSN_P (scan)
5709 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5710 need_aligned_label = 1;
5711 if (broken_move (scan))
5712 {
5713 rtx *patp = &PATTERN (scan), pat = *patp;
5714 rtx src, dst;
5715 rtx lab;
5716 rtx newsrc;
5717 enum machine_mode mode;
5718
5719 if (GET_CODE (pat) == PARALLEL)
5720 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5721 src = SET_SRC (pat);
5722 dst = SET_DEST (pat);
5723 mode = GET_MODE (dst);
5724
5725 if (mode == SImode && hi_const (src)
5726 && REGNO (dst) != FPUL_REG)
5727 {
5728 int offset = 0;
5729
5730 mode = HImode;
5731 while (GET_CODE (dst) == SUBREG)
5732 {
5733 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5734 GET_MODE (SUBREG_REG (dst)),
5735 SUBREG_BYTE (dst),
5736 GET_MODE (dst));
5737 dst = SUBREG_REG (dst);
5738 }
5739 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5740 }
5741 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5742 {
5743 /* This must be an insn that clobbers r0. */
5744 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5745 XVECLEN (PATTERN (scan), 0)
5746 - 1);
5747 rtx clobber = *clobberp;
5748
5749 gcc_assert (GET_CODE (clobber) == CLOBBER
5750 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5751
5752 if (last_float
5753 && reg_set_between_p (r0_rtx, last_float_move, scan))
5754 last_float = 0;
5755 if (last_float
5756 && TARGET_SHCOMPACT
5757 && GET_MODE_SIZE (mode) != 4
5758 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5759 last_float = 0;
5760 lab = add_constant (src, mode, last_float);
5761 if (lab)
5762 emit_insn_before (gen_mova (lab), scan);
5763 else
5764 {
5765 /* There will be a REG_UNUSED note for r0 on
5766 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5767 otherwise reorg:mark_target_live_regs will not
5768 consider r0 to be used, and we could end up with a
5769 delay slot insn in front of SCAN that clobbers r0. */
5770 rtx note
5771 = find_regno_note (last_float_move, REG_UNUSED, 0);
5772
5773 /* If we are not optimizing, then there may not be
5774 a note. */
5775 if (note)
5776 PUT_REG_NOTE_KIND (note, REG_INC);
5777
5778 *last_float_addr = r0_inc_rtx;
5779 }
5780 last_float_move = scan;
5781 last_float = src;
5782 newsrc = gen_const_mem (mode,
5783 (((TARGET_SH4 && ! TARGET_FMOVD)
5784 || REGNO (dst) == FPUL_REG)
5785 ? r0_inc_rtx
5786 : r0_rtx));
5787 last_float_addr = &XEXP (newsrc, 0);
5788
5789 /* Remove the clobber of r0. */
5790 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5791 gen_rtx_SCRATCH (Pmode));
5792 }
5793 /* This is a mova needing a label. Create it. */
5794 else if (GET_CODE (src) == UNSPEC
5795 && XINT (src, 1) == UNSPEC_MOVA
5796 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5797 {
5798 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5799 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5800 newsrc = gen_rtx_UNSPEC (SImode,
5801 gen_rtvec (1, newsrc),
5802 UNSPEC_MOVA);
5803 }
5804 else
5805 {
5806 lab = add_constant (src, mode, 0);
5807 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5808 newsrc = gen_const_mem (mode, newsrc);
5809 }
5810 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5811 INSN_CODE (scan) = -1;
5812 }
5813 }
5814 dump_table (need_aligned_label ? insn : 0, barrier);
5815 insn = barrier;
5816 }
5817 }
5818 free_alloc_pool (label_ref_list_pool);
5819 for (insn = first; insn; insn = NEXT_INSN (insn))
5820 PUT_MODE (insn, VOIDmode);
5821
5822 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5823 INSN_ADDRESSES_FREE ();
5824 split_branches (first);
5825
5826 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5827 also has an effect on the register that holds the address of the sfunc.
5828 Insert an extra dummy insn in front of each sfunc that pretends to
5829 use this register. */
5830 if (flag_delayed_branch)
5831 {
5832 for (insn = first; insn; insn = NEXT_INSN (insn))
5833 {
5834 rtx reg = sfunc_uses_reg (insn);
5835
5836 if (! reg)
5837 continue;
5838 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5839 }
5840 }
5841 #if 0
5842 /* fpscr is not actually a user variable, but we pretend it is for the
5843 sake of the previous optimization passes, since we want it handled like
5844 one. However, we don't have any debugging information for it, so turn
5845 it into a non-user variable now. */
5846 if (TARGET_SH4)
5847 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5848 #endif
5849 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5850 }
5851
5852 int
5853 get_dest_uid (rtx label, int max_uid)
5854 {
5855 rtx dest = next_real_insn (label);
5856 int dest_uid;
5857 if (! dest)
5858 /* This can happen for an undefined label. */
5859 return 0;
5860 dest_uid = INSN_UID (dest);
5861 /* If this is a newly created branch redirection blocking instruction,
5862 we cannot index the uid_branch or insn_addresses arrays with its
5863 uid. But then, we won't need to, because the actual destination is
5864 the following branch. */
5865 while (dest_uid >= max_uid)
5866 {
5867 dest = NEXT_INSN (dest);
5868 dest_uid = INSN_UID (dest);
5869 }
5870 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5871 return 0;
5872 return dest_uid;
5873 }
5874
5875 /* Split condbranches that are out of range. Also add clobbers for
5876 scratch registers that are needed in far jumps.
5877 We do this before delay slot scheduling, so that it can take our
5878 newly created instructions into account. It also allows us to
5879 find branches with common targets more easily. */
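/* Added explanatory sketch (not part of the original sources): for each
   branch destination we keep a struct far_branch record in UID_BRANCH.
   An out-of-range conditional branch is redirected to the record's
   NEAR_LABEL, and gen_far_branch later emits the unconditional jump
   from that label to the real (far) target.  */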
5880
5881 static void
5882 split_branches (rtx first)
5883 {
5884 rtx insn;
5885 struct far_branch **uid_branch, *far_branch_list = 0;
5886 int max_uid = get_max_uid ();
5887 int ok;
5888
5889 /* Find out which branches are out of range. */
5890 shorten_branches (first);
5891
5892 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5893 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5894
5895 for (insn = first; insn; insn = NEXT_INSN (insn))
5896 if (! INSN_P (insn))
5897 continue;
5898 else if (INSN_DELETED_P (insn))
5899 {
5900 /* Shorten_branches would split this instruction again,
5901 so transform it into a note. */
5902 SET_INSN_DELETED (insn);
5903 }
5904 else if (JUMP_P (insn)
5905 /* Don't mess with ADDR_DIFF_VEC */
5906 && (GET_CODE (PATTERN (insn)) == SET
5907 || GET_CODE (PATTERN (insn)) == RETURN))
5908 {
5909 enum attr_type type = get_attr_type (insn);
5910 if (type == TYPE_CBRANCH)
5911 {
5912 rtx next, beyond;
5913
5914 if (get_attr_length (insn) > 4)
5915 {
5916 rtx src = SET_SRC (PATTERN (insn));
5917 rtx olabel = XEXP (XEXP (src, 1), 0);
5918 int addr = INSN_ADDRESSES (INSN_UID (insn));
5919 rtx label = 0;
5920 int dest_uid = get_dest_uid (olabel, max_uid);
5921 struct far_branch *bp = uid_branch[dest_uid];
5922
5923 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5924 the label if the LABEL_NUSES count drops to zero. There is
5925 always a jump_optimize pass that sets these values, but it
5926 proceeds to delete unreferenced code, and then, if not
5927 optimizing, to un-delete the deleted instructions, thus
5928 leaving labels with use counts that are too low. */
5929 if (! optimize)
5930 {
5931 JUMP_LABEL (insn) = olabel;
5932 LABEL_NUSES (olabel)++;
5933 }
5934 if (! bp)
5935 {
5936 bp = (struct far_branch *) alloca (sizeof *bp);
5937 uid_branch[dest_uid] = bp;
5938 bp->prev = far_branch_list;
5939 far_branch_list = bp;
5940 bp->far_label
5941 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5942 LABEL_NUSES (bp->far_label)++;
5943 }
5944 else
5945 {
5946 label = bp->near_label;
5947 if (! label && bp->address - addr >= CONDJUMP_MIN)
5948 {
5949 rtx block = bp->insert_place;
5950
5951 if (GET_CODE (PATTERN (block)) == RETURN)
5952 block = PREV_INSN (block);
5953 else
5954 block = gen_block_redirect (block,
5955 bp->address, 2);
5956 label = emit_label_after (gen_label_rtx (),
5957 PREV_INSN (block));
5958 bp->near_label = label;
5959 }
5960 else if (label && ! NEXT_INSN (label))
5961 {
5962 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5963 bp->insert_place = insn;
5964 else
5965 gen_far_branch (bp);
5966 }
5967 }
5968 if (! label
5969 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5970 {
5971 bp->near_label = label = gen_label_rtx ();
5972 bp->insert_place = insn;
5973 bp->address = addr;
5974 }
5975 ok = redirect_jump (insn, label, 0);
5976 gcc_assert (ok);
5977 }
5978 else
5979 {
5980 /* get_attr_length (insn) == 2 */
5981 /* Check if we have a pattern where reorg wants to redirect
5982 the branch to a label from an unconditional branch that
5983 is too far away. */
5984 /* We can't use JUMP_LABEL here because it might be undefined
5985 when not optimizing. */
5986 /* A syntax error might cause beyond to be NULL_RTX. */
5987 beyond
5988 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5989 0));
5990
5991 if (beyond
5992 && (JUMP_P (beyond)
5993 || ((beyond = next_active_insn (beyond))
5994 && JUMP_P (beyond)))
5995 && GET_CODE (PATTERN (beyond)) == SET
5996 && recog_memoized (beyond) == CODE_FOR_jump_compact
5997 && ((INSN_ADDRESSES
5998 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5999 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6000 > 252 + 258 + 2))
6001 gen_block_redirect (beyond,
6002 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6003 }
6004
6005 next = next_active_insn (insn);
6006
6007 if (next
6008 && (JUMP_P (next)
6009 || ((next = next_active_insn (next))
6010 && JUMP_P (next)))
6011 && GET_CODE (PATTERN (next)) == SET
6012 && recog_memoized (next) == CODE_FOR_jump_compact
6013 && ((INSN_ADDRESSES
6014 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6015 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6016 > 252 + 258 + 2))
6017 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6018 }
6019 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6020 {
6021 int addr = INSN_ADDRESSES (INSN_UID (insn));
6022 rtx far_label = 0;
6023 int dest_uid = 0;
6024 struct far_branch *bp;
6025
6026 if (type == TYPE_JUMP)
6027 {
6028 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6029 dest_uid = get_dest_uid (far_label, max_uid);
6030 if (! dest_uid)
6031 {
6032 /* Parse errors can lead to labels outside
6033 the insn stream. */
6034 if (! NEXT_INSN (far_label))
6035 continue;
6036
6037 if (! optimize)
6038 {
6039 JUMP_LABEL (insn) = far_label;
6040 LABEL_NUSES (far_label)++;
6041 }
6042 redirect_jump (insn, ret_rtx, 1);
6043 far_label = 0;
6044 }
6045 }
6046 bp = uid_branch[dest_uid];
6047 if (! bp)
6048 {
6049 bp = (struct far_branch *) alloca (sizeof *bp);
6050 uid_branch[dest_uid] = bp;
6051 bp->prev = far_branch_list;
6052 far_branch_list = bp;
6053 bp->near_label = 0;
6054 bp->far_label = far_label;
6055 if (far_label)
6056 LABEL_NUSES (far_label)++;
6057 }
6058 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6059 if (addr - bp->address <= CONDJUMP_MAX)
6060 emit_label_after (bp->near_label, PREV_INSN (insn));
6061 else
6062 {
6063 gen_far_branch (bp);
6064 bp->near_label = 0;
6065 }
6066 else
6067 bp->near_label = 0;
6068 bp->address = addr;
6069 bp->insert_place = insn;
6070 if (! far_label)
6071 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6072 else
6073 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6074 }
6075 }
6076 /* Generate all pending far branches,
6077 and free our references to the far labels. */
6078 while (far_branch_list)
6079 {
6080 if (far_branch_list->near_label
6081 && ! NEXT_INSN (far_branch_list->near_label))
6082 gen_far_branch (far_branch_list);
6083 if (optimize
6084 && far_branch_list->far_label
6085 && ! --LABEL_NUSES (far_branch_list->far_label))
6086 delete_insn (far_branch_list->far_label);
6087 far_branch_list = far_branch_list->prev;
6088 }
6089
6090 /* Instruction length information is no longer valid due to the new
6091 instructions that have been generated. */
6092 init_insn_lengths ();
6093 }
6094
6095 /* Dump out instruction addresses, which is useful for debugging the
6096 constant pool table stuff.
6097
6098 If relaxing, output the label and pseudo-ops used to link together
6099 calls and the instruction which set the registers. */
6100
6101 /* ??? The addresses printed by this routine for insns are nonsense for
6102 insns which are inside of a sequence where none of the inner insns have
6103 variable length. This is because the second pass of shorten_branches
6104 does not bother to update them. */
6105
6106 void
6107 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6108 int noperands ATTRIBUTE_UNUSED)
6109 {
6110 if (TARGET_DUMPISIZE)
6111 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6112
6113 if (TARGET_RELAX)
6114 {
6115 rtx note;
6116
6117 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6118 if (note)
6119 {
6120 rtx pattern;
6121
6122 pattern = PATTERN (insn);
6123 if (GET_CODE (pattern) == PARALLEL)
6124 pattern = XVECEXP (pattern, 0, 0);
6125 switch (GET_CODE (pattern))
6126 {
6127 case SET:
6128 if (GET_CODE (SET_SRC (pattern)) != CALL
6129 && get_attr_type (insn) != TYPE_SFUNC)
6130 {
6131 targetm.asm_out.internal_label
6132 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6133 break;
6134 }
6135 /* else FALLTHROUGH */
6136 case CALL:
6137 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6138 CODE_LABEL_NUMBER (XEXP (note, 0)));
6139 break;
6140
6141 default:
6142 gcc_unreachable ();
6143 }
6144 }
6145 }
6146 }
6147
6148 /* Dump out any constants accumulated in the final pass. These will
6149 only be labels. */
6150
6151 const char *
6152 output_jump_label_table (void)
6153 {
6154 int i;
6155
6156 if (pool_size)
6157 {
6158 fprintf (asm_out_file, "\t.align 2\n");
6159 for (i = 0; i < pool_size; i++)
6160 {
6161 pool_node *p = &pool_vector[i];
6162
6163 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6164 CODE_LABEL_NUMBER (p->label));
6165 output_asm_insn (".long %O0", &p->value);
6166 }
6167 pool_size = 0;
6168 }
6169
6170 return "";
6171 }
6172 \f
6173 /* A full frame looks like:
6174
6175 arg-5
6176 arg-4
6177 [ if current_function_anonymous_args
6178 arg-3
6179 arg-2
6180 arg-1
6181 arg-0 ]
6182 saved-fp
6183 saved-r10
6184 saved-r11
6185 saved-r12
6186 saved-pr
6187 local-n
6188 ..
6189 local-1
6190 local-0 <- fp points here. */
6191
6192 /* Number of bytes pushed for anonymous args, used to pass information
6193 between expand_prologue and expand_epilogue. */
6194
6195 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6196 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6197 for an epilogue and a negative value means that it's for a sibcall
6198 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6199 all the registers that are about to be restored, and hence dead. */
6200
6201 static void
6202 output_stack_adjust (int size, rtx reg, int epilogue_p,
6203 HARD_REG_SET *live_regs_mask, bool frame_p)
6204 {
6205 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6206 if (size)
6207 {
6208 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6209
6210 /* This test is bogus, as output_stack_adjust is used to re-align the
6211 stack. */
6212 #if 0
6213 gcc_assert (!(size % align));
6214 #endif
6215
6216 if (CONST_OK_FOR_ADD (size))
6217 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6218 /* Try to do it with two partial adjustments; however, we must make
6219 sure that the stack is properly aligned at all times, in case
6220 an interrupt occurs between the two partial adjustments. */
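	  /* Illustrative example (editorial note, assuming SIZE == 192,
	     ALIGN == 8 and the non-SHmedia I08 range): 192 is too large for a
	     single add, but 192 / 2 & -8 == 96 and 192 - 96 == 96 both fit,
	     and each partial adjustment keeps the stack 8-byte aligned.  */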
6221 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6222 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6223 {
6224 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6225 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6226 }
6227 else
6228 {
6229 rtx const_reg;
6230 rtx insn;
6231 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6232 int i;
6233
6234 /* If TEMP is invalid, we could temporarily save a general
6235 register to MACL. However, there is currently no need
6236 to handle this case, so just die when we see it. */
6237 if (epilogue_p < 0
6238 || current_function_interrupt
6239 || ! call_really_used_regs[temp] || fixed_regs[temp])
6240 temp = -1;
6241 if (temp < 0 && ! current_function_interrupt
6242 && (TARGET_SHMEDIA || epilogue_p >= 0))
6243 {
6244 HARD_REG_SET temps;
6245 COPY_HARD_REG_SET (temps, call_used_reg_set);
6246 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6247 if (epilogue_p > 0)
6248 {
6249 int nreg = 0;
6250 if (crtl->return_rtx)
6251 {
6252 enum machine_mode mode;
6253 mode = GET_MODE (crtl->return_rtx);
6254 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6255 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6256 }
6257 for (i = 0; i < nreg; i++)
6258 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6259 if (crtl->calls_eh_return)
6260 {
6261 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6262 for (i = 0; i <= 3; i++)
6263 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6264 }
6265 }
6266 if (TARGET_SHMEDIA && epilogue_p < 0)
6267 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6268 CLEAR_HARD_REG_BIT (temps, i);
6269 if (epilogue_p <= 0)
6270 {
6271 for (i = FIRST_PARM_REG;
6272 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6273 CLEAR_HARD_REG_BIT (temps, i);
6274 if (cfun->static_chain_decl != NULL)
6275 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6276 }
6277 temp = scavenge_reg (&temps);
6278 }
6279 if (temp < 0 && live_regs_mask)
6280 {
6281 HARD_REG_SET temps;
6282
6283 COPY_HARD_REG_SET (temps, *live_regs_mask);
6284 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6285 temp = scavenge_reg (&temps);
6286 }
6287 if (temp < 0)
6288 {
6289 rtx adj_reg, tmp_reg, mem;
6290
6291 /* If we reached here, the most likely case is the (sibcall)
6292 epilogue for non-SHmedia. Put a special push/pop sequence
6293 for such a case as the last resort. This looks lengthy, but
6294 it should not be a problem because it seems to be very
6295 rare. */
6296
6297 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6298
6299
6300 /* ??? There is still the slight possibility that r4 or
6301 r5 have been reserved as fixed registers or assigned
6302 as global registers, and they change during an
6303 interrupt. There are possible ways to handle this:
6304
6305 - If we are adjusting the frame pointer (r14), we can do
6306 with a single temp register and an ordinary push / pop
6307 on the stack.
6308 - Grab any call-used or call-saved registers (i.e. not
6309 fixed or globals) for the temps we need. We might
6310 also grab r14 if we are adjusting the stack pointer.
6311 If we can't find enough available registers, issue
6312 a diagnostic and die - the user must have reserved
6313 way too many registers.
6314 But since all this is rather unlikely to happen and
6315 would require extra testing, we just die if r4 / r5
6316 are not available. */
6317 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6318 && !global_regs[4] && !global_regs[5]);
6319
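	  /* Outline of the sequence below (editorial note): save r4 at the
	     location REG points to, compute REG + SIZE in r4, push r5 and
	     the saved r4 value just below REG + SIZE, switch REG to that
	     adjusted value, then pop r4 and r5 back with post-increment
	     loads, leaving REG adjusted by SIZE.  */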
6320 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6321 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6322 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6323 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6324 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6325 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6326 emit_move_insn (mem, tmp_reg);
6327 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6328 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6329 emit_move_insn (mem, tmp_reg);
6330 emit_move_insn (reg, adj_reg);
6331 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6332 emit_move_insn (adj_reg, mem);
6333 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6334 emit_move_insn (tmp_reg, mem);
6335 /* Tell flow the insns that pop r4/r5 aren't dead. */
6336 emit_use (tmp_reg);
6337 emit_use (adj_reg);
6338 return;
6339 }
6340 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6341
6342 /* If SIZE is negative, subtract the positive value.
6343 This sometimes allows a constant pool entry to be shared
6344 between prologue and epilogue code. */
6345 if (size < 0)
6346 {
6347 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6348 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6349 }
6350 else
6351 {
6352 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6353 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6354 }
6355 if (! epilogue_p)
6356 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6357 gen_rtx_SET (VOIDmode, reg,
6358 gen_rtx_PLUS (SImode, reg,
6359 GEN_INT (size))));
6360 }
6361 }
6362 }
6363
6364 static rtx
6365 frame_insn (rtx x)
6366 {
6367 x = emit_insn (x);
6368 RTX_FRAME_RELATED_P (x) = 1;
6369 return x;
6370 }
6371
6372 /* Output RTL to push register RN onto the stack. */
6373
6374 static rtx
6375 push (int rn)
6376 {
6377 rtx x;
6378 if (rn == FPUL_REG)
6379 x = gen_push_fpul ();
6380 else if (rn == FPSCR_REG)
6381 x = gen_push_fpscr ();
6382 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6383 && FP_OR_XD_REGISTER_P (rn))
6384 {
6385 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6386 return NULL_RTX;
6387 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6388 }
6389 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6390 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6391 else
6392 x = gen_push (gen_rtx_REG (SImode, rn));
6393
6394 x = frame_insn (x);
6395 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6396 return x;
6397 }
6398
6399 /* Output RTL to pop register RN from the stack. */
6400
6401 static void
6402 pop (int rn)
6403 {
6404 rtx x;
6405 if (rn == FPUL_REG)
6406 x = gen_pop_fpul ();
6407 else if (rn == FPSCR_REG)
6408 x = gen_pop_fpscr ();
6409 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6410 && FP_OR_XD_REGISTER_P (rn))
6411 {
6412 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6413 return;
6414 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6415 }
6416 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6417 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6418 else
6419 x = gen_pop (gen_rtx_REG (SImode, rn));
6420
6421 x = emit_insn (x);
6422 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6423 }
6424
6425 /* Generate code to push the regs specified in the mask. */
6426
6427 static void
6428 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6429 {
6430 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6431 int skip_fpscr = 0;
6432
6433 /* Push PR last; this gives better latencies after the prologue, and
6434 provides candidates for the return delay slot when there are no
6435 general registers pushed. */
6436 for (; i < FIRST_PSEUDO_REGISTER; i++)
6437 {
6438 /* If this is an interrupt handler, and the SZ bit varies,
6439 and we have to push any floating point register, we need
6440 to switch to the correct precision first. */
6441 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6442 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6443 {
6444 HARD_REG_SET unsaved;
6445
6446 push (FPSCR_REG);
6447 COMPL_HARD_REG_SET (unsaved, *mask);
6448 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6449 skip_fpscr = 1;
6450 }
6451 if (i != PR_REG
6452 && (i != FPSCR_REG || ! skip_fpscr)
6453 && TEST_HARD_REG_BIT (*mask, i))
6454 {
6455 /* If the ISR has the RESBANK attribute assigned, don't push any of
6456 the following registers: R0-R14, MACH, MACL and GBR. */
6457 if (! (sh_cfun_resbank_handler_p ()
6458 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6459 || i == MACH_REG
6460 || i == MACL_REG
6461 || i == GBR_REG)))
6462 push (i);
6463 }
6464 }
6465
6466 /* Push banked registers last to improve delay slot opportunities. */
6467 if (interrupt_handler)
6468 {
6469 bool use_movml = false;
6470
6471 if (TARGET_SH2A)
6472 {
6473 unsigned int count = 0;
6474
6475 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6476 if (TEST_HARD_REG_BIT (*mask, i))
6477 count++;
6478 else
6479 break;
6480
6481 /* Use movml when all banked registers are pushed. */
6482 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6483 use_movml = true;
6484 }
6485
6486 if (use_movml)
6487 {
6488 rtx x, mem, reg, set;
6489 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6490
6491 /* We must avoid scheduling the multiple-store insn together
6492 with other insns. */
6493 emit_insn (gen_blockage ());
6494 x = gen_movml_push_banked (sp_reg);
6495 x = frame_insn (x);
6496 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6497 {
6498 mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
6499 reg = gen_rtx_REG (SImode, i);
6500 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6501 }
6502
6503 set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
6504 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6505 emit_insn (gen_blockage ());
6506 }
6507 else
6508 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6509 if (TEST_HARD_REG_BIT (*mask, i))
6510 push (i);
6511 }
6512
6513 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6514 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6515 push (PR_REG);
6516 }
6517
6518 /* Calculate how much extra space is needed to save all callee-saved
6519 target registers.
6520 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6521
6522 static int
6523 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6524 {
6525 int reg;
6526 int stack_space = 0;
6527 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6528
6529 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6530 if ((! call_really_used_regs[reg] || interrupt_handler)
6531 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6532 /* Leave space to save this target register on the stack,
6533 in case target register allocation wants to use it. */
6534 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6535 return stack_space;
6536 }
6537
6538 /* Decide whether we should reserve space for callee-save target registers,
6539 in case target register allocation wants to use them. REGS_SAVED is
6540 the space, in bytes, that is already required for register saves.
6541 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6542
6543 static int
6544 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6545 HARD_REG_SET *live_regs_mask)
6546 {
6547 if (optimize_size)
6548 return 0;
6549 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6550 }
6551
6552 /* Decide how much space to reserve for callee-save target registers
6553 in case target register allocation wants to use them.
6554 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6555
6556 static int
6557 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6558 {
6559 if (shmedia_space_reserved_for_target_registers)
6560 return shmedia_target_regs_stack_space (live_regs_mask);
6561 else
6562 return 0;
6563 }
6564
6565 /* Work out the registers which need to be saved, both as a mask and a
6566 count of saved words. Return the count.
6567
6568 If doing a pragma interrupt function, then push all regs used by the
6569 function, and if we call another function (we can tell by looking at PR),
6570 make sure that all the regs it clobbers are safe too. */
6571
6572 static int
6573 calc_live_regs (HARD_REG_SET *live_regs_mask)
6574 {
6575 unsigned int reg;
6576 int count;
6577 tree attrs;
6578 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6579 bool nosave_low_regs;
6580 int pr_live, has_call;
6581
6582 attrs = DECL_ATTRIBUTES (current_function_decl);
6583 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6584 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6585 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6586 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6587
6588 CLEAR_HARD_REG_SET (*live_regs_mask);
6589 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6590 && df_regs_ever_live_p (FPSCR_REG))
6591 target_flags &= ~MASK_FPU_SINGLE;
6592 /* If we can avoid a lot of saves by switching to double mode, do that. */
6593 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6594 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6595 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6596 && (! call_really_used_regs[reg]
6597 || interrupt_handler)
6598 && ++count > 2)
6599 {
6600 target_flags &= ~MASK_FPU_SINGLE;
6601 break;
6602 }
6603 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6604 knows how to use it. That means the pseudo originally allocated for
6605 the initial value can become the PR_MEDIA_REG hard register, as seen for
6606 execute/20010122-1.c:test9. */
6607 if (TARGET_SHMEDIA)
6608 /* ??? this function is called from initial_elimination_offset, hence we
6609 can't use the result of sh_media_register_for_return here. */
6610 pr_live = sh_pr_n_sets ();
6611 else
6612 {
6613 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6614 pr_live = (pr_initial
6615 ? (!REG_P (pr_initial)
6616 || REGNO (pr_initial) != (PR_REG))
6617 : df_regs_ever_live_p (PR_REG));
6618 /* For SHcompact, if not optimizing, we end up with a memory reference
6619 using the return address pointer for __builtin_return_address even
6620 though there is no actual need to put the PR register on the stack. */
6621 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6622 }
6623 /* Force PR to be live if the prologue has to call the SHmedia
6624 argument decoder or register saver. */
6625 if (TARGET_SHCOMPACT
6626 && ((crtl->args.info.call_cookie
6627 & ~ CALL_COOKIE_RET_TRAMP (1))
6628 || crtl->saves_all_registers))
6629 pr_live = 1;
6630 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
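  /* Summary of the condition below (editorial note, roughly): PR is
     saved iff PR_LIVE; in an interrupt handler we save every register
     that is live or that a called function could clobber, excluding
     the stack / argument pointers, T, GBR and (without an FPU) FPSCR;
     otherwise we save the live call-saved registers, the EH return
     data registers when needed, and MACH / MACL for "renesas"
     functions.  See the full condition for the SHcompact / SHmedia
     special cases.  */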
6631 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6632 {
6633 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6634 ? pr_live
6635 : interrupt_handler
6636 ? (/* Need to save all the regs ever live. */
6637 (df_regs_ever_live_p (reg)
6638 || (call_really_used_regs[reg]
6639 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6640 || reg == PIC_OFFSET_TABLE_REGNUM)
6641 && has_call)
6642 || (TARGET_SHMEDIA && has_call
6643 && REGISTER_NATURAL_MODE (reg) == SImode
6644 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6645 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6646 && reg != RETURN_ADDRESS_POINTER_REGNUM
6647 && reg != T_REG && reg != GBR_REG
6648 /* Push fpscr only on targets which have an FPU. */
6649 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6650 : (/* Only push those regs which are used and need to be saved. */
6651 (TARGET_SHCOMPACT
6652 && flag_pic
6653 && crtl->args.info.call_cookie
6654 && reg == PIC_OFFSET_TABLE_REGNUM)
6655 || (df_regs_ever_live_p (reg)
6656 && ((!call_really_used_regs[reg]
6657 && !(reg != PIC_OFFSET_TABLE_REGNUM
6658 && fixed_regs[reg] && call_used_regs[reg]))
6659 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6660 || (crtl->calls_eh_return
6661 && (reg == EH_RETURN_DATA_REGNO (0)
6662 || reg == EH_RETURN_DATA_REGNO (1)
6663 || reg == EH_RETURN_DATA_REGNO (2)
6664 || reg == EH_RETURN_DATA_REGNO (3)))
6665 || ((reg == MACL_REG || reg == MACH_REG)
6666 && df_regs_ever_live_p (reg)
6667 && sh_cfun_attr_renesas_p ())
6668 ))
6669 {
6670 SET_HARD_REG_BIT (*live_regs_mask, reg);
6671 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6672
6673 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6674 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6675 {
6676 if (FP_REGISTER_P (reg))
6677 {
6678 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6679 {
6680 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6681 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6682 }
6683 }
6684 else if (XD_REGISTER_P (reg))
6685 {
6686 /* Must switch to double mode to access these registers. */
6687 target_flags &= ~MASK_FPU_SINGLE;
6688 }
6689 }
6690 }
6691 if (nosave_low_regs && reg == R8_REG)
6692 break;
6693 }
6694 /* If we have a target register optimization pass after prologue / epilogue
6695 threading, we need to assume all target registers will be live even if
6696 they aren't now. */
6697 if (flag_branch_target_load_optimize2
6698 && TARGET_SAVE_ALL_TARGET_REGS
6699 && shmedia_space_reserved_for_target_registers)
6700 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6701 if ((! call_really_used_regs[reg] || interrupt_handler)
6702 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6703 {
6704 SET_HARD_REG_BIT (*live_regs_mask, reg);
6705 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6706 }
6707 /* If this is an interrupt handler, we don't have any call-clobbered
6708 registers we can conveniently use for target register save/restore.
6709 Make sure we save at least one general purpose register when we need
6710 to save target registers. */
6711 if (interrupt_handler
6712 && hard_reg_set_intersect_p (*live_regs_mask,
6713 reg_class_contents[TARGET_REGS])
6714 && ! hard_reg_set_intersect_p (*live_regs_mask,
6715 reg_class_contents[GENERAL_REGS]))
6716 {
6717 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6718 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6719 }
6720
6721 return count;
6722 }
6723
6724 /* Code to generate prologue and epilogue sequences */
6725
6726 /* PUSHED is the number of bytes that are being pushed on the
6727 stack for register saves. Return the frame size, padded
6728 appropriately so that the stack stays properly aligned. */
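/* For example (illustrative, assuming an 8-byte STACK_BOUNDARY): with an
   18-byte frame and PUSHED == 12, ((18 + 12 + 7) & -8) - 12 == 20, i.e.
   the frame is padded to 20 bytes so that PUSHED plus the returned size
   (32) is a multiple of the alignment.  */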
6729 static HOST_WIDE_INT
6730 rounded_frame_size (int pushed)
6731 {
6732 HOST_WIDE_INT size = get_frame_size ();
6733 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6734
6735 if (ACCUMULATE_OUTGOING_ARGS)
6736 size += crtl->outgoing_args_size;
6737
6738 return ((size + pushed + align - 1) & -align) - pushed;
6739 }
6740
6741 /* Choose a call-clobbered target-branch register that remains
6742 unchanged along the whole function. We set it up to hold the
6743 return address in the prologue. */
6744 int
6745 sh_media_register_for_return (void)
6746 {
6747 int regno;
6748 int tr0_used;
6749
6750 if (! current_function_is_leaf)
6751 return -1;
6752 if (lookup_attribute ("interrupt_handler",
6753 DECL_ATTRIBUTES (current_function_decl)))
6754 return -1;
6755 if (sh_cfun_interrupt_handler_p ())
6756 return -1;
6757
6758 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6759
6760 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6761 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6762 return regno;
6763
6764 return -1;
6765 }
6766
6767 /* The maximum registers we need to save are:
6768 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6769 - 32 floating point registers (for each pair, we save none,
6770 one single precision value, or a double precision value).
6771 - 8 target registers
6772 - add 1 entry for a delimiter. */
6773 #define MAX_SAVED_REGS (62+32+8)
6774
6775 typedef struct save_entry_s
6776 {
6777 unsigned char reg;
6778 unsigned char mode;
6779 short offset;
6780 } save_entry;
6781
6782 #define MAX_TEMPS 4
6783
6784 /* There will be a delimiter entry with VOIDmode both at the start and the
6785 end of a filled in schedule. The end delimiter has the offset of the
6786 save with the smallest (i.e. most negative) offset. */
6787 typedef struct save_schedule_s
6788 {
6789 save_entry entries[MAX_SAVED_REGS + 2];
6790 int temps[MAX_TEMPS+1];
6791 } save_schedule;
6792
6793 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6794 use reverse order. Returns the last entry written to (not counting
6795 the delimiter). OFFSET_BASE is a number to be added to all offset
6796 entries. */
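/* For example (illustrative): with OFFSET_BASE == 0 and only r28
   (DImode) and fr12 (SFmode) live, the filled-in schedule would hold
   { -1/VOIDmode/0, r28/DImode/-8, fr12/SFmode/-12, -1/VOIDmode/-12 },
   the final delimiter carrying the most negative offset.  */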
6797
6798 static save_entry *
6799 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6800 int offset_base)
6801 {
6802 int align, i;
6803 save_entry *entry = schedule->entries;
6804 int tmpx = 0;
6805 int offset;
6806
6807 if (! current_function_interrupt)
6808 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6809 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6810 && ! FUNCTION_ARG_REGNO_P (i)
6811 && i != FIRST_RET_REG
6812 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6813 && ! (crtl->calls_eh_return
6814 && (i == EH_RETURN_STACKADJ_REGNO
6815 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6816 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6817 schedule->temps[tmpx++] = i;
6818 entry->reg = -1;
6819 entry->mode = VOIDmode;
6820 entry->offset = offset_base;
6821 entry++;
6822 /* We loop twice: first, we save the 8-byte aligned registers at
6823 the higher addresses, which are known to be aligned. Then we
6824 proceed to saving the 32-bit registers that don't need 8-byte
6825 alignment.
6826 If this is an interrupt function, all registers that need saving
6827 need to be saved in full. Moreover, we need to postpone saving
6828 target registers until we have saved some general purpose
6829 registers that we can then use as scratch registers. */
6830 offset = offset_base;
6831 for (align = 1; align >= 0; align--)
6832 {
6833 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6834 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6835 {
6836 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6837 int reg = i;
6838
6839 if (current_function_interrupt)
6840 {
6841 if (TARGET_REGISTER_P (i))
6842 continue;
6843 if (GENERAL_REGISTER_P (i))
6844 mode = DImode;
6845 }
6846 if (mode == SFmode && (i % 2) == 1
6847 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6848 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6849 {
6850 mode = DFmode;
6851 i--;
6852 reg--;
6853 }
6854
6855 /* If we're doing the aligned pass and this is not aligned,
6856 or we're doing the unaligned pass and this is aligned,
6857 skip it. */
6858 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6859 != align)
6860 continue;
6861
6862 if (current_function_interrupt
6863 && GENERAL_REGISTER_P (i)
6864 && tmpx < MAX_TEMPS)
6865 schedule->temps[tmpx++] = i;
6866
6867 offset -= GET_MODE_SIZE (mode);
6868 entry->reg = i;
6869 entry->mode = mode;
6870 entry->offset = offset;
6871 entry++;
6872 }
6873 if (align && current_function_interrupt)
6874 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6875 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6876 {
6877 offset -= GET_MODE_SIZE (DImode);
6878 entry->reg = i;
6879 entry->mode = DImode;
6880 entry->offset = offset;
6881 entry++;
6882 }
6883 }
6884 entry->reg = -1;
6885 entry->mode = VOIDmode;
6886 entry->offset = offset;
6887 schedule->temps[tmpx] = -1;
6888 return entry - 1;
6889 }
6890
6891 void
6892 sh_expand_prologue (void)
6893 {
6894 HARD_REG_SET live_regs_mask;
6895 int d, i;
6896 int d_rounding = 0;
6897 int save_flags = target_flags;
6898 int pretend_args;
6899 int stack_usage;
6900 tree sp_switch_attr
6901 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6902
6903 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6904
6905 /* We have pretend args if we had an object sent partially in registers
6906 and partially on the stack, e.g. a large structure. */
6907 pretend_args = crtl->args.pretend_args_size;
6908 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6909 && (NPARM_REGS(SImode)
6910 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6911 pretend_args = 0;
6912
6913 output_stack_adjust (-pretend_args
6914 - crtl->args.info.stack_regs * 8,
6915 stack_pointer_rtx, 0, NULL, true);
6916 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
6917
6918 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6919 /* We're going to use the PIC register to load the address of the
6920 incoming-argument decoder and/or of the return trampoline from
6921 the GOT, so make sure the PIC register is preserved and
6922 initialized. */
6923 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6924
6925 if (TARGET_SHCOMPACT
6926 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6927 {
6928 int reg;
6929
6930 /* First, make all registers with incoming arguments that will
6931 be pushed onto the stack live, so that register renaming
6932 doesn't overwrite them. */
6933 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6934 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6935 >= NPARM_REGS (SImode) - reg)
6936 for (; reg < NPARM_REGS (SImode); reg++)
6937 emit_insn (gen_shcompact_preserve_incoming_args
6938 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6939 else if (CALL_COOKIE_INT_REG_GET
6940 (crtl->args.info.call_cookie, reg) == 1)
6941 emit_insn (gen_shcompact_preserve_incoming_args
6942 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6943
6944 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6945 stack_pointer_rtx);
6946 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6947 GEN_INT (crtl->args.info.call_cookie));
6948 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6949 gen_rtx_REG (SImode, R0_REG));
6950 }
6951 else if (TARGET_SHMEDIA)
6952 {
6953 int tr = sh_media_register_for_return ();
6954
6955 if (tr >= 0)
6956 emit_move_insn (gen_rtx_REG (DImode, tr),
6957 gen_rtx_REG (DImode, PR_MEDIA_REG));
6958 }
6959
6960 /* Emit the code for SETUP_VARARGS. */
6961 if (cfun->stdarg)
6962 {
6963 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6964 {
6965 /* Push arg regs as if they'd been provided by the caller on the stack. */
6966 for (i = 0; i < NPARM_REGS(SImode); i++)
6967 {
6968 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6969
6970 if (i >= (NPARM_REGS(SImode)
6971 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6972 ))
6973 break;
6974 push (rn);
6975 stack_usage += GET_MODE_SIZE (SImode);
6976 }
6977 }
6978 }
6979
6980 /* If we're supposed to switch stacks at function entry, do so now. */
6981 if (sp_switch_attr)
6982 {
6983 rtx lab, newsrc;
6984 /* The argument specifies a variable holding the address of the
6985 stack the interrupt function should switch to/from at entry/exit. */
6986 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6987 const char *s
6988 = ggc_strdup (TREE_STRING_POINTER (arg));
6989 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6990
6991 lab = add_constant (sp_switch, SImode, 0);
6992 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6993 newsrc = gen_const_mem (SImode, newsrc);
6994
6995 emit_insn (gen_sp_switch_1 (newsrc));
6996 }
6997
6998 d = calc_live_regs (&live_regs_mask);
6999 /* ??? Maybe we could save some switching if we can move a mode switch
7000 that already happens to be at the function start into the prologue. */
7001 if (target_flags != save_flags && ! current_function_interrupt)
7002 emit_insn (gen_toggle_sz ());
7003
7004 if (TARGET_SH5)
7005 {
7006 int offset_base, offset;
7007 rtx r0 = NULL_RTX;
7008 int offset_in_r0 = -1;
7009 int sp_in_r0 = 0;
7010 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7011 int total_size, save_size;
7012 save_schedule schedule;
7013 save_entry *entry;
7014 int *tmp_pnt;
7015
7016 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7017 && ! current_function_interrupt)
7018 r0 = gen_rtx_REG (Pmode, R0_REG);
7019
7020 /* D is the actual number of bytes that we need for saving registers;
7021 however, in initial_elimination_offset we have committed to using
7022 an additional TREGS_SPACE bytes. In order to keep both the
7023 addresses of arguments supplied by the caller and local variables
7024 valid, we must keep this gap. Place it between the incoming
7025 arguments and the actually saved registers in a bid to optimize
7026 locality of reference. */
7027 total_size = d + tregs_space;
7028 total_size += rounded_frame_size (total_size);
7029 save_size = total_size - rounded_frame_size (d);
7030 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7031 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7032 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7033
7034 /* If adjusting the stack in a single step costs nothing extra, do so.
7035 I.e. do so either if a single addi is enough, or if we need a movi
7036 anyway and we don't exceed the maximum offset range (the test for
7037 the latter is conservative for simplicity). */
7038 if (TARGET_SHMEDIA
7039 && (CONST_OK_FOR_I10 (-total_size)
7040 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7041 && total_size <= 2044)))
7042 d_rounding = total_size - save_size;
7043
7044 offset_base = d + d_rounding;
7045
7046 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7047 0, NULL, true);
7048 stack_usage += save_size + d_rounding;
7049
7050 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7051 tmp_pnt = schedule.temps;
7052 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7053 {
7054 enum machine_mode mode = (enum machine_mode) entry->mode;
7055 unsigned int reg = entry->reg;
7056 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7057 rtx orig_reg_rtx;
7058
7059 offset = entry->offset;
7060
7061 reg_rtx = gen_rtx_REG (mode, reg);
7062
7063 mem_rtx = gen_frame_mem (mode,
7064 gen_rtx_PLUS (Pmode,
7065 stack_pointer_rtx,
7066 GEN_INT (offset)));
7067
7068 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7069 {
7070 gcc_assert (r0);
7071 mem_rtx = NULL_RTX;
7072 }
7073
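	  /* Editorial note on the fallback below: when the sp + OFFSET
	     address is not directly addressable, or for PR and the special
	     registers whose values are staged through a temporary first, we
	     address the save slot through r0 instead, preferring a
	     pre-decrement store when the saves are contiguous and otherwise
	     loading the required offset (possibly plus sp) into r0.  */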
7074 if (HAVE_PRE_DECREMENT
7075 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7076 || mem_rtx == NULL_RTX
7077 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7078 {
7079 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7080
7081 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7082 pre_dec = NULL_RTX;
7083 else
7084 {
7085 mem_rtx = NULL_RTX;
7086 offset += GET_MODE_SIZE (mode);
7087 }
7088 }
7089
7090 if (mem_rtx != NULL_RTX)
7091 goto addr_ok;
7092
7093 if (offset_in_r0 == -1)
7094 {
7095 emit_move_insn (r0, GEN_INT (offset));
7096 offset_in_r0 = offset;
7097 }
7098 else if (offset != offset_in_r0)
7099 {
7100 emit_move_insn (r0,
7101 gen_rtx_PLUS
7102 (Pmode, r0,
7103 GEN_INT (offset - offset_in_r0)));
7104 offset_in_r0 += offset - offset_in_r0;
7105 }
7106
7107 if (pre_dec != NULL_RTX)
7108 {
7109 if (! sp_in_r0)
7110 {
7111 emit_move_insn (r0,
7112 gen_rtx_PLUS
7113 (Pmode, r0, stack_pointer_rtx));
7114 sp_in_r0 = 1;
7115 }
7116
7117 offset -= GET_MODE_SIZE (mode);
7118 offset_in_r0 -= GET_MODE_SIZE (mode);
7119
7120 mem_rtx = pre_dec;
7121 }
7122 else if (sp_in_r0)
7123 mem_rtx = gen_frame_mem (mode, r0);
7124 else
7125 mem_rtx = gen_frame_mem (mode,
7126 gen_rtx_PLUS (Pmode,
7127 stack_pointer_rtx,
7128 r0));
7129
7130 /* We must not use an r0-based address for target-branch
7131 registers or for special registers without pre-dec
7132 memory addresses, since we store their values in r0
7133 first. */
7134 gcc_assert (!TARGET_REGISTER_P (reg)
7135 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7136 || mem_rtx == pre_dec));
7137
7138 addr_ok:
7139 orig_reg_rtx = reg_rtx;
7140 if (TARGET_REGISTER_P (reg)
7141 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7142 && mem_rtx != pre_dec))
7143 {
7144 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7145
7146 emit_move_insn (tmp_reg, reg_rtx);
7147
7148 if (REGNO (tmp_reg) == R0_REG)
7149 {
7150 offset_in_r0 = -1;
7151 sp_in_r0 = 0;
7152 gcc_assert (!refers_to_regno_p
7153 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7154 }
7155
7156 if (*++tmp_pnt <= 0)
7157 tmp_pnt = schedule.temps;
7158
7159 reg_rtx = tmp_reg;
7160 }
7161 {
7162 rtx insn;
7163
7164 /* Mark as interesting for the DWARF CFI generator. */
7165 insn = emit_move_insn (mem_rtx, reg_rtx);
7166 RTX_FRAME_RELATED_P (insn) = 1;
7167 /* If we use an intermediate register for the save, we can't
7168 describe this exactly in the CFI as a copy of the to-be-saved
7169 register into the temporary register followed by a store of the
7170 temporary register to the stack, because the temporary register
7171 can have a different natural size than the to-be-saved register.
7172 Thus, we gloss over the intermediate copy and pretend we do
7173 a direct save from the to-be-saved register. */
7174 if (REGNO (reg_rtx) != reg)
7175 {
7176 rtx set;
7177
7178 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7179 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7180 }
7181
7182 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7183 {
7184 rtx reg_rtx = gen_rtx_REG (mode, reg);
7185 rtx set;
7186 rtx mem_rtx = gen_frame_mem (mode,
7187 gen_rtx_PLUS (Pmode,
7188 stack_pointer_rtx,
7189 GEN_INT (offset)));
7190
7191 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7192 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7193 }
7194 }
7195 }
7196
7197 gcc_assert (entry->offset == d_rounding);
7198 }
7199 else
7200 {
7201 push_regs (&live_regs_mask, current_function_interrupt);
7202 stack_usage += d;
7203 }
7204
7205 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7206 emit_insn (gen_GOTaddr2picreg ());
7207
7208 if (SHMEDIA_REGS_STACK_ADJUST ())
7209 {
7210 /* This must NOT go through the PLT, otherwise mach and macl
7211 may be clobbered. */
7212 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7213 (TARGET_FPU_ANY
7214 ? "__GCC_push_shmedia_regs"
7215 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7216 emit_insn (gen_shmedia_save_restore_regs_compact
7217 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7218 }
7219
7220 if (target_flags != save_flags && ! current_function_interrupt)
7221 emit_insn (gen_toggle_sz ());
7222
7223 target_flags = save_flags;
7224
7225 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7226 stack_pointer_rtx, 0, NULL, true);
7227 stack_usage += rounded_frame_size (d) - d_rounding;
7228
7229 if (frame_pointer_needed)
7230 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7231
7232 if (TARGET_SHCOMPACT
7233 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7234 {
7235 /* This must NOT go through the PLT, otherwise mach and macl
7236 may be clobbered. */
7237 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7238 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7239 emit_insn (gen_shcompact_incoming_args ());
7240 }
7241
7242 /* If we are profiling, make sure no instructions are scheduled before
7243 the call to mcount. Similarly, if call instructions get scheduled
7244 before frame related insns, it'll confuse the unwinder because
7245 currently SH has no unwind info for function epilogues. */
7246 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7247 emit_insn (gen_blockage ());
7248
7249 if (flag_stack_usage_info)
7250 current_function_static_stack_size = stack_usage;
7251 }
7252
7253 void
7254 sh_expand_epilogue (bool sibcall_p)
7255 {
7256 HARD_REG_SET live_regs_mask;
7257 int d, i;
7258 int d_rounding = 0;
7259
7260 int save_flags = target_flags;
7261 int frame_size, save_size;
7262 int fpscr_deferred = 0;
7263 int e = sibcall_p ? -1 : 1;
7264
7265 d = calc_live_regs (&live_regs_mask);
7266
7267 save_size = d;
7268 frame_size = rounded_frame_size (d);
7269
7270 if (TARGET_SH5)
7271 {
7272 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7273 int total_size;
7274 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7275 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7276 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7277
7278 total_size = d + tregs_space;
7279 total_size += rounded_frame_size (total_size);
7280 save_size = total_size - frame_size;
7281
7282 /* If adjusting the stack in a single step costs nothing extra, do so.
7283 I.e. either if a single addi is enough, or we need a movi anyway,
7284 and we don't exceed the maximum offset range (the test for the
7285 latter is conservative for simplicity). */
7286 if (TARGET_SHMEDIA
7287 && ! frame_pointer_needed
7288 && (CONST_OK_FOR_I10 (total_size)
7289 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7290 && total_size <= 2044)))
7291 d_rounding = frame_size;
7292
7293 frame_size -= d_rounding;
7294 }
7295
7296 if (frame_pointer_needed)
7297 {
7298 /* We must avoid scheduling the epilogue with previous basic blocks.
7299 See PR/18032 and PR/40313. */
7300 emit_insn (gen_blockage ());
7301 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7302 &live_regs_mask, false);
7303
7304 /* We must avoid moving the stack pointer adjustment past code
7305 which reads from the local frame, else an interrupt could
7306 occur after the SP adjustment and clobber data in the local
7307 frame. */
7308 emit_insn (gen_blockage ());
7309 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7310 }
7311 else if (frame_size)
7312 {
7313 /* We must avoid moving the stack pointer adjustment past code
7314 which reads from the local frame, else an interrupt could
7315 occur after the SP adjustment and clobber data in the local
7316 frame. */
7317 emit_insn (gen_blockage ());
7318 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7319 &live_regs_mask, false);
7320 }
7321
7322 if (SHMEDIA_REGS_STACK_ADJUST ())
7323 {
7324 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7325 (TARGET_FPU_ANY
7326 ? "__GCC_pop_shmedia_regs"
7327 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7328 /* This must NOT go through the PLT, otherwise mach and macl
7329 may be clobbered. */
7330 emit_insn (gen_shmedia_save_restore_regs_compact
7331 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7332 }
7333
7334 /* Pop all the registers. */
7335
7336 if (target_flags != save_flags && ! current_function_interrupt)
7337 emit_insn (gen_toggle_sz ());
7338 if (TARGET_SH5)
7339 {
7340 int offset_base, offset;
7341 int offset_in_r0 = -1;
7342 int sp_in_r0 = 0;
7343 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7344 save_schedule schedule;
7345 save_entry *entry;
7346 int *tmp_pnt;
7347
7348 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7349 offset_base = -entry[1].offset + d_rounding;
7350 tmp_pnt = schedule.temps;
7351 for (; entry->mode != VOIDmode; entry--)
7352 {
7353 enum machine_mode mode = (enum machine_mode) entry->mode;
7354 int reg = entry->reg;
7355 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7356
7357 offset = offset_base + entry->offset;
7358 reg_rtx = gen_rtx_REG (mode, reg);
7359
7360 mem_rtx = gen_frame_mem (mode,
7361 gen_rtx_PLUS (Pmode,
7362 stack_pointer_rtx,
7363 GEN_INT (offset)));
7364
7365 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7366 mem_rtx = NULL_RTX;
7367
7368 if (HAVE_POST_INCREMENT
7369 && (offset == offset_in_r0
7370 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7371 && mem_rtx == NULL_RTX)
7372 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7373 {
7374 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7375
7376 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7377 post_inc = NULL_RTX;
7378 else
7379 mem_rtx = NULL_RTX;
7380 }
7381
7382 if (mem_rtx != NULL_RTX)
7383 goto addr_ok;
7384
7385 if (offset_in_r0 == -1)
7386 {
7387 emit_move_insn (r0, GEN_INT (offset));
7388 offset_in_r0 = offset;
7389 }
7390 else if (offset != offset_in_r0)
7391 {
7392 emit_move_insn (r0,
7393 gen_rtx_PLUS
7394 (Pmode, r0,
7395 GEN_INT (offset - offset_in_r0)));
7396 offset_in_r0 += offset - offset_in_r0;
7397 }
7398
7399 if (post_inc != NULL_RTX)
7400 {
7401 if (! sp_in_r0)
7402 {
7403 emit_move_insn (r0,
7404 gen_rtx_PLUS
7405 (Pmode, r0, stack_pointer_rtx));
7406 sp_in_r0 = 1;
7407 }
7408
7409 mem_rtx = post_inc;
7410
7411 offset_in_r0 += GET_MODE_SIZE (mode);
7412 }
7413 else if (sp_in_r0)
7414 mem_rtx = gen_frame_mem (mode, r0);
7415 else
7416 mem_rtx = gen_frame_mem (mode,
7417 gen_rtx_PLUS (Pmode,
7418 stack_pointer_rtx,
7419 r0));
7420
7421 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7422 || mem_rtx == post_inc);
7423
7424 addr_ok:
7425 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7426 && mem_rtx != post_inc)
7427 {
7428 emit_move_insn (r0, mem_rtx);
7429 mem_rtx = r0;
7430 }
7431 else if (TARGET_REGISTER_P (reg))
7432 {
7433 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7434
7435 /* Give the scheduler a bit of freedom by using up to
7436 MAX_TEMPS registers in a round-robin fashion. */
7437 emit_move_insn (tmp_reg, mem_rtx);
7438 mem_rtx = tmp_reg;
7439 if (*++tmp_pnt < 0)
7440 tmp_pnt = schedule.temps;
7441 }
7442
7443 emit_move_insn (reg_rtx, mem_rtx);
7444 }
7445
7446 gcc_assert (entry->offset + offset_base == d + d_rounding);
7447 }
7448 else /* ! TARGET_SH5 */
7449 {
7450 int last_reg;
7451
7452 save_size = 0;
7453 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
7454 register. */
7455 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7456 && !sh_cfun_resbank_handler_p ())
7457 {
7458 if (!frame_pointer_needed)
7459 emit_insn (gen_blockage ());
7460 pop (PR_REG);
7461 }
7462
7463 /* Banked registers are popped first to avoid being scheduled in the
7464 delay slot. RTE switches banks before the ds instruction. */
7465 if (current_function_interrupt)
7466 {
7467 bool use_movml = false;
7468
7469 if (TARGET_SH2A)
7470 {
7471 unsigned int count = 0;
7472
7473 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7474 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7475 count++;
7476 else
7477 break;
7478
7479 /* Use movml when all banked registers are popped. */
7480 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7481 use_movml = true;
7482 }
7483
7484 if (use_movml)
7485 {
7486 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7487
7488 /* We must avoid scheduling the multiple-register load insn together
7489 with other insns. */
7490 emit_insn (gen_blockage ());
7491 emit_insn (gen_movml_pop_banked (sp_reg));
7492 emit_insn (gen_blockage ());
7493 }
7494 else
7495 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7496 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7497 pop (i);
7498
7499 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7500 }
7501 else
7502 last_reg = FIRST_PSEUDO_REGISTER;
7503
7504 for (i = 0; i < last_reg; i++)
7505 {
7506 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7507
7508 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7509 && hard_reg_set_intersect_p (live_regs_mask,
7510 reg_class_contents[DF_REGS]))
7511 fpscr_deferred = 1;
7512 /* For an ISR with the RESBANK attribute assigned, don't pop the
7513 following registers: R0-R14, MACH, MACL and GBR. */
7514 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7515 && ! (sh_cfun_resbank_handler_p ()
7516 && ((j >= FIRST_GENERAL_REG
7517 && j < LAST_GENERAL_REG)
7518 || j == MACH_REG
7519 || j == MACL_REG
7520 || j == GBR_REG)))
7521 pop (j);
7522
7523 if (j == FIRST_FP_REG && fpscr_deferred)
7524 pop (FPSCR_REG);
7525 }
7526 }
7527 if (target_flags != save_flags && ! current_function_interrupt)
7528 emit_insn (gen_toggle_sz ());
7529 target_flags = save_flags;
7530
7531 output_stack_adjust (crtl->args.pretend_args_size
7532 + save_size + d_rounding
7533 + crtl->args.info.stack_regs * 8,
7534 stack_pointer_rtx, e, NULL, false);
7535
7536 if (crtl->calls_eh_return)
7537 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7538 EH_RETURN_STACKADJ_RTX));
7539
7540 /* Switch back to the normal stack if necessary. */
7541 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7542 emit_insn (gen_sp_switch_2 ());
7543
7544 /* Tell flow the insn that pops PR isn't dead. */
7545 /* PR_REG will never be live in SHmedia mode, and we don't need to
7546 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7547 by the return pattern. */
7548 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7549 emit_use (gen_rtx_REG (SImode, PR_REG));
7550 }
7551
7552 static int sh_need_epilogue_known = 0;
7553
7554 int
7555 sh_need_epilogue (void)
7556 {
7557 if (! sh_need_epilogue_known)
7558 {
7559 rtx epilogue;
7560
7561 start_sequence ();
7562 sh_expand_epilogue (0);
7563 epilogue = get_insns ();
7564 end_sequence ();
7565 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7566 }
7567 return sh_need_epilogue_known > 0;
7568 }
7569
7570 /* Emit code to change the current function's return address to RA.
7571 TMP is available as a scratch register, if needed. */
7572
7573 void
7574 sh_set_return_address (rtx ra, rtx tmp)
7575 {
7576 HARD_REG_SET live_regs_mask;
7577 int d;
7578 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7579 int pr_offset;
7580
7581 d = calc_live_regs (&live_regs_mask);
7582
7583 /* If pr_reg isn't live, we can set it (or the register given in
7584 sh_media_register_for_return) directly. */
7585 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7586 {
7587 rtx rr;
7588
7589 if (TARGET_SHMEDIA)
7590 {
7591 int rr_regno = sh_media_register_for_return ();
7592
7593 if (rr_regno < 0)
7594 rr_regno = pr_reg;
7595
7596 rr = gen_rtx_REG (DImode, rr_regno);
7597 }
7598 else
7599 rr = gen_rtx_REG (SImode, pr_reg);
7600
7601 emit_insn (GEN_MOV (rr, ra));
7602 /* Tell flow the register for return isn't dead. */
7603 emit_use (rr);
7604 return;
7605 }
7606
7607 if (TARGET_SH5)
7608 {
7609 int offset;
7610 save_schedule schedule;
7611 save_entry *entry;
7612
7613 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7614 offset = entry[1].offset;
7615 for (; entry->mode != VOIDmode; entry--)
7616 if (entry->reg == pr_reg)
7617 goto found;
7618
7619 /* We can't find the pr register. */
7620 gcc_unreachable ();
7621
7622 found:
7623 offset = entry->offset - offset;
7624 pr_offset = (rounded_frame_size (d) + offset
7625 + SHMEDIA_REGS_STACK_ADJUST ());
7626 }
7627 else
7628 pr_offset = rounded_frame_size (d);
7629
7630 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7631
7632 if (frame_pointer_needed)
7633 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7634 else
7635 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7636
7637 tmp = gen_frame_mem (Pmode, tmp);
7638 emit_insn (GEN_MOV (tmp, ra));
7639 /* Tell flow this store isn't dead. */
7640 emit_use (tmp);
7641 }
7642
7643 /* Clear variables at function end. */
7644
7645 static void
7646 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7647 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7648 {
7649 sh_need_epilogue_known = 0;
7650 }
7651
7652 static rtx
7653 sh_builtin_saveregs (void)
7654 {
7655 /* First unnamed integer register. */
7656 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7657 /* Number of integer registers we need to save. */
7658 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7659 /* First unnamed SFmode float reg. */
7660 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7661 /* Number of SFmode float regs to save. */
7662 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7663 rtx regbuf, fpregs;
7664 int bufsize, regno;
7665 alias_set_type alias_set;
7666
7667 if (TARGET_SH5)
7668 {
7669 if (n_intregs)
7670 {
7671 int pushregs = n_intregs;
7672
7673 while (pushregs < NPARM_REGS (SImode) - 1
7674 && (CALL_COOKIE_INT_REG_GET
7675 (crtl->args.info.call_cookie,
7676 NPARM_REGS (SImode) - pushregs)
7677 == 1))
7678 {
7679 crtl->args.info.call_cookie
7680 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7681 - pushregs, 1);
7682 pushregs++;
7683 }
7684
7685 if (pushregs == NPARM_REGS (SImode))
7686 crtl->args.info.call_cookie
7687 |= (CALL_COOKIE_INT_REG (0, 1)
7688 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7689 else
7690 crtl->args.info.call_cookie
7691 |= CALL_COOKIE_STACKSEQ (pushregs);
7692
7693 crtl->args.pretend_args_size += 8 * n_intregs;
7694 }
7695 if (TARGET_SHCOMPACT)
7696 return const0_rtx;
7697 }
7698
7699 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7700 {
7701 error ("__builtin_saveregs not supported by this subtarget");
7702 return const0_rtx;
7703 }
7704
7705 if (TARGET_SHMEDIA)
7706 n_floatregs = 0;
7707
7708 /* Allocate block of memory for the regs. */
7709 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7710 Or can assign_stack_local accept a 0 SIZE argument? */
7711 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7712
7713 if (TARGET_SHMEDIA)
7714 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7715 else if (n_floatregs & 1)
7716 {
7717 rtx addr;
7718
7719 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7720 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7721 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7722 regbuf = change_address (regbuf, BLKmode, addr);
7723 }
7724 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7725 {
7726 rtx addr, mask;
7727
7728 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7729 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7730 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7731 emit_insn (gen_andsi3 (addr, addr, mask));
7732 regbuf = change_address (regbuf, BLKmode, addr);
7733 }
7734 else
7735 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7736 alias_set = get_varargs_alias_set ();
7737 set_mem_alias_set (regbuf, alias_set);
7738
7739 /* Save int args.
7740 This is optimized to only save the regs that are necessary. Explicitly
7741 named args need not be saved. */
7742 if (n_intregs > 0)
7743 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7744 adjust_address (regbuf, BLKmode,
7745 n_floatregs * UNITS_PER_WORD),
7746 n_intregs);
7747
7748 if (TARGET_SHMEDIA)
7749 /* Return the address of the regbuf. */
7750 return XEXP (regbuf, 0);
7751
7752 /* Save float args.
7753 This is optimized to only save the regs that are necessary. Explicitly
7754 named args need not be saved.
7755 We explicitly build a pointer to the buffer because it halves the insn
7756 count when not optimizing (otherwise the pointer is built for each reg
7757 saved).
7758 We emit the moves in reverse order so that we can use predecrement. */
7759
7760 fpregs = copy_to_mode_reg (Pmode,
7761 plus_constant (XEXP (regbuf, 0),
7762 n_floatregs * UNITS_PER_WORD));
7763 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7764 {
7765 rtx mem;
7766 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7767 {
7768 emit_insn (gen_addsi3 (fpregs, fpregs,
7769 GEN_INT (-2 * UNITS_PER_WORD)));
7770 mem = change_address (regbuf, DFmode, fpregs);
7771 emit_move_insn (mem,
7772 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7773 }
7774 regno = first_floatreg;
7775 if (regno & 1)
7776 {
7777 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7778 mem = change_address (regbuf, SFmode, fpregs);
7779 emit_move_insn (mem,
7780 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7781 - (TARGET_LITTLE_ENDIAN != 0)));
7782 }
7783 }
7784 else
7785 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7786 {
7787 rtx mem;
7788
7789 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7790 mem = change_address (regbuf, SFmode, fpregs);
7791 emit_move_insn (mem,
7792 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7793 }
7794
7795 /* Return the address of the regbuf. */
7796 return XEXP (regbuf, 0);
7797 }
7798
7799 /* Define the `__builtin_va_list' type for the ABI. */
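/* As an informal aid only: on the non-SH5 SH2E/SH4 ABI handled below,
   the type built here corresponds roughly to

     struct __va_list_tag
     {
       void *__va_next_o;        -- next anonymous GP-register argument
       void *__va_next_o_limit;  -- end of the GP-register save area
       void *__va_next_fp;       -- next anonymous FP-register argument
       void *__va_next_fp_limit; -- end of the FP-register save area
       void *__va_next_stack;    -- next anonymous argument on the stack
     };

   The field names and order are exactly those created below; the
   per-field interpretations are a sketch taken from sh_va_start and
   sh_gimplify_va_arg_expr, not an authoritative description.  */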
7800
7801 static tree
7802 sh_build_builtin_va_list (void)
7803 {
7804 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7805 tree record, type_decl;
7806
7807 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7808 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7809 return ptr_type_node;
7810
7811 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7812 type_decl = build_decl (BUILTINS_LOCATION,
7813 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7814
7815 f_next_o = build_decl (BUILTINS_LOCATION,
7816 FIELD_DECL, get_identifier ("__va_next_o"),
7817 ptr_type_node);
7818 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7819 FIELD_DECL,
7820 get_identifier ("__va_next_o_limit"),
7821 ptr_type_node);
7822 f_next_fp = build_decl (BUILTINS_LOCATION,
7823 FIELD_DECL, get_identifier ("__va_next_fp"),
7824 ptr_type_node);
7825 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7826 FIELD_DECL,
7827 get_identifier ("__va_next_fp_limit"),
7828 ptr_type_node);
7829 f_next_stack = build_decl (BUILTINS_LOCATION,
7830 FIELD_DECL, get_identifier ("__va_next_stack"),
7831 ptr_type_node);
7832
7833 DECL_FIELD_CONTEXT (f_next_o) = record;
7834 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7835 DECL_FIELD_CONTEXT (f_next_fp) = record;
7836 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7837 DECL_FIELD_CONTEXT (f_next_stack) = record;
7838
7839 TYPE_STUB_DECL (record) = type_decl;
7840 TYPE_NAME (record) = type_decl;
7841 TYPE_FIELDS (record) = f_next_o;
7842 DECL_CHAIN (f_next_o) = f_next_o_limit;
7843 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7844 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7845 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7846
7847 layout_type (record);
7848
7849 return record;
7850 }
7851
7852 /* Implement `va_start' for varargs and stdarg. */
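/* A rough sketch (not authoritative) of the register save area built
   by sh_builtin_saveregs and of the pointers set up below, for the
   non-SH5 SH2E/SH4 case:

     regbuf:  [ anonymous FP arg regs | anonymous GP arg regs ]
              ^next_fp                ^next_fp_limit == next_o   ...next_o_limit

   next_stack is made to point at NEXTARG, i.e. at the first anonymous
   argument that was passed on the stack.  */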
7853
7854 static void
7855 sh_va_start (tree valist, rtx nextarg)
7856 {
7857 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7858 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7859 tree t, u;
7860 int nfp, nint;
7861
7862 if (TARGET_SH5)
7863 {
7864 expand_builtin_saveregs ();
7865 std_expand_builtin_va_start (valist, nextarg);
7866 return;
7867 }
7868
7869 if ((! TARGET_SH2E && ! TARGET_SH4)
7870 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7871 {
7872 std_expand_builtin_va_start (valist, nextarg);
7873 return;
7874 }
7875
7876 f_next_o = TYPE_FIELDS (va_list_type_node);
7877 f_next_o_limit = DECL_CHAIN (f_next_o);
7878 f_next_fp = DECL_CHAIN (f_next_o_limit);
7879 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7880 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7881
7882 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7883 NULL_TREE);
7884 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7885 valist, f_next_o_limit, NULL_TREE);
7886 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7887 NULL_TREE);
7888 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7889 valist, f_next_fp_limit, NULL_TREE);
7890 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7891 valist, f_next_stack, NULL_TREE);
7892
7893 /* Call __builtin_saveregs. */
7894 u = make_tree (sizetype, expand_builtin_saveregs ());
7895 u = fold_convert (ptr_type_node, u);
7896 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7897 TREE_SIDE_EFFECTS (t) = 1;
7898 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7899
7900 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7901 if (nfp < 8)
7902 nfp = 8 - nfp;
7903 else
7904 nfp = 0;
7905 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
7906 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7907 TREE_SIDE_EFFECTS (t) = 1;
7908 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7909
7910 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7911 TREE_SIDE_EFFECTS (t) = 1;
7912 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7913
7914 nint = crtl->args.info.arg_count[SH_ARG_INT];
7915 if (nint < 4)
7916 nint = 4 - nint;
7917 else
7918 nint = 0;
7919 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
7920 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7921 TREE_SIDE_EFFECTS (t) = 1;
7922 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7923
7924 u = make_tree (ptr_type_node, nextarg);
7925 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7926 TREE_SIDE_EFFECTS (t) = 1;
7927 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7928 }
7929
7930 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7931 member, return it. */
7932 static tree
7933 find_sole_member (tree type)
7934 {
7935 tree field, member = NULL_TREE;
7936
7937 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7938 {
7939 if (TREE_CODE (field) != FIELD_DECL)
7940 continue;
7941 if (!DECL_SIZE (field))
7942 return NULL_TREE;
7943 if (integer_zerop (DECL_SIZE (field)))
7944 continue;
7945 if (member)
7946 return NULL_TREE;
7947 member = field;
7948 }
7949 return member;
7950 }
7951 /* Implement `va_arg'. */
7952
7953 static tree
7954 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7955 gimple_seq *post_p ATTRIBUTE_UNUSED)
7956 {
7957 HOST_WIDE_INT size, rsize;
7958 tree tmp, pptr_type_node;
7959 tree addr, lab_over = NULL, result = NULL;
7960 bool pass_by_ref;
7961 tree eff_type;
7962
7963 if (!VOID_TYPE_P (type))
7964 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7965 else
7966 pass_by_ref = false;
7967
7968 if (pass_by_ref)
7969 type = build_pointer_type (type);
7970
7971 size = int_size_in_bytes (type);
7972 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7973 pptr_type_node = build_pointer_type (ptr_type_node);
7974
7975 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7976 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7977 {
7978 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7979 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7980 int pass_as_float;
7981 tree lab_false;
7982 tree member;
7983
7984 f_next_o = TYPE_FIELDS (va_list_type_node);
7985 f_next_o_limit = DECL_CHAIN (f_next_o);
7986 f_next_fp = DECL_CHAIN (f_next_o_limit);
7987 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7988 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7989
7990 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7991 NULL_TREE);
7992 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7993 valist, f_next_o_limit, NULL_TREE);
7994 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7995 valist, f_next_fp, NULL_TREE);
7996 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7997 valist, f_next_fp_limit, NULL_TREE);
7998 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7999 valist, f_next_stack, NULL_TREE);
8000
8001 /* Structures with a single member with a distinct mode are passed
8002 like their member. This is relevant if the latter has a REAL_TYPE
8003 or COMPLEX_TYPE type. */
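/* For example, a hypothetical 'struct s { double d; };' argument is
   treated below as if it were a plain 'double', and a
   'struct t { struct s inner; };' unwraps twice to the same result.
   This is only a sketch of the loop's intent, not an additional rule.  */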
8004 eff_type = type;
8005 while (TREE_CODE (eff_type) == RECORD_TYPE
8006 && (member = find_sole_member (eff_type))
8007 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8008 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8009 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8010 {
8011 tree field_type = TREE_TYPE (member);
8012
8013 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8014 eff_type = field_type;
8015 else
8016 {
8017 gcc_assert ((TYPE_ALIGN (eff_type)
8018 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8019 || (TYPE_ALIGN (eff_type)
8020 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8021 break;
8022 }
8023 }
8024
8025 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8026 {
8027 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8028 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8029 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8030 && size <= 16));
8031 }
8032 else
8033 {
8034 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8035 }
8036
8037 addr = create_tmp_var (pptr_type_node, NULL);
8038 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8039 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8040
8041 valist = build_simple_mem_ref (addr);
8042
8043 if (pass_as_float)
8044 {
8045 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8046 tree cmp;
8047 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8048
8049 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8050 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8051
8052 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8053 tmp = next_fp_limit;
8054 if (size > 4 && !is_double)
8055 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8056 tmp = build2 (GE_EXPR, boolean_type_node,
8057 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8058 cmp = build3 (COND_EXPR, void_type_node, tmp,
8059 build1 (GOTO_EXPR, void_type_node,
8060 unshare_expr (lab_false)), NULL_TREE);
8061 if (!is_double)
8062 gimplify_and_add (cmp, pre_p);
8063
8064 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8065 || (is_double || size == 16))
8066 {
8067 tmp = fold_convert (sizetype, next_fp_tmp);
8068 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8069 size_int (UNITS_PER_WORD));
8070 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8071 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8072 }
8073 if (is_double)
8074 gimplify_and_add (cmp, pre_p);
8075
8076 #ifdef FUNCTION_ARG_SCmode_WART
8077 if (TYPE_MODE (eff_type) == SCmode
8078 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8079 {
8080 tree subtype = TREE_TYPE (eff_type);
8081 tree real, imag;
8082
8083 imag
8084 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8085 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8086
8087 real
8088 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8089 real = get_initialized_tmp_var (real, pre_p, NULL);
8090
8091 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8092 if (type != eff_type)
8093 result = build1 (VIEW_CONVERT_EXPR, type, result);
8094 result = get_initialized_tmp_var (result, pre_p, NULL);
8095 }
8096 #endif /* FUNCTION_ARG_SCmode_WART */
8097
8098 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8099 gimplify_and_add (tmp, pre_p);
8100
8101 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8102 gimplify_and_add (tmp, pre_p);
8103
8104 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8105 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8106 gimplify_assign (unshare_expr (next_fp_tmp),
8107 unshare_expr (valist), pre_p);
8108
8109 gimplify_assign (unshare_expr (valist),
8110 unshare_expr (next_fp_tmp), post_p);
8111 valist = next_fp_tmp;
8112 }
8113 else
8114 {
8115 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8116 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8117 unshare_expr (next_o_limit));
8118 tmp = build3 (COND_EXPR, void_type_node, tmp,
8119 build1 (GOTO_EXPR, void_type_node,
8120 unshare_expr (lab_false)),
8121 NULL_TREE);
8122 gimplify_and_add (tmp, pre_p);
8123
8124 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8125 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8126
8127 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8128 gimplify_and_add (tmp, pre_p);
8129
8130 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8131 gimplify_and_add (tmp, pre_p);
8132
8133 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8134 gimplify_assign (unshare_expr (next_o),
8135 unshare_expr (next_o_limit), pre_p);
8136
8137 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8138 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8139 }
8140
8141 if (!result)
8142 {
8143 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8144 gimplify_and_add (tmp, pre_p);
8145 }
8146 }
8147
8148 /* ??? In va-sh.h, there had been code to make values larger than
8149 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8150
8151 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8152 if (result)
8153 {
8154 gimplify_assign (result, tmp, pre_p);
8155 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8156 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8157 gimplify_and_add (tmp, pre_p);
8158 }
8159 else
8160 result = tmp;
8161
8162 if (pass_by_ref)
8163 result = build_va_arg_indirect_ref (result);
8164
8165 return result;
8166 }
8167
8168 /* 64-bit floating point memory transfers are done as paired single precision
8169 loads or stores, so DWARF information needs fixing in little endian (unless
8170 PR=SZ=1 in FPSCR): a DFmode value in register N spans SFmode registers N + 1 and N. */
8171 rtx
8172 sh_dwarf_register_span (rtx reg)
8173 {
8174 unsigned regno = REGNO (reg);
8175
8176 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8177 return NULL_RTX;
8178
8179 return
8180 gen_rtx_PARALLEL (VOIDmode,
8181 gen_rtvec (2,
8182 gen_rtx_REG (SFmode, regno + 1),
8183 gen_rtx_REG (SFmode, regno)));
8184 }
8185
8186 static enum machine_mode
8187 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8188 int *punsignedp, const_tree funtype,
8189 int for_return)
8190 {
8191 if (sh_promote_prototypes (funtype))
8192 return promote_mode (type, mode, punsignedp);
8193 else
8194 return default_promote_function_mode (type, mode, punsignedp, funtype,
8195 for_return);
8196 }
8197
8198 static bool
8199 sh_promote_prototypes (const_tree type)
8200 {
8201 if (TARGET_HITACHI)
8202 return 0;
8203 if (! type)
8204 return 1;
8205 return ! sh_attr_renesas_p (type);
8206 }
8207
8208 /* Whether an argument must be passed by reference. On SHcompact, we
8209 pretend arguments wider than 32 bits that would have been passed in
8210 registers are passed by reference, so that an SHmedia trampoline
8211 loads them into the full 64-bit registers. */
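/* For instance, a hypothetical 64-bit 'long long' argument that would
   otherwise be split across 32-bit argument registers is reported as
   pass-by-reference by shcompact_byref below.  This is a sketch of the
   intent; the exact conditions are the ones tested in the function.  */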
8212
8213 static int
8214 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8215 const_tree type, bool named)
8216 {
8217 unsigned HOST_WIDE_INT size;
8218
8219 if (type)
8220 size = int_size_in_bytes (type);
8221 else
8222 size = GET_MODE_SIZE (mode);
8223
8224 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8225 && (!named
8226 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8227 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8228 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8229 && size > 4
8230 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8231 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8232 return size;
8233 else
8234 return 0;
8235 }
8236
8237 static bool
8238 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8239 const_tree type, bool named)
8240 {
8241 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8242
8243 if (targetm.calls.must_pass_in_stack (mode, type))
8244 return true;
8245
8246 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8247 wants to know about pass-by-reference semantics for incoming
8248 arguments. */
8249 if (! cum)
8250 return false;
8251
8252 if (TARGET_SHCOMPACT)
8253 {
8254 cum->byref = shcompact_byref (cum, mode, type, named);
8255 return cum->byref != 0;
8256 }
8257
8258 return false;
8259 }
8260
8261 static bool
8262 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8263 const_tree type, bool named ATTRIBUTE_UNUSED)
8264 {
8265 /* ??? How can it possibly be correct to return true only on the
8266 caller side of the equation? Is there someplace else in the
8267 sh backend that's magically producing the copies? */
8268 return (get_cumulative_args (cum)->outgoing
8269 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8270 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8271 }
8272
8273 static int
8274 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8275 tree type, bool named ATTRIBUTE_UNUSED)
8276 {
8277 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8278 int words = 0;
8279
8280 if (!TARGET_SH5
8281 && PASS_IN_REG_P (*cum, mode, type)
8282 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8283 && (ROUND_REG (*cum, mode)
8284 + (mode != BLKmode
8285 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8286 : ROUND_ADVANCE (int_size_in_bytes (type)))
8287 > NPARM_REGS (mode)))
8288 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8289
8290 else if (!TARGET_SHCOMPACT
8291 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8292 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8293
8294 return words * UNITS_PER_WORD;
8295 }
8296
8297
8298 /* Define where to put the arguments to a function.
8299 Value is zero to push the argument on the stack,
8300 or a hard register in which to store the argument.
8301
8302 MODE is the argument's machine mode.
8303 TYPE is the data type of the argument (as a tree).
8304 This is null for libcalls where that information may
8305 not be available.
8306 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8307 the preceding args and about the function being called.
8308 NAMED is nonzero if this argument is a named parameter
8309 (otherwise it is an extra parameter matching an ellipsis).
8310
8311 On SH the first args are normally in registers
8312 and the rest are pushed. Any arg that starts within the first
8313 NPARM_REGS words is at least partially passed in a register unless
8314 its data type forbids. */
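/* As an informal illustration of the common non-SH5 case: with the
   default (non-Renesas) ABI the first few SImode arguments land in the
   registers starting at BASE_ARG_REG (SImode) (r4 upwards on SH),
   SFmode arguments in the FP argument registers, and anything that
   does not fit is pushed.  This is only a sketch; the authoritative
   logic is the code below together with PASS_IN_REG_P and ROUND_REG.  */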
8315
8316 static rtx
8317 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8318 const_tree type, bool named)
8319 {
8320 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8321
8322 if (! TARGET_SH5 && mode == VOIDmode)
8323 return GEN_INT (ca->renesas_abi ? 1 : 0);
8324
8325 if (! TARGET_SH5
8326 && PASS_IN_REG_P (*ca, mode, type)
8327 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8328 {
8329 int regno;
8330
8331 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8332 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8333 {
8334 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8335 gen_rtx_REG (SFmode,
8336 BASE_ARG_REG (mode)
8337 + (ROUND_REG (*ca, mode) ^ 1)),
8338 const0_rtx);
8339 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8340 gen_rtx_REG (SFmode,
8341 BASE_ARG_REG (mode)
8342 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8343 GEN_INT (4));
8344 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8345 }
8346
8347 /* If the alignment of a DF value causes an SF register to be
8348 skipped, we will use that skipped register for the next SF
8349 value. */
8350 if ((TARGET_HITACHI || ca->renesas_abi)
8351 && ca->free_single_fp_reg
8352 && mode == SFmode)
8353 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8354
8355 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8356 ^ (mode == SFmode && TARGET_SH4
8357 && TARGET_LITTLE_ENDIAN != 0
8358 && ! TARGET_HITACHI && ! ca->renesas_abi);
8359 return gen_rtx_REG (mode, regno);
8360
8361 }
8362
8363 if (TARGET_SH5)
8364 {
8365 if (mode == VOIDmode && TARGET_SHCOMPACT)
8366 return GEN_INT (ca->call_cookie);
8367
8368 /* The following test assumes unnamed arguments are promoted to
8369 DFmode. */
8370 if (mode == SFmode && ca->free_single_fp_reg)
8371 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8372
8373 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8374 && (named || ! ca->prototype_p)
8375 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8376 {
8377 if (! ca->prototype_p && TARGET_SHMEDIA)
8378 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8379
8380 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8381 FIRST_FP_PARM_REG
8382 + ca->arg_count[(int) SH_ARG_FLOAT]);
8383 }
8384
8385 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8386 && (! TARGET_SHCOMPACT
8387 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8388 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8389 type, named))))
8390 {
8391 return gen_rtx_REG (mode, (FIRST_PARM_REG
8392 + ca->arg_count[(int) SH_ARG_INT]));
8393 }
8394
8395 return 0;
8396 }
8397
8398 return 0;
8399 }
8400
8401 /* Update the data in CUM to advance over an argument
8402 of mode MODE and data type TYPE.
8403 (TYPE is null for libcalls where that information may not be
8404 available.) */
8405
8406 static void
8407 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
8408 const_tree type, bool named)
8409 {
8410 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8411
8412 if (ca->force_mem)
8413 ca->force_mem = 0;
8414 else if (TARGET_SH5)
8415 {
8416 const_tree type2 = (ca->byref && type
8417 ? TREE_TYPE (type)
8418 : type);
8419 enum machine_mode mode2 = (ca->byref && type
8420 ? TYPE_MODE (type2)
8421 : mode);
8422 int dwords = ((ca->byref
8423 ? ca->byref
8424 : mode2 == BLKmode
8425 ? int_size_in_bytes (type2)
8426 : GET_MODE_SIZE (mode2)) + 7) / 8;
8427 int numregs = MIN (dwords, NPARM_REGS (SImode)
8428 - ca->arg_count[(int) SH_ARG_INT]);
8429
8430 if (numregs)
8431 {
8432 ca->arg_count[(int) SH_ARG_INT] += numregs;
8433 if (TARGET_SHCOMPACT
8434 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8435 {
8436 ca->call_cookie
8437 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8438 - numregs, 1);
8439 /* N.B. We want this also for outgoing. */
8440 ca->stack_regs += numregs;
8441 }
8442 else if (ca->byref)
8443 {
8444 if (! ca->outgoing)
8445 ca->stack_regs += numregs;
8446 ca->byref_regs += numregs;
8447 ca->byref = 0;
8448 do
8449 ca->call_cookie
8450 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8451 - numregs, 2);
8452 while (--numregs);
8453 ca->call_cookie
8454 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8455 - 1, 1);
8456 }
8457 else if (dwords > numregs)
8458 {
8459 int pushregs = numregs;
8460
8461 if (TARGET_SHCOMPACT)
8462 ca->stack_regs += numregs;
8463 while (pushregs < NPARM_REGS (SImode) - 1
8464 && (CALL_COOKIE_INT_REG_GET
8465 (ca->call_cookie,
8466 NPARM_REGS (SImode) - pushregs)
8467 == 1))
8468 {
8469 ca->call_cookie
8470 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8471 - pushregs, 1);
8472 pushregs++;
8473 }
8474 if (numregs == NPARM_REGS (SImode))
8475 ca->call_cookie
8476 |= CALL_COOKIE_INT_REG (0, 1)
8477 | CALL_COOKIE_STACKSEQ (numregs - 1);
8478 else
8479 ca->call_cookie
8480 |= CALL_COOKIE_STACKSEQ (numregs);
8481 }
8482 }
8483 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8484 && (named || ! ca->prototype_p))
8485 {
8486 if (mode2 == SFmode && ca->free_single_fp_reg)
8487 ca->free_single_fp_reg = 0;
8488 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8489 < NPARM_REGS (SFmode))
8490 {
8491 int numfpregs
8492 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8493 NPARM_REGS (SFmode)
8494 - ca->arg_count[(int) SH_ARG_FLOAT]);
8495
8496 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8497
8498 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8499 {
8500 if (ca->outgoing && numregs > 0)
8501 do
8502 {
8503 ca->call_cookie
8504 |= (CALL_COOKIE_INT_REG
8505 (ca->arg_count[(int) SH_ARG_INT]
8506 - numregs + ((numfpregs - 2) / 2),
8507 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8508 - numfpregs) / 2));
8509 }
8510 while (numfpregs -= 2);
8511 }
8512 else if (mode2 == SFmode && (named)
8513 && (ca->arg_count[(int) SH_ARG_FLOAT]
8514 < NPARM_REGS (SFmode)))
8515 ca->free_single_fp_reg
8516 = FIRST_FP_PARM_REG - numfpregs
8517 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8518 }
8519 }
8520 return;
8521 }
8522
8523 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8524 {
8525 /* Note that we've used the skipped register. */
8526 if (mode == SFmode && ca->free_single_fp_reg)
8527 {
8528 ca->free_single_fp_reg = 0;
8529 return;
8530 }
8531 /* When we have a DF after an SF, there's an SF register that gets
8532 skipped in order to align the DF value. We note this skipped
8533 register, because the next SF value will use it, and not the
8534 SF that follows the DF. */
8535 if (mode == DFmode
8536 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8537 {
8538 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8539 + BASE_ARG_REG (mode));
8540 }
8541 }
8542
8543 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8544 || PASS_IN_REG_P (*ca, mode, type))
8545 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8546 = (ROUND_REG (*ca, mode)
8547 + (mode == BLKmode
8548 ? ROUND_ADVANCE (int_size_in_bytes (type))
8549 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8550 }
8551
8552 /* The Renesas calling convention doesn't quite fit into this scheme since
8553 the address is passed like an invisible argument, but one that is always
8554 passed in memory. */
8555 static rtx
8556 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8557 {
8558 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8559 return 0;
8560 return gen_rtx_REG (Pmode, 2);
8561 }
8562
8563 /* Worker function for TARGET_FUNCTION_VALUE.
8564
8565 For the SH, this is like LIBCALL_VALUE, except that we must change the
8566 mode like PROMOTE_MODE does.
8567 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8568 tested here has to be kept in sync with the one in explow.c:promote_mode.
8569 */
8570
8571 static rtx
8572 sh_function_value (const_tree valtype,
8573 const_tree fn_decl_or_type,
8574 bool outgoing ATTRIBUTE_UNUSED)
8575 {
8576 if (fn_decl_or_type
8577 && !DECL_P (fn_decl_or_type))
8578 fn_decl_or_type = NULL;
8579
8580 return gen_rtx_REG (
8581 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8582 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8583 && (TREE_CODE (valtype) == INTEGER_TYPE
8584 || TREE_CODE (valtype) == ENUMERAL_TYPE
8585 || TREE_CODE (valtype) == BOOLEAN_TYPE
8586 || TREE_CODE (valtype) == REAL_TYPE
8587 || TREE_CODE (valtype) == OFFSET_TYPE))
8588 && sh_promote_prototypes (fn_decl_or_type)
8589 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8590 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8591 }
8592
8593 /* Worker function for TARGET_LIBCALL_VALUE. */
8594
8595 static rtx
8596 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8597 {
8598 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8599 }
8600
8601 /* Return true if REGNO is a possible register number for a function value. */
8602
8603 static bool
8604 sh_function_value_regno_p (const unsigned int regno)
8605 {
8606 return ((regno) == FIRST_RET_REG
8607 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8608 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8609 }
8610
8611 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8612
8613 static bool
8614 sh_return_in_memory (const_tree type, const_tree fndecl)
8615 {
8616 if (TARGET_SH5)
8617 {
8618 if (TYPE_MODE (type) == BLKmode)
8619 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8620 else
8621 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8622 }
8623 else
8624 {
8625 return (TYPE_MODE (type) == BLKmode
8626 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8627 && TREE_CODE (type) == RECORD_TYPE));
8628 }
8629 }
8630
8631 /* We actually emit the code in sh_expand_prologue. We used to use
8632 a static variable to flag that we need to emit this code, but that
8633 doesn't work when inlining, when functions are deferred and then emitted
8634 later. Fortunately, we already have two flags that are part of struct
8635 function that tell if a function uses varargs or stdarg. */
8636 static void
8637 sh_setup_incoming_varargs (cumulative_args_t ca,
8638 enum machine_mode mode,
8639 tree type,
8640 int *pretend_arg_size,
8641 int second_time ATTRIBUTE_UNUSED)
8642 {
8643 gcc_assert (cfun->stdarg);
8644 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8645 {
8646 int named_parm_regs, anon_parm_regs;
8647
8648 named_parm_regs = (ROUND_REG (*get_cumulative_args (ca), mode)
8649 + (mode == BLKmode
8650 ? ROUND_ADVANCE (int_size_in_bytes (type))
8651 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8652 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8653 if (anon_parm_regs > 0)
8654 *pretend_arg_size = anon_parm_regs * 4;
8655 }
8656 }
8657
8658 static bool
8659 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8660 {
8661 return TARGET_SH5;
8662 }
8663
8664 static bool
8665 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8666 {
8667 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8668
8669 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8670 }
8671
8672
8673 /* Define the offset between two registers, one to be eliminated, and
8674 the other its replacement, at the start of a routine. */
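/* For example (informally), eliminating ARG_POINTER_REGNUM in favour of
   STACK_POINTER_REGNUM has to skip both the register save area and the
   local frame, which is why the corresponding case below returns
   total_saved_regs_space + total_auto_space, plus the byref_regs
   adjustment used by the SH5 ABI.  */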
8675
8676 int
8677 initial_elimination_offset (int from, int to)
8678 {
8679 int regs_saved;
8680 int regs_saved_rounding = 0;
8681 int total_saved_regs_space;
8682 int total_auto_space;
8683 int save_flags = target_flags;
8684 int copy_flags;
8685 HARD_REG_SET live_regs_mask;
8686
8687 shmedia_space_reserved_for_target_registers = false;
8688 regs_saved = calc_live_regs (&live_regs_mask);
8689 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8690
8691 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8692 {
8693 shmedia_space_reserved_for_target_registers = true;
8694 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8695 }
8696
8697 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8698 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8699 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8700
8701 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8702 copy_flags = target_flags;
8703 target_flags = save_flags;
8704
8705 total_saved_regs_space = regs_saved + regs_saved_rounding;
8706
8707 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8708 return total_saved_regs_space + total_auto_space
8709 + crtl->args.info.byref_regs * 8;
8710
8711 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8712 return total_saved_regs_space + total_auto_space
8713 + crtl->args.info.byref_regs * 8;
8714
8715 /* Initial gap between fp and sp is 0. */
8716 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8717 return 0;
8718
8719 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8720 return rounded_frame_size (0);
8721
8722 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8723 return rounded_frame_size (0);
8724
8725 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8726 && (to == HARD_FRAME_POINTER_REGNUM
8727 || to == STACK_POINTER_REGNUM));
8728 if (TARGET_SH5)
8729 {
8730 int n = total_saved_regs_space;
8731 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8732 save_schedule schedule;
8733 save_entry *entry;
8734
8735 n += total_auto_space;
8736
8737 /* If it wasn't saved, there's not much we can do. */
8738 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8739 return n;
8740
8741 target_flags = copy_flags;
8742
8743 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8744 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8745 if (entry->reg == pr_reg)
8746 {
8747 target_flags = save_flags;
8748 return entry->offset;
8749 }
8750 gcc_unreachable ();
8751 }
8752 else
8753 return total_auto_space;
8754 }
8755
8756 /* Parse the -mfixed-range= option string. */
8757 void
8758 sh_fix_range (const char *const_str)
8759 {
8760 int i, first, last;
8761 char *str, *dash, *comma;
8762
8763 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8764 REG2 are either register names or register numbers. The effect
8765 of this option is to mark the registers in the range from REG1 to
8766 REG2 as ``fixed'' so they won't be used by the compiler. */
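/* For example, a hypothetical "-mfixed-range=r4-r6,r13-r13" would mark
   r4, r5, r6 and r13 as fixed, so the register allocator never uses
   them.  (Illustrative values only.)  */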
8767
8768 i = strlen (const_str);
8769 str = (char *) alloca (i + 1);
8770 memcpy (str, const_str, i + 1);
8771
8772 while (1)
8773 {
8774 dash = strchr (str, '-');
8775 if (!dash)
8776 {
8777 warning (0, "value of -mfixed-range must have form REG1-REG2");
8778 return;
8779 }
8780 *dash = '\0';
8781 comma = strchr (dash + 1, ',');
8782 if (comma)
8783 *comma = '\0';
8784
8785 first = decode_reg_name (str);
8786 if (first < 0)
8787 {
8788 warning (0, "unknown register name: %s", str);
8789 return;
8790 }
8791
8792 last = decode_reg_name (dash + 1);
8793 if (last < 0)
8794 {
8795 warning (0, "unknown register name: %s", dash + 1);
8796 return;
8797 }
8798
8799 *dash = '-';
8800
8801 if (first > last)
8802 {
8803 warning (0, "%s-%s is an empty range", str, dash + 1);
8804 return;
8805 }
8806
8807 for (i = first; i <= last; ++i)
8808 fixed_regs[i] = call_used_regs[i] = 1;
8809
8810 if (!comma)
8811 break;
8812
8813 *comma = ',';
8814 str = comma + 1;
8815 }
8816 }
8817 \f
8818 /* Insert any deferred function attributes from earlier pragmas. */
8819 static void
8820 sh_insert_attributes (tree node, tree *attributes)
8821 {
8822 tree attrs;
8823
8824 if (TREE_CODE (node) != FUNCTION_DECL)
8825 return;
8826
8827 /* We are only interested in declarations. */
8828 if (!DECL_P (node))
8829 return;
8830
8831 /* Append the attributes to the deferred attributes. */
8832 *sh_deferred_function_attributes_tail = *attributes;
8833 attrs = sh_deferred_function_attributes;
8834 if (!attrs)
8835 return;
8836
8837 /* Some attributes imply or require the interrupt attribute. */
8838 if (!lookup_attribute ("interrupt_handler", attrs)
8839 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8840 {
8841 /* If we have a trapa_handler, but no interrupt_handler attribute,
8842 insert an interrupt_handler attribute. */
8843 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8844 /* We can't use sh_pr_interrupt here because that's not in the
8845 java frontend. */
8846 attrs
8847 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8848 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8849 if the interrupt attribute is missing, we ignore the attribute
8850 and warn. */
8851 else if (lookup_attribute ("sp_switch", attrs)
8852 || lookup_attribute ("trap_exit", attrs)
8853 || lookup_attribute ("nosave_low_regs", attrs)
8854 || lookup_attribute ("resbank", attrs))
8855 {
8856 tree *tail;
8857
8858 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8859 {
8860 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8861 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8862 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8863 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8864 warning (OPT_Wattributes,
8865 "%qE attribute only applies to interrupt functions",
8866 TREE_PURPOSE (attrs));
8867 else
8868 {
8869 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8870 NULL_TREE);
8871 tail = &TREE_CHAIN (*tail);
8872 }
8873 }
8874 attrs = *attributes;
8875 }
8876 }
8877
8878 /* Install the processed list. */
8879 *attributes = attrs;
8880
8881 /* Clear deferred attributes. */
8882 sh_deferred_function_attributes = NULL_TREE;
8883 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8884
8885 return;
8886 }
8887
8888 /* Supported attributes:
8889
8890 interrupt_handler -- specifies this function is an interrupt handler.
8891
8892 trapa_handler -- like above, but don't save all registers.
8893
8894 sp_switch -- specifies an alternate stack for an interrupt handler
8895 to run on.
8896
8897 trap_exit -- use a trapa to exit an interrupt function instead of
8898 an rte instruction.
8899
8900 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
8901 This is useful on the SH3 and upwards,
8902 which have a separate set of low regs for User and Supervisor modes.
8903 This should only be used for the lowest level of interrupts. Higher levels
8904 of interrupts must save the registers in case they themselves are
8905 interrupted.
8906
8907 renesas -- use Renesas calling/layout conventions (functions and
8908 structures).
8909
8910 resbank -- In case of an ISR, use a register bank to save registers
8911 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8912 */
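/* Illustrative uses of some of these attributes (made-up declarations,
   shown only as a sketch of the expected syntax; the argument kinds are
   those checked by the handlers below):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));
     void isr3 (void) __attribute__ ((interrupt_handler, resbank));
     void f (void) __attribute__ ((function_vector (18)));  */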
8913
8914 /* Handle a 'resbank' attribute. */
8915 static tree
8916 sh_handle_resbank_handler_attribute (tree * node, tree name,
8917 tree args ATTRIBUTE_UNUSED,
8918 int flags ATTRIBUTE_UNUSED,
8919 bool * no_add_attrs)
8920 {
8921 if (!TARGET_SH2A)
8922 {
8923 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8924 name);
8925 *no_add_attrs = true;
8926 }
8927 if (TREE_CODE (*node) != FUNCTION_DECL)
8928 {
8929 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8930 name);
8931 *no_add_attrs = true;
8932 }
8933
8934 return NULL_TREE;
8935 }
8936
8937 /* Handle an "interrupt_handler" attribute; arguments as in
8938 struct attribute_spec.handler. */
8939 static tree
8940 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8941 tree args ATTRIBUTE_UNUSED,
8942 int flags ATTRIBUTE_UNUSED,
8943 bool *no_add_attrs)
8944 {
8945 if (TREE_CODE (*node) != FUNCTION_DECL)
8946 {
8947 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8948 name);
8949 *no_add_attrs = true;
8950 }
8951 else if (TARGET_SHCOMPACT)
8952 {
8953 error ("attribute interrupt_handler is not compatible with -m5-compact");
8954 *no_add_attrs = true;
8955 }
8956
8957 return NULL_TREE;
8958 }
8959
8960 /* Handle a 'function_vector' attribute; arguments as in
8961 struct attribute_spec.handler. */
8962 static tree
8963 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8964 tree args ATTRIBUTE_UNUSED,
8965 int flags ATTRIBUTE_UNUSED,
8966 bool * no_add_attrs)
8967 {
8968 if (!TARGET_SH2A)
8969 {
8970 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8971 name);
8972 *no_add_attrs = true;
8973 }
8974 else if (TREE_CODE (*node) != FUNCTION_DECL)
8975 {
8976 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8977 name);
8978 *no_add_attrs = true;
8979 }
8980 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8981 {
8982 /* The argument must be a constant integer. */
8983 warning (OPT_Wattributes,
8984 "%qE attribute argument not an integer constant",
8985 name);
8986 *no_add_attrs = true;
8987 }
8988 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8989 {
8990 /* The argument value must be between 0 to 255. */
8991 warning (OPT_Wattributes,
8992 "%qE attribute argument should be between 0 to 255",
8993 name);
8994 *no_add_attrs = true;
8995 }
8996 return NULL_TREE;
8997 }
8998
8999 /* Returns 1 if the symbol X refers to a function that has been assigned
9000 the attribute 'function_vector'. */
9001 int
9002 sh2a_is_function_vector_call (rtx x)
9003 {
9004 if (GET_CODE (x) == SYMBOL_REF
9005 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9006 {
9007 tree tr = SYMBOL_REF_DECL (x);
9008
9009 if (sh2a_function_vector_p (tr))
9010 return 1;
9011 }
9012
9013 return 0;
9014 }
9015
9016 /* Returns the function vector number, if the attribute
9017 'function_vector' is assigned, otherwise returns zero. */
9018 int
9019 sh2a_get_function_vector_number (rtx x)
9020 {
9021 int num;
9022 tree list, t;
9023
9024 if ((GET_CODE (x) == SYMBOL_REF)
9025 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9026 {
9027 t = SYMBOL_REF_DECL (x);
9028
9029 if (TREE_CODE (t) != FUNCTION_DECL)
9030 return 0;
9031
9032 list = SH_ATTRIBUTES (t);
9033 while (list)
9034 {
9035 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9036 {
9037 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9038 return num;
9039 }
9040
9041 list = TREE_CHAIN (list);
9042 }
9043
9044 return 0;
9045 }
9046 else
9047 return 0;
9048 }
9049
9050 /* Handle an "sp_switch" attribute; arguments as in
9051 struct attribute_spec.handler. */
9052 static tree
9053 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9054 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9055 {
9056 if (TREE_CODE (*node) != FUNCTION_DECL)
9057 {
9058 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9059 name);
9060 *no_add_attrs = true;
9061 }
9062 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9063 {
9064 /* The argument must be a constant string. */
9065 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9066 name);
9067 *no_add_attrs = true;
9068 }
9069
9070 return NULL_TREE;
9071 }
9072
9073 /* Handle a "trap_exit" attribute; arguments as in
9074 struct attribute_spec.handler. */
9075 static tree
9076 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9077 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9078 {
9079 if (TREE_CODE (*node) != FUNCTION_DECL)
9080 {
9081 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9082 name);
9083 *no_add_attrs = true;
9084 }
9085 /* The argument specifies a trap number to be used in a trapa instruction
9086 at function exit (instead of an rte instruction). */
9087 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9088 {
9089 /* The argument must be a constant integer. */
9090 warning (OPT_Wattributes, "%qE attribute argument not an "
9091 "integer constant", name);
9092 *no_add_attrs = true;
9093 }
9094
9095 return NULL_TREE;
9096 }
9097
9098 static tree
9099 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9100 tree name ATTRIBUTE_UNUSED,
9101 tree args ATTRIBUTE_UNUSED,
9102 int flags ATTRIBUTE_UNUSED,
9103 bool *no_add_attrs ATTRIBUTE_UNUSED)
9104 {
9105 return NULL_TREE;
9106 }
9107
9108 /* True if __attribute__((renesas)) or -mrenesas. */
9109 int
9110 sh_attr_renesas_p (const_tree td)
9111 {
9112 if (TARGET_HITACHI)
9113 return 1;
9114 if (td == 0)
9115 return 0;
9116 if (DECL_P (td))
9117 td = TREE_TYPE (td);
9118 if (td == error_mark_node)
9119 return 0;
9120 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9121 != NULL_TREE);
9122 }
9123
9124 /* True if __attribute__((renesas)) or -mrenesas, for the current
9125 function. */
9126 int
9127 sh_cfun_attr_renesas_p (void)
9128 {
9129 return sh_attr_renesas_p (current_function_decl);
9130 }
9131
9132 int
9133 sh_cfun_interrupt_handler_p (void)
9134 {
9135 return (lookup_attribute ("interrupt_handler",
9136 DECL_ATTRIBUTES (current_function_decl))
9137 != NULL_TREE);
9138 }
9139
9140 /* Returns 1 if FUNC has been assigned the attribute
9141 "function_vector". */
9142 int
9143 sh2a_function_vector_p (tree func)
9144 {
9145 tree list;
9146 if (TREE_CODE (func) != FUNCTION_DECL)
9147 return 0;
9148
9149 list = SH_ATTRIBUTES (func);
9150 while (list)
9151 {
9152 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9153 return 1;
9154
9155 list = TREE_CHAIN (list);
9156 }
9157 return 0;
9158 }
9159
9160 /* Returns TRUE if the current function has both the "resbank" and "interrupt_handler" attributes and we are compiling for SH2A. */
9161
9162 int
9163 sh_cfun_resbank_handler_p (void)
9164 {
9165 return ((lookup_attribute ("resbank",
9166 DECL_ATTRIBUTES (current_function_decl))
9167 != NULL_TREE)
9168 && (lookup_attribute ("interrupt_handler",
9169 DECL_ATTRIBUTES (current_function_decl))
9170 != NULL_TREE) && TARGET_SH2A);
9171 }
9172
9173 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9174
9175 static const char *
9176 sh_check_pch_target_flags (int old_flags)
9177 {
9178 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9179 | MASK_SH_E | MASK_HARD_SH4
9180 | MASK_FPU_SINGLE | MASK_SH4))
9181 return _("created and used with different architectures / ABIs");
9182 if ((old_flags ^ target_flags) & MASK_HITACHI)
9183 return _("created and used with different ABIs");
9184 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9185 return _("created and used with different endianness");
9186 return NULL;
9187 }
9188 \f
9189 /* Predicates used by the templates. */
9190
9191 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9192 Used only in general_movsrc_operand. */
9193
9194 int
9195 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9196 {
9197 switch (REGNO (op))
9198 {
9199 case PR_REG:
9200 case MACL_REG:
9201 case MACH_REG:
9202 return 1;
9203 }
9204 return 0;
9205 }
9206
9207 /* Nonzero if OP is a floating point value with value 0.0. */
9208
9209 int
9210 fp_zero_operand (rtx op)
9211 {
9212 REAL_VALUE_TYPE r;
9213
9214 if (GET_MODE (op) != SFmode)
9215 return 0;
9216
9217 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9218 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9219 }
9220
9221 /* Nonzero if OP is a floating point value with value 1.0. */
9222
9223 int
9224 fp_one_operand (rtx op)
9225 {
9226 REAL_VALUE_TYPE r;
9227
9228 if (GET_MODE (op) != SFmode)
9229 return 0;
9230
9231 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9232 return REAL_VALUES_EQUAL (r, dconst1);
9233 }
9234
9235 /* In general, mode switching is used. If we are
9236 compiling without -mfmovd, movsf_ie isn't taken into account for
9237 mode switching. We could check in machine_dependent_reorg for
9238 cases where we know we are in single precision mode, but there is
9239 no interface to find that out during reload, so we must avoid
9240 choosing an fldi alternative during reload and thus failing to
9241 allocate a scratch register for the constant loading. */
9242 int
9243 fldi_ok (void)
9244 {
9245 return 1;
9246 }
9247
9248 /* Return the TLS type for TLS symbols, 0 for otherwise. */
9249 enum tls_model
9250 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9251 {
9252 if (GET_CODE (op) != SYMBOL_REF)
9253 return TLS_MODEL_NONE;
9254 return SYMBOL_REF_TLS_MODEL (op);
9255 }
9256 \f
9257 /* Return the destination address of a branch. */
9258
9259 static int
9260 branch_dest (rtx branch)
9261 {
9262 rtx dest = SET_SRC (PATTERN (branch));
9263 int dest_uid;
9264
9265 if (GET_CODE (dest) == IF_THEN_ELSE)
9266 dest = XEXP (dest, 1);
9267 dest = XEXP (dest, 0);
9268 dest_uid = INSN_UID (dest);
9269 return INSN_ADDRESSES (dest_uid);
9270 }
9271 \f
9272 /* Return nonzero if REG is not used after INSN.
9273 We assume REG is a reload reg, and therefore does
9274 not live past labels. It may live past calls or jumps though. */
9275 int
9276 reg_unused_after (rtx reg, rtx insn)
9277 {
9278 enum rtx_code code;
9279 rtx set;
9280
9281 /* If the reg is set by this instruction, then it is safe for our
9282 case. Disregard the case where this is a store to memory, since
9283 we are checking a register used in the store address. */
9284 set = single_set (insn);
9285 if (set && !MEM_P (SET_DEST (set))
9286 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9287 return 1;
9288
9289 while ((insn = NEXT_INSN (insn)))
9290 {
9291 rtx set;
9292 if (!INSN_P (insn))
9293 continue;
9294
9295 code = GET_CODE (insn);
9296
9297 #if 0
9298 /* If this is a label that existed before reload, then the register
9299 is dead here. However, if this is a label added by reorg, then
9300 the register may still be live here. We can't tell the difference,
9301 so we just ignore labels completely. */
9302 if (code == CODE_LABEL)
9303 return 1;
9304 /* else */
9305 #endif
9306
9307 if (code == JUMP_INSN)
9308 return 0;
9309
9310 /* If this is a sequence, we must handle the insns all at once.
9311 We could have for instance a call that sets the target register,
9312 and an insn in a delay slot that uses the register. In this case,
9313 we must return 0. */
9314 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9315 {
9316 int i;
9317 int retval = 0;
9318
9319 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9320 {
9321 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9322 rtx set = single_set (this_insn);
9323
9324 if (CALL_P (this_insn))
9325 code = CALL_INSN;
9326 else if (JUMP_P (this_insn))
9327 {
9328 if (INSN_ANNULLED_BRANCH_P (this_insn))
9329 return 0;
9330 code = JUMP_INSN;
9331 }
9332
9333 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9334 return 0;
9335 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9336 {
9337 if (!MEM_P (SET_DEST (set)))
9338 retval = 1;
9339 else
9340 return 0;
9341 }
9342 if (set == 0
9343 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9344 return 0;
9345 }
9346 if (retval == 1)
9347 return 1;
9348 else if (code == JUMP_INSN)
9349 return 0;
9350 }
9351
9352 set = single_set (insn);
9353 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9354 return 0;
9355 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9356 return !MEM_P (SET_DEST (set));
9357 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9358 return 0;
9359
9360 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9361 return 1;
9362 }
9363 return 1;
9364 }
9365 \f
9366 #include "ggc.h"
9367
9368 static GTY(()) rtx fpscr_rtx;
9369 rtx
9370 get_fpscr_rtx (void)
9371 {
9372 if (! fpscr_rtx)
9373 {
9374 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9375 REG_USERVAR_P (fpscr_rtx) = 1;
9376 mark_user_reg (fpscr_rtx);
9377 }
9378 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9379 mark_user_reg (fpscr_rtx);
9380 return fpscr_rtx;
9381 }
9382
9383 static GTY(()) tree fpscr_values;
9384
9385 static void
9386 emit_fpu_switch (rtx scratch, int index)
9387 {
9388 rtx dst, src;
9389
9390 if (fpscr_values == NULL)
9391 {
9392 tree t;
9393
9394 t = build_index_type (integer_one_node);
9395 t = build_array_type (integer_type_node, t);
9396 t = build_decl (BUILTINS_LOCATION,
9397 VAR_DECL, get_identifier ("__fpscr_values"), t);
9398 DECL_ARTIFICIAL (t) = 1;
9399 DECL_IGNORED_P (t) = 1;
9400 DECL_EXTERNAL (t) = 1;
9401 TREE_STATIC (t) = 1;
9402 TREE_PUBLIC (t) = 1;
9403 TREE_USED (t) = 1;
9404
9405 fpscr_values = t;
9406 }
9407
9408 src = DECL_RTL (fpscr_values);
9409 if (!can_create_pseudo_p ())
9410 {
9411 emit_move_insn (scratch, XEXP (src, 0));
9412 if (index != 0)
9413 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9414 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9415 }
9416 else
9417 src = adjust_address (src, PSImode, index * 4);
9418
9419 dst = get_fpscr_rtx ();
9420 emit_move_insn (dst, src);
9421 }
9422
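/* Note (illustrative): the __fpscr_values declaration built in
   emit_fpu_switch above corresponds to a runtime-provided object roughly
   equivalent to

       extern int __fpscr_values[2];

   with one FPSCR image per FP mode; entry INDEX is loaded into FPSCR to
   switch precision.  The actual contents of the two entries come from the
   runtime, not from this file.  */
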
9423 void
9424 emit_sf_insn (rtx pat)
9425 {
9426 emit_insn (pat);
9427 }
9428
9429 void
9430 emit_df_insn (rtx pat)
9431 {
9432 emit_insn (pat);
9433 }
9434
9435 void
9436 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9437 {
9438 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9439 }
9440
9441 void
9442 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9443 {
9444 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9445 get_fpscr_rtx ()));
9446 }
9447
9448 void
9449 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9450 {
9451 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9452 }
9453
9454 void
9455 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9456 {
9457 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9458 get_fpscr_rtx ()));
9459 }
9460 \f
9461 static rtx get_free_reg (HARD_REG_SET);
9462
9463 /* This function returns a register to use for loading the address from which
9464 the fpscr is loaded. Currently it always returns r1 or r7, but when we are
9465 able to use pseudo registers after combine, or have a better mechanism
9466 for choosing a register, it should be done here. */
9467 /* REGS_LIVE is the liveness information for the point for which we
9468 need this allocation. In some bare-bones exit blocks, r1 is live at the
9469 start. We can even have all of r0..r3 being live:
9470 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9471 The INSN before which new insns are placed will clobber the register
9472 we return. If a basic block consists only of setting the return value
9473 register to a pseudo and using that register, the return value is not
9474 live before or after this block, yet we'll insert our insns right in
9475 the middle. */
9476
9477 static rtx
9478 get_free_reg (HARD_REG_SET regs_live)
9479 {
9480 if (! TEST_HARD_REG_BIT (regs_live, 1))
9481 return gen_rtx_REG (Pmode, 1);
9482
9483 /* Hard reg 1 is live; since this is a small register classes target,
9484 there shouldn't be anything but a jump before the function end. */
9485 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9486 return gen_rtx_REG (Pmode, 7);
9487 }
9488
9489 /* This function will set the fpscr from memory.
9490 MODE is the mode we are setting it to. */
9491 void
9492 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9493 {
9494 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9495 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9496 rtx addr_reg;
9497
9498 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9499 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9500 }
9501
9502 /* Is the given character a logical line separator for the assembler? */
9503 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9504 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9505 #endif
9506
9507 int
9508 sh_insn_length_adjustment (rtx insn)
9509 {
9510 /* Instructions with unfilled delay slots take up an extra two bytes for
9511 the nop in the delay slot. */
9512 if (((NONJUMP_INSN_P (insn)
9513 && GET_CODE (PATTERN (insn)) != USE
9514 && GET_CODE (PATTERN (insn)) != CLOBBER)
9515 || CALL_P (insn)
9516 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9517 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9518 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9519 return 2;
9520
9521 /* SH2e has a bug that prevents the use of annulled branches, so if
9522 the delay slot is not filled, we'll have to put a NOP in it. */
9523 if (sh_cpu_attr == CPU_SH2E
9524 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9525 && get_attr_type (insn) == TYPE_CBRANCH
9526 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9527 return 2;
9528
9529 /* sh-dsp parallel processing insns take four bytes instead of two. */
9530
9531 if (NONJUMP_INSN_P (insn))
9532 {
9533 int sum = 0;
9534 rtx body = PATTERN (insn);
9535 const char *templ;
9536 char c;
9537 int maybe_label = 1;
9538
9539 if (GET_CODE (body) == ASM_INPUT)
9540 templ = XSTR (body, 0);
9541 else if (asm_noperands (body) >= 0)
9542 templ
9543 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9544 else
9545 return 0;
9546 do
9547 {
9548 int ppi_adjust = 0;
9549
9550 do
9551 c = *templ++;
9552 while (c == ' ' || c == '\t');
9553 /* all sh-dsp parallel-processing insns start with p.
9554 The only non-ppi sh insn starting with p is pref.
9555 The only ppi starting with pr is prnd. */
9556 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9557 ppi_adjust = 2;
9558 /* The repeat pseudo-insn expands to three insns, a total of
9559 six bytes in size. */
9560 else if ((c == 'r' || c == 'R')
9561 && ! strncasecmp ("epeat", templ, 5))
9562 ppi_adjust = 4;
9563 while (c && c != '\n'
9564 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9565 {
9566 /* If this is a label, it is obviously not a ppi insn. */
9567 if (c == ':' && maybe_label)
9568 {
9569 ppi_adjust = 0;
9570 break;
9571 }
9572 else if (c == '\'' || c == '"')
9573 maybe_label = 0;
9574 c = *templ++;
9575 }
9576 sum += ppi_adjust;
9577 maybe_label = c != ':';
9578 }
9579 while (c);
9580 return sum;
9581 }
9582 return 0;
9583 }
9584 \f
9585 /* Return TRUE for a valid displacement for the REG+disp addressing
9586 with MODE. */
9587
9588 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9589 into the FRx registers. We implement this by setting the maximum offset
9590 to zero when the value is SFmode. This also restricts loading of SFmode
9591 values into the integer registers, but that can't be helped. */
9592
9593 /* The SH allows a displacement in a QI or HI mode, but only when the
9594 other operand is R0. GCC doesn't handle this very well, so we forgo
9595 all of that.
9596
9597 A legitimate constant index for QImode is 0..15 (0..4095 on SH2A); SImode allows multiples of 4 up to 60,
9598 DImode multiples of 4 up to 56; the SH2A and FP-register special cases are handled below. */
9599
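/* Illustrative examples of the ranges above on a non-SH2A, non-SH2E target;
   the mnemonics are for orientation only, the check itself is
   sh_legitimate_index_p below:

       mov.l  @(60,r4),r1     SImode, disp 60: multiple of 4 and <= 60, OK
       mov.l  @(64,r4),r1     disp 64: out of range, needs reg+reg or a reload
       mov.b  @(15,r4),r0     QImode disp 0..15, and the result must land in r0  */
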
9600 bool
9601 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9602 {
9603 if (CONST_INT_P (op))
9604 {
9605 if (TARGET_SHMEDIA)
9606 {
9607 int size;
9608
9609 /* Check if this is the address of an unaligned load / store. */
9610 if (mode == VOIDmode)
9611 return CONST_OK_FOR_I06 (INTVAL (op));
9612
9613 size = GET_MODE_SIZE (mode);
9614 return (!(INTVAL (op) & (size - 1))
9615 && INTVAL (op) >= -512 * size
9616 && INTVAL (op) < 512 * size);
9617 }
9618
9619 if (TARGET_SH2A)
9620 {
9621 if (mode == QImode && (unsigned) INTVAL (op) < 4096)
9622 return true;
9623 }
9624
9625 if (mode == QImode && (unsigned) INTVAL (op) < 16)
9626 return true;
9627
9628 if ((GET_MODE_SIZE (mode) == 4
9629 && (unsigned) INTVAL (op) < 64
9630 && !(INTVAL (op) & 3)
9631 && !(TARGET_SH2E && mode == SFmode))
9632 || (GET_MODE_SIZE (mode) == 4
9633 && (unsigned) INTVAL (op) < 16383
9634 && !(INTVAL (op) & 3) && TARGET_SH2A))
9635 return true;
9636
9637 if ((GET_MODE_SIZE (mode) == 8
9638 && (unsigned) INTVAL (op) < 60
9639 && !(INTVAL (op) & 3)
9640 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9641 || ((GET_MODE_SIZE (mode) == 8)
9642 && (unsigned) INTVAL (op) < 8192
9643 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9644 && (TARGET_SH2A && mode == DFmode)))
9645 return true;
9646 }
9647
9648 return false;
9649 }
9650
9651 /* Recognize an RTL expression that is a valid memory address for
9652 an instruction.
9653 The MODE argument is the machine mode for the MEM expression
9654 that wants to use this address.
9655 Allow REG
9656 REG+disp
9657 REG+r0
9658 REG++
9659 --REG */
9660
9661 static bool
9662 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9663 {
9664 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9665 return true;
9666 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9667 && ! TARGET_SHMEDIA
9668 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9669 return true;
9670 else if (GET_CODE (x) == PLUS
9671 && (mode != PSImode || reload_completed))
9672 {
9673 rtx xop0 = XEXP (x, 0);
9674 rtx xop1 = XEXP (x, 1);
9675
9676 if (GET_MODE_SIZE (mode) <= 8
9677 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9678 && sh_legitimate_index_p (mode, xop1))
9679 return true;
9680
9681 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9682 || ((xop0 == stack_pointer_rtx
9683 || xop0 == hard_frame_pointer_rtx)
9684 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9685 || ((xop1 == stack_pointer_rtx
9686 || xop1 == hard_frame_pointer_rtx)
9687 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9688 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9689 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9690 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9691 && TARGET_FMOVD && mode == DFmode)))
9692 {
9693 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9694 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9695 return true;
9696 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9697 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9698 return true;
9699 }
9700 }
9701
9702 return false;
9703 }
9704 \f
9705 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9706 isn't protected by a PIC unspec. */
9707 int
9708 nonpic_symbol_mentioned_p (rtx x)
9709 {
9710 register const char *fmt;
9711 register int i;
9712
9713 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9714 || GET_CODE (x) == PC)
9715 return 1;
9716
9717 /* We don't want to look into the possible MEM location of a
9718 CONST_DOUBLE, since we're not going to use it, in general. */
9719 if (GET_CODE (x) == CONST_DOUBLE)
9720 return 0;
9721
9722 if (GET_CODE (x) == UNSPEC
9723 && (XINT (x, 1) == UNSPEC_PIC
9724 || XINT (x, 1) == UNSPEC_GOT
9725 || XINT (x, 1) == UNSPEC_GOTOFF
9726 || XINT (x, 1) == UNSPEC_GOTPLT
9727 || XINT (x, 1) == UNSPEC_GOTTPOFF
9728 || XINT (x, 1) == UNSPEC_DTPOFF
9729 || XINT (x, 1) == UNSPEC_TPOFF
9730 || XINT (x, 1) == UNSPEC_PLT
9731 || XINT (x, 1) == UNSPEC_SYMOFF
9732 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9733 return 0;
9734
9735 fmt = GET_RTX_FORMAT (GET_CODE (x));
9736 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9737 {
9738 if (fmt[i] == 'E')
9739 {
9740 register int j;
9741
9742 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9743 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9744 return 1;
9745 }
9746 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9747 return 1;
9748 }
9749
9750 return 0;
9751 }
9752
9753 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9754 @GOTOFF in `reg'. */
9755 rtx
9756 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9757 rtx reg)
9758 {
9759 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9760 return orig;
9761
9762 if (GET_CODE (orig) == LABEL_REF
9763 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9764 {
9765 if (reg == 0)
9766 reg = gen_reg_rtx (Pmode);
9767
9768 emit_insn (gen_symGOTOFF2reg (reg, orig));
9769 return reg;
9770 }
9771 else if (GET_CODE (orig) == SYMBOL_REF)
9772 {
9773 if (reg == 0)
9774 reg = gen_reg_rtx (Pmode);
9775
9776 emit_insn (gen_symGOT2reg (reg, orig));
9777 return reg;
9778 }
9779 return orig;
9780 }
9781
9782 /* Try machine-dependent ways of modifying an illegitimate address
9783 to be legitimate. If we find one, return the new, valid address.
9784 Otherwise, return X.
9785
9786 For the SH, if X is almost suitable for indexing, but the offset is
9787 out of range, convert it into a normal form so that CSE has a chance
9788 of reducing the number of address registers used. */
9789
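/* Worked example (illustrative): for an SImode access at r4+132, the code
   below picks offset_base = ((132 + 4) & ~60) - 4 = 124, emits an add of
   124 into a fresh register, and returns (plus reg 8).  The add of 124 can
   then be CSEd with neighbouring out-of-range accesses, while the remaining
   displacement 8 still fits the @(disp,Rn) form.  */
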
9790 static rtx
9791 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9792 {
9793 if (flag_pic)
9794 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9795
9796 if (GET_CODE (x) == PLUS
9797 && (GET_MODE_SIZE (mode) == 4
9798 || GET_MODE_SIZE (mode) == 8)
9799 && CONST_INT_P (XEXP (x, 1))
9800 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9801 && ! TARGET_SHMEDIA
9802 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9803 && ! (TARGET_SH2E && mode == SFmode))
9804 {
9805 rtx index_rtx = XEXP (x, 1);
9806 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9807 rtx sum;
9808
9809 /* On rare occasions, we might get an unaligned pointer
9810 that is indexed in a way to give an aligned address.
9811 Therefore, keep the lower two bits in offset_base. */
9812 /* Instead of offset_base 128..131 use 124..127, so that
9813 simple add suffices. */
9814 if (offset > 127)
9815 offset_base = ((offset + 4) & ~60) - 4;
9816 else
9817 offset_base = offset & ~60;
9818
9819 /* Sometimes the normal form does not suit DImode. We
9820 could avoid that by using smaller ranges, but that
9821 would give less optimized code when SImode is
9822 prevalent. */
9823 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9824 {
9825 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9826 GEN_INT (offset_base), NULL_RTX, 0,
9827 OPTAB_LIB_WIDEN);
9828
9829 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9830 }
9831 }
9832
9833 /* This could be generalized for SImode, HImode, QImode displacement
9834 addressing. */
9835 if (mode == QImode && GET_CODE (x) == PLUS
9836 && BASE_REGISTER_RTX_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
9837 {
9838 rtx index_rtx = XEXP (x, 1);
9839 HOST_WIDE_INT offset = INTVAL (index_rtx);
9840 HOST_WIDE_INT offset_base = offset & ~15;
9841
9842 if (offset - offset_base <= 16)
9843 {
9844 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9845 GEN_INT (offset_base), NULL_RTX, 0,
9846 OPTAB_LIB_WIDEN);
9847
9848 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9849 }
9850 }
9851
9852 return x;
9853 }
9854
9855 /* Attempt to replace *P, which is an address that needs reloading, with
9856 a valid memory address for an operand of mode MODE.
9857 Like for sh_legitimize_address, for the SH we try to get a normal form
9858 of the address. That will allow inheritance of the address reloads. */
9859
9860 bool
9861 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9862 int itype)
9863 {
9864 enum reload_type type = (enum reload_type) itype;
9865
9866 if (GET_CODE (*p) == PLUS
9867 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9868 && CONST_INT_P (XEXP (*p, 1))
9869 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9870 && ! TARGET_SHMEDIA
9871 && ! (TARGET_SH4 && mode == DFmode)
9872 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9873 && (ALLOW_INDEXED_ADDRESS
9874 || XEXP (*p, 0) == stack_pointer_rtx
9875 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9876 {
9877 rtx index_rtx = XEXP (*p, 1);
9878 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9879 rtx sum;
9880
9881 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9882 {
9883 push_reload (*p, NULL_RTX, p, NULL,
9884 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9885 goto win;
9886 }
9887 if (TARGET_SH2E && mode == SFmode)
9888 {
9889 *p = copy_rtx (*p);
9890 push_reload (*p, NULL_RTX, p, NULL,
9891 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9892 goto win;
9893 }
9894 /* Instead of offset_base 128..131 use 124..127, so that
9895 simple add suffices. */
9896 if (offset > 127)
9897 offset_base = ((offset + 4) & ~60) - 4;
9898 else
9899 offset_base = offset & ~60;
9900 /* Sometimes the normal form does not suit DImode. We could avoid
9901 that by using smaller ranges, but that would give less optimized
9902 code when SImode is prevalent. */
9903 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9904 {
9905 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9906 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9907 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9908 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9909 goto win;
9910 }
9911 }
9912 /* We must re-recognize what we created before. */
9913 else if (GET_CODE (*p) == PLUS
9914 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9915 && GET_CODE (XEXP (*p, 0)) == PLUS
9916 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9917 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9918 && CONST_INT_P (XEXP (*p, 1))
9919 && ! TARGET_SHMEDIA
9920 && ! (TARGET_SH2E && mode == SFmode))
9921 {
9922 /* Because this address is so complex, we know it must have
9923 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9924 it is already unshared, and needs no further unsharing. */
9925 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9926 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9927 goto win;
9928 }
9929
9930 return false;
9931
9932 win:
9933 return true;
9934 }
9935
9936 /* In the name of slightly smaller debug output, and to cater to
9937 general assembler lossage, recognize various UNSPEC sequences
9938 and turn them back into a direct symbol reference. */
9939
9940 static rtx
9941 sh_delegitimize_address (rtx orig_x)
9942 {
9943 rtx x, y;
9944
9945 orig_x = delegitimize_mem_from_attrs (orig_x);
9946
9947 x = orig_x;
9948 if (MEM_P (x))
9949 x = XEXP (x, 0);
9950 if (GET_CODE (x) == CONST)
9951 {
9952 y = XEXP (x, 0);
9953 if (GET_CODE (y) == UNSPEC)
9954 {
9955 if (XINT (y, 1) == UNSPEC_GOT
9956 || XINT (y, 1) == UNSPEC_GOTOFF
9957 || XINT (y, 1) == UNSPEC_SYMOFF)
9958 return XVECEXP (y, 0, 0);
9959 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9960 {
9961 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9962 {
9963 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9964
9965 if (GET_CODE (symplt) == UNSPEC
9966 && XINT (symplt, 1) == UNSPEC_PLT)
9967 return XVECEXP (symplt, 0, 0);
9968 }
9969 }
9970 else if (TARGET_SHMEDIA
9971 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
9972 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
9973 {
9974 rtx offset = XVECEXP (y, 0, 1);
9975
9976 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
9977 if (MEM_P (orig_x))
9978 x = replace_equiv_address_nv (orig_x, x);
9979 return x;
9980 }
9981 }
9982 }
9983
9984 return orig_x;
9985 }
9986
9987 /* Mark the use of a constant in the literal table. If the constant
9988 has multiple labels, make it unique. */
9989 static rtx
9990 mark_constant_pool_use (rtx x)
9991 {
9992 rtx insn, lab, pattern;
9993
9994 if (x == NULL)
9995 return x;
9996
9997 switch (GET_CODE (x))
9998 {
9999 case LABEL_REF:
10000 x = XEXP (x, 0);
10001 case CODE_LABEL:
10002 break;
10003 default:
10004 return x;
10005 }
10006
10007 /* Get the first label in the list of labels for the same constant
10008 and delete the other labels in the list. */
10009 lab = x;
10010 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
10011 {
10012 if (!LABEL_P (insn)
10013 || LABEL_REFS (insn) != NEXT_INSN (insn))
10014 break;
10015 lab = insn;
10016 }
10017
10018 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10019 INSN_DELETED_P (insn) = 1;
10020
10021 /* Mark constants in a window. */
10022 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
10023 {
10024 if (!NONJUMP_INSN_P (insn))
10025 continue;
10026
10027 pattern = PATTERN (insn);
10028 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10029 continue;
10030
10031 switch (XINT (pattern, 1))
10032 {
10033 case UNSPECV_CONST2:
10034 case UNSPECV_CONST4:
10035 case UNSPECV_CONST8:
10036 XVECEXP (pattern, 0, 1) = const1_rtx;
10037 break;
10038 case UNSPECV_WINDOW_END:
10039 if (XVECEXP (pattern, 0, 0) == x)
10040 return lab;
10041 break;
10042 case UNSPECV_CONST_END:
10043 return lab;
10044 default:
10045 break;
10046 }
10047 }
10048
10049 return lab;
10050 }
10051 \f
10052 /* Return true if it's possible to redirect BRANCH1 to the destination
10053 of an unconditional jump BRANCH2. We only want to do this if the
10054 resulting branch will have a short displacement. */
10055 int
10056 sh_can_redirect_branch (rtx branch1, rtx branch2)
10057 {
10058 if (flag_expensive_optimizations && simplejump_p (branch2))
10059 {
10060 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10061 rtx insn;
10062 int distance;
10063
10064 for (distance = 0, insn = NEXT_INSN (branch1);
10065 insn && distance < 256;
10066 insn = PREV_INSN (insn))
10067 {
10068 if (insn == dest)
10069 return 1;
10070 else
10071 distance += get_attr_length (insn);
10072 }
10073 for (distance = 0, insn = NEXT_INSN (branch1);
10074 insn && distance < 256;
10075 insn = NEXT_INSN (insn))
10076 {
10077 if (insn == dest)
10078 return 1;
10079 else
10080 distance += get_attr_length (insn);
10081 }
10082 }
10083 return 0;
10084 }
10085
10086 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10087 int
10088 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10089 unsigned int new_reg)
10090 {
10091 /* Interrupt functions can only use registers that have already been
10092 saved by the prologue, even if they would normally be
10093 call-clobbered. */
10094
10095 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10096 return 0;
10097
10098 return 1;
10099 }
10100
10101 /* Function to update the integer COST
10102 based on the relationship between INSN that is dependent on
10103 DEP_INSN through the dependence LINK. The default is to make no
10104 adjustment to COST. This can be used for example to specify to
10105 the scheduler that an output- or anti-dependence does not incur
10106 the same cost as a data-dependence. The return value should be
10107 the new value for COST. */
10108 static int
10109 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10110 {
10111 rtx reg, use_pat;
10112
10113 if (TARGET_SHMEDIA)
10114 {
10115 /* On SHmedia, if the dependence is an anti-dependence or
10116 output-dependence, there is no cost. */
10117 if (REG_NOTE_KIND (link) != 0)
10118 {
10119 /* However, dependencies between target register loads and
10120 uses of the register in a subsequent block that are separated
10121 by a conditional branch are not modelled - we have to make do with
10122 the anti-dependency between the target register load and the
10123 conditional branch that ends the current block. */
10124 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10125 && GET_CODE (PATTERN (dep_insn)) == SET
10126 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10127 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10128 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10129 {
10130 int orig_cost = cost;
10131 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10132 rtx target = ((! note
10133 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10134 ? insn : JUMP_LABEL (insn));
10135 /* On the likely path, the branch costs 1, on the unlikely path,
10136 it costs 3. */
10137 cost--;
10138 do
10139 target = next_active_insn (target);
10140 while (target && ! flow_dependent_p (target, dep_insn)
10141 && --cost > 0);
10142 /* If two branches are executed in immediate succession, with the
10143 first branch properly predicted, this causes a stall at the
10144 second branch, hence we won't need the target for the
10145 second branch for two cycles after the launch of the first
10146 branch. */
10147 if (cost > orig_cost - 2)
10148 cost = orig_cost - 2;
10149 }
10150 else
10151 cost = 0;
10152 }
10153
10154 else if (get_attr_is_mac_media (insn)
10155 && get_attr_is_mac_media (dep_insn))
10156 cost = 1;
10157
10158 else if (! reload_completed
10159 && GET_CODE (PATTERN (insn)) == SET
10160 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10161 && GET_CODE (PATTERN (dep_insn)) == SET
10162 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10163 && cost < 4)
10164 cost = 4;
10165 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10166 that is needed at the target. */
10167 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10168 && ! flow_dependent_p (insn, dep_insn))
10169 cost--;
10170 }
10171 else if (REG_NOTE_KIND (link) == 0)
10172 {
10173 enum attr_type type;
10174 rtx dep_set;
10175
10176 if (recog_memoized (insn) < 0
10177 || recog_memoized (dep_insn) < 0)
10178 return cost;
10179
10180 dep_set = single_set (dep_insn);
10181
10182 /* The latency that we specify in the scheduling description refers
10183 to the actual output, not to an auto-increment register; for that,
10184 the latency is one. */
10185 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10186 {
10187 rtx set = single_set (insn);
10188
10189 if (set
10190 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10191 && (!MEM_P (SET_DEST (set))
10192 || !reg_mentioned_p (SET_DEST (dep_set),
10193 XEXP (SET_DEST (set), 0))))
10194 cost = 1;
10195 }
10196 /* The only input for a call that is timing-critical is the
10197 function's address. */
10198 if (CALL_P (insn))
10199 {
10200 rtx call = PATTERN (insn);
10201
10202 if (GET_CODE (call) == PARALLEL)
10203 call = XVECEXP (call, 0, 0);
10204 if (GET_CODE (call) == SET)
10205 call = SET_SRC (call);
10206 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10207 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10208 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10209 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10210 cost -= TARGET_SH4_300 ? 3 : 6;
10211 }
10212 /* Likewise, the most timing-critical input for an sfunc call
10213 is the function address. However, sfuncs typically start
10214 using their arguments pretty quickly.
10215 Assume a four cycle delay for SH4 before they are needed.
10216 Cached ST40-300 calls are quicker, so assume only a one
10217 cycle delay there.
10218 ??? Maybe we should encode the delays till input registers
10219 are needed by sfuncs into the sfunc call insn. */
10220 /* All sfunc calls are parallels with at least four components.
10221 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10222 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10223 && XVECLEN (PATTERN (insn), 0) >= 4
10224 && (reg = sfunc_uses_reg (insn)))
10225 {
10226 if (! reg_set_p (reg, dep_insn))
10227 cost -= TARGET_SH4_300 ? 1 : 4;
10228 }
10229 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10230 {
10231 enum attr_type dep_type = get_attr_type (dep_insn);
10232
10233 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10234 cost--;
10235 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10236 && (type = get_attr_type (insn)) != TYPE_CALL
10237 && type != TYPE_SFUNC)
10238 cost--;
10239 /* When the preceding instruction loads the shift amount of
10240 the following SHAD/SHLD, the latency of the load is increased
10241 by 1 cycle. */
10242 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10243 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10244 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10245 XEXP (SET_SRC (single_set (insn)),
10246 1)))
10247 cost++;
10248 /* When an LS group instruction with a latency of less than
10249 3 cycles is followed by a double-precision floating-point
10250 instruction, FIPR, or FTRV, the latency of the first
10251 instruction is increased to 3 cycles. */
10252 else if (cost < 3
10253 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10254 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10255 cost = 3;
10256 /* The lsw register of a double-precision computation is ready one
10257 cycle earlier. */
10258 else if (reload_completed
10259 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10260 && (use_pat = single_set (insn))
10261 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10262 SET_SRC (use_pat)))
10263 cost -= 1;
10264
10265 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10266 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10267 cost -= 1;
10268 }
10269 else if (TARGET_SH4_300)
10270 {
10271 /* Stores need their input register two cycles later. */
10272 if (dep_set && cost >= 1
10273 && ((type = get_attr_type (insn)) == TYPE_STORE
10274 || type == TYPE_PSTORE
10275 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10276 {
10277 rtx set = single_set (insn);
10278
10279 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10280 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10281 {
10282 cost -= 2;
10283 /* But don't reduce the cost below 1 if the address depends
10284 on a side effect of dep_insn. */
10285 if (cost < 1
10286 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10287 cost = 1;
10288 }
10289 }
10290 }
10291 }
10292 /* An anti-dependence penalty of two applies if the first insn is a double
10293 precision fadd / fsub / fmul. */
10294 else if (!TARGET_SH4_300
10295 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10296 && recog_memoized (dep_insn) >= 0
10297 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10298 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10299 /* A lot of alleged anti-flow dependences are fake,
10300 so check this one is real. */
10301 && flow_dependent_p (dep_insn, insn))
10302 cost = 2;
10303
10304 return cost;
10305 }
10306
10307 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10308 if DEP_INSN is anti-flow dependent on INSN. */
10309 static int
10310 flow_dependent_p (rtx insn, rtx dep_insn)
10311 {
10312 rtx tmp = PATTERN (insn);
10313
10314 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10315 return tmp == NULL_RTX;
10316 }
10317
10318 /* A helper function for flow_dependent_p called through note_stores. */
10319 static void
10320 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10321 {
10322 rtx * pinsn = (rtx *) data;
10323
10324 if (*pinsn && reg_referenced_p (x, *pinsn))
10325 *pinsn = NULL_RTX;
10326 }
10327
10328 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10329 'special function' patterns (type sfunc) that clobber pr, but that
10330 do not look like function calls to leaf_function_p. Hence we must
10331 do this extra check. */
10332 static int
10333 sh_pr_n_sets (void)
10334 {
10335 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10336 }
10337
10338 /* Return where to allocate pseudo for a given hard register initial
10339 value. */
10340 static rtx
10341 sh_allocate_initial_value (rtx hard_reg)
10342 {
10343 rtx x;
10344
10345 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10346 {
10347 if (current_function_is_leaf
10348 && ! sh_pr_n_sets ()
10349 && ! (TARGET_SHCOMPACT
10350 && ((crtl->args.info.call_cookie
10351 & ~ CALL_COOKIE_RET_TRAMP (1))
10352 || crtl->saves_all_registers)))
10353 x = hard_reg;
10354 else
10355 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10356 }
10357 else
10358 x = NULL_RTX;
10359
10360 return x;
10361 }
10362
10363 /* This function returns "2" to indicate dual issue for the SH4
10364 processor. To be used by the DFA pipeline description. */
10365 static int
10366 sh_issue_rate (void)
10367 {
10368 if (TARGET_SUPERSCALAR)
10369 return 2;
10370 else
10371 return 1;
10372 }
10373
10374 /* Functions for ready queue reordering for sched1. */
10375
10376 /* Get weight for mode for a set x. */
10377 static short
10378 find_set_regmode_weight (rtx x, enum machine_mode mode)
10379 {
10380 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10381 return 1;
10382 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10383 {
10384 if (REG_P (SET_DEST (x)))
10385 {
10386 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10387 return 1;
10388 else
10389 return 0;
10390 }
10391 return 1;
10392 }
10393 return 0;
10394 }
10395
10396 /* Get regmode weight for insn. */
10397 static short
10398 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10399 {
10400 short reg_weight = 0;
10401 rtx x;
10402
10403 /* Increment weight for each register born here. */
10404 x = PATTERN (insn);
10405 reg_weight += find_set_regmode_weight (x, mode);
10406 if (GET_CODE (x) == PARALLEL)
10407 {
10408 int j;
10409 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10410 {
10411 x = XVECEXP (PATTERN (insn), 0, j);
10412 reg_weight += find_set_regmode_weight (x, mode);
10413 }
10414 }
10415 /* Decrement weight for each register that dies here. */
10416 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10417 {
10418 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10419 {
10420 rtx note = XEXP (x, 0);
10421 if (REG_P (note) && GET_MODE (note) == mode)
10422 reg_weight--;
10423 }
10424 }
10425 return reg_weight;
10426 }
10427
10428 /* Calculate regmode weights for all insns of a basic block. */
10429 static void
10430 find_regmode_weight (basic_block b, enum machine_mode mode)
10431 {
10432 rtx insn, next_tail, head, tail;
10433
10434 get_ebb_head_tail (b, b, &head, &tail);
10435 next_tail = NEXT_INSN (tail);
10436
10437 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10438 {
10439 /* Handle register life information. */
10440 if (!INSN_P (insn))
10441 continue;
10442
10443 if (mode == SFmode)
10444 INSN_REGMODE_WEIGHT (insn, mode) =
10445 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10446 else if (mode == SImode)
10447 INSN_REGMODE_WEIGHT (insn, mode) =
10448 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10449 }
10450 }
10451
10452 /* Comparison function for ready queue sorting. */
10453 static int
10454 rank_for_reorder (const void *x, const void *y)
10455 {
10456 rtx tmp = *(const rtx *) y;
10457 rtx tmp2 = *(const rtx *) x;
10458
10459 /* The insn in a schedule group should be issued first. */
10460 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10461 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10462
10463 /* If insns are equally good, sort by INSN_LUID (original insn order). This
10464 minimizes instruction movement, thus minimizing sched's effect on
10465 register pressure. */
10466 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10467 }
10468
10469 /* Resort the array A in which only the element at index N may be out of order. */
10470 static void
10471 swap_reorder (rtx *a, int n)
10472 {
10473 rtx insn = a[n - 1];
10474 int i = n - 2;
10475
10476 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10477 {
10478 a[i + 1] = a[i];
10479 i -= 1;
10480 }
10481 a[i + 1] = insn;
10482 }
10483
10484 #define SCHED_REORDER(READY, N_READY) \
10485 do \
10486 { \
10487 if ((N_READY) == 2) \
10488 swap_reorder (READY, N_READY); \
10489 else if ((N_READY) > 2) \
10490 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10491 } \
10492 while (0)
10493
10494 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10495 macro. */
10496 static void
10497 ready_reorder (rtx *ready, int nready)
10498 {
10499 SCHED_REORDER (ready, nready);
10500 }
10501
10502 /* Count life regions of r0 for a block. */
10503 static int
10504 find_r0_life_regions (basic_block b)
10505 {
10506 rtx end, insn;
10507 rtx pset;
10508 rtx r0_reg;
10509 int live;
10510 int set;
10511 int death = 0;
10512
10513 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10514 {
10515 set = 1;
10516 live = 1;
10517 }
10518 else
10519 {
10520 set = 0;
10521 live = 0;
10522 }
10523
10524 insn = BB_HEAD (b);
10525 end = BB_END (b);
10526 r0_reg = gen_rtx_REG (SImode, R0_REG);
10527 while (1)
10528 {
10529 if (INSN_P (insn))
10530 {
10531 if (find_regno_note (insn, REG_DEAD, R0_REG))
10532 {
10533 death++;
10534 live = 0;
10535 }
10536 if (!live
10537 && (pset = single_set (insn))
10538 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10539 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10540 {
10541 set++;
10542 live = 1;
10543 }
10544 }
10545 if (insn == end)
10546 break;
10547 insn = NEXT_INSN (insn);
10548 }
10549 return set - death;
10550 }
10551
10552 /* Calculate regmode weights for all insns of all basic blocks. */
10553 static void
10554 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10555 int verbose ATTRIBUTE_UNUSED,
10556 int old_max_uid)
10557 {
10558 basic_block b;
10559
10560 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10561 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10562 r0_life_regions = 0;
10563
10564 FOR_EACH_BB_REVERSE (b)
10565 {
10566 find_regmode_weight (b, SImode);
10567 find_regmode_weight (b, SFmode);
10568 if (!reload_completed)
10569 r0_life_regions += find_r0_life_regions (b);
10570 }
10571
10572 CURR_REGMODE_PRESSURE (SImode) = 0;
10573 CURR_REGMODE_PRESSURE (SFmode) = 0;
10574
10575 }
10576
10577 /* Cleanup. */
10578 static void
10579 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10580 int verbose ATTRIBUTE_UNUSED)
10581 {
10582 if (regmode_weight[0])
10583 {
10584 free (regmode_weight[0]);
10585 regmode_weight[0] = NULL;
10586 }
10587 if (regmode_weight[1])
10588 {
10589 free (regmode_weight[1]);
10590 regmode_weight[1] = NULL;
10591 }
10592 }
10593
10594 /* The scalar modes supported differ from the default version in TImode
10595 for 32-bit SHMEDIA. */
10596 static bool
10597 sh_scalar_mode_supported_p (enum machine_mode mode)
10598 {
10599 if (TARGET_SHMEDIA32 && mode == TImode)
10600 return false;
10601
10602 return default_scalar_mode_supported_p (mode);
10603 }
10604
10605 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10606 keep count of register pressures on SImode and SFmode. */
10607 static int
10608 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10609 int sched_verbose ATTRIBUTE_UNUSED,
10610 rtx insn,
10611 int can_issue_more)
10612 {
10613 if (GET_CODE (PATTERN (insn)) != USE
10614 && GET_CODE (PATTERN (insn)) != CLOBBER)
10615 cached_can_issue_more = can_issue_more - 1;
10616 else
10617 cached_can_issue_more = can_issue_more;
10618
10619 if (reload_completed)
10620 return cached_can_issue_more;
10621
10622 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10623 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10624
10625 return cached_can_issue_more;
10626 }
10627
10628 static void
10629 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10630 int verbose ATTRIBUTE_UNUSED,
10631 int veclen ATTRIBUTE_UNUSED)
10632 {
10633 CURR_REGMODE_PRESSURE (SImode) = 0;
10634 CURR_REGMODE_PRESSURE (SFmode) = 0;
10635 }
10636
10637 /* Some magic numbers. */
10638 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10639 functions that already have high pressure on r0. */
10640 #define R0_MAX_LIFE_REGIONS 2
10641 /* Register Pressure thresholds for SImode and SFmode registers. */
10642 #define SIMODE_MAX_WEIGHT 5
10643 #define SFMODE_MAX_WEIGHT 10
10644
10645 /* Return true if the pressure is high for MODE. */
10646 static short
10647 high_pressure (enum machine_mode mode)
10648 {
10649 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10650 functions that already have high pressure on r0. */
10651 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10652 return 1;
10653
10654 if (mode == SFmode)
10655 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10656 else
10657 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10658 }
10659
10660 /* Reorder ready queue if register pressure is high. */
10661 static int
10662 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10663 int sched_verbose ATTRIBUTE_UNUSED,
10664 rtx *ready,
10665 int *n_readyp,
10666 int clock_var ATTRIBUTE_UNUSED)
10667 {
10668 if (reload_completed)
10669 return sh_issue_rate ();
10670
10671 if (high_pressure (SFmode) || high_pressure (SImode))
10672 {
10673 ready_reorder (ready, *n_readyp);
10674 }
10675
10676 return sh_issue_rate ();
10677 }
10678
10679 /* Skip cycles if the current register pressure is high. */
10680 static int
10681 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10682 int sched_verbose ATTRIBUTE_UNUSED,
10683 rtx *ready ATTRIBUTE_UNUSED,
10684 int *n_readyp ATTRIBUTE_UNUSED,
10685 int clock_var ATTRIBUTE_UNUSED)
10686 {
10687 if (reload_completed)
10688 return cached_can_issue_more;
10689
10690 if (high_pressure (SFmode) || high_pressure (SImode))
10691 skip_cycles = 1;
10692
10693 return cached_can_issue_more;
10694 }
10695
10696 /* Skip cycles without sorting the ready queue. This will move insns from
10697 Q -> R. If this is the last cycle we are skipping, allow sorting of the
10698 ready queue by sh_reorder. */
10699
10700 /* Generally, skipping this many cycles is sufficient for all insns to move
10701 from Q -> R. */
10702 #define MAX_SKIPS 8
10703
10704 static int
10705 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10706 int sched_verbose ATTRIBUTE_UNUSED,
10707 rtx insn ATTRIBUTE_UNUSED,
10708 int last_clock_var,
10709 int clock_var,
10710 int *sort_p)
10711 {
10712 if (reload_completed)
10713 return 0;
10714
10715 if (skip_cycles)
10716 {
10717 if ((clock_var - last_clock_var) < MAX_SKIPS)
10718 {
10719 *sort_p = 0;
10720 return 1;
10721 }
10722 /* If this is the last cycle we are skipping, allow reordering of R. */
10723 if ((clock_var - last_clock_var) == MAX_SKIPS)
10724 {
10725 *sort_p = 1;
10726 return 1;
10727 }
10728 }
10729
10730 skip_cycles = 0;
10731
10732 return 0;
10733 }
10734
10735 /* SHmedia requires registers for branches, so we can't generate new
10736 branches past reload. */
10737 static bool
10738 sh_cannot_modify_jumps_p (void)
10739 {
10740 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10741 }
10742
10743 static reg_class_t
10744 sh_target_reg_class (void)
10745 {
10746 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10747 }
10748
10749 static bool
10750 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10751 {
10752 HARD_REG_SET dummy;
10753 #if 0
10754 rtx insn;
10755 #endif
10756
10757 if (! shmedia_space_reserved_for_target_registers)
10758 return 0;
10759 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10760 return 0;
10761 if (calc_live_regs (&dummy) >= 6 * 8)
10762 return 1;
10763 return 0;
10764 }
10765
10766 static bool
10767 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10768 {
10769 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10770 }
10771 \f
10772 /*
10773 On the SH1..SH4, the trampoline looks like
10774 2 0002 D202 mov.l l2,r2
10775 1 0000 D301 mov.l l1,r3
10776 3 0004 422B jmp @r2
10777 4 0006 0009 nop
10778 5 0008 00000000 l1: .long area
10779 6 000c 00000000 l2: .long function
10780
10781 SH5 (compact) uses r1 instead of r3 for the static chain. */
10782
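/* Illustrative only: the trampoline laid out above is what makes a GNU C
   nested function work once its address escapes.  In the sketch below,
   taking add_bias's address forces GCC to build the code sequence shown
   above on the stack; sh_trampoline_init then fills in the l1/l2 slots
   with the static chain value and the function address.  */
#if 0
static int
apply (int (*f) (int), int x)
{
  return f (x);
}

int
outer (int bias)
{
  int add_bias (int v) { return v + bias; }  /* uses the static chain */
  return apply (add_bias, 42);               /* address escapes here */
}
#endif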
10783
10784 /* Emit RTL insns to initialize the variable parts of a trampoline.
10785 FNADDR is an RTX for the address of the function's pure code.
10786 CXT is an RTX for the static chain value for the function. */
10787
10788 static void
10789 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10790 {
10791 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10792 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10793
10794 if (TARGET_SHMEDIA64)
10795 {
10796 rtx tramp_templ;
10797 int fixed_len;
10798
10799 rtx movi1 = GEN_INT (0xcc000010);
10800 rtx shori1 = GEN_INT (0xc8000010);
10801 rtx src, dst;
10802
10803 /* The following trampoline works within a +- 128 KB range for cxt:
10804 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10805 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10806 gettr tr1,r1; blink tr0,r63 */
10807 /* Address rounding makes it hard to compute the exact bounds of the
10808 offset for this trampoline, but we have a rather generous offset
10809 range, so frame_offset should do fine as an upper bound. */
10810 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10811 {
10812 /* ??? could optimize this trampoline initialization
10813 by writing DImode words with two insns each. */
10814 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10815 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10816 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10817 insn = gen_rtx_AND (DImode, insn, mask);
10818 /* Or in ptb/u .,tr1 pattern */
10819 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10820 insn = force_operand (insn, NULL_RTX);
10821 insn = gen_lowpart (SImode, insn);
10822 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10823 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10824 insn = gen_rtx_AND (DImode, insn, mask);
10825 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10826 insn = gen_lowpart (SImode, insn);
10827 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10828 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10829 insn = gen_rtx_AND (DImode, insn, mask);
10830 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10831 insn = gen_lowpart (SImode, insn);
10832 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10833 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10834 insn = gen_rtx_AND (DImode, insn, mask);
10835 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10836 insn = gen_lowpart (SImode, insn);
10837 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10838 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10839 insn = gen_rtx_AND (DImode, insn, mask);
10840 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10841 insn = gen_lowpart (SImode, insn);
10842 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10843 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10844 GEN_INT (0x6bf10600));
10845 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10846 GEN_INT (0x4415fc10));
10847 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10848 GEN_INT (0x4401fff0));
10849 emit_insn (gen_ic_invalidate_line (tramp));
10850 return;
10851 }
10852 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10853 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10854
10855 tramp_templ = gen_datalabel_ref (tramp_templ);
10856 dst = tramp_mem;
10857 src = gen_const_mem (BLKmode, tramp_templ);
10858 set_mem_align (dst, 256);
10859 set_mem_align (src, 64);
10860 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10861
10862 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10863 emit_move_insn (adjust_address (tramp_mem, Pmode,
10864 fixed_len + GET_MODE_SIZE (Pmode)),
10865 cxt);
10866 emit_insn (gen_ic_invalidate_line (tramp));
10867 return;
10868 }
10869 else if (TARGET_SHMEDIA)
10870 {
10871 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10872 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10873 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10874 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10875 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10876 rotated 10 right, and the higher 16 bits of every 32 selected. */
10877 rtx movishori
10878 = force_reg (V2HImode, (simplify_gen_subreg
10879 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10880 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10881 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10882
10883 fnaddr = force_reg (SImode, fnaddr);
10884 cxt = force_reg (SImode, cxt);
10885 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10886 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10887 movishori));
10888 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10889 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10890 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10891 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10892 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10893 gen_rtx_SUBREG (V2HImode, cxt, 0),
10894 movishori));
10895 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10896 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10897 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10898 if (TARGET_LITTLE_ENDIAN)
10899 {
10900 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10901 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10902 }
10903 else
10904 {
10905 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10906 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10907 }
10908 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10909 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10910 emit_insn (gen_ic_invalidate_line (tramp));
10911 return;
10912 }
10913 else if (TARGET_SHCOMPACT)
10914 {
10915 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10916 return;
10917 }
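/* Generic SH1-4 trampoline: two PC-relative mov.l loads that fetch
   FNADDR (stored at offset 12) into r2 and CXT (stored at offset 8)
   into r3, the static chain register, followed by jmp @r2 with a nop
   in the delay slot.  The two SImode stores below lay out those four
   16-bit instructions for either endianness.  */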
10918 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10919 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10920 SImode));
10921 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10922 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10923 SImode));
10924 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10925 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10926 if (TARGET_HARVARD)
10927 {
10928 if (!TARGET_INLINE_IC_INVALIDATE
10929 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10930 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10931 FUNCTION_ORDINARY),
10932 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10933 else
10934 emit_insn (gen_ic_invalidate_line (tramp));
10935 }
10936 }
10937
10938 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10939
10940 static rtx
10941 sh_trampoline_adjust_address (rtx tramp)
10942 {
10943 if (TARGET_SHMEDIA)
10944 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10945 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10946 return tramp;
10947 }
10948
10949 /* FIXME: This is overly conservative. A SHcompact function that
10950 receives arguments ``by reference'' will have them stored in its
10951 own stack frame, so it must not pass pointers or references to
10952 these arguments to other functions by means of sibling calls. */
10953 /* If PIC, we cannot make sibling calls to global functions
10954 because the PLT requires r12 to be live. */
10955 static bool
10956 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10957 {
10958 return (1
10959 && (! TARGET_SHCOMPACT
10960 || crtl->args.info.stack_regs == 0)
10961 && ! sh_cfun_interrupt_handler_p ()
10962 && (! flag_pic
10963 || (decl && ! TREE_PUBLIC (decl))
10964 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10965 }
10966 \f
10967 /* Machine specific built-in functions. */
10968
10969 struct builtin_description
10970 {
10971 const enum insn_code icode;
10972 const char *const name;
10973 int signature;
10974 tree fndecl;
10975 };
10976
10977 /* Describe number and signedness of arguments; arg[0] == result
10978 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10979 /* 9: 64-bit pointer, 10: 32-bit pointer. */
10980 static const char signature_args[][4] =
10981 {
10982 #define SH_BLTIN_V2SI2 0
10983 { 4, 4 },
10984 #define SH_BLTIN_V4HI2 1
10985 { 4, 4 },
10986 #define SH_BLTIN_V2SI3 2
10987 { 4, 4, 4 },
10988 #define SH_BLTIN_V4HI3 3
10989 { 4, 4, 4 },
10990 #define SH_BLTIN_V8QI3 4
10991 { 4, 4, 4 },
10992 #define SH_BLTIN_MAC_HISI 5
10993 { 1, 4, 4, 1 },
10994 #define SH_BLTIN_SH_HI 6
10995 { 4, 4, 1 },
10996 #define SH_BLTIN_SH_SI 7
10997 { 4, 4, 1 },
10998 #define SH_BLTIN_V4HI2V2SI 8
10999 { 4, 4, 4 },
11000 #define SH_BLTIN_V4HI2V8QI 9
11001 { 4, 4, 4 },
11002 #define SH_BLTIN_SISF 10
11003 { 4, 2 },
11004 #define SH_BLTIN_LDUA_L 11
11005 { 2, 10 },
11006 #define SH_BLTIN_LDUA_Q 12
11007 { 1, 10 },
11008 #define SH_BLTIN_STUA_L 13
11009 { 0, 10, 2 },
11010 #define SH_BLTIN_STUA_Q 14
11011 { 0, 10, 1 },
11012 #define SH_BLTIN_LDUA_L64 15
11013 { 2, 9 },
11014 #define SH_BLTIN_LDUA_Q64 16
11015 { 1, 9 },
11016 #define SH_BLTIN_STUA_L64 17
11017 { 0, 9, 2 },
11018 #define SH_BLTIN_STUA_Q64 18
11019 { 0, 9, 1 },
11020 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11021 #define SH_BLTIN_2 19
11022 #define SH_BLTIN_SU 19
11023 { 1, 2 },
11024 #define SH_BLTIN_3 20
11025 #define SH_BLTIN_SUS 20
11026 { 2, 2, 1 },
11027 #define SH_BLTIN_PSSV 21
11028 { 0, 8, 2, 2 },
11029 #define SH_BLTIN_XXUU 22
11030 #define SH_BLTIN_UUUU 22
11031 { 1, 1, 1, 1 },
11032 #define SH_BLTIN_PV 23
11033 { 0, 8 },
11034 };
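/* Example: SH_BLTIN_SH_HI is { 4, 4, 1 }, i.e. the result and the first
   argument take their mode from the insn pattern with signedness ignored,
   while the second argument (a shift count or control value) is unsigned.
   The pointer codes 9 and 10 carry an extra low bit (8|1 resp. 8|2) that
   sh_media_init_builtins below uses to skip 64-bit-pointer signatures on
   SHMEDIA32 and 32-bit-pointer signatures on SHMEDIA64.  */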
11035 /* mcmv: operands considered unsigned. */
11036 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11037 /* mperm: control value considered unsigned int. */
11038 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11039 /* mshards_q: returns signed short. */
11040 /* nsb: takes long long arg, returns unsigned char. */
11041 static struct builtin_description bdesc[] =
11042 {
11043 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11044 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11045 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11046 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11047 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11048 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11049 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11050 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11051 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11052 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11053 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11054 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11055 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11056 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11057 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11058 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11059 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11060 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11061 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11062 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11063 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11064 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11065 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11066 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11067 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11068 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11069 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11070 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11071 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11072 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11073 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11074 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11075 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11076 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11077 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11078 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11079 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11080 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11081 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11082 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11083 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11084 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11085 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11086 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11087 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11088 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11089 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11090 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11091 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11092 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11093 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11094 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11095 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11096 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11097 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11098 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11099 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11100 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11101 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11102 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11103 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11104 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
11105 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11106 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11107 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11108 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11109 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11110 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11111 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11112 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11113 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11114 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11115 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11116 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11117 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11118 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11119 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11120 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11121 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11122 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11123 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11124 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11125 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11126 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11127 };
11128
11129 static void
11130 sh_media_init_builtins (void)
11131 {
11132 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11133 struct builtin_description *d;
11134
11135 memset (shared, 0, sizeof shared);
11136 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
11137 {
11138 tree type, arg_type = 0;
11139 int signature = d->signature;
11140 int i;
11141
11142 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11143 type = shared[signature];
11144 else
11145 {
11146 int has_result = signature_args[signature][0] != 0;
11147 tree args[3];
11148
11149 if ((signature_args[signature][1] & 8)
11150 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11151 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11152 continue;
11153 if (! TARGET_FPU_ANY
11154 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11155 continue;
11156 for (i = 0; i < (int) ARRAY_SIZE (args); i++)
11157 args[i] = NULL_TREE;
11158 for (i = 3; ; i--)
11159 {
11160 int arg = signature_args[signature][i];
11161 int opno = i - 1 + has_result;
11162
11163 if (arg & 8)
11164 arg_type = ptr_type_node;
11165 else if (arg)
11166 arg_type = (*lang_hooks.types.type_for_mode)
11167 (insn_data[d->icode].operand[opno].mode,
11168 (arg & 1));
11169 else if (i)
11170 continue;
11171 else
11172 arg_type = void_type_node;
11173 if (i == 0)
11174 break;
11175 args[i-1] = arg_type;
11176 }
11177 type = build_function_type_list (arg_type, args[0], args[1],
11178 args[2], NULL_TREE);
11179 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11180 shared[signature] = type;
11181 }
11182 d->fndecl =
11183 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11184 NULL, NULL_TREE);
11185 }
11186 }
11187
11188 /* Returns the shmedia builtin decl for CODE. */
11189
11190 static tree
11191 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11192 {
11193 if (code >= ARRAY_SIZE (bdesc))
11194 return error_mark_node;
11195
11196 return bdesc[code].fndecl;
11197 }
11198
11199 /* Implements target hook vector_mode_supported_p. */
11200 bool
11201 sh_vector_mode_supported_p (enum machine_mode mode)
11202 {
11203 if (TARGET_FPU_ANY
11204 && ((mode == V2SFmode)
11205 || (mode == V4SFmode)
11206 || (mode == V16SFmode)))
11207 return true;
11208
11209 else if (TARGET_SHMEDIA
11210 && ((mode == V8QImode)
11211 || (mode == V2HImode)
11212 || (mode == V4HImode)
11213 || (mode == V2SImode)))
11214 return true;
11215
11216 return false;
11217 }
11218
11219 bool
11220 sh_frame_pointer_required (void)
11221 {
11222 /* If needed, override this in other tm.h files to cope with various OS
11223 lossage requiring a frame pointer. */
11224 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11225 return true;
11226
11227 if (crtl->profile)
11228 return true;
11229
11230 return false;
11231 }
11232
11233 /* Implements target hook dwarf_calling_convention. Return an enum
11234 of dwarf_calling_convention. */
11235 int
11236 sh_dwarf_calling_convention (const_tree func)
11237 {
11238 if (sh_attr_renesas_p (func))
11239 return DW_CC_GNU_renesas_sh;
11240
11241 return DW_CC_normal;
11242 }
11243
11244 static void
11245 sh_init_builtins (void)
11246 {
11247 if (TARGET_SHMEDIA)
11248 sh_media_init_builtins ();
11249 }
11250
11251 /* Returns the sh builtin decl for CODE. */
11252
11253 static tree
11254 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11255 {
11256 if (TARGET_SHMEDIA)
11257 return sh_media_builtin_decl (code, initialize_p);
11258
11259 return error_mark_node;
11260 }
11261
11262 /* Expand an expression EXP that calls a built-in function,
11263 with result going to TARGET if that's convenient
11264 (and in mode MODE if that's convenient).
11265 SUBTARGET may be used as the target for computing one of EXP's operands.
11266 IGNORE is nonzero if the value is to be ignored. */
11267
11268 static rtx
11269 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11270 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11271 {
11272 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11273 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11274 const struct builtin_description *d = &bdesc[fcode];
11275 enum insn_code icode = d->icode;
11276 int signature = d->signature;
11277 enum machine_mode tmode = VOIDmode;
11278 int nop = 0, i;
11279 rtx op[4];
11280 rtx pat = 0;
11281
11282 if (signature_args[signature][0])
11283 {
11284 if (ignore)
11285 return 0;
11286
11287 tmode = insn_data[icode].operand[0].mode;
11288 if (! target
11289 || GET_MODE (target) != tmode
11290 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11291 target = gen_reg_rtx (tmode);
11292 op[nop++] = target;
11293 }
11294 else
11295 target = 0;
11296
11297 for (i = 1; i <= 3; i++, nop++)
11298 {
11299 tree arg;
11300 enum machine_mode opmode, argmode;
11301 tree optype;
11302
11303 if (! signature_args[signature][i])
11304 break;
11305 arg = CALL_EXPR_ARG (exp, i - 1);
11306 if (arg == error_mark_node)
11307 return const0_rtx;
11308 if (signature_args[signature][i] & 8)
11309 {
11310 opmode = ptr_mode;
11311 optype = ptr_type_node;
11312 }
11313 else
11314 {
11315 opmode = insn_data[icode].operand[nop].mode;
11316 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11317 }
11318 argmode = TYPE_MODE (TREE_TYPE (arg));
11319 if (argmode != opmode)
11320 arg = build1 (NOP_EXPR, optype, arg);
11321 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11322 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11323 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11324 }
11325
11326 switch (nop)
11327 {
11328 case 1:
11329 pat = (*insn_data[d->icode].genfun) (op[0]);
11330 break;
11331 case 2:
11332 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11333 break;
11334 case 3:
11335 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11336 break;
11337 case 4:
11338 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11339 break;
11340 default:
11341 gcc_unreachable ();
11342 }
11343 if (! pat)
11344 return 0;
11345 emit_insn (pat);
11346 return target;
11347 }
11348
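/* Expand a V2SFmode unary operation CODE with operand OP1 into OP0 by
   applying the SFmode operation to each of the two elements in turn
   (selectors 0 and 1).  */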
11349 void
11350 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11351 {
11352 rtx sel0 = const0_rtx;
11353 rtx sel1 = const1_rtx;
11354 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11355 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11356
11357 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11358 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11359 }
11360
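/* Likewise for a V2SFmode binary operation CODE on OP1 and OP2: the
   binary_sf_op0 / binary_sf_op1 patterns handle the two halves.  */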
11361 void
11362 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11363 {
11364 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11365
11366 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11367 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11368 }
11369
11370 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11371 We can allow any mode in any general register. The special registers
11372 only allow SImode. Don't allow any mode in the PR.
11373
11374 We cannot hold DCmode values in the XD registers because alter_reg
11375 handles subregs of them incorrectly. We could work around this by
11376 spacing the XD registers like the DR registers, but this would require
11377 additional memory in every compilation to hold larger register vectors.
11378 We could hold SFmode / SCmode values in XD registers, but that
11379 would require a tertiary reload when reloading from / to memory,
11380 and a secondary reload to reload from / to general regs; that
11381 seems to be a losing proposition.
11382
11383 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11384 it won't be ferried through GP registers first. */
11385
11386 bool
11387 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11388 {
11389 if (SPECIAL_REGISTER_P (regno))
11390 return mode == SImode;
11391
11392 if (regno == FPUL_REG)
11393 return (mode == SImode || mode == SFmode);
11394
11395 if (FP_REGISTER_P (regno) && mode == SFmode)
11396 return true;
11397
11398 if (mode == V2SFmode)
11399 {
11400 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11401 || GENERAL_REGISTER_P (regno)))
11402 return true;
11403 else
11404 return false;
11405 }
11406
11407 if (mode == V4SFmode)
11408 {
11409 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11410 || GENERAL_REGISTER_P (regno))
11411 return true;
11412 else
11413 return false;
11414 }
11415
11416 if (mode == V16SFmode)
11417 {
11418 if (TARGET_SHMEDIA)
11419 {
11420 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11421 return true;
11422 else
11423 return false;
11424 }
11425 else
11426 return regno == FIRST_XD_REG;
11427 }
11428
11429 if (FP_REGISTER_P (regno))
11430 {
11431 if (mode == SFmode
11432 || mode == SImode
11433 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11434 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11435 || mode == DCmode
11436 || (TARGET_SHMEDIA
11437 && (mode == DFmode || mode == DImode
11438 || mode == V2SFmode || mode == TImode)))
11439 && ((regno - FIRST_FP_REG) & 1) == 0)
11440 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11441 && ((regno - FIRST_FP_REG) & 3) == 0))
11442 return true;
11443 else
11444 return false;
11445 }
11446
11447 if (XD_REGISTER_P (regno))
11448 return mode == DFmode;
11449
11450 if (TARGET_REGISTER_P (regno))
11451 return (mode == DImode || mode == SImode || mode == PDImode);
11452
11453 if (regno == PR_REG)
11454 return mode == SImode;
11455
11456 if (regno == FPSCR_REG)
11457 return mode == PSImode;
11458
11459 /* FIXME. This works around PR target/37633 for -O0. */
11460 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11461 {
11462 unsigned int n = GET_MODE_SIZE (mode) / 8;
11463
11464 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11465 && regno <= FIRST_GENERAL_REG + 14)
11466 return false;
11467 }
11468
11469 return true;
11470 }
11471
11472 /* Return the class of registers for which a mode change from FROM to TO
11473 is invalid. */
11474 bool
11475 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11476 enum reg_class rclass)
11477 {
11478 /* We want to enable the use of SUBREGs as a means to
11479 VEC_SELECT a single element of a vector. */
11480
11481 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
11482 This can be problematic when SFmode vector subregs need to be accessed
11483 on the stack with displacement addressing, as it happens with -O0.
11484 Thus we disallow the mode change for -O0. */
11485 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11486 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
11487
11488 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11489 {
11490 if (TARGET_LITTLE_ENDIAN)
11491 {
11492 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11493 return reg_classes_intersect_p (DF_REGS, rclass);
11494 }
11495 else
11496 {
11497 if (GET_MODE_SIZE (from) < 8)
11498 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11499 }
11500 }
11501 return false;
11502 }
11503
11504 /* Return true if registers in machine mode MODE will likely be
11505 allocated to registers in small register classes. */
11506
11507 bool
11508 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11509 {
11510 return (! TARGET_SHMEDIA);
11511 }
11512
11513 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11514 that label is used. */
11515
11516 void
11517 sh_mark_label (rtx address, int nuses)
11518 {
11519 if (GOTOFF_P (address))
11520 {
11521 /* Extract the label or symbol. */
11522 address = XEXP (address, 0);
11523 if (GET_CODE (address) == PLUS)
11524 address = XEXP (address, 0);
11525 address = XVECEXP (address, 0, 0);
11526 }
11527 if (GET_CODE (address) == LABEL_REF
11528 && LABEL_P (XEXP (address, 0)))
11529 LABEL_NUSES (XEXP (address, 0)) += nuses;
11530 }
11531
11532 /* Compute extra cost of moving data between one register class
11533 and another. */
11534
11535 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11536 uses this information. Hence, the general register <-> floating point
11537 register information here is not used for SFmode. */
11538
11539 static int
11540 sh_register_move_cost (enum machine_mode mode,
11541 reg_class_t srcclass, reg_class_t dstclass)
11542 {
11543 if (dstclass == T_REGS || dstclass == PR_REGS)
11544 return 10;
11545
11546 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11547 return 4;
11548
11549 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11550 && REGCLASS_HAS_FP_REG (srcclass)
11551 && REGCLASS_HAS_FP_REG (dstclass))
11552 return 4;
11553
11554 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11555 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11556
11557 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11558 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11559 return 9;
11560
11561 if ((REGCLASS_HAS_FP_REG (dstclass)
11562 && REGCLASS_HAS_GENERAL_REG (srcclass))
11563 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11564 && REGCLASS_HAS_FP_REG (srcclass)))
11565 {
11566 /* Discourage trying to use fp regs for a pointer. This also
11567 discourages fp regs with SImode because Pmode is an alias
11568 of SImode on this target. See PR target/48596. */
11569 int addend = (mode == Pmode) ? 40 : 0;
11570
11571 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
11572 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11573 }
11574
11575 if ((dstclass == FPUL_REGS
11576 && REGCLASS_HAS_GENERAL_REG (srcclass))
11577 || (srcclass == FPUL_REGS
11578 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11579 return 5;
11580
11581 if ((dstclass == FPUL_REGS
11582 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11583 || (srcclass == FPUL_REGS
11584 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11585 return 7;
11586
11587 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11588 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11589 return 20;
11590
11591 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11592 if (TARGET_SHMEDIA
11593 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11594 {
11595 if (sh_gettrcost >= 0)
11596 return sh_gettrcost;
11597 else if (!TARGET_PT_FIXED)
11598 return 100;
11599 }
11600
11601 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11602 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11603 return 4;
11604
11605 if (TARGET_SHMEDIA
11606 || (TARGET_FMOVD
11607 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11608 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11609 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11610
11611 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11612 }
11613
11614 static rtx emit_load_ptr (rtx, rtx);
11615
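/* Emit a load of the ptr_mode value at ADDR into REG, sign-extending it
   to Pmode when Pmode is wider than ptr_mode.  */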
11616 static rtx
11617 emit_load_ptr (rtx reg, rtx addr)
11618 {
11619 rtx mem = gen_const_mem (ptr_mode, addr);
11620
11621 if (Pmode != ptr_mode)
11622 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11623 return emit_move_insn (reg, mem);
11624 }
11625
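/* Implements target hook asm_output_mi_thunk: output the thunk for
   THUNK_FNDECL to FILE.  The thunk adjusts the incoming "this" pointer
   by DELTA, optionally adds a value loaded from the vtable at
   VCALL_OFFSET, and then tail-calls FUNCTION.  */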
11626 static void
11627 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11628 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11629 tree function)
11630 {
11631 CUMULATIVE_ARGS cum;
11632 int structure_value_byref = 0;
11633 rtx this_rtx, this_value, sibcall, insns, funexp;
11634 tree funtype = TREE_TYPE (function);
11635 int simple_add = CONST_OK_FOR_ADD (delta);
11636 int did_load = 0;
11637 rtx scratch0, scratch1, scratch2;
11638 unsigned i;
11639
11640 reload_completed = 1;
11641 epilogue_completed = 1;
11642 current_function_uses_only_leaf_regs = 1;
11643
11644 emit_note (NOTE_INSN_PROLOGUE_END);
11645
11646 /* Find the "this" pointer. We have such a wide range of ABIs for the
11647 SH that it's best to do this completely machine independently.
11648 "this" is passed as first argument, unless a structure return pointer
11649 comes first, in which case "this" comes second. */
11650 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11651 #ifndef PCC_STATIC_STRUCT_RETURN
11652 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11653 structure_value_byref = 1;
11654 #endif /* not PCC_STATIC_STRUCT_RETURN */
11655 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11656 {
11657 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11658
11659 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
11660 }
11661 this_rtx
11662 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
11663
11664 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11665 static chain pointer (even if you can't have nested virtual functions
11666 right now, someone might implement them sometime), and the rest of the
11667 registers are used for argument passing, are callee-saved, or reserved. */
11668 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11669 -ffixed-reg has been used. */
11670 if (! call_used_regs[0] || fixed_regs[0])
11671 error ("r0 needs to be available as a call-clobbered register");
11672 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11673 if (! TARGET_SH5)
11674 {
11675 if (call_used_regs[1] && ! fixed_regs[1])
11676 scratch1 = gen_rtx_REG (ptr_mode, 1);
11677 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11678 to the location where struct values are returned. */
11679 if (call_used_regs[3] && ! fixed_regs[3])
11680 scratch2 = gen_rtx_REG (Pmode, 3);
11681 }
11682 else if (TARGET_SHMEDIA)
11683 {
11684 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11685 if (i != REGNO (scratch0) &&
11686 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11687 {
11688 scratch1 = gen_rtx_REG (ptr_mode, i);
11689 break;
11690 }
11691 if (scratch1 == scratch0)
11692 error ("need a second call-clobbered general purpose register");
11693 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11694 if (call_used_regs[i] && ! fixed_regs[i])
11695 {
11696 scratch2 = gen_rtx_REG (Pmode, i);
11697 break;
11698 }
11699 if (scratch2 == scratch0)
11700 error ("need a call-clobbered target register");
11701 }
11702
11703 this_value = plus_constant (this_rtx, delta);
11704 if (vcall_offset
11705 && (simple_add || scratch0 != scratch1)
11706 && strict_memory_address_p (ptr_mode, this_value))
11707 {
11708 emit_load_ptr (scratch0, this_value);
11709 did_load = 1;
11710 }
11711
11712 if (!delta)
11713 ; /* Do nothing. */
11714 else if (simple_add)
11715 emit_move_insn (this_rtx, this_value);
11716 else
11717 {
11718 emit_move_insn (scratch1, GEN_INT (delta));
11719 emit_insn (gen_add2_insn (this_rtx, scratch1));
11720 }
11721
11722 if (vcall_offset)
11723 {
11724 rtx offset_addr;
11725
11726 if (!did_load)
11727 emit_load_ptr (scratch0, this_rtx);
11728
11729 offset_addr = plus_constant (scratch0, vcall_offset);
11730 if (strict_memory_address_p (ptr_mode, offset_addr))
11731 ; /* Do nothing. */
11732 else if (! TARGET_SH5 && scratch0 != scratch1)
11733 {
11734 /* scratch0 != scratch1, and we have indexed loads. Get a better
11735 schedule by loading the offset into r1 and using an indexed
11736 load - then the load of r1 can issue before the load from
11737 (this_rtx + delta) finishes. */
11738 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11739 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11740 }
11741 else if (CONST_OK_FOR_ADD (vcall_offset))
11742 {
11743 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11744 offset_addr = scratch0;
11745 }
11746 else if (scratch0 != scratch1)
11747 {
11748 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11749 emit_insn (gen_add2_insn (scratch0, scratch1));
11750 offset_addr = scratch0;
11751 }
11752 else
11753 gcc_unreachable (); /* FIXME */
11754 emit_load_ptr (scratch0, offset_addr);
11755
11756 if (Pmode != ptr_mode)
11757 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11758 emit_insn (gen_add2_insn (this_rtx, scratch0));
11759 }
11760
11761 /* Generate a tail call to the target function. */
11762 if (! TREE_USED (function))
11763 {
11764 assemble_external (function);
11765 TREE_USED (function) = 1;
11766 }
11767 funexp = XEXP (DECL_RTL (function), 0);
11768 /* If the function is overridden, so is the thunk, hence we don't
11769 need GOT addressing even if this is a public symbol. */
11770 #if 0
11771 if (TARGET_SH1 && ! flag_weak)
11772 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11773 else
11774 #endif
11775 if (TARGET_SH2 && flag_pic)
11776 {
11777 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11778 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11779 }
11780 else
11781 {
11782 if (TARGET_SHMEDIA && flag_pic)
11783 {
11784 funexp = gen_sym2PIC (funexp);
11785 PUT_MODE (funexp, Pmode);
11786 }
11787 emit_move_insn (scratch2, funexp);
11788 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11789 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11790 }
11791 sibcall = emit_call_insn (sibcall);
11792 SIBLING_CALL_P (sibcall) = 1;
11793 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11794 emit_barrier ();
11795
11796 /* Run just enough of rest_of_compilation to do scheduling and get
11797 the insns emitted. Note that use_thunk calls
11798 assemble_start_function and assemble_end_function. */
11799
11800 insn_locators_alloc ();
11801 insns = get_insns ();
11802
11803 if (optimize > 0)
11804 {
11805 if (! cfun->cfg)
11806 init_flow (cfun);
11807 split_all_insns_noflow ();
11808 }
11809
11810 sh_reorg ();
11811 shorten_branches (insns);
11812 final_start_function (insns, file, 1);
11813 final (insns, file, 1);
11814 final_end_function ();
11815
11816 reload_completed = 0;
11817 epilogue_completed = 0;
11818 }
11819
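/* Return an rtx for the address of function NAME of kind KIND, emitting
   a GOT or GOTOFF load into TARGET (or a fresh register) when PIC
   addressing requires it.  */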
11820 rtx
11821 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11822 {
11823 rtx sym;
11824
11825 /* If this is not an ordinary function, the name usually comes from a
11826 string literal or an sprintf buffer. Make sure we use the same
11827 string consistently, so that cse will be able to unify address loads. */
11828 if (kind != FUNCTION_ORDINARY)
11829 name = IDENTIFIER_POINTER (get_identifier (name));
11830 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11831 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11832 if (flag_pic)
11833 switch (kind)
11834 {
11835 case FUNCTION_ORDINARY:
11836 break;
11837 case SFUNC_GOT:
11838 {
11839 rtx reg = target ? target : gen_reg_rtx (Pmode);
11840
11841 emit_insn (gen_symGOT2reg (reg, sym));
11842 sym = reg;
11843 break;
11844 }
11845 case SFUNC_STATIC:
11846 {
11847 /* ??? To allow cse to work, we use GOTOFF relocations.
11848 We could add combiner patterns to transform this into
11849 straight pc-relative calls with sym2PIC / bsrf when
11850 label load and function call are still 1:1 and in the
11851 same basic block during combine. */
11852 rtx reg = target ? target : gen_reg_rtx (Pmode);
11853
11854 emit_insn (gen_symGOTOFF2reg (reg, sym));
11855 sym = reg;
11856 break;
11857 }
11858 }
11859 if (target && sym != target)
11860 {
11861 emit_move_insn (target, sym);
11862 return target;
11863 }
11864 return sym;
11865 }
11866
11867 /* Find the number of a general purpose register in S. */
11868 static int
11869 scavenge_reg (HARD_REG_SET *s)
11870 {
11871 int r;
11872 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11873 if (TEST_HARD_REG_BIT (*s, r))
11874 return r;
11875 return -1;
11876 }
11877
11878 rtx
11879 sh_get_pr_initial_val (void)
11880 {
11881 rtx val;
11882
11883 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11884 PR register on SHcompact, because it might be clobbered by the prologue.
11885 We check first if that is known to be the case. */
11886 if (TARGET_SHCOMPACT
11887 && ((crtl->args.info.call_cookie
11888 & ~ CALL_COOKIE_RET_TRAMP (1))
11889 || crtl->saves_all_registers))
11890 return gen_frame_mem (SImode, return_address_pointer_rtx);
11891
11892 /* If we haven't finished rtl generation, there might be a nonlocal label
11893 that we haven't seen yet.
11894 ??? get_hard_reg_initial_val fails if it is called after register
11895 allocation has started, unless it has been called before for the
11896 same register. And even then, we end up in trouble if we didn't use
11897 the register in the same basic block before. So call
11898 get_hard_reg_initial_val now and wrap it in an unspec if we might
11899 need to replace it. */
11900 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11901 combine can put the pseudo returned by get_hard_reg_initial_val into
11902 instructions that need a general purpose register, which will fail to
11903 be recognized when the pseudo becomes allocated to PR. */
11904 val
11905 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11906 if (TARGET_SH1)
11907 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11908 return val;
11909 }
11910
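/* Expand an scc of the T register: OPERANDS[1] is an EQ or NE comparison
   of the T register (OPERANDS[2]) against a constant (OPERANDS[3]), and
   OPERANDS[0] receives the 0/1 result.  Return nonzero if the expansion
   was handled here, zero to make the caller fall back.  */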
11911 int
11912 sh_expand_t_scc (rtx operands[])
11913 {
11914 enum rtx_code code = GET_CODE (operands[1]);
11915 rtx target = operands[0];
11916 rtx op0 = operands[2];
11917 rtx op1 = operands[3];
11918 rtx result = target;
11919 HOST_WIDE_INT val;
11920
11921 if (!REG_P (op0) || REGNO (op0) != T_REG
11922 || !CONST_INT_P (op1))
11923 return 0;
11924 if (!REG_P (result))
11925 result = gen_reg_rtx (SImode);
11926 val = INTVAL (op1);
11927 if ((code == EQ && val == 1) || (code == NE && val == 0))
11928 emit_insn (gen_movt (result));
11929 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11930 emit_insn (gen_movnegt (result));
11931 else if (code == EQ || code == NE)
11932 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11933 else
11934 return 0;
11935 if (result != target)
11936 emit_move_insn (target, result);
11937 return 1;
11938 }
11939
11940 /* INSN is an sfunc; return the rtx that describes the address used. */
11941 static rtx
11942 extract_sfunc_addr (rtx insn)
11943 {
11944 rtx pattern, part = NULL_RTX;
11945 int len, i;
11946
11947 pattern = PATTERN (insn);
11948 len = XVECLEN (pattern, 0);
11949 for (i = 0; i < len; i++)
11950 {
11951 part = XVECEXP (pattern, 0, i);
11952 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11953 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11954 return XEXP (part, 0);
11955 }
11956 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11957 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11958 }
11959
11960 /* Verify that the register in use_sfunc_addr still agrees with the address
11961 used in the sfunc. This prevents fill_slots_from_thread from changing
11962 use_sfunc_addr.
11963 INSN is the use_sfunc_addr instruction, and REG is the register it
11964 guards. */
11965 int
11966 check_use_sfunc_addr (rtx insn, rtx reg)
11967 {
11968 /* Search for the sfunc. It should really come right after INSN. */
11969 while ((insn = NEXT_INSN (insn)))
11970 {
11971 if (LABEL_P (insn) || JUMP_P (insn))
11972 break;
11973 if (! INSN_P (insn))
11974 continue;
11975
11976 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11977 insn = XVECEXP (PATTERN (insn), 0, 0);
11978 if (GET_CODE (PATTERN (insn)) != PARALLEL
11979 || get_attr_type (insn) != TYPE_SFUNC)
11980 continue;
11981 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11982 }
11983 gcc_unreachable ();
11984 }
11985
11986 /* This function returns a constant rtx that represents 2**15 / pi in
11987 SFmode. It's used to scale SFmode angles, in radians, to a
11988 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11989 maps to 0x10000). */
11990
11991 static GTY(()) rtx sh_fsca_sf2int_rtx;
11992
11993 rtx
11994 sh_fsca_sf2int (void)
11995 {
11996 if (! sh_fsca_sf2int_rtx)
11997 {
11998 REAL_VALUE_TYPE rv;
11999
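      /* 2**15 / pi = 32768 / 3.14159265... ~= 10430.378350470453.  */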
12000 real_from_string (&rv, "10430.378350470453");
12001 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12002 }
12003
12004 return sh_fsca_sf2int_rtx;
12005 }
12006
12007 /* This function returns a constant rtx that represents pi / 2**15 in
12008 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12009 of a full circle back to an SFmode value (i.e., 0x10000 maps to
12010 2*pi). */
12011
12012 static GTY(()) rtx sh_fsca_int2sf_rtx;
12013
12014 rtx
12015 sh_fsca_int2sf (void)
12016 {
12017 if (! sh_fsca_int2sf_rtx)
12018 {
12019 REAL_VALUE_TYPE rv;
12020
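      /* pi / 2**15 = 3.14159265... / 32768 ~= 9.587379924285257e-5.  */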
12021 real_from_string (&rv, "9.587379924285257e-5");
12022 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12023 }
12024
12025 return sh_fsca_int2sf_rtx;
12026 }
12027
12028 /* Initialize the CUMULATIVE_ARGS structure. */
12029
12030 void
12031 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12032 tree fntype,
12033 rtx libname ATTRIBUTE_UNUSED,
12034 tree fndecl,
12035 signed int n_named_args,
12036 enum machine_mode mode)
12037 {
12038 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12039 pcum->free_single_fp_reg = 0;
12040 pcum->stack_regs = 0;
12041 pcum->byref_regs = 0;
12042 pcum->byref = 0;
12043 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12044
12045 /* XXX - Should we check TARGET_HITACHI here ??? */
12046 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12047
12048 if (fntype)
12049 {
12050 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12051 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12052 pcum->prototype_p = prototype_p (fntype);
12053 pcum->arg_count [(int) SH_ARG_INT]
12054 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12055
12056 pcum->call_cookie
12057 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12058 && pcum->arg_count [(int) SH_ARG_INT] == 0
12059 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12060 ? int_size_in_bytes (TREE_TYPE (fntype))
12061 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12062 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12063 == FIRST_RET_REG));
12064 }
12065 else
12066 {
12067 pcum->arg_count [(int) SH_ARG_INT] = 0;
12068 pcum->prototype_p = FALSE;
12069 if (mode != VOIDmode)
12070 {
12071 pcum->call_cookie =
12072 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12073 && GET_MODE_SIZE (mode) > 4
12074 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12075
12076 /* If the default ABI is the Renesas ABI then all library
12077 calls must assume that the library will be using the
12078 Renesas ABI. So if the function would return its result
12079 in memory then we must force the address of this memory
12080 block onto the stack. Ideally we would like to call
12081 targetm.calls.return_in_memory() here but we do not have
12082 the TYPE or the FNDECL available so we synthesize the
12083 contents of that function as best we can. */
12084 pcum->force_mem =
12085 (TARGET_DEFAULT & MASK_HITACHI)
12086 && (mode == BLKmode
12087 || (GET_MODE_SIZE (mode) > 4
12088 && !(mode == DFmode
12089 && TARGET_FPU_DOUBLE)));
12090 }
12091 else
12092 {
12093 pcum->call_cookie = 0;
12094 pcum->force_mem = FALSE;
12095 }
12096 }
12097 }
12098
12099 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12100 not descend into CONST_DOUBLEs when replacing.
12101
12102 Note that copying is not done so X must not be shared unless all copies
12103 are to be modified.
12104
12105 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12106 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12107 replacements[n*2+1] - and that we take mode changes into account.
12108
12109 If a replacement is ambiguous, return NULL_RTX.
12110
12111 If MODIFY is zero, don't modify any rtl in place,
12112 just return zero or nonzero for failure / success. */
12113
12114 rtx
12115 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12116 {
12117 int i, j;
12118 const char *fmt;
12119
12120 /* The following prevents loops from occurring when we change a MEM in a
12121 CONST_DOUBLE into the same CONST_DOUBLE. */
12122 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
12123 return x;
12124
12125 for (i = n_replacements - 1; i >= 0 ; i--)
12126 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12127 return replacements[i*2+1];
12128
12129 /* Allow this function to make replacements in EXPR_LISTs. */
12130 if (x == 0)
12131 return 0;
12132
12133 if (GET_CODE (x) == SUBREG)
12134 {
12135 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12136 n_replacements, modify);
12137
12138 if (CONST_INT_P (new_rtx))
12139 {
12140 x = simplify_subreg (GET_MODE (x), new_rtx,
12141 GET_MODE (SUBREG_REG (x)),
12142 SUBREG_BYTE (x));
12143 if (! x)
12144 abort ();
12145 }
12146 else if (modify)
12147 SUBREG_REG (x) = new_rtx;
12148
12149 return x;
12150 }
12151 else if (REG_P (x))
12152 {
12153 unsigned regno = REGNO (x);
12154 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12155 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12156 rtx result = NULL_RTX;
12157
12158 for (i = n_replacements - 1; i >= 0; i--)
12159 {
12160 rtx from = replacements[i*2];
12161 rtx to = replacements[i*2+1];
12162 unsigned from_regno, from_nregs, to_regno, new_regno;
12163
12164 if (!REG_P (from))
12165 continue;
12166 from_regno = REGNO (from);
12167 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12168 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12169 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12170 {
12171 if (regno < from_regno
12172 || regno + nregs > from_regno + nregs
12173 || !REG_P (to)
12174 || result)
12175 return NULL_RTX;
12176 to_regno = REGNO (to);
12177 if (to_regno < FIRST_PSEUDO_REGISTER)
12178 {
12179 new_regno = regno + to_regno - from_regno;
12180 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12181 != nregs)
12182 return NULL_RTX;
12183 result = gen_rtx_REG (GET_MODE (x), new_regno);
12184 }
12185 else if (GET_MODE (x) <= GET_MODE (to))
12186 result = gen_lowpart_common (GET_MODE (x), to);
12187 else
12188 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12189 }
12190 }
12191 return result ? result : x;
12192 }
12193 else if (GET_CODE (x) == ZERO_EXTEND)
12194 {
12195 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12196 n_replacements, modify);
12197
12198 if (CONST_INT_P (new_rtx))
12199 {
12200 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12201 new_rtx, GET_MODE (XEXP (x, 0)));
12202 if (! x)
12203 abort ();
12204 }
12205 else if (modify)
12206 XEXP (x, 0) = new_rtx;
12207
12208 return x;
12209 }
12210
12211 fmt = GET_RTX_FORMAT (GET_CODE (x));
12212 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12213 {
12214 rtx new_rtx;
12215
12216 if (fmt[i] == 'e')
12217 {
12218 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12219 n_replacements, modify);
12220 if (!new_rtx)
12221 return NULL_RTX;
12222 if (modify)
12223 XEXP (x, i) = new_rtx;
12224 }
12225 else if (fmt[i] == 'E')
12226 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12227 {
12228 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12229 n_replacements, modify);
12230 if (!new_rtx)
12231 return NULL_RTX;
12232 if (modify)
12233 XVECEXP (x, i, j) = new_rtx;
12234 }
12235 }
12236
12237 return x;
12238 }
12239
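/* Return rtl that truncates X to MODE.  If X is an extension of a value
   at least as wide as MODE, truncate the inner value instead; if the
   inner value is narrower than MODE, re-extend it to MODE with the same
   kind of extension (only allowed for SIGN_EXTEND when NEED_SIGN_EXT is
   nonzero).  */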
12240 rtx
12241 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12242 {
12243 enum rtx_code code = TRUNCATE;
12244
12245 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12246 {
12247 rtx inner = XEXP (x, 0);
12248 enum machine_mode inner_mode = GET_MODE (inner);
12249
12250 if (inner_mode == mode)
12251 return inner;
12252 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12253 x = inner;
12254 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12255 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12256 {
12257 code = GET_CODE (x);
12258 x = inner;
12259 }
12260 }
12261 return gen_rtx_fmt_e (code, mode, x);
12262 }
12263
12264 /* Called via for_each_rtx after reload, to clean up truncates of
12265 registers that span multiple actual hard registers. */
12266 int
12267 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12268 {
12269 rtx x = *p, reg;
12270
12271 if (GET_CODE (x) != TRUNCATE)
12272 return 0;
12273 reg = XEXP (x, 0);
12274 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12275 {
12276 enum machine_mode reg_mode = GET_MODE (reg);
12277 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12278 subreg_lowpart_offset (DImode, reg_mode));
12279 *(int*) n_changes += 1;
12280 return -1;
12281 }
12282 return 0;
12283 }
12284
12285 /* Load and store depend on the highpart of the address. However,
12286 set_attr_alternative does not give well-defined results before reload,
12287 so we must look at the rtl ourselves to see if any of the feeding
12288 registers is used in a memref. */
12289
12290 /* Called by sh_contains_memref_p via for_each_rtx. */
12291 static int
12292 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12293 {
12294 return (MEM_P (*loc));
12295 }
12296
12297 /* Return nonzero iff INSN contains a MEM. */
12298 int
12299 sh_contains_memref_p (rtx insn)
12300 {
12301 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12302 }
12303
12304 /* Return nonzero iff INSN loads a banked register. */
12305 int
12306 sh_loads_bankedreg_p (rtx insn)
12307 {
12308 if (GET_CODE (PATTERN (insn)) == SET)
12309 {
12310 rtx op = SET_DEST (PATTERN(insn));
12311 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12312 return 1;
12313 }
12314
12315 return 0;
12316 }
12317
12318 /* FNADDR is the MEM expression from a call expander. Return an address
12319 to use in an SHmedia insn pattern. */
12320 rtx
12321 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12322 {
12323 int is_sym;
12324
12325 fnaddr = XEXP (fnaddr, 0);
12326 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12327 if (flag_pic && is_sym)
12328 {
12329 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12330 {
12331 rtx reg = gen_reg_rtx (Pmode);
12332
12333 /* We must not use GOTPLT for sibcalls, because PIC_REG
12334 must be restored before the PLT code gets to run. */
12335 if (is_sibcall)
12336 emit_insn (gen_symGOT2reg (reg, fnaddr));
12337 else
12338 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12339 fnaddr = reg;
12340 }
12341 else
12342 {
12343 fnaddr = gen_sym2PIC (fnaddr);
12344 PUT_MODE (fnaddr, Pmode);
12345 }
12346 }
12347 /* If ptabs might trap, make this visible to the rest of the compiler.
12348 We generally assume that symbols pertain to valid locations, but
12349 it is possible to generate invalid symbols with asm or linker tricks.
12350 In a list of functions where each returns its successor, an invalid
12351 symbol might denote an empty list. */
12352 if (!TARGET_PT_FIXED
12353 && (!is_sym || TARGET_INVALID_SYMBOLS)
12354 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12355 {
12356 rtx tr = gen_reg_rtx (PDImode);
12357
12358 emit_insn (gen_ptabs (tr, fnaddr));
12359 fnaddr = tr;
12360 }
12361 else if (! target_reg_operand (fnaddr, Pmode))
12362 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12363 return fnaddr;
12364 }
12365
12366 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
12367
12368 static reg_class_t
12369 sh_preferred_reload_class (rtx x, reg_class_t rclass)
12370 {
12371 if (rclass == NO_REGS
12372 && TARGET_SHMEDIA
12373 && (CONST_DOUBLE_P (x)
12374 || GET_CODE (x) == SYMBOL_REF
12375 || PIC_ADDR_P (x)))
12376 return GENERAL_REGS;
12377
12378 return rclass;
12379 }
12380
12381 /* Implement TARGET_SECONDARY_RELOAD. */
12382
12383 static reg_class_t
12384 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12385 enum machine_mode mode, secondary_reload_info *sri)
12386 {
12387 enum reg_class rclass = (enum reg_class) rclass_i;
12388
12389 if (in_p)
12390 {
12391 if (REGCLASS_HAS_FP_REG (rclass)
12392 && ! TARGET_SHMEDIA
12393 && immediate_operand ((x), mode)
12394 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12395 && mode == SFmode && fldi_ok ()))
12396 switch (mode)
12397 {
12398 case SFmode:
12399 sri->icode = CODE_FOR_reload_insf__frn;
12400 return NO_REGS;
12401 case DFmode:
12402 sri->icode = CODE_FOR_reload_indf__frn;
12403 return NO_REGS;
12404 case SImode:
12405 /* ??? If we knew that we are in the appropriate mode -
12406 single precision - we could use a reload pattern directly. */
12407 return FPUL_REGS;
12408 default:
12409 abort ();
12410 }
12411 if (rclass == FPUL_REGS
12412 && ((REG_P (x)
12413 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12414 || REGNO (x) == T_REG))
12415 || GET_CODE (x) == PLUS))
12416 return GENERAL_REGS;
12417 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12418 {
12419 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12420 return GENERAL_REGS;
12421 else if (mode == SFmode)
12422 return FP_REGS;
12423 sri->icode = CODE_FOR_reload_insi__i_fpul;
12424 return NO_REGS;
12425 }
12426 if (rclass == FPSCR_REGS
12427 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12428 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12429 return GENERAL_REGS;
12430 if (REGCLASS_HAS_FP_REG (rclass)
12431 && TARGET_SHMEDIA
12432 && immediate_operand (x, mode)
12433 && x != CONST0_RTX (GET_MODE (x))
12434 && GET_MODE (x) != V4SFmode)
12435 return GENERAL_REGS;
12436 if ((mode == QImode || mode == HImode)
12437 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12438 {
12439 sri->icode = ((mode == QImode)
12440 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12441 return NO_REGS;
12442 }
12443 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12444 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12445 return TARGET_REGS;
12446 } /* end of input-only processing. */
12447
12448 if (((REGCLASS_HAS_FP_REG (rclass)
12449 && (REG_P (x)
12450 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12451 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12452 && TARGET_FMOVD))))
12453 || (REGCLASS_HAS_GENERAL_REG (rclass)
12454 && REG_P (x)
12455 && FP_REGISTER_P (REGNO (x))))
12456 && ! TARGET_SHMEDIA
12457 && (mode == SFmode || mode == SImode))
12458 return FPUL_REGS;
12459 if ((rclass == FPUL_REGS
12460 || (REGCLASS_HAS_FP_REG (rclass)
12461 && ! TARGET_SHMEDIA && mode == SImode))
12462 && (MEM_P (x)
12463 || (REG_P (x)
12464 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12465 || REGNO (x) == T_REG
12466 || system_reg_operand (x, VOIDmode)))))
12467 {
12468 if (rclass == FPUL_REGS)
12469 return GENERAL_REGS;
12470 return FPUL_REGS;
12471 }
12472 if ((rclass == TARGET_REGS
12473 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12474 && !satisfies_constraint_Csy (x)
12475 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12476 return GENERAL_REGS;
12477 if ((rclass == MAC_REGS || rclass == PR_REGS)
12478 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12479 && rclass != REGNO_REG_CLASS (REGNO (x)))
12480 return GENERAL_REGS;
12481 if (rclass != GENERAL_REGS && REG_P (x)
12482 && TARGET_REGISTER_P (REGNO (x)))
12483 return GENERAL_REGS;
12484
12485 /* If we get here, fall back to loading the FPUL register through general registers.
12486 This case can happen when movsi_ie insn is picked initially to
12487 load/store the FPUL register from/to another register, and then the
12488 other register is allocated on the stack. */
12489 if (rclass == FPUL_REGS && true_regnum (x) == -1)
12490 return GENERAL_REGS;
12491
12492 /* Force mov.b displacement addressing insn to use R0 as the other operand.
12493 On SH2A could also just leave it alone here, which would result in a
12494 4 byte move insn being generated instead. However, for this to work
12495 the insns must have the appropriate alternatives. */
12496 if (mode == QImode && rclass != R0_REGS
12497 && DISP_ADDR_P (x) && DISP_ADDR_OFFSET (x) < 16)
12498 return R0_REGS;
12499
12500 /* When reload is trying to address a QImode or HImode subreg on the stack,
12501 force any subreg byte into R0_REGS, as this is going to become a
12502 displacement address.
12503 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
12504 is on the stack, the memref to it might already require a displacement
12505 and that has to be added to the final address. At this point we don't
12506 know the cumulative displacement so we assume the worst case. */
12507 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
12508 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
12509 return R0_REGS;
12510
12511 return NO_REGS;
12512 }
12513
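/* Implements target hook conditional_register_usage: adjust the fixed,
   call-used and register class information once the target flags are
   known.  */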
12514 static void
12515 sh_conditional_register_usage (void)
12516 {
12517 int regno;
12518 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
12519 if (! VALID_REGISTER_P (regno))
12520 fixed_regs[regno] = call_used_regs[regno] = 1;
12521 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
12522 if (TARGET_SH5)
12523 {
12524 call_used_regs[FIRST_GENERAL_REG + 8]
12525 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
12526 call_really_used_regs[FIRST_GENERAL_REG + 8]
12527 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
12528 }
12529 if (TARGET_SHMEDIA)
12530 {
12531 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
12532 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
12533 regno_reg_class[FIRST_FP_REG] = FP_REGS;
12534 }
12535 if (flag_pic)
12536 {
12537 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12538 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12539 }
12540 /* Renesas saves and restores mac registers on call. */
12541 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
12542 {
12543 call_really_used_regs[MACH_REG] = 0;
12544 call_really_used_regs[MACL_REG] = 0;
12545 }
12546 for (regno = FIRST_FP_REG + (TARGET_LITTLE_ENDIAN != 0);
12547 regno <= LAST_FP_REG; regno += 2)
12548 SET_HARD_REG_BIT (reg_class_contents[DF_HI_REGS], regno);
12549 if (TARGET_SHMEDIA)
12550 {
12551 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
12552 if (! fixed_regs[regno] && call_really_used_regs[regno])
12553 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
12554 }
12555 else
12556 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
12557 if (! fixed_regs[regno] && call_really_used_regs[regno])
12558 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
12559 }
12560
12561 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
12562
12563 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
12564
12565 static bool
12566 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
12567 {
12568 return (TARGET_SHMEDIA
12569 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
12570 || x == CONST0_RTX (mode)
12571 || !TARGET_SHMEDIA_FPU
12572 || TARGET_SHMEDIA64)
12573 : (GET_CODE (x) != CONST_DOUBLE
12574 || mode == DFmode || mode == SFmode
12575 || mode == DImode || GET_MODE (x) == VOIDmode));
12576 }
12577
12578 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12579
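/* Initialize the out-of-line __sync library functions for operations up
   to UNITS_PER_WORD bytes wide.  */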
12580 static void
12581 sh_init_sync_libfuncs (void)
12582 {
12583 init_sync_libfuncs (UNITS_PER_WORD);
12584 }
12585
12586 #include "gt-sh.h"