1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "insn-config.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "flags.h"
31 #include "expr.h"
32 #include "optabs.h"
33 #include "function.h"
34 #include "regs.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "integrate.h"
41 #include "dwarf2.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "real.h"
46 #include "langhooks.h"
47 #include "basic-block.h"
48 #include "df.h"
49 #include "cfglayout.h"
50 #include "intl.h"
51 #include "sched-int.h"
52 #include "params.h"
53 #include "ggc.h"
54 #include "gimple.h"
55 #include "cfgloop.h"
56 #include "alloc-pool.h"
57 #include "tm-constrs.h"
58
59
60 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
61
62 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
63 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
64
65 /* These are some macros to abstract register modes. */
66 #define CONST_OK_FOR_ADD(size) \
67 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
68 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
69 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
70 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
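/* A minimal illustration (a sketch, not code taken from this file) of how
   the generators above abstract the pointer width: a stack adjustment can
   be emitted the same way whether the target pointer mode is SImode or
   DImode, e.g.

     emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                          GEN_INT (-16)));

   which expands through gen_adddi3 under TARGET_SHMEDIA64 and through
   gen_addsi3 otherwise.  */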
71
72 /* Used to simplify the logic below. Find the attributes wherever
73 they may be. */
74 #define SH_ATTRIBUTES(decl) \
75 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
76 : DECL_ATTRIBUTES (decl) \
77 ? (DECL_ATTRIBUTES (decl)) \
78 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
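/* For example: given a FUNCTION_DECL, SH_ATTRIBUTES yields
   DECL_ATTRIBUTES (decl) when that list is non-empty and otherwise falls
   back to TYPE_ATTRIBUTES (TREE_TYPE (decl)), i.e. the attributes attached
   to the function's type; a type node passes the TYPE_P test and yields
   TYPE_ATTRIBUTES (decl) directly.  */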
79
80 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
81 int current_function_interrupt;
82
83 tree sh_deferred_function_attributes;
84 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
85
86 /* Global variables for machine-dependent things. */
87
88 /* Which cpu are we scheduling for. */
89 enum processor_type sh_cpu;
90
91 /* Definitions used in ready queue reordering for first scheduling pass. */
92
93 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
94 static short *regmode_weight[2];
95
96 /* Total SFmode and SImode weights of scheduled insns. */
97 static int curr_regmode_pressure[2];
98
99 /* Number of r0 life regions. */
100 static int r0_life_regions;
101
102 /* If true, skip cycles for Q -> R movement. */
103 static int skip_cycles = 0;
104
105 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
106 and returned from sh_reorder2. */
107 static short cached_can_issue_more;
108
109 /* Provides the class number of the smallest class containing
110 reg number. */
111
112 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
113 {
114 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
147 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
148 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
149 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
150 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
151 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
152 GENERAL_REGS, GENERAL_REGS,
153 };
154
155 char sh_register_names[FIRST_PSEUDO_REGISTER] \
156 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
157
158 char sh_additional_register_names[ADDREGNAMES_SIZE] \
159 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
160 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
161
162 int assembler_dialect;
163
164 static bool shmedia_space_reserved_for_target_registers;
165
166 static bool sh_handle_option (size_t, const char *, int);
167 static void split_branches (rtx);
168 static int branch_dest (rtx);
169 static void force_into (rtx, rtx);
170 static void print_slot (rtx);
171 static rtx add_constant (rtx, enum machine_mode, rtx);
172 static void dump_table (rtx, rtx);
173 static int hi_const (rtx);
174 static int broken_move (rtx);
175 static int mova_p (rtx);
176 static rtx find_barrier (int, rtx, rtx);
177 static int noncall_uses_reg (rtx, rtx, rtx *);
178 static rtx gen_block_redirect (rtx, int, int);
179 static void sh_reorg (void);
180 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
181 static rtx frame_insn (rtx);
182 static rtx push (int);
183 static void pop (int);
184 static void push_regs (HARD_REG_SET *, int);
185 static int calc_live_regs (HARD_REG_SET *);
186 static HOST_WIDE_INT rounded_frame_size (int);
187 static rtx mark_constant_pool_use (rtx);
188 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
189 static tree sh_handle_resbank_handler_attribute (tree *, tree,
190 tree, int, bool *);
191 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
192 tree, int, bool *);
193 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
194 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
195 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
196 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
197 static void sh_insert_attributes (tree, tree *);
198 static const char *sh_check_pch_target_flags (int);
199 static int sh_adjust_cost (rtx, rtx, rtx, int);
200 static int sh_issue_rate (void);
201 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
202 static short find_set_regmode_weight (rtx, enum machine_mode);
203 static short find_insn_regmode_weight (rtx, enum machine_mode);
204 static void find_regmode_weight (basic_block, enum machine_mode);
205 static int find_r0_life_regions (basic_block);
206 static void sh_md_init_global (FILE *, int, int);
207 static void sh_md_finish_global (FILE *, int);
208 static int rank_for_reorder (const void *, const void *);
209 static void swap_reorder (rtx *, int);
210 static void ready_reorder (rtx *, int);
211 static short high_pressure (enum machine_mode);
212 static int sh_reorder (FILE *, int, rtx *, int *, int);
213 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
214 static void sh_md_init (FILE *, int, int);
215 static int sh_variable_issue (FILE *, int, rtx, int);
216
217 static bool sh_function_ok_for_sibcall (tree, tree);
218
219 static bool sh_cannot_modify_jumps_p (void);
220 static enum reg_class sh_target_reg_class (void);
221 static bool sh_optimize_target_register_callee_saved (bool);
222 static bool sh_ms_bitfield_layout_p (const_tree);
223
224 static void sh_init_builtins (void);
225 static void sh_media_init_builtins (void);
226 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
227 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
228 static void sh_file_start (void);
229 static int flow_dependent_p (rtx, rtx);
230 static void flow_dependent_p_1 (rtx, const_rtx, void *);
231 static int shiftcosts (rtx);
232 static int andcosts (rtx);
233 static int addsubcosts (rtx);
234 static int multcosts (rtx);
235 static bool unspec_caller_rtx_p (rtx);
236 static bool sh_cannot_copy_insn_p (rtx);
237 static bool sh_rtx_costs (rtx, int, int, int *, bool);
238 static int sh_address_cost (rtx, bool);
239 static int sh_pr_n_sets (void);
240 static rtx sh_allocate_initial_value (rtx);
241 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
242 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
243 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
244 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
245 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
246 static int scavenge_reg (HARD_REG_SET *s);
247 struct save_schedule_s;
248 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
249 struct save_schedule_s *, int);
250
251 static rtx sh_struct_value_rtx (tree, int);
252 static bool sh_return_in_memory (const_tree, const_tree);
253 static rtx sh_builtin_saveregs (void);
254 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
255 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
256 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
257 static tree sh_build_builtin_va_list (void);
258 static void sh_va_start (tree, rtx);
259 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
260 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
261 const_tree, bool);
262 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
263 const_tree, bool);
264 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
265 tree, bool);
266 static bool sh_scalar_mode_supported_p (enum machine_mode);
267 static int sh_dwarf_calling_convention (const_tree);
268 static void sh_encode_section_info (tree, rtx, int);
269 static int sh2a_function_vector_p (tree);
270 \f
271 static const struct attribute_spec sh_attribute_table[] =
272 {
273 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
274 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
275 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
276 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
277 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
278 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
279 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
280 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
281 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
282 #ifdef SYMBIAN
 283 /* Symbian support adds two new attributes:
284 dllexport - for exporting a function/variable that will live in a dll
285 dllimport - for importing a function/variable from a dll
286
287 Microsoft allows multiple declspecs in one __declspec, separating
288 them with spaces. We do NOT support this. Instead, use __declspec
289 multiple times. */
290 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
291 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
292 #endif
293 { NULL, 0, 0, false, false, false, NULL }
294 };
295 \f
296 /* Initialize the GCC target structure. */
297 #undef TARGET_ATTRIBUTE_TABLE
298 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
299
300 /* The next two are used for debug info when compiling with -gdwarf. */
301 #undef TARGET_ASM_UNALIGNED_HI_OP
302 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
303 #undef TARGET_ASM_UNALIGNED_SI_OP
304 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
305
306 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
307 #undef TARGET_ASM_UNALIGNED_DI_OP
308 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
309 #undef TARGET_ASM_ALIGNED_DI_OP
310 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
311
312 #undef TARGET_ASM_FUNCTION_EPILOGUE
313 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
314
315 #undef TARGET_ASM_OUTPUT_MI_THUNK
316 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
317
318 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
319 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
320
321 #undef TARGET_ASM_FILE_START
322 #define TARGET_ASM_FILE_START sh_file_start
323 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
324 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
325
326 #undef TARGET_DEFAULT_TARGET_FLAGS
327 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
328 #undef TARGET_HANDLE_OPTION
329 #define TARGET_HANDLE_OPTION sh_handle_option
330
331 #undef TARGET_INSERT_ATTRIBUTES
332 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
333
334 #undef TARGET_SCHED_ADJUST_COST
335 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
336
337 #undef TARGET_SCHED_ISSUE_RATE
338 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
339
 340 /* The following hooks have been implemented to re-enable sched1. With the
 341 help of these hooks we limit the movement of insns in sched1 to reduce
 342 register pressure. The overall idea is to keep count of the SImode and
 343 SFmode regs required by already scheduled insns. When these counts
 344 cross some threshold values, we give priority to insns that free registers.
 345 The insn that frees registers is most likely to be the insn with the lowest
 346 LUID (original insn order); but such an insn might be sitting in the stalled
 347 queue (Q) instead of the ready queue (R). To solve this, we skip cycles,
 348 up to a maximum of 8, so that such insns may move from Q -> R.
 349
 350 The descriptions of the hooks are as follows:
 351
 352 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
 353 scheduler; it is called inside the sched_init function just after the
 354 find_insn_reg_weights function call. It is used to calculate the SImode
 355 and SFmode weights of insns in basic blocks, much like what
 356 find_insn_reg_weights does.
357 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
358
359 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
360 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
361 (Q)->(R).
362
363 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
364 high; reorder the ready queue so that the insn with lowest LUID will be
365 issued next.
366
367 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
368 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
369
370 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
371 can be returned from TARGET_SCHED_REORDER2.
372
373 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
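/* A rough sketch (an assumed shape, not the hook definitions that appear
   later in this file) of how the pressure-driven reordering fits together:

     static int
     sh_reorder (FILE *dump, int verbose, rtx *ready, int *n_readyp, int clock)
     {
       if (reload_completed)
         return sh_issue_rate ();
       if (high_pressure (SImode) || high_pressure (SFmode))
         ready_reorder (ready, *n_readyp);
       return sh_issue_rate ();
     }

   high_pressure compares CURR_REGMODE_PRESSURE against a per-mode
   threshold, ready_reorder sorts the ready list so the lowest-LUID insn is
   issued first, and sh_reorder2 reports the same condition so that
   sh_dfa_new_cycle can stall for up to 8 cycles, letting a register-freeing
   insn migrate from the stalled queue Q into the ready queue R.  */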
374
375 #undef TARGET_SCHED_DFA_NEW_CYCLE
376 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
377
378 #undef TARGET_SCHED_INIT_GLOBAL
379 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
380
381 #undef TARGET_SCHED_FINISH_GLOBAL
382 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
383
384 #undef TARGET_SCHED_VARIABLE_ISSUE
385 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
386
387 #undef TARGET_SCHED_REORDER
388 #define TARGET_SCHED_REORDER sh_reorder
389
390 #undef TARGET_SCHED_REORDER2
391 #define TARGET_SCHED_REORDER2 sh_reorder2
392
393 #undef TARGET_SCHED_INIT
394 #define TARGET_SCHED_INIT sh_md_init
395
396 #undef TARGET_LEGITIMIZE_ADDRESS
397 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
398
399 #undef TARGET_CANNOT_MODIFY_JUMPS_P
400 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
401 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
402 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
403 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
404 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
405 sh_optimize_target_register_callee_saved
406
407 #undef TARGET_MS_BITFIELD_LAYOUT_P
408 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
409
410 #undef TARGET_INIT_BUILTINS
411 #define TARGET_INIT_BUILTINS sh_init_builtins
412 #undef TARGET_EXPAND_BUILTIN
413 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
414
415 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
416 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
417
418 #undef TARGET_CANNOT_COPY_INSN_P
419 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
420 #undef TARGET_RTX_COSTS
421 #define TARGET_RTX_COSTS sh_rtx_costs
422 #undef TARGET_ADDRESS_COST
423 #define TARGET_ADDRESS_COST sh_address_cost
424 #undef TARGET_ALLOCATE_INITIAL_VALUE
425 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
426
427 #undef TARGET_MACHINE_DEPENDENT_REORG
428 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
429
430 #undef TARGET_DWARF_REGISTER_SPAN
431 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
432
433 #ifdef HAVE_AS_TLS
434 #undef TARGET_HAVE_TLS
435 #define TARGET_HAVE_TLS true
436 #endif
437
438 #undef TARGET_PROMOTE_PROTOTYPES
439 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
440 #undef TARGET_PROMOTE_FUNCTION_ARGS
441 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
442 #undef TARGET_PROMOTE_FUNCTION_RETURN
443 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
444
445 #undef TARGET_STRUCT_VALUE_RTX
446 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
447 #undef TARGET_RETURN_IN_MEMORY
448 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
449
450 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
451 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
452 #undef TARGET_SETUP_INCOMING_VARARGS
453 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
454 #undef TARGET_STRICT_ARGUMENT_NAMING
455 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
456 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
457 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
458 #undef TARGET_MUST_PASS_IN_STACK
459 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
460 #undef TARGET_PASS_BY_REFERENCE
461 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
462 #undef TARGET_CALLEE_COPIES
463 #define TARGET_CALLEE_COPIES sh_callee_copies
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
466
467 #undef TARGET_BUILD_BUILTIN_VA_LIST
468 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
469 #undef TARGET_EXPAND_BUILTIN_VA_START
470 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
471 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
472 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
473
474 #undef TARGET_SCALAR_MODE_SUPPORTED_P
475 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
476 #undef TARGET_VECTOR_MODE_SUPPORTED_P
477 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
478
479 #undef TARGET_CHECK_PCH_TARGET_FLAGS
480 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
481
482 #undef TARGET_DWARF_CALLING_CONVENTION
483 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
484
485 /* Return regmode weight for insn. */
486 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
487
488 /* Return current register pressure for regmode. */
489 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
490
491 #undef TARGET_ENCODE_SECTION_INFO
492 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
493
494 #ifdef SYMBIAN
495
496 #undef TARGET_ENCODE_SECTION_INFO
497 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
498 #undef TARGET_STRIP_NAME_ENCODING
499 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
500 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
501 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
502
503 #endif /* SYMBIAN */
504
505 #undef TARGET_SECONDARY_RELOAD
506 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
507
508 #undef TARGET_LEGITIMATE_ADDRESS_P
509 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
510
511 /* Machine-specific symbol_ref flags. */
512 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
513
514 struct gcc_target targetm = TARGET_INITIALIZER;
515 \f
516 /* Implement TARGET_HANDLE_OPTION. */
517
518 static bool
519 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
520 int value ATTRIBUTE_UNUSED)
521 {
522 switch (code)
523 {
524 case OPT_m1:
525 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
526 return true;
527
528 case OPT_m2:
529 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
530 return true;
531
532 case OPT_m2a:
533 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
534 return true;
535
536 case OPT_m2a_nofpu:
537 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
538 return true;
539
540 case OPT_m2a_single:
541 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
542 return true;
543
544 case OPT_m2a_single_only:
545 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
546 return true;
547
548 case OPT_m2e:
549 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
550 return true;
551
552 case OPT_m3:
553 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
554 return true;
555
556 case OPT_m3e:
557 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
558 return true;
559
560 case OPT_m4:
561 case OPT_m4_100:
562 case OPT_m4_200:
563 case OPT_m4_300:
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
565 return true;
566
567 case OPT_m4_nofpu:
568 case OPT_m4_100_nofpu:
569 case OPT_m4_200_nofpu:
570 case OPT_m4_300_nofpu:
571 case OPT_m4_340:
572 case OPT_m4_400:
573 case OPT_m4_500:
574 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
575 return true;
576
577 case OPT_m4_single:
578 case OPT_m4_100_single:
579 case OPT_m4_200_single:
580 case OPT_m4_300_single:
581 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
582 return true;
583
584 case OPT_m4_single_only:
585 case OPT_m4_100_single_only:
586 case OPT_m4_200_single_only:
587 case OPT_m4_300_single_only:
588 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
589 return true;
590
591 case OPT_m4a:
592 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
593 return true;
594
595 case OPT_m4a_nofpu:
596 case OPT_m4al:
597 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
598 return true;
599
600 case OPT_m4a_single:
601 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
602 return true;
603
604 case OPT_m4a_single_only:
605 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
606 return true;
607
608 case OPT_m5_32media:
609 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
610 return true;
611
612 case OPT_m5_32media_nofpu:
613 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
614 return true;
615
616 case OPT_m5_64media:
617 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
618 return true;
619
620 case OPT_m5_64media_nofpu:
621 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
622 return true;
623
624 case OPT_m5_compact:
625 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
626 return true;
627
628 case OPT_m5_compact_nofpu:
629 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
630 return true;
631
632 default:
633 return true;
634 }
635 }
636 \f
637 /* Set default optimization options. */
638 void
639 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
640 {
641 if (level)
642 {
643 flag_omit_frame_pointer = 2;
644 if (!size)
645 sh_div_str = "inv:minlat";
646 }
647 if (size)
648 {
649 target_flags |= MASK_SMALLCODE;
650 sh_div_str = SH_DIV_STR_FOR_SIZE ;
651 }
652 else
653 TARGET_CBRANCHDI4 = 1;
654 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
655 haven't been parsed yet, hence we'd read only the default.
656 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
657 it's OK to always set flag_branch_target_load_optimize. */
658 if (level > 1)
659 {
660 flag_branch_target_load_optimize = 1;
661 if (!size)
662 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
663 }
664 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
665 here, so leave it to OVERRIDE_OPTIONS to set
666 flag_finite_math_only. We set it to 2 here so we know if the user
667 explicitly requested this to be on or off. */
668 flag_finite_math_only = 2;
669 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
670 the user explicitly requested this to be on or off. */
671 if (flag_schedule_insns > 0)
672 flag_schedule_insns = 2;
673
674 set_param_value ("simultaneous-prefetches", 2);
675 }
676
677 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
678 options, and do some machine dependent initialization. */
679 void
680 sh_override_options (void)
681 {
682 int regno;
683
684 SUBTARGET_OVERRIDE_OPTIONS;
685 if (flag_finite_math_only == 2)
686 flag_finite_math_only
687 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
688 if (TARGET_SH2E && !flag_finite_math_only)
689 target_flags |= MASK_IEEE;
690 sh_cpu = PROCESSOR_SH1;
691 assembler_dialect = 0;
692 if (TARGET_SH2)
693 sh_cpu = PROCESSOR_SH2;
694 if (TARGET_SH2E)
695 sh_cpu = PROCESSOR_SH2E;
696 if (TARGET_SH2A)
697 sh_cpu = PROCESSOR_SH2A;
698 if (TARGET_SH3)
699 sh_cpu = PROCESSOR_SH3;
700 if (TARGET_SH3E)
701 sh_cpu = PROCESSOR_SH3E;
702 if (TARGET_SH4)
703 {
704 assembler_dialect = 1;
705 sh_cpu = PROCESSOR_SH4;
706 }
707 if (TARGET_SH4A_ARCH)
708 {
709 assembler_dialect = 1;
710 sh_cpu = PROCESSOR_SH4A;
711 }
712 if (TARGET_SH5)
713 {
714 sh_cpu = PROCESSOR_SH5;
715 target_flags |= MASK_ALIGN_DOUBLE;
716 if (TARGET_SHMEDIA_FPU)
717 target_flags |= MASK_FMOVD;
718 if (TARGET_SHMEDIA)
719 {
720 /* There are no delay slots on SHmedia. */
721 flag_delayed_branch = 0;
 722 /* Relaxation isn't yet supported for SHmedia. */
723 target_flags &= ~MASK_RELAX;
 724 /* After reload, if-conversion does little good but can cause
725 ICEs:
726 - find_if_block doesn't do anything for SH because we don't
727 have conditional execution patterns. (We use conditional
728 move patterns, which are handled differently, and only
729 before reload).
730 - find_cond_trap doesn't do anything for the SH because we
731 don't have conditional traps.
732 - find_if_case_1 uses redirect_edge_and_branch_force in
733 the only path that does an optimization, and this causes
734 an ICE when branch targets are in registers.
735 - find_if_case_2 doesn't do anything for the SHmedia after
736 reload except when it can redirect a tablejump - and
737 that's rather rare. */
738 flag_if_conversion2 = 0;
739 if (! strcmp (sh_div_str, "call"))
740 sh_div_strategy = SH_DIV_CALL;
741 else if (! strcmp (sh_div_str, "call2"))
742 sh_div_strategy = SH_DIV_CALL2;
743 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
744 sh_div_strategy = SH_DIV_FP;
745 else if (! strcmp (sh_div_str, "inv"))
746 sh_div_strategy = SH_DIV_INV;
747 else if (! strcmp (sh_div_str, "inv:minlat"))
748 sh_div_strategy = SH_DIV_INV_MINLAT;
749 else if (! strcmp (sh_div_str, "inv20u"))
750 sh_div_strategy = SH_DIV_INV20U;
751 else if (! strcmp (sh_div_str, "inv20l"))
752 sh_div_strategy = SH_DIV_INV20L;
753 else if (! strcmp (sh_div_str, "inv:call2"))
754 sh_div_strategy = SH_DIV_INV_CALL2;
755 else if (! strcmp (sh_div_str, "inv:call"))
756 sh_div_strategy = SH_DIV_INV_CALL;
757 else if (! strcmp (sh_div_str, "inv:fp"))
758 {
759 if (TARGET_FPU_ANY)
760 sh_div_strategy = SH_DIV_INV_FP;
761 else
762 sh_div_strategy = SH_DIV_INV;
763 }
764 TARGET_CBRANCHDI4 = 0;
765 /* Assembler CFI isn't yet fully supported for SHmedia. */
766 flag_dwarf2_cfi_asm = 0;
767 }
768 }
769 else
770 {
 771 /* Only the sh64-elf assembler supports .quad properly. */
772 targetm.asm_out.aligned_op.di = NULL;
773 targetm.asm_out.unaligned_op.di = NULL;
774 }
775 if (TARGET_SH1)
776 {
777 if (! strcmp (sh_div_str, "call-div1"))
778 sh_div_strategy = SH_DIV_CALL_DIV1;
779 else if (! strcmp (sh_div_str, "call-fp")
780 && (TARGET_FPU_DOUBLE
781 || (TARGET_HARD_SH4 && TARGET_SH2E)
782 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
783 sh_div_strategy = SH_DIV_CALL_FP;
784 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
785 sh_div_strategy = SH_DIV_CALL_TABLE;
786 else
 787 /* Pick one that makes the most sense for the target in general.
 788 It is not much good to use different functions depending
 789 on -Os, since we would then end up with two different functions
 790 when some of the code is compiled for size and some for
 791 speed. */
792
793 /* SH4 tends to emphasize speed. */
794 if (TARGET_HARD_SH4)
795 sh_div_strategy = SH_DIV_CALL_TABLE;
796 /* These have their own way of doing things. */
797 else if (TARGET_SH2A)
798 sh_div_strategy = SH_DIV_INTRINSIC;
799 /* ??? Should we use the integer SHmedia function instead? */
800 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
801 sh_div_strategy = SH_DIV_CALL_FP;
802 /* SH1 .. SH3 cores often go into small-footprint systems, so
803 default to the smallest implementation available. */
804 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
805 sh_div_strategy = SH_DIV_CALL_TABLE;
806 else
807 sh_div_strategy = SH_DIV_CALL_DIV1;
808 }
809 if (!TARGET_SH1)
810 TARGET_PRETEND_CMOVE = 0;
811 if (sh_divsi3_libfunc[0])
812 ; /* User supplied - leave it alone. */
813 else if (TARGET_DIVIDE_CALL_FP)
814 sh_divsi3_libfunc = "__sdivsi3_i4";
815 else if (TARGET_DIVIDE_CALL_TABLE)
816 sh_divsi3_libfunc = "__sdivsi3_i4i";
817 else if (TARGET_SH5)
818 sh_divsi3_libfunc = "__sdivsi3_1";
819 else
820 sh_divsi3_libfunc = "__sdivsi3";
821 if (sh_branch_cost == -1)
822 sh_branch_cost
823 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
824
825 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
826 if (! VALID_REGISTER_P (regno))
827 sh_register_names[regno][0] = '\0';
828
829 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
830 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
831 sh_additional_register_names[regno][0] = '\0';
832
833 if (flag_omit_frame_pointer == 2)
834 {
835 /* The debugging information is sufficient,
 836 but gdb doesn't implement this yet. */
837 if (0)
838 flag_omit_frame_pointer
839 = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
840 else
841 flag_omit_frame_pointer = 0;
842 }
843
844 if ((flag_pic && ! TARGET_PREFERGOT)
845 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
846 flag_no_function_cse = 1;
847
848 if (SMALL_REGISTER_CLASSES)
849 {
850 /* Never run scheduling before reload, since that can
851 break global alloc, and generates slower code anyway due
852 to the pressure on R0. */
 853 /* Enable sched1 for SH4 if the user explicitly requests it.
 854 When sched1 is enabled, the ready queue will be reordered by
 855 the target hooks when pressure is high. We cannot do this for
 856 PIC, or for SH3 and lower, as they give spill failures for R0. */
857 if (!TARGET_HARD_SH4 || flag_pic)
858 flag_schedule_insns = 0;
859 /* ??? Current exception handling places basic block boundaries
860 after call_insns. It causes the high pressure on R0 and gives
861 spill failures for R0 in reload. See PR 22553 and the thread
862 on gcc-patches
863 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
864 else if (flag_exceptions)
865 {
866 if (flag_schedule_insns == 1)
867 warning (0, "ignoring -fschedule-insns because of exception handling bug");
868 flag_schedule_insns = 0;
869 }
870 else if (flag_schedule_insns == 2)
871 flag_schedule_insns = 0;
872 }
873
874 if (align_loops == 0)
875 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
876 if (align_jumps == 0)
877 align_jumps = 1 << CACHE_LOG;
878 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
879 align_jumps = TARGET_SHMEDIA ? 4 : 2;
880
881 /* Allocation boundary (in *bytes*) for the code of a function.
882 SH1: 32 bit alignment is faster, because instructions are always
883 fetched as a pair from a longword boundary.
 884 SH2 .. SH5: align to cache line start. */
885 if (align_functions == 0)
886 align_functions
887 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
888 /* The linker relaxation code breaks when a function contains
889 alignments that are larger than that at the start of a
890 compilation unit. */
891 if (TARGET_RELAX)
892 {
893 int min_align
894 = align_loops > align_jumps ? align_loops : align_jumps;
895
 896 /* Also take possible .long constants / mova tables into account. */
897 if (min_align < 4)
898 min_align = 4;
899 if (align_functions < min_align)
900 align_functions = min_align;
901 }
902
903 if (sh_fixed_range_str)
904 sh_fix_range (sh_fixed_range_str);
905 }
906 \f
907 /* Print the operand address in x to the stream. */
908
909 void
910 print_operand_address (FILE *stream, rtx x)
911 {
912 switch (GET_CODE (x))
913 {
914 case REG:
915 case SUBREG:
916 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
917 break;
918
919 case PLUS:
920 {
921 rtx base = XEXP (x, 0);
922 rtx index = XEXP (x, 1);
923
924 switch (GET_CODE (index))
925 {
926 case CONST_INT:
927 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
928 reg_names[true_regnum (base)]);
929 break;
930
931 case REG:
932 case SUBREG:
933 {
934 int base_num = true_regnum (base);
935 int index_num = true_regnum (index);
936
937 fprintf (stream, "@(r0,%s)",
938 reg_names[MAX (base_num, index_num)]);
939 break;
940 }
941
942 default:
943 gcc_unreachable ();
944 }
945 }
946 break;
947
948 case PRE_DEC:
949 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
950 break;
951
952 case POST_INC:
953 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
954 break;
955
956 default:
957 x = mark_constant_pool_use (x);
958 output_addr_const (stream, x);
959 break;
960 }
961 }
962
963 /* Print operand x (an rtx) in assembler syntax to file stream
964 according to modifier code.
965
966 '.' print a .s if insn needs delay slot
967 ',' print LOCAL_LABEL_PREFIX
968 '@' print trap, rte or rts depending upon pragma interruptness
969 '#' output a nop if there is nothing to put in the delay slot
970 ''' print likelihood suffix (/u for unlikely).
971 '>' print branch target if -fverbose-asm
972 'O' print a constant without the #
973 'R' print the LSW of a dp value - changes if in little endian
974 'S' print the MSW of a dp value - changes if in little endian
975 'T' print the next word of a dp value - same as 'R' in big endian mode.
976 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
977 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
978 'N' print 'r63' if the operand is (const_int 0).
979 'd' print a V2SF reg as dN instead of fpN.
980 'm' print a pair `base,offset' or `base,index', for LD and ST.
981 'U' Likewise for {LD,ST}{HI,LO}.
982 'V' print the position of a single bit set.
983 'W' print the position of a single bit cleared.
984 't' print a memory address which is a register.
985 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
986 'o' output an operator. */
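/* As an illustration (hypothetical templates, not ones taken from sh.md),
   an output template such as

     "jmp @%0%#"

   uses '#' to emit "\n\tnop" when nothing was placed in the delay slot,
   while

     "bt%. %l0"

   uses '.' to append the ".s" (or "/s") delay-slot marker when the branch
   carries a filled delay slot.  */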
987
988 void
989 print_operand (FILE *stream, rtx x, int code)
990 {
991 int regno;
992 enum machine_mode mode;
993
994 switch (code)
995 {
996 tree trapa_attr;
997
998 case '.':
999 if (final_sequence
1000 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1001 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1002 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1003 break;
1004 case ',':
1005 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1006 break;
1007 case '@':
1008 trapa_attr = lookup_attribute ("trap_exit",
1009 DECL_ATTRIBUTES (current_function_decl));
1010 if (trapa_attr)
1011 fprintf (stream, "trapa #%ld",
1012 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1013 else if (sh_cfun_interrupt_handler_p ())
1014 {
1015 if (sh_cfun_resbank_handler_p ())
1016 fprintf (stream, "resbank\n");
1017 fprintf (stream, "rte");
1018 }
1019 else
1020 fprintf (stream, "rts");
1021 break;
1022 case '#':
1023 /* Output a nop if there's nothing in the delay slot. */
1024 if (dbr_sequence_length () == 0)
1025 fprintf (stream, "\n\tnop");
1026 break;
1027 case '\'':
1028 {
1029 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1030
1031 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1032 fputs ("/u", stream);
1033 break;
1034 }
1035 case '>':
1036 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1037 {
1038 fputs ("\t! target: ", stream);
1039 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1040 }
1041 break;
1042 case 'O':
1043 x = mark_constant_pool_use (x);
1044 output_addr_const (stream, x);
1045 break;
1046 /* N.B.: %R / %S / %T adjust memory addresses by four.
1047 For SHMEDIA, that means they can be used to access the first and
1048 second 32 bit part of a 64 bit (or larger) value that
1049 might be held in floating point registers or memory.
1050 While they can be used to access 64 bit parts of a larger value
1051 held in general purpose registers, that won't work with memory -
1052 neither for fp registers, since the frxx names are used. */
1053 case 'R':
1054 if (REG_P (x) || GET_CODE (x) == SUBREG)
1055 {
1056 regno = true_regnum (x);
1057 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1058 fputs (reg_names[regno], (stream));
1059 }
1060 else if (MEM_P (x))
1061 {
1062 x = adjust_address (x, SImode, 4 * LSW);
1063 print_operand_address (stream, XEXP (x, 0));
1064 }
1065 else
1066 {
1067 rtx sub = NULL_RTX;
1068
1069 mode = GET_MODE (x);
1070 if (mode == VOIDmode)
1071 mode = DImode;
1072 if (GET_MODE_SIZE (mode) >= 8)
1073 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1074 if (sub)
1075 print_operand (stream, sub, 0);
1076 else
1077 output_operand_lossage ("invalid operand to %%R");
1078 }
1079 break;
1080 case 'S':
1081 if (REG_P (x) || GET_CODE (x) == SUBREG)
1082 {
1083 regno = true_regnum (x);
1084 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1085 fputs (reg_names[regno], (stream));
1086 }
1087 else if (MEM_P (x))
1088 {
1089 x = adjust_address (x, SImode, 4 * MSW);
1090 print_operand_address (stream, XEXP (x, 0));
1091 }
1092 else
1093 {
1094 rtx sub = NULL_RTX;
1095
1096 mode = GET_MODE (x);
1097 if (mode == VOIDmode)
1098 mode = DImode;
1099 if (GET_MODE_SIZE (mode) >= 8)
1100 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1101 if (sub)
1102 print_operand (stream, sub, 0);
1103 else
1104 output_operand_lossage ("invalid operand to %%S");
1105 }
1106 break;
1107 case 'T':
1108 /* Next word of a double. */
1109 switch (GET_CODE (x))
1110 {
1111 case REG:
1112 fputs (reg_names[REGNO (x) + 1], (stream));
1113 break;
1114 case MEM:
1115 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1116 && GET_CODE (XEXP (x, 0)) != POST_INC)
1117 x = adjust_address (x, SImode, 4);
1118 print_operand_address (stream, XEXP (x, 0));
1119 break;
1120 default:
1121 break;
1122 }
1123 break;
1124
1125 case 't':
1126 gcc_assert (MEM_P (x));
1127 x = XEXP (x, 0);
1128 switch (GET_CODE (x))
1129 {
1130 case REG:
1131 case SUBREG:
1132 print_operand (stream, x, 0);
1133 break;
1134 default:
1135 break;
1136 }
1137 break;
1138
1139 case 'o':
1140 switch (GET_CODE (x))
1141 {
1142 case PLUS: fputs ("add", stream); break;
1143 case MINUS: fputs ("sub", stream); break;
1144 case MULT: fputs ("mul", stream); break;
1145 case DIV: fputs ("div", stream); break;
1146 case EQ: fputs ("eq", stream); break;
1147 case NE: fputs ("ne", stream); break;
1148 case GT: case LT: fputs ("gt", stream); break;
1149 case GE: case LE: fputs ("ge", stream); break;
1150 case GTU: case LTU: fputs ("gtu", stream); break;
1151 case GEU: case LEU: fputs ("geu", stream); break;
1152 default:
1153 break;
1154 }
1155 break;
1156 case 'M':
1157 if (TARGET_SHMEDIA)
1158 {
1159 if (MEM_P (x)
1160 && GET_CODE (XEXP (x, 0)) == PLUS
1161 && (REG_P (XEXP (XEXP (x, 0), 1))
1162 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1163 fputc ('x', stream);
1164 }
1165 else
1166 {
1167 if (MEM_P (x))
1168 {
1169 switch (GET_MODE (x))
1170 {
1171 case QImode: fputs (".b", stream); break;
1172 case HImode: fputs (".w", stream); break;
1173 case SImode: fputs (".l", stream); break;
1174 case SFmode: fputs (".s", stream); break;
1175 case DFmode: fputs (".d", stream); break;
1176 default: gcc_unreachable ();
1177 }
1178 }
1179 }
1180 break;
1181
1182 case 'm':
1183 gcc_assert (MEM_P (x));
1184 x = XEXP (x, 0);
1185 /* Fall through. */
1186 case 'U':
1187 switch (GET_CODE (x))
1188 {
1189 case REG:
1190 case SUBREG:
1191 print_operand (stream, x, 0);
1192 fputs (", 0", stream);
1193 break;
1194
1195 case PLUS:
1196 print_operand (stream, XEXP (x, 0), 0);
1197 fputs (", ", stream);
1198 print_operand (stream, XEXP (x, 1), 0);
1199 break;
1200
1201 default:
1202 gcc_unreachable ();
1203 }
1204 break;
1205
1206 case 'V':
1207 {
1208 int num = exact_log2 (INTVAL (x));
1209 gcc_assert (num >= 0);
1210 fprintf (stream, "#%d", num);
1211 }
1212 break;
1213
1214 case 'W':
1215 {
1216 int num = exact_log2 (~INTVAL (x));
1217 gcc_assert (num >= 0);
1218 fprintf (stream, "#%d", num);
1219 }
1220 break;
1221
1222 case 'd':
1223 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1224
1225 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1226 break;
1227
1228 case 'N':
1229 if (x == CONST0_RTX (GET_MODE (x)))
1230 {
1231 fprintf ((stream), "r63");
1232 break;
1233 }
1234 goto default_output;
1235 case 'u':
1236 if (CONST_INT_P (x))
1237 {
1238 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1239 break;
1240 }
1241 /* Fall through. */
1242
1243 default_output:
1244 default:
1245 regno = 0;
1246 mode = GET_MODE (x);
1247
1248 switch (GET_CODE (x))
1249 {
1250 case TRUNCATE:
1251 {
1252 rtx inner = XEXP (x, 0);
1253 int offset = 0;
1254 enum machine_mode inner_mode;
1255
1256 /* We might see SUBREGs with vector mode registers inside. */
1257 if (GET_CODE (inner) == SUBREG
1258 && (GET_MODE_SIZE (GET_MODE (inner))
1259 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1260 && subreg_lowpart_p (inner))
1261 inner = SUBREG_REG (inner);
1262 if (CONST_INT_P (inner))
1263 {
1264 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1265 goto default_output;
1266 }
1267 inner_mode = GET_MODE (inner);
1268 if (GET_CODE (inner) == SUBREG
1269 && (GET_MODE_SIZE (GET_MODE (inner))
1270 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1271 && REG_P (SUBREG_REG (inner)))
1272 {
1273 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1274 GET_MODE (SUBREG_REG (inner)),
1275 SUBREG_BYTE (inner),
1276 GET_MODE (inner));
1277 inner = SUBREG_REG (inner);
1278 }
1279 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1280 abort ();
1281 /* Floating point register pairs are always big endian;
1282 general purpose registers are 64 bit wide. */
1283 regno = REGNO (inner);
1284 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1285 - HARD_REGNO_NREGS (regno, mode))
1286 + offset;
1287 x = inner;
1288 goto reg;
1289 }
1290 case SIGN_EXTEND:
1291 x = XEXP (x, 0);
1292 goto reg;
1293 /* FIXME: We need this on SHmedia32 because reload generates
1294 some sign-extended HI or QI loads into DImode registers
1295 but, because Pmode is SImode, the address ends up with a
1296 subreg:SI of the DImode register. Maybe reload should be
1297 fixed so as to apply alter_subreg to such loads? */
1298 case IF_THEN_ELSE:
1299 gcc_assert (trapping_target_operand (x, VOIDmode));
1300 x = XEXP (XEXP (x, 2), 0);
1301 goto default_output;
1302 case SUBREG:
1303 gcc_assert (SUBREG_BYTE (x) == 0
1304 && REG_P (SUBREG_REG (x)));
1305
1306 x = SUBREG_REG (x);
1307 /* Fall through. */
1308
1309 reg:
1310 case REG:
1311 regno += REGNO (x);
1312 if (FP_REGISTER_P (regno)
1313 && mode == V16SFmode)
1314 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1315 else if (FP_REGISTER_P (REGNO (x))
1316 && mode == V4SFmode)
1317 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1318 else if (REG_P (x)
1319 && mode == V2SFmode)
1320 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1321 else if (FP_REGISTER_P (REGNO (x))
1322 && GET_MODE_SIZE (mode) > 4)
1323 fprintf ((stream), "d%s", reg_names[regno] + 1);
1324 else
1325 fputs (reg_names[regno], (stream));
1326 break;
1327
1328 case MEM:
1329 output_address (XEXP (x, 0));
1330 break;
1331
1332 default:
1333 if (TARGET_SH1)
1334 fputc ('#', stream);
1335 output_addr_const (stream, x);
1336 break;
1337 }
1338 break;
1339 }
1340 }
1341 \f
1342
1343 /* Encode symbol attributes of a SYMBOL_REF into its
1344 SYMBOL_REF_FLAGS. */
1345 static void
1346 sh_encode_section_info (tree decl, rtx rtl, int first)
1347 {
1348 default_encode_section_info (decl, rtl, first);
1349
1350 if (TREE_CODE (decl) == FUNCTION_DECL
1351 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1352 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1353 }
1354
1355 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1356 static void
1357 force_into (rtx value, rtx target)
1358 {
1359 value = force_operand (value, target);
1360 if (! rtx_equal_p (value, target))
1361 emit_insn (gen_move_insn (target, value));
1362 }
1363
1364 /* Emit code to perform a block move. Choose the best method.
1365
1366 OPERANDS[0] is the destination.
1367 OPERANDS[1] is the source.
1368 OPERANDS[2] is the size.
1369 OPERANDS[3] is the alignment safe to use. */
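/* A minimal sketch of how the movmemsi expander in sh.md is assumed to
   invoke this function (see sh.md for the actual pattern):

     if (expand_block_move (operands))
       DONE;
     else
       FAIL;

   A nonzero return means the move was emitted here, either inline via
   movua.l or as a call to one of the __movmem* library routines; a zero
   return lets the generic block-move expansion take over.  */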
1370
1371 int
1372 expand_block_move (rtx *operands)
1373 {
1374 int align = INTVAL (operands[3]);
1375 int constp = (CONST_INT_P (operands[2]));
1376 int bytes = (constp ? INTVAL (operands[2]) : 0);
1377
1378 if (! constp)
1379 return 0;
1380
1381 /* If we could use mov.l to move words and dest is word-aligned, we
1382 can use movua.l for loads and still generate a relatively short
1383 and efficient sequence. */
1384 if (TARGET_SH4A_ARCH && align < 4
1385 && MEM_ALIGN (operands[0]) >= 32
1386 && can_move_by_pieces (bytes, 32))
1387 {
1388 rtx dest = copy_rtx (operands[0]);
1389 rtx src = copy_rtx (operands[1]);
1390 /* We could use different pseudos for each copied word, but
1391 since movua can only load into r0, it's kind of
1392 pointless. */
1393 rtx temp = gen_reg_rtx (SImode);
1394 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1395 int copied = 0;
1396
1397 while (copied + 4 <= bytes)
1398 {
1399 rtx to = adjust_address (dest, SImode, copied);
1400 rtx from = adjust_automodify_address (src, BLKmode,
1401 src_addr, copied);
1402
1403 set_mem_size (from, GEN_INT (4));
1404 emit_insn (gen_movua (temp, from));
1405 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1406 emit_move_insn (to, temp);
1407 copied += 4;
1408 }
1409
1410 if (copied < bytes)
1411 move_by_pieces (adjust_address (dest, BLKmode, copied),
1412 adjust_automodify_address (src, BLKmode,
1413 src_addr, copied),
1414 bytes - copied, align, 0);
1415
1416 return 1;
1417 }
1418
1419 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1420 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1421 if (align < 4 || (bytes % 4 != 0))
1422 return 0;
1423
1424 if (TARGET_HARD_SH4)
1425 {
1426 if (bytes < 12)
1427 return 0;
1428 else if (bytes == 12)
1429 {
1430 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1431 rtx r4 = gen_rtx_REG (SImode, 4);
1432 rtx r5 = gen_rtx_REG (SImode, 5);
1433
1434 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1435 force_into (XEXP (operands[0], 0), r4);
1436 force_into (XEXP (operands[1], 0), r5);
1437 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1438 return 1;
1439 }
1440 else if (! TARGET_SMALLCODE)
1441 {
1442 const char *entry_name;
1443 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1444 int dwords;
1445 rtx r4 = gen_rtx_REG (SImode, 4);
1446 rtx r5 = gen_rtx_REG (SImode, 5);
1447 rtx r6 = gen_rtx_REG (SImode, 6);
1448
1449 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1450 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1451 force_into (XEXP (operands[0], 0), r4);
1452 force_into (XEXP (operands[1], 0), r5);
1453
1454 dwords = bytes >> 3;
1455 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1456 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1457 return 1;
1458 }
1459 else
1460 return 0;
1461 }
1462 if (bytes < 64)
1463 {
1464 char entry[30];
1465 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1466 rtx r4 = gen_rtx_REG (SImode, 4);
1467 rtx r5 = gen_rtx_REG (SImode, 5);
1468
1469 sprintf (entry, "__movmemSI%d", bytes);
1470 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1471 force_into (XEXP (operands[0], 0), r4);
1472 force_into (XEXP (operands[1], 0), r5);
1473 emit_insn (gen_block_move_real (func_addr_rtx));
1474 return 1;
1475 }
1476
 1477 /* This is the same number of bytes as a memcpy call, but to a different,
 1478 less common function name, so this will occasionally use more space. */
1479 if (! TARGET_SMALLCODE)
1480 {
1481 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1482 int final_switch, while_loop;
1483 rtx r4 = gen_rtx_REG (SImode, 4);
1484 rtx r5 = gen_rtx_REG (SImode, 5);
1485 rtx r6 = gen_rtx_REG (SImode, 6);
1486
1487 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1488 force_into (XEXP (operands[0], 0), r4);
1489 force_into (XEXP (operands[1], 0), r5);
1490
1491 /* r6 controls the size of the move. 16 is decremented from it
1492 for each 64 bytes moved. Then the negative bit left over is used
1493 as an index into a list of move instructions. e.g., a 72 byte move
1494 would be set up with size(r6) = 14, for one iteration through the
1495 big while loop, and a switch of -2 for the last part. */
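/* Working the 72-byte example through the formulas below: bytes / 4 = 18,
   so final_switch = 16 - (18 % 16) = 14 and while_loop = (18 / 16 - 1) * 16
   = 0, giving r6 = 14; one pass of the 64-byte loop subtracts 16, leaving
   the -2 switch index for the trailing 8 bytes.  */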
1496
1497 final_switch = 16 - ((bytes / 4) % 16);
1498 while_loop = ((bytes / 4) / 16 - 1) * 16;
1499 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1500 emit_insn (gen_block_lump_real (func_addr_rtx));
1501 return 1;
1502 }
1503
1504 return 0;
1505 }
1506
1507 /* Prepare operands for a move define_expand; specifically, one of the
1508 operands must be in a register. */
1509
1510 int
1511 prepare_move_operands (rtx operands[], enum machine_mode mode)
1512 {
1513 if ((mode == SImode || mode == DImode)
1514 && flag_pic
1515 && ! ((mode == Pmode || mode == ptr_mode)
1516 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1517 {
1518 rtx temp;
1519 if (SYMBOLIC_CONST_P (operands[1]))
1520 {
1521 if (MEM_P (operands[0]))
1522 operands[1] = force_reg (Pmode, operands[1]);
1523 else if (TARGET_SHMEDIA
1524 && GET_CODE (operands[1]) == LABEL_REF
1525 && target_reg_operand (operands[0], mode))
1526 /* It's ok. */;
1527 else
1528 {
1529 temp = (!can_create_pseudo_p ()
1530 ? operands[0]
1531 : gen_reg_rtx (Pmode));
1532 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1533 }
1534 }
1535 else if (GET_CODE (operands[1]) == CONST
1536 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1537 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1538 {
1539 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1540 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1541 mode, temp);
1542 operands[1] = expand_binop (mode, add_optab, temp,
1543 XEXP (XEXP (operands[1], 0), 1),
1544 (!can_create_pseudo_p ()
1545 ? temp
1546 : gen_reg_rtx (Pmode)),
1547 0, OPTAB_LIB_WIDEN);
1548 }
1549 }
1550
1551 if (! reload_in_progress && ! reload_completed)
1552 {
 1553 /* Copy the source to a register if neither operand is a register. */
1554 if (! register_operand (operands[0], mode)
1555 && ! sh_register_operand (operands[1], mode))
1556 operands[1] = copy_to_mode_reg (mode, operands[1]);
1557
1558 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1559 {
 1560 /* This is like change_address_1 (operands[0], mode, 0, 1),
1561 except that we can't use that function because it is static. */
1562 rtx new_rtx = change_address (operands[0], mode, 0);
1563 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1564 operands[0] = new_rtx;
1565 }
1566
1567 /* This case can happen while generating code to move the result
1568 of a library call to the target. Reject `st r0,@(rX,rY)' because
1569 reload will fail to find a spill register for rX, since r0 is already
1570 being used for the source. */
1571 else if (TARGET_SH1
1572 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1573 && MEM_P (operands[0])
1574 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1575 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1576 operands[1] = copy_to_mode_reg (mode, operands[1]);
1577 }
1578
1579 if (mode == Pmode || mode == ptr_mode)
1580 {
1581 rtx op0, op1, opc;
1582 enum tls_model tls_kind;
1583
1584 op0 = operands[0];
1585 op1 = operands[1];
1586 if (GET_CODE (op1) == CONST
1587 && GET_CODE (XEXP (op1, 0)) == PLUS
1588 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1589 != TLS_MODEL_NONE))
1590 {
1591 opc = XEXP (XEXP (op1, 0), 1);
1592 op1 = XEXP (XEXP (op1, 0), 0);
1593 }
1594 else
1595 opc = NULL_RTX;
1596
1597 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1598 {
1599 rtx tga_op1, tga_ret, tmp, tmp2;
1600
1601 switch (tls_kind)
1602 {
1603 case TLS_MODEL_GLOBAL_DYNAMIC:
1604 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1605 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1606 op1 = tga_ret;
1607 break;
1608
1609 case TLS_MODEL_LOCAL_DYNAMIC:
1610 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1611 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1612
1613 tmp = gen_reg_rtx (Pmode);
1614 emit_move_insn (tmp, tga_ret);
1615
1616 if (register_operand (op0, Pmode))
1617 tmp2 = op0;
1618 else
1619 tmp2 = gen_reg_rtx (Pmode);
1620
1621 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1622 op1 = tmp2;
1623 break;
1624
1625 case TLS_MODEL_INITIAL_EXEC:
1626 if (! flag_pic)
1627 {
1628 /* Don't schedule insns for getting GOT address when
1629 the first scheduling is enabled, to avoid spill
1630 failures for R0. */
1631 if (flag_schedule_insns)
1632 emit_insn (gen_blockage ());
1633 emit_insn (gen_GOTaddr2picreg ());
1634 emit_use (gen_rtx_REG (SImode, PIC_REG));
1635 if (flag_schedule_insns)
1636 emit_insn (gen_blockage ());
1637 }
1638 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1639 tmp = gen_sym2GOTTPOFF (op1);
1640 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1641 op1 = tga_op1;
1642 break;
1643
1644 case TLS_MODEL_LOCAL_EXEC:
1645 tmp2 = gen_reg_rtx (Pmode);
1646 emit_insn (gen_load_gbr (tmp2));
1647 tmp = gen_reg_rtx (Pmode);
1648 emit_insn (gen_symTPOFF2reg (tmp, op1));
1649
1650 if (register_operand (op0, Pmode))
1651 op1 = op0;
1652 else
1653 op1 = gen_reg_rtx (Pmode);
1654
1655 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1656 break;
1657
1658 default:
1659 gcc_unreachable ();
1660 }
1661 if (opc)
1662 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1663 operands[1] = op1;
1664 }
1665 }
1666
1667 return 0;
1668 }
1669
1670 enum rtx_code
1671 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1672 enum rtx_code comparison)
1673 {
1674 rtx op1;
1675 rtx scratch = NULL_RTX;
1676
1677 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1678 comparison = GET_CODE (operands[0]);
1679 else
1680 scratch = operands[4];
1681 if (CONST_INT_P (operands[1])
1682 && !CONST_INT_P (operands[2]))
1683 {
1684 rtx tmp = operands[1];
1685
1686 operands[1] = operands[2];
1687 operands[2] = tmp;
1688 comparison = swap_condition (comparison);
1689 }
1690 if (CONST_INT_P (operands[2]))
1691 {
1692 HOST_WIDE_INT val = INTVAL (operands[2]);
1693 if ((val == -1 || val == -0x81)
1694 && (comparison == GT || comparison == LE))
1695 {
1696 comparison = (comparison == GT) ? GE : LT;
1697 operands[2] = gen_int_mode (val + 1, mode);
1698 }
1699 else if ((val == 1 || val == 0x80)
1700 && (comparison == GE || comparison == LT))
1701 {
1702 comparison = (comparison == GE) ? GT : LE;
1703 operands[2] = gen_int_mode (val - 1, mode);
1704 }
1705 else if (val == 1 && (comparison == GEU || comparison == LTU))
1706 {
1707 comparison = (comparison == GEU) ? NE : EQ;
1708 operands[2] = CONST0_RTX (mode);
1709 }
1710 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1711 {
1712 comparison = (comparison == GEU) ? GTU : LEU;
1713 operands[2] = gen_int_mode (val - 1, mode);
1714 }
1715 else if (val == 0 && (comparison == GTU || comparison == LEU))
1716 comparison = (comparison == GTU) ? NE : EQ;
1717 else if (mode == SImode
1718 && ((val == 0x7fffffff
1719 && (comparison == GTU || comparison == LEU))
1720 || ((unsigned HOST_WIDE_INT) val
1721 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1722 && (comparison == GEU || comparison == LTU))))
1723 {
1724 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1725 operands[2] = CONST0_RTX (mode);
1726 }
1727 }
1728 op1 = operands[1];
1729 if (can_create_pseudo_p ())
1730 operands[1] = force_reg (mode, op1);
1731 /* When we are handling DImode comparisons, we want to keep constants so
1732 that we can optimize the component comparisons; however, memory loads
1733 are better issued as a whole so that they can be scheduled well.
1734 SImode equality comparisons allow I08 constants, but only when they
1735 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1736 into a register, that register might as well be r0, and we allow the
1737 constant. If it is already in a register, this is likely to be
1738 allocated to a different hard register, thus we load the constant into
1739 a register unless it is zero. */
1740 if (!REG_P (operands[2])
1741 && (!CONST_INT_P (operands[2])
1742 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1743 && ((comparison != EQ && comparison != NE)
1744 || (REG_P (op1) && REGNO (op1) != R0_REG)
1745 || !satisfies_constraint_I08 (operands[2])))))
1746 {
1747 if (scratch && GET_MODE (scratch) == mode)
1748 {
1749 emit_move_insn (scratch, operands[2]);
1750 operands[2] = scratch;
1751 }
1752 else if (can_create_pseudo_p ())
1753 operands[2] = force_reg (mode, operands[2]);
1754 }
1755 return comparison;
1756 }
1757
1758 void
1759 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1760 {
1761 rtx (*branch_expander) (rtx) = gen_branch_true;
1762 rtx jump;
1763
1764 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1765 switch (comparison)
1766 {
1767 case NE: case LT: case LE: case LTU: case LEU:
1768 comparison = reverse_condition (comparison);
1769 branch_expander = gen_branch_false;
1770 default: ;
1771 }
1772 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1773 gen_rtx_fmt_ee (comparison, SImode,
1774 operands[1], operands[2])));
1775 jump = emit_jump_insn (branch_expander (operands[3]));
1776 if (probability >= 0)
1777 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1778
1779 }
1780
1781 /* ??? How should we distribute probabilities when more than one branch
1782 is generated? So far we only have some ad-hoc observations:
1783 - If the operands are random, they are likely to differ in both parts.
1784 - If comparing items in a hash chain, the operands are random or equal;
1785 operation should be EQ or NE.
1786 - If items are searched in an ordered tree from the root, we can expect
1787 the highpart to be unequal about half of the time; operation should be
1788 an inequality comparison, operands non-constant, and overall probability
1789 about 50%. Likewise for quicksort.
1790 - Range checks will be often made against constants. Even if we assume for
1791 simplicity an even distribution of the non-constant operand over a
1792 sub-range here, the same probability could be generated with differently
1793 wide sub-ranges - as long as the ratio of the part of the subrange that
1794 is before the threshold to the part that comes after the threshold stays
1795 the same. Thus, we can't really tell anything here;
1796 assuming random distribution is at least simple.
1797 */
1798
1799 bool
1800 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1801 {
1802 enum rtx_code msw_taken, msw_skip, lsw_taken;
1803 rtx skip_label = NULL_RTX;
1804 rtx op1h, op1l, op2h, op2l;
1805 int num_branches;
1806 int prob, rev_prob;
1807 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1808 rtx scratch = operands[4];
1809
1810 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1811 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1812 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1813 op1l = gen_lowpart (SImode, operands[1]);
1814 op2l = gen_lowpart (SImode, operands[2]);
1815 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1816 prob = split_branch_probability;
1817 rev_prob = REG_BR_PROB_BASE - prob;
1818 switch (comparison)
1819 {
1820 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1821 That costs 1 cycle more when the first branch can be predicted taken,
1822 but saves us mispredicts because only one branch needs prediction.
1823 It also enables generating the cmpeqdi_t-1 pattern. */
1824 case EQ:
1825 if (TARGET_CMPEQDI_T)
1826 {
1827 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1828 emit_jump_insn (gen_branch_true (operands[3]));
1829 return true;
1830 }
1831 msw_skip = NE;
1832 lsw_taken = EQ;
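/* I.e. branch past the low-word test when the high words differ (the
   result is then known to be "not equal"), and branch to the target when
   the low words compare equal.  */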
1833 if (prob >= 0)
1834 {
1835 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
1836
1837 msw_skip_prob = rev_prob;
1838 if (REG_BR_PROB_BASE <= 65535)
1839 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1840 else
1841 {
1842 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1843 lsw_taken_prob
1844 = (prob
1845 ? (REG_BR_PROB_BASE
1846 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1847 / ((HOST_WIDEST_INT) prob << 32)))
1848 : 0);
1849 }
1850 }
1851 break;
1852 case NE:
1853 if (TARGET_CMPEQDI_T)
1854 {
1855 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1856 emit_jump_insn (gen_branch_false (operands[3]));
1857 return true;
1858 }
1859 msw_taken = NE;
1860 msw_taken_prob = prob;
1861 lsw_taken = NE;
1862 lsw_taken_prob = 0;
1863 break;
1864 case GTU: case GT:
1865 msw_taken = comparison;
1866 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1867 break;
1868 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1869 msw_skip = swap_condition (msw_taken);
1870 lsw_taken = GTU;
1871 break;
1872 case GEU: case GE:
1873 if (op2l == CONST0_RTX (SImode))
1874 msw_taken = comparison;
1875 else
1876 {
1877 msw_taken = comparison == GE ? GT : GTU;
1878 msw_skip = swap_condition (msw_taken);
1879 lsw_taken = GEU;
1880 }
1881 break;
1882 case LTU: case LT:
1883 msw_taken = comparison;
1884 if (op2l == CONST0_RTX (SImode))
1885 break;
1886 msw_skip = swap_condition (msw_taken);
1887 lsw_taken = LTU;
1888 break;
1889 case LEU: case LE:
1890 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1891 msw_taken = comparison;
1892 else
1893 {
1894 lsw_taken = LEU;
1895 if (comparison == LE)
1896 msw_taken = LT;
1897 else if (op2h != CONST0_RTX (SImode))
1898 msw_taken = LTU;
1899 else
1900 break;
1901 msw_skip = swap_condition (msw_taken);
1902 }
1903 break;
1904 default: return false;
1905 }
1906 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1907 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1908 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1909 if (comparison != EQ && comparison != NE && num_branches > 1)
1910 {
1911 if (!CONSTANT_P (operands[2])
1912 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1913 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1914 {
1915 msw_taken_prob = prob / 2U;
1916 msw_skip_prob
1917 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1918 lsw_taken_prob = prob;
1919 }
1920 else
1921 {
1922 msw_taken_prob = prob;
1923 msw_skip_prob = REG_BR_PROB_BASE;
1924 /* ??? If we have a constant op2h, should we use that when
1925 calculating lsw_taken_prob? */
1926 lsw_taken_prob = prob;
1927 }
1928 }
1929 operands[1] = op1h;
1930 operands[2] = op2h;
1931 operands[4] = NULL_RTX;
1932 if (reload_completed
1933 && ! arith_reg_or_0_operand (op2h, SImode)
1934 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1935 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1936 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1937 {
1938 emit_move_insn (scratch, operands[2]);
1939 operands[2] = scratch;
1940 }
1941 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1942 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1943 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1944 {
1945 rtx taken_label = operands[3];
1946
1947 /* Operands were possibly modified, but msw_skip doesn't expect this.
1948 Always use the original ones. */
1949 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1950 {
1951 operands[1] = op1h;
1952 operands[2] = op2h;
1953 }
1954
1955 operands[3] = skip_label = gen_label_rtx ();
1956 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
1957 operands[3] = taken_label;
1958 }
1959 operands[1] = op1l;
1960 operands[2] = op2l;
1961 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
1962 {
1963 if (reload_completed
1964 && ! arith_reg_or_0_operand (op2l, SImode)
1965 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
1966 {
1967 emit_move_insn (scratch, operands[2]);
1968 operands[2] = scratch;
1969 }
1970 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
1971 }
1972 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1973 emit_label (skip_label);
1974 return true;
1975 }
1976
1977 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
1978
1979 static void
1980 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
1981 {
1982 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1983 {
1984 insn = gen_rtx_PARALLEL (VOIDmode,
1985 gen_rtvec (2, insn,
1986 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1987 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1988 }
1989 else
1990 emit_insn (insn);
1991 }
1992
1993 /* Prepare the operands for an scc instruction; make sure that the
1994 compare has been done and the result is in T_REG. */
1995 void
1996 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
1997 {
1998 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1999 enum rtx_code oldcode = code;
2000 enum machine_mode mode;
2001
2002 /* First need a compare insn. */
2003 switch (code)
2004 {
2005 case NE:
2006 /* It isn't possible to handle this case. */
2007 gcc_unreachable ();
2008 case LT:
2009 code = GT;
2010 break;
2011 case LE:
2012 code = GE;
2013 break;
2014 case LTU:
2015 code = GTU;
2016 break;
2017 case LEU:
2018 code = GEU;
2019 break;
2020 default:
2021 break;
2022 }
2023 if (code != oldcode)
2024 {
2025 rtx tmp = op0;
2026 op0 = op1;
2027 op1 = tmp;
2028 }
2029
2030 mode = GET_MODE (op0);
2031 if (mode == VOIDmode)
2032 mode = GET_MODE (op1);
2033
2034 op0 = force_reg (mode, op0);
2035 if ((code != EQ && code != NE
2036 && (op1 != const0_rtx
2037 || code == GTU || code == GEU || code == LTU || code == LEU))
2038 || (mode == DImode && op1 != const0_rtx)
2039 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2040 op1 = force_reg (mode, op1);
2041
2042 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2043 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2044 mode);
2045 }
2046
2047 rtx
2048 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2049 rtx op0, rtx op1)
2050 {
2051 rtx target = gen_reg_rtx (SImode);
2052 rtx tmp;
2053
2054 gcc_assert (TARGET_SHMEDIA);
2055 switch (code)
2056 {
2057 case EQ:
2058 case GT:
2059 case LT:
2060 case UNORDERED:
2061 case GTU:
2062 case LTU:
2063 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2064 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2065 code = NE;
2066 break;
2067
2068 case NE:
2069 case GE:
2070 case LE:
2071 case ORDERED:
2072 case GEU:
2073 case LEU:
2074 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2075 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2076 code = EQ;
2077 break;
2078
2079 case UNEQ:
2080 case UNGE:
2081 case UNGT:
2082 case UNLE:
2083 case UNLT:
2084 case LTGT:
2085 return NULL_RTX;
2086
2087 default:
2088 gcc_unreachable ();
2089 }
2090
2091 if (mode == DImode)
2092 {
2093 rtx t2 = gen_reg_rtx (DImode);
2094 emit_insn (gen_extendsidi2 (t2, target));
2095 target = t2;
2096 }
2097
2098 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2099 }
2100
2101 /* Called from the md file, set up the operands of a compare instruction. */
2102
2103 void
2104 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2105 {
2106 enum rtx_code code = GET_CODE (operands[0]);
2107 enum rtx_code branch_code;
2108 rtx op0 = operands[1];
2109 rtx op1 = operands[2];
2110 rtx insn, tem;
2111 bool need_ccmpeq = false;
2112
2113 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2114 {
2115 op0 = force_reg (mode, op0);
2116 op1 = force_reg (mode, op1);
2117 }
2118 else
2119 {
2120 if (code != EQ || mode == DImode)
2121 {
2122 /* Force args into regs, since we can't use constants here. */
2123 op0 = force_reg (mode, op0);
2124 if (op1 != const0_rtx || code == GTU || code == GEU)
2125 op1 = force_reg (mode, op1);
2126 }
2127 }
2128
2129 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2130 {
2131 if (code == LT
2132 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2133 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2134 {
2135 tem = op0, op0 = op1, op1 = tem;
2136 code = swap_condition (code);
2137 }
2138
2139 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2140 if (code == GE)
2141 {
2142 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2143 need_ccmpeq = true;
2144 code = GT;
2145 }
2146
2147 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2148 to EQ/GT respectively. */
2149 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2150 }
2151
2152 switch (code)
2153 {
2154 case EQ:
2155 case GT:
2156 case GE:
2157 case GTU:
2158 case GEU:
2159 branch_code = code;
2160 break;
2161 case NE:
2162 case LT:
2163 case LE:
2164 case LTU:
2165 case LEU:
2166 branch_code = reverse_condition (code);
2167 break;
2168 default:
2169 gcc_unreachable ();
2170 }
2171
2172 insn = gen_rtx_SET (VOIDmode,
2173 gen_rtx_REG (SImode, T_REG),
2174 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2175
2176 sh_emit_set_t_insn (insn, mode);
2177 if (need_ccmpeq)
2178 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2179
2180 if (branch_code == code)
2181 emit_jump_insn (gen_branch_true (operands[3]));
2182 else
2183 emit_jump_insn (gen_branch_false (operands[3]));
2184 }
2185
2186 void
2187 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2188 {
2189 enum rtx_code code = GET_CODE (operands[1]);
2190 rtx op0 = operands[2];
2191 rtx op1 = operands[3];
2192 rtx lab = NULL_RTX;
2193 bool invert = false;
2194 rtx tem;
2195
2196 op0 = force_reg (mode, op0);
2197 if ((code != EQ && code != NE
2198 && (op1 != const0_rtx
2199 || code == GTU || code == GEU || code == LTU || code == LEU))
2200 || (mode == DImode && op1 != const0_rtx)
2201 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2202 op1 = force_reg (mode, op1);
2203
2204 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2205 {
2206 if (code == LT || code == LE)
2207 {
2208 code = swap_condition (code);
2209 tem = op0, op0 = op1, op1 = tem;
2210 }
2211 if (code == GE)
2212 {
2213 if (TARGET_IEEE)
2214 {
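/* With TARGET_IEEE, a >= b is computed as (a == b) || (a > b): test EQ
   first and branch over the GT test when it already holds, leaving T
   set.  */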
2215 lab = gen_label_rtx ();
2216 sh_emit_scc_to_t (EQ, op0, op1);
2217 emit_jump_insn (gen_branch_true (lab));
2218 code = GT;
2219 }
2220 else
2221 {
2222 code = LT;
2223 invert = true;
2224 }
2225 }
2226 }
2227
2228 if (code == NE)
2229 {
2230 code = EQ;
2231 invert = true;
2232 }
2233
2234 sh_emit_scc_to_t (code, op0, op1);
2235 if (lab)
2236 emit_label (lab);
2237 if (invert)
2238 emit_insn (gen_movnegt (operands[0]));
2239 else
2240 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2241 }
2242 \f
2243 /* Functions to output assembly code. */
2244
2245 /* Return a sequence of instructions to perform DI or DF move.
2246
2247 Since the SH cannot move a DI or DF in one instruction, we have
2248 to take care when we see overlapping source and dest registers. */
2249
2250 const char *
2251 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2252 enum machine_mode mode)
2253 {
2254 rtx dst = operands[0];
2255 rtx src = operands[1];
2256
2257 if (MEM_P (dst)
2258 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2259 return "mov.l %T1,%0\n\tmov.l %1,%0";
2260
2261 if (register_operand (dst, mode)
2262 && register_operand (src, mode))
2263 {
2264 if (REGNO (src) == MACH_REG)
2265 return "sts mach,%S0\n\tsts macl,%R0";
2266
2267 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2268 when mov.d r1,r0 do r1->r0 then r2->r1. */
2269
2270 if (REGNO (src) + 1 == REGNO (dst))
2271 return "mov %T1,%T0\n\tmov %1,%0";
2272 else
2273 return "mov %1,%0\n\tmov %T1,%T0";
2274 }
2275 else if (CONST_INT_P (src))
2276 {
2277 if (INTVAL (src) < 0)
2278 output_asm_insn ("mov #-1,%S0", operands);
2279 else
2280 output_asm_insn ("mov #0,%S0", operands);
2281
2282 return "mov %1,%R0";
2283 }
2284 else if (MEM_P (src))
2285 {
2286 int ptrreg = -1;
2287 int dreg = REGNO (dst);
2288 rtx inside = XEXP (src, 0);
2289
2290 switch (GET_CODE (inside))
2291 {
2292 case REG:
2293 ptrreg = REGNO (inside);
2294 break;
2295
2296 case SUBREG:
2297 ptrreg = subreg_regno (inside);
2298 break;
2299
2300 case PLUS:
2301 ptrreg = REGNO (XEXP (inside, 0));
2302 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2303 an offsettable address. Unfortunately, offsettable addresses use
2304 QImode to check the offset, and a QImode offsettable address
2305 requires r0 for the other operand, which is not currently
2306 supported, so we can't use the 'o' constraint.
2307 Thus we must check for and handle r0+REG addresses here.
2308 We punt for now, since this is likely very rare. */
2309 gcc_assert (!REG_P (XEXP (inside, 1)));
2310 break;
2311
2312 case LABEL_REF:
2313 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2314 case POST_INC:
2315 return "mov.l %1,%0\n\tmov.l %1,%T0";
2316 default:
2317 gcc_unreachable ();
2318 }
2319
2320 /* Work out the safe way to copy. Copy into the second half first. */
2321 if (dreg == ptrreg)
2322 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2323 }
2324
2325 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2326 }
2327
2328 /* Print an instruction which would have gone into a delay slot after
2329 another instruction, but couldn't because the other instruction expanded
2330 into a sequence where putting the slot insn at the end wouldn't work. */
2331
2332 static void
2333 print_slot (rtx insn)
2334 {
2335 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2336
2337 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2338 }
2339
2340 const char *
2341 output_far_jump (rtx insn, rtx op)
2342 {
2343 struct { rtx lab, reg, op; } this_jmp;
2344 rtx braf_base_lab = NULL_RTX;
2345 const char *jump;
2346 int far;
2347 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2348 rtx prev;
2349
2350 this_jmp.lab = gen_label_rtx ();
2351
2352 if (TARGET_SH2
2353 && offset >= -32764
2354 && offset - get_attr_length (insn) <= 32766)
2355 {
2356 far = 0;
2357 jump = "mov.w %O0,%1; braf %1";
2358 }
2359 else
2360 {
2361 far = 1;
2362 if (flag_pic)
2363 {
2364 if (TARGET_SH2)
2365 jump = "mov.l %O0,%1; braf %1";
2366 else
2367 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2368 }
2369 else
2370 jump = "mov.l %O0,%1; jmp @%1";
2371 }
2372 /* If we have a scratch register available, use it. */
2373 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2374 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2375 {
2376 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2377 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2378 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2379 output_asm_insn (jump, &this_jmp.lab);
2380 if (dbr_sequence_length ())
2381 print_slot (final_sequence);
2382 else
2383 output_asm_insn ("nop", 0);
2384 }
2385 else
2386 {
2387 /* Output the delay slot insn first if any. */
2388 if (dbr_sequence_length ())
2389 print_slot (final_sequence);
2390
2391 this_jmp.reg = gen_rtx_REG (SImode, 13);
2392 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2393 Fortunately, MACL is fixed and call-clobbered, and we never
2394 need its value across jumps, so save r13 in it instead of in
2395 the stack. */
2396 if (TARGET_SH5)
2397 output_asm_insn ("lds r13, macl", 0);
2398 else
2399 output_asm_insn ("mov.l r13,@-r15", 0);
2400 output_asm_insn (jump, &this_jmp.lab);
2401 if (TARGET_SH5)
2402 output_asm_insn ("sts macl, r13", 0);
2403 else
2404 output_asm_insn ("mov.l @r15+,r13", 0);
2405 }
2406 if (far && flag_pic && TARGET_SH2)
2407 {
2408 braf_base_lab = gen_label_rtx ();
2409 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2410 CODE_LABEL_NUMBER (braf_base_lab));
2411 }
2412 if (far)
2413 output_asm_insn (".align 2", 0);
2414 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2415 this_jmp.op = op;
2416 if (far && flag_pic)
2417 {
2418 if (TARGET_SH2)
2419 this_jmp.lab = braf_base_lab;
2420 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2421 }
2422 else
2423 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2424 return "";
2425 }
2426
2427 /* Local label counter, used for constants in the pool and inside
2428 pattern branches. */
2429
2430 static int lf = 100;
2431
2432 /* Output code for ordinary branches. */
2433
2434 const char *
2435 output_branch (int logic, rtx insn, rtx *operands)
2436 {
2437 switch (get_attr_length (insn))
2438 {
2439 case 6:
2440 /* This can happen if filling the delay slot has caused a forward
2441 branch to exceed its range (we could reverse it, but only
2442 when we know we won't overextend other branches; this should
2443 best be handled by relaxation).
2444 It can also happen when other condbranches hoist delay slot insn
2445 from their destination, thus leading to code size increase.
2446 But the branch will still be in the range -4092..+4098 bytes. */
2447
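/* Schematically, for an out-of-range "bt target" the code below emits
       bf.s    .LFnn   (or bf/s, depending on the assembler dialect)
       <delay slot insn>
       bra     target
       nop
     .LFnn:
   i.e. the condition is inverted to branch around an unconditional jump
   to the real target.  (Illustrative sketch of the output.)  */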
2448 if (! TARGET_RELAX)
2449 {
2450 int label = lf++;
2451 /* The call to print_slot will clobber the operands. */
2452 rtx op0 = operands[0];
2453
2454 /* If the instruction in the delay slot is annulled (true), then
2455 there is no delay slot where we can put it now. The only safe
2456 place for it is after the label. final will do that by default. */
2457
2458 if (final_sequence
2459 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2460 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2461 {
2462 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2463 ASSEMBLER_DIALECT ? "/" : ".", label);
2464 print_slot (final_sequence);
2465 }
2466 else
2467 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2468
2469 output_asm_insn ("bra\t%l0", &op0);
2470 fprintf (asm_out_file, "\tnop\n");
2471 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2472
2473 return "";
2474 }
2475 /* When relaxing, handle this like a short branch. The linker
2476 will fix it up if it still doesn't fit after relaxation. */
2477 case 2:
2478 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2479
2480 /* These are for SH2e, in which we have to account for the
2481 extra nop because of the hardware bug in annulled branches. */
2482 case 8:
2483 if (! TARGET_RELAX)
2484 {
2485 int label = lf++;
2486
2487 gcc_assert (!final_sequence
2488 || !(INSN_ANNULLED_BRANCH_P
2489 (XVECEXP (final_sequence, 0, 0))));
2490 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2491 logic ? "f" : "t",
2492 ASSEMBLER_DIALECT ? "/" : ".", label);
2493 fprintf (asm_out_file, "\tnop\n");
2494 output_asm_insn ("bra\t%l0", operands);
2495 fprintf (asm_out_file, "\tnop\n");
2496 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2497
2498 return "";
2499 }
2500 /* When relaxing, fall through. */
2501 case 4:
2502 {
2503 char buffer[10];
2504
2505 sprintf (buffer, "b%s%ss\t%%l0",
2506 logic ? "t" : "f",
2507 ASSEMBLER_DIALECT ? "/" : ".");
2508 output_asm_insn (buffer, &operands[0]);
2509 return "nop";
2510 }
2511
2512 default:
2513 /* There should be no longer branches now - that would
2514 indicate that something has destroyed the branches set
2515 up in machine_dependent_reorg. */
2516 gcc_unreachable ();
2517 }
2518 }
2519
2520 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2521 fill in operand 9 as a label to the successor insn.
2522 We try to use jump threading where possible.
2523 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2524 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2525 follow jmp and bt, if the address is in range. */
2526 const char *
2527 output_branchy_insn (enum rtx_code code, const char *templ,
2528 rtx insn, rtx *operands)
2529 {
2530 rtx next_insn = NEXT_INSN (insn);
2531
2532 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2533 {
2534 rtx src = SET_SRC (PATTERN (next_insn));
2535 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2536 {
2537 /* Following branch not taken */
2538 operands[9] = gen_label_rtx ();
2539 emit_label_after (operands[9], next_insn);
2540 INSN_ADDRESSES_NEW (operands[9],
2541 INSN_ADDRESSES (INSN_UID (next_insn))
2542 + get_attr_length (next_insn));
2543 return templ;
2544 }
2545 else
2546 {
2547 int offset = (branch_dest (next_insn)
2548 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2549 if (offset >= -252 && offset <= 258)
2550 {
2551 if (GET_CODE (src) == IF_THEN_ELSE)
2552 /* branch_true */
2553 src = XEXP (src, 1);
2554 operands[9] = src;
2555 return templ;
2556 }
2557 }
2558 }
2559 operands[9] = gen_label_rtx ();
2560 emit_label_after (operands[9], insn);
2561 INSN_ADDRESSES_NEW (operands[9],
2562 INSN_ADDRESSES (INSN_UID (insn))
2563 + get_attr_length (insn));
2564 return templ;
2565 }
2566
2567 const char *
2568 output_ieee_ccmpeq (rtx insn, rtx *operands)
2569 {
2570 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2571 insn, operands);
2572 }
2573 \f
2574 /* Output the start of the assembler file. */
2575
2576 static void
2577 sh_file_start (void)
2578 {
2579 default_file_start ();
2580
2581 #ifdef SYMBIAN
2582 /* Declare the .directive section before it is used. */
2583 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2584 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2585 #endif
2586
2587 if (TARGET_ELF)
2588 /* We need to show the text section with the proper
2589 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2590 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2591 will complain. We can teach GAS specifically about the
2592 default attributes for our choice of text section, but
2593 then we would have to change GAS again if/when we change
2594 the text section name. */
2595 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2596 else
2597 /* Switch to the data section so that the coffsem symbol
2598 isn't in the text section. */
2599 switch_to_section (data_section);
2600
2601 if (TARGET_LITTLE_ENDIAN)
2602 fputs ("\t.little\n", asm_out_file);
2603
2604 if (!TARGET_ELF)
2605 {
2606 if (TARGET_SHCOMPACT)
2607 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2608 else if (TARGET_SHMEDIA)
2609 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2610 TARGET_SHMEDIA64 ? 64 : 32);
2611 }
2612 }
2613 \f
2614 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2615
2616 static bool
2617 unspec_caller_rtx_p (rtx pat)
2618 {
2619 rtx base, offset;
2620 int i;
2621
2622 split_const (pat, &base, &offset);
2623 if (GET_CODE (base) == UNSPEC)
2624 {
2625 if (XINT (base, 1) == UNSPEC_CALLER)
2626 return true;
2627 for (i = 0; i < XVECLEN (base, 0); i++)
2628 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2629 return true;
2630 }
2631 return false;
2632 }
2633
2634 /* Indicate that INSN cannot be duplicated. This is true for an insn
2635 that generates a unique label. */
2636
2637 static bool
2638 sh_cannot_copy_insn_p (rtx insn)
2639 {
2640 rtx pat;
2641
2642 if (!reload_completed || !flag_pic)
2643 return false;
2644
2645 if (!NONJUMP_INSN_P (insn))
2646 return false;
2647 if (asm_noperands (insn) >= 0)
2648 return false;
2649
2650 pat = PATTERN (insn);
2651 if (GET_CODE (pat) != SET)
2652 return false;
2653 pat = SET_SRC (pat);
2654
2655 if (unspec_caller_rtx_p (pat))
2656 return true;
2657
2658 return false;
2659 }
2660 \f
2661 /* Actual number of instructions used to make a shift by N. */
2662 static const char ashiftrt_insns[] =
2663 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2664
2665 /* Left shift and logical right shift are the same. */
2666 static const char shift_insns[] =
2667 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2668
2669 /* Individual shift amounts needed to get the above length sequences.
2670 One bit right shifts clobber the T bit, so when possible, put one bit
2671 shifts in the middle of the sequence, so the ends are eligible for
2672 branch delay slots. */
2673 static const short shift_amounts[32][5] = {
2674 {0}, {1}, {2}, {2, 1},
2675 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2676 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2677 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2678 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2679 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2680 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2681 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
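/* For example, a shift by 7 takes shift_insns[7] == 4 instructions with
   the amounts {2, 2, 1, 2}, and a shift by 14 takes 3 instructions with
   {8, -2, 8}; a negative entry means one shift in the opposite direction
   (see gen_ashift).  */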
2682
2683 /* Likewise, but for shift amounts < 16, up to three highmost bits
2684 might be clobbered. This is typically used when combined with some
2685 kind of sign or zero extension. */
2686
2687 static const char ext_shift_insns[] =
2688 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2689
2690 static const short ext_shift_amounts[32][4] = {
2691 {0}, {1}, {2}, {2, 1},
2692 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2693 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2694 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2695 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2696 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2697 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2698 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
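/* E.g. a shift by 6 is done here in ext_shift_insns[6] == 2 instructions
   as {8, -2} (left by 8, then right by 2), clobbering high bits, versus
   the 3 instructions of the exact sequence above.  */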
2699
2700 /* Assuming we have a value that has been sign-extended by at least one bit,
2701 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2702 to shift it by N without data loss, and quicker than by other means? */
2703 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
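/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15, the two cases
   where ext_shift_amounts ends in a single one-bit right shift ({8, -1}
   and {16, -1}) that can be turned into an arithmetic shift to restore
   the sign.  */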
2704
2705 /* This is used in length attributes in sh.md to help compute the length
2706 of arbitrary constant shift instructions. */
2707
2708 int
2709 shift_insns_rtx (rtx insn)
2710 {
2711 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2712 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2713 enum rtx_code shift_code = GET_CODE (set_src);
2714
2715 switch (shift_code)
2716 {
2717 case ASHIFTRT:
2718 return ashiftrt_insns[shift_count];
2719 case LSHIFTRT:
2720 case ASHIFT:
2721 return shift_insns[shift_count];
2722 default:
2723 gcc_unreachable ();
2724 }
2725 }
2726
2727 /* Return the cost of a shift. */
2728
2729 static inline int
2730 shiftcosts (rtx x)
2731 {
2732 int value;
2733
2734 if (TARGET_SHMEDIA)
2735 return 1;
2736
2737 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2738 {
2739 if (GET_MODE (x) == DImode
2740 && CONST_INT_P (XEXP (x, 1))
2741 && INTVAL (XEXP (x, 1)) == 1)
2742 return 2;
2743
2744 /* Everything else is invalid, because there is no pattern for it. */
2745 return MAX_COST;
2746 }
2747 /* If shifting by a non-constant, this will be expensive. */
2748 if (!CONST_INT_P (XEXP (x, 1)))
2749 return SH_DYNAMIC_SHIFT_COST;
2750
2751 /* Otherwise, return the true cost in instructions. Cope with out of range
2752 shift counts more or less arbitrarily. */
2753 value = INTVAL (XEXP (x, 1)) & 31;
2754
2755 if (GET_CODE (x) == ASHIFTRT)
2756 {
2757 int cost = ashiftrt_insns[value];
2758 /* If SH3, then we put the constant in a reg and use shad. */
2759 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2760 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2761 return cost;
2762 }
2763 else
2764 return shift_insns[value];
2765 }
2766
2767 /* Return the cost of an AND operation. */
2768
2769 static inline int
2770 andcosts (rtx x)
2771 {
2772 int i;
2773
2774 /* Anding with a register is a single-cycle `and' instruction. */
2775 if (!CONST_INT_P (XEXP (x, 1)))
2776 return 1;
2777
2778 i = INTVAL (XEXP (x, 1));
2779
2780 if (TARGET_SHMEDIA)
2781 {
2782 if (satisfies_constraint_I10 (XEXP (x, 1))
2783 || satisfies_constraint_J16 (XEXP (x, 1)))
2784 return 1;
2785 else
2786 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2787 }
2788
2789 /* These constants are single cycle extu.[bw] instructions. */
2790 if (i == 0xff || i == 0xffff)
2791 return 1;
2792 /* Constants that can be used in an and immediate instruction in a single
2793 cycle, but this requires r0, so make it a little more expensive. */
2794 if (CONST_OK_FOR_K08 (i))
2795 return 2;
2796 /* Constants that can be loaded with a mov immediate and an and.
2797 This case is probably unnecessary. */
2798 if (CONST_OK_FOR_I08 (i))
2799 return 2;
2800 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2801 This case is probably unnecessary. */
2802 return 3;
2803 }
2804
2805 /* Return the cost of an addition or a subtraction. */
2806
2807 static inline int
2808 addsubcosts (rtx x)
2809 {
2810 /* Adding a register is a single cycle insn. */
2811 if (REG_P (XEXP (x, 1))
2812 || GET_CODE (XEXP (x, 1)) == SUBREG)
2813 return 1;
2814
2815 /* Likewise for small constants. */
2816 if (CONST_INT_P (XEXP (x, 1))
2817 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2818 return 1;
2819
2820 if (TARGET_SHMEDIA)
2821 switch (GET_CODE (XEXP (x, 1)))
2822 {
2823 case CONST:
2824 case LABEL_REF:
2825 case SYMBOL_REF:
2826 return TARGET_SHMEDIA64 ? 5 : 3;
2827
2828 case CONST_INT:
2829 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2830 return 2;
2831 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2832 return 3;
2833 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2834 return 4;
2835
2836 /* Fall through. */
2837 default:
2838 return 5;
2839 }
2840
2841 /* Any other constant requires a 2 cycle pc-relative load plus an
2842 addition. */
2843 return 3;
2844 }
2845
2846 /* Return the cost of a multiply. */
2847 static inline int
2848 multcosts (rtx x ATTRIBUTE_UNUSED)
2849 {
2850 if (sh_multcost >= 0)
2851 return sh_multcost;
2852 if (TARGET_SHMEDIA)
2853 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2854 accept constants. Ideally, we would use a cost of one or two and
2855 add the cost of the operand, but disregard the latter when inside loops
2856 and loop invariant code motion is still to follow.
2857 Using a multiply first and splitting it later if it's a loss
2858 doesn't work because of different sign / zero extension semantics
2859 of multiplies vs. shifts. */
2860 return TARGET_SMALLCODE ? 2 : 3;
2861
2862 if (TARGET_SH2)
2863 {
2864 /* We have a mul insn, so we can never take more than the mul and the
2865 read of the mac reg, but count more because of the latency and extra
2866 reg usage. */
2867 if (TARGET_SMALLCODE)
2868 return 2;
2869 return 3;
2870 }
2871
2872 /* If we're aiming at small code, then just count the number of
2873 insns in a multiply call sequence. */
2874 if (TARGET_SMALLCODE)
2875 return 5;
2876
2877 /* Otherwise count all the insns in the routine we'd be calling too. */
2878 return 20;
2879 }
2880
2881 /* Compute a (partial) cost for rtx X. Return true if the complete
2882 cost has been computed, and false if subexpressions should be
2883 scanned. In either case, *TOTAL contains the cost result. */
2884
2885 static bool
2886 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2887 bool speed ATTRIBUTE_UNUSED)
2888 {
2889 switch (code)
2890 {
2891 case CONST_INT:
2892 if (TARGET_SHMEDIA)
2893 {
2894 if (INTVAL (x) == 0)
2895 *total = 0;
2896 else if (outer_code == AND && and_operand ((x), DImode))
2897 *total = 0;
2898 else if ((outer_code == IOR || outer_code == XOR
2899 || outer_code == PLUS)
2900 && CONST_OK_FOR_I10 (INTVAL (x)))
2901 *total = 0;
2902 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2903 *total = COSTS_N_INSNS (outer_code != SET);
2904 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2905 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2906 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2907 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2908 else
2909 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2910 return true;
2911 }
2912 if (CONST_OK_FOR_I08 (INTVAL (x)))
2913 *total = 0;
2914 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2915 && CONST_OK_FOR_K08 (INTVAL (x)))
2916 *total = 1;
2917 /* prepare_cmp_insn will force costly constants int registers before
2918 the cbranch[sd]i4 patterns can see them, so preserve potentially
2919 interesting ones not covered by I08 above. */
2920 else if (outer_code == COMPARE
2921 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2922 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2923 || INTVAL (x) == 0x7fffffff
2924 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2925 *total = 1;
2926 else
2927 *total = 8;
2928 return true;
2929
2930 case CONST:
2931 case LABEL_REF:
2932 case SYMBOL_REF:
2933 if (TARGET_SHMEDIA64)
2934 *total = COSTS_N_INSNS (4);
2935 else if (TARGET_SHMEDIA32)
2936 *total = COSTS_N_INSNS (2);
2937 else
2938 *total = 5;
2939 return true;
2940
2941 case CONST_DOUBLE:
2942 if (TARGET_SHMEDIA)
2943 *total = COSTS_N_INSNS (4);
2944 /* prepare_cmp_insn will force costly constants into registers before
2945 the cbranchdi4 pattern can see them, so preserve potentially
2946 interesting ones. */
2947 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2948 *total = 1;
2949 else
2950 *total = 10;
2951 return true;
2952 case CONST_VECTOR:
2953 if (x == CONST0_RTX (GET_MODE (x)))
2954 *total = 0;
2955 else if (sh_1el_vec (x, VOIDmode))
2956 *total = outer_code != SET;
2957 if (sh_rep_vec (x, VOIDmode))
2958 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2959 + (outer_code != SET));
2960 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2961 return true;
2962
2963 case PLUS:
2964 case MINUS:
2965 *total = COSTS_N_INSNS (addsubcosts (x));
2966 return true;
2967
2968 case AND:
2969 *total = COSTS_N_INSNS (andcosts (x));
2970 return true;
2971
2972 case MULT:
2973 *total = COSTS_N_INSNS (multcosts (x));
2974 return true;
2975
2976 case ASHIFT:
2977 case ASHIFTRT:
2978 case LSHIFTRT:
2979 *total = COSTS_N_INSNS (shiftcosts (x));
2980 return true;
2981
2982 case DIV:
2983 case UDIV:
2984 case MOD:
2985 case UMOD:
2986 *total = COSTS_N_INSNS (20);
2987 return true;
2988
2989 case PARALLEL:
2990 if (sh_1el_vec (x, VOIDmode))
2991 *total = outer_code != SET;
2992 if (sh_rep_vec (x, VOIDmode))
2993 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2994 + (outer_code != SET));
2995 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2996 return true;
2997
2998 case FLOAT:
2999 case FIX:
3000 *total = 100;
3001 return true;
3002
3003 default:
3004 return false;
3005 }
3006 }
3007
3008 /* Compute the cost of an address. For the SH, all valid addresses are
3009 the same cost. Use a slightly higher cost for reg + reg addressing,
3010 since it increases pressure on r0. */
3011
3012 static int
3013 sh_address_cost (rtx X,
3014 bool speed ATTRIBUTE_UNUSED)
3015 {
3016 return (GET_CODE (X) == PLUS
3017 && ! CONSTANT_P (XEXP (X, 1))
3018 && ! TARGET_SHMEDIA ? 1 : 0);
3019 }
3020
3021 /* Code to expand a shift. */
3022
3023 void
3024 gen_ashift (int type, int n, rtx reg)
3025 {
3026 /* Negative values here come from the shift_amounts array. */
3027 if (n < 0)
3028 {
3029 if (type == ASHIFT)
3030 type = LSHIFTRT;
3031 else
3032 type = ASHIFT;
3033 n = -n;
3034 }
3035
3036 switch (type)
3037 {
3038 case ASHIFTRT:
3039 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3040 break;
3041 case LSHIFTRT:
3042 if (n == 1)
3043 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3044 else
3045 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3046 break;
3047 case ASHIFT:
3048 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3049 break;
3050 }
3051 }
3052
3053 /* Same for HImode */
3054
3055 void
3056 gen_ashift_hi (int type, int n, rtx reg)
3057 {
3058 /* Negative values here come from the shift_amounts array. */
3059 if (n < 0)
3060 {
3061 if (type == ASHIFT)
3062 type = LSHIFTRT;
3063 else
3064 type = ASHIFT;
3065 n = -n;
3066 }
3067
3068 switch (type)
3069 {
3070 case ASHIFTRT:
3071 case LSHIFTRT:
3072 /* We don't have HImode right shift operations because using the
3073 ordinary 32 bit shift instructions for that doesn't generate proper
3074 zero/sign extension.
3075 gen_ashift_hi is only called in contexts where we know that the
3076 sign extension works out correctly. */
3077 {
3078 int offset = 0;
3079 if (GET_CODE (reg) == SUBREG)
3080 {
3081 offset = SUBREG_BYTE (reg);
3082 reg = SUBREG_REG (reg);
3083 }
3084 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3085 break;
3086 }
3087 case ASHIFT:
3088 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3089 break;
3090 }
3091 }
3092
3093 /* Output RTL to split a constant shift into its component SH constant
3094 shift instructions. */
3095
3096 void
3097 gen_shifty_op (int code, rtx *operands)
3098 {
3099 int value = INTVAL (operands[2]);
3100 int max, i;
3101
3102 /* Truncate the shift count in case it is out of bounds. */
3103 value = value & 31;
3104
3105 if (value == 31)
3106 {
3107 if (code == LSHIFTRT)
3108 {
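/* In effect: the rotate left by one moves the old bit 31 into T (and
   into bit 0), and movt then materializes T, i.e. the value logically
   shifted right by 31.  */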
3109 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3110 emit_insn (gen_movt (operands[0]));
3111 return;
3112 }
3113 else if (code == ASHIFT)
3114 {
3115 /* There is a two instruction sequence for 31 bit left shifts,
3116 but it requires r0. */
3117 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3118 {
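/* Keep only bit 0 (the `and #1' form requires r0), then rotate it into
   bit 31; the result equals the value shifted left by 31.  */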
3119 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3120 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3121 return;
3122 }
3123 }
3124 }
3125 else if (value == 0)
3126 {
3127 /* This can happen even when optimizing, if there were subregs before
3128 reload. Don't output a nop here, as this is never optimized away;
3129 use a no-op move instead. */
3130 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3131 return;
3132 }
3133
3134 max = shift_insns[value];
3135 for (i = 0; i < max; i++)
3136 gen_ashift (code, shift_amounts[value][i], operands[0]);
3137 }
3138
3139 /* Same as above, but optimized for values where the topmost bits don't
3140 matter. */
3141
3142 void
3143 gen_shifty_hi_op (int code, rtx *operands)
3144 {
3145 int value = INTVAL (operands[2]);
3146 int max, i;
3147 void (*gen_fun) (int, int, rtx);
3148
3149 /* This operation is used by and_shl for SImode values with a few
3150 high bits known to be cleared. */
3151 value &= 31;
3152 if (value == 0)
3153 {
3154 emit_insn (gen_nop ());
3155 return;
3156 }
3157
3158 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3159 if (code == ASHIFT)
3160 {
3161 max = ext_shift_insns[value];
3162 for (i = 0; i < max; i++)
3163 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3164 }
3165 else
3166 /* When shifting right, emit the shifts in reverse order, so that
3167 solitary negative values come first. */
3168 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3169 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3170 }
3171
3172 /* Output RTL for an arithmetic right shift. */
3173
3174 /* ??? Rewrite to use super-optimizer sequences. */
3175
3176 int
3177 expand_ashiftrt (rtx *operands)
3178 {
3179 rtx wrk;
3180 char func[18];
3181 int value;
3182
3183 if (TARGET_SH3)
3184 {
3185 if (!CONST_INT_P (operands[2]))
3186 {
3187 rtx count = copy_to_mode_reg (SImode, operands[2]);
3188 emit_insn (gen_negsi2 (count, count));
3189 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3190 return 1;
3191 }
3192 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3193 > 1 + SH_DYNAMIC_SHIFT_COST)
3194 {
3195 rtx count
3196 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3197 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3198 return 1;
3199 }
3200 }
3201 if (!CONST_INT_P (operands[2]))
3202 return 0;
3203
3204 value = INTVAL (operands[2]) & 31;
3205
3206 if (value == 31)
3207 {
3208 /* If we are called from abs expansion, arrange things so that we
3209 can use a single MT instruction that doesn't clobber the source,
3210 if LICM can hoist out the load of the constant zero. */
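/* In effect T becomes the sign bit of the source and the destination is
   set to 0 - T, i.e. -1 for negative inputs and 0 otherwise, which is
   exactly an arithmetic right shift by 31.  */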
3211 if (currently_expanding_to_rtl)
3212 {
3213 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3214 operands[1]));
3215 emit_insn (gen_mov_neg_si_t (operands[0]));
3216 return 1;
3217 }
3218 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3219 return 1;
3220 }
3221 else if (value >= 16 && value <= 19)
3222 {
3223 wrk = gen_reg_rtx (SImode);
3224 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3225 value -= 16;
3226 while (value--)
3227 gen_ashift (ASHIFTRT, 1, wrk);
3228 emit_move_insn (operands[0], wrk);
3229 return 1;
3230 }
3231 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3232 else if (value <= 5)
3233 {
3234 wrk = gen_reg_rtx (SImode);
3235 emit_move_insn (wrk, operands[1]);
3236 while (value--)
3237 gen_ashift (ASHIFTRT, 1, wrk);
3238 emit_move_insn (operands[0], wrk);
3239 return 1;
3240 }
3241
3242 wrk = gen_reg_rtx (Pmode);
3243
3244 /* Load the value into an arg reg and call a helper. */
3245 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3246 sprintf (func, "__ashiftrt_r4_%d", value);
3247 function_symbol (wrk, func, SFUNC_STATIC);
3248 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3249 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3250 return 1;
3251 }
3252
3253 int
3254 sh_dynamicalize_shift_p (rtx count)
3255 {
3256 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3257 }
3258
3259 /* Try to find a good way to implement the combiner pattern
3260 [(set (match_operand:SI 0 "register_operand" "r")
3261 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3262 (match_operand:SI 2 "const_int_operand" "n"))
3263 (match_operand:SI 3 "const_int_operand" "n"))) .
3264 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3265 return 0 for simple right / left or left/right shift combination.
3266 return 1 for a combination of shifts with zero_extend.
3267 return 2 for a combination of shifts with an AND that needs r0.
3268 return 3 for a combination of shifts with an AND that needs an extra
3269 scratch register, when the three highmost bits of the AND mask are clear.
3270 return 4 for a combination of shifts with an AND that needs an extra
3271 scratch register, when any of the three highmost bits of the AND mask
3272 is set.
3273 If ATTRP is set, store an initial right shift width in ATTRP[0],
3274 and the instruction length in ATTRP[1] . These values are not valid
3275 when returning 0.
3276 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3277 shift_amounts for the last shift value that is to be used before the
3278 sign extend. */
3279 int
3280 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3281 {
3282 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3283 int left = INTVAL (left_rtx), right;
3284 int best = 0;
3285 int cost, best_cost = 10000;
3286 int best_right = 0, best_len = 0;
3287 int i;
3288 int can_ext;
3289
3290 if (left < 0 || left > 31)
3291 return 0;
3292 if (CONST_INT_P (mask_rtx))
3293 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3294 else
3295 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3296 /* Can this be expressed as a right shift / left shift pair? */
3297 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3298 right = exact_log2 (lsb);
3299 mask2 = ~(mask + lsb - 1);
3300 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3301 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3302 if (! mask2)
3303 best_cost = shift_insns[right] + shift_insns[right + left];
3304 /* mask has no trailing zeroes <==> ! right */
3305 else if (! right && mask2 == ~(lsb2 - 1))
3306 {
3307 int late_right = exact_log2 (lsb2);
3308 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3309 }
3310 /* Try to use zero extend. */
3311 if (mask2 == ~(lsb2 - 1))
3312 {
3313 int width, first;
3314
3315 for (width = 8; width <= 16; width += 8)
3316 {
3317 /* Can we zero-extend right away? */
3318 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3319 {
3320 cost
3321 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3322 if (cost < best_cost)
3323 {
3324 best = 1;
3325 best_cost = cost;
3326 best_right = right;
3327 best_len = cost;
3328 if (attrp)
3329 attrp[2] = -1;
3330 }
3331 continue;
3332 }
3333 /* ??? Could try to put zero extend into initial right shift,
3334 or even shift a bit left before the right shift. */
3335 /* Determine value of first part of left shift, to get to the
3336 zero extend cut-off point. */
3337 first = width - exact_log2 (lsb2) + right;
3338 if (first >= 0 && right + left - first >= 0)
3339 {
3340 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3341 + ext_shift_insns[right + left - first];
3342 if (cost < best_cost)
3343 {
3344 best = 1;
3345 best_cost = cost;
3346 best_right = right;
3347 best_len = cost;
3348 if (attrp)
3349 attrp[2] = first;
3350 }
3351 }
3352 }
3353 }
3354 /* Try to use r0 AND pattern */
3355 for (i = 0; i <= 2; i++)
3356 {
3357 if (i > right)
3358 break;
3359 if (! CONST_OK_FOR_K08 (mask >> i))
3360 continue;
3361 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3362 if (cost < best_cost)
3363 {
3364 best = 2;
3365 best_cost = cost;
3366 best_right = i;
3367 best_len = cost - 1;
3368 }
3369 }
3370 /* Try to use a scratch register to hold the AND operand. */
3371 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3372 for (i = 0; i <= 2; i++)
3373 {
3374 if (i > right)
3375 break;
3376 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3377 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3378 if (cost < best_cost)
3379 {
3380 best = 4 - can_ext;
3381 best_cost = cost;
3382 best_right = i;
3383 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3384 }
3385 }
3386
3387 if (attrp)
3388 {
3389 attrp[0] = best_right;
3390 attrp[1] = best_len;
3391 }
3392 return best;
3393 }
3394
3395 /* This is used in length attributes of the unnamed instructions
3396 corresponding to shl_and_kind return values of 1 and 2. */
3397 int
3398 shl_and_length (rtx insn)
3399 {
3400 rtx set_src, left_rtx, mask_rtx;
3401 int attributes[3];
3402
3403 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3404 left_rtx = XEXP (XEXP (set_src, 0), 1);
3405 mask_rtx = XEXP (set_src, 1);
3406 shl_and_kind (left_rtx, mask_rtx, attributes);
3407 return attributes[1];
3408 }
3409
3410 /* This is used in length attribute of the and_shl_scratch instruction. */
3411
3412 int
3413 shl_and_scr_length (rtx insn)
3414 {
3415 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3416 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3417 rtx op = XEXP (set_src, 0);
3418 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3419 op = XEXP (XEXP (op, 0), 0);
3420 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3421 }
3422
3423 /* Generate rtl for instructions for which shl_and_kind advised a particular
3424 method of generating them, i.e. returned a nonzero kind. */
3425
3426 int
3427 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3428 {
3429 int attributes[3];
3430 unsigned HOST_WIDE_INT mask;
3431 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3432 int right, total_shift;
3433 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3434
3435 right = attributes[0];
3436 total_shift = INTVAL (left_rtx) + right;
3437 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3438 switch (kind)
3439 {
3440 default:
3441 return -1;
3442 case 1:
3443 {
3444 int first = attributes[2];
3445 rtx operands[3];
3446
3447 if (first < 0)
3448 {
3449 emit_insn ((mask << right) <= 0xff
3450 ? gen_zero_extendqisi2 (dest,
3451 gen_lowpart (QImode, source))
3452 : gen_zero_extendhisi2 (dest,
3453 gen_lowpart (HImode, source)));
3454 source = dest;
3455 }
3456 if (source != dest)
3457 emit_insn (gen_movsi (dest, source));
3458 operands[0] = dest;
3459 if (right)
3460 {
3461 operands[2] = GEN_INT (right);
3462 gen_shifty_hi_op (LSHIFTRT, operands);
3463 }
3464 if (first > 0)
3465 {
3466 operands[2] = GEN_INT (first);
3467 gen_shifty_hi_op (ASHIFT, operands);
3468 total_shift -= first;
3469 mask <<= first;
3470 }
3471 if (first >= 0)
3472 emit_insn (mask <= 0xff
3473 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3474 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3475 if (total_shift > 0)
3476 {
3477 operands[2] = GEN_INT (total_shift);
3478 gen_shifty_hi_op (ASHIFT, operands);
3479 }
3480 break;
3481 }
3482 case 4:
3483 shift_gen_fun = gen_shifty_op;
3484 case 3:
3485 /* If the topmost bit that matters is set, set the topmost bits
3486 that don't matter. This way, we might be able to get a shorter
3487 signed constant. */
3488 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3489 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3490 case 2:
3491 /* Don't expand fine-grained when combining, because that will
3492 make the pattern fail. */
3493 if (currently_expanding_to_rtl
3494 || reload_in_progress || reload_completed)
3495 {
3496 rtx operands[3];
3497
3498 /* Cases 3 and 4 should be handled by this split
3499 only while combining */
3500 gcc_assert (kind <= 2);
3501 if (right)
3502 {
3503 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3504 source = dest;
3505 }
3506 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3507 if (total_shift)
3508 {
3509 operands[0] = dest;
3510 operands[1] = dest;
3511 operands[2] = GEN_INT (total_shift);
3512 shift_gen_fun (ASHIFT, operands);
3513 }
3514 break;
3515 }
3516 else
3517 {
3518 int neg = 0;
3519 if (kind != 4 && total_shift < 16)
3520 {
3521 neg = -ext_shift_amounts[total_shift][1];
3522 if (neg > 0)
3523 neg -= ext_shift_amounts[total_shift][2];
3524 else
3525 neg = 0;
3526 }
3527 emit_insn (gen_and_shl_scratch (dest, source,
3528 GEN_INT (right),
3529 GEN_INT (mask),
3530 GEN_INT (total_shift + neg),
3531 GEN_INT (neg)));
3532 emit_insn (gen_movsi (dest, dest));
3533 break;
3534 }
3535 }
3536 return 0;
3537 }
3538
3539 /* Try to find a good way to implement the combiner pattern
3540 [(set (match_operand:SI 0 "register_operand" "=r")
3541 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3542 (match_operand:SI 2 "const_int_operand" "n")
3543 (match_operand:SI 3 "const_int_operand" "n")
3544 (const_int 0)))
3545 (clobber (reg:SI T_REG))]
3546 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3547 return 0 for simple left / right shift combination.
3548 return 1 for left shift / 8 bit sign extend / left shift.
3549 return 2 for left shift / 16 bit sign extend / left shift.
3550 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3551 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3552 return 5 for left shift / 16 bit sign extend / right shift
3553 return 6 for < 8 bit sign extend / left shift.
3554 return 7 for < 8 bit sign extend / left shift / single right shift.
3555 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3556
3557 int
3558 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3559 {
3560 int left, size, insize, ext;
3561 int cost = 0, best_cost;
3562 int kind;
3563
3564 left = INTVAL (left_rtx);
3565 size = INTVAL (size_rtx);
3566 insize = size - left;
3567 gcc_assert (insize > 0);
3568 /* Default to left / right shift. */
3569 kind = 0;
3570 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3571 if (size <= 16)
3572 {
3573 /* 16 bit shift / sign extend / 16 bit shift */
3574 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3575 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3576 below, by alternative 3 or something even better. */
3577 if (cost < best_cost)
3578 {
3579 kind = 5;
3580 best_cost = cost;
3581 }
3582 }
3583 /* Try a plain sign extend between two shifts. */
3584 for (ext = 16; ext >= insize; ext -= 8)
3585 {
3586 if (ext <= size)
3587 {
3588 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3589 if (cost < best_cost)
3590 {
3591 kind = ext / (unsigned) 8;
3592 best_cost = cost;
3593 }
3594 }
3595 /* Check if we can do a sloppy shift with a final signed shift
3596 restoring the sign. */
3597 if (EXT_SHIFT_SIGNED (size - ext))
3598 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3599 /* If not, maybe it's still cheaper to do the second shift sloppy,
3600 and do a final sign extend? */
3601 else if (size <= 16)
3602 cost = ext_shift_insns[ext - insize] + 1
3603 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3604 else
3605 continue;
3606 if (cost < best_cost)
3607 {
3608 kind = ext / (unsigned) 8 + 2;
3609 best_cost = cost;
3610 }
3611 }
3612 /* Check if we can sign extend in r0. */
3613 if (insize < 8)
3614 {
3615 cost = 3 + shift_insns[left];
3616 if (cost < best_cost)
3617 {
3618 kind = 6;
3619 best_cost = cost;
3620 }
3621 /* Try the same with a final signed shift. */
3622 if (left < 31)
3623 {
3624 cost = 3 + ext_shift_insns[left + 1] + 1;
3625 if (cost < best_cost)
3626 {
3627 kind = 7;
3628 best_cost = cost;
3629 }
3630 }
3631 }
3632 if (TARGET_SH3)
3633 {
3634 /* Try to use a dynamic shift. */
3635 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3636 if (cost < best_cost)
3637 {
3638 kind = 0;
3639 best_cost = cost;
3640 }
3641 }
3642 if (costp)
3643 *costp = cost;
3644 return kind;
3645 }
3646
3647 /* Function to be used in the length attribute of the instructions
3648 implementing this pattern. */
3649
3650 int
3651 shl_sext_length (rtx insn)
3652 {
3653 rtx set_src, left_rtx, size_rtx;
3654 int cost;
3655
3656 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3657 left_rtx = XEXP (XEXP (set_src, 0), 1);
3658 size_rtx = XEXP (set_src, 1);
3659 shl_sext_kind (left_rtx, size_rtx, &cost);
3660 return cost;
3661 }
3662
3663 /* Generate rtl for this pattern. */
3664
3665 int
3666 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3667 {
3668 int kind;
3669 int left, size, insize, cost;
3670 rtx operands[3];
3671
3672 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3673 left = INTVAL (left_rtx);
3674 size = INTVAL (size_rtx);
3675 insize = size - left;
3676 switch (kind)
3677 {
3678 case 1:
3679 case 2:
3680 case 3:
3681 case 4:
3682 {
3683 int ext = kind & 1 ? 8 : 16;
3684 int shift2 = size - ext;
3685
3686 /* Don't expand fine-grained when combining, because that will
3687 make the pattern fail. */
3688 if (! currently_expanding_to_rtl
3689 && ! reload_in_progress && ! reload_completed)
3690 {
3691 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3692 emit_insn (gen_movsi (dest, source));
3693 break;
3694 }
3695 if (dest != source)
3696 emit_insn (gen_movsi (dest, source));
3697 operands[0] = dest;
3698 if (ext - insize)
3699 {
3700 operands[2] = GEN_INT (ext - insize);
3701 gen_shifty_hi_op (ASHIFT, operands);
3702 }
3703 emit_insn (kind & 1
3704 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3705 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3706 if (kind <= 2)
3707 {
3708 if (shift2)
3709 {
3710 operands[2] = GEN_INT (shift2);
3711 gen_shifty_op (ASHIFT, operands);
3712 }
3713 }
3714 else
3715 {
3716 if (shift2 > 0)
3717 {
3718 if (EXT_SHIFT_SIGNED (shift2))
3719 {
3720 operands[2] = GEN_INT (shift2 + 1);
3721 gen_shifty_op (ASHIFT, operands);
3722 operands[2] = const1_rtx;
3723 gen_shifty_op (ASHIFTRT, operands);
3724 break;
3725 }
3726 operands[2] = GEN_INT (shift2);
3727 gen_shifty_hi_op (ASHIFT, operands);
3728 }
3729 else if (shift2)
3730 {
3731 operands[2] = GEN_INT (-shift2);
3732 gen_shifty_hi_op (LSHIFTRT, operands);
3733 }
3734 emit_insn (size <= 8
3735 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3736 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3737 }
3738 break;
3739 }
3740 case 5:
3741 {
3742 int i = 16 - size;
3743 if (! currently_expanding_to_rtl
3744 && ! reload_in_progress && ! reload_completed)
3745 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3746 else
3747 {
3748 operands[0] = dest;
3749 operands[2] = GEN_INT (16 - insize);
3750 gen_shifty_hi_op (ASHIFT, operands);
3751 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3752 }
3753 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3754 while (--i >= 0)
3755 gen_ashift (ASHIFTRT, 1, dest);
3756 break;
3757 }
3758 case 6:
3759 case 7:
3760 /* Don't expand fine-grained when combining, because that will
3761 make the pattern fail. */
3762 if (! currently_expanding_to_rtl
3763 && ! reload_in_progress && ! reload_completed)
3764 {
3765 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3766 emit_insn (gen_movsi (dest, source));
3767 break;
3768 }
3769 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3770 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3771 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3772 operands[0] = dest;
3773 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3774 gen_shifty_op (ASHIFT, operands);
3775 if (kind == 7)
3776 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3777 break;
3778 default:
3779 return -1;
3780 }
3781 return 0;
3782 }
3783
3784 /* Prefix a symbol_ref name with "datalabel". */
3785
3786 rtx
3787 gen_datalabel_ref (rtx sym)
3788 {
3789 const char *str;
3790
3791 if (GET_CODE (sym) == LABEL_REF)
3792 return gen_rtx_CONST (GET_MODE (sym),
3793 gen_rtx_UNSPEC (GET_MODE (sym),
3794 gen_rtvec (1, sym),
3795 UNSPEC_DATALABEL));
3796
3797 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3798
3799 str = XSTR (sym, 0);
3800 /* Share all SYMBOL_REF strings with the same value - that is important
3801 for cse. */
3802 str = IDENTIFIER_POINTER (get_identifier (str));
3803 XSTR (sym, 0) = str;
3804
3805 return sym;
3806 }
3807
3808 \f
3809 static alloc_pool label_ref_list_pool;
3810
3811 typedef struct label_ref_list_d
3812 {
3813 rtx label;
3814 struct label_ref_list_d *next;
3815 } *label_ref_list_t;
3816
3817 /* The SH cannot load a large constant into a register; constants have to
3818 come from a pc relative load. The constant referenced by a pc relative
3819 load must lie within 1k after the load instruction. This
3820 means that we often have to dump a constant inside a function, and
3821 generate code to branch around it.
3822
3823 It is important to minimize this, since the branches will slow things
3824 down and make things bigger.
3825
3826 Worst case code looks like:
3827
3828 mov.l L1,rn
3829 bra L2
3830 nop
3831 align
3832 L1: .long value
3833 L2:
3834 ..
3835
3836 mov.l L3,rn
3837 bra L4
3838 nop
3839 align
3840 L3: .long value
3841 L4:
3842 ..
3843
3844 We fix this by performing a scan before scheduling, which notices which
3845 instructions need to have their operands fetched from the constant table
3846 and builds the table.
3847
3848 The algorithm is:
3849
3850 scan to find an instruction which needs a pcrel move. Look forward to find
3851 the last barrier which is within MAX_COUNT bytes of the requirement.
3852 If there isn't one, make one. Process all the instructions between
3853 the found instruction and the barrier.
3854
3855 In the above example, we can tell that L3 is within 1k of L1, so
3856 the first move can be shrunk from the 3 insn+constant sequence into
3857 just 1 insn, and the constant moved to L3 to make:
3858
3859 mov.l L1,rn
3860 ..
3861 mov.l L3,rn
3862 bra L4
3863 nop
3864 align
3865 L3:.long value
3866 L4:.long value
3867
3868 Then the second move becomes the target for the shortening process. */
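/* Rough sketch of how the routines below cooperate (exposition only):
   sh_reorg spots a broken_move, calls find_barrier to locate (or create)
   a barrier within range, registers each constant with add_constant
   (which hands back the label to load from), rewrites the move into a
   pc-relative load of that label, and finally has the constant table
   emitted after the barrier via dump_table.  */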
3869
3870 typedef struct
3871 {
3872 rtx value; /* Value in table. */
3873 rtx label; /* Label of value. */
3874 label_ref_list_t wend; /* End of window. */
3875 enum machine_mode mode; /* Mode of value. */
3876
3877 /* True if this constant is accessed as part of a post-increment
3878 sequence. Note that HImode constants are never accessed in this way. */
3879 bool part_of_sequence_p;
3880 } pool_node;
3881
3882 /* The maximum number of constants that can fit into one pool, since
3883 constants in the range 0..510 are at least 2 bytes long, and those in
3884 the range from there to 1018 are at least 4 bytes long. */
3885
3886 #define MAX_POOL_SIZE 372
3887 static pool_node pool_vector[MAX_POOL_SIZE];
3888 static int pool_size;
3889 static rtx pool_window_label;
3890 static int pool_window_last;
3891
3892 static int max_labelno_before_reorg;
3893
3894 /* ??? If we need a constant in HImode which is the truncated value of a
3895 constant we need in SImode, we could combine the two entries thus saving
3896 two bytes. Is this common enough to be worth the effort of implementing
3897 it? */
3898
3899 /* ??? This stuff should be done at the same time that we shorten branches.
3900 As it is now, we must assume that all branches are the maximum size, and
3901 this causes us to almost always output constant pools sooner than
3902 necessary. */
3903
3904 /* Add a constant to the pool and return its label. */
3905
3906 static rtx
3907 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3908 {
3909 int i;
3910 rtx lab, new_rtx;
3911 label_ref_list_t ref, newref;
3912
3913 /* First see if we've already got it. */
3914 for (i = 0; i < pool_size; i++)
3915 {
3916 if (x->code == pool_vector[i].value->code
3917 && mode == pool_vector[i].mode)
3918 {
3919 if (x->code == CODE_LABEL)
3920 {
3921 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3922 continue;
3923 }
3924 if (rtx_equal_p (x, pool_vector[i].value))
3925 {
3926 lab = new_rtx = 0;
3927 if (! last_value
3928 || ! i
3929 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3930 {
3931 new_rtx = gen_label_rtx ();
3932 LABEL_REFS (new_rtx) = pool_vector[i].label;
3933 pool_vector[i].label = lab = new_rtx;
3934 }
3935 if (lab && pool_window_label)
3936 {
3937 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3938 newref->label = pool_window_label;
3939 ref = pool_vector[pool_window_last].wend;
3940 newref->next = ref;
3941 pool_vector[pool_window_last].wend = newref;
3942 }
3943 if (new_rtx)
3944 pool_window_label = new_rtx;
3945 pool_window_last = i;
3946 return lab;
3947 }
3948 }
3949 }
3950
3951 /* Need a new one. */
3952 pool_vector[pool_size].value = x;
3953 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3954 {
3955 lab = 0;
3956 pool_vector[pool_size - 1].part_of_sequence_p = true;
3957 }
3958 else
3959 lab = gen_label_rtx ();
3960 pool_vector[pool_size].mode = mode;
3961 pool_vector[pool_size].label = lab;
3962 pool_vector[pool_size].wend = NULL;
3963 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3964 if (lab && pool_window_label)
3965 {
3966 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3967 newref->label = pool_window_label;
3968 ref = pool_vector[pool_window_last].wend;
3969 newref->next = ref;
3970 pool_vector[pool_window_last].wend = newref;
3971 }
3972 if (lab)
3973 pool_window_label = lab;
3974 pool_window_last = pool_size;
3975 pool_size++;
3976 return lab;
3977 }
3978
3979 /* Output the literal table. START, if nonzero, is the first instruction
3980 this table is needed for, and also indicates that there is at least one
3981 casesi_worker_2 instruction; we have to emit the operand3 labels from
3982 these insns at a 4-byte aligned position. BARRIER is the barrier
3983 after which we are to place the table. */
3984
3985 static void
3986 dump_table (rtx start, rtx barrier)
3987 {
3988 rtx scan = barrier;
3989 int i;
3990 int need_align = 1;
3991 rtx lab;
3992 label_ref_list_t ref;
3993 int have_df = 0;
3994
3995 /* Do two passes; the first time, dump out the HImode sized constants. */
3996
3997 for (i = 0; i < pool_size; i++)
3998 {
3999 pool_node *p = &pool_vector[i];
4000
4001 if (p->mode == HImode)
4002 {
4003 if (need_align)
4004 {
4005 scan = emit_insn_after (gen_align_2 (), scan);
4006 need_align = 0;
4007 }
4008 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4009 scan = emit_label_after (lab, scan);
4010 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4011 scan);
4012 for (ref = p->wend; ref; ref = ref->next)
4013 {
4014 lab = ref->label;
4015 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4016 }
4017 }
4018 else if (p->mode == DFmode)
4019 have_df = 1;
4020 }
4021
4022 need_align = 1;
4023
4024 if (start)
4025 {
4026 scan = emit_insn_after (gen_align_4 (), scan);
4027 need_align = 0;
4028 for (; start != barrier; start = NEXT_INSN (start))
4029 if (NONJUMP_INSN_P (start)
4030 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4031 {
4032 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4033 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4034
4035 scan = emit_label_after (lab, scan);
4036 }
4037 }
4038 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4039 {
4040 rtx align_insn = NULL_RTX;
4041
4042 scan = emit_label_after (gen_label_rtx (), scan);
4043 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4044 need_align = 0;
4045
4046 for (i = 0; i < pool_size; i++)
4047 {
4048 pool_node *p = &pool_vector[i];
4049
4050 switch (p->mode)
4051 {
4052 case HImode:
4053 break;
4054 case SImode:
4055 case SFmode:
4056 if (align_insn && !p->part_of_sequence_p)
4057 {
4058 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4059 emit_label_before (lab, align_insn);
4060 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4061 align_insn);
4062 for (ref = p->wend; ref; ref = ref->next)
4063 {
4064 lab = ref->label;
4065 emit_insn_before (gen_consttable_window_end (lab),
4066 align_insn);
4067 }
4068 delete_insn (align_insn);
4069 align_insn = NULL_RTX;
4070 continue;
4071 }
4072 else
4073 {
4074 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4075 scan = emit_label_after (lab, scan);
4076 scan = emit_insn_after (gen_consttable_4 (p->value,
4077 const0_rtx), scan);
4078 need_align = ! need_align;
4079 }
4080 break;
4081 case DFmode:
4082 if (need_align)
4083 {
4084 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4085 align_insn = scan;
4086 need_align = 0;
4087 }
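/* Fall through to emit the 8-byte constant.  */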
4088 case DImode:
4089 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4090 scan = emit_label_after (lab, scan);
4091 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4092 scan);
4093 break;
4094 default:
4095 gcc_unreachable ();
4096 }
4097
4098 if (p->mode != HImode)
4099 {
4100 for (ref = p->wend; ref; ref = ref->next)
4101 {
4102 lab = ref->label;
4103 scan = emit_insn_after (gen_consttable_window_end (lab),
4104 scan);
4105 }
4106 }
4107 }
4108
4109 pool_size = 0;
4110 }
4111
4112 for (i = 0; i < pool_size; i++)
4113 {
4114 pool_node *p = &pool_vector[i];
4115
4116 switch (p->mode)
4117 {
4118 case HImode:
4119 break;
4120 case SImode:
4121 case SFmode:
4122 if (need_align)
4123 {
4124 need_align = 0;
4125 scan = emit_label_after (gen_label_rtx (), scan);
4126 scan = emit_insn_after (gen_align_4 (), scan);
4127 }
4128 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4129 scan = emit_label_after (lab, scan);
4130 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4131 scan);
4132 break;
4133 case DFmode:
4134 case DImode:
4135 if (need_align)
4136 {
4137 need_align = 0;
4138 scan = emit_label_after (gen_label_rtx (), scan);
4139 scan = emit_insn_after (gen_align_4 (), scan);
4140 }
4141 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4142 scan = emit_label_after (lab, scan);
4143 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4144 scan);
4145 break;
4146 default:
4147 gcc_unreachable ();
4148 }
4149
4150 if (p->mode != HImode)
4151 {
4152 for (ref = p->wend; ref; ref = ref->next)
4153 {
4154 lab = ref->label;
4155 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4156 }
4157 }
4158 }
4159
4160 scan = emit_insn_after (gen_consttable_end (), scan);
4161 scan = emit_barrier_after (scan);
4162 pool_size = 0;
4163 pool_window_label = NULL_RTX;
4164 pool_window_last = 0;
4165 }
4166
4167 /* Return nonzero if constant would be an ok source for a
4168 mov.w instead of a mov.l. */
4169
4170 static int
4171 hi_const (rtx src)
4172 {
4173 return (CONST_INT_P (src)
4174 && INTVAL (src) >= -32768
4175 && INTVAL (src) <= 32767);
4176 }
4177
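/* Operand 0 of the UNSPEC_MOVA in MOVA's SET_SRC; for a plain mova this
   is the LABEL_REF being loaded.  */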
4178 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4179
4180 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4181
4182 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4183 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4184 need to fix it if the input value is CONST_OK_FOR_I08. */
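/* For instance (illustrative), (set (reg:SI 1) (const_int 0x12345678)) is
   "broken" because the constant satisfies none of the immediate
   constraints and must come from the constant pool, while
   (set (reg:SI 1) (const_int 5)) fits in an immediate mov and is left
   alone.  */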
4185
4186 static int
4187 broken_move (rtx insn)
4188 {
4189 if (NONJUMP_INSN_P (insn))
4190 {
4191 rtx pat = PATTERN (insn);
4192 if (GET_CODE (pat) == PARALLEL)
4193 pat = XVECEXP (pat, 0, 0);
4194 if (GET_CODE (pat) == SET
4195 /* We can load any 8-bit value if we don't care what the high
4196 order bits end up as. */
4197 && GET_MODE (SET_DEST (pat)) != QImode
4198 && (CONSTANT_P (SET_SRC (pat))
4199 /* Match mova_const. */
4200 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4201 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4202 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4203 && ! (TARGET_SH2E
4204 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4205 && (fp_zero_operand (SET_SRC (pat))
4206 || fp_one_operand (SET_SRC (pat)))
4207 /* In general we don't know the current setting of fpscr, so disable fldi.
4208 There is an exception if this was a register-register move
4209 before reload - and hence it was ascertained that we have
4210 single precision setting - and in a post-reload optimization
4211 we changed this to do a constant load. In that case
4212 we don't have an r0 clobber, hence we must use fldi. */
4213 && (TARGET_FMOVD
4214 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4215 == SCRATCH))
4216 && REG_P (SET_DEST (pat))
4217 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4218 && ! (TARGET_SH2A
4219 && GET_MODE (SET_DEST (pat)) == SImode
4220 && (satisfies_constraint_I20 (SET_SRC (pat))
4221 || satisfies_constraint_I28 (SET_SRC (pat))))
4222 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4223 return 1;
4224 }
4225
4226 return 0;
4227 }
4228
4229 static int
4230 mova_p (rtx insn)
4231 {
4232 return (NONJUMP_INSN_P (insn)
4233 && GET_CODE (PATTERN (insn)) == SET
4234 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4235 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4236 /* Don't match mova_const. */
4237 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4238 }
4239
4240 /* Fix up a mova from a switch that went out of range. */
4241 static void
4242 fixup_mova (rtx mova)
4243 {
4244 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4245 if (! flag_pic)
4246 {
4247 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4248 INSN_CODE (mova) = -1;
4249 }
4250 else
4251 {
4252 rtx worker = mova;
4253 rtx lab = gen_label_rtx ();
4254 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4255
4256 do
4257 {
4258 worker = NEXT_INSN (worker);
4259 gcc_assert (worker
4260 && !LABEL_P (worker)
4261 && !JUMP_P (worker));
4262 } while (NOTE_P (worker)
4263 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4264 wpat = PATTERN (worker);
4265 wpat0 = XVECEXP (wpat, 0, 0);
4266 wpat1 = XVECEXP (wpat, 0, 1);
4267 wsrc = SET_SRC (wpat0);
4268 PATTERN (worker) = (gen_casesi_worker_2
4269 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4270 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4271 XEXP (wpat1, 0)));
4272 INSN_CODE (worker) = -1;
4273 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4274 base = gen_rtx_LABEL_REF (Pmode, lab);
4275 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4276 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4277 INSN_CODE (mova) = -1;
4278 }
4279 }
4280
4281 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4282 *num_mova, and check if the new mova is not nested within the first one.
4283 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4284 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4285 static int
4286 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4287 {
4288 int n_addr = 0; /* Initialization to shut up spurious warning. */
4289 int f_target, n_target = 0; /* Likewise. */
4290
4291 if (optimize)
4292 {
4293 /* If NEW_MOVA has no address yet, it will be handled later. */
4294 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4295 return -1;
4296
4297 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4298 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4299 if (n_addr > n_target || n_addr + 1022 < n_target)
4300 {
4301 /* Change the mova into a load.
4302 broken_move will then return true for it. */
4303 fixup_mova (new_mova);
4304 return 1;
4305 }
4306 }
4307 if (!(*num_mova)++)
4308 {
4309 *first_mova = new_mova;
4310 return 2;
4311 }
4312 if (!optimize
4313 || ((f_target
4314 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4315 >= n_target))
4316 return -1;
4317
4318 (*num_mova)--;
4319 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4320 > n_target - n_addr)
4321 {
4322 fixup_mova (*first_mova);
4323 return 0;
4324 }
4325 else
4326 {
4327 fixup_mova (new_mova);
4328 return 1;
4329 }
4330 }
4331
4332 /* Find the last barrier from insn FROM which is close enough to hold the
4333 constant pool. If we can't find one, then create one near the end of
4334 the range. */
4335
4336 static rtx
4337 find_barrier (int num_mova, rtx mova, rtx from)
4338 {
4339 int count_si = 0;
4340 int count_hi = 0;
4341 int found_hi = 0;
4342 int found_si = 0;
4343 int found_di = 0;
4344 int hi_align = 2;
4345 int si_align = 2;
4346 int leading_mova = num_mova;
4347 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4348 int si_limit;
4349 int hi_limit;
4350 rtx orig = from;
4351
4352 /* For HImode: range is 510, add 4 because pc counts from address of
4353 second instruction after this one, subtract 2 for the jump instruction
4354 that we may need to emit before the table, subtract 2 for the instruction
4355 that fills the jump delay slot (in very rare cases, reorg will take an
4356 instruction from after the constant pool or will leave the delay slot
4357 empty). This gives 510.
4358 For SImode: range is 1020, add 4 because pc counts from address of
4359 second instruction after this one, subtract 2 in case pc is 2 byte
4360 aligned, subtract 2 for the jump instruction that we may need to emit
4361 before the table, subtract 2 for the instruction that fills the jump
4362 delay slot. This gives 1018. */
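/* That is: 510 + 4 - 2 - 2 = 510 for HImode, and
   1020 + 4 - 2 - 2 - 2 = 1018 for SImode.  */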
4363
4364 /* The branch will always be shortened now that the reference address for
4365 forward branches is the successor address, thus we need no longer make
4366 adjustments to the [sh]i_limit for -O0. */
4367
4368 si_limit = 1018;
4369 hi_limit = 510;
4370
4371 while (from && count_si < si_limit && count_hi < hi_limit)
4372 {
4373 int inc = get_attr_length (from);
4374 int new_align = 1;
4375
4376 /* If this is a label that existed at the time of the compute_alignments
4377 call, determine the alignment. N.B. When find_barrier recurses for
4378 an out-of-reach mova, we might see labels at the start of previously
4379 inserted constant tables. */
4380 if (LABEL_P (from)
4381 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4382 {
4383 if (optimize)
4384 new_align = 1 << label_to_alignment (from);
4385 else if (BARRIER_P (prev_nonnote_insn (from)))
4386 new_align = 1 << barrier_align (from);
4387 else
4388 new_align = 1;
4389 inc = 0;
4390 }
4391 /* In case we are scanning a constant table because of recursion, check
4392 for explicit alignments. If the table is long, we might be forced
4393 to emit the new table in front of it; the length of the alignment
4394 might be the last straw. */
4395 else if (NONJUMP_INSN_P (from)
4396 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4397 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4398 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4399 /* When we find the end of a constant table, paste the new constant
4400 at the end. That is better than putting it in front because
4401 this way, we don't need extra alignment for adding a 4-byte-aligned
4402 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4403 else if (NONJUMP_INSN_P (from)
4404 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4405 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4406 return from;
4407
4408 if (BARRIER_P (from))
4409 {
4410 rtx next;
4411
4412 found_barrier = from;
4413
4414 /* If we are at the end of the function, or in front of an alignment
4415 instruction, we need not insert an extra alignment. We prefer
4416 this kind of barrier. */
4417 if (barrier_align (from) > 2)
4418 good_barrier = from;
4419
4420 /* If we are at the end of a hot/cold block, dump the constants
4421 here. */
4422 next = NEXT_INSN (from);
4423 if (next
4424 && NOTE_P (next)
4425 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4426 break;
4427 }
4428
4429 if (broken_move (from))
4430 {
4431 rtx pat, src, dst;
4432 enum machine_mode mode;
4433
4434 pat = PATTERN (from);
4435 if (GET_CODE (pat) == PARALLEL)
4436 pat = XVECEXP (pat, 0, 0);
4437 src = SET_SRC (pat);
4438 dst = SET_DEST (pat);
4439 mode = GET_MODE (dst);
4440
4441 /* We must explicitly check the mode, because sometimes the
4442 front end will generate code to load unsigned constants into
4443 HImode targets without properly sign extending them. */
4444 if (mode == HImode
4445 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4446 {
4447 found_hi += 2;
4448 /* We put the short constants before the long constants, so
4449 we must count the length of short constants in the range
4450 for the long constants. */
4451 /* ??? This isn't optimal, but is easy to do. */
4452 si_limit -= 2;
4453 }
4454 else
4455 {
4456 /* We dump DF/DI constants before SF/SI ones, because
4457 the limit is the same, but the alignment requirements
4458 are higher. We may waste up to 4 additional bytes
4459 for alignment, and the DF/DI constant may have
4460 another SF/SI constant placed before it. */
4461 if (TARGET_SHCOMPACT
4462 && ! found_di
4463 && (mode == DFmode || mode == DImode))
4464 {
4465 found_di = 1;
4466 si_limit -= 8;
4467 }
4468 while (si_align > 2 && found_si + si_align - 2 > count_si)
4469 si_align >>= 1;
4470 if (found_si > count_si)
4471 count_si = found_si;
4472 found_si += GET_MODE_SIZE (mode);
4473 if (num_mova)
4474 si_limit -= GET_MODE_SIZE (mode);
4475 }
4476 }
4477
4478 if (mova_p (from))
4479 {
4480 switch (untangle_mova (&num_mova, &mova, from))
4481 {
4482 case 0: return find_barrier (0, 0, mova);
4483 case 2:
4484 {
4485 leading_mova = 0;
4486 barrier_before_mova
4487 = good_barrier ? good_barrier : found_barrier;
4488 }
4489 default: break;
4490 }
4491 if (found_si > count_si)
4492 count_si = found_si;
4493 }
4494 else if (JUMP_TABLE_DATA_P (from))
4495 {
4496 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4497 || (num_mova
4498 && (prev_nonnote_insn (from)
4499 == XEXP (MOVA_LABELREF (mova), 0))))
4500 num_mova--;
4501 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4502 {
4503 /* We have just passed the barrier in front of the
4504 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4505 the ADDR_DIFF_VEC is accessed as data, just like our pool
4506 constants, this is a good opportunity to accommodate what
4507 we have gathered so far.
4508 If we waited any longer, we could end up at a barrier in
4509 front of code, which gives worse cache usage for separated
4510 instruction / data caches. */
4511 good_barrier = found_barrier;
4512 break;
4513 }
4514 else
4515 {
4516 rtx body = PATTERN (from);
4517 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4518 }
4519 }
4520 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4521 else if (JUMP_P (from)
4522 && ! TARGET_SH2
4523 && ! TARGET_SMALLCODE)
4524 new_align = 4;
4525
4526 if (found_si)
4527 {
4528 count_si += inc;
4529 if (new_align > si_align)
4530 {
4531 si_limit -= (count_si - 1) & (new_align - si_align);
4532 si_align = new_align;
4533 }
4534 count_si = (count_si + new_align - 1) & -new_align;
4535 }
4536 if (found_hi)
4537 {
4538 count_hi += inc;
4539 if (new_align > hi_align)
4540 {
4541 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4542 hi_align = new_align;
4543 }
4544 count_hi = (count_hi + new_align - 1) & -new_align;
4545 }
4546 from = NEXT_INSN (from);
4547 }
4548
4549 if (num_mova)
4550 {
4551 if (leading_mova)
4552 {
4553 /* Try as we might, the leading mova is out of range. Change
4554 it into a load (which will become a pcload) and retry. */
4555 fixup_mova (mova);
4556 return find_barrier (0, 0, mova);
4557 }
4558 else
4559 {
4560 /* Insert the constant pool table before the mova instruction,
4561 to prevent the mova label reference from going out of range. */
4562 from = mova;
4563 good_barrier = found_barrier = barrier_before_mova;
4564 }
4565 }
4566
4567 if (found_barrier)
4568 {
4569 if (good_barrier && next_real_insn (found_barrier))
4570 found_barrier = good_barrier;
4571 }
4572 else
4573 {
4574 /* We didn't find a barrier in time to dump our stuff,
4575 so we'll make one. */
4576 rtx label = gen_label_rtx ();
4577
4578 /* If we exceeded the range, then we must back up over the last
4579 instruction we looked at. Otherwise, we just need to undo the
4580 NEXT_INSN at the end of the loop. */
4581 if (PREV_INSN (from) != orig
4582 && (count_hi > hi_limit || count_si > si_limit))
4583 from = PREV_INSN (PREV_INSN (from));
4584 else
4585 from = PREV_INSN (from);
4586
4587 /* Walk back to be just before any jump or label.
4588 Putting it before a label reduces the number of times the branch
4589 around the constant pool table will be hit. Putting it before
4590 a jump makes it more likely that the bra delay slot will be
4591 filled. */
4592 while (NOTE_P (from) || JUMP_P (from)
4593 || LABEL_P (from))
4594 from = PREV_INSN (from);
4595
4596 from = emit_jump_insn_after (gen_jump (label), from);
4597 JUMP_LABEL (from) = label;
4598 LABEL_NUSES (label) = 1;
4599 found_barrier = emit_barrier_after (from);
4600 emit_label_after (label, found_barrier);
4601 }
4602
4603 return found_barrier;
4604 }
4605
4606 /* If the instruction INSN is implemented by a special function, and we can
4607 positively find the register that is used to call the sfunc, and this
4608 register is not used anywhere else in this instruction - except as the
4609 destination of a set, return this register; else, return 0. */
4610 rtx
4611 sfunc_uses_reg (rtx insn)
4612 {
4613 int i;
4614 rtx pattern, part, reg_part, reg;
4615
4616 if (!NONJUMP_INSN_P (insn))
4617 return 0;
4618 pattern = PATTERN (insn);
4619 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4620 return 0;
4621
4622 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4623 {
4624 part = XVECEXP (pattern, 0, i);
4625 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4626 reg_part = part;
4627 }
4628 if (! reg_part)
4629 return 0;
4630 reg = XEXP (reg_part, 0);
4631 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4632 {
4633 part = XVECEXP (pattern, 0, i);
4634 if (part == reg_part || GET_CODE (part) == CLOBBER)
4635 continue;
4636 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4637 && REG_P (SET_DEST (part)))
4638 ? SET_SRC (part) : part)))
4639 return 0;
4640 }
4641 return reg;
4642 }
4643
4644 /* See if the only way in which INSN uses REG is by calling it, or by
4645 setting it while calling it. Set *SET to a SET rtx if the register
4646 is set by INSN. */
4647
4648 static int
4649 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4650 {
4651 rtx pattern, reg2;
4652
4653 *set = NULL_RTX;
4654
4655 reg2 = sfunc_uses_reg (insn);
4656 if (reg2 && REGNO (reg2) == REGNO (reg))
4657 {
4658 pattern = single_set (insn);
4659 if (pattern
4660 && REG_P (SET_DEST (pattern))
4661 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4662 *set = pattern;
4663 return 0;
4664 }
4665 if (!CALL_P (insn))
4666 {
4667 /* We don't use rtx_equal_p because we don't care if the mode is
4668 different. */
4669 pattern = single_set (insn);
4670 if (pattern
4671 && REG_P (SET_DEST (pattern))
4672 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4673 {
4674 rtx par, part;
4675 int i;
4676
4677 *set = pattern;
4678 par = PATTERN (insn);
4679 if (GET_CODE (par) == PARALLEL)
4680 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4681 {
4682 part = XVECEXP (par, 0, i);
4683 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4684 return 1;
4685 }
4686 return reg_mentioned_p (reg, SET_SRC (pattern));
4687 }
4688
4689 return 1;
4690 }
4691
4692 pattern = PATTERN (insn);
4693
4694 if (GET_CODE (pattern) == PARALLEL)
4695 {
4696 int i;
4697
4698 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4699 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4700 return 1;
4701 pattern = XVECEXP (pattern, 0, 0);
4702 }
4703
4704 if (GET_CODE (pattern) == SET)
4705 {
4706 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4707 {
4708 /* We don't use rtx_equal_p, because we don't care if the
4709 mode is different. */
4710 if (!REG_P (SET_DEST (pattern))
4711 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4712 return 1;
4713
4714 *set = pattern;
4715 }
4716
4717 pattern = SET_SRC (pattern);
4718 }
4719
4720 if (GET_CODE (pattern) != CALL
4721 || !MEM_P (XEXP (pattern, 0))
4722 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4723 return 1;
4724
4725 return 0;
4726 }
4727
4728 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4729 general registers. Bits 0..15 mean that the respective registers
4730 are used as inputs in the instruction. Bits 16..31 mean that the
4731 registers 0..15, respectively, are used as outputs, or are clobbered.
4732 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
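/* Example (illustrative, assuming a single-word mode): for
   (set (reg:SI 2) (plus:SI (reg:SI 1) (reg:SI 3))) this returns
   0x0004000a - bits 1 and 3 for the inputs, bit 16 + 2 for the output.  */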
4733 int
4734 regs_used (rtx x, int is_dest)
4735 {
4736 enum rtx_code code;
4737 const char *fmt;
4738 int i, used = 0;
4739
4740 if (! x)
4741 return used;
4742 code = GET_CODE (x);
4743 switch (code)
4744 {
4745 case REG:
4746 if (REGNO (x) < 16)
4747 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4748 << (REGNO (x) + is_dest));
4749 return 0;
4750 case SUBREG:
4751 {
4752 rtx y = SUBREG_REG (x);
4753
4754 if (!REG_P (y))
4755 break;
4756 if (REGNO (y) < 16)
4757 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4758 << (REGNO (y) +
4759 subreg_regno_offset (REGNO (y),
4760 GET_MODE (y),
4761 SUBREG_BYTE (x),
4762 GET_MODE (x)) + is_dest));
4763 return 0;
4764 }
4765 case SET:
4766 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4767 case RETURN:
4768 /* If there was a return value, it must have been indicated with USE. */
4769 return 0x00ffff00;
4770 case CLOBBER:
4771 is_dest = 1;
4772 break;
4773 case MEM:
4774 is_dest = 0;
4775 break;
4776 case CALL:
4777 used |= 0x00ff00f0;
4778 break;
4779 default:
4780 break;
4781 }
4782
4783 fmt = GET_RTX_FORMAT (code);
4784
4785 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4786 {
4787 if (fmt[i] == 'E')
4788 {
4789 register int j;
4790 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4791 used |= regs_used (XVECEXP (x, i, j), is_dest);
4792 }
4793 else if (fmt[i] == 'e')
4794 used |= regs_used (XEXP (x, i), is_dest);
4795 }
4796 return used;
4797 }
4798
4799 /* Create an instruction that prevents redirection of a conditional branch
4800 to the destination of the JUMP with address ADDR.
4801 If the branch needs to be implemented as an indirect jump, try to find
4802 a scratch register for it.
4803 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4804 If any preceding insn that doesn't fit into a delay slot is good enough,
4805 pass 1. Pass 2 if a definite blocking insn is needed.
4806 -1 is used internally to avoid deep recursion.
4807 If a blocking instruction is made or recognized, return it. */
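/* For example, gen_far_branch below calls
   gen_block_redirect (jump, address, 2) to force a definite blocking insn
   in front of the jump it has just created.  */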
4808
4809 static rtx
4810 gen_block_redirect (rtx jump, int addr, int need_block)
4811 {
4812 int dead = 0;
4813 rtx prev = prev_nonnote_insn (jump);
4814 rtx dest;
4815
4816 /* First, check if we already have an instruction that satisfies our need. */
4817 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4818 {
4819 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4820 return prev;
4821 if (GET_CODE (PATTERN (prev)) == USE
4822 || GET_CODE (PATTERN (prev)) == CLOBBER
4823 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4824 prev = jump;
4825 else if ((need_block &= ~1) < 0)
4826 return prev;
4827 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4828 need_block = 0;
4829 }
4830 if (GET_CODE (PATTERN (jump)) == RETURN)
4831 {
4832 if (! need_block)
4833 return prev;
4834 /* Reorg even does nasty things with return insns that cause branches
4835 to go out of range - see find_end_label and callers. */
4836 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4837 }
4838 /* We can't use JUMP_LABEL here because it might be undefined
4839 when not optimizing. */
4840 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4841 /* If the branch is out of range, try to find a scratch register for it. */
4842 if (optimize
4843 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4844 > 4092 + 4098))
4845 {
4846 rtx scan;
4847 /* Don't look for the stack pointer as a scratch register;
4848 it would cause trouble if an interrupt occurred. */
4849 unsigned attempt = 0x7fff, used;
4850 int jump_left = flag_expensive_optimizations + 1;
4851
4852 /* It is likely that the most recent eligible instruction is wanted for
4853 the delay slot. Therefore, find out which registers it uses, and
4854 try to avoid using them. */
4855
4856 for (scan = jump; (scan = PREV_INSN (scan)); )
4857 {
4858 enum rtx_code code;
4859
4860 if (INSN_DELETED_P (scan))
4861 continue;
4862 code = GET_CODE (scan);
4863 if (code == CODE_LABEL || code == JUMP_INSN)
4864 break;
4865 if (code == INSN
4866 && GET_CODE (PATTERN (scan)) != USE
4867 && GET_CODE (PATTERN (scan)) != CLOBBER
4868 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4869 {
4870 attempt &= ~regs_used (PATTERN (scan), 0);
4871 break;
4872 }
4873 }
4874 for (used = dead = 0, scan = JUMP_LABEL (jump);
4875 (scan = NEXT_INSN (scan)); )
4876 {
4877 enum rtx_code code;
4878
4879 if (INSN_DELETED_P (scan))
4880 continue;
4881 code = GET_CODE (scan);
4882 if (INSN_P (scan))
4883 {
4884 used |= regs_used (PATTERN (scan), 0);
4885 if (code == CALL_INSN)
4886 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4887 dead |= (used >> 16) & ~used;
4888 if (dead & attempt)
4889 {
4890 dead &= attempt;
4891 break;
4892 }
4893 if (code == JUMP_INSN)
4894 {
4895 if (jump_left-- && simplejump_p (scan))
4896 scan = JUMP_LABEL (scan);
4897 else
4898 break;
4899 }
4900 }
4901 }
4902 /* Mask out the stack pointer again, in case it was
4903 the only 'free' register we have found. */
4904 dead &= 0x7fff;
4905 }
4906 /* If the immediate destination is still in range, check for possible
4907 threading with a jump beyond the delay slot insn.
4908 Don't check if we are called recursively; the jump has been or will be
4909 checked in that other invocation. */
4910
4911 else if (optimize && need_block >= 0)
4912 {
4913 rtx next = next_active_insn (next_active_insn (dest));
4914 if (next && JUMP_P (next)
4915 && GET_CODE (PATTERN (next)) == SET
4916 && recog_memoized (next) == CODE_FOR_jump_compact)
4917 {
4918 dest = JUMP_LABEL (next);
4919 if (dest
4920 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4921 > 4092 + 4098))
4922 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4923 }
4924 }
4925
4926 if (dead)
4927 {
4928 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4929
4930 /* It would be nice if we could convert the jump into an indirect
4931 jump / far branch right now, and thus exposing all constituent
4932 instructions to further optimization. However, reorg uses
4933 simplejump_p to determine if there is an unconditional jump where
4934 it should try to schedule instructions from the target of the
4935 branch; simplejump_p fails for indirect jumps even if they have
4936 a JUMP_LABEL. */
4937 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4938 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4939 , jump);
4940 /* ??? We would like this to have the scope of the jump, but that
4941 scope will change when a delay slot insn of an inner scope is added.
4942 Hence, after delay slot scheduling, we'll have to expect
4943 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4944 the jump. */
4945
4946 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4947 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4948 return insn;
4949 }
4950 else if (need_block)
4951 /* We can't use JUMP_LABEL here because it might be undefined
4952 when not optimizing. */
4953 return emit_insn_before (gen_block_branch_redirect
4954 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4955 , jump);
4956 return prev;
4957 }
4958
4959 #define CONDJUMP_MIN -252
4960 #define CONDJUMP_MAX 262
4961 struct far_branch
4962 {
4963 /* A label (to be placed) in front of the jump
4964 that jumps to our ultimate destination. */
4965 rtx near_label;
4966 /* Where we are going to insert it if we cannot move the jump any farther,
4967 or the jump itself if we have picked up an existing jump. */
4968 rtx insert_place;
4969 /* The ultimate destination. */
4970 rtx far_label;
4971 struct far_branch *prev;
4972 /* If the branch has already been created, its address;
4973 else the address of its first prospective user. */
4974 int address;
4975 };
4976
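/* Sketch of the transformation performed by gen_far_branch below
   (exposition only; the label names are made up):

	bf	.L_far		! conditional branch out of range

   becomes

	bt	.L_skip		! inverted condition
   .L_near:
	bra	.L_far
	nop			! delay slot
   .L_skip:

   where .L_near is NEAR_LABEL, available as a redirection target for
   other branches that cannot reach .L_far either.  */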
4977 static void gen_far_branch (struct far_branch *);
4978 enum mdep_reorg_phase_e mdep_reorg_phase;
4979 static void
4980 gen_far_branch (struct far_branch *bp)
4981 {
4982 rtx insn = bp->insert_place;
4983 rtx jump;
4984 rtx label = gen_label_rtx ();
4985 int ok;
4986
4987 emit_label_after (label, insn);
4988 if (bp->far_label)
4989 {
4990 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4991 LABEL_NUSES (bp->far_label)++;
4992 }
4993 else
4994 jump = emit_jump_insn_after (gen_return (), insn);
4995 /* Emit a barrier so that reorg knows that any following instructions
4996 are not reachable via a fall-through path.
4997 But don't do this when not optimizing, since we wouldn't suppress the
4998 alignment for the barrier then, and could end up with out-of-range
4999 pc-relative loads. */
5000 if (optimize)
5001 emit_barrier_after (jump);
5002 emit_label_after (bp->near_label, insn);
5003 JUMP_LABEL (jump) = bp->far_label;
5004 ok = invert_jump (insn, label, 1);
5005 gcc_assert (ok);
5006
5007 /* If we are branching around a jump (rather than a return), prevent
5008 reorg from using an insn from the jump target as the delay slot insn -
5009 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5010 and it could cause branches to go out of range. */
5011 if (bp->far_label)
5012 (emit_insn_after
5013 (gen_stuff_delay_slot
5014 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
5015 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5016 insn));
5017 /* Prevent reorg from undoing our splits. */
5018 gen_block_redirect (jump, bp->address += 2, 2);
5019 }
5020
5021 /* Fix up ADDR_DIFF_VECs. */
5022 void
5023 fixup_addr_diff_vecs (rtx first)
5024 {
5025 rtx insn;
5026
5027 for (insn = first; insn; insn = NEXT_INSN (insn))
5028 {
5029 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5030
5031 if (!JUMP_P (insn)
5032 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5033 continue;
5034 pat = PATTERN (insn);
5035 vec_lab = XEXP (XEXP (pat, 0), 0);
5036
5037 /* Search the matching casesi_jump_2. */
5038 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5039 {
5040 if (!JUMP_P (prev))
5041 continue;
5042 prevpat = PATTERN (prev);
5043 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5044 continue;
5045 x = XVECEXP (prevpat, 0, 1);
5046 if (GET_CODE (x) != USE)
5047 continue;
5048 x = XEXP (x, 0);
5049 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5050 break;
5051 }
5052 /* FIXME: This is a bug in the optimizer, but it seems harmless
5053 to just avoid panicking. */
5054 if (!prev)
5055 continue;
5056
5057 /* Emit the reference label of the braf where it belongs, right after
5058 the casesi_jump_2 (i.e. braf). */
5059 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5060 emit_label_after (braf_label, prev);
5061
5062 /* Fix up the ADDR_DIFF_VEC to be relative
5063 to the reference address of the braf. */
5064 XEXP (XEXP (pat, 0), 0) = braf_label;
5065 }
5066 }
5067
5068 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5069 a barrier. Return the base 2 logarithm of the desired alignment. */
5070 int
5071 barrier_align (rtx barrier_or_label)
5072 {
5073 rtx next = next_real_insn (barrier_or_label), pat, prev;
5074 int slot, credit, jump_to_next = 0;
5075
5076 if (! next)
5077 return 0;
5078
5079 pat = PATTERN (next);
5080
5081 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5082 return 2;
5083
5084 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5085 /* This is a barrier in front of a constant table. */
5086 return 0;
5087
5088 prev = prev_real_insn (barrier_or_label);
5089 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5090 {
5091 pat = PATTERN (prev);
5092 /* If this is a very small table, we want to keep the alignment after
5093 the table to the minimum for proper code alignment. */
5094 return ((TARGET_SMALLCODE
5095 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5096 <= (unsigned) 1 << (CACHE_LOG - 2)))
5097 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5098 }
5099
5100 if (TARGET_SMALLCODE)
5101 return 0;
5102
5103 if (! TARGET_SH2 || ! optimize)
5104 return align_jumps_log;
5105
5106 /* When fixing up pcloads, a constant table might be inserted just before
5107 the basic block that ends with the barrier. Thus, we can't trust the
5108 instruction lengths before that. */
5109 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5110 {
5111 /* Check if there is an immediately preceding branch to the insn beyond
5112 the barrier. We must weigh the cost of discarding useful information
5113 from the current cache line when executing this branch and there is
5114 an alignment, against that of fetching unneeded insns in front of the
5115 branch target when there is no alignment.
5116
5117 /* There are two delay_slot cases to consider. One is the simple case
5118 where the preceding branch is to the insn beyond the barrier (simple
5119 delay slot filling), and the other is where the preceding branch has
5120 a delay slot that is a duplicate of the insn after the barrier
5121 (fill_eager_delay_slots) and the branch is to the insn after the insn
5122 after the barrier. */
5123
5124 /* PREV is presumed to be the JUMP_INSN for the barrier under
5125 investigation. Skip to the insn before it. */
5126 prev = prev_real_insn (prev);
5127
5128 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5129 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5130 prev = prev_real_insn (prev))
5131 {
5132 jump_to_next = 0;
5133 if (GET_CODE (PATTERN (prev)) == USE
5134 || GET_CODE (PATTERN (prev)) == CLOBBER)
5135 continue;
5136 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5137 {
5138 prev = XVECEXP (PATTERN (prev), 0, 1);
5139 if (INSN_UID (prev) == INSN_UID (next))
5140 {
5141 /* Delay slot was filled with insn at jump target. */
5142 jump_to_next = 1;
5143 continue;
5144 }
5145 }
5146
5147 if (slot &&
5148 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5149 slot = 0;
5150 credit -= get_attr_length (prev);
5151 }
5152 if (prev
5153 && JUMP_P (prev)
5154 && JUMP_LABEL (prev))
5155 {
5156 rtx x;
5157 if (jump_to_next
5158 || next_real_insn (JUMP_LABEL (prev)) == next
5159 /* If relax_delay_slots() decides NEXT was redundant
5160 with some previous instruction, it will have
5161 redirected PREV's jump to the following insn. */
5162 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5163 /* There is no upper bound on redundant instructions
5164 that might have been skipped, but we must not put an
5165 alignment where none had been before. */
5166 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5167 (INSN_P (x)
5168 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5169 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5170 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5171 {
5172 rtx pat = PATTERN (prev);
5173 if (GET_CODE (pat) == PARALLEL)
5174 pat = XVECEXP (pat, 0, 0);
5175 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5176 return 0;
5177 }
5178 }
5179 }
5180
5181 return align_jumps_log;
5182 }
5183
5184 /* If we are inside a phony loop, almost any kind of label can turn up as the
5185 first one in the loop. Aligning a braf label causes incorrect switch
5186 destination addresses; we can detect braf labels because they are
5187 followed by a BARRIER.
5188 Applying loop alignment to small constant or switch tables is a waste
5189 of space, so we suppress this too. */
5190 int
5191 sh_loop_align (rtx label)
5192 {
5193 rtx next = label;
5194
5195 do
5196 next = next_nonnote_insn (next);
5197 while (next && LABEL_P (next));
5198
5199 if (! next
5200 || ! INSN_P (next)
5201 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5202 || recog_memoized (next) == CODE_FOR_consttable_2)
5203 return 0;
5204
5205 return align_loops_log;
5206 }
5207
5208 /* Do a final pass over the function, just before delayed branch
5209 scheduling. */
5210
5211 static void
5212 sh_reorg (void)
5213 {
5214 rtx first, insn, mova = NULL_RTX;
5215 int num_mova;
5216 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5217 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5218
5219 first = get_insns ();
5220 max_labelno_before_reorg = max_label_num ();
5221
5222 /* We must split call insns before introducing `mova's. If we're
5223 optimizing, they'll have already been split. Otherwise, make
5224 sure we don't split them too late. */
5225 if (! optimize)
5226 split_all_insns_noflow ();
5227
5228 if (TARGET_SHMEDIA)
5229 return;
5230
5231 /* If relaxing, generate pseudo-ops to associate function calls with
5232 the symbols they call. It does no harm to not generate these
5233 pseudo-ops. However, when we can generate them, it enables the
5234 linker to potentially relax the jsr to a bsr, and eliminate the
5235 register load and, possibly, the constant pool entry. */
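/* For example (illustrative), with TARGET_RELAX a call sequence such as

	mov.l	.Lconst,r1	! r1 <- address of foo from the pool
	...
	jsr	@r1

   annotated via these notes can be relaxed by the linker into a direct
   "bsr foo", making the register load - and possibly the constant pool
   entry - dead.  */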
5236
5237 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5238 if (TARGET_RELAX)
5239 {
5240 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5241 own purposes. This works because none of the remaining passes
5242 need to look at them.
5243
5244 ??? But it may break in the future. We should use a machine
5245 dependent REG_NOTE, or some other approach entirely. */
5246 for (insn = first; insn; insn = NEXT_INSN (insn))
5247 {
5248 if (INSN_P (insn))
5249 {
5250 rtx note;
5251
5252 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5253 NULL_RTX)) != 0)
5254 remove_note (insn, note);
5255 }
5256 }
5257
5258 for (insn = first; insn; insn = NEXT_INSN (insn))
5259 {
5260 rtx pattern, reg, link, set, scan, dies, label;
5261 int rescan = 0, foundinsn = 0;
5262
5263 if (CALL_P (insn))
5264 {
5265 pattern = PATTERN (insn);
5266
5267 if (GET_CODE (pattern) == PARALLEL)
5268 pattern = XVECEXP (pattern, 0, 0);
5269 if (GET_CODE (pattern) == SET)
5270 pattern = SET_SRC (pattern);
5271
5272 if (GET_CODE (pattern) != CALL
5273 || !MEM_P (XEXP (pattern, 0)))
5274 continue;
5275
5276 reg = XEXP (XEXP (pattern, 0), 0);
5277 }
5278 else
5279 {
5280 reg = sfunc_uses_reg (insn);
5281 if (! reg)
5282 continue;
5283 }
5284
5285 if (!REG_P (reg))
5286 continue;
5287
5288 /* Try scanning backward to find where the register is set. */
5289 link = NULL;
5290 for (scan = PREV_INSN (insn);
5291 scan && !LABEL_P (scan);
5292 scan = PREV_INSN (scan))
5293 {
5294 if (! INSN_P (scan))
5295 continue;
5296
5297 if (! reg_mentioned_p (reg, scan))
5298 continue;
5299
5300 if (noncall_uses_reg (reg, scan, &set))
5301 break;
5302
5303 if (set)
5304 {
5305 link = scan;
5306 break;
5307 }
5308 }
5309
5310 if (! link)
5311 continue;
5312
5313 /* The register is set at LINK. */
5314
5315 /* We can only optimize the function call if the register is
5316 being set to a symbol. In theory, we could sometimes
5317 optimize calls to a constant location, but the assembler
5318 and linker do not support that at present. */
5319 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5320 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5321 continue;
5322
5323 /* Scan forward from LINK to the place where REG dies, and
5324 make sure that the only insns which use REG are
5325 themselves function calls. */
5326
5327 /* ??? This doesn't work for call targets that were allocated
5328 by reload, since there may not be a REG_DEAD note for the
5329 register. */
5330
5331 dies = NULL_RTX;
5332 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5333 {
5334 rtx scanset;
5335
5336 /* Don't try to trace forward past a CODE_LABEL if we haven't
5337 seen INSN yet. Ordinarily, we will only find the setting insn
5338 if it is in the same basic block. However,
5339 cross-jumping can insert code labels in between the load and
5340 the call, and can result in situations where a single call
5341 insn may have two targets depending on where we came from. */
5342
5343 if (LABEL_P (scan) && ! foundinsn)
5344 break;
5345
5346 if (! INSN_P (scan))
5347 continue;
5348
5349 /* Don't try to trace forward past a JUMP. To optimize
5350 safely, we would have to check that all the
5351 instructions at the jump destination did not use REG. */
5352
5353 if (JUMP_P (scan))
5354 break;
5355
5356 if (! reg_mentioned_p (reg, scan))
5357 continue;
5358
5359 if (noncall_uses_reg (reg, scan, &scanset))
5360 break;
5361
5362 if (scan == insn)
5363 foundinsn = 1;
5364
5365 if (scan != insn
5366 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5367 {
5368 /* There is a function call to this register other
5369 than the one we are checking. If we optimize
5370 this call, we need to rescan again below. */
5371 rescan = 1;
5372 }
5373
5374 /* ??? We shouldn't have to worry about SCANSET here.
5375 We should just be able to check for a REG_DEAD note
5376 on a function call. However, the REG_DEAD notes are
5377 apparently not dependable around libcalls; c-torture
5378 execute/920501-2 is a test case. If SCANSET is set,
5379 then this insn sets the register, so it must have
5380 died earlier. Unfortunately, this will only handle
5381 the cases in which the register is, in fact, set in a
5382 later insn. */
5383
5384 /* ??? We shouldn't have to use FOUNDINSN here.
5385 This dates back to when we used LOG_LINKS to find
5386 the most recent insn which sets the register. */
5387
5388 if (foundinsn
5389 && (scanset
5390 || find_reg_note (scan, REG_DEAD, reg)))
5391 {
5392 dies = scan;
5393 break;
5394 }
5395 }
5396
5397 if (! dies)
5398 {
5399 /* Either there was a branch, or some insn used REG
5400 other than as a function call address. */
5401 continue;
5402 }
5403
5404 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5405 on the insn which sets the register, and on each call insn
5406 which uses the register. In final_prescan_insn we look for
5407 the REG_LABEL_OPERAND notes, and output the appropriate label
5408 or pseudo-op. */
5409
5410 label = gen_label_rtx ();
5411 add_reg_note (link, REG_LABEL_OPERAND, label);
5412 add_reg_note (insn, REG_LABEL_OPERAND, label);
5413 if (rescan)
5414 {
5415 scan = link;
5416 do
5417 {
5418 rtx reg2;
5419
5420 scan = NEXT_INSN (scan);
5421 if (scan != insn
5422 && ((CALL_P (scan)
5423 && reg_mentioned_p (reg, scan))
5424 || ((reg2 = sfunc_uses_reg (scan))
5425 && REGNO (reg2) == REGNO (reg))))
5426 add_reg_note (scan, REG_LABEL_OPERAND, label);
5427 }
5428 while (scan != dies);
5429 }
5430 }
5431 }
5432
5433 if (TARGET_SH2)
5434 fixup_addr_diff_vecs (first);
5435
5436 if (optimize)
5437 {
5438 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5439 shorten_branches (first);
5440 }
5441
5442 /* Scan the function looking for move instructions which have to be
5443 changed to pc-relative loads and insert the literal tables. */
5444 label_ref_list_pool = create_alloc_pool ("label references list",
5445 sizeof (struct label_ref_list_d),
5446 30);
5447 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5448 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5449 {
5450 if (mova_p (insn))
5451 {
5452 /* ??? basic block reordering can move a switch table dispatch
5453 below the switch table. Check if that has happened.
5454 We only have the addresses available when optimizing; but then,
5455 this check shouldn't be needed when not optimizing. */
5456 if (!untangle_mova (&num_mova, &mova, insn))
5457 {
5458 insn = mova;
5459 num_mova = 0;
5460 }
5461 }
5462 else if (JUMP_P (insn)
5463 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5464 && num_mova
5465 /* ??? Loop invariant motion can also move a mova out of a
5466 loop. Since the loop pass does this code motion anyway, maybe
5467 we should wrap UNSPEC_MOVA into a CONST, so that reload can
5468 move it back. */
5469 && ((num_mova > 1
5470 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5471 || (prev_nonnote_insn (insn)
5472 == XEXP (MOVA_LABELREF (mova), 0))))
5473 {
5474 rtx scan;
5475 int total;
5476
5477 num_mova--;
5478
5479 /* Some code might have been inserted between the mova and
5480 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5481 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5482 total += get_attr_length (scan);
5483
5484 /* The range of mova is 1020; add 4 because the pc counts from the
5485 second instruction after this one, and subtract 2 in case the pc is
5486 2-byte aligned, hence the 1020 + 4 - 2 = 1022 limit below. Alignment
5487 possibly needed for the ADDR_DIFF_VEC cancels out with that of the mova. */
5488 if (total > 1022)
5489 {
5490 /* Change the mova into a load, and restart scanning
5491 there. broken_move will then return true for mova. */
5492 fixup_mova (mova);
5493 insn = mova;
5494 }
5495 }
5496 if (broken_move (insn)
5497 || (NONJUMP_INSN_P (insn)
5498 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5499 {
5500 rtx scan;
5501 /* Scan ahead looking for a barrier to stick the constant table
5502 behind. */
5503 rtx barrier = find_barrier (num_mova, mova, insn);
5504 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5505 int need_aligned_label = 0;
5506
5507 if (num_mova && ! mova_p (mova))
5508 {
5509 /* find_barrier had to change the first mova into a
5510 pcload; thus, we have to start with this new pcload. */
5511 insn = mova;
5512 num_mova = 0;
5513 }
5514 /* Now find all the moves between the points and modify them. */
5515 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5516 {
5517 if (LABEL_P (scan))
5518 last_float = 0;
5519 if (NONJUMP_INSN_P (scan)
5520 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5521 need_aligned_label = 1;
5522 if (broken_move (scan))
5523 {
5524 rtx *patp = &PATTERN (scan), pat = *patp;
5525 rtx src, dst;
5526 rtx lab;
5527 rtx newsrc;
5528 enum machine_mode mode;
5529
5530 if (GET_CODE (pat) == PARALLEL)
5531 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5532 src = SET_SRC (pat);
5533 dst = SET_DEST (pat);
5534 mode = GET_MODE (dst);
5535
5536 if (mode == SImode && hi_const (src)
5537 && REGNO (dst) != FPUL_REG)
5538 {
5539 int offset = 0;
5540
5541 mode = HImode;
5542 while (GET_CODE (dst) == SUBREG)
5543 {
5544 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5545 GET_MODE (SUBREG_REG (dst)),
5546 SUBREG_BYTE (dst),
5547 GET_MODE (dst));
5548 dst = SUBREG_REG (dst);
5549 }
5550 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5551 }
5552 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5553 {
5554 /* This must be an insn that clobbers r0. */
5555 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5556 XVECLEN (PATTERN (scan), 0)
5557 - 1);
5558 rtx clobber = *clobberp;
5559
5560 gcc_assert (GET_CODE (clobber) == CLOBBER
5561 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5562
5563 if (last_float
5564 && reg_set_between_p (r0_rtx, last_float_move, scan))
5565 last_float = 0;
5566 if (last_float
5567 && TARGET_SHCOMPACT
5568 && GET_MODE_SIZE (mode) != 4
5569 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5570 last_float = 0;
5571 lab = add_constant (src, mode, last_float);
5572 if (lab)
5573 emit_insn_before (gen_mova (lab), scan);
5574 else
5575 {
5576 /* There will be a REG_UNUSED note for r0 on
5577 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5578 otherwise reorg's mark_target_live_regs will not
5579 consider r0 to be used, and we could end up with a
5580 delay slot insn in front of SCAN that clobbers r0. */
5581 rtx note
5582 = find_regno_note (last_float_move, REG_UNUSED, 0);
5583
5584 /* If we are not optimizing, then there may not be
5585 a note. */
5586 if (note)
5587 PUT_REG_NOTE_KIND (note, REG_INC);
5588
5589 *last_float_addr = r0_inc_rtx;
5590 }
5591 last_float_move = scan;
5592 last_float = src;
5593 newsrc = gen_const_mem (mode,
5594 (((TARGET_SH4 && ! TARGET_FMOVD)
5595 || REGNO (dst) == FPUL_REG)
5596 ? r0_inc_rtx
5597 : r0_rtx));
5598 last_float_addr = &XEXP (newsrc, 0);
5599
5600 /* Remove the clobber of r0. */
5601 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5602 gen_rtx_SCRATCH (Pmode));
5603 }
5604 /* This is a mova needing a label. Create it. */
5605 else if (GET_CODE (src) == UNSPEC
5606 && XINT (src, 1) == UNSPEC_MOVA
5607 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5608 {
5609 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5610 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5611 newsrc = gen_rtx_UNSPEC (SImode,
5612 gen_rtvec (1, newsrc),
5613 UNSPEC_MOVA);
5614 }
5615 else
5616 {
5617 lab = add_constant (src, mode, 0);
5618 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5619 newsrc = gen_const_mem (mode, newsrc);
5620 }
5621 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5622 INSN_CODE (scan) = -1;
5623 }
5624 }
5625 dump_table (need_aligned_label ? insn : 0, barrier);
5626 insn = barrier;
5627 }
5628 }
5629 free_alloc_pool (label_ref_list_pool);
5630 for (insn = first; insn; insn = NEXT_INSN (insn))
5631 PUT_MODE (insn, VOIDmode);
5632
5633 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5634 INSN_ADDRESSES_FREE ();
5635 split_branches (first);
5636
5637 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5638 also has an effect on the register that holds the address of the sfunc.
5639 Insert an extra dummy insn in front of each sfunc that pretends to
5640 use this register. */
5641 if (flag_delayed_branch)
5642 {
5643 for (insn = first; insn; insn = NEXT_INSN (insn))
5644 {
5645 rtx reg = sfunc_uses_reg (insn);
5646
5647 if (! reg)
5648 continue;
5649 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5650 }
5651 }
5652 #if 0
5653 /* fpscr is not actually a user variable, but we pretend it is for the
5654 sake of the previous optimization passes, since we want it handled like
5655 one. However, we don't have any debugging information for it, so turn
5656 it into a non-user variable now. */
5657 if (TARGET_SH4)
5658 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5659 #endif
5660 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5661 }
5662
5663 int
5664 get_dest_uid (rtx label, int max_uid)
5665 {
5666 rtx dest = next_real_insn (label);
5667 int dest_uid;
5668 if (! dest)
5669 /* This can happen for an undefined label. */
5670 return 0;
5671 dest_uid = INSN_UID (dest);
5672 /* If this is a newly created branch redirection blocking instruction,
5673 we cannot index the branch_uid or insn_addresses arrays with its
5674 uid. But then, we won't need to, because the actual destination is
5675 the following branch. */
5676 while (dest_uid >= max_uid)
5677 {
5678 dest = NEXT_INSN (dest);
5679 dest_uid = INSN_UID (dest);
5680 }
5681 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5682 return 0;
5683 return dest_uid;
5684 }
5685
5686 /* Split condbranches that are out of range. Also add clobbers for
5687 scratch registers that are needed in far jumps.
5688 We do this before delay slot scheduling, so that it can take our
5689 newly created instructions into account. It also allows us to
5690 find branches with common targets more easily. */
5691
5692 static void
5693 split_branches (rtx first)
5694 {
5695 rtx insn;
5696 struct far_branch **uid_branch, *far_branch_list = 0;
5697 int max_uid = get_max_uid ();
5698 int ok;
5699
5700 /* Find out which branches are out of range. */
5701 shorten_branches (first);
5702
5703 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5704 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5705
5706 for (insn = first; insn; insn = NEXT_INSN (insn))
5707 if (! INSN_P (insn))
5708 continue;
5709 else if (INSN_DELETED_P (insn))
5710 {
5711 /* Shorten_branches would split this instruction again,
5712 so transform it into a note. */
5713 SET_INSN_DELETED (insn);
5714 }
5715 else if (JUMP_P (insn)
5716 /* Don't mess with ADDR_DIFF_VEC */
5717 && (GET_CODE (PATTERN (insn)) == SET
5718 || GET_CODE (PATTERN (insn)) == RETURN))
5719 {
5720 enum attr_type type = get_attr_type (insn);
5721 if (type == TYPE_CBRANCH)
5722 {
5723 rtx next, beyond;
5724
5725 if (get_attr_length (insn) > 4)
5726 {
5727 rtx src = SET_SRC (PATTERN (insn));
5728 rtx olabel = XEXP (XEXP (src, 1), 0);
5729 int addr = INSN_ADDRESSES (INSN_UID (insn));
5730 rtx label = 0;
5731 int dest_uid = get_dest_uid (olabel, max_uid);
5732 struct far_branch *bp = uid_branch[dest_uid];
5733
5734 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5735 the label if the LABEL_NUSES count drops to zero. There is
5736 always a jump_optimize pass that sets these values, but it
5737 proceeds to delete unreferenced code, and then, if not
5738 optimizing, to un-delete the deleted instructions, thus
5739 leaving labels with use counts that are too low. */
5740 if (! optimize)
5741 {
5742 JUMP_LABEL (insn) = olabel;
5743 LABEL_NUSES (olabel)++;
5744 }
5745 if (! bp)
5746 {
5747 bp = (struct far_branch *) alloca (sizeof *bp);
5748 uid_branch[dest_uid] = bp;
5749 bp->prev = far_branch_list;
5750 far_branch_list = bp;
5751 bp->far_label
5752 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5753 LABEL_NUSES (bp->far_label)++;
5754 }
5755 else
5756 {
5757 label = bp->near_label;
5758 if (! label && bp->address - addr >= CONDJUMP_MIN)
5759 {
5760 rtx block = bp->insert_place;
5761
5762 if (GET_CODE (PATTERN (block)) == RETURN)
5763 block = PREV_INSN (block);
5764 else
5765 block = gen_block_redirect (block,
5766 bp->address, 2);
5767 label = emit_label_after (gen_label_rtx (),
5768 PREV_INSN (block));
5769 bp->near_label = label;
5770 }
5771 else if (label && ! NEXT_INSN (label))
5772 {
5773 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5774 bp->insert_place = insn;
5775 else
5776 gen_far_branch (bp);
5777 }
5778 }
5779 if (! label
5780 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5781 {
5782 bp->near_label = label = gen_label_rtx ();
5783 bp->insert_place = insn;
5784 bp->address = addr;
5785 }
5786 ok = redirect_jump (insn, label, 0);
5787 gcc_assert (ok);
5788 }
5789 else
5790 {
5791 /* get_attr_length (insn) == 2 */
5792 /* Check if we have a pattern where reorg wants to redirect
5793 the branch to a label from an unconditional branch that
5794 is too far away. */
5795 /* We can't use JUMP_LABEL here because it might be undefined
5796 when not optimizing. */
5797 /* A syntax error might cause beyond to be NULL_RTX. */
5798 beyond
5799 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5800 0));
5801
5802 if (beyond
5803 && (JUMP_P (beyond)
5804 || ((beyond = next_active_insn (beyond))
5805 && JUMP_P (beyond)))
5806 && GET_CODE (PATTERN (beyond)) == SET
5807 && recog_memoized (beyond) == CODE_FOR_jump_compact
5808 && ((INSN_ADDRESSES
5809 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5810 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5811 > 252 + 258 + 2))
5812 gen_block_redirect (beyond,
5813 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5814 }
5815
5816 next = next_active_insn (insn);
5817
5818 if ((JUMP_P (next)
5819 || ((next = next_active_insn (next))
5820 && JUMP_P (next)))
5821 && GET_CODE (PATTERN (next)) == SET
5822 && recog_memoized (next) == CODE_FOR_jump_compact
5823 && ((INSN_ADDRESSES
5824 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5825 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5826 > 252 + 258 + 2))
5827 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5828 }
5829 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5830 {
5831 int addr = INSN_ADDRESSES (INSN_UID (insn));
5832 rtx far_label = 0;
5833 int dest_uid = 0;
5834 struct far_branch *bp;
5835
5836 if (type == TYPE_JUMP)
5837 {
5838 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5839 dest_uid = get_dest_uid (far_label, max_uid);
5840 if (! dest_uid)
5841 {
5842 /* Parse errors can lead to labels outside
5843 the insn stream. */
5844 if (! NEXT_INSN (far_label))
5845 continue;
5846
5847 if (! optimize)
5848 {
5849 JUMP_LABEL (insn) = far_label;
5850 LABEL_NUSES (far_label)++;
5851 }
5852 redirect_jump (insn, NULL_RTX, 1);
5853 far_label = 0;
5854 }
5855 }
5856 bp = uid_branch[dest_uid];
5857 if (! bp)
5858 {
5859 bp = (struct far_branch *) alloca (sizeof *bp);
5860 uid_branch[dest_uid] = bp;
5861 bp->prev = far_branch_list;
5862 far_branch_list = bp;
5863 bp->near_label = 0;
5864 bp->far_label = far_label;
5865 if (far_label)
5866 LABEL_NUSES (far_label)++;
5867 }
5868 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5869 if (addr - bp->address <= CONDJUMP_MAX)
5870 emit_label_after (bp->near_label, PREV_INSN (insn));
5871 else
5872 {
5873 gen_far_branch (bp);
5874 bp->near_label = 0;
5875 }
5876 else
5877 bp->near_label = 0;
5878 bp->address = addr;
5879 bp->insert_place = insn;
5880 if (! far_label)
5881 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5882 else
5883 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5884 }
5885 }
5886 /* Generate all pending far branches,
5887 and free our references to the far labels. */
5888 while (far_branch_list)
5889 {
5890 if (far_branch_list->near_label
5891 && ! NEXT_INSN (far_branch_list->near_label))
5892 gen_far_branch (far_branch_list);
5893 if (optimize
5894 && far_branch_list->far_label
5895 && ! --LABEL_NUSES (far_branch_list->far_label))
5896 delete_insn (far_branch_list->far_label);
5897 far_branch_list = far_branch_list->prev;
5898 }
5899
5900 /* Instruction length information is no longer valid due to the new
5901 instructions that have been generated. */
5902 init_insn_lengths ();
5903 }
5904
5905 /* Dump out instruction addresses, which is useful for debugging the
5906 constant pool table code.
5907
5908 If relaxing, output the label and pseudo-ops used to link together
5909 calls and the instructions which set the registers. */
5910
5911 /* ??? The addresses printed by this routine for insns are nonsense for
5912 insns which are inside of a sequence where none of the inner insns have
5913 variable length. This is because the second pass of shorten_branches
5914 does not bother to update them. */
5915
5916 void
5917 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5918 int noperands ATTRIBUTE_UNUSED)
5919 {
5920 if (TARGET_DUMPISIZE)
5921 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5922
5923 if (TARGET_RELAX)
5924 {
5925 rtx note;
5926
5927 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5928 if (note)
5929 {
5930 rtx pattern;
5931
5932 pattern = PATTERN (insn);
5933 if (GET_CODE (pattern) == PARALLEL)
5934 pattern = XVECEXP (pattern, 0, 0);
5935 switch (GET_CODE (pattern))
5936 {
5937 case SET:
5938 if (GET_CODE (SET_SRC (pattern)) != CALL
5939 && get_attr_type (insn) != TYPE_SFUNC)
5940 {
5941 targetm.asm_out.internal_label
5942 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5943 break;
5944 }
5945 /* else FALLTHROUGH */
5946 case CALL:
5947 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5948 CODE_LABEL_NUMBER (XEXP (note, 0)));
5949 break;
5950
5951 default:
5952 gcc_unreachable ();
5953 }
5954 }
5955 }
5956 }
5957
5958 /* Dump out any constants accumulated in the final pass. These will
5959 only be labels. */
5960
5961 const char *
5962 output_jump_label_table (void)
5963 {
5964 int i;
5965
5966 if (pool_size)
5967 {
5968 fprintf (asm_out_file, "\t.align 2\n");
5969 for (i = 0; i < pool_size; i++)
5970 {
5971 pool_node *p = &pool_vector[i];
5972
5973 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5974 CODE_LABEL_NUMBER (p->label));
5975 output_asm_insn (".long %O0", &p->value);
5976 }
5977 pool_size = 0;
5978 }
5979
5980 return "";
5981 }
5982 \f
5983 /* A full frame looks like:
5984
5985 arg-5
5986 arg-4
5987 [ if current_function_anonymous_args
5988 arg-3
5989 arg-2
5990 arg-1
5991 arg-0 ]
5992 saved-fp
5993 saved-r10
5994 saved-r11
5995 saved-r12
5996 saved-pr
5997 local-n
5998 ..
5999 local-1
6000 local-0 <- fp points here. */
6001
6002 /* Number of bytes pushed for anonymous args, used to pass information
6003 between expand_prologue and expand_epilogue. */
6004
6005 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6006 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6007 for an epilogue and a negative value means that it's for a sibcall
6008 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6009 all the registers that are about to be restored, and hence dead. */
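/* A minimal usage sketch (the byte count is hypothetical): the prologue
   below allocates frame space with a call such as
     output_stack_adjust (-16, stack_pointer_rtx, 0, NULL);
   while a sibcall epilogue passes a negative EPILOGUE_P and the set of
   restored registers, e.g.
     output_stack_adjust (frame_size, stack_pointer_rtx, -1, &live_regs_mask);  */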
6010
6011 static void
6012 output_stack_adjust (int size, rtx reg, int epilogue_p,
6013 HARD_REG_SET *live_regs_mask)
6014 {
6015 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
6016 if (size)
6017 {
6018 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6019
6020 /* This test is bogus, as output_stack_adjust is used to re-align the
6021 stack. */
6022 #if 0
6023 gcc_assert (!(size % align));
6024 #endif
6025
6026 if (CONST_OK_FOR_ADD (size))
6027 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6028 /* Try to do it with two partial adjustments; however, we must make
6029 sure that the stack is properly aligned at all times, in case
6030 an interrupt occurs between the two partial adjustments. */
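/* For example (assuming the non-SHmedia case, where CONST_OK_FOR_ADD
   checks a signed 8-bit add immediate, and align == 4): size == -200
   is split into -100 and -100; each half fits the add immediate, and
   the first half is a multiple of the alignment, so the stack stays
   aligned between the two adjustments.  */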
6031 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6032 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6033 {
6034 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6035 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6036 }
6037 else
6038 {
6039 rtx const_reg;
6040 rtx insn;
6041 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6042 int i;
6043
6044 /* If TEMP is invalid, we could temporarily save a general
6045 register to MACL. However, there is currently no need
6046 to handle this case, so just die when we see it. */
6047 if (epilogue_p < 0
6048 || current_function_interrupt
6049 || ! call_really_used_regs[temp] || fixed_regs[temp])
6050 temp = -1;
6051 if (temp < 0 && ! current_function_interrupt
6052 && (TARGET_SHMEDIA || epilogue_p >= 0))
6053 {
6054 HARD_REG_SET temps;
6055 COPY_HARD_REG_SET (temps, call_used_reg_set);
6056 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6057 if (epilogue_p > 0)
6058 {
6059 int nreg = 0;
6060 if (crtl->return_rtx)
6061 {
6062 enum machine_mode mode;
6063 mode = GET_MODE (crtl->return_rtx);
6064 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6065 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6066 }
6067 for (i = 0; i < nreg; i++)
6068 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6069 if (crtl->calls_eh_return)
6070 {
6071 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6072 for (i = 0; i <= 3; i++)
6073 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6074 }
6075 }
6076 if (TARGET_SHMEDIA && epilogue_p < 0)
6077 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6078 CLEAR_HARD_REG_BIT (temps, i);
6079 if (epilogue_p <= 0)
6080 {
6081 for (i = FIRST_PARM_REG;
6082 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6083 CLEAR_HARD_REG_BIT (temps, i);
6084 if (cfun->static_chain_decl != NULL)
6085 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6086 }
6087 temp = scavenge_reg (&temps);
6088 }
6089 if (temp < 0 && live_regs_mask)
6090 {
6091 HARD_REG_SET temps;
6092
6093 COPY_HARD_REG_SET (temps, *live_regs_mask);
6094 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6095 temp = scavenge_reg (&temps);
6096 }
6097 if (temp < 0)
6098 {
6099 rtx adj_reg, tmp_reg, mem;
6100
6101 /* If we reached here, the most likely case is the (sibcall)
6102 epilogue for non-SHmedia. Put a special push/pop sequence
6103 for such a case as a last resort. This looks lengthy, but
6104 it should not be a problem because it seems to be very
6105 rare. */
6106
6107 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6108
6109
6110 /* ??? There is still the slight possibility that r4 or
6111 r5 have been reserved as fixed registers or assigned
6112 as global registers, and they change during an
6113 interrupt. There are possible ways to handle this:
6114
6115 - If we are adjusting the frame pointer (r14), we can make do
6116 with a single temp register and an ordinary push / pop
6117 on the stack.
6118 - Grab any call-used or call-saved registers (i.e. not
6119 fixed or globals) for the temps we need. We might
6120 also grab r14 if we are adjusting the stack pointer.
6121 If we can't find enough available registers, issue
6122 a diagnostic and die - the user must have reserved
6123 way too many registers.
6124 But since all this is rather unlikely to happen and
6125 would require extra testing, we just die if r4 / r5
6126 are not available. */
6127 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6128 && !global_regs[4] && !global_regs[5]);
6129
6130 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6131 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6132 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6133 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6134 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6135 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6136 emit_move_insn (mem, tmp_reg);
6137 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6138 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6139 emit_move_insn (mem, tmp_reg);
6140 emit_move_insn (reg, adj_reg);
6141 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6142 emit_move_insn (adj_reg, mem);
6143 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6144 emit_move_insn (tmp_reg, mem);
6145 /* Tell flow the insns that pop r4/r5 aren't dead. */
6146 emit_use (tmp_reg);
6147 emit_use (adj_reg);
6148 return;
6149 }
6150 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6151
6152 /* If SIZE is negative, subtract the positive value.
6153 This sometimes allows a constant pool entry to be shared
6154 between prologue and epilogue code. */
6155 if (size < 0)
6156 {
6157 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6158 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6159 }
6160 else
6161 {
6162 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6163 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6164 }
6165 if (! epilogue_p)
6166 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6167 gen_rtx_SET (VOIDmode, reg,
6168 gen_rtx_PLUS (SImode, reg,
6169 GEN_INT (size))));
6170 }
6171 }
6172 }
6173
6174 static rtx
6175 frame_insn (rtx x)
6176 {
6177 x = emit_insn (x);
6178 RTX_FRAME_RELATED_P (x) = 1;
6179 return x;
6180 }
6181
6182 /* Output RTL to push register RN onto the stack. */
6183
6184 static rtx
6185 push (int rn)
6186 {
6187 rtx x;
6188 if (rn == FPUL_REG)
6189 x = gen_push_fpul ();
6190 else if (rn == FPSCR_REG)
6191 x = gen_push_fpscr ();
6192 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6193 && FP_OR_XD_REGISTER_P (rn))
6194 {
6195 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6196 return NULL_RTX;
6197 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6198 }
6199 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6200 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6201 else
6202 x = gen_push (gen_rtx_REG (SImode, rn));
6203
6204 x = frame_insn (x);
6205 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6206 return x;
6207 }
6208
6209 /* Output RTL to pop register RN from the stack. */
6210
6211 static void
6212 pop (int rn)
6213 {
6214 rtx x;
6215 if (rn == FPUL_REG)
6216 x = gen_pop_fpul ();
6217 else if (rn == FPSCR_REG)
6218 x = gen_pop_fpscr ();
6219 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6220 && FP_OR_XD_REGISTER_P (rn))
6221 {
6222 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6223 return;
6224 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6225 }
6226 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6227 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6228 else
6229 x = gen_pop (gen_rtx_REG (SImode, rn));
6230
6231 x = emit_insn (x);
6232 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6233 }
6234
6235 /* Generate code to push the regs specified in the mask. */
6236
6237 static void
6238 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6239 {
6240 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6241 int skip_fpscr = 0;
6242
6243 /* Push PR last; this gives better latencies after the prologue, and
6244 candidates for the return delay slot when there are no general
6245 registers pushed. */
6246 for (; i < FIRST_PSEUDO_REGISTER; i++)
6247 {
6248 /* If this is an interrupt handler, and the SZ bit varies,
6249 and we have to push any floating point register, we need
6250 to switch to the correct precision first. */
6251 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6252 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6253 {
6254 HARD_REG_SET unsaved;
6255
6256 push (FPSCR_REG);
6257 COMPL_HARD_REG_SET (unsaved, *mask);
6258 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6259 skip_fpscr = 1;
6260 }
6261 if (i != PR_REG
6262 && (i != FPSCR_REG || ! skip_fpscr)
6263 && TEST_HARD_REG_BIT (*mask, i))
6264 {
6265 /* If the ISR has the RESBANK attribute assigned, don't push any of
6266 the following registers: R0-R14, MACH, MACL and GBR. */
6267 if (! (sh_cfun_resbank_handler_p ()
6268 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6269 || i == MACH_REG
6270 || i == MACL_REG
6271 || i == GBR_REG)))
6272 push (i);
6273 }
6274 }
6275
6276 /* Push banked registers last to improve delay slot opportunities. */
6277 if (interrupt_handler)
6278 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6279 if (TEST_HARD_REG_BIT (*mask, i))
6280 push (i);
6281
6282 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6283 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6284 push (PR_REG);
6285 }
6286
6287 /* Calculate how much extra space is needed to save all callee-saved
6288 target registers.
6289 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6290
6291 static int
6292 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6293 {
6294 int reg;
6295 int stack_space = 0;
6296 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6297
6298 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6299 if ((! call_really_used_regs[reg] || interrupt_handler)
6300 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6301 /* Leave space to save this target register on the stack,
6302 in case target register allocation wants to use it. */
6303 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6304 return stack_space;
6305 }
6306
6307 /* Decide whether we should reserve space for callee-save target registers,
6308 in case target register allocation wants to use them. REGS_SAVED is
6309 the space, in bytes, that is already required for register saves.
6310 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6311
6312 static int
6313 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6314 HARD_REG_SET *live_regs_mask)
6315 {
6316 if (optimize_size)
6317 return 0;
6318 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6319 }
6320
6321 /* Decide how much space to reserve for callee-save target registers
6322 in case target register allocation wants to use them.
6323 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6324
6325 static int
6326 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6327 {
6328 if (shmedia_space_reserved_for_target_registers)
6329 return shmedia_target_regs_stack_space (live_regs_mask);
6330 else
6331 return 0;
6332 }
6333
6334 /* Work out the registers which need to be saved, both as a mask and a
6335 count of saved words. Return the count.
6336
6337 If doing a pragma interrupt function, then push all regs used by the
6338 function, and if we call another function (we can tell by looking at PR),
6339 make sure that all the regs it clobbers are safe too. */
6340
6341 static int
6342 calc_live_regs (HARD_REG_SET *live_regs_mask)
6343 {
6344 unsigned int reg;
6345 int count;
6346 tree attrs;
6347 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6348 bool nosave_low_regs;
6349 int pr_live, has_call;
6350
6351 attrs = DECL_ATTRIBUTES (current_function_decl);
6352 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6353 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6354 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6355 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6356
6357 CLEAR_HARD_REG_SET (*live_regs_mask);
6358 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6359 && df_regs_ever_live_p (FPSCR_REG))
6360 target_flags &= ~MASK_FPU_SINGLE;
6361 /* If we can avoid a lot of saves by switching to double mode, do that. */
6362 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6363 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6364 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6365 && (! call_really_used_regs[reg]
6366 || interrupt_handler)
6367 && ++count > 2)
6368 {
6369 target_flags &= ~MASK_FPU_SINGLE;
6370 break;
6371 }
6372 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6373 knows how to use it. That means the pseudo originally allocated for
6374 the initial value can become the PR_MEDIA_REG hard register, as seen for
6375 execute/20010122-1.c:test9. */
6376 if (TARGET_SHMEDIA)
6377 /* ??? this function is called from initial_elimination_offset, hence we
6378 can't use the result of sh_media_register_for_return here. */
6379 pr_live = sh_pr_n_sets ();
6380 else
6381 {
6382 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6383 pr_live = (pr_initial
6384 ? (!REG_P (pr_initial)
6385 || REGNO (pr_initial) != (PR_REG))
6386 : df_regs_ever_live_p (PR_REG));
6387 /* For SHcompact, if not optimizing, we end up with a memory reference
6388 using the return address pointer for __builtin_return_address even
6389 though there is no actual need to put the PR register on the stack. */
6390 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6391 }
6392 /* Force PR to be live if the prologue has to call the SHmedia
6393 argument decoder or register saver. */
6394 if (TARGET_SHCOMPACT
6395 && ((crtl->args.info.call_cookie
6396 & ~ CALL_COOKIE_RET_TRAMP (1))
6397 || crtl->saves_all_registers))
6398 pr_live = 1;
6399 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6400 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6401 {
6402 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6403 ? pr_live
6404 : interrupt_handler
6405 ? (/* Need to save all the regs ever live. */
6406 (df_regs_ever_live_p (reg)
6407 || (call_really_used_regs[reg]
6408 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6409 || reg == PIC_OFFSET_TABLE_REGNUM)
6410 && has_call)
6411 || (TARGET_SHMEDIA && has_call
6412 && REGISTER_NATURAL_MODE (reg) == SImode
6413 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6414 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6415 && reg != RETURN_ADDRESS_POINTER_REGNUM
6416 && reg != T_REG && reg != GBR_REG
6417 /* Push fpscr only on targets which have an FPU. */
6418 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6419 : (/* Only push those regs which are used and need to be saved. */
6420 (TARGET_SHCOMPACT
6421 && flag_pic
6422 && crtl->args.info.call_cookie
6423 && reg == PIC_OFFSET_TABLE_REGNUM)
6424 || (df_regs_ever_live_p (reg)
6425 && ((!call_really_used_regs[reg]
6426 && !(reg != PIC_OFFSET_TABLE_REGNUM
6427 && fixed_regs[reg] && call_used_regs[reg]))
6428 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6429 || (crtl->calls_eh_return
6430 && (reg == EH_RETURN_DATA_REGNO (0)
6431 || reg == EH_RETURN_DATA_REGNO (1)
6432 || reg == EH_RETURN_DATA_REGNO (2)
6433 || reg == EH_RETURN_DATA_REGNO (3)))
6434 || ((reg == MACL_REG || reg == MACH_REG)
6435 && df_regs_ever_live_p (reg)
6436 && sh_cfun_attr_renesas_p ())
6437 ))
6438 {
6439 SET_HARD_REG_BIT (*live_regs_mask, reg);
6440 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6441
6442 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6443 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6444 {
6445 if (FP_REGISTER_P (reg))
6446 {
6447 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6448 {
6449 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6450 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6451 }
6452 }
6453 else if (XD_REGISTER_P (reg))
6454 {
6455 /* Must switch to double mode to access these registers. */
6456 target_flags &= ~MASK_FPU_SINGLE;
6457 }
6458 }
6459 }
6460 if (nosave_low_regs && reg == R8_REG)
6461 break;
6462 }
6463 /* If we have a target register optimization pass after prologue / epilogue
6464 threading, we need to assume all target registers will be live even if
6465 they aren't now. */
6466 if (flag_branch_target_load_optimize2
6467 && TARGET_SAVE_ALL_TARGET_REGS
6468 && shmedia_space_reserved_for_target_registers)
6469 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6470 if ((! call_really_used_regs[reg] || interrupt_handler)
6471 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6472 {
6473 SET_HARD_REG_BIT (*live_regs_mask, reg);
6474 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6475 }
6476 /* If this is an interrupt handler, we don't have any call-clobbered
6477 registers we can conveniently use for target register save/restore.
6478 Make sure we save at least one general purpose register when we need
6479 to save target registers. */
6480 if (interrupt_handler
6481 && hard_reg_set_intersect_p (*live_regs_mask,
6482 reg_class_contents[TARGET_REGS])
6483 && ! hard_reg_set_intersect_p (*live_regs_mask,
6484 reg_class_contents[GENERAL_REGS]))
6485 {
6486 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6487 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6488 }
6489
6490 return count;
6491 }
6492
6493 /* Code to generate prologue and epilogue sequences */
6494
6495 /* PUSHED is the number of bytes that are being pushed on the
6496 stack for register saves. Return the frame size, padded
6497 appropriately so that the stack stays properly aligned. */
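/* For example (hypothetical values), with get_frame_size () == 10,
   PUSHED == 12 and an 8-byte alignment this returns
   ((10 + 12 + 7) & -8) - 12 == 12, so that the total of pushed
   registers plus frame is a multiple of 8.  */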
6498 static HOST_WIDE_INT
6499 rounded_frame_size (int pushed)
6500 {
6501 HOST_WIDE_INT size = get_frame_size ();
6502 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6503
6504 return ((size + pushed + align - 1) & -align) - pushed;
6505 }
6506
6507 /* Choose a call-clobbered target-branch register that remains
6508 unchanged throughout the whole function. We set it up as the return
6509 value in the prologue. */
6510 int
6511 sh_media_register_for_return (void)
6512 {
6513 int regno;
6514 int tr0_used;
6515
6516 if (! current_function_is_leaf)
6517 return -1;
6518 if (lookup_attribute ("interrupt_handler",
6519 DECL_ATTRIBUTES (current_function_decl)))
6520 return -1;
6521 if (sh_cfun_interrupt_handler_p ())
6522 return -1;
6523
6524 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6525
6526 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6527 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6528 return regno;
6529
6530 return -1;
6531 }
6532
6533 /* The maximum number of registers we need to save is:
6534 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6535 - 32 floating point registers (for each pair, we save none,
6536 one single precision value, or a double precision value).
6537 - 8 target registers
6538 - add 1 entry for a delimiter. */
6539 #define MAX_SAVED_REGS (62+32+8)
6540
6541 typedef struct save_entry_s
6542 {
6543 unsigned char reg;
6544 unsigned char mode;
6545 short offset;
6546 } save_entry;
6547
6548 #define MAX_TEMPS 4
6549
6550 /* There will be a delimiter entry with VOIDmode both at the start and the
6551 end of a filled-in schedule. The end delimiter has the offset of the
6552 save with the smallest (i.e. most negative) offset. */
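/* For example (illustrative values, assuming an 8-byte stack boundary),
   scheduling one 8-byte save and one 4-byte save with OFFSET_BASE == 0
   produces entries with offsets 0 (start delimiter), -8, -12, and -12
   again for the end delimiter.  */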
6553 typedef struct save_schedule_s
6554 {
6555 save_entry entries[MAX_SAVED_REGS + 2];
6556 int temps[MAX_TEMPS+1];
6557 } save_schedule;
6558
6559 /* Fill in SCHEDULE according to LIVE_REGS_MASK. Returns the last
6560 entry written to (not counting the end delimiter); callers walk the
6561 entries forward for saves and backward for restores. OFFSET_BASE is
6562 a number to be added to all offset entries. */
6563
6564 static save_entry *
6565 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6566 int offset_base)
6567 {
6568 int align, i;
6569 save_entry *entry = schedule->entries;
6570 int tmpx = 0;
6571 int offset;
6572
6573 if (! current_function_interrupt)
6574 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6575 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6576 && ! FUNCTION_ARG_REGNO_P (i)
6577 && i != FIRST_RET_REG
6578 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6579 && ! (crtl->calls_eh_return
6580 && (i == EH_RETURN_STACKADJ_REGNO
6581 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6582 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6583 schedule->temps[tmpx++] = i;
6584 entry->reg = -1;
6585 entry->mode = VOIDmode;
6586 entry->offset = offset_base;
6587 entry++;
6588 /* We loop twice: first, we save 8-byte aligned registers at the
6589 higher addresses, which are known to be aligned. Then we
6590 proceed to saving 32-bit registers that don't need 8-byte
6591 alignment.
6592 If this is an interrupt function, all registers that need saving
6593 need to be saved in full. Moreover, we need to postpone saving
6594 target registers until we have saved some general purpose registers
6595 that we can then use as scratch registers. */
6596 offset = offset_base;
6597 for (align = 1; align >= 0; align--)
6598 {
6599 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6600 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6601 {
6602 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6603 int reg = i;
6604
6605 if (current_function_interrupt)
6606 {
6607 if (TARGET_REGISTER_P (i))
6608 continue;
6609 if (GENERAL_REGISTER_P (i))
6610 mode = DImode;
6611 }
6612 if (mode == SFmode && (i % 2) == 1
6613 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6614 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6615 {
6616 mode = DFmode;
6617 i--;
6618 reg--;
6619 }
6620
6621 /* If we're doing the aligned pass and this is not aligned,
6622 or we're doing the unaligned pass and this is aligned,
6623 skip it. */
6624 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6625 != align)
6626 continue;
6627
6628 if (current_function_interrupt
6629 && GENERAL_REGISTER_P (i)
6630 && tmpx < MAX_TEMPS)
6631 schedule->temps[tmpx++] = i;
6632
6633 offset -= GET_MODE_SIZE (mode);
6634 entry->reg = i;
6635 entry->mode = mode;
6636 entry->offset = offset;
6637 entry++;
6638 }
6639 if (align && current_function_interrupt)
6640 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6641 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6642 {
6643 offset -= GET_MODE_SIZE (DImode);
6644 entry->reg = i;
6645 entry->mode = DImode;
6646 entry->offset = offset;
6647 entry++;
6648 }
6649 }
6650 entry->reg = -1;
6651 entry->mode = VOIDmode;
6652 entry->offset = offset;
6653 schedule->temps[tmpx] = -1;
6654 return entry - 1;
6655 }
6656
6657 void
6658 sh_expand_prologue (void)
6659 {
6660 HARD_REG_SET live_regs_mask;
6661 int d, i;
6662 int d_rounding = 0;
6663 int save_flags = target_flags;
6664 int pretend_args;
6665 tree sp_switch_attr
6666 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6667
6668 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6669
6670 /* We have pretend args if we had an object sent partially in registers
6671 and partially on the stack, e.g. a large structure. */
6672 pretend_args = crtl->args.pretend_args_size;
6673 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6674 && (NPARM_REGS(SImode)
6675 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6676 pretend_args = 0;
6677 output_stack_adjust (-pretend_args
6678 - crtl->args.info.stack_regs * 8,
6679 stack_pointer_rtx, 0, NULL);
6680
6681 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6682 /* We're going to use the PIC register to load the address of the
6683 incoming-argument decoder and/or of the return trampoline from
6684 the GOT, so make sure the PIC register is preserved and
6685 initialized. */
6686 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6687
6688 if (TARGET_SHCOMPACT
6689 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6690 {
6691 int reg;
6692
6693 /* First, make all registers with incoming arguments that will
6694 be pushed onto the stack live, so that register renaming
6695 doesn't overwrite them. */
6696 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6697 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6698 >= NPARM_REGS (SImode) - reg)
6699 for (; reg < NPARM_REGS (SImode); reg++)
6700 emit_insn (gen_shcompact_preserve_incoming_args
6701 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6702 else if (CALL_COOKIE_INT_REG_GET
6703 (crtl->args.info.call_cookie, reg) == 1)
6704 emit_insn (gen_shcompact_preserve_incoming_args
6705 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6706
6707 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6708 stack_pointer_rtx);
6709 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6710 GEN_INT (crtl->args.info.call_cookie));
6711 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6712 gen_rtx_REG (SImode, R0_REG));
6713 }
6714 else if (TARGET_SHMEDIA)
6715 {
6716 int tr = sh_media_register_for_return ();
6717
6718 if (tr >= 0)
6719 emit_move_insn (gen_rtx_REG (DImode, tr),
6720 gen_rtx_REG (DImode, PR_MEDIA_REG));
6721 }
6722
6723 /* Emit the code for SETUP_VARARGS. */
6724 if (cfun->stdarg)
6725 {
6726 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6727 {
6728 /* Push arg regs as if they'd been provided by the caller on the stack. */
6729 for (i = 0; i < NPARM_REGS(SImode); i++)
6730 {
6731 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6732 rtx insn;
6733
6734 if (i >= (NPARM_REGS(SImode)
6735 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6736 ))
6737 break;
6738 insn = push (rn);
6739 }
6740 }
6741 }
6742
6743 /* If we're supposed to switch stacks at function entry, do so now. */
6744 if (sp_switch_attr)
6745 {
6746 /* The argument specifies a variable holding the address of the
6747 stack the interrupt function should switch to/from at entry/exit. */
6748 const char *s
6749 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6750 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6751
6752 emit_insn (gen_sp_switch_1 (sp_switch));
6753 }
6754
6755 d = calc_live_regs (&live_regs_mask);
6756 /* ??? Maybe we could save some switching if we can move a mode switch
6757 that already happens to be at the function start into the prologue. */
6758 if (target_flags != save_flags && ! current_function_interrupt)
6759 emit_insn (gen_toggle_sz ());
6760
6761 if (TARGET_SH5)
6762 {
6763 int offset_base, offset;
6764 rtx r0 = NULL_RTX;
6765 int offset_in_r0 = -1;
6766 int sp_in_r0 = 0;
6767 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6768 int total_size, save_size;
6769 save_schedule schedule;
6770 save_entry *entry;
6771 int *tmp_pnt;
6772
6773 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6774 && ! current_function_interrupt)
6775 r0 = gen_rtx_REG (Pmode, R0_REG);
6776
6777 /* D is the actual number of bytes that we need for saving registers;
6778 however, in initial_elimination_offset we have committed to using
6779 an additional TREGS_SPACE amount of bytes. In order to keep both
6780 the addresses of arguments supplied by the caller and local variables
6781 valid, we must keep this gap. Place it between the incoming
6782 arguments and the actually saved registers, in a bid to optimize
6783 locality of reference. */
6784 total_size = d + tregs_space;
6785 total_size += rounded_frame_size (total_size);
6786 save_size = total_size - rounded_frame_size (d);
6787 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6788 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6789 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6790
6791 /* If adjusting the stack in a single step costs nothing extra, do so.
6792 I.e. either if a single addi is enough, or we need a movi anyway,
6793 and we don't exceed the maximum offset range (the test for the
6794 latter is conservative for simplicity). */
6795 if (TARGET_SHMEDIA
6796 && (CONST_OK_FOR_I10 (-total_size)
6797 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6798 && total_size <= 2044)))
6799 d_rounding = total_size - save_size;
6800
6801 offset_base = d + d_rounding;
6802
6803 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6804 0, NULL);
6805
6806 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6807 tmp_pnt = schedule.temps;
6808 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6809 {
6810 enum machine_mode mode = (enum machine_mode) entry->mode;
6811 unsigned int reg = entry->reg;
6812 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6813 rtx orig_reg_rtx;
6814
6815 offset = entry->offset;
6816
6817 reg_rtx = gen_rtx_REG (mode, reg);
6818
6819 mem_rtx = gen_frame_mem (mode,
6820 gen_rtx_PLUS (Pmode,
6821 stack_pointer_rtx,
6822 GEN_INT (offset)));
6823
6824 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6825 {
6826 gcc_assert (r0);
6827 mem_rtx = NULL_RTX;
6828 }
6829
6830 if (HAVE_PRE_DECREMENT
6831 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6832 || mem_rtx == NULL_RTX
6833 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6834 {
6835 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6836
6837 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6838 pre_dec = NULL_RTX;
6839 else
6840 {
6841 mem_rtx = NULL_RTX;
6842 offset += GET_MODE_SIZE (mode);
6843 }
6844 }
6845
6846 if (mem_rtx != NULL_RTX)
6847 goto addr_ok;
6848
6849 if (offset_in_r0 == -1)
6850 {
6851 emit_move_insn (r0, GEN_INT (offset));
6852 offset_in_r0 = offset;
6853 }
6854 else if (offset != offset_in_r0)
6855 {
6856 emit_move_insn (r0,
6857 gen_rtx_PLUS
6858 (Pmode, r0,
6859 GEN_INT (offset - offset_in_r0)));
6860 offset_in_r0 += offset - offset_in_r0;
6861 }
6862
6863 if (pre_dec != NULL_RTX)
6864 {
6865 if (! sp_in_r0)
6866 {
6867 emit_move_insn (r0,
6868 gen_rtx_PLUS
6869 (Pmode, r0, stack_pointer_rtx));
6870 sp_in_r0 = 1;
6871 }
6872
6873 offset -= GET_MODE_SIZE (mode);
6874 offset_in_r0 -= GET_MODE_SIZE (mode);
6875
6876 mem_rtx = pre_dec;
6877 }
6878 else if (sp_in_r0)
6879 mem_rtx = gen_frame_mem (mode, r0);
6880 else
6881 mem_rtx = gen_frame_mem (mode,
6882 gen_rtx_PLUS (Pmode,
6883 stack_pointer_rtx,
6884 r0));
6885
6886 /* We must not use an r0-based address for target-branch
6887 registers or for special registers without pre-dec
6888 memory addresses, since we store their values in r0
6889 first. */
6890 gcc_assert (!TARGET_REGISTER_P (reg)
6891 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6892 || mem_rtx == pre_dec));
6893
6894 addr_ok:
6895 orig_reg_rtx = reg_rtx;
6896 if (TARGET_REGISTER_P (reg)
6897 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6898 && mem_rtx != pre_dec))
6899 {
6900 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6901
6902 emit_move_insn (tmp_reg, reg_rtx);
6903
6904 if (REGNO (tmp_reg) == R0_REG)
6905 {
6906 offset_in_r0 = -1;
6907 sp_in_r0 = 0;
6908 gcc_assert (!refers_to_regno_p
6909 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6910 }
6911
6912 if (*++tmp_pnt <= 0)
6913 tmp_pnt = schedule.temps;
6914
6915 reg_rtx = tmp_reg;
6916 }
6917 {
6918 rtx insn;
6919
6920 /* Mark as interesting for the DWARF CFI generator. */
6921 insn = emit_move_insn (mem_rtx, reg_rtx);
6922 RTX_FRAME_RELATED_P (insn) = 1;
6923 /* If we use an intermediate register for the save, we can't
6924 describe this exactly in cfi as a copy of the to-be-saved
6925 register into the temporary register and then the temporary
6926 register on the stack, because the temporary register can
6927 have a different natural size than the to-be-saved register.
6928 Thus, we gloss over the intermediate copy and pretend we do
6929 a direct save from the to-be-saved register. */
6930 if (REGNO (reg_rtx) != reg)
6931 {
6932 rtx set;
6933
6934 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6935 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6936 }
6937
6938 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6939 {
6940 rtx reg_rtx = gen_rtx_REG (mode, reg);
6941 rtx set;
6942 rtx mem_rtx = gen_frame_mem (mode,
6943 gen_rtx_PLUS (Pmode,
6944 stack_pointer_rtx,
6945 GEN_INT (offset)));
6946
6947 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6948 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6949 }
6950 }
6951 }
6952
6953 gcc_assert (entry->offset == d_rounding);
6954 }
6955 else
6956 push_regs (&live_regs_mask, current_function_interrupt);
6957
6958 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6959 emit_insn (gen_GOTaddr2picreg ());
6960
6961 if (SHMEDIA_REGS_STACK_ADJUST ())
6962 {
6963 /* This must NOT go through the PLT, otherwise mach and macl
6964 may be clobbered. */
6965 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6966 (TARGET_FPU_ANY
6967 ? "__GCC_push_shmedia_regs"
6968 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6969 emit_insn (gen_shmedia_save_restore_regs_compact
6970 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6971 }
6972
6973 if (target_flags != save_flags && ! current_function_interrupt)
6974 emit_insn (gen_toggle_sz ());
6975
6976 target_flags = save_flags;
6977
6978 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6979 stack_pointer_rtx, 0, NULL);
6980
6981 if (frame_pointer_needed)
6982 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6983
6984 if (TARGET_SHCOMPACT
6985 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6986 {
6987 /* This must NOT go through the PLT, otherwise mach and macl
6988 may be clobbered. */
6989 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6990 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6991 emit_insn (gen_shcompact_incoming_args ());
6992 }
6993 }
6994
6995 void
6996 sh_expand_epilogue (bool sibcall_p)
6997 {
6998 HARD_REG_SET live_regs_mask;
6999 int d, i;
7000 int d_rounding = 0;
7001
7002 int save_flags = target_flags;
7003 int frame_size, save_size;
7004 int fpscr_deferred = 0;
7005 int e = sibcall_p ? -1 : 1;
7006
7007 d = calc_live_regs (&live_regs_mask);
7008
7009 save_size = d;
7010 frame_size = rounded_frame_size (d);
7011
7012 if (TARGET_SH5)
7013 {
7014 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7015 int total_size;
7016 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7017 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7018 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7019
7020 total_size = d + tregs_space;
7021 total_size += rounded_frame_size (total_size);
7022 save_size = total_size - frame_size;
7023
7024 /* If adjusting the stack in a single step costs nothing extra, do so.
7025 I.e. either if a single addi is enough, or we need a movi anyway,
7026 and we don't exceed the maximum offset range (the test for the
7027 latter is conservative for simplicity). */
7028 if (TARGET_SHMEDIA
7029 && ! frame_pointer_needed
7030 && (CONST_OK_FOR_I10 (total_size)
7031 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7032 && total_size <= 2044)))
7033 d_rounding = frame_size;
7034
7035 frame_size -= d_rounding;
7036 }
7037
7038 if (frame_pointer_needed)
7039 {
7040 /* We must avoid scheduling the epilogue with previous basic blocks.
7041 See PR/18032 and PR/40313. */
7042 emit_insn (gen_blockage ());
7043 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7044 &live_regs_mask);
7045
7046 /* We must avoid moving the stack pointer adjustment past code
7047 which reads from the local frame, else an interrupt could
7048 occur after the SP adjustment and clobber data in the local
7049 frame. */
7050 emit_insn (gen_blockage ());
7051 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7052 }
7053 else if (frame_size)
7054 {
7055 /* We must avoid moving the stack pointer adjustment past code
7056 which reads from the local frame, else an interrupt could
7057 occur after the SP adjustment and clobber data in the local
7058 frame. */
7059 emit_insn (gen_blockage ());
7060 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
7061 }
7062
7063 if (SHMEDIA_REGS_STACK_ADJUST ())
7064 {
7065 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7066 (TARGET_FPU_ANY
7067 ? "__GCC_pop_shmedia_regs"
7068 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7069 /* This must NOT go through the PLT, otherwise mach and macl
7070 may be clobbered. */
7071 emit_insn (gen_shmedia_save_restore_regs_compact
7072 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7073 }
7074
7075 /* Pop all the registers. */
7076
7077 if (target_flags != save_flags && ! current_function_interrupt)
7078 emit_insn (gen_toggle_sz ());
7079 if (TARGET_SH5)
7080 {
7081 int offset_base, offset;
7082 int offset_in_r0 = -1;
7083 int sp_in_r0 = 0;
7084 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7085 save_schedule schedule;
7086 save_entry *entry;
7087 int *tmp_pnt;
7088
7089 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7090 offset_base = -entry[1].offset + d_rounding;
7091 tmp_pnt = schedule.temps;
7092 for (; entry->mode != VOIDmode; entry--)
7093 {
7094 enum machine_mode mode = (enum machine_mode) entry->mode;
7095 int reg = entry->reg;
7096 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7097
7098 offset = offset_base + entry->offset;
7099 reg_rtx = gen_rtx_REG (mode, reg);
7100
7101 mem_rtx = gen_frame_mem (mode,
7102 gen_rtx_PLUS (Pmode,
7103 stack_pointer_rtx,
7104 GEN_INT (offset)));
7105
7106 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7107 mem_rtx = NULL_RTX;
7108
7109 if (HAVE_POST_INCREMENT
7110 && (offset == offset_in_r0
7111 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7112 && mem_rtx == NULL_RTX)
7113 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7114 {
7115 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7116
7117 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7118 post_inc = NULL_RTX;
7119 else
7120 mem_rtx = NULL_RTX;
7121 }
7122
7123 if (mem_rtx != NULL_RTX)
7124 goto addr_ok;
7125
7126 if (offset_in_r0 == -1)
7127 {
7128 emit_move_insn (r0, GEN_INT (offset));
7129 offset_in_r0 = offset;
7130 }
7131 else if (offset != offset_in_r0)
7132 {
7133 emit_move_insn (r0,
7134 gen_rtx_PLUS
7135 (Pmode, r0,
7136 GEN_INT (offset - offset_in_r0)));
7137 offset_in_r0 += offset - offset_in_r0;
7138 }
7139
7140 if (post_inc != NULL_RTX)
7141 {
7142 if (! sp_in_r0)
7143 {
7144 emit_move_insn (r0,
7145 gen_rtx_PLUS
7146 (Pmode, r0, stack_pointer_rtx));
7147 sp_in_r0 = 1;
7148 }
7149
7150 mem_rtx = post_inc;
7151
7152 offset_in_r0 += GET_MODE_SIZE (mode);
7153 }
7154 else if (sp_in_r0)
7155 mem_rtx = gen_frame_mem (mode, r0);
7156 else
7157 mem_rtx = gen_frame_mem (mode,
7158 gen_rtx_PLUS (Pmode,
7159 stack_pointer_rtx,
7160 r0));
7161
7162 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7163 || mem_rtx == post_inc);
7164
7165 addr_ok:
7166 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7167 && mem_rtx != post_inc)
7168 {
7169 insn = emit_move_insn (r0, mem_rtx);
7170 mem_rtx = r0;
7171 }
7172 else if (TARGET_REGISTER_P (reg))
7173 {
7174 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7175
7176 /* Give the scheduler a bit of freedom by using up to
7177 MAX_TEMPS registers in a round-robin fashion. */
7178 insn = emit_move_insn (tmp_reg, mem_rtx);
7179 mem_rtx = tmp_reg;
7180 if (*++tmp_pnt < 0)
7181 tmp_pnt = schedule.temps;
7182 }
7183
7184 insn = emit_move_insn (reg_rtx, mem_rtx);
7185 }
7186
7187 gcc_assert (entry->offset + offset_base == d + d_rounding);
7188 }
7189 else /* ! TARGET_SH5 */
7190 {
7191 int last_reg;
7192
7193 save_size = 0;
7194 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
7195 register. */
7196 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7197 && !sh_cfun_resbank_handler_p ())
7198 {
7199 if (!frame_pointer_needed)
7200 emit_insn (gen_blockage ());
7201 pop (PR_REG);
7202 }
7203
7204 /* Banked registers are popped first to avoid being scheduled in the
7205 delay slot. RTE switches banks before the delay slot instruction. */
7206 if (current_function_interrupt)
7207 {
7208 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7209 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7210 pop (LAST_BANKED_REG - i);
7211
7212 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7213 }
7214 else
7215 last_reg = FIRST_PSEUDO_REGISTER;
7216
7217 for (i = 0; i < last_reg; i++)
7218 {
7219 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7220
7221 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7222 && hard_reg_set_intersect_p (live_regs_mask,
7223 reg_class_contents[DF_REGS]))
7224 fpscr_deferred = 1;
7225 /* For an ISR with the RESBANK attribute assigned, don't pop the
7226 following registers: R0-R14, MACH, MACL and GBR. */
7227 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7228 && ! (sh_cfun_resbank_handler_p ()
7229 && ((j >= FIRST_GENERAL_REG
7230 && j < LAST_GENERAL_REG)
7231 || j == MACH_REG
7232 || j == MACL_REG
7233 || j == GBR_REG)))
7234 pop (j);
7235
7236 if (j == FIRST_FP_REG && fpscr_deferred)
7237 pop (FPSCR_REG);
7238 }
7239 }
7240 if (target_flags != save_flags && ! current_function_interrupt)
7241 emit_insn (gen_toggle_sz ());
7242 target_flags = save_flags;
7243
7244 output_stack_adjust (crtl->args.pretend_args_size
7245 + save_size + d_rounding
7246 + crtl->args.info.stack_regs * 8,
7247 stack_pointer_rtx, e, NULL);
7248
7249 if (crtl->calls_eh_return)
7250 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7251 EH_RETURN_STACKADJ_RTX));
7252
7253 /* Switch back to the normal stack if necessary. */
7254 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7255 emit_insn (gen_sp_switch_2 ());
7256
7257 /* Tell flow the insn that pops PR isn't dead. */
7258 /* PR_REG will never be live in SHmedia mode, and we don't need to
7259 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7260 by the return pattern. */
7261 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7262 emit_use (gen_rtx_REG (SImode, PR_REG));
7263 }
7264
7265 static int sh_need_epilogue_known = 0;
7266
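/* Return nonzero if the current function needs epilogue code.  The result
   of trial-expanding the epilogue is cached in sh_need_epilogue_known
   (1 = needed, -1 = not needed, 0 = not yet computed).  */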
7267 int
7268 sh_need_epilogue (void)
7269 {
7270 if (! sh_need_epilogue_known)
7271 {
7272 rtx epilogue;
7273
7274 start_sequence ();
7275 sh_expand_epilogue (0);
7276 epilogue = get_insns ();
7277 end_sequence ();
7278 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7279 }
7280 return sh_need_epilogue_known > 0;
7281 }
7282
7283 /* Emit code to change the current function's return address to RA.
7284 TEMP is available as a scratch register, if needed. */
7285
7286 void
7287 sh_set_return_address (rtx ra, rtx tmp)
7288 {
7289 HARD_REG_SET live_regs_mask;
7290 int d;
7291 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7292 int pr_offset;
7293
7294 d = calc_live_regs (&live_regs_mask);
7295
7296 /* If pr_reg isn't live, we can set it (or the register given in
7297 sh_media_register_for_return) directly. */
7298 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7299 {
7300 rtx rr;
7301
7302 if (TARGET_SHMEDIA)
7303 {
7304 int rr_regno = sh_media_register_for_return ();
7305
7306 if (rr_regno < 0)
7307 rr_regno = pr_reg;
7308
7309 rr = gen_rtx_REG (DImode, rr_regno);
7310 }
7311 else
7312 rr = gen_rtx_REG (SImode, pr_reg);
7313
7314 emit_insn (GEN_MOV (rr, ra));
7315 /* Tell flow the register for return isn't dead. */
7316 emit_use (rr);
7317 return;
7318 }
7319
7320 if (TARGET_SH5)
7321 {
7322 int offset;
7323 save_schedule schedule;
7324 save_entry *entry;
7325
7326 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7327 offset = entry[1].offset;
7328 for (; entry->mode != VOIDmode; entry--)
7329 if (entry->reg == pr_reg)
7330 goto found;
7331
7332 /* We couldn't find the PR register. */
7333 gcc_unreachable ();
7334
7335 found:
7336 offset = entry->offset - offset;
7337 pr_offset = (rounded_frame_size (d) + offset
7338 + SHMEDIA_REGS_STACK_ADJUST ());
7339 }
7340 else
7341 pr_offset = rounded_frame_size (d);
7342
7343 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7344 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7345
7346 tmp = gen_frame_mem (Pmode, tmp);
7347 emit_insn (GEN_MOV (tmp, ra));
7348 /* Tell flow this store isn't dead. */
7349 emit_use (tmp);
7350 }
7351
7352 /* Clear variables at function end. */
7353
7354 static void
7355 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7356 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7357 {
7358 sh_need_epilogue_known = 0;
7359 }
7360
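/* Expand __builtin_saveregs: save the unnamed integer and floating point
   argument registers and return the address of the save area.  */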
7361 static rtx
7362 sh_builtin_saveregs (void)
7363 {
7364 /* First unnamed integer register. */
7365 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7366 /* Number of integer registers we need to save. */
7367 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7368 /* First unnamed SFmode float reg */
7369 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7370 /* Number of SFmode float regs to save. */
7371 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7372 rtx regbuf, fpregs;
7373 int bufsize, regno;
7374 alias_set_type alias_set;
7375
7376 if (TARGET_SH5)
7377 {
7378 if (n_intregs)
7379 {
7380 int pushregs = n_intregs;
7381
7382 while (pushregs < NPARM_REGS (SImode) - 1
7383 && (CALL_COOKIE_INT_REG_GET
7384 (crtl->args.info.call_cookie,
7385 NPARM_REGS (SImode) - pushregs)
7386 == 1))
7387 {
7388 crtl->args.info.call_cookie
7389 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7390 - pushregs, 1);
7391 pushregs++;
7392 }
7393
7394 if (pushregs == NPARM_REGS (SImode))
7395 crtl->args.info.call_cookie
7396 |= (CALL_COOKIE_INT_REG (0, 1)
7397 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7398 else
7399 crtl->args.info.call_cookie
7400 |= CALL_COOKIE_STACKSEQ (pushregs);
7401
7402 crtl->args.pretend_args_size += 8 * n_intregs;
7403 }
7404 if (TARGET_SHCOMPACT)
7405 return const0_rtx;
7406 }
7407
7408 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7409 {
7410 error ("__builtin_saveregs not supported by this subtarget");
7411 return const0_rtx;
7412 }
7413
7414 if (TARGET_SHMEDIA)
7415 n_floatregs = 0;
7416
7417 /* Allocate block of memory for the regs. */
7418 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7419 Or can assign_stack_local accept a 0 SIZE argument? */
7420 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7421
7422 if (TARGET_SHMEDIA)
7423 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7424 else if (n_floatregs & 1)
7425 {
7426 rtx addr;
7427
7428 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7429 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7430 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7431 regbuf = change_address (regbuf, BLKmode, addr);
7432 }
7433 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7434 {
7435 rtx addr, mask;
7436
7437 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7438 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7439 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7440 emit_insn (gen_andsi3 (addr, addr, mask));
7441 regbuf = change_address (regbuf, BLKmode, addr);
7442 }
7443 else
7444 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7445 alias_set = get_varargs_alias_set ();
7446 set_mem_alias_set (regbuf, alias_set);
7447
7448 /* Save int args.
7449 This is optimized to only save the regs that are necessary. Explicitly
7450 named args need not be saved. */
7451 if (n_intregs > 0)
7452 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7453 adjust_address (regbuf, BLKmode,
7454 n_floatregs * UNITS_PER_WORD),
7455 n_intregs);
7456
7457 if (TARGET_SHMEDIA)
7458 /* Return the address of the regbuf. */
7459 return XEXP (regbuf, 0);
7460
7461 /* Save float args.
7462 This is optimized to only save the regs that are necessary. Explicitly
7463 named args need not be saved.
7464 We explicitly build a pointer to the buffer because it halves the insn
7465 count when not optimizing (otherwise the pointer is built for each reg
7466 saved).
7467 We emit the moves in reverse order so that we can use predecrement. */
7468
7469 fpregs = copy_to_mode_reg (Pmode,
7470 plus_constant (XEXP (regbuf, 0),
7471 n_floatregs * UNITS_PER_WORD));
7472 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7473 {
7474 rtx mem;
7475 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7476 {
7477 emit_insn (gen_addsi3 (fpregs, fpregs,
7478 GEN_INT (-2 * UNITS_PER_WORD)));
7479 mem = change_address (regbuf, DFmode, fpregs);
7480 emit_move_insn (mem,
7481 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7482 }
7483 regno = first_floatreg;
7484 if (regno & 1)
7485 {
7486 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7487 mem = change_address (regbuf, SFmode, fpregs);
7488 emit_move_insn (mem,
7489 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7490 - (TARGET_LITTLE_ENDIAN != 0)));
7491 }
7492 }
7493 else
7494 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7495 {
7496 rtx mem;
7497
7498 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7499 mem = change_address (regbuf, SFmode, fpregs);
7500 emit_move_insn (mem,
7501 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7502 }
7503
7504 /* Return the address of the regbuf. */
7505 return XEXP (regbuf, 0);
7506 }
7507
7508 /* Define the `__builtin_va_list' type for the ABI. */
7509
7510 static tree
7511 sh_build_builtin_va_list (void)
7512 {
7513 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7514 tree record;
7515
7516 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7517 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7518 return ptr_type_node;
7519
7520 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7521
7522 f_next_o = build_decl (BUILTINS_LOCATION,
7523 FIELD_DECL, get_identifier ("__va_next_o"),
7524 ptr_type_node);
7525 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7526 FIELD_DECL,
7527 get_identifier ("__va_next_o_limit"),
7528 ptr_type_node);
7529 f_next_fp = build_decl (BUILTINS_LOCATION,
7530 FIELD_DECL, get_identifier ("__va_next_fp"),
7531 ptr_type_node);
7532 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7533 FIELD_DECL,
7534 get_identifier ("__va_next_fp_limit"),
7535 ptr_type_node);
7536 f_next_stack = build_decl (BUILTINS_LOCATION,
7537 FIELD_DECL, get_identifier ("__va_next_stack"),
7538 ptr_type_node);
7539
7540 DECL_FIELD_CONTEXT (f_next_o) = record;
7541 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7542 DECL_FIELD_CONTEXT (f_next_fp) = record;
7543 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7544 DECL_FIELD_CONTEXT (f_next_stack) = record;
7545
7546 TYPE_FIELDS (record) = f_next_o;
7547 TREE_CHAIN (f_next_o) = f_next_o_limit;
7548 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7549 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7550 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7551
7552 layout_type (record);
7553
7554 return record;
7555 }
7556
7557 /* Implement `va_start' for varargs and stdarg. */
7558
7559 static void
7560 sh_va_start (tree valist, rtx nextarg)
7561 {
7562 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7563 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7564 tree t, u;
7565 int nfp, nint;
7566
7567 if (TARGET_SH5)
7568 {
7569 expand_builtin_saveregs ();
7570 std_expand_builtin_va_start (valist, nextarg);
7571 return;
7572 }
7573
7574 if ((! TARGET_SH2E && ! TARGET_SH4)
7575 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7576 {
7577 std_expand_builtin_va_start (valist, nextarg);
7578 return;
7579 }
7580
7581 f_next_o = TYPE_FIELDS (va_list_type_node);
7582 f_next_o_limit = TREE_CHAIN (f_next_o);
7583 f_next_fp = TREE_CHAIN (f_next_o_limit);
7584 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7585 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7586
7587 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7588 NULL_TREE);
7589 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7590 valist, f_next_o_limit, NULL_TREE);
7591 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7592 NULL_TREE);
7593 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7594 valist, f_next_fp_limit, NULL_TREE);
7595 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7596 valist, f_next_stack, NULL_TREE);
7597
7598 /* Call __builtin_saveregs. */
7599 u = make_tree (sizetype, expand_builtin_saveregs ());
7600 u = fold_convert (ptr_type_node, u);
7601 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7602 TREE_SIDE_EFFECTS (t) = 1;
7603 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7604
7605 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7606 if (nfp < 8)
7607 nfp = 8 - nfp;
7608 else
7609 nfp = 0;
7610 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7611 size_int (UNITS_PER_WORD * nfp));
7612 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7613 TREE_SIDE_EFFECTS (t) = 1;
7614 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7615
7616 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7617 TREE_SIDE_EFFECTS (t) = 1;
7618 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7619
7620 nint = crtl->args.info.arg_count[SH_ARG_INT];
7621 if (nint < 4)
7622 nint = 4 - nint;
7623 else
7624 nint = 0;
7625 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7626 size_int (UNITS_PER_WORD * nint));
7627 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7628 TREE_SIDE_EFFECTS (t) = 1;
7629 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7630
7631 u = make_tree (ptr_type_node, nextarg);
7632 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7633 TREE_SIDE_EFFECTS (t) = 1;
7634 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7635 }
7636
7637 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7638 member, return it. */
7639 static tree
7640 find_sole_member (tree type)
7641 {
7642 tree field, member = NULL_TREE;
7643
7644 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7645 {
7646 if (TREE_CODE (field) != FIELD_DECL)
7647 continue;
7648 if (!DECL_SIZE (field))
7649 return NULL_TREE;
7650 if (integer_zerop (DECL_SIZE (field)))
7651 continue;
7652 if (member)
7653 return NULL_TREE;
7654 member = field;
7655 }
7656 return member;
7657 }
7658 /* Implement `va_arg'. */
7659
7660 static tree
7661 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7662 gimple_seq *post_p ATTRIBUTE_UNUSED)
7663 {
7664 HOST_WIDE_INT size, rsize;
7665 tree tmp, pptr_type_node;
7666 tree addr, lab_over = NULL, result = NULL;
7667 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7668 tree eff_type;
7669
7670 if (pass_by_ref)
7671 type = build_pointer_type (type);
7672
7673 size = int_size_in_bytes (type);
7674 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7675 pptr_type_node = build_pointer_type (ptr_type_node);
7676
7677 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7678 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7679 {
7680 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7681 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7682 int pass_as_float;
7683 tree lab_false;
7684 tree member;
7685
7686 f_next_o = TYPE_FIELDS (va_list_type_node);
7687 f_next_o_limit = TREE_CHAIN (f_next_o);
7688 f_next_fp = TREE_CHAIN (f_next_o_limit);
7689 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7690 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7691
7692 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7693 NULL_TREE);
7694 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7695 valist, f_next_o_limit, NULL_TREE);
7696 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7697 valist, f_next_fp, NULL_TREE);
7698 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7699 valist, f_next_fp_limit, NULL_TREE);
7700 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7701 valist, f_next_stack, NULL_TREE);
7702
7703 /* Structures with a single member with a distinct mode are passed
7704 like their member. This is relevant if the latter has a REAL_TYPE
7705 or COMPLEX_TYPE type. */
7706 eff_type = type;
7707 while (TREE_CODE (eff_type) == RECORD_TYPE
7708 && (member = find_sole_member (eff_type))
7709 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7710 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7711 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7712 {
7713 tree field_type = TREE_TYPE (member);
7714
7715 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7716 eff_type = field_type;
7717 else
7718 {
7719 gcc_assert ((TYPE_ALIGN (eff_type)
7720 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7721 || (TYPE_ALIGN (eff_type)
7722 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7723 break;
7724 }
7725 }
7726
7727 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7728 {
7729 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7730 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7731 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7732 && size <= 16));
7733 }
7734 else
7735 {
7736 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7737 }
7738
7739 addr = create_tmp_var (pptr_type_node, NULL);
7740 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7741 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7742
7743 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7744
7745 if (pass_as_float)
7746 {
7747 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7748 tree cmp;
7749 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7750
7751 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7752 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7753
7754 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7755 tmp = next_fp_limit;
7756 if (size > 4 && !is_double)
7757 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7758 unshare_expr (tmp), size_int (4 - size));
7759 tmp = build2 (GE_EXPR, boolean_type_node,
7760 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7761 cmp = build3 (COND_EXPR, void_type_node, tmp,
7762 build1 (GOTO_EXPR, void_type_node,
7763 unshare_expr (lab_false)), NULL_TREE);
7764 if (!is_double)
7765 gimplify_and_add (cmp, pre_p);
7766
7767 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7768 || (is_double || size == 16))
7769 {
7770 tmp = fold_convert (sizetype, next_fp_tmp);
7771 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7772 size_int (UNITS_PER_WORD));
7773 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7774 unshare_expr (next_fp_tmp), tmp);
7775 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7776 }
7777 if (is_double)
7778 gimplify_and_add (cmp, pre_p);
7779
7780 #ifdef FUNCTION_ARG_SCmode_WART
7781 if (TYPE_MODE (eff_type) == SCmode
7782 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7783 {
7784 tree subtype = TREE_TYPE (eff_type);
7785 tree real, imag;
7786
7787 imag
7788 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7789 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7790
7791 real
7792 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7793 real = get_initialized_tmp_var (real, pre_p, NULL);
7794
7795 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7796 if (type != eff_type)
7797 result = build1 (VIEW_CONVERT_EXPR, type, result);
7798 result = get_initialized_tmp_var (result, pre_p, NULL);
7799 }
7800 #endif /* FUNCTION_ARG_SCmode_WART */
7801
7802 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7803 gimplify_and_add (tmp, pre_p);
7804
7805 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7806 gimplify_and_add (tmp, pre_p);
7807
7808 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7809 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7810 gimplify_assign (unshare_expr (next_fp_tmp),
7811 unshare_expr (valist), pre_p);
7812
7813 gimplify_assign (unshare_expr (valist),
7814 unshare_expr (next_fp_tmp), post_p);
7815 valist = next_fp_tmp;
7816 }
7817 else
7818 {
7819 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7820 unshare_expr (next_o), size_int (rsize));
7821 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7822 unshare_expr (next_o_limit));
7823 tmp = build3 (COND_EXPR, void_type_node, tmp,
7824 build1 (GOTO_EXPR, void_type_node,
7825 unshare_expr (lab_false)),
7826 NULL_TREE);
7827 gimplify_and_add (tmp, pre_p);
7828
7829 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7830 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7831
7832 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7833 gimplify_and_add (tmp, pre_p);
7834
7835 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7836 gimplify_and_add (tmp, pre_p);
7837
7838 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7839 gimplify_assign (unshare_expr (next_o),
7840 unshare_expr (next_o_limit), pre_p);
7841
7842 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7843 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7844 }
7845
7846 if (!result)
7847 {
7848 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7849 gimplify_and_add (tmp, pre_p);
7850 }
7851 }
7852
7853 /* ??? In va-sh.h, there had been code to make values larger than
7854 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7855
7856 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7857 if (result)
7858 {
7859 gimplify_assign (result, tmp, pre_p);
7860
7861 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7862 gimplify_and_add (tmp, pre_p);
7863 }
7864 else
7865 result = tmp;
7866
7867 if (pass_by_ref)
7868 result = build_va_arg_indirect_ref (result);
7869
7870 return result;
7871 }
7872
7873 /* 64-bit floating point memory transfers are paired single precision loads
7874 or stores. So DWARF information needs fixing in little endian (unless
7875 PR=SZ=1 in FPSCR). */
7876 rtx
7877 sh_dwarf_register_span (rtx reg)
7878 {
7879 unsigned regno = REGNO (reg);
7880
7881 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7882 return NULL_RTX;
7883
7884 return
7885 gen_rtx_PARALLEL (VOIDmode,
7886 gen_rtvec (2,
7887 gen_rtx_REG (SFmode,
7888 DBX_REGISTER_NUMBER (regno+1)),
7889 gen_rtx_REG (SFmode,
7890 DBX_REGISTER_NUMBER (regno))));
7891 }
7892
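/* Return true if arguments declared in a prototype should be promoted;
   the Hitachi / Renesas conventions do not promote them.  */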
7893 bool
7894 sh_promote_prototypes (const_tree type)
7895 {
7896 if (TARGET_HITACHI)
7897 return 0;
7898 if (! type)
7899 return 1;
7900 return ! sh_attr_renesas_p (type);
7901 }
7902
7903 /* Whether an argument must be passed by reference. On SHcompact, we
7904 pretend arguments wider than 32 bits that would have been passed in
7905 registers are passed by reference, so that an SHmedia trampoline
7906 loads them into the full 64-bit registers. */
7907
7908 static int
7909 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7910 const_tree type, bool named)
7911 {
7912 unsigned HOST_WIDE_INT size;
7913
7914 if (type)
7915 size = int_size_in_bytes (type);
7916 else
7917 size = GET_MODE_SIZE (mode);
7918
7919 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7920 && (!named
7921 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7922 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7923 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7924 && size > 4
7925 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7926 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7927 return size;
7928 else
7929 return 0;
7930 }
7931
7932 static bool
7933 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7934 const_tree type, bool named)
7935 {
7936 if (targetm.calls.must_pass_in_stack (mode, type))
7937 return true;
7938
7939 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7940 wants to know about pass-by-reference semantics for incoming
7941 arguments. */
7942 if (! cum)
7943 return false;
7944
7945 if (TARGET_SHCOMPACT)
7946 {
7947 cum->byref = shcompact_byref (cum, mode, type, named);
7948 return cum->byref != 0;
7949 }
7950
7951 return false;
7952 }
7953
7954 static bool
7955 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7956 const_tree type, bool named ATTRIBUTE_UNUSED)
7957 {
7958 /* ??? How can it possibly be correct to return true only on the
7959 caller side of the equation? Is there someplace else in the
7960 sh backend that's magically producing the copies? */
7961 return (cum->outgoing
7962 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7963 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7964 }
7965
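/* Return the number of bytes of an argument that are passed in registers
   when the remainder of the argument goes on the stack.  */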
7966 static int
7967 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7968 tree type, bool named ATTRIBUTE_UNUSED)
7969 {
7970 int words = 0;
7971
7972 if (!TARGET_SH5
7973 && PASS_IN_REG_P (*cum, mode, type)
7974 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7975 && (ROUND_REG (*cum, mode)
7976 + (mode != BLKmode
7977 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7978 : ROUND_ADVANCE (int_size_in_bytes (type)))
7979 > NPARM_REGS (mode)))
7980 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7981
7982 else if (!TARGET_SHCOMPACT
7983 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7984 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7985
7986 return words * UNITS_PER_WORD;
7987 }
7988
7989
7990 /* Define where to put the arguments to a function.
7991 Value is zero to push the argument on the stack,
7992 or a hard register in which to store the argument.
7993
7994 MODE is the argument's machine mode.
7995 TYPE is the data type of the argument (as a tree).
7996 This is null for libcalls where that information may
7997 not be available.
7998 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7999 the preceding args and about the function being called.
8000 NAMED is nonzero if this argument is a named parameter
8001 (otherwise it is an extra parameter matching an ellipsis).
8002
8003 On SH the first args are normally in registers
8004 and the rest are pushed. Any arg that starts within the first
8005 NPARM_REGS words is at least partially passed in a register unless
8006 its data type forbids. */
8007
8008
8009 rtx
8010 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8011 tree type, int named)
8012 {
8013 if (! TARGET_SH5 && mode == VOIDmode)
8014 return GEN_INT (ca->renesas_abi ? 1 : 0);
8015
8016 if (! TARGET_SH5
8017 && PASS_IN_REG_P (*ca, mode, type)
8018 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8019 {
8020 int regno;
8021
8022 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8023 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8024 {
8025 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8026 gen_rtx_REG (SFmode,
8027 BASE_ARG_REG (mode)
8028 + (ROUND_REG (*ca, mode) ^ 1)),
8029 const0_rtx);
8030 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8031 gen_rtx_REG (SFmode,
8032 BASE_ARG_REG (mode)
8033 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8034 GEN_INT (4));
8035 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8036 }
8037
8038 /* If the alignment of a DF value causes an SF register to be
8039 skipped, we will use that skipped register for the next SF
8040 value. */
8041 if ((TARGET_HITACHI || ca->renesas_abi)
8042 && ca->free_single_fp_reg
8043 && mode == SFmode)
8044 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8045
8046 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8047 ^ (mode == SFmode && TARGET_SH4
8048 && TARGET_LITTLE_ENDIAN != 0
8049 && ! TARGET_HITACHI && ! ca->renesas_abi);
8050 return gen_rtx_REG (mode, regno);
8051
8052 }
8053
8054 if (TARGET_SH5)
8055 {
8056 if (mode == VOIDmode && TARGET_SHCOMPACT)
8057 return GEN_INT (ca->call_cookie);
8058
8059 /* The following test assumes unnamed arguments are promoted to
8060 DFmode. */
8061 if (mode == SFmode && ca->free_single_fp_reg)
8062 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8063
8064 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8065 && (named || ! ca->prototype_p)
8066 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8067 {
8068 if (! ca->prototype_p && TARGET_SHMEDIA)
8069 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8070
8071 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8072 FIRST_FP_PARM_REG
8073 + ca->arg_count[(int) SH_ARG_FLOAT]);
8074 }
8075
8076 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8077 && (! TARGET_SHCOMPACT
8078 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8079 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8080 type, named))))
8081 {
8082 return gen_rtx_REG (mode, (FIRST_PARM_REG
8083 + ca->arg_count[(int) SH_ARG_INT]));
8084 }
8085
8086 return 0;
8087 }
8088
8089 return 0;
8090 }
8091
8092 /* Update the data in CUM to advance over an argument
8093 of mode MODE and data type TYPE.
8094 (TYPE is null for libcalls where that information may not be
8095 available.) */
8096
8097 void
8098 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8099 tree type, int named)
8100 {
8101 if (ca->force_mem)
8102 ca->force_mem = 0;
8103 else if (TARGET_SH5)
8104 {
8105 tree type2 = (ca->byref && type
8106 ? TREE_TYPE (type)
8107 : type);
8108 enum machine_mode mode2 = (ca->byref && type
8109 ? TYPE_MODE (type2)
8110 : mode);
8111 int dwords = ((ca->byref
8112 ? ca->byref
8113 : mode2 == BLKmode
8114 ? int_size_in_bytes (type2)
8115 : GET_MODE_SIZE (mode2)) + 7) / 8;
8116 int numregs = MIN (dwords, NPARM_REGS (SImode)
8117 - ca->arg_count[(int) SH_ARG_INT]);
8118
8119 if (numregs)
8120 {
8121 ca->arg_count[(int) SH_ARG_INT] += numregs;
8122 if (TARGET_SHCOMPACT
8123 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8124 {
8125 ca->call_cookie
8126 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8127 - numregs, 1);
8128 /* N.B. We want this also for outgoing. */
8129 ca->stack_regs += numregs;
8130 }
8131 else if (ca->byref)
8132 {
8133 if (! ca->outgoing)
8134 ca->stack_regs += numregs;
8135 ca->byref_regs += numregs;
8136 ca->byref = 0;
8137 do
8138 ca->call_cookie
8139 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8140 - numregs, 2);
8141 while (--numregs);
8142 ca->call_cookie
8143 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8144 - 1, 1);
8145 }
8146 else if (dwords > numregs)
8147 {
8148 int pushregs = numregs;
8149
8150 if (TARGET_SHCOMPACT)
8151 ca->stack_regs += numregs;
8152 while (pushregs < NPARM_REGS (SImode) - 1
8153 && (CALL_COOKIE_INT_REG_GET
8154 (ca->call_cookie,
8155 NPARM_REGS (SImode) - pushregs)
8156 == 1))
8157 {
8158 ca->call_cookie
8159 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8160 - pushregs, 1);
8161 pushregs++;
8162 }
8163 if (numregs == NPARM_REGS (SImode))
8164 ca->call_cookie
8165 |= CALL_COOKIE_INT_REG (0, 1)
8166 | CALL_COOKIE_STACKSEQ (numregs - 1);
8167 else
8168 ca->call_cookie
8169 |= CALL_COOKIE_STACKSEQ (numregs);
8170 }
8171 }
8172 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8173 && (named || ! ca->prototype_p))
8174 {
8175 if (mode2 == SFmode && ca->free_single_fp_reg)
8176 ca->free_single_fp_reg = 0;
8177 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8178 < NPARM_REGS (SFmode))
8179 {
8180 int numfpregs
8181 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8182 NPARM_REGS (SFmode)
8183 - ca->arg_count[(int) SH_ARG_FLOAT]);
8184
8185 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8186
8187 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8188 {
8189 if (ca->outgoing && numregs > 0)
8190 do
8191 {
8192 ca->call_cookie
8193 |= (CALL_COOKIE_INT_REG
8194 (ca->arg_count[(int) SH_ARG_INT]
8195 - numregs + ((numfpregs - 2) / 2),
8196 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8197 - numfpregs) / 2));
8198 }
8199 while (numfpregs -= 2);
8200 }
8201 else if (mode2 == SFmode && (named)
8202 && (ca->arg_count[(int) SH_ARG_FLOAT]
8203 < NPARM_REGS (SFmode)))
8204 ca->free_single_fp_reg
8205 = FIRST_FP_PARM_REG - numfpregs
8206 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8207 }
8208 }
8209 return;
8210 }
8211
8212 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8213 {
8214 /* Note that we've used the skipped register. */
8215 if (mode == SFmode && ca->free_single_fp_reg)
8216 {
8217 ca->free_single_fp_reg = 0;
8218 return;
8219 }
8220 /* When we have a DF after an SF, there's an SF register that gets
8221 skipped in order to align the DF value. We note this skipped
8222 register, because the next SF value will use it, and not the
8223 SF that follows the DF. */
8224 if (mode == DFmode
8225 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8226 {
8227 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8228 + BASE_ARG_REG (mode));
8229 }
8230 }
8231
8232 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8233 || PASS_IN_REG_P (*ca, mode, type))
8234 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8235 = (ROUND_REG (*ca, mode)
8236 + (mode == BLKmode
8237 ? ROUND_ADVANCE (int_size_in_bytes (type))
8238 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8239 }
8240
8241 /* The Renesas calling convention doesn't quite fit into this scheme since
8242 the address is passed like an invisible argument, but one that is always
8243 passed in memory. */
8244 static rtx
8245 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8246 {
8247 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8248 return 0;
8249 return gen_rtx_REG (Pmode, 2);
8250 }
8251
8252 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8253
8254 static bool
8255 sh_return_in_memory (const_tree type, const_tree fndecl)
8256 {
8257 if (TARGET_SH5)
8258 {
8259 if (TYPE_MODE (type) == BLKmode)
8260 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8261 else
8262 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8263 }
8264 else
8265 {
8266 return (TYPE_MODE (type) == BLKmode
8267 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8268 && TREE_CODE (type) == RECORD_TYPE));
8269 }
8270 }
8271
8272 /* We actually emit the code in sh_expand_prologue. We used to use
8273 a static variable to flag that we need to emit this code, but that
8274 doesn't work when inlining, when functions are deferred and then emitted
8275 later. Fortunately, we already have two flags that are part of struct
8276 function that tell if a function uses varargs or stdarg. */
8277 static void
8278 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8279 enum machine_mode mode,
8280 tree type,
8281 int *pretend_arg_size,
8282 int second_time ATTRIBUTE_UNUSED)
8283 {
8284 gcc_assert (cfun->stdarg);
8285 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8286 {
8287 int named_parm_regs, anon_parm_regs;
8288
8289 named_parm_regs = (ROUND_REG (*ca, mode)
8290 + (mode == BLKmode
8291 ? ROUND_ADVANCE (int_size_in_bytes (type))
8292 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8293 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8294 if (anon_parm_regs > 0)
8295 *pretend_arg_size = anon_parm_regs * 4;
8296 }
8297 }
8298
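/* Return true if named and unnamed arguments must be distinguished
   strictly; this is only the case for the SH5 ABI.  */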
8299 static bool
8300 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8301 {
8302 return TARGET_SH5;
8303 }
8304
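/* Return true if outgoing unnamed varargs may be treated as named;
   true except for the Hitachi / Renesas conventions and SH5.  */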
8305 static bool
8306 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8307 {
8308 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8309 }
8310
8311
8312 /* Define the offset between two registers, one to be eliminated, and
8313 the other its replacement, at the start of a routine. */
8314
8315 int
8316 initial_elimination_offset (int from, int to)
8317 {
8318 int regs_saved;
8319 int regs_saved_rounding = 0;
8320 int total_saved_regs_space;
8321 int total_auto_space;
8322 int save_flags = target_flags;
8323 int copy_flags;
8324 HARD_REG_SET live_regs_mask;
8325
8326 shmedia_space_reserved_for_target_registers = false;
8327 regs_saved = calc_live_regs (&live_regs_mask);
8328 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8329
8330 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8331 {
8332 shmedia_space_reserved_for_target_registers = true;
8333 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8334 }
8335
8336 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8337 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8338 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8339
8340 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8341 copy_flags = target_flags;
8342 target_flags = save_flags;
8343
8344 total_saved_regs_space = regs_saved + regs_saved_rounding;
8345
8346 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8347 return total_saved_regs_space + total_auto_space
8348 + crtl->args.info.byref_regs * 8;
8349
8350 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8351 return total_saved_regs_space + total_auto_space
8352 + crtl->args.info.byref_regs * 8;
8353
8354 /* Initial gap between fp and sp is 0. */
8355 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8356 return 0;
8357
8358 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8359 return rounded_frame_size (0);
8360
8361 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8362 return rounded_frame_size (0);
8363
8364 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8365 && (to == HARD_FRAME_POINTER_REGNUM
8366 || to == STACK_POINTER_REGNUM));
8367 if (TARGET_SH5)
8368 {
8369 int n = total_saved_regs_space;
8370 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8371 save_schedule schedule;
8372 save_entry *entry;
8373
8374 n += total_auto_space;
8375
8376 /* If it wasn't saved, there's not much we can do. */
8377 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8378 return n;
8379
8380 target_flags = copy_flags;
8381
8382 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8383 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8384 if (entry->reg == pr_reg)
8385 {
8386 target_flags = save_flags;
8387 return entry->offset;
8388 }
8389 gcc_unreachable ();
8390 }
8391 else
8392 return total_auto_space;
8393 }
8394
8395 /* Parse the -mfixed-range= option string. */
8396 void
8397 sh_fix_range (const char *const_str)
8398 {
8399 int i, first, last;
8400 char *str, *dash, *comma;
8401
8402 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8403 REG2 are either register names or register numbers. The effect
8404 of this option is to mark the registers in the range from REG1 to
8405 REG2 as ``fixed'' so they won't be used by the compiler. */
8406
8407 i = strlen (const_str);
8408 str = (char *) alloca (i + 1);
8409 memcpy (str, const_str, i + 1);
8410
8411 while (1)
8412 {
8413 dash = strchr (str, '-');
8414 if (!dash)
8415 {
8416 warning (0, "value of -mfixed-range must have form REG1-REG2");
8417 return;
8418 }
8419 *dash = '\0';
8420 comma = strchr (dash + 1, ',');
8421 if (comma)
8422 *comma = '\0';
8423
8424 first = decode_reg_name (str);
8425 if (first < 0)
8426 {
8427 warning (0, "unknown register name: %s", str);
8428 return;
8429 }
8430
8431 last = decode_reg_name (dash + 1);
8432 if (last < 0)
8433 {
8434 warning (0, "unknown register name: %s", dash + 1);
8435 return;
8436 }
8437
8438 *dash = '-';
8439
8440 if (first > last)
8441 {
8442 warning (0, "%s-%s is an empty range", str, dash + 1);
8443 return;
8444 }
8445
8446 for (i = first; i <= last; ++i)
8447 fixed_regs[i] = call_used_regs[i] = 1;
8448
8449 if (!comma)
8450 break;
8451
8452 *comma = ',';
8453 str = comma + 1;
8454 }
8455 }
8456 \f
8457 /* Insert any deferred function attributes from earlier pragmas. */
8458 static void
8459 sh_insert_attributes (tree node, tree *attributes)
8460 {
8461 tree attrs;
8462
8463 if (TREE_CODE (node) != FUNCTION_DECL)
8464 return;
8465
8466 /* We are only interested in declarations. */
8467 if (!DECL_P (node))
8468 return;
8469
8470 /* Append the attributes to the deferred attributes. */
8471 *sh_deferred_function_attributes_tail = *attributes;
8472 attrs = sh_deferred_function_attributes;
8473 if (!attrs)
8474 return;
8475
8476 /* Some attributes imply or require the interrupt attribute. */
8477 if (!lookup_attribute ("interrupt_handler", attrs)
8478 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8479 {
8480 /* If we have a trapa_handler, but no interrupt_handler attribute,
8481 insert an interrupt_handler attribute. */
8482 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8483 /* We can't use sh_pr_interrupt here because that's not in the
8484 java frontend. */
8485 attrs
8486 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8487 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8488 if the interrupt attribute is missing, we ignore the attribute
8489 and warn. */
8490 else if (lookup_attribute ("sp_switch", attrs)
8491 || lookup_attribute ("trap_exit", attrs)
8492 || lookup_attribute ("nosave_low_regs", attrs)
8493 || lookup_attribute ("resbank", attrs))
8494 {
8495 tree *tail;
8496
8497 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8498 {
8499 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8500 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8501 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8502 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8503 warning (OPT_Wattributes,
8504 "%qE attribute only applies to interrupt functions",
8505 TREE_PURPOSE (attrs));
8506 else
8507 {
8508 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8509 NULL_TREE);
8510 tail = &TREE_CHAIN (*tail);
8511 }
8512 }
8513 attrs = *attributes;
8514 }
8515 }
8516
8517 /* Install the processed list. */
8518 *attributes = attrs;
8519
8520 /* Clear deferred attributes. */
8521 sh_deferred_function_attributes = NULL_TREE;
8522 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8523
8524 return;
8525 }
8526
8527 /* Supported attributes:
8528
8529 interrupt_handler -- specifies this function is an interrupt handler.
8530
8531 trapa_handler - like above, but don't save all registers.
8532
8533 sp_switch -- specifies an alternate stack for an interrupt handler
8534 to run on.
8535
8536 trap_exit -- use a trapa to exit an interrupt function instead of
8537 an rte instruction.
8538
8539 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8540 This is useful on the SH3 and upwards,
8541 which have a separate set of low regs for User and Supervisor modes.
8542 This should only be used for the lowest level of interrupts. Higher levels
8543 of interrupts must save the registers in case they themselves are
8544 interrupted.
8545
8546 renesas -- use Renesas calling/layout conventions (functions and
8547 structures).
8548
8549 resbank -- In case of an ISR, use a register bank to save registers
8550 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8551 */
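
/* Example declarations (illustrative only; the identifiers and the
   argument values below are hypothetical):

     void isr (void) __attribute__ ((interrupt_handler,
                                     sp_switch ("alt_stack"),
                                     trap_exit (12)));
     void bank_isr (void) __attribute__ ((interrupt_handler, resbank));
     void vec_func (void) __attribute__ ((function_vector (42)));

   resbank and function_vector are accepted only for SH2A targets, the
   sp_switch argument must be a string constant, and the trap_exit and
   function_vector arguments must be integer constants (function_vector
   in the range 0 to 255).  */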
8552
8553 /* Handle a 'resbank' attribute. */
8554 static tree
8555 sh_handle_resbank_handler_attribute (tree * node, tree name,
8556 tree args ATTRIBUTE_UNUSED,
8557 int flags ATTRIBUTE_UNUSED,
8558 bool * no_add_attrs)
8559 {
8560 if (!TARGET_SH2A)
8561 {
8562 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8563 name);
8564 *no_add_attrs = true;
8565 }
8566 if (TREE_CODE (*node) != FUNCTION_DECL)
8567 {
8568 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8569 name);
8570 *no_add_attrs = true;
8571 }
8572
8573 return NULL_TREE;
8574 }
8575
8576 /* Handle an "interrupt_handler" attribute; arguments as in
8577 struct attribute_spec.handler. */
8578 static tree
8579 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8580 tree args ATTRIBUTE_UNUSED,
8581 int flags ATTRIBUTE_UNUSED,
8582 bool *no_add_attrs)
8583 {
8584 if (TREE_CODE (*node) != FUNCTION_DECL)
8585 {
8586 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8587 name);
8588 *no_add_attrs = true;
8589 }
8590 else if (TARGET_SHCOMPACT)
8591 {
8592 error ("attribute interrupt_handler is not compatible with -m5-compact");
8593 *no_add_attrs = true;
8594 }
8595
8596 return NULL_TREE;
8597 }
8598
8599 /* Handle a 'function_vector' attribute; arguments as in
8600 struct attribute_spec.handler. */
8601 static tree
8602 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8603 tree args ATTRIBUTE_UNUSED,
8604 int flags ATTRIBUTE_UNUSED,
8605 bool * no_add_attrs)
8606 {
8607 if (!TARGET_SH2A)
8608 {
8609 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8610 name);
8611 *no_add_attrs = true;
8612 }
8613 else if (TREE_CODE (*node) != FUNCTION_DECL)
8614 {
8615 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8616 name);
8617 *no_add_attrs = true;
8618 }
8619 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8620 {
8621 /* The argument must be a constant integer. */
8622 warning (OPT_Wattributes,
8623 "%qE attribute argument not an integer constant",
8624 name);
8625 *no_add_attrs = true;
8626 }
8627 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8628 {
8629 /* The argument value must be between 0 and 255. */
8630 warning (OPT_Wattributes,
8631 "%qE attribute argument should be between 0 and 255",
8632 name);
8633 *no_add_attrs = true;
8634 }
8635 return NULL_TREE;
8636 }
8637
8638 /* Returns 1 if X is a SYMBOL_REF for a function that has been assigned
8639 the attribute 'function_vector'. */
8640 int
8641 sh2a_is_function_vector_call (rtx x)
8642 {
8643 if (GET_CODE (x) == SYMBOL_REF
8644 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8645 {
8646 tree tr = SYMBOL_REF_DECL (x);
8647
8648 if (sh2a_function_vector_p (tr))
8649 return 1;
8650 }
8651
8652 return 0;
8653 }
8654
8655 /* Returns the function vector number, if the attribute
8656 'function_vector' is assigned, otherwise returns zero. */
8657 int
8658 sh2a_get_function_vector_number (rtx x)
8659 {
8660 int num;
8661 tree list, t;
8662
8663 if ((GET_CODE (x) == SYMBOL_REF)
8664 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8665 {
8666 t = SYMBOL_REF_DECL (x);
8667
8668 if (TREE_CODE (t) != FUNCTION_DECL)
8669 return 0;
8670
8671 list = SH_ATTRIBUTES (t);
8672 while (list)
8673 {
8674 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8675 {
8676 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8677 return num;
8678 }
8679
8680 list = TREE_CHAIN (list);
8681 }
8682
8683 return 0;
8684 }
8685 else
8686 return 0;
8687 }
8688
8689 /* Handle an "sp_switch" attribute; arguments as in
8690 struct attribute_spec.handler. */
8691 static tree
8692 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8693 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8694 {
8695 if (TREE_CODE (*node) != FUNCTION_DECL)
8696 {
8697 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8698 name);
8699 *no_add_attrs = true;
8700 }
8701 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8702 {
8703 /* The argument must be a constant string. */
8704 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8705 name);
8706 *no_add_attrs = true;
8707 }
8708
8709 return NULL_TREE;
8710 }
8711
8712 /* Handle an "trap_exit" attribute; arguments as in
8713 struct attribute_spec.handler. */
8714 static tree
8715 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8716 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8717 {
8718 if (TREE_CODE (*node) != FUNCTION_DECL)
8719 {
8720 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8721 name);
8722 *no_add_attrs = true;
8723 }
8724 /* The argument specifies a trap number to be used in a trapa instruction
8725 at function exit (instead of an rte instruction). */
8726 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8727 {
8728 /* The argument must be a constant integer. */
8729 warning (OPT_Wattributes, "%qE attribute argument not an "
8730 "integer constant", name);
8731 *no_add_attrs = true;
8732 }
8733
8734 return NULL_TREE;
8735 }
8736
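/* Handle a "renesas" attribute; it needs no checking, so simply accept it.  */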
8737 static tree
8738 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8739 tree name ATTRIBUTE_UNUSED,
8740 tree args ATTRIBUTE_UNUSED,
8741 int flags ATTRIBUTE_UNUSED,
8742 bool *no_add_attrs ATTRIBUTE_UNUSED)
8743 {
8744 return NULL_TREE;
8745 }
8746
8747 /* True if __attribute__((renesas)) or -mrenesas. */
8748 int
8749 sh_attr_renesas_p (const_tree td)
8750 {
8751 if (TARGET_HITACHI)
8752 return 1;
8753 if (td == 0)
8754 return 0;
8755 if (DECL_P (td))
8756 td = TREE_TYPE (td);
8757 if (td == error_mark_node)
8758 return 0;
8759 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8760 != NULL_TREE);
8761 }
8762
8763 /* True if __attribute__((renesas)) or -mrenesas, for the current
8764 function. */
8765 int
8766 sh_cfun_attr_renesas_p (void)
8767 {
8768 return sh_attr_renesas_p (current_function_decl);
8769 }
8770
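/* Return nonzero if the current function has the "interrupt_handler"
   attribute.  */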
8771 int
8772 sh_cfun_interrupt_handler_p (void)
8773 {
8774 return (lookup_attribute ("interrupt_handler",
8775 DECL_ATTRIBUTES (current_function_decl))
8776 != NULL_TREE);
8777 }
8778
8779 /* Returns 1 if FUNC has been assigned the attribute
8780 "function_vector". */
8781 int
8782 sh2a_function_vector_p (tree func)
8783 {
8784 tree list;
8785 if (TREE_CODE (func) != FUNCTION_DECL)
8786 return 0;
8787
8788 list = SH_ATTRIBUTES (func);
8789 while (list)
8790 {
8791 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8792 return 1;
8793
8794 list = TREE_CHAIN (list);
8795 }
8796 return 0;
8797 }
8798
8799 /* Returns TRUE if given tree has the "resbank" attribute. */
8800
8801 int
8802 sh_cfun_resbank_handler_p (void)
8803 {
8804 return ((lookup_attribute ("resbank",
8805 DECL_ATTRIBUTES (current_function_decl))
8806 != NULL_TREE)
8807 && (lookup_attribute ("interrupt_handler",
8808 DECL_ATTRIBUTES (current_function_decl))
8809 != NULL_TREE) && TARGET_SH2A);
8810 }
8811
8812 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8813
8814 static const char *
8815 sh_check_pch_target_flags (int old_flags)
8816 {
8817 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8818 | MASK_SH_E | MASK_HARD_SH4
8819 | MASK_FPU_SINGLE | MASK_SH4))
8820 return _("created and used with different architectures / ABIs");
8821 if ((old_flags ^ target_flags) & MASK_HITACHI)
8822 return _("created and used with different ABIs");
8823 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8824 return _("created and used with different endianness");
8825 return NULL;
8826 }
8827 \f
8828 /* Predicates used by the templates. */
8829
8830 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8831 Used only in general_movsrc_operand. */
8832
8833 int
8834 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8835 {
8836 switch (REGNO (op))
8837 {
8838 case PR_REG:
8839 case MACL_REG:
8840 case MACH_REG:
8841 return 1;
8842 }
8843 return 0;
8844 }
8845
8846 /* Nonzero if OP is a floating point value with value 0.0. */
8847
8848 int
8849 fp_zero_operand (rtx op)
8850 {
8851 REAL_VALUE_TYPE r;
8852
8853 if (GET_MODE (op) != SFmode)
8854 return 0;
8855
8856 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8857 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8858 }
8859
8860 /* Nonzero if OP is a floating point value with value 1.0. */
8861
8862 int
8863 fp_one_operand (rtx op)
8864 {
8865 REAL_VALUE_TYPE r;
8866
8867 if (GET_MODE (op) != SFmode)
8868 return 0;
8869
8870 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8871 return REAL_VALUES_EQUAL (r, dconst1);
8872 }
8873
8874 /* In general mode switching is used. If we are
8875 compiling without -mfmovd, movsf_ie isn't taken into account for
8876 mode switching. We could check in machine_dependent_reorg for
8877 cases where we know we are in single precision mode, but there is
8878 no interface to find that out during reload, so we must avoid
8879 choosing an fldi alternative during reload and thus failing to
8880 allocate a scratch register for the constant loading. */
8881 int
8882 fldi_ok (void)
8883 {
8884 return 1;
8885 }
8886
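/* Predicate for operands that need a tertiary reload: a MEM, or a
   CONST_DOUBLE on SH4.  */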
8887 int
8888 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8889 {
8890 enum rtx_code code = GET_CODE (op);
8891 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8892 }
8893
8894 /* Return the TLS type for TLS symbols, 0 otherwise. */
8895 enum tls_model
8896 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8897 {
8898 if (GET_CODE (op) != SYMBOL_REF)
8899 return TLS_MODEL_NONE;
8900 return SYMBOL_REF_TLS_MODEL (op);
8901 }
8902 \f
8903 /* Return the destination address of a branch. */
8904
8905 static int
8906 branch_dest (rtx branch)
8907 {
8908 rtx dest = SET_SRC (PATTERN (branch));
8909 int dest_uid;
8910
8911 if (GET_CODE (dest) == IF_THEN_ELSE)
8912 dest = XEXP (dest, 1);
8913 dest = XEXP (dest, 0);
8914 dest_uid = INSN_UID (dest);
8915 return INSN_ADDRESSES (dest_uid);
8916 }
8917 \f
8918 /* Return nonzero if REG is not used after INSN.
8919 We assume REG is a reload reg, and therefore does
8920 not live past labels. It may live past calls or jumps though. */
8921 int
8922 reg_unused_after (rtx reg, rtx insn)
8923 {
8924 enum rtx_code code;
8925 rtx set;
8926
8927 /* If the reg is set by this instruction, then it is safe for our
8928 case. Disregard the case where this is a store to memory, since
8929 we are checking a register used in the store address. */
8930 set = single_set (insn);
8931 if (set && !MEM_P (SET_DEST (set))
8932 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8933 return 1;
8934
8935 while ((insn = NEXT_INSN (insn)))
8936 {
8937 rtx set;
8938 if (!INSN_P (insn))
8939 continue;
8940
8941 code = GET_CODE (insn);
8942
8943 #if 0
8944 /* If this is a label that existed before reload, then the register
8945 is dead here. However, if this is a label added by reorg, then
8946 the register may still be live here. We can't tell the difference,
8947 so we just ignore labels completely. */
8948 if (code == CODE_LABEL)
8949 return 1;
8950 /* else */
8951 #endif
8952
8953 if (code == JUMP_INSN)
8954 return 0;
8955
8956 /* If this is a sequence, we must handle them all at once.
8957 We could have for instance a call that sets the target register,
8958 and an insn in a delay slot that uses the register. In this case,
8959 we must return 0. */
8960 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8961 {
8962 int i;
8963 int retval = 0;
8964
8965 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8966 {
8967 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8968 rtx set = single_set (this_insn);
8969
8970 if (CALL_P (this_insn))
8971 code = CALL_INSN;
8972 else if (JUMP_P (this_insn))
8973 {
8974 if (INSN_ANNULLED_BRANCH_P (this_insn))
8975 return 0;
8976 code = JUMP_INSN;
8977 }
8978
8979 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8980 return 0;
8981 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8982 {
8983 if (!MEM_P (SET_DEST (set)))
8984 retval = 1;
8985 else
8986 return 0;
8987 }
8988 if (set == 0
8989 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8990 return 0;
8991 }
8992 if (retval == 1)
8993 return 1;
8994 else if (code == JUMP_INSN)
8995 return 0;
8996 }
8997
8998 set = single_set (insn);
8999 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9000 return 0;
9001 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9002 return !MEM_P (SET_DEST (set));
9003 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9004 return 0;
9005
9006 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9007 return 1;
9008 }
9009 return 1;
9010 }
9011 \f
9012 #include "ggc.h"
9013
9014 static GTY(()) rtx fpscr_rtx;
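/* Return the RTL for the FPSCR register, creating and caching it on
   first use.  */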
9015 rtx
9016 get_fpscr_rtx (void)
9017 {
9018 if (! fpscr_rtx)
9019 {
9020 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9021 REG_USERVAR_P (fpscr_rtx) = 1;
9022 mark_user_reg (fpscr_rtx);
9023 }
9024 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9025 mark_user_reg (fpscr_rtx);
9026 return fpscr_rtx;
9027 }
9028
9029 static GTY(()) tree fpscr_values;
9030
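/* Set FPSCR from element INDEX of the external __fpscr_values array.
   SCRATCH is used to form the address when new pseudos cannot be created.  */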
9031 static void
9032 emit_fpu_switch (rtx scratch, int index)
9033 {
9034 rtx dst, src;
9035
9036 if (fpscr_values == NULL)
9037 {
9038 tree t;
9039
9040 t = build_index_type (integer_one_node);
9041 t = build_array_type (integer_type_node, t);
9042 t = build_decl (BUILTINS_LOCATION,
9043 VAR_DECL, get_identifier ("__fpscr_values"), t);
9044 DECL_ARTIFICIAL (t) = 1;
9045 DECL_IGNORED_P (t) = 1;
9046 DECL_EXTERNAL (t) = 1;
9047 TREE_STATIC (t) = 1;
9048 TREE_PUBLIC (t) = 1;
9049 TREE_USED (t) = 1;
9050
9051 fpscr_values = t;
9052 }
9053
9054 src = DECL_RTL (fpscr_values);
9055 if (!can_create_pseudo_p ())
9056 {
9057 emit_move_insn (scratch, XEXP (src, 0));
9058 if (index != 0)
9059 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9060 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9061 }
9062 else
9063 src = adjust_address (src, PSImode, index * 4);
9064
9065 dst = get_fpscr_rtx ();
9066 emit_move_insn (dst, src);
9067 }
9068
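/* Helpers for emitting floating point insn patterns; the expand_* routines
   below append the FPSCR rtx that the SH patterns take as an extra operand.  */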
9069 void
9070 emit_sf_insn (rtx pat)
9071 {
9072 emit_insn (pat);
9073 }
9074
9075 void
9076 emit_df_insn (rtx pat)
9077 {
9078 emit_insn (pat);
9079 }
9080
9081 void
9082 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9083 {
9084 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9085 }
9086
9087 void
9088 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9089 {
9090 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9091 get_fpscr_rtx ()));
9092 }
9093
9094 void
9095 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9096 {
9097 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9098 }
9099
9100 void
9101 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9102 {
9103 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9104 get_fpscr_rtx ()));
9105 }
9106 \f
9107 static rtx get_free_reg (HARD_REG_SET);
9108
9109 /* This function returns a register to hold the address from which to
9110 load the fpscr. Currently it always returns r1 or r7, but when we are
9111 able to use pseudo registers after combine, or have a better mechanism
9112 for choosing a register, it should be done here. */
9113 /* REGS_LIVE is the liveness information for the point for which we
9114 need this allocation. In some bare-bones exit blocks, r1 is live at the
9115 start. We can even have all of r0..r3 being live:
9116 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9117 The insn before which new insns are placed will clobber the register
9118 we return. If a basic block consists only of setting the return value
9119 register to a pseudo and using that register, the return value is not
9120 live before or after this block, yet we'll insert our insns right in
9121 the middle. */
9122
9123 static rtx
9124 get_free_reg (HARD_REG_SET regs_live)
9125 {
9126 if (! TEST_HARD_REG_BIT (regs_live, 1))
9127 return gen_rtx_REG (Pmode, 1);
9128
9129 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
9130 there shouldn't be anything but a jump before the function end. */
9131 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9132 return gen_rtx_REG (Pmode, 7);
9133 }
9134
9135 /* This function will set the fpscr from memory.
9136 MODE is the mode we are setting it to. */
9137 void
9138 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9139 {
9140 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9141 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9142 rtx addr_reg;
9143
9144 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9145 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9146 }
9147
9148 /* Is the given character a logical line separator for the assembler? */
9149 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9150 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9151 #endif
9152
9153 int
9154 sh_insn_length_adjustment (rtx insn)
9155 {
9156 /* Instructions with unfilled delay slots take up an extra two bytes for
9157 the nop in the delay slot. */
9158 if (((NONJUMP_INSN_P (insn)
9159 && GET_CODE (PATTERN (insn)) != USE
9160 && GET_CODE (PATTERN (insn)) != CLOBBER)
9161 || CALL_P (insn)
9162 || (JUMP_P (insn)
9163 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
9164 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
9165 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9166 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9167 return 2;
9168
9169 /* SH2e has a bug that prevents the use of annulled branches, so if
9170 the delay slot is not filled, we'll have to put a NOP in it. */
9171 if (sh_cpu_attr == CPU_SH2E
9172 && JUMP_P (insn)
9173 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
9174 && GET_CODE (PATTERN (insn)) != ADDR_VEC
9175 && get_attr_type (insn) == TYPE_CBRANCH
9176 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9177 return 2;
9178
9179 /* sh-dsp parallel processing insns take four bytes instead of two. */
9180
9181 if (NONJUMP_INSN_P (insn))
9182 {
9183 int sum = 0;
9184 rtx body = PATTERN (insn);
9185 const char *templ;
9186 char c;
9187 int maybe_label = 1;
9188
9189 if (GET_CODE (body) == ASM_INPUT)
9190 templ = XSTR (body, 0);
9191 else if (asm_noperands (body) >= 0)
9192 templ
9193 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9194 else
9195 return 0;
9196 do
9197 {
9198 int ppi_adjust = 0;
9199
9200 do
9201 c = *templ++;
9202 while (c == ' ' || c == '\t');
9203 /* all sh-dsp parallel-processing insns start with p.
9204 The only non-ppi sh insn starting with p is pref.
9205 The only ppi starting with pr is prnd. */
9206 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9207 ppi_adjust = 2;
9208 /* The repeat pseudo-insn expands to three insns, a total of
9209 six bytes in size. */
9210 else if ((c == 'r' || c == 'R')
9211 && ! strncasecmp ("epeat", templ, 5))
9212 ppi_adjust = 4;
9213 while (c && c != '\n'
9214 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9215 {
9216 /* If this is a label, it is obviously not a ppi insn. */
9217 if (c == ':' && maybe_label)
9218 {
9219 ppi_adjust = 0;
9220 break;
9221 }
9222 else if (c == '\'' || c == '"')
9223 maybe_label = 0;
9224 c = *templ++;
9225 }
9226 sum += ppi_adjust;
9227 maybe_label = c != ':';
9228 }
9229 while (c);
9230 return sum;
9231 }
9232 return 0;
9233 }
9234 \f
9235 /* Return TRUE for a valid displacement for the REG+disp addressing
9236 with MODE. */
9237
9238 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9239 into the FRx registers. We implement this by setting the maximum offset
9240 to zero when the value is SFmode. This also restricts loading of SFmode
9241 values into the integer registers, but that can't be helped. */
9242
9243 /* The SH allows a displacement in a QImode or HImode address, but only
9244 when the other operand is R0. GCC doesn't handle this very well, so we
9245 forgo all of that.
9246
9247 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9248 DI can be any number 0..60. */
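/* Illustration of the checks below (derived from the code, not from the
   hardware manuals): for SImode on a plain SH target the displacement must
   be a multiple of 4 below 64, so @(60,Rn) is accepted while @(61,Rn) and
   @(64,Rn) are not; SH2A additionally accepts word-aligned SImode
   displacements below 16383. */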
9249
9250 bool
9251 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9252 {
9253 if (CONST_INT_P (op))
9254 {
9255 if (TARGET_SHMEDIA)
9256 {
9257 int size;
9258
9259 /* Check if this is the address of an unaligned load / store. */
9260 if (mode == VOIDmode)
9261 return CONST_OK_FOR_I06 (INTVAL (op));
9262
9263 size = GET_MODE_SIZE (mode);
9264 return (!(INTVAL (op) & (size - 1))
9265 && INTVAL (op) >= -512 * size
9266 && INTVAL (op) < 512 * size);
9267 }
9268
9269 if (TARGET_SH2A)
9270 {
9271 if (GET_MODE_SIZE (mode) == 1
9272 && (unsigned) INTVAL (op) < 4096)
9273 return true;
9274 }
9275
9276 if ((GET_MODE_SIZE (mode) == 4
9277 && (unsigned) INTVAL (op) < 64
9278 && !(INTVAL (op) & 3)
9279 && !(TARGET_SH2E && mode == SFmode))
9280 || (GET_MODE_SIZE (mode) == 4
9281 && (unsigned) INTVAL (op) < 16383
9282 && !(INTVAL (op) & 3) && TARGET_SH2A))
9283 return true;
9284
9285 if ((GET_MODE_SIZE (mode) == 8
9286 && (unsigned) INTVAL (op) < 60
9287 && !(INTVAL (op) & 3)
9288 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9289 || ((GET_MODE_SIZE (mode) == 8)
9290 && (unsigned) INTVAL (op) < 8192
9291 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9292 && (TARGET_SH2A && mode == DFmode)))
9293 return true;
9294 }
9295
9296 return false;
9297 }
9298
9299 /* Recognize an RTL expression that is a valid memory address for
9300 an instruction.
9301 The MODE argument is the machine mode for the MEM expression
9302 that wants to use this address.
9303 Allow REG
9304 REG+disp
9305 REG+r0
9306 REG++
9307 --REG */
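/* In SH assembler syntax these correspond roughly to @Rn, @(disp,Rn),
   @(R0,Rn), @Rn+ and @-Rn respectively (given here only as an informal
   illustration). */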
9308
9309 static bool
9310 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9311 {
9312 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9313 return true;
9314 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9315 && ! TARGET_SHMEDIA
9316 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9317 return true;
9318 else if (GET_CODE (x) == PLUS
9319 && (mode != PSImode || reload_completed))
9320 {
9321 rtx xop0 = XEXP (x, 0);
9322 rtx xop1 = XEXP (x, 1);
9323
9324 if (GET_MODE_SIZE (mode) <= 8
9325 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9326 && sh_legitimate_index_p (mode, xop1))
9327 return true;
9328
9329 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9330 || ((xop0 == stack_pointer_rtx
9331 || xop0 == hard_frame_pointer_rtx)
9332 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9333 || ((xop1 == stack_pointer_rtx
9334 || xop1 == hard_frame_pointer_rtx)
9335 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9336 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9337 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9338 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9339 && TARGET_FMOVD && mode == DFmode)))
9340 {
9341 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9342 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9343 return true;
9344 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9345 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9346 return true;
9347 }
9348 }
9349
9350 return false;
9351 }
9352 \f
9353 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9354 isn't protected by a PIC unspec. */
9355 int
9356 nonpic_symbol_mentioned_p (rtx x)
9357 {
9358 register const char *fmt;
9359 register int i;
9360
9361 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9362 || GET_CODE (x) == PC)
9363 return 1;
9364
9365 /* We don't want to look into the possible MEM location of a
9366 CONST_DOUBLE, since we're not going to use it, in general. */
9367 if (GET_CODE (x) == CONST_DOUBLE)
9368 return 0;
9369
9370 if (GET_CODE (x) == UNSPEC
9371 && (XINT (x, 1) == UNSPEC_PIC
9372 || XINT (x, 1) == UNSPEC_GOT
9373 || XINT (x, 1) == UNSPEC_GOTOFF
9374 || XINT (x, 1) == UNSPEC_GOTPLT
9375 || XINT (x, 1) == UNSPEC_GOTTPOFF
9376 || XINT (x, 1) == UNSPEC_DTPOFF
9377 || XINT (x, 1) == UNSPEC_PLT
9378 || XINT (x, 1) == UNSPEC_SYMOFF
9379 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9380 return 0;
9381
9382 fmt = GET_RTX_FORMAT (GET_CODE (x));
9383 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9384 {
9385 if (fmt[i] == 'E')
9386 {
9387 register int j;
9388
9389 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9390 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9391 return 1;
9392 }
9393 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9394 return 1;
9395 }
9396
9397 return 0;
9398 }
9399
9400 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9401 @GOTOFF in `reg'. */
9402 rtx
9403 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9404 rtx reg)
9405 {
9406 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9407 return orig;
9408
9409 if (GET_CODE (orig) == LABEL_REF
9410 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9411 {
9412 if (reg == 0)
9413 reg = gen_reg_rtx (Pmode);
9414
9415 emit_insn (gen_symGOTOFF2reg (reg, orig));
9416 return reg;
9417 }
9418 else if (GET_CODE (orig) == SYMBOL_REF)
9419 {
9420 if (reg == 0)
9421 reg = gen_reg_rtx (Pmode);
9422
9423 emit_insn (gen_symGOT2reg (reg, orig));
9424 return reg;
9425 }
9426 return orig;
9427 }
9428
9429 /* Try machine-dependent ways of modifying an illegitimate address
9430 to be legitimate. If we find one, return the new, valid address.
9431 Otherwise, return X.
9432
9433 For the SH, if X is almost suitable for indexing, but the offset is
9434 out of range, convert it into a normal form so that CSE has a chance
9435 of reducing the number of address registers used. */
9436
9437 static rtx
9438 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9439 {
9440 if (flag_pic)
9441 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9442
9443 if (GET_CODE (x) == PLUS
9444 && (GET_MODE_SIZE (mode) == 4
9445 || GET_MODE_SIZE (mode) == 8)
9446 && CONST_INT_P (XEXP (x, 1))
9447 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9448 && ! TARGET_SHMEDIA
9449 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9450 && ! (TARGET_SH2E && mode == SFmode))
9451 {
9452 rtx index_rtx = XEXP (x, 1);
9453 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9454 rtx sum;
9455
9456 /* On rare occasions, we might get an unaligned pointer
9457 that is indexed in a way to give an aligned address.
9458 Therefore, keep the lower two bits in offset_base. */
9459 /* Instead of offset_base 128..131 use 124..127, so that
9460 a simple add suffices. */
9461 if (offset > 127)
9462 offset_base = ((offset + 4) & ~60) - 4;
9463 else
9464 offset_base = offset & ~60;
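/* Worked example for SImode on plain SH (illustrative only): an offset of
   68 is outside the 0..60 displacement range; 68 & ~60 yields an
   offset_base of 64, so the address is rebuilt as (Rn + 64) with a
   residual displacement of 4, which is legitimate. */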
9465
9466 /* Sometimes the normal form does not suit DImode. We
9467 could avoid that by using smaller ranges, but that
9468 would give less optimized code when SImode is
9469 prevalent. */
9470 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9471 {
9472 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9473 GEN_INT (offset_base), NULL_RTX, 0,
9474 OPTAB_LIB_WIDEN);
9475
9476 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9477 }
9478 }
9479
9480 return x;
9481 }
9482
9483 /* Mark the use of a constant in the literal table. If the constant
9484 has multiple labels, make it unique. */
9485 static rtx
9486 mark_constant_pool_use (rtx x)
9487 {
9488 rtx insn, lab, pattern;
9489
9490 if (x == NULL)
9491 return x;
9492
9493 switch (GET_CODE (x))
9494 {
9495 case LABEL_REF:
9496 x = XEXP (x, 0);
9497 case CODE_LABEL:
9498 break;
9499 default:
9500 return x;
9501 }
9502
9503 /* Get the first label in the list of labels for the same constant
9504 and delete the other labels in the list. */
9505 lab = x;
9506 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9507 {
9508 if (!LABEL_P (insn)
9509 || LABEL_REFS (insn) != NEXT_INSN (insn))
9510 break;
9511 lab = insn;
9512 }
9513
9514 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9515 INSN_DELETED_P (insn) = 1;
9516
9517 /* Mark constants in a window. */
9518 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9519 {
9520 if (!NONJUMP_INSN_P (insn))
9521 continue;
9522
9523 pattern = PATTERN (insn);
9524 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9525 continue;
9526
9527 switch (XINT (pattern, 1))
9528 {
9529 case UNSPECV_CONST2:
9530 case UNSPECV_CONST4:
9531 case UNSPECV_CONST8:
9532 XVECEXP (pattern, 0, 1) = const1_rtx;
9533 break;
9534 case UNSPECV_WINDOW_END:
9535 if (XVECEXP (pattern, 0, 0) == x)
9536 return lab;
9537 break;
9538 case UNSPECV_CONST_END:
9539 return lab;
9540 default:
9541 break;
9542 }
9543 }
9544
9545 return lab;
9546 }
9547 \f
9548 /* Return true if it's possible to redirect BRANCH1 to the destination
9549 of an unconditional jump BRANCH2. We only want to do this if the
9550 resulting branch will have a short displacement. */
9551 int
9552 sh_can_redirect_branch (rtx branch1, rtx branch2)
9553 {
9554 if (flag_expensive_optimizations && simplejump_p (branch2))
9555 {
9556 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9557 rtx insn;
9558 int distance;
9559
9560 for (distance = 0, insn = NEXT_INSN (branch1);
9561 insn && distance < 256;
9562 insn = PREV_INSN (insn))
9563 {
9564 if (insn == dest)
9565 return 1;
9566 else
9567 distance += get_attr_length (insn);
9568 }
9569 for (distance = 0, insn = NEXT_INSN (branch1);
9570 insn && distance < 256;
9571 insn = NEXT_INSN (insn))
9572 {
9573 if (insn == dest)
9574 return 1;
9575 else
9576 distance += get_attr_length (insn);
9577 }
9578 }
9579 return 0;
9580 }
9581
9582 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9583 int
9584 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9585 unsigned int new_reg)
9586 {
9587 /* Interrupt functions can only use registers that have already been
9588 saved by the prologue, even if they would normally be
9589 call-clobbered. */
9590
9591 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9592 return 0;
9593
9594 return 1;
9595 }
9596
9597 /* Function to update the integer COST
9598 based on the relationship between INSN that is dependent on
9599 DEP_INSN through the dependence LINK. The default is to make no
9600 adjustment to COST. This can be used for example to specify to
9601 the scheduler that an output- or anti-dependence does not incur
9602 the same cost as a data-dependence. The return value should be
9603 the new value for COST. */
9604 static int
9605 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9606 {
9607 rtx reg, use_pat;
9608
9609 if (TARGET_SHMEDIA)
9610 {
9611 /* On SHmedia, if the dependence is an anti-dependence or
9612 output-dependence, there is no cost. */
9613 if (REG_NOTE_KIND (link) != 0)
9614 {
9615 /* However, dependencies between target register loads and
9616 uses of the register in a subsequent block that are separated
9617 by a conditional branch are not modelled - we have to make do with
9618 the anti-dependency between the target register load and the
9619 conditional branch that ends the current block. */
9620 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9621 && GET_CODE (PATTERN (dep_insn)) == SET
9622 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9623 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9624 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9625 {
9626 int orig_cost = cost;
9627 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9628 rtx target = ((! note
9629 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9630 ? insn : JUMP_LABEL (insn));
9631 /* On the likely path, the branch costs 1, on the unlikely path,
9632 it costs 3. */
9633 cost--;
9634 do
9635 target = next_active_insn (target);
9636 while (target && ! flow_dependent_p (target, dep_insn)
9637 && --cost > 0);
9638 /* If two branches are executed in immediate succession, with the
9639 first branch properly predicted, this causes a stall at the
9640 second branch, hence we won't need the target for the
9641 second branch for two cycles after the launch of the first
9642 branch. */
9643 if (cost > orig_cost - 2)
9644 cost = orig_cost - 2;
9645 }
9646 else
9647 cost = 0;
9648 }
9649
9650 else if (get_attr_is_mac_media (insn)
9651 && get_attr_is_mac_media (dep_insn))
9652 cost = 1;
9653
9654 else if (! reload_completed
9655 && GET_CODE (PATTERN (insn)) == SET
9656 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9657 && GET_CODE (PATTERN (dep_insn)) == SET
9658 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9659 && cost < 4)
9660 cost = 4;
9661 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9662 that is needed at the target. */
9663 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9664 && ! flow_dependent_p (insn, dep_insn))
9665 cost--;
9666 }
9667 else if (REG_NOTE_KIND (link) == 0)
9668 {
9669 enum attr_type type;
9670 rtx dep_set;
9671
9672 if (recog_memoized (insn) < 0
9673 || recog_memoized (dep_insn) < 0)
9674 return cost;
9675
9676 dep_set = single_set (dep_insn);
9677
9678 /* The latency that we specify in the scheduling description refers
9679 to the actual output, not to an auto-increment register; for that,
9680 the latency is one. */
9681 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9682 {
9683 rtx set = single_set (insn);
9684
9685 if (set
9686 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9687 && (!MEM_P (SET_DEST (set))
9688 || !reg_mentioned_p (SET_DEST (dep_set),
9689 XEXP (SET_DEST (set), 0))))
9690 cost = 1;
9691 }
9692 /* The only input for a call that is timing-critical is the
9693 function's address. */
9694 if (CALL_P (insn))
9695 {
9696 rtx call = PATTERN (insn);
9697
9698 if (GET_CODE (call) == PARALLEL)
9699 call = XVECEXP (call, 0, 0);
9700 if (GET_CODE (call) == SET)
9701 call = SET_SRC (call);
9702 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9703 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9704 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9705 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9706 cost -= TARGET_SH4_300 ? 3 : 6;
9707 }
9708 /* Likewise, the most timing critical input for an sfuncs call
9709 is the function address. However, sfuncs typically start
9710 using their arguments pretty quickly.
9711 Assume a four cycle delay for SH4 before they are needed.
9712 Cached ST40-300 calls are quicker, so assume only a one
9713 cycle delay there.
9714 ??? Maybe we should encode the delays till input registers
9715 are needed by sfuncs into the sfunc call insn. */
9716 /* All sfunc calls are parallels with at least four components.
9717 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9718 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9719 && XVECLEN (PATTERN (insn), 0) >= 4
9720 && (reg = sfunc_uses_reg (insn)))
9721 {
9722 if (! reg_set_p (reg, dep_insn))
9723 cost -= TARGET_SH4_300 ? 1 : 4;
9724 }
9725 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9726 {
9727 enum attr_type dep_type = get_attr_type (dep_insn);
9728
9729 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9730 cost--;
9731 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9732 && (type = get_attr_type (insn)) != TYPE_CALL
9733 && type != TYPE_SFUNC)
9734 cost--;
9735 /* When the preceding instruction loads the shift amount of
9736 the following SHAD/SHLD, the latency of the load is increased
9737 by 1 cycle. */
9738 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9739 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9740 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9741 XEXP (SET_SRC (single_set (insn)),
9742 1)))
9743 cost++;
9744 /* When an LS group instruction with a latency of less than
9745 3 cycles is followed by a double-precision floating-point
9746 instruction, FIPR, or FTRV, the latency of the first
9747 instruction is increased to 3 cycles. */
9748 else if (cost < 3
9749 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9750 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9751 cost = 3;
9752 /* The lsw register of a double-precision computation is ready one
9753 cycle earlier. */
9754 else if (reload_completed
9755 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9756 && (use_pat = single_set (insn))
9757 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9758 SET_SRC (use_pat)))
9759 cost -= 1;
9760
9761 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9762 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9763 cost -= 1;
9764 }
9765 else if (TARGET_SH4_300)
9766 {
9767 /* Stores need their input register two cycles later. */
9768 if (dep_set && cost >= 1
9769 && ((type = get_attr_type (insn)) == TYPE_STORE
9770 || type == TYPE_PSTORE
9771 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9772 {
9773 rtx set = single_set (insn);
9774
9775 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9776 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9777 {
9778 cost -= 2;
9779 /* But don't reduce the cost below 1 if the address depends
9780 on a side effect of dep_insn. */
9781 if (cost < 1
9782 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9783 cost = 1;
9784 }
9785 }
9786 }
9787 }
9788 /* An anti-dependence penalty of two applies if the first insn is a double
9789 precision fadd / fsub / fmul. */
9790 else if (!TARGET_SH4_300
9791 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9792 && recog_memoized (dep_insn) >= 0
9793 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9794 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9795 /* A lot of alleged anti-flow dependences are fake,
9796 so check this one is real. */
9797 && flow_dependent_p (dep_insn, insn))
9798 cost = 2;
9799
9800 return cost;
9801 }
9802
9803 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9804 if DEP_INSN is anti-flow dependent on INSN. */
9805 static int
9806 flow_dependent_p (rtx insn, rtx dep_insn)
9807 {
9808 rtx tmp = PATTERN (insn);
9809
9810 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9811 return tmp == NULL_RTX;
9812 }
9813
9814 /* A helper function for flow_dependent_p called through note_stores. */
9815 static void
9816 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9817 {
9818 rtx * pinsn = (rtx *) data;
9819
9820 if (*pinsn && reg_referenced_p (x, *pinsn))
9821 *pinsn = NULL_RTX;
9822 }
9823
9824 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9825 'special function' patterns (type sfunc) that clobber pr, but that
9826 do not look like function calls to leaf_function_p. Hence we must
9827 do this extra check. */
9828 static int
9829 sh_pr_n_sets (void)
9830 {
9831 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9832 }
9833
9834 /* Return where to allocate pseudo for a given hard register initial
9835 value. */
9836 static rtx
9837 sh_allocate_initial_value (rtx hard_reg)
9838 {
9839 rtx x;
9840
9841 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9842 {
9843 if (current_function_is_leaf
9844 && ! sh_pr_n_sets ()
9845 && ! (TARGET_SHCOMPACT
9846 && ((crtl->args.info.call_cookie
9847 & ~ CALL_COOKIE_RET_TRAMP (1))
9848 || crtl->saves_all_registers)))
9849 x = hard_reg;
9850 else
9851 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9852 }
9853 else
9854 x = NULL_RTX;
9855
9856 return x;
9857 }
9858
9859 /* Return 2 to indicate dual issue for superscalar processors such as the
9860 SH4, and 1 otherwise. To be used by the DFA pipeline description. */
9861 static int
9862 sh_issue_rate (void)
9863 {
9864 if (TARGET_SUPERSCALAR)
9865 return 2;
9866 else
9867 return 1;
9868 }
9869
9870 /* Functions for ready queue reordering for sched1. */
9871
9872 /* Get weight for mode for a set x. */
9873 static short
9874 find_set_regmode_weight (rtx x, enum machine_mode mode)
9875 {
9876 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9877 return 1;
9878 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9879 {
9880 if (REG_P (SET_DEST (x)))
9881 {
9882 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9883 return 1;
9884 else
9885 return 0;
9886 }
9887 return 1;
9888 }
9889 return 0;
9890 }
9891
9892 /* Get regmode weight for insn. */
9893 static short
9894 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9895 {
9896 short reg_weight = 0;
9897 rtx x;
9898
9899 /* Increment weight for each register born here. */
9900 x = PATTERN (insn);
9901 reg_weight += find_set_regmode_weight (x, mode);
9902 if (GET_CODE (x) == PARALLEL)
9903 {
9904 int j;
9905 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9906 {
9907 x = XVECEXP (PATTERN (insn), 0, j);
9908 reg_weight += find_set_regmode_weight (x, mode);
9909 }
9910 }
9911 /* Decrement weight for each register that dies here. */
9912 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9913 {
9914 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9915 {
9916 rtx note = XEXP (x, 0);
9917 if (REG_P (note) && GET_MODE (note) == mode)
9918 reg_weight--;
9919 }
9920 }
9921 return reg_weight;
9922 }
9923
9924 /* Calculate regmode weights for all insns of a basic block. */
9925 static void
9926 find_regmode_weight (basic_block b, enum machine_mode mode)
9927 {
9928 rtx insn, next_tail, head, tail;
9929
9930 get_ebb_head_tail (b, b, &head, &tail);
9931 next_tail = NEXT_INSN (tail);
9932
9933 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9934 {
9935 /* Handle register life information. */
9936 if (!INSN_P (insn))
9937 continue;
9938
9939 if (mode == SFmode)
9940 INSN_REGMODE_WEIGHT (insn, mode) =
9941 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9942 else if (mode == SImode)
9943 INSN_REGMODE_WEIGHT (insn, mode) =
9944 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9945 }
9946 }
9947
9948 /* Comparison function for ready queue sorting. */
9949 static int
9950 rank_for_reorder (const void *x, const void *y)
9951 {
9952 rtx tmp = *(const rtx *) y;
9953 rtx tmp2 = *(const rtx *) x;
9954
9955 /* The insn in a schedule group should be issued first. */
9956 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9957 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9958
9959 /* If insns are equally good, sort by INSN_LUID (original insn order). This
9960 minimizes instruction movement, thus minimizing sched's effect on
9961 register pressure. */
9962 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9963 }
9964
9965 /* Resort the array A in which only the element at index N may be out of order. */
9966 static void
9967 swap_reorder (rtx *a, int n)
9968 {
9969 rtx insn = a[n - 1];
9970 int i = n - 2;
9971
9972 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9973 {
9974 a[i + 1] = a[i];
9975 i -= 1;
9976 }
9977 a[i + 1] = insn;
9978 }
9979
9980 #define SCHED_REORDER(READY, N_READY) \
9981 do \
9982 { \
9983 if ((N_READY) == 2) \
9984 swap_reorder (READY, N_READY); \
9985 else if ((N_READY) > 2) \
9986 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9987 } \
9988 while (0)
9989
9990 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9991 macro. */
9992 static void
9993 ready_reorder (rtx *ready, int nready)
9994 {
9995 SCHED_REORDER (ready, nready);
9996 }
9997
9998 /* Count life regions of r0 for a block. */
9999 static int
10000 find_r0_life_regions (basic_block b)
10001 {
10002 rtx end, insn;
10003 rtx pset;
10004 rtx r0_reg;
10005 int live;
10006 int set;
10007 int death = 0;
10008
10009 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10010 {
10011 set = 1;
10012 live = 1;
10013 }
10014 else
10015 {
10016 set = 0;
10017 live = 0;
10018 }
10019
10020 insn = BB_HEAD (b);
10021 end = BB_END (b);
10022 r0_reg = gen_rtx_REG (SImode, R0_REG);
10023 while (1)
10024 {
10025 if (INSN_P (insn))
10026 {
10027 if (find_regno_note (insn, REG_DEAD, R0_REG))
10028 {
10029 death++;
10030 live = 0;
10031 }
10032 if (!live
10033 && (pset = single_set (insn))
10034 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10035 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10036 {
10037 set++;
10038 live = 1;
10039 }
10040 }
10041 if (insn == end)
10042 break;
10043 insn = NEXT_INSN (insn);
10044 }
10045 return set - death;
10046 }
10047
10048 /* Calculate regmode weights for all insns of all basic blocks. */
10049 static void
10050 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10051 int verbose ATTRIBUTE_UNUSED,
10052 int old_max_uid)
10053 {
10054 basic_block b;
10055
10056 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10057 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10058 r0_life_regions = 0;
10059
10060 FOR_EACH_BB_REVERSE (b)
10061 {
10062 find_regmode_weight (b, SImode);
10063 find_regmode_weight (b, SFmode);
10064 if (!reload_completed)
10065 r0_life_regions += find_r0_life_regions (b);
10066 }
10067
10068 CURR_REGMODE_PRESSURE (SImode) = 0;
10069 CURR_REGMODE_PRESSURE (SFmode) = 0;
10070
10071 }
10072
10073 /* Cleanup. */
10074 static void
10075 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10076 int verbose ATTRIBUTE_UNUSED)
10077 {
10078 if (regmode_weight[0])
10079 {
10080 free (regmode_weight[0]);
10081 regmode_weight[0] = NULL;
10082 }
10083 if (regmode_weight[1])
10084 {
10085 free (regmode_weight[1]);
10086 regmode_weight[1] = NULL;
10087 }
10088 }
10089
10090 /* The set of scalar modes supported differs from the default version: TImode
10091 is not supported for 32-bit SHMEDIA. */
10092 static bool
10093 sh_scalar_mode_supported_p (enum machine_mode mode)
10094 {
10095 if (TARGET_SHMEDIA32 && mode == TImode)
10096 return false;
10097
10098 return default_scalar_mode_supported_p (mode);
10099 }
10100
10101 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10102 keep count of register pressures on SImode and SFmode. */
10103 static int
10104 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10105 int sched_verbose ATTRIBUTE_UNUSED,
10106 rtx insn,
10107 int can_issue_more)
10108 {
10109 if (GET_CODE (PATTERN (insn)) != USE
10110 && GET_CODE (PATTERN (insn)) != CLOBBER)
10111 cached_can_issue_more = can_issue_more - 1;
10112 else
10113 cached_can_issue_more = can_issue_more;
10114
10115 if (reload_completed)
10116 return cached_can_issue_more;
10117
10118 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10119 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10120
10121 return cached_can_issue_more;
10122 }
10123
10124 static void
10125 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10126 int verbose ATTRIBUTE_UNUSED,
10127 int veclen ATTRIBUTE_UNUSED)
10128 {
10129 CURR_REGMODE_PRESSURE (SImode) = 0;
10130 CURR_REGMODE_PRESSURE (SFmode) = 0;
10131 }
10132
10133 /* Some magic numbers. */
10134 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10135 functions that already have high pressure on r0. */
10136 #define R0_MAX_LIFE_REGIONS 2
10137 /* Register Pressure thresholds for SImode and SFmode registers. */
10138 #define SIMODE_MAX_WEIGHT 5
10139 #define SFMODE_MAX_WEIGHT 10
10140
10141 /* Return true if the pressure is high for MODE. */
10142 static short
10143 high_pressure (enum machine_mode mode)
10144 {
10145 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10146 functions that already have high pressure on r0. */
10147 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10148 return 1;
10149
10150 if (mode == SFmode)
10151 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10152 else
10153 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10154 }
10155
10156 /* Reorder ready queue if register pressure is high. */
10157 static int
10158 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10159 int sched_verbose ATTRIBUTE_UNUSED,
10160 rtx *ready,
10161 int *n_readyp,
10162 int clock_var ATTRIBUTE_UNUSED)
10163 {
10164 if (reload_completed)
10165 return sh_issue_rate ();
10166
10167 if (high_pressure (SFmode) || high_pressure (SImode))
10168 {
10169 ready_reorder (ready, *n_readyp);
10170 }
10171
10172 return sh_issue_rate ();
10173 }
10174
10175 /* Skip cycles if the current register pressure is high. */
10176 static int
10177 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10178 int sched_verbose ATTRIBUTE_UNUSED,
10179 rtx *ready ATTRIBUTE_UNUSED,
10180 int *n_readyp ATTRIBUTE_UNUSED,
10181 int clock_var ATTRIBUTE_UNUSED)
10182 {
10183 if (reload_completed)
10184 return cached_can_issue_more;
10185
10186 if (high_pressure (SFmode) || high_pressure (SImode))
10187 skip_cycles = 1;
10188
10189 return cached_can_issue_more;
10190 }
10191
10192 /* Skip cycles without sorting the ready queue. This will move insns from
10193 Q->R. If this is the last cycle we are skipping, allow sorting of the
10194 ready queue by sh_reorder. */
10195
10196 /* Generally, skipping this many cycles is sufficient for all insns to move
10197 from Q -> R. */
10198 #define MAX_SKIPS 8
10199
10200 static int
10201 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10202 int sched_verbose ATTRIBUTE_UNUSED,
10203 rtx insn ATTRIBUTE_UNUSED,
10204 int last_clock_var,
10205 int clock_var,
10206 int *sort_p)
10207 {
10208 if (reload_completed)
10209 return 0;
10210
10211 if (skip_cycles)
10212 {
10213 if ((clock_var - last_clock_var) < MAX_SKIPS)
10214 {
10215 *sort_p = 0;
10216 return 1;
10217 }
10218 /* If this is the last cycle we are skipping, allow reordering of R. */
10219 if ((clock_var - last_clock_var) == MAX_SKIPS)
10220 {
10221 *sort_p = 1;
10222 return 1;
10223 }
10224 }
10225
10226 skip_cycles = 0;
10227
10228 return 0;
10229 }
10230
10231 /* SHmedia requires registers for branches, so we can't generate new
10232 branches past reload. */
10233 static bool
10234 sh_cannot_modify_jumps_p (void)
10235 {
10236 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10237 }
10238
10239 static enum reg_class
10240 sh_target_reg_class (void)
10241 {
10242 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10243 }
10244
10245 static bool
10246 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10247 {
10248 HARD_REG_SET dummy;
10249 #if 0
10250 rtx insn;
10251 #endif
10252
10253 if (! shmedia_space_reserved_for_target_registers)
10254 return 0;
10255 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10256 return 0;
10257 if (calc_live_regs (&dummy) >= 6 * 8)
10258 return 1;
10259 return 0;
10260 }
10261
10262 static bool
10263 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10264 {
10265 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10266 }
10267 \f
10268 /*
10269 On the SH1..SH4, the trampoline looks like
10270 2 0002 D202 mov.l l2,r2
10271 1 0000 D301 mov.l l1,r3
10272 3 0004 422B jmp @r2
10273 4 0006 0009 nop
10274 5 0008 00000000 l1: .long area
10275 6 000c 00000000 l2: .long function
10276
10277 SH5 (compact) uses r1 instead of r3 for the static chain. */
10278
10279
10280 /* Emit RTL insns to initialize the variable parts of a trampoline.
10281 FNADDR is an RTX for the address of the function's pure code.
10282 CXT is an RTX for the static chain value for the function. */
10283
10284 void
10285 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
10286 {
10287 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
10288
10289 if (TARGET_SHMEDIA64)
10290 {
10291 rtx tramp_templ;
10292 int fixed_len;
10293
10294 rtx movi1 = GEN_INT (0xcc000010);
10295 rtx shori1 = GEN_INT (0xc8000010);
10296 rtx src, dst;
10297
10298 /* The following trampoline works within a +- 128 KB range for cxt:
10299 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10300 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10301 gettr tr1,r1; blink tr0,r63 */
10302 /* Address rounding makes it hard to compute the exact bounds of the
10303 offset for this trampoline, but we have a rather generous offset
10304 range, so frame_offset should do fine as an upper bound. */
10305 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10306 {
10307 /* ??? could optimize this trampoline initialization
10308 by writing DImode words with two insns each. */
10309 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10310 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10311 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10312 insn = gen_rtx_AND (DImode, insn, mask);
10313 /* Or in ptb/u .,tr1 pattern */
10314 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10315 insn = force_operand (insn, NULL_RTX);
10316 insn = gen_lowpart (SImode, insn);
10317 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10318 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10319 insn = gen_rtx_AND (DImode, insn, mask);
10320 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10321 insn = gen_lowpart (SImode, insn);
10322 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10323 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10324 insn = gen_rtx_AND (DImode, insn, mask);
10325 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10326 insn = gen_lowpart (SImode, insn);
10327 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10328 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10329 insn = gen_rtx_AND (DImode, insn, mask);
10330 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10331 insn = gen_lowpart (SImode, insn);
10332 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10333 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10334 insn = gen_rtx_AND (DImode, insn, mask);
10335 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10336 insn = gen_lowpart (SImode, insn);
10337 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10338 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10339 GEN_INT (0x6bf10600));
10340 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10341 GEN_INT (0x4415fc10));
10342 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10343 GEN_INT (0x4401fff0));
10344 emit_insn (gen_ic_invalidate_line (tramp));
10345 return;
10346 }
10347 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
10348 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10349
10350 tramp_templ = gen_datalabel_ref (tramp_templ);
10351 dst = tramp_mem;
10352 src = gen_const_mem (BLKmode, tramp_templ);
10353 set_mem_align (dst, 256);
10354 set_mem_align (src, 64);
10355 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10356
10357 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10358 emit_move_insn (adjust_address (tramp_mem, Pmode,
10359 fixed_len + GET_MODE_SIZE (Pmode)),
10360 cxt);
10361 emit_insn (gen_ic_invalidate_line (tramp));
10362 return;
10363 }
10364 else if (TARGET_SHMEDIA)
10365 {
10366 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10367 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10368 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10369 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10370 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10371 rotated right by 10, with the higher 16 bits of every 32 selected. */
10372 rtx movishori
10373 = force_reg (V2HImode, (simplify_gen_subreg
10374 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10375 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10376 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10377
10378 tramp = force_reg (Pmode, tramp);
10379 fnaddr = force_reg (SImode, fnaddr);
10380 cxt = force_reg (SImode, cxt);
10381 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10382 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10383 movishori));
10384 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10385 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10386 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10387 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10388 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10389 gen_rtx_SUBREG (V2HImode, cxt, 0),
10390 movishori));
10391 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10392 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10393 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10394 if (TARGET_LITTLE_ENDIAN)
10395 {
10396 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10397 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10398 }
10399 else
10400 {
10401 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10402 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10403 }
10404 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10405 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10406 emit_insn (gen_ic_invalidate_line (tramp));
10407 return;
10408 }
10409 else if (TARGET_SHCOMPACT)
10410 {
10411 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10412 return;
10413 }
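/* Generic SH1..SH4 case: store the four instruction words from the listing
   above (mov.l l2,r2 = 0xd202, mov.l l1,r3 = 0xd301, jmp @r2 = 0x422b,
   nop = 0x0009), followed by the static chain and the function address. */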
10414 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10415 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10416 SImode));
10417 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10418 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10419 SImode));
10420 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10421 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10422 if (TARGET_HARVARD)
10423 {
10424 if (!TARGET_INLINE_IC_INVALIDATE
10425 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10426 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10427 FUNCTION_ORDINARY),
10428 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10429 else
10430 emit_insn (gen_ic_invalidate_line (tramp));
10431 }
10432 }
10433
10434 /* FIXME: This is overly conservative. A SHcompact function that
10435 receives arguments ``by reference'' will have them stored in its
10436 own stack frame, so it must not pass pointers or references to
10437 these arguments to other functions by means of sibling calls. */
10438 /* If PIC, we cannot make sibling calls to global functions
10439 because the PLT requires r12 to be live. */
10440 static bool
10441 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10442 {
10443 return (1
10444 && (! TARGET_SHCOMPACT
10445 || crtl->args.info.stack_regs == 0)
10446 && ! sh_cfun_interrupt_handler_p ()
10447 && (! flag_pic
10448 || (decl && ! TREE_PUBLIC (decl))
10449 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10450 }
10451 \f
10452 /* Machine specific built-in functions. */
10453
10454 struct builtin_description
10455 {
10456 const enum insn_code icode;
10457 const char *const name;
10458 int signature;
10459 };
10460
10461 /* Describe number and signedness of arguments; arg[0] == result
10462 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10463 /* 9: 64-bit pointer, 10: 32-bit pointer */
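/* For example, SH_BLTIN_SH_HI below, { 4, 4, 1 }, reads as: result and
   first argument take whatever mode the insn pattern specifies (signedness
   immaterial) and the second argument is unsigned (e.g. the control value
   of MPERM_W). This is only an illustrative reading of the table. */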
10464 static const char signature_args[][4] =
10465 {
10466 #define SH_BLTIN_V2SI2 0
10467 { 4, 4 },
10468 #define SH_BLTIN_V4HI2 1
10469 { 4, 4 },
10470 #define SH_BLTIN_V2SI3 2
10471 { 4, 4, 4 },
10472 #define SH_BLTIN_V4HI3 3
10473 { 4, 4, 4 },
10474 #define SH_BLTIN_V8QI3 4
10475 { 4, 4, 4 },
10476 #define SH_BLTIN_MAC_HISI 5
10477 { 1, 4, 4, 1 },
10478 #define SH_BLTIN_SH_HI 6
10479 { 4, 4, 1 },
10480 #define SH_BLTIN_SH_SI 7
10481 { 4, 4, 1 },
10482 #define SH_BLTIN_V4HI2V2SI 8
10483 { 4, 4, 4 },
10484 #define SH_BLTIN_V4HI2V8QI 9
10485 { 4, 4, 4 },
10486 #define SH_BLTIN_SISF 10
10487 { 4, 2 },
10488 #define SH_BLTIN_LDUA_L 11
10489 { 2, 10 },
10490 #define SH_BLTIN_LDUA_Q 12
10491 { 1, 10 },
10492 #define SH_BLTIN_STUA_L 13
10493 { 0, 10, 2 },
10494 #define SH_BLTIN_STUA_Q 14
10495 { 0, 10, 1 },
10496 #define SH_BLTIN_LDUA_L64 15
10497 { 2, 9 },
10498 #define SH_BLTIN_LDUA_Q64 16
10499 { 1, 9 },
10500 #define SH_BLTIN_STUA_L64 17
10501 { 0, 9, 2 },
10502 #define SH_BLTIN_STUA_Q64 18
10503 { 0, 9, 1 },
10504 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10505 #define SH_BLTIN_2 19
10506 #define SH_BLTIN_SU 19
10507 { 1, 2 },
10508 #define SH_BLTIN_3 20
10509 #define SH_BLTIN_SUS 20
10510 { 2, 2, 1 },
10511 #define SH_BLTIN_PSSV 21
10512 { 0, 8, 2, 2 },
10513 #define SH_BLTIN_XXUU 22
10514 #define SH_BLTIN_UUUU 22
10515 { 1, 1, 1, 1 },
10516 #define SH_BLTIN_PV 23
10517 { 0, 8 },
10518 };
10519 /* mcmv: operands considered unsigned. */
10520 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10521 /* mperm: control value considered unsigned int. */
10522 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10523 /* mshards_q: returns signed short. */
10524 /* nsb: takes long long arg, returns unsigned char. */
10525 static const struct builtin_description bdesc[] =
10526 {
10527 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
10528 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
10529 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
10530 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
10531 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
10532 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
10533 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
10534 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
10535 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
10536 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
10537 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
10538 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
10539 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
10540 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
10541 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
10542 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
10543 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
10544 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
10545 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
10546 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
10547 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
10548 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
10549 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
10550 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
10551 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
10552 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
10553 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
10554 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
10555 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
10556 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
10557 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
10558 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
10559 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
10560 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
10561 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
10562 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
10563 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
10564 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
10565 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
10566 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
10567 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
10568 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
10569 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
10570 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
10571 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
10572 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
10573 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
10574 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
10575 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
10576 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
10577 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
10578 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
10579 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
10580 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
10581 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
10582 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
10583 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
10584 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
10585 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
10586 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
10587 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
10588 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
10589 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
10590 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
10591 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
10592 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
10593 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
10594 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
10595 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
10596 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
10597 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
10598 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
10599 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
10600 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
10601 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
10602 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
10603 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
10604 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
10605 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
10606 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
10607 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
10608 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
10609 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
10610 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
10611 };
10612
10613 static void
10614 sh_media_init_builtins (void)
10615 {
10616 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10617 const struct builtin_description *d;
10618
10619 memset (shared, 0, sizeof shared);
10620 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10621 {
10622 tree type, arg_type = 0;
10623 int signature = d->signature;
10624 int i;
10625
10626 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10627 type = shared[signature];
10628 else
10629 {
10630 int has_result = signature_args[signature][0] != 0;
10631
10632 if ((signature_args[signature][1] & 8)
10633 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10634 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10635 continue;
10636 if (! TARGET_FPU_ANY
10637 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10638 continue;
10639 type = void_list_node;
10640 for (i = 3; ; i--)
10641 {
10642 int arg = signature_args[signature][i];
10643 int opno = i - 1 + has_result;
10644
10645 if (arg & 8)
10646 arg_type = ptr_type_node;
10647 else if (arg)
10648 arg_type = (*lang_hooks.types.type_for_mode)
10649 (insn_data[d->icode].operand[opno].mode,
10650 (arg & 1));
10651 else if (i)
10652 continue;
10653 else
10654 arg_type = void_type_node;
10655 if (i == 0)
10656 break;
10657 type = tree_cons (NULL_TREE, arg_type, type);
10658 }
10659 type = build_function_type (arg_type, type);
10660 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10661 shared[signature] = type;
10662 }
10663 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10664 NULL, NULL_TREE);
10665 }
10666 }
10667
10668 /* Implements target hook vector_mode_supported_p. */
10669 bool
10670 sh_vector_mode_supported_p (enum machine_mode mode)
10671 {
10672 if (TARGET_FPU_ANY
10673 && ((mode == V2SFmode)
10674 || (mode == V4SFmode)
10675 || (mode == V16SFmode)))
10676 return true;
10677
10678 else if (TARGET_SHMEDIA
10679 && ((mode == V8QImode)
10680 || (mode == V2HImode)
10681 || (mode == V4HImode)
10682 || (mode == V2SImode)))
10683 return true;
10684
10685 return false;
10686 }
10687
10688 /* Implements target hook dwarf_calling_convention. Return an enum
10689 of dwarf_calling_convention. */
10690 int
10691 sh_dwarf_calling_convention (const_tree func)
10692 {
10693 if (sh_attr_renesas_p (func))
10694 return DW_CC_GNU_renesas_sh;
10695
10696 return DW_CC_normal;
10697 }
10698
10699 static void
10700 sh_init_builtins (void)
10701 {
10702 if (TARGET_SHMEDIA)
10703 sh_media_init_builtins ();
10704 }
10705
10706 /* Expand an expression EXP that calls a built-in function,
10707 with result going to TARGET if that's convenient
10708 (and in mode MODE if that's convenient).
10709 SUBTARGET may be used as the target for computing one of EXP's operands.
10710 IGNORE is nonzero if the value is to be ignored. */
10711
10712 static rtx
10713 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10714 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10715 {
10716 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10717 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10718 const struct builtin_description *d = &bdesc[fcode];
10719 enum insn_code icode = d->icode;
10720 int signature = d->signature;
10721 enum machine_mode tmode = VOIDmode;
10722 int nop = 0, i;
10723 rtx op[4];
10724 rtx pat = 0;
10725
10726 if (signature_args[signature][0])
10727 {
10728 if (ignore)
10729 return 0;
10730
10731 tmode = insn_data[icode].operand[0].mode;
10732 if (! target
10733 || GET_MODE (target) != tmode
10734 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10735 target = gen_reg_rtx (tmode);
10736 op[nop++] = target;
10737 }
10738 else
10739 target = 0;
10740
10741 for (i = 1; i <= 3; i++, nop++)
10742 {
10743 tree arg;
10744 enum machine_mode opmode, argmode;
10745 tree optype;
10746
10747 if (! signature_args[signature][i])
10748 break;
10749 arg = CALL_EXPR_ARG (exp, i - 1);
10750 if (arg == error_mark_node)
10751 return const0_rtx;
10752 if (signature_args[signature][i] & 8)
10753 {
10754 opmode = ptr_mode;
10755 optype = ptr_type_node;
10756 }
10757 else
10758 {
10759 opmode = insn_data[icode].operand[nop].mode;
10760 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10761 }
10762 argmode = TYPE_MODE (TREE_TYPE (arg));
10763 if (argmode != opmode)
10764 arg = build1 (NOP_EXPR, optype, arg);
10765 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10766 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10767 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10768 }
10769
10770 switch (nop)
10771 {
10772 case 1:
10773 pat = (*insn_data[d->icode].genfun) (op[0]);
10774 break;
10775 case 2:
10776 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10777 break;
10778 case 3:
10779 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10780 break;
10781 case 4:
10782 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10783 break;
10784 default:
10785 gcc_unreachable ();
10786 }
10787 if (! pat)
10788 return 0;
10789 emit_insn (pat);
10790 return target;
10791 }
10792
10793 void
10794 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10795 {
10796 rtx sel0 = const0_rtx;
10797 rtx sel1 = const1_rtx;
10798 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10799 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10800
10801 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10802 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
10803 }
10804
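/* Likewise, expand a V2SFmode binary operation CODE on OP1 and OP2 into OP0,
one SFmode element at a time. */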
10805 void
10806 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10807 {
10808 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10809
10810 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10811 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
10812 }
10813
10814 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
10815 We can allow any mode in any general register. The special registers
10816 only allow SImode. Don't allow any mode in the PR.
10817
10818 We cannot hold DCmode values in the XD registers because alter_reg
10819 handles subregs of them incorrectly. We could work around this by
10820 spacing the XD registers like the DR registers, but this would require
10821 additional memory in every compilation to hold larger register vectors.
10822 We could hold SFmode / SCmode values in XD registers, but that
10823 would require a tertiary reload when reloading from / to memory,
10824 and a secondary reload to reload from / to general regs; that
10825 seems to be a losing proposition.
10826
10827 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10828 it won't be ferried through GP registers first. */
10829
10830 bool
10831 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
10832 {
10833 if (SPECIAL_REGISTER_P (regno))
10834 return mode == SImode;
10835
10836 if (regno == FPUL_REG)
10837 return (mode == SImode || mode == SFmode);
10838
10839 if (FP_REGISTER_P (regno) && mode == SFmode)
10840 return true;
10841
10842 if (mode == V2SFmode)
10843 {
10844 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10845 || GENERAL_REGISTER_P (regno)))
10846 return true;
10847 else
10848 return false;
10849 }
10850
10851 if (mode == V4SFmode)
10852 {
10853 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10854 || GENERAL_REGISTER_P (regno))
10855 return true;
10856 else
10857 return false;
10858 }
10859
10860 if (mode == V16SFmode)
10861 {
10862 if (TARGET_SHMEDIA)
10863 {
10864 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
10865 return true;
10866 else
10867 return false;
10868 }
10869 else
10870 return regno == FIRST_XD_REG;
10871 }
10872
10873 if (FP_REGISTER_P (regno))
10874 {
10875 if (mode == SFmode
10876 || mode == SImode
10877 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
10878 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10879 || mode == DCmode
10880 || (TARGET_SHMEDIA
10881 && (mode == DFmode || mode == DImode
10882 || mode == V2SFmode || mode == TImode)))
10883 && ((regno - FIRST_FP_REG) & 1) == 0)
10884 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
10885 && ((regno - FIRST_FP_REG) & 3) == 0))
10886 return true;
10887 else
10888 return false;
10889 }
10890
10891 if (XD_REGISTER_P (regno))
10892 return mode == DFmode;
10893
10894 if (TARGET_REGISTER_P (regno))
10895 return (mode == DImode || mode == SImode || mode == PDImode);
10896
10897 if (regno == PR_REG)
10898 return mode == SImode;
10899
10900 if (regno == FPSCR_REG)
10901 return mode == PSImode;
10902
10903 /* FIXME. This works around PR target/37633 for -O0. */
10904 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
10905 {
10906 unsigned int n = GET_MODE_SIZE (mode) / 8;
10907
10908 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
10909 && regno <= FIRST_GENERAL_REG + 14)
10910 return false;
10911 }
10912
10913 return true;
10914 }
10915
10916 /* Return true if a mode change from FROM to TO for a register in class RCLASS
10917 is invalid. */
10918 bool
10919 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10920 enum reg_class rclass)
10921 {
10922 /* We want to enable the use of SUBREGs as a means to
10923 VEC_SELECT a single element of a vector. */
10924 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10925 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
10926
10927 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10928 {
10929 if (TARGET_LITTLE_ENDIAN)
10930 {
10931 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10932 return reg_classes_intersect_p (DF_REGS, rclass);
10933 }
10934 else
10935 {
10936 if (GET_MODE_SIZE (from) < 8)
10937 return reg_classes_intersect_p (DF_HI_REGS, rclass);
10938 }
10939 }
10940 return 0;
10941 }
10942
10943
10944 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10945 that label is used. */
10946
10947 void
10948 sh_mark_label (rtx address, int nuses)
10949 {
10950 if (GOTOFF_P (address))
10951 {
10952 /* Extract the label or symbol. */
10953 address = XEXP (address, 0);
10954 if (GET_CODE (address) == PLUS)
10955 address = XEXP (address, 0);
10956 address = XVECEXP (address, 0, 0);
10957 }
10958 if (GET_CODE (address) == LABEL_REF
10959 && LABEL_P (XEXP (address, 0)))
10960 LABEL_NUSES (XEXP (address, 0)) += nuses;
10961 }
10962
10963 /* Compute extra cost of moving data between one register class
10964 and another. */
10965
10966 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10967 uses this information. Hence, the general register <-> floating point
10968 register information here is not used for SFmode. */
10969
10970 int
10971 sh_register_move_cost (enum machine_mode mode,
10972 enum reg_class srcclass, enum reg_class dstclass)
10973 {
10974 if (dstclass == T_REGS || dstclass == PR_REGS)
10975 return 10;
10976
10977 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10978 return 4;
10979
10980 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10981 && REGCLASS_HAS_FP_REG (srcclass)
10982 && REGCLASS_HAS_FP_REG (dstclass))
10983 return 4;
10984
10985 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10986 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10987
10988 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10989 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10990 return 9;
10991
10992 if ((REGCLASS_HAS_FP_REG (dstclass)
10993 && REGCLASS_HAS_GENERAL_REG (srcclass))
10994 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10995 && REGCLASS_HAS_FP_REG (srcclass)))
10996 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10997 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10998
10999 if ((dstclass == FPUL_REGS
11000 && REGCLASS_HAS_GENERAL_REG (srcclass))
11001 || (srcclass == FPUL_REGS
11002 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11003 return 5;
11004
11005 if ((dstclass == FPUL_REGS
11006 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11007 || (srcclass == FPUL_REGS
11008 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11009 return 7;
11010
11011 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11012 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11013 return 20;
11014
11015 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11016 if (TARGET_SHMEDIA
11017 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11018 {
11019 if (sh_gettrcost >= 0)
11020 return sh_gettrcost;
11021 else if (!TARGET_PT_FIXED)
11022 return 100;
11023 }
11024
11025 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11026 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11027 return 4;
11028
11029 if (TARGET_SHMEDIA
11030 || (TARGET_FMOVD
11031 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11032 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11033 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11034
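/* By default the cost is two per 32-bit word moved. */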
11035 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11036 }
11037
11038 static rtx emit_load_ptr (rtx, rtx);
11039
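/* Load a ptr_mode value from ADDR into REG, sign-extending to Pmode when
Pmode is wider than ptr_mode; return the emitted move insn. */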
11040 static rtx
11041 emit_load_ptr (rtx reg, rtx addr)
11042 {
11043 rtx mem = gen_const_mem (ptr_mode, addr);
11044
11045 if (Pmode != ptr_mode)
11046 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11047 return emit_move_insn (reg, mem);
11048 }
11049
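/* Emit and output RTL for a thunk that adjusts the incoming `this' pointer
by DELTA (and, when VCALL_OFFSET is nonzero, by an additional offset loaded
through the vtable) and then tail-calls FUNCTION. */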
11050 static void
11051 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11052 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11053 tree function)
11054 {
11055 CUMULATIVE_ARGS cum;
11056 int structure_value_byref = 0;
11057 rtx this_rtx, this_value, sibcall, insns, funexp;
11058 tree funtype = TREE_TYPE (function);
11059 int simple_add = CONST_OK_FOR_ADD (delta);
11060 int did_load = 0;
11061 rtx scratch0, scratch1, scratch2;
11062 unsigned i;
11063
11064 reload_completed = 1;
11065 epilogue_completed = 1;
11066 current_function_uses_only_leaf_regs = 1;
11067
11068 emit_note (NOTE_INSN_PROLOGUE_END);
11069
11070 /* Find the "this" pointer. We have such a wide range of ABIs for the
11071 SH that it's best to do this completely machine independently.
11072 "this" is passed as first argument, unless a structure return pointer
11073 comes first, in which case "this" comes second. */
11074 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11075 #ifndef PCC_STATIC_STRUCT_RETURN
11076 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11077 structure_value_byref = 1;
11078 #endif /* not PCC_STATIC_STRUCT_RETURN */
11079 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11080 {
11081 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11082
11083 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11084 }
11085 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11086
11087 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11088 static chain pointer (even if you can't have nested virtual functions
11089 right now, someone might implement them sometime), and the rest of the
11090 registers are used for argument passing, are callee-saved, or reserved. */
11091 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11092 -ffixed-reg has been used. */
11093 if (! call_used_regs[0] || fixed_regs[0])
11094 error ("r0 needs to be available as a call-clobbered register");
11095 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11096 if (! TARGET_SH5)
11097 {
11098 if (call_used_regs[1] && ! fixed_regs[1])
11099 scratch1 = gen_rtx_REG (ptr_mode, 1);
11100 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11101 pointing to where struct values are to be returned. */
11102 if (call_used_regs[3] && ! fixed_regs[3])
11103 scratch2 = gen_rtx_REG (Pmode, 3);
11104 }
11105 else if (TARGET_SHMEDIA)
11106 {
11107 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11108 if (i != REGNO (scratch0) &&
11109 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11110 {
11111 scratch1 = gen_rtx_REG (ptr_mode, i);
11112 break;
11113 }
11114 if (scratch1 == scratch0)
11115 error ("Need a second call-clobbered general purpose register");
11116 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11117 if (call_used_regs[i] && ! fixed_regs[i])
11118 {
11119 scratch2 = gen_rtx_REG (Pmode, i);
11120 break;
11121 }
11122 if (scratch2 == scratch0)
11123 error ("Need a call-clobbered target register");
11124 }
11125
11126 this_value = plus_constant (this_rtx, delta);
11127 if (vcall_offset
11128 && (simple_add || scratch0 != scratch1)
11129 && strict_memory_address_p (ptr_mode, this_value))
11130 {
11131 emit_load_ptr (scratch0, this_value);
11132 did_load = 1;
11133 }
11134
11135 if (!delta)
11136 ; /* Do nothing. */
11137 else if (simple_add)
11138 emit_move_insn (this_rtx, this_value);
11139 else
11140 {
11141 emit_move_insn (scratch1, GEN_INT (delta));
11142 emit_insn (gen_add2_insn (this_rtx, scratch1));
11143 }
11144
11145 if (vcall_offset)
11146 {
11147 rtx offset_addr;
11148
11149 if (!did_load)
11150 emit_load_ptr (scratch0, this_rtx);
11151
11152 offset_addr = plus_constant (scratch0, vcall_offset);
11153 if (strict_memory_address_p (ptr_mode, offset_addr))
11154 ; /* Do nothing. */
11155 else if (! TARGET_SH5 && scratch0 != scratch1)
11156 {
11157 /* scratch0 != scratch1, and we have indexed loads. Get better
11158 schedule by loading the offset into r1 and using an indexed
11159 load - then the load of r1 can issue before the load from
11160 (this_rtx + delta) finishes. */
11161 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11162 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11163 }
11164 else if (CONST_OK_FOR_ADD (vcall_offset))
11165 {
11166 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11167 offset_addr = scratch0;
11168 }
11169 else if (scratch0 != scratch1)
11170 {
11171 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11172 emit_insn (gen_add2_insn (scratch0, scratch1));
11173 offset_addr = scratch0;
11174 }
11175 else
11176 gcc_unreachable (); /* FIXME */
11177 emit_load_ptr (scratch0, offset_addr);
11178
11179 if (Pmode != ptr_mode)
11180 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11181 emit_insn (gen_add2_insn (this_rtx, scratch0));
11182 }
11183
11184 /* Generate a tail call to the target function. */
11185 if (! TREE_USED (function))
11186 {
11187 assemble_external (function);
11188 TREE_USED (function) = 1;
11189 }
11190 funexp = XEXP (DECL_RTL (function), 0);
11191 /* If the function is overridden, so is the thunk, hence we don't
11192 need GOT addressing even if this is a public symbol. */
11193 #if 0
11194 if (TARGET_SH1 && ! flag_weak)
11195 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11196 else
11197 #endif
11198 if (TARGET_SH2 && flag_pic)
11199 {
11200 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11201 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11202 }
11203 else
11204 {
11205 if (TARGET_SHMEDIA && flag_pic)
11206 {
11207 funexp = gen_sym2PIC (funexp);
11208 PUT_MODE (funexp, Pmode);
11209 }
11210 emit_move_insn (scratch2, funexp);
11211 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11212 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11213 }
11214 sibcall = emit_call_insn (sibcall);
11215 SIBLING_CALL_P (sibcall) = 1;
11216 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11217 emit_barrier ();
11218
11219 /* Run just enough of rest_of_compilation to do scheduling and get
11220 the insns emitted. Note that use_thunk calls
11221 assemble_start_function and assemble_end_function. */
11222
11223 insn_locators_alloc ();
11224 insns = get_insns ();
11225
11226 if (optimize > 0)
11227 {
11228 if (! cfun->cfg)
11229 init_flow (cfun);
11230 split_all_insns_noflow ();
11231 }
11232
11233 sh_reorg ();
11234
11235 if (optimize > 0 && flag_delayed_branch)
11236 dbr_schedule (insns);
11237
11238 shorten_branches (insns);
11239 final_start_function (insns, file, 1);
11240 final (insns, file, 1);
11241 final_end_function ();
11242 free_after_compilation (cfun);
11243
11244 reload_completed = 0;
11245 epilogue_completed = 0;
11246 }
11247
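/* Return an rtx for the address of the function NAME of kind KIND. When
generating PIC code, SFUNC_GOT and SFUNC_STATIC addresses are loaded through
the GOT or with a GOTOFF relocation into TARGET (or a fresh pseudo). */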
11248 rtx
11249 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11250 {
11251 rtx sym;
11252
11253 /* If this is not an ordinary function, the name usually comes from a
11254 string literal or an sprintf buffer. Make sure we use the same
11255 string consistently, so that cse will be able to unify address loads. */
11256 if (kind != FUNCTION_ORDINARY)
11257 name = IDENTIFIER_POINTER (get_identifier (name));
11258 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11259 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11260 if (flag_pic)
11261 switch (kind)
11262 {
11263 case FUNCTION_ORDINARY:
11264 break;
11265 case SFUNC_GOT:
11266 {
11267 rtx reg = target ? target : gen_reg_rtx (Pmode);
11268
11269 emit_insn (gen_symGOT2reg (reg, sym));
11270 sym = reg;
11271 break;
11272 }
11273 case SFUNC_STATIC:
11274 {
11275 /* ??? To allow cse to work, we use GOTOFF relocations.
11276 We could add combiner patterns to transform this into
11277 straight pc-relative calls with sym2PIC / bsrf when
11278 label load and function call are still 1:1 and in the
11279 same basic block during combine. */
11280 rtx reg = target ? target : gen_reg_rtx (Pmode);
11281
11282 emit_insn (gen_symGOTOFF2reg (reg, sym));
11283 sym = reg;
11284 break;
11285 }
11286 }
11287 if (target && sym != target)
11288 {
11289 emit_move_insn (target, sym);
11290 return target;
11291 }
11292 return sym;
11293 }
11294
11295 /* Return the number of the first general purpose register in S, or -1 if none is set. */
11296 static int
11297 scavenge_reg (HARD_REG_SET *s)
11298 {
11299 int r;
11300 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11301 if (TEST_HARD_REG_BIT (*s, r))
11302 return r;
11303 return -1;
11304 }
11305
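/* Return an rtx for the value that the PR (return address) register had on
entry to the current function. */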
11306 rtx
11307 sh_get_pr_initial_val (void)
11308 {
11309 rtx val;
11310
11311 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11312 PR register on SHcompact, because it might be clobbered by the prologue.
11313 We check first if that is known to be the case. */
11314 if (TARGET_SHCOMPACT
11315 && ((crtl->args.info.call_cookie
11316 & ~ CALL_COOKIE_RET_TRAMP (1))
11317 || crtl->saves_all_registers))
11318 return gen_frame_mem (SImode, return_address_pointer_rtx);
11319
11320 /* If we haven't finished rtl generation, there might be a nonlocal label
11321 that we haven't seen yet.
11322 ??? get_hard_reg_initial_val fails if it is called after register
11323 allocation has started, unless it has been called before for the
11324 same register. And even then, we end up in trouble if we didn't use
11325 the register in the same basic block before. So call
11326 get_hard_reg_initial_val now and wrap it in an unspec if we might
11327 need to replace it. */
11328 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11329 combine can put the pseudo returned by get_hard_reg_initial_val into
11330 instructions that need a general purpose register, which will fail to
11331 be recognized when the pseudo becomes allocated to PR. */
11332 val
11333 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11334 if (TARGET_SH1)
11335 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11336 return val;
11337 }
11338
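/* Try to expand a store-into-register form of a T-register comparison:
OPERANDS[1] is the comparison code, OPERANDS[2] and OPERANDS[3] its arguments
(the T register and a constant), and OPERANDS[0] the destination. Return
nonzero on success, zero if the caller must use the generic expansion. */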
11339 int
11340 sh_expand_t_scc (rtx operands[])
11341 {
11342 enum rtx_code code = GET_CODE (operands[1]);
11343 rtx target = operands[0];
11344 rtx op0 = operands[2];
11345 rtx op1 = operands[3];
11346 rtx result = target;
11347 HOST_WIDE_INT val;
11348
11349 if (!REG_P (op0) || REGNO (op0) != T_REG
11350 || !CONST_INT_P (op1))
11351 return 0;
11352 if (!REG_P (result))
11353 result = gen_reg_rtx (SImode);
11354 val = INTVAL (op1);
11355 if ((code == EQ && val == 1) || (code == NE && val == 0))
11356 emit_insn (gen_movt (result));
11357 else if (TARGET_SH2A && ((code == EQ && val == 0)
11358 || (code == NE && val == 1)))
11359 emit_insn (gen_xorsi3_movrt (result));
11360 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11361 {
11362 emit_clobber (result);
11363 emit_insn (gen_subc (result, result, result));
11364 emit_insn (gen_addsi3 (result, result, const1_rtx));
11365 }
11366 else if (code == EQ || code == NE)
11367 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11368 else
11369 return 0;
11370 if (result != target)
11371 emit_move_insn (target, result);
11372 return 1;
11373 }
11374
11375 /* INSN is an sfunc; return the rtx that describes the address used. */
11376 static rtx
11377 extract_sfunc_addr (rtx insn)
11378 {
11379 rtx pattern, part = NULL_RTX;
11380 int len, i;
11381
11382 pattern = PATTERN (insn);
11383 len = XVECLEN (pattern, 0);
11384 for (i = 0; i < len; i++)
11385 {
11386 part = XVECEXP (pattern, 0, i);
11387 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11388 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11389 return XEXP (part, 0);
11390 }
11391 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11392 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11393 }
11394
11395 /* Verify that the register in use_sfunc_addr still agrees with the address
11396 used in the sfunc. This prevents fill_slots_from_thread from changing
11397 use_sfunc_addr.
11398 INSN is the use_sfunc_addr instruction, and REG is the register it
11399 guards. */
11400 int
11401 check_use_sfunc_addr (rtx insn, rtx reg)
11402 {
11403 /* Search for the sfunc. It should really come right after INSN. */
11404 while ((insn = NEXT_INSN (insn)))
11405 {
11406 if (LABEL_P (insn) || JUMP_P (insn))
11407 break;
11408 if (! INSN_P (insn))
11409 continue;
11410
11411 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11412 insn = XVECEXP (PATTERN (insn), 0, 0);
11413 if (GET_CODE (PATTERN (insn)) != PARALLEL
11414 || get_attr_type (insn) != TYPE_SFUNC)
11415 continue;
11416 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11417 }
11418 gcc_unreachable ();
11419 }
11420
11421 /* This function returns a constant rtx that represents 2**15 / pi in
11422 SFmode. It's used to scale SFmode angles, in radians, to a
11423 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11424 maps to 0x10000. */
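/* A quick cross-check on the literal below: 2**15 / pi = 32768 / 3.14159265...,
which is about 10430.3784; its reciprocal, pi / 2**15 (about 9.5874e-05), is
the value used by sh_fsca_int2sf further down. */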
11425
11426 static GTY(()) rtx sh_fsca_sf2int_rtx;
11427
11428 rtx
11429 sh_fsca_sf2int (void)
11430 {
11431 if (! sh_fsca_sf2int_rtx)
11432 {
11433 REAL_VALUE_TYPE rv;
11434
11435 real_from_string (&rv, "10430.378350470453");
11436 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11437 }
11438
11439 return sh_fsca_sf2int_rtx;
11440 }
11441
11442 /* This function returns a constant rtx that represents 2**15 / pi in
11443 DFmode. It's used to scale DFmode angles, in radians, to a
11444 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11445 maps to 0x10000. */
11446
11447 static GTY(()) rtx sh_fsca_df2int_rtx;
11448
11449 rtx
11450 sh_fsca_df2int (void)
11451 {
11452 if (! sh_fsca_df2int_rtx)
11453 {
11454 REAL_VALUE_TYPE rv;
11455
11456 real_from_string (&rv, "10430.378350470453");
11457 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11458 }
11459
11460 return sh_fsca_df2int_rtx;
11461 }
11462
11463 /* This function returns a constant rtx that represents pi / 2**15 in
11464 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11465 of a full circle back to an SFmode value, i.e., 0x10000 maps to
11466 2*pi. */
11467
11468 static GTY(()) rtx sh_fsca_int2sf_rtx;
11469
11470 rtx
11471 sh_fsca_int2sf (void)
11472 {
11473 if (! sh_fsca_int2sf_rtx)
11474 {
11475 REAL_VALUE_TYPE rv;
11476
11477 real_from_string (&rv, "9.587379924285257e-5");
11478 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11479 }
11480
11481 return sh_fsca_int2sf_rtx;
11482 }
11483
11484 /* Initialize the CUMULATIVE_ARGS structure. */
11485
11486 void
11487 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11488 tree fntype,
11489 rtx libname ATTRIBUTE_UNUSED,
11490 tree fndecl,
11491 signed int n_named_args,
11492 enum machine_mode mode)
11493 {
11494 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11495 pcum->free_single_fp_reg = 0;
11496 pcum->stack_regs = 0;
11497 pcum->byref_regs = 0;
11498 pcum->byref = 0;
11499 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11500
11501 /* XXX - Should we check TARGET_HITACHI here ??? */
11502 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11503
11504 if (fntype)
11505 {
11506 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11507 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11508 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11509 pcum->arg_count [(int) SH_ARG_INT]
11510 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11511
11512 pcum->call_cookie
11513 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11514 && pcum->arg_count [(int) SH_ARG_INT] == 0
11515 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11516 ? int_size_in_bytes (TREE_TYPE (fntype))
11517 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11518 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11519 == FIRST_RET_REG));
11520 }
11521 else
11522 {
11523 pcum->arg_count [(int) SH_ARG_INT] = 0;
11524 pcum->prototype_p = FALSE;
11525 if (mode != VOIDmode)
11526 {
11527 pcum->call_cookie =
11528 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11529 && GET_MODE_SIZE (mode) > 4
11530 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11531
11532 /* If the default ABI is the Renesas ABI then all library
11533 calls must assume that the library will be using the
11534 Renesas ABI. So if the function would return its result
11535 in memory then we must force the address of this memory
11536 block onto the stack. Ideally we would like to call
11537 targetm.calls.return_in_memory() here but we do not have
11538 the TYPE or the FNDECL available so we synthesize the
11539 contents of that function as best we can. */
11540 pcum->force_mem =
11541 (TARGET_DEFAULT & MASK_HITACHI)
11542 && (mode == BLKmode
11543 || (GET_MODE_SIZE (mode) > 4
11544 && !(mode == DFmode
11545 && TARGET_FPU_DOUBLE)));
11546 }
11547 else
11548 {
11549 pcum->call_cookie = 0;
11550 pcum->force_mem = FALSE;
11551 }
11552 }
11553 }
11554
11555 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11556 not enter into CONST_DOUBLE for the replace.
11557
11558 Note that copying is not done so X must not be shared unless all copies
11559 are to be modified.
11560
11561 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11562 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11563 replacements[n*2+1] - and that we take mode changes into account.
11564
11565 If a replacement is ambiguous, return NULL_RTX.
11566
11567 If MODIFY is zero, don't modify any rtl in place,
11568 just return zero or nonzero for failure / success. */
11569
11570 rtx
11571 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11572 {
11573 int i, j;
11574 const char *fmt;
11575
11576 /* The following prevents infinite loops when we would replace a MEM inside a
11577 CONST_DOUBLE with the same CONST_DOUBLE. */
11578 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11579 return x;
11580
11581 for (i = n_replacements - 1; i >= 0 ; i--)
11582 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11583 return replacements[i*2+1];
11584
11585 /* Allow this function to make replacements in EXPR_LISTs. */
11586 if (x == 0)
11587 return 0;
11588
11589 if (GET_CODE (x) == SUBREG)
11590 {
11591 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11592 n_replacements, modify);
11593
11594 if (CONST_INT_P (new_rtx))
11595 {
11596 x = simplify_subreg (GET_MODE (x), new_rtx,
11597 GET_MODE (SUBREG_REG (x)),
11598 SUBREG_BYTE (x));
11599 if (! x)
11600 abort ();
11601 }
11602 else if (modify)
11603 SUBREG_REG (x) = new_rtx;
11604
11605 return x;
11606 }
11607 else if (REG_P (x))
11608 {
11609 unsigned regno = REGNO (x);
11610 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11611 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11612 rtx result = NULL_RTX;
11613
11614 for (i = n_replacements - 1; i >= 0; i--)
11615 {
11616 rtx from = replacements[i*2];
11617 rtx to = replacements[i*2+1];
11618 unsigned from_regno, from_nregs, to_regno, new_regno;
11619
11620 if (!REG_P (from))
11621 continue;
11622 from_regno = REGNO (from);
11623 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11624 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11625 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11626 {
11627 if (regno < from_regno
11628 || regno + nregs > from_regno + from_nregs
11629 || !REG_P (to)
11630 || result)
11631 return NULL_RTX;
11632 to_regno = REGNO (to);
11633 if (to_regno < FIRST_PSEUDO_REGISTER)
11634 {
11635 new_regno = regno + to_regno - from_regno;
11636 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11637 != nregs)
11638 return NULL_RTX;
11639 result = gen_rtx_REG (GET_MODE (x), new_regno);
11640 }
11641 else if (GET_MODE (x) <= GET_MODE (to))
11642 result = gen_lowpart_common (GET_MODE (x), to);
11643 else
11644 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11645 }
11646 }
11647 return result ? result : x;
11648 }
11649 else if (GET_CODE (x) == ZERO_EXTEND)
11650 {
11651 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11652 n_replacements, modify);
11653
11654 if (CONST_INT_P (new_rtx))
11655 {
11656 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11657 new_rtx, GET_MODE (XEXP (x, 0)));
11658 if (! x)
11659 abort ();
11660 }
11661 else if (modify)
11662 XEXP (x, 0) = new_rtx;
11663
11664 return x;
11665 }
11666
11667 fmt = GET_RTX_FORMAT (GET_CODE (x));
11668 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11669 {
11670 rtx new_rtx;
11671
11672 if (fmt[i] == 'e')
11673 {
11674 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11675 n_replacements, modify);
11676 if (!new_rtx)
11677 return NULL_RTX;
11678 if (modify)
11679 XEXP (x, i) = new_rtx;
11680 }
11681 else if (fmt[i] == 'E')
11682 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11683 {
11684 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11685 n_replacements, modify);
11686 if (!new_rtx)
11687 return NULL_RTX;
11688 if (modify)
11689 XVECEXP (x, i, j) = new_rtx;
11690 }
11691 }
11692
11693 return x;
11694 }
11695
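/* Return an rtx that truncates X to MODE. If X is a ZERO_EXTEND or
SIGN_EXTEND, the extension is folded away where its operand already covers
MODE; when NEED_SIGN_EXT is nonzero only a SIGN_EXTEND may be kept as the
outer operation. */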
11696 rtx
11697 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11698 {
11699 enum rtx_code code = TRUNCATE;
11700
11701 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11702 {
11703 rtx inner = XEXP (x, 0);
11704 enum machine_mode inner_mode = GET_MODE (inner);
11705
11706 if (inner_mode == mode)
11707 return inner;
11708 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11709 x = inner;
11710 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11711 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11712 {
11713 code = GET_CODE (x);
11714 x = inner;
11715 }
11716 }
11717 return gen_rtx_fmt_e (code, mode, x);
11718 }
11719
11720 /* Called via for_each_rtx after reload, to clean up truncates of
11721 registers that span multiple actual hard registers. */
11722 int
11723 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11724 {
11725 rtx x = *p, reg;
11726
11727 if (GET_CODE (x) != TRUNCATE)
11728 return 0;
11729 reg = XEXP (x, 0);
11730 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
11731 {
11732 enum machine_mode reg_mode = GET_MODE (reg);
11733 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11734 subreg_lowpart_offset (DImode, reg_mode));
11735 *(int*) n_changes += 1;
11736 return -1;
11737 }
11738 return 0;
11739 }
11740
11741 /* Load and store depend on the highpart of the address. However,
11742 set_attr_alternative does not give well-defined results before reload,
11743 so we must look at the rtl ourselves to see if any of the feeding
11744 registers is used in a memref. */
11745
11746 /* Called by sh_contains_memref_p via for_each_rtx. */
11747 static int
11748 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11749 {
11750 return (MEM_P (*loc));
11751 }
11752
11753 /* Return nonzero iff INSN contains a MEM. */
11754 int
11755 sh_contains_memref_p (rtx insn)
11756 {
11757 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11758 }
11759
11760 /* Return nonzero iff INSN loads a banked register. */
11761 int
11762 sh_loads_bankedreg_p (rtx insn)
11763 {
11764 if (GET_CODE (PATTERN (insn)) == SET)
11765 {
11766 rtx op = SET_DEST (PATTERN(insn));
11767 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11768 return 1;
11769 }
11770
11771 return 0;
11772 }
11773
11774 /* FNADDR is the MEM expression from a call expander. Return an address
11775 to use in an SHmedia insn pattern. */
11776 rtx
11777 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11778 {
11779 int is_sym;
11780
11781 fnaddr = XEXP (fnaddr, 0);
11782 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11783 if (flag_pic && is_sym)
11784 {
11785 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11786 {
11787 rtx reg = gen_reg_rtx (Pmode);
11788
11789 /* We must not use GOTPLT for sibcalls, because PIC_REG
11790 must be restored before the PLT code gets to run. */
11791 if (is_sibcall)
11792 emit_insn (gen_symGOT2reg (reg, fnaddr));
11793 else
11794 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11795 fnaddr = reg;
11796 }
11797 else
11798 {
11799 fnaddr = gen_sym2PIC (fnaddr);
11800 PUT_MODE (fnaddr, Pmode);
11801 }
11802 }
11803 /* If ptabs might trap, make this visible to the rest of the compiler.
11804 We generally assume that symbols pertain to valid locations, but
11805 it is possible to generate invalid symbols with asm or linker tricks.
11806 In a list of functions where each returns its successor, an invalid
11807 symbol might denote an empty list. */
11808 if (!TARGET_PT_FIXED
11809 && (!is_sym || TARGET_INVALID_SYMBOLS)
11810 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11811 {
11812 rtx tr = gen_reg_rtx (PDImode);
11813
11814 emit_insn (gen_ptabs (tr, fnaddr));
11815 fnaddr = tr;
11816 }
11817 else if (! target_reg_operand (fnaddr, Pmode))
11818 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11819 return fnaddr;
11820 }
11821
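/* Choose an intermediate register class (or, through SRI->icode, a reload
pattern) for moving X into (IN_P nonzero) or out of a register of class
RCLASS in MODE. Return NO_REGS when no intermediate class is needed. */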
11822 enum reg_class
11823 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
11824 enum machine_mode mode, secondary_reload_info *sri)
11825 {
11826 if (in_p)
11827 {
11828 if (REGCLASS_HAS_FP_REG (rclass)
11829 && ! TARGET_SHMEDIA
11830 && immediate_operand ((x), mode)
11831 && ! ((fp_zero_operand (x) || fp_one_operand (x))
11832 && mode == SFmode && fldi_ok ()))
11833 switch (mode)
11834 {
11835 case SFmode:
11836 sri->icode = CODE_FOR_reload_insf__frn;
11837 return NO_REGS;
11838 case DFmode:
11839 sri->icode = CODE_FOR_reload_indf__frn;
11840 return NO_REGS;
11841 case SImode:
11842 /* ??? If we knew that we are in the appropriate mode -
11843 single precision - we could use a reload pattern directly. */
11844 return FPUL_REGS;
11845 default:
11846 abort ();
11847 }
11848 if (rclass == FPUL_REGS
11849 && ((REG_P (x)
11850 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11851 || REGNO (x) == T_REG))
11852 || GET_CODE (x) == PLUS))
11853 return GENERAL_REGS;
11854 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11855 {
11856 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11857 return GENERAL_REGS;
11858 else if (mode == SFmode)
11859 return FP_REGS;
11860 sri->icode = CODE_FOR_reload_insi__i_fpul;
11861 return NO_REGS;
11862 }
11863 if (rclass == FPSCR_REGS
11864 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11865 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11866 return GENERAL_REGS;
11867 if (REGCLASS_HAS_FP_REG (rclass)
11868 && TARGET_SHMEDIA
11869 && immediate_operand (x, mode)
11870 && x != CONST0_RTX (GET_MODE (x))
11871 && GET_MODE (x) != V4SFmode)
11872 return GENERAL_REGS;
11873 if ((mode == QImode || mode == HImode)
11874 && TARGET_SHMEDIA && inqhi_operand (x, mode))
11875 {
11876 sri->icode = ((mode == QImode)
11877 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
11878 return NO_REGS;
11879 }
11880 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
11881 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
11882 return TARGET_REGS;
11883 } /* end of input-only processing. */
11884
11885 if (((REGCLASS_HAS_FP_REG (rclass)
11886 && (REG_P (x)
11887 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11888 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11889 && TARGET_FMOVD))))
11890 || (REGCLASS_HAS_GENERAL_REG (rclass)
11891 && REG_P (x)
11892 && FP_REGISTER_P (REGNO (x))))
11893 && ! TARGET_SHMEDIA
11894 && (mode == SFmode || mode == SImode))
11895 return FPUL_REGS;
11896 if ((rclass == FPUL_REGS
11897 || (REGCLASS_HAS_FP_REG (rclass)
11898 && ! TARGET_SHMEDIA && mode == SImode))
11899 && (MEM_P (x)
11900 || (REG_P (x)
11901 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11902 || REGNO (x) == T_REG
11903 || system_reg_operand (x, VOIDmode)))))
11904 {
11905 if (rclass == FPUL_REGS)
11906 return GENERAL_REGS;
11907 return FPUL_REGS;
11908 }
11909 if ((rclass == TARGET_REGS
11910 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
11911 && !satisfies_constraint_Csy (x)
11912 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
11913 return GENERAL_REGS;
11914 if ((rclass == MAC_REGS || rclass == PR_REGS)
11915 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11916 && rclass != REGNO_REG_CLASS (REGNO (x)))
11917 return GENERAL_REGS;
11918 if (rclass != GENERAL_REGS && REG_P (x)
11919 && TARGET_REGISTER_P (REGNO (x)))
11920 return GENERAL_REGS;
11921 return NO_REGS;
11922 }
11923
11924 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11925
11926 #include "gt-sh.h"