1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "insn-config.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "flags.h"
31 #include "expr.h"
32 #include "optabs.h"
33 #include "function.h"
34 #include "regs.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "integrate.h"
41 #include "dwarf2.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "real.h"
46 #include "langhooks.h"
47 #include "basic-block.h"
48 #include "df.h"
49 #include "cfglayout.h"
50 #include "intl.h"
51 #include "sched-int.h"
52 #include "params.h"
53 #include "ggc.h"
54 #include "gimple.h"
55 #include "cfgloop.h"
56 #include "alloc-pool.h"
57 #include "tm-constrs.h"
58
59
60 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
61
62 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
63 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
64
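/* A minimal usage sketch for MSW / LSW, assuming MEM is a DImode memory
   operand: the two SImode halves can be reached with

     rtx lo = adjust_address (mem, SImode, 4 * LSW);
     rtx hi = adjust_address (mem, SImode, 4 * MSW);

   which selects the correct word regardless of endianness (compare the
   '%R' / '%S' cases in print_operand below).  */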
65 /* These are some macros to abstract register modes. */
66 #define CONST_OK_FOR_ADD(size) \
67 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
68 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
69 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
70 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
71
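/* A brief sketch of how these abstractions are meant to be combined,
   assuming REG holds an address and SIZE is a displacement (roughly what
   output_stack_adjust does further below):

     if (CONST_OK_FOR_ADD (size))
       emit_insn (GEN_ADD3 (reg, reg, GEN_INT (size)));
     else
       {
         rtx tmp = gen_reg_rtx (GET_MODE (reg));
         emit_insn (GEN_MOV (tmp, GEN_INT (size)));
         emit_insn (GEN_ADD3 (reg, reg, tmp));
       }
*/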
72 /* Used to simplify the logic below. Find the attributes wherever
73 they may be. */
74 #define SH_ATTRIBUTES(decl) \
75 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
76 : DECL_ATTRIBUTES (decl) \
77 ? (DECL_ATTRIBUTES (decl)) \
78 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
79
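/* A minimal usage sketch, assuming DECL may be either a type or a decl:

     bool has_fv = lookup_attribute ("function_vector", SH_ATTRIBUTES (decl))
                   != NULL_TREE;

   lookup_attribute accepts a NULL_TREE list, so no separate null check is
   needed.  */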
80 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
81 int current_function_interrupt;
82
83 tree sh_deferred_function_attributes;
84 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
85
86 /* Global variables for machine-dependent things. */
87
88 /* Which cpu are we scheduling for. */
89 enum processor_type sh_cpu;
90
91 /* Definitions used in ready queue reordering for first scheduling pass. */
92
93 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
94 static short *regmode_weight[2];
95
96 /* Total SFmode and SImode weights of scheduled insns. */
97 static int curr_regmode_pressure[2];
98
99 /* Number of r0 life regions. */
100 static int r0_life_regions;
101
102 /* If true, skip cycles for Q -> R movement. */
103 static int skip_cycles = 0;
104
105 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
106 and returned from sh_reorder2. */
107 static short cached_can_issue_more;
108
109 /* Provides the class number of the smallest class containing
110 each register number. */
111
112 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
113 {
114 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
147 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
148 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
149 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
150 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
151 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
152 GENERAL_REGS, GENERAL_REGS,
153 };
154
155 char sh_register_names[FIRST_PSEUDO_REGISTER] \
156 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
157
158 char sh_additional_register_names[ADDREGNAMES_SIZE] \
159 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
160 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
161
162 int assembler_dialect;
163
164 static bool shmedia_space_reserved_for_target_registers;
165
166 static bool sh_handle_option (size_t, const char *, int);
167 static void split_branches (rtx);
168 static int branch_dest (rtx);
169 static void force_into (rtx, rtx);
170 static void print_slot (rtx);
171 static rtx add_constant (rtx, enum machine_mode, rtx);
172 static void dump_table (rtx, rtx);
173 static int hi_const (rtx);
174 static int broken_move (rtx);
175 static int mova_p (rtx);
176 static rtx find_barrier (int, rtx, rtx);
177 static int noncall_uses_reg (rtx, rtx, rtx *);
178 static rtx gen_block_redirect (rtx, int, int);
179 static void sh_reorg (void);
180 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
181 static rtx frame_insn (rtx);
182 static rtx push (int);
183 static void pop (int);
184 static void push_regs (HARD_REG_SET *, int);
185 static int calc_live_regs (HARD_REG_SET *);
186 static HOST_WIDE_INT rounded_frame_size (int);
187 static rtx mark_constant_pool_use (rtx);
188 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
189 static tree sh_handle_resbank_handler_attribute (tree *, tree,
190 tree, int, bool *);
191 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
192 tree, int, bool *);
193 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
194 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
195 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
196 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
197 static void sh_insert_attributes (tree, tree *);
198 static const char *sh_check_pch_target_flags (int);
199 static int sh_adjust_cost (rtx, rtx, rtx, int);
200 static int sh_issue_rate (void);
201 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
202 static short find_set_regmode_weight (rtx, enum machine_mode);
203 static short find_insn_regmode_weight (rtx, enum machine_mode);
204 static void find_regmode_weight (basic_block, enum machine_mode);
205 static int find_r0_life_regions (basic_block);
206 static void sh_md_init_global (FILE *, int, int);
207 static void sh_md_finish_global (FILE *, int);
208 static int rank_for_reorder (const void *, const void *);
209 static void swap_reorder (rtx *, int);
210 static void ready_reorder (rtx *, int);
211 static short high_pressure (enum machine_mode);
212 static int sh_reorder (FILE *, int, rtx *, int *, int);
213 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
214 static void sh_md_init (FILE *, int, int);
215 static int sh_variable_issue (FILE *, int, rtx, int);
216
217 static bool sh_function_ok_for_sibcall (tree, tree);
218
219 static bool sh_cannot_modify_jumps_p (void);
220 static enum reg_class sh_target_reg_class (void);
221 static bool sh_optimize_target_register_callee_saved (bool);
222 static bool sh_ms_bitfield_layout_p (const_tree);
223
224 static void sh_init_builtins (void);
225 static void sh_media_init_builtins (void);
226 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
227 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
228 static void sh_file_start (void);
229 static int flow_dependent_p (rtx, rtx);
230 static void flow_dependent_p_1 (rtx, const_rtx, void *);
231 static int shiftcosts (rtx);
232 static int andcosts (rtx);
233 static int addsubcosts (rtx);
234 static int multcosts (rtx);
235 static bool unspec_caller_rtx_p (rtx);
236 static bool sh_cannot_copy_insn_p (rtx);
237 static bool sh_rtx_costs (rtx, int, int, int *, bool);
238 static int sh_address_cost (rtx, bool);
239 static int sh_pr_n_sets (void);
240 static rtx sh_allocate_initial_value (rtx);
241 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
242 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
243 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
244 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
245 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
246 static int scavenge_reg (HARD_REG_SET *s);
247 struct save_schedule_s;
248 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
249 struct save_schedule_s *, int);
250
251 static rtx sh_struct_value_rtx (tree, int);
252 static bool sh_return_in_memory (const_tree, const_tree);
253 static rtx sh_builtin_saveregs (void);
254 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
255 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
256 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
257 static tree sh_build_builtin_va_list (void);
258 static void sh_va_start (tree, rtx);
259 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
260 static enum machine_mode sh_promote_function_mode (const_tree type,
261 enum machine_mode,
262 int *punsignedp,
263 const_tree funtype,
264 int for_return);
265 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
266 const_tree, bool);
267 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
268 const_tree, bool);
269 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
270 tree, bool);
271 static bool sh_scalar_mode_supported_p (enum machine_mode);
272 static int sh_dwarf_calling_convention (const_tree);
273 static void sh_encode_section_info (tree, rtx, int);
274 static int sh2a_function_vector_p (tree);
275 \f
276 static const struct attribute_spec sh_attribute_table[] =
277 {
278 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
279 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
280 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
281 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
282 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
283 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
284 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
285 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
286 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
287 #ifdef SYMBIAN
288 /* Symbian support adds two new attributes:
289 dllexport - for exporting a function/variable that will live in a dll
290 dllimport - for importing a function/variable from a dll
291
292 Microsoft allows multiple declspecs in one __declspec, separating
293 them with spaces. We do NOT support this. Instead, use __declspec
294 multiple times. */
295 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
296 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
297 #endif
298 { NULL, 0, 0, false, false, false, NULL }
299 };
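/* For reference, a sketch of how these SH attributes are spelled in user
   code (the names and numbers are illustrative; function_vector is
   SH2A-specific):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));
     void vect_fn (void) __attribute__ ((function_vector (25)));
*/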
300 \f
301 /* Initialize the GCC target structure. */
302 #undef TARGET_ATTRIBUTE_TABLE
303 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
304
305 /* The next two are used for debug info when compiling with -gdwarf. */
306 #undef TARGET_ASM_UNALIGNED_HI_OP
307 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
308 #undef TARGET_ASM_UNALIGNED_SI_OP
309 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
310
311 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
312 #undef TARGET_ASM_UNALIGNED_DI_OP
313 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
314 #undef TARGET_ASM_ALIGNED_DI_OP
315 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
316
317 #undef TARGET_ASM_FUNCTION_EPILOGUE
318 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
319
320 #undef TARGET_ASM_OUTPUT_MI_THUNK
321 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
322
323 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
324 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
325
326 #undef TARGET_ASM_FILE_START
327 #define TARGET_ASM_FILE_START sh_file_start
328 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
329 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
330
331 #undef TARGET_DEFAULT_TARGET_FLAGS
332 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
333 #undef TARGET_HANDLE_OPTION
334 #define TARGET_HANDLE_OPTION sh_handle_option
335
336 #undef TARGET_INSERT_ATTRIBUTES
337 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
338
339 #undef TARGET_SCHED_ADJUST_COST
340 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
341
342 #undef TARGET_SCHED_ISSUE_RATE
343 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
344
345 /* The next 5 hooks have been implemented to re-enable sched1. These
346 hooks limit the movement of insns in sched1 in order to reduce the
347 register pressure. The overall idea is to keep count of the SImode
348 and SFmode regs required by already scheduled insns. When these counts
349 cross certain threshold values, priority is given to insns that free
350 registers. The insn that frees registers is most likely the insn with
351 the lowest LUID (original insn order); but such an insn might be sitting
352 in the stalled queue (Q) instead of the ready queue (R). To solve this,
353 we skip up to a maximum of 8 cycles so that such insns may move from Q -> R.
354
355 The hooks are described below:
356
357 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic scheduler;
358 it is called inside the sched_init function just after the
359 find_insn_reg_weights function call. It is used to calculate the SImode
360 and SFmode weights of the insns of each basic block, much like what
361 find_insn_reg_weights does.
362 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
363
364 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
365 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
366 (Q)->(R).
367
368 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
369 high, reorder the ready queue so that the insn with the lowest LUID will
370 be issued next.
371
372 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
373 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
374
375 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
376 can be returned from TARGET_SCHED_REORDER2.
377
378 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
379
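/* Condensed, the reordering decision made by TARGET_SCHED_REORDER amounts
   to the following sketch, where high_pressure compares
   CURR_REGMODE_PRESSURE against per-mode thresholds (see the definitions
   further below):

     if (high_pressure (SImode) || high_pressure (SFmode))
       ready_reorder (ready, n_ready);
*/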
380 #undef TARGET_SCHED_DFA_NEW_CYCLE
381 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
382
383 #undef TARGET_SCHED_INIT_GLOBAL
384 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
385
386 #undef TARGET_SCHED_FINISH_GLOBAL
387 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
388
389 #undef TARGET_SCHED_VARIABLE_ISSUE
390 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
391
392 #undef TARGET_SCHED_REORDER
393 #define TARGET_SCHED_REORDER sh_reorder
394
395 #undef TARGET_SCHED_REORDER2
396 #define TARGET_SCHED_REORDER2 sh_reorder2
397
398 #undef TARGET_SCHED_INIT
399 #define TARGET_SCHED_INIT sh_md_init
400
401 #undef TARGET_LEGITIMIZE_ADDRESS
402 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
403
404 #undef TARGET_CANNOT_MODIFY_JUMPS_P
405 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
406 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
407 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
408 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
409 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
410 sh_optimize_target_register_callee_saved
411
412 #undef TARGET_MS_BITFIELD_LAYOUT_P
413 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
414
415 #undef TARGET_INIT_BUILTINS
416 #define TARGET_INIT_BUILTINS sh_init_builtins
417 #undef TARGET_EXPAND_BUILTIN
418 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
419
420 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
421 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
422
423 #undef TARGET_CANNOT_COPY_INSN_P
424 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
425 #undef TARGET_RTX_COSTS
426 #define TARGET_RTX_COSTS sh_rtx_costs
427 #undef TARGET_ADDRESS_COST
428 #define TARGET_ADDRESS_COST sh_address_cost
429 #undef TARGET_ALLOCATE_INITIAL_VALUE
430 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
431
432 #undef TARGET_MACHINE_DEPENDENT_REORG
433 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
434
435 #undef TARGET_DWARF_REGISTER_SPAN
436 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
437
438 #ifdef HAVE_AS_TLS
439 #undef TARGET_HAVE_TLS
440 #define TARGET_HAVE_TLS true
441 #endif
442
443 #undef TARGET_PROMOTE_PROTOTYPES
444 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
445 #undef TARGET_PROMOTE_FUNCTION_MODE
446 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
447
448 #undef TARGET_STRUCT_VALUE_RTX
449 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
450 #undef TARGET_RETURN_IN_MEMORY
451 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
452
453 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
454 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
455 #undef TARGET_SETUP_INCOMING_VARARGS
456 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
457 #undef TARGET_STRICT_ARGUMENT_NAMING
458 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
459 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
460 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
461 #undef TARGET_MUST_PASS_IN_STACK
462 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
463 #undef TARGET_PASS_BY_REFERENCE
464 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
465 #undef TARGET_CALLEE_COPIES
466 #define TARGET_CALLEE_COPIES sh_callee_copies
467 #undef TARGET_ARG_PARTIAL_BYTES
468 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
469
470 #undef TARGET_BUILD_BUILTIN_VA_LIST
471 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
472 #undef TARGET_EXPAND_BUILTIN_VA_START
473 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
474 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
475 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
476
477 #undef TARGET_SCALAR_MODE_SUPPORTED_P
478 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
479 #undef TARGET_VECTOR_MODE_SUPPORTED_P
480 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
481
482 #undef TARGET_CHECK_PCH_TARGET_FLAGS
483 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
484
485 #undef TARGET_DWARF_CALLING_CONVENTION
486 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
487
488 /* Return regmode weight for insn. */
489 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
490
491 /* Return current register pressure for regmode. */
492 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
493
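/* A condensed sketch of how these two macros interact in the variable-issue
   hook: as each insn is issued, its precomputed weight is added to the
   running pressure for the corresponding mode, e.g.

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
     CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
*/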
494 #undef TARGET_ENCODE_SECTION_INFO
495 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
496
497 #ifdef SYMBIAN
498
499 #undef TARGET_ENCODE_SECTION_INFO
500 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
501 #undef TARGET_STRIP_NAME_ENCODING
502 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
503 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
504 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
505
506 #endif /* SYMBIAN */
507
508 #undef TARGET_SECONDARY_RELOAD
509 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
510
511 #undef TARGET_LEGITIMATE_ADDRESS_P
512 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
513
514 /* Machine-specific symbol_ref flags. */
515 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
516
517 struct gcc_target targetm = TARGET_INITIALIZER;
518 \f
519 /* Implement TARGET_HANDLE_OPTION. */
520
521 static bool
522 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
523 int value ATTRIBUTE_UNUSED)
524 {
525 switch (code)
526 {
527 case OPT_m1:
528 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
529 return true;
530
531 case OPT_m2:
532 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
533 return true;
534
535 case OPT_m2a:
536 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
537 return true;
538
539 case OPT_m2a_nofpu:
540 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
541 return true;
542
543 case OPT_m2a_single:
544 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
545 return true;
546
547 case OPT_m2a_single_only:
548 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
549 return true;
550
551 case OPT_m2e:
552 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
553 return true;
554
555 case OPT_m3:
556 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
557 return true;
558
559 case OPT_m3e:
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
561 return true;
562
563 case OPT_m4:
564 case OPT_m4_100:
565 case OPT_m4_200:
566 case OPT_m4_300:
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
568 return true;
569
570 case OPT_m4_nofpu:
571 case OPT_m4_100_nofpu:
572 case OPT_m4_200_nofpu:
573 case OPT_m4_300_nofpu:
574 case OPT_m4_340:
575 case OPT_m4_400:
576 case OPT_m4_500:
577 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
578 return true;
579
580 case OPT_m4_single:
581 case OPT_m4_100_single:
582 case OPT_m4_200_single:
583 case OPT_m4_300_single:
584 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
585 return true;
586
587 case OPT_m4_single_only:
588 case OPT_m4_100_single_only:
589 case OPT_m4_200_single_only:
590 case OPT_m4_300_single_only:
591 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
592 return true;
593
594 case OPT_m4a:
595 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
596 return true;
597
598 case OPT_m4a_nofpu:
599 case OPT_m4al:
600 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
601 return true;
602
603 case OPT_m4a_single:
604 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
605 return true;
606
607 case OPT_m4a_single_only:
608 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
609 return true;
610
611 case OPT_m5_32media:
612 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
613 return true;
614
615 case OPT_m5_32media_nofpu:
616 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
617 return true;
618
619 case OPT_m5_64media:
620 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
621 return true;
622
623 case OPT_m5_64media_nofpu:
624 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
625 return true;
626
627 case OPT_m5_compact:
628 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
629 return true;
630
631 case OPT_m5_compact_nofpu:
632 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
633 return true;
634
635 default:
636 return true;
637 }
638 }
639 \f
640 /* Set default optimization options. */
641 void
642 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
643 {
644 if (level)
645 {
646 flag_omit_frame_pointer = 2;
647 if (!size)
648 sh_div_str = "inv:minlat";
649 }
650 if (size)
651 {
652 target_flags |= MASK_SMALLCODE;
653 sh_div_str = SH_DIV_STR_FOR_SIZE ;
654 }
655 else
656 TARGET_CBRANCHDI4 = 1;
657 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
658 haven't been parsed yet, hence we'd read only the default.
659 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
660 it's OK to always set flag_branch_target_load_optimize. */
661 if (level > 1)
662 {
663 flag_branch_target_load_optimize = 1;
664 if (!size)
665 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
666 }
667 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
668 here, so leave it to OVERRIDE_OPTIONS to set
669 flag_finite_math_only. We set it to 2 here so we know if the user
670 explicitly requested this to be on or off. */
671 flag_finite_math_only = 2;
672 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
673 the user explicitly requested this to be on or off. */
674 if (flag_schedule_insns > 0)
675 flag_schedule_insns = 2;
676
677 set_param_value ("simultaneous-prefetches", 2);
678 }
679
680 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
681 options, and do some machine dependent initialization. */
682 void
683 sh_override_options (void)
684 {
685 int regno;
686
687 SUBTARGET_OVERRIDE_OPTIONS;
688 if (flag_finite_math_only == 2)
689 flag_finite_math_only
690 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
691 if (TARGET_SH2E && !flag_finite_math_only)
692 target_flags |= MASK_IEEE;
693 sh_cpu = PROCESSOR_SH1;
694 assembler_dialect = 0;
695 if (TARGET_SH2)
696 sh_cpu = PROCESSOR_SH2;
697 if (TARGET_SH2E)
698 sh_cpu = PROCESSOR_SH2E;
699 if (TARGET_SH2A)
700 sh_cpu = PROCESSOR_SH2A;
701 if (TARGET_SH3)
702 sh_cpu = PROCESSOR_SH3;
703 if (TARGET_SH3E)
704 sh_cpu = PROCESSOR_SH3E;
705 if (TARGET_SH4)
706 {
707 assembler_dialect = 1;
708 sh_cpu = PROCESSOR_SH4;
709 }
710 if (TARGET_SH4A_ARCH)
711 {
712 assembler_dialect = 1;
713 sh_cpu = PROCESSOR_SH4A;
714 }
715 if (TARGET_SH5)
716 {
717 sh_cpu = PROCESSOR_SH5;
718 target_flags |= MASK_ALIGN_DOUBLE;
719 if (TARGET_SHMEDIA_FPU)
720 target_flags |= MASK_FMOVD;
721 if (TARGET_SHMEDIA)
722 {
723 /* There are no delay slots on SHmedia. */
724 flag_delayed_branch = 0;
725 /* Relaxation isn't yet supported for SHmedia */
726 target_flags &= ~MASK_RELAX;
727 /* After reload, if-conversion does little good but can cause
728 ICEs:
729 - find_if_block doesn't do anything for SH because we don't
730 have conditional execution patterns. (We use conditional
731 move patterns, which are handled differently, and only
732 before reload).
733 - find_cond_trap doesn't do anything for the SH because we
734 don't have conditional traps.
735 - find_if_case_1 uses redirect_edge_and_branch_force in
736 the only path that does an optimization, and this causes
737 an ICE when branch targets are in registers.
738 - find_if_case_2 doesn't do anything for the SHmedia after
739 reload except when it can redirect a tablejump - and
740 that's rather rare. */
741 flag_if_conversion2 = 0;
742 if (! strcmp (sh_div_str, "call"))
743 sh_div_strategy = SH_DIV_CALL;
744 else if (! strcmp (sh_div_str, "call2"))
745 sh_div_strategy = SH_DIV_CALL2;
746 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
747 sh_div_strategy = SH_DIV_FP;
748 else if (! strcmp (sh_div_str, "inv"))
749 sh_div_strategy = SH_DIV_INV;
750 else if (! strcmp (sh_div_str, "inv:minlat"))
751 sh_div_strategy = SH_DIV_INV_MINLAT;
752 else if (! strcmp (sh_div_str, "inv20u"))
753 sh_div_strategy = SH_DIV_INV20U;
754 else if (! strcmp (sh_div_str, "inv20l"))
755 sh_div_strategy = SH_DIV_INV20L;
756 else if (! strcmp (sh_div_str, "inv:call2"))
757 sh_div_strategy = SH_DIV_INV_CALL2;
758 else if (! strcmp (sh_div_str, "inv:call"))
759 sh_div_strategy = SH_DIV_INV_CALL;
760 else if (! strcmp (sh_div_str, "inv:fp"))
761 {
762 if (TARGET_FPU_ANY)
763 sh_div_strategy = SH_DIV_INV_FP;
764 else
765 sh_div_strategy = SH_DIV_INV;
766 }
767 TARGET_CBRANCHDI4 = 0;
768 /* Assembler CFI isn't yet fully supported for SHmedia. */
769 flag_dwarf2_cfi_asm = 0;
770 }
771 }
772 else
773 {
774 /* Only the sh64-elf assembler supports .quad properly. */
775 targetm.asm_out.aligned_op.di = NULL;
776 targetm.asm_out.unaligned_op.di = NULL;
777 }
778 if (TARGET_SH1)
779 {
780 if (! strcmp (sh_div_str, "call-div1"))
781 sh_div_strategy = SH_DIV_CALL_DIV1;
782 else if (! strcmp (sh_div_str, "call-fp")
783 && (TARGET_FPU_DOUBLE
784 || (TARGET_HARD_SH4 && TARGET_SH2E)
785 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
786 sh_div_strategy = SH_DIV_CALL_FP;
787 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
788 sh_div_strategy = SH_DIV_CALL_TABLE;
789 else
790 /* Pick one that makes most sense for the target in general.
791 It is not much good to use different functions depending
792 on -Os, since then we'll end up with two different functions
793 when some of the code is compiled for size, and some for
794 speed. */
795
796 /* SH4 tends to emphasize speed. */
797 if (TARGET_HARD_SH4)
798 sh_div_strategy = SH_DIV_CALL_TABLE;
799 /* These have their own way of doing things. */
800 else if (TARGET_SH2A)
801 sh_div_strategy = SH_DIV_INTRINSIC;
802 /* ??? Should we use the integer SHmedia function instead? */
803 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
804 sh_div_strategy = SH_DIV_CALL_FP;
805 /* SH1 .. SH3 cores often go into small-footprint systems, so
806 default to the smallest implementation available. */
807 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
808 sh_div_strategy = SH_DIV_CALL_TABLE;
809 else
810 sh_div_strategy = SH_DIV_CALL_DIV1;
811 }
812 if (!TARGET_SH1)
813 TARGET_PRETEND_CMOVE = 0;
814 if (sh_divsi3_libfunc[0])
815 ; /* User supplied - leave it alone. */
816 else if (TARGET_DIVIDE_CALL_FP)
817 sh_divsi3_libfunc = "__sdivsi3_i4";
818 else if (TARGET_DIVIDE_CALL_TABLE)
819 sh_divsi3_libfunc = "__sdivsi3_i4i";
820 else if (TARGET_SH5)
821 sh_divsi3_libfunc = "__sdivsi3_1";
822 else
823 sh_divsi3_libfunc = "__sdivsi3";
824 if (sh_branch_cost == -1)
825 sh_branch_cost
826 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
827
828 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
829 if (! VALID_REGISTER_P (regno))
830 sh_register_names[regno][0] = '\0';
831
832 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
833 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
834 sh_additional_register_names[regno][0] = '\0';
835
836 if (flag_omit_frame_pointer == 2)
837 {
838 /* The debugging information is sufficient,
839 but gdb doesn't implement this yet. */
840 if (0)
841 flag_omit_frame_pointer
842 = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
843 else
844 flag_omit_frame_pointer = 0;
845 }
846
847 if ((flag_pic && ! TARGET_PREFERGOT)
848 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
849 flag_no_function_cse = 1;
850
851 if (SMALL_REGISTER_CLASSES)
852 {
853 /* Never run scheduling before reload, since that can
854 break global alloc, and generates slower code anyway due
855 to the pressure on R0. */
856 /* Enable sched1 for SH4 if the user explicitly requests it.
857 When sched1 is enabled, the ready queue will be reordered by
858 the target hooks if pressure is high. We cannot do this for
859 PIC, SH3 and lower, as they give spill failures for R0. */
860 if (!TARGET_HARD_SH4 || flag_pic)
861 flag_schedule_insns = 0;
862 /* ??? Current exception handling places basic block boundaries
863 after call_insns. This causes high pressure on R0 and gives
864 spill failures for R0 in reload. See PR 22553 and the thread
865 on gcc-patches
866 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
867 else if (flag_exceptions)
868 {
869 if (flag_schedule_insns == 1)
870 warning (0, "ignoring -fschedule-insns because of exception handling bug");
871 flag_schedule_insns = 0;
872 }
873 else if (flag_schedule_insns == 2)
874 flag_schedule_insns = 0;
875 }
876
877 if (align_loops == 0)
878 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
879 if (align_jumps == 0)
880 align_jumps = 1 << CACHE_LOG;
881 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
882 align_jumps = TARGET_SHMEDIA ? 4 : 2;
883
884 /* Allocation boundary (in *bytes*) for the code of a function.
885 SH1: 32 bit alignment is faster, because instructions are always
886 fetched as a pair from a longword boundary.
887 SH2 .. SH5 : align to cache line start. */
888 if (align_functions == 0)
889 align_functions
890 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
891 /* The linker relaxation code breaks when a function contains
892 alignments that are larger than that at the start of a
893 compilation unit. */
894 if (TARGET_RELAX)
895 {
896 int min_align
897 = align_loops > align_jumps ? align_loops : align_jumps;
898
899 /* Also take possible .long constants / mova tables into account. */
900 if (min_align < 4)
901 min_align = 4;
902 if (align_functions < min_align)
903 align_functions = min_align;
904 }
905
906 if (sh_fixed_range_str)
907 sh_fix_range (sh_fixed_range_str);
908 }
909 \f
910 /* Print the operand address in x to the stream. */
911
912 void
913 print_operand_address (FILE *stream, rtx x)
914 {
915 switch (GET_CODE (x))
916 {
917 case REG:
918 case SUBREG:
919 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
920 break;
921
922 case PLUS:
923 {
924 rtx base = XEXP (x, 0);
925 rtx index = XEXP (x, 1);
926
927 switch (GET_CODE (index))
928 {
929 case CONST_INT:
930 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
931 reg_names[true_regnum (base)]);
932 break;
933
934 case REG:
935 case SUBREG:
936 {
937 int base_num = true_regnum (base);
938 int index_num = true_regnum (index);
939
940 fprintf (stream, "@(r0,%s)",
941 reg_names[MAX (base_num, index_num)]);
942 break;
943 }
944
945 default:
946 gcc_unreachable ();
947 }
948 }
949 break;
950
951 case PRE_DEC:
952 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
953 break;
954
955 case POST_INC:
956 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
957 break;
958
959 default:
960 x = mark_constant_pool_use (x);
961 output_addr_const (stream, x);
962 break;
963 }
964 }
965
966 /* Print operand x (an rtx) in assembler syntax to file stream
967 according to the modifier code.
968
969 '.' print a .s if insn needs delay slot
970 ',' print LOCAL_LABEL_PREFIX
971 '@' print trap, rte or rts depending upon pragma interruptness
972 '#' output a nop if there is nothing to put in the delay slot
973 ''' print likelihood suffix (/u for unlikely).
974 '>' print branch target if -fverbose-asm
975 'O' print a constant without the #
976 'R' print the LSW of a dp value - changes if in little endian
977 'S' print the MSW of a dp value - changes if in little endian
978 'T' print the next word of a dp value - same as 'R' in big endian mode.
979 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
980 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
981 'N' print 'r63' if the operand is (const_int 0).
982 'd' print a V2SF reg as dN instead of fpN.
983 'm' print a pair `base,offset' or `base,index', for LD and ST.
984 'U' Likewise for {LD,ST}{HI,LO}.
985 'V' print the position of a single bit set.
986 'W' print the position of a single bit cleared.
987 't' print a memory address which is a register.
988 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
989 'o' output an operator. */
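/* For example, on a little-endian target with a DFmode operand whose
   address is in r4, "%R0" (the LSW) prints "@r4" and "%S0" (the MSW)
   prints "@(4,r4)"; on a big-endian target the two are swapped.  */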
990
991 void
992 print_operand (FILE *stream, rtx x, int code)
993 {
994 int regno;
995 enum machine_mode mode;
996
997 switch (code)
998 {
999 tree trapa_attr;
1000
1001 case '.':
1002 if (final_sequence
1003 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1004 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1005 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1006 break;
1007 case ',':
1008 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1009 break;
1010 case '@':
1011 trapa_attr = lookup_attribute ("trap_exit",
1012 DECL_ATTRIBUTES (current_function_decl));
1013 if (trapa_attr)
1014 fprintf (stream, "trapa #%ld",
1015 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1016 else if (sh_cfun_interrupt_handler_p ())
1017 {
1018 if (sh_cfun_resbank_handler_p ())
1019 fprintf (stream, "resbank\n");
1020 fprintf (stream, "rte");
1021 }
1022 else
1023 fprintf (stream, "rts");
1024 break;
1025 case '#':
1026 /* Output a nop if there's nothing in the delay slot. */
1027 if (dbr_sequence_length () == 0)
1028 fprintf (stream, "\n\tnop");
1029 break;
1030 case '\'':
1031 {
1032 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1033
1034 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1035 fputs ("/u", stream);
1036 break;
1037 }
1038 case '>':
1039 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1040 {
1041 fputs ("\t! target: ", stream);
1042 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1043 }
1044 break;
1045 case 'O':
1046 x = mark_constant_pool_use (x);
1047 output_addr_const (stream, x);
1048 break;
1049 /* N.B.: %R / %S / %T adjust memory addresses by four.
1050 For SHMEDIA, that means they can be used to access the first and
1051 second 32 bit part of a 64 bit (or larger) value that
1052 might be held in floating point registers or memory.
1053 While they can be used to access 64 bit parts of a larger value
1054 held in general purpose registers, that won't work with memory,
1055 nor with fp registers, since the frxx names are used. */
1056 case 'R':
1057 if (REG_P (x) || GET_CODE (x) == SUBREG)
1058 {
1059 regno = true_regnum (x);
1060 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1061 fputs (reg_names[regno], (stream));
1062 }
1063 else if (MEM_P (x))
1064 {
1065 x = adjust_address (x, SImode, 4 * LSW);
1066 print_operand_address (stream, XEXP (x, 0));
1067 }
1068 else
1069 {
1070 rtx sub = NULL_RTX;
1071
1072 mode = GET_MODE (x);
1073 if (mode == VOIDmode)
1074 mode = DImode;
1075 if (GET_MODE_SIZE (mode) >= 8)
1076 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1077 if (sub)
1078 print_operand (stream, sub, 0);
1079 else
1080 output_operand_lossage ("invalid operand to %%R");
1081 }
1082 break;
1083 case 'S':
1084 if (REG_P (x) || GET_CODE (x) == SUBREG)
1085 {
1086 regno = true_regnum (x);
1087 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1088 fputs (reg_names[regno], (stream));
1089 }
1090 else if (MEM_P (x))
1091 {
1092 x = adjust_address (x, SImode, 4 * MSW);
1093 print_operand_address (stream, XEXP (x, 0));
1094 }
1095 else
1096 {
1097 rtx sub = NULL_RTX;
1098
1099 mode = GET_MODE (x);
1100 if (mode == VOIDmode)
1101 mode = DImode;
1102 if (GET_MODE_SIZE (mode) >= 8)
1103 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1104 if (sub)
1105 print_operand (stream, sub, 0);
1106 else
1107 output_operand_lossage ("invalid operand to %%S");
1108 }
1109 break;
1110 case 'T':
1111 /* Next word of a double. */
1112 switch (GET_CODE (x))
1113 {
1114 case REG:
1115 fputs (reg_names[REGNO (x) + 1], (stream));
1116 break;
1117 case MEM:
1118 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1119 && GET_CODE (XEXP (x, 0)) != POST_INC)
1120 x = adjust_address (x, SImode, 4);
1121 print_operand_address (stream, XEXP (x, 0));
1122 break;
1123 default:
1124 break;
1125 }
1126 break;
1127
1128 case 't':
1129 gcc_assert (MEM_P (x));
1130 x = XEXP (x, 0);
1131 switch (GET_CODE (x))
1132 {
1133 case REG:
1134 case SUBREG:
1135 print_operand (stream, x, 0);
1136 break;
1137 default:
1138 break;
1139 }
1140 break;
1141
1142 case 'o':
1143 switch (GET_CODE (x))
1144 {
1145 case PLUS: fputs ("add", stream); break;
1146 case MINUS: fputs ("sub", stream); break;
1147 case MULT: fputs ("mul", stream); break;
1148 case DIV: fputs ("div", stream); break;
1149 case EQ: fputs ("eq", stream); break;
1150 case NE: fputs ("ne", stream); break;
1151 case GT: case LT: fputs ("gt", stream); break;
1152 case GE: case LE: fputs ("ge", stream); break;
1153 case GTU: case LTU: fputs ("gtu", stream); break;
1154 case GEU: case LEU: fputs ("geu", stream); break;
1155 default:
1156 break;
1157 }
1158 break;
1159 case 'M':
1160 if (TARGET_SHMEDIA)
1161 {
1162 if (MEM_P (x)
1163 && GET_CODE (XEXP (x, 0)) == PLUS
1164 && (REG_P (XEXP (XEXP (x, 0), 1))
1165 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1166 fputc ('x', stream);
1167 }
1168 else
1169 {
1170 if (MEM_P (x))
1171 {
1172 switch (GET_MODE (x))
1173 {
1174 case QImode: fputs (".b", stream); break;
1175 case HImode: fputs (".w", stream); break;
1176 case SImode: fputs (".l", stream); break;
1177 case SFmode: fputs (".s", stream); break;
1178 case DFmode: fputs (".d", stream); break;
1179 default: gcc_unreachable ();
1180 }
1181 }
1182 }
1183 break;
1184
1185 case 'm':
1186 gcc_assert (MEM_P (x));
1187 x = XEXP (x, 0);
1188 /* Fall through. */
1189 case 'U':
1190 switch (GET_CODE (x))
1191 {
1192 case REG:
1193 case SUBREG:
1194 print_operand (stream, x, 0);
1195 fputs (", 0", stream);
1196 break;
1197
1198 case PLUS:
1199 print_operand (stream, XEXP (x, 0), 0);
1200 fputs (", ", stream);
1201 print_operand (stream, XEXP (x, 1), 0);
1202 break;
1203
1204 default:
1205 gcc_unreachable ();
1206 }
1207 break;
1208
1209 case 'V':
1210 {
1211 int num = exact_log2 (INTVAL (x));
1212 gcc_assert (num >= 0);
1213 fprintf (stream, "#%d", num);
1214 }
1215 break;
1216
1217 case 'W':
1218 {
1219 int num = exact_log2 (~INTVAL (x));
1220 gcc_assert (num >= 0);
1221 fprintf (stream, "#%d", num);
1222 }
1223 break;
1224
1225 case 'd':
1226 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1227
1228 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1229 break;
1230
1231 case 'N':
1232 if (x == CONST0_RTX (GET_MODE (x)))
1233 {
1234 fprintf ((stream), "r63");
1235 break;
1236 }
1237 goto default_output;
1238 case 'u':
1239 if (CONST_INT_P (x))
1240 {
1241 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1242 break;
1243 }
1244 /* Fall through. */
1245
1246 default_output:
1247 default:
1248 regno = 0;
1249 mode = GET_MODE (x);
1250
1251 switch (GET_CODE (x))
1252 {
1253 case TRUNCATE:
1254 {
1255 rtx inner = XEXP (x, 0);
1256 int offset = 0;
1257 enum machine_mode inner_mode;
1258
1259 /* We might see SUBREGs with vector mode registers inside. */
1260 if (GET_CODE (inner) == SUBREG
1261 && (GET_MODE_SIZE (GET_MODE (inner))
1262 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1263 && subreg_lowpart_p (inner))
1264 inner = SUBREG_REG (inner);
1265 if (CONST_INT_P (inner))
1266 {
1267 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1268 goto default_output;
1269 }
1270 inner_mode = GET_MODE (inner);
1271 if (GET_CODE (inner) == SUBREG
1272 && (GET_MODE_SIZE (GET_MODE (inner))
1273 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1274 && REG_P (SUBREG_REG (inner)))
1275 {
1276 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1277 GET_MODE (SUBREG_REG (inner)),
1278 SUBREG_BYTE (inner),
1279 GET_MODE (inner));
1280 inner = SUBREG_REG (inner);
1281 }
1282 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1283 abort ();
1284 /* Floating point register pairs are always big endian;
1285 general purpose registers are 64 bit wide. */
1286 regno = REGNO (inner);
1287 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1288 - HARD_REGNO_NREGS (regno, mode))
1289 + offset;
1290 x = inner;
1291 goto reg;
1292 }
1293 case SIGN_EXTEND:
1294 x = XEXP (x, 0);
1295 goto reg;
1296 /* FIXME: We need this on SHmedia32 because reload generates
1297 some sign-extended HI or QI loads into DImode registers
1298 but, because Pmode is SImode, the address ends up with a
1299 subreg:SI of the DImode register. Maybe reload should be
1300 fixed so as to apply alter_subreg to such loads? */
1301 case IF_THEN_ELSE:
1302 gcc_assert (trapping_target_operand (x, VOIDmode));
1303 x = XEXP (XEXP (x, 2), 0);
1304 goto default_output;
1305 case SUBREG:
1306 gcc_assert (SUBREG_BYTE (x) == 0
1307 && REG_P (SUBREG_REG (x)));
1308
1309 x = SUBREG_REG (x);
1310 /* Fall through. */
1311
1312 reg:
1313 case REG:
1314 regno += REGNO (x);
1315 if (FP_REGISTER_P (regno)
1316 && mode == V16SFmode)
1317 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1318 else if (FP_REGISTER_P (REGNO (x))
1319 && mode == V4SFmode)
1320 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1321 else if (REG_P (x)
1322 && mode == V2SFmode)
1323 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1324 else if (FP_REGISTER_P (REGNO (x))
1325 && GET_MODE_SIZE (mode) > 4)
1326 fprintf ((stream), "d%s", reg_names[regno] + 1);
1327 else
1328 fputs (reg_names[regno], (stream));
1329 break;
1330
1331 case MEM:
1332 output_address (XEXP (x, 0));
1333 break;
1334
1335 default:
1336 if (TARGET_SH1)
1337 fputc ('#', stream);
1338 output_addr_const (stream, x);
1339 break;
1340 }
1341 break;
1342 }
1343 }
1344 \f
1345
1346 /* Encode symbol attributes of a SYMBOL_REF into its
1347 SYMBOL_REF_FLAGS. */
1348 static void
1349 sh_encode_section_info (tree decl, rtx rtl, int first)
1350 {
1351 default_encode_section_info (decl, rtl, first);
1352
1353 if (TREE_CODE (decl) == FUNCTION_DECL
1354 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1355 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1356 }
1357
1358 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1359 static void
1360 force_into (rtx value, rtx target)
1361 {
1362 value = force_operand (value, target);
1363 if (! rtx_equal_p (value, target))
1364 emit_insn (gen_move_insn (target, value));
1365 }
1366
1367 /* Emit code to perform a block move. Choose the best method.
1368
1369 OPERANDS[0] is the destination.
1370 OPERANDS[1] is the source.
1371 OPERANDS[2] is the size.
1372 OPERANDS[3] is the alignment safe to use. */
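/* For example, a constant, 4-byte-aligned copy of 12 bytes on SH4 is
   expanded below by loading the destination and source addresses into r4
   and r5 and calling the __movmemSI12_i4 library routine.  */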
1373
1374 int
1375 expand_block_move (rtx *operands)
1376 {
1377 int align = INTVAL (operands[3]);
1378 int constp = (CONST_INT_P (operands[2]));
1379 int bytes = (constp ? INTVAL (operands[2]) : 0);
1380
1381 if (! constp)
1382 return 0;
1383
1384 /* If we could use mov.l to move words and dest is word-aligned, we
1385 can use movua.l for loads and still generate a relatively short
1386 and efficient sequence. */
1387 if (TARGET_SH4A_ARCH && align < 4
1388 && MEM_ALIGN (operands[0]) >= 32
1389 && can_move_by_pieces (bytes, 32))
1390 {
1391 rtx dest = copy_rtx (operands[0]);
1392 rtx src = copy_rtx (operands[1]);
1393 /* We could use different pseudos for each copied word, but
1394 since movua can only load into r0, it's kind of
1395 pointless. */
1396 rtx temp = gen_reg_rtx (SImode);
1397 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1398 int copied = 0;
1399
1400 while (copied + 4 <= bytes)
1401 {
1402 rtx to = adjust_address (dest, SImode, copied);
1403 rtx from = adjust_automodify_address (src, BLKmode,
1404 src_addr, copied);
1405
1406 set_mem_size (from, GEN_INT (4));
1407 emit_insn (gen_movua (temp, from));
1408 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1409 emit_move_insn (to, temp);
1410 copied += 4;
1411 }
1412
1413 if (copied < bytes)
1414 move_by_pieces (adjust_address (dest, BLKmode, copied),
1415 adjust_automodify_address (src, BLKmode,
1416 src_addr, copied),
1417 bytes - copied, align, 0);
1418
1419 return 1;
1420 }
1421
1422 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1423 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1424 if (align < 4 || (bytes % 4 != 0))
1425 return 0;
1426
1427 if (TARGET_HARD_SH4)
1428 {
1429 if (bytes < 12)
1430 return 0;
1431 else if (bytes == 12)
1432 {
1433 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1434 rtx r4 = gen_rtx_REG (SImode, 4);
1435 rtx r5 = gen_rtx_REG (SImode, 5);
1436
1437 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1438 force_into (XEXP (operands[0], 0), r4);
1439 force_into (XEXP (operands[1], 0), r5);
1440 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1441 return 1;
1442 }
1443 else if (! TARGET_SMALLCODE)
1444 {
1445 const char *entry_name;
1446 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1447 int dwords;
1448 rtx r4 = gen_rtx_REG (SImode, 4);
1449 rtx r5 = gen_rtx_REG (SImode, 5);
1450 rtx r6 = gen_rtx_REG (SImode, 6);
1451
1452 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1453 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1454 force_into (XEXP (operands[0], 0), r4);
1455 force_into (XEXP (operands[1], 0), r5);
1456
1457 dwords = bytes >> 3;
1458 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1459 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1460 return 1;
1461 }
1462 else
1463 return 0;
1464 }
1465 if (bytes < 64)
1466 {
1467 char entry[30];
1468 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1469 rtx r4 = gen_rtx_REG (SImode, 4);
1470 rtx r5 = gen_rtx_REG (SImode, 5);
1471
1472 sprintf (entry, "__movmemSI%d", bytes);
1473 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1474 force_into (XEXP (operands[0], 0), r4);
1475 force_into (XEXP (operands[1], 0), r5);
1476 emit_insn (gen_block_move_real (func_addr_rtx));
1477 return 1;
1478 }
1479
1480 /* This is the same number of bytes as a memcpy call, but to a different
1481 less common function name, so this will occasionally use more space. */
1482 if (! TARGET_SMALLCODE)
1483 {
1484 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1485 int final_switch, while_loop;
1486 rtx r4 = gen_rtx_REG (SImode, 4);
1487 rtx r5 = gen_rtx_REG (SImode, 5);
1488 rtx r6 = gen_rtx_REG (SImode, 6);
1489
1490 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1491 force_into (XEXP (operands[0], 0), r4);
1492 force_into (XEXP (operands[1], 0), r5);
1493
1494 /* r6 controls the size of the move. 16 is decremented from it
1495 for each 64 bytes moved. Then the negative bit left over is used
1496 as an index into a list of move instructions. e.g., a 72 byte move
1497 would be set up with size(r6) = 14, for one iteration through the
1498 big while loop, and a switch of -2 for the last part. */
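/* Another worked example: a 260 byte move has bytes / 4 = 65 words, so
   final_switch = 16 - (65 % 16) = 15 and while_loop = (65 / 16 - 1) * 16
   = 48, giving r6 = 63: four trips through the 64 byte loop and a final
   switch of -1 for the one remaining word.  */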
1499
1500 final_switch = 16 - ((bytes / 4) % 16);
1501 while_loop = ((bytes / 4) / 16 - 1) * 16;
1502 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1503 emit_insn (gen_block_lump_real (func_addr_rtx));
1504 return 1;
1505 }
1506
1507 return 0;
1508 }
1509
1510 /* Prepare operands for a move define_expand; specifically, one of the
1511 operands must be in a register. */
1512
1513 int
1514 prepare_move_operands (rtx operands[], enum machine_mode mode)
1515 {
1516 if ((mode == SImode || mode == DImode)
1517 && flag_pic
1518 && ! ((mode == Pmode || mode == ptr_mode)
1519 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1520 {
1521 rtx temp;
1522 if (SYMBOLIC_CONST_P (operands[1]))
1523 {
1524 if (MEM_P (operands[0]))
1525 operands[1] = force_reg (Pmode, operands[1]);
1526 else if (TARGET_SHMEDIA
1527 && GET_CODE (operands[1]) == LABEL_REF
1528 && target_reg_operand (operands[0], mode))
1529 /* It's ok. */;
1530 else
1531 {
1532 temp = (!can_create_pseudo_p ()
1533 ? operands[0]
1534 : gen_reg_rtx (Pmode));
1535 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1536 }
1537 }
1538 else if (GET_CODE (operands[1]) == CONST
1539 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1540 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1541 {
1542 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1543 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1544 mode, temp);
1545 operands[1] = expand_binop (mode, add_optab, temp,
1546 XEXP (XEXP (operands[1], 0), 1),
1547 (!can_create_pseudo_p ()
1548 ? temp
1549 : gen_reg_rtx (Pmode)),
1550 0, OPTAB_LIB_WIDEN);
1551 }
1552 }
1553
1554 if (! reload_in_progress && ! reload_completed)
1555 {
1556 /* Copy the source to a register if neither operand is a register. */
1557 if (! register_operand (operands[0], mode)
1558 && ! sh_register_operand (operands[1], mode))
1559 operands[1] = copy_to_mode_reg (mode, operands[1]);
1560
1561 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1562 {
1563 /* This is like change_address_1 (operands[0], mode, 0, 1),
1564 except that we can't use that function because it is static. */
1565 rtx new_rtx = change_address (operands[0], mode, 0);
1566 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1567 operands[0] = new_rtx;
1568 }
1569
1570 /* This case can happen while generating code to move the result
1571 of a library call to the target. Reject `st r0,@(rX,rY)' because
1572 reload will fail to find a spill register for rX, since r0 is already
1573 being used for the source. */
1574 else if (TARGET_SH1
1575 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1576 && MEM_P (operands[0])
1577 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1578 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1579 operands[1] = copy_to_mode_reg (mode, operands[1]);
1580 }
1581
1582 if (mode == Pmode || mode == ptr_mode)
1583 {
1584 rtx op0, op1, opc;
1585 enum tls_model tls_kind;
1586
1587 op0 = operands[0];
1588 op1 = operands[1];
1589 if (GET_CODE (op1) == CONST
1590 && GET_CODE (XEXP (op1, 0)) == PLUS
1591 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1592 != TLS_MODEL_NONE))
1593 {
1594 opc = XEXP (XEXP (op1, 0), 1);
1595 op1 = XEXP (XEXP (op1, 0), 0);
1596 }
1597 else
1598 opc = NULL_RTX;
1599
1600 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1601 {
1602 rtx tga_op1, tga_ret, tmp, tmp2;
1603
1604 switch (tls_kind)
1605 {
1606 case TLS_MODEL_GLOBAL_DYNAMIC:
1607 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1608 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1609 op1 = tga_ret;
1610 break;
1611
1612 case TLS_MODEL_LOCAL_DYNAMIC:
1613 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1614 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1615
1616 tmp = gen_reg_rtx (Pmode);
1617 emit_move_insn (tmp, tga_ret);
1618
1619 if (register_operand (op0, Pmode))
1620 tmp2 = op0;
1621 else
1622 tmp2 = gen_reg_rtx (Pmode);
1623
1624 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1625 op1 = tmp2;
1626 break;
1627
1628 case TLS_MODEL_INITIAL_EXEC:
1629 if (! flag_pic)
1630 {
1631 /* Don't schedule insns for getting GOT address when
1632 the first scheduling pass is enabled, to avoid spill
1633 failures for R0. */
1634 if (flag_schedule_insns)
1635 emit_insn (gen_blockage ());
1636 emit_insn (gen_GOTaddr2picreg ());
1637 emit_use (gen_rtx_REG (SImode, PIC_REG));
1638 if (flag_schedule_insns)
1639 emit_insn (gen_blockage ());
1640 }
1641 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1642 tmp = gen_sym2GOTTPOFF (op1);
1643 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1644 op1 = tga_op1;
1645 break;
1646
1647 case TLS_MODEL_LOCAL_EXEC:
1648 tmp2 = gen_reg_rtx (Pmode);
1649 emit_insn (gen_load_gbr (tmp2));
1650 tmp = gen_reg_rtx (Pmode);
1651 emit_insn (gen_symTPOFF2reg (tmp, op1));
1652
1653 if (register_operand (op0, Pmode))
1654 op1 = op0;
1655 else
1656 op1 = gen_reg_rtx (Pmode);
1657
1658 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1659 break;
1660
1661 default:
1662 gcc_unreachable ();
1663 }
1664 if (opc)
1665 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1666 operands[1] = op1;
1667 }
1668 }
1669
1670 return 0;
1671 }
1672
1673 enum rtx_code
1674 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1675 enum rtx_code comparison)
1676 {
1677 rtx op1;
1678 rtx scratch = NULL_RTX;
1679
1680 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1681 comparison = GET_CODE (operands[0]);
1682 else
1683 scratch = operands[4];
1684 if (CONST_INT_P (operands[1])
1685 && !CONST_INT_P (operands[2]))
1686 {
1687 rtx tmp = operands[1];
1688
1689 operands[1] = operands[2];
1690 operands[2] = tmp;
1691 comparison = swap_condition (comparison);
1692 }
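/* Where the constant can be nudged by one, canonicalize towards a
   comparison the hardware tests cheaply; e.g. (gt x -1) becomes (ge x 0)
   and (geu x 1) becomes (ne x 0) below.  */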
1693 if (CONST_INT_P (operands[2]))
1694 {
1695 HOST_WIDE_INT val = INTVAL (operands[2]);
1696 if ((val == -1 || val == -0x81)
1697 && (comparison == GT || comparison == LE))
1698 {
1699 comparison = (comparison == GT) ? GE : LT;
1700 operands[2] = gen_int_mode (val + 1, mode);
1701 }
1702 else if ((val == 1 || val == 0x80)
1703 && (comparison == GE || comparison == LT))
1704 {
1705 comparison = (comparison == GE) ? GT : LE;
1706 operands[2] = gen_int_mode (val - 1, mode);
1707 }
1708 else if (val == 1 && (comparison == GEU || comparison == LTU))
1709 {
1710 comparison = (comparison == GEU) ? NE : EQ;
1711 operands[2] = CONST0_RTX (mode);
1712 }
1713 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1714 {
1715 comparison = (comparison == GEU) ? GTU : LEU;
1716 operands[2] = gen_int_mode (val - 1, mode);
1717 }
1718 else if (val == 0 && (comparison == GTU || comparison == LEU))
1719 comparison = (comparison == GTU) ? NE : EQ;
1720 else if (mode == SImode
1721 && ((val == 0x7fffffff
1722 && (comparison == GTU || comparison == LEU))
1723 || ((unsigned HOST_WIDE_INT) val
1724 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1725 && (comparison == GEU || comparison == LTU))))
1726 {
1727 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1728 operands[2] = CONST0_RTX (mode);
1729 }
1730 }
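/* A couple of illustrative instances of the adjustments above (assuming
   SImode): a signed "x > -1" becomes "x >= 0", which can be tested with
   cmp/pz instead of loading -1 into a register; an unsigned "x < 1"
   becomes "x == 0"; and an unsigned "x > 0x7fffffff" becomes the signed
   test "x < 0", a plain sign-bit check.  */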
1731 op1 = operands[1];
1732 if (can_create_pseudo_p ())
1733 operands[1] = force_reg (mode, op1);
1734 /* When we are handling DImode comparisons, we want to keep constants so
1735 that we can optimize the component comparisons; however, memory loads
1736 are better issued as a whole so that they can be scheduled well.
1737 SImode equality comparisons allow I08 constants, but only when they
1738 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1739 into a register, that register might as well be r0, and we allow the
1740 constant. If it is already in a register, this is likely to be
1741 allocated to a different hard register, thus we load the constant into
1742 a register unless it is zero. */
1743 if (!REG_P (operands[2])
1744 && (!CONST_INT_P (operands[2])
1745 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1746 && ((comparison != EQ && comparison != NE)
1747 || (REG_P (op1) && REGNO (op1) != R0_REG)
1748 || !satisfies_constraint_I08 (operands[2])))))
1749 {
1750 if (scratch && GET_MODE (scratch) == mode)
1751 {
1752 emit_move_insn (scratch, operands[2]);
1753 operands[2] = scratch;
1754 }
1755 else if (can_create_pseudo_p ())
1756 operands[2] = force_reg (mode, operands[2]);
1757 }
1758 return comparison;
1759 }
1760
1761 void
1762 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1763 {
1764 rtx (*branch_expander) (rtx) = gen_branch_true;
1765 rtx jump;
1766
1767 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1768 switch (comparison)
1769 {
1770 case NE: case LT: case LE: case LTU: case LEU:
1771 comparison = reverse_condition (comparison);
1772 branch_expander = gen_branch_false;
1773 default: ;
1774 }
1775 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1776 gen_rtx_fmt_ee (comparison, SImode,
1777 operands[1], operands[2])));
1778 jump = emit_jump_insn (branch_expander (operands[3]));
1779 if (probability >= 0)
1780 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1781
1782 }
1783
1784 /* ??? How should we distribute probabilities when more than one branch
1785 is generated? So far we only have some ad-hoc observations:
1786 - If the operands are random, they are likely to differ in both parts.
1787 - If comparing items in a hash chain, the operands are random or equal;
1788 operation should be EQ or NE.
1789 - If items are searched in an ordered tree from the root, we can expect
1790 the highpart to be unequal about half of the time; operation should be
1791 an inequality comparison, operands non-constant, and overall probability
1792 about 50%. Likewise for quicksort.
1793 - Range checks will often be made against constants. Even if we assume for
1794 simplicity an even distribution of the non-constant operand over a
1795 sub-range here, the same probability could be generated with differently
1796 wide sub-ranges - as long as the ratio of the part of the subrange that
1797 is before the threshold to the part that comes after the threshold stays
1798 the same. Thus, we can't really tell anything here;
1799 assuming random distribution is at least simple.
1800 */
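/* A rough sketch of what expand_cbranchdi4 emits for a signed DImode
   "a > b" branching to L (assuming the cmpeqdi_t patterns are not used):

	if (a.hi > b.hi) goto L;		(msw_taken)
	if (a.hi < b.hi) goto skip;		(msw_skip)
	if (a.lo > b.lo, unsigned) goto L;	(lsw_taken)
     skip:

   i.e. the high words decide unless they are equal, in which case the
   low words are compared unsigned.  */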
1801
1802 bool
1803 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1804 {
1805 enum rtx_code msw_taken, msw_skip, lsw_taken;
1806 rtx skip_label = NULL_RTX;
1807 rtx op1h, op1l, op2h, op2l;
1808 int num_branches;
1809 int prob, rev_prob;
1810 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1811 rtx scratch = operands[4];
1812
1813 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1814 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1815 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1816 op1l = gen_lowpart (SImode, operands[1]);
1817 op2l = gen_lowpart (SImode, operands[2]);
1818 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1819 prob = split_branch_probability;
1820 rev_prob = REG_BR_PROB_BASE - prob;
1821 switch (comparison)
1822 {
1823 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1824 That costs 1 cycle more when the first branch can be predicted taken,
1825 but saves us mispredicts because only one branch needs prediction.
1826 It also enables generating the cmpeqdi_t-1 pattern. */
1827 case EQ:
1828 if (TARGET_CMPEQDI_T)
1829 {
1830 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1831 emit_jump_insn (gen_branch_true (operands[3]));
1832 return true;
1833 }
1834 msw_skip = NE;
1835 lsw_taken = EQ;
1836 if (prob >= 0)
1837 {
1838 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).
1839 */
1840 msw_skip_prob = rev_prob;
1841 if (REG_BR_PROB_BASE <= 65535)
1842 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1843 else
1844 {
1845 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1846 lsw_taken_prob
1847 = (prob
1848 ? (REG_BR_PROB_BASE
1849 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1850 / ((HOST_WIDEST_INT) prob << 32)))
1851 : 0);
1852 }
1853 }
1854 break;
1855 case NE:
1856 if (TARGET_CMPEQDI_T)
1857 {
1858 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1859 emit_jump_insn (gen_branch_false (operands[3]));
1860 return true;
1861 }
1862 msw_taken = NE;
1863 msw_taken_prob = prob;
1864 lsw_taken = NE;
1865 lsw_taken_prob = 0;
1866 break;
1867 case GTU: case GT:
1868 msw_taken = comparison;
1869 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1870 break;
1871 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1872 msw_skip = swap_condition (msw_taken);
1873 lsw_taken = GTU;
1874 break;
1875 case GEU: case GE:
1876 if (op2l == CONST0_RTX (SImode))
1877 msw_taken = comparison;
1878 else
1879 {
1880 msw_taken = comparison == GE ? GT : GTU;
1881 msw_skip = swap_condition (msw_taken);
1882 lsw_taken = GEU;
1883 }
1884 break;
1885 case LTU: case LT:
1886 msw_taken = comparison;
1887 if (op2l == CONST0_RTX (SImode))
1888 break;
1889 msw_skip = swap_condition (msw_taken);
1890 lsw_taken = LTU;
1891 break;
1892 case LEU: case LE:
1893 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1894 msw_taken = comparison;
1895 else
1896 {
1897 lsw_taken = LEU;
1898 if (comparison == LE)
1899 msw_taken = LT;
1900 else if (op2h != CONST0_RTX (SImode))
1901 msw_taken = LTU;
1902 else
1903 break;
1904 msw_skip = swap_condition (msw_taken);
1905 }
1906 break;
1907 default: return false;
1908 }
1909 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1910 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1911 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1912 if (comparison != EQ && comparison != NE && num_branches > 1)
1913 {
1914 if (!CONSTANT_P (operands[2])
1915 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1916 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1917 {
1918 msw_taken_prob = prob / 2U;
1919 msw_skip_prob
1920 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1921 lsw_taken_prob = prob;
1922 }
1923 else
1924 {
1925 msw_taken_prob = prob;
1926 msw_skip_prob = REG_BR_PROB_BASE;
1927 /* ??? If we have a constant op2h, should we use that when
1928 calculating lsw_taken_prob? */
1929 lsw_taken_prob = prob;
1930 }
1931 }
1932 operands[1] = op1h;
1933 operands[2] = op2h;
1934 operands[4] = NULL_RTX;
1935 if (reload_completed
1936 && ! arith_reg_or_0_operand (op2h, SImode)
1937 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1938 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1939 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1940 {
1941 emit_move_insn (scratch, operands[2]);
1942 operands[2] = scratch;
1943 }
1944 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1945 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1946 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1947 {
1948 rtx taken_label = operands[3];
1949
1950 /* Operands were possibly modified, but msw_skip doesn't expect this.
1951 Always use the original ones. */
1952 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1953 {
1954 operands[1] = op1h;
1955 operands[2] = op2h;
1956 }
1957
1958 operands[3] = skip_label = gen_label_rtx ();
1959 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
1960 operands[3] = taken_label;
1961 }
1962 operands[1] = op1l;
1963 operands[2] = op2l;
1964 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
1965 {
1966 if (reload_completed
1967 && ! arith_reg_or_0_operand (op2l, SImode)
1968 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
1969 {
1970 emit_move_insn (scratch, operands[2]);
1971 operands[2] = scratch;
1972 }
1973 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
1974 }
1975 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1976 emit_label (skip_label);
1977 return true;
1978 }
1979
1980 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
1981
1982 static void
1983 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
1984 {
1985 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1986 {
1987 insn = gen_rtx_PARALLEL (VOIDmode,
1988 gen_rtvec (2, insn,
1989 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1990 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1991 }
1992 else
1993 emit_insn (insn);
1994 }
1995
1996 /* Prepare the operands for an scc instruction; make sure that the
1997 compare has been done and the result is in T_REG. */
1998 void
1999 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2000 {
2001 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2002 enum rtx_code oldcode = code;
2003 enum machine_mode mode;
2004
2005 /* First need a compare insn. */
2006 switch (code)
2007 {
2008 case NE:
2009 /* It isn't possible to handle this case. */
2010 gcc_unreachable ();
2011 case LT:
2012 code = GT;
2013 break;
2014 case LE:
2015 code = GE;
2016 break;
2017 case LTU:
2018 code = GTU;
2019 break;
2020 case LEU:
2021 code = GEU;
2022 break;
2023 default:
2024 break;
2025 }
2026 if (code != oldcode)
2027 {
2028 rtx tmp = op0;
2029 op0 = op1;
2030 op1 = tmp;
2031 }
2032
2033 mode = GET_MODE (op0);
2034 if (mode == VOIDmode)
2035 mode = GET_MODE (op1);
2036
2037 op0 = force_reg (mode, op0);
2038 if ((code != EQ && code != NE
2039 && (op1 != const0_rtx
2040 || code == GTU || code == GEU || code == LTU || code == LEU))
2041 || (mode == DImode && op1 != const0_rtx)
2042 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2043 op1 = force_reg (mode, op1);
2044
2045 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2046 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2047 mode);
2048 }
2049
2050 rtx
2051 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2052 rtx op0, rtx op1)
2053 {
2054 rtx target = gen_reg_rtx (SImode);
2055 rtx tmp;
2056
2057 gcc_assert (TARGET_SHMEDIA);
2058 switch (code)
2059 {
2060 case EQ:
2061 case GT:
2062 case LT:
2063 case UNORDERED:
2064 case GTU:
2065 case LTU:
2066 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2067 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2068 code = NE;
2069 break;
2070
2071 case NE:
2072 case GE:
2073 case LE:
2074 case ORDERED:
2075 case GEU:
2076 case LEU:
2077 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2078 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2079 code = EQ;
2080 break;
2081
2082 case UNEQ:
2083 case UNGE:
2084 case UNGT:
2085 case UNLE:
2086 case UNLT:
2087 case LTGT:
2088 return NULL_RTX;
2089
2090 default:
2091 gcc_unreachable ();
2092 }
2093
2094 if (mode == DImode)
2095 {
2096 rtx t2 = gen_reg_rtx (DImode);
2097 emit_insn (gen_extendsidi2 (t2, target));
2098 target = t2;
2099 }
2100
2101 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2102 }
2103
2104 /* Called from the md file; set up the operands of a compare instruction. */
2105
2106 void
2107 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2108 {
2109 enum rtx_code code = GET_CODE (operands[0]);
2110 enum rtx_code branch_code;
2111 rtx op0 = operands[1];
2112 rtx op1 = operands[2];
2113 rtx insn, tem;
2114 bool need_ccmpeq = false;
2115
2116 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2117 {
2118 op0 = force_reg (mode, op0);
2119 op1 = force_reg (mode, op1);
2120 }
2121 else
2122 {
2123 if (code != EQ || mode == DImode)
2124 {
2125 /* Force args into regs, since we can't use constants here. */
2126 op0 = force_reg (mode, op0);
2127 if (op1 != const0_rtx || code == GTU || code == GEU)
2128 op1 = force_reg (mode, op1);
2129 }
2130 }
2131
2132 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2133 {
2134 if (code == LT
2135 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2136 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2137 {
2138 tem = op0, op0 = op1, op1 = tem;
2139 code = swap_condition (code);
2140 }
2141
2142 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2143 if (code == GE)
2144 {
2145 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2146 need_ccmpeq = true;
2147 code = GT;
2148 }
2149
2150 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2151 to EQ/GT respectively. */
2152 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2153 }
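  /* For example (roughly), an SFmode "a >= b" under TARGET_IEEE is emitted
     as fcmp/gt followed by a conditional fcmp/eq via need_ccmpeq, giving
     T = (a > b) || (a == b); simply negating "b > a" would yield the wrong
     answer when either operand is a NaN.  */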
2154
2155 switch (code)
2156 {
2157 case EQ:
2158 case GT:
2159 case GE:
2160 case GTU:
2161 case GEU:
2162 branch_code = code;
2163 break;
2164 case NE:
2165 case LT:
2166 case LE:
2167 case LTU:
2168 case LEU:
2169 branch_code = reverse_condition (code);
2170 break;
2171 default:
2172 gcc_unreachable ();
2173 }
2174
2175 insn = gen_rtx_SET (VOIDmode,
2176 gen_rtx_REG (SImode, T_REG),
2177 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2178
2179 sh_emit_set_t_insn (insn, mode);
2180 if (need_ccmpeq)
2181 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2182
2183 if (branch_code == code)
2184 emit_jump_insn (gen_branch_true (operands[3]));
2185 else
2186 emit_jump_insn (gen_branch_false (operands[3]));
2187 }
2188
2189 void
2190 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2191 {
2192 enum rtx_code code = GET_CODE (operands[1]);
2193 rtx op0 = operands[2];
2194 rtx op1 = operands[3];
2195 rtx lab = NULL_RTX;
2196 bool invert = false;
2197 rtx tem;
2198
2199 op0 = force_reg (mode, op0);
2200 if ((code != EQ && code != NE
2201 && (op1 != const0_rtx
2202 || code == GTU || code == GEU || code == LTU || code == LEU))
2203 || (mode == DImode && op1 != const0_rtx)
2204 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2205 op1 = force_reg (mode, op1);
2206
2207 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2208 {
2209 if (code == LT || code == LE)
2210 {
2211 code = swap_condition (code);
2212 tem = op0, op0 = op1, op1 = tem;
2213 }
2214 if (code == GE)
2215 {
2216 if (TARGET_IEEE)
2217 {
2218 lab = gen_label_rtx ();
2219 sh_emit_scc_to_t (EQ, op0, op1);
2220 emit_jump_insn (gen_branch_true (lab));
2221 code = GT;
2222 }
2223 else
2224 {
2225 code = LT;
2226 invert = true;
2227 }
2228 }
2229 }
2230
2231 if (code == NE)
2232 {
2233 code = EQ;
2234 invert = true;
2235 }
2236
2237 sh_emit_scc_to_t (code, op0, op1);
2238 if (lab)
2239 emit_label (lab);
2240 if (invert)
2241 emit_insn (gen_movnegt (operands[0]));
2242 else
2243 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2244 }
2245 \f
2246 /* Functions to output assembly code. */
2247
2248 /* Return a sequence of instructions to perform DI or DF move.
2249
2250 Since the SH cannot move a DI or DF in one instruction, we have
2251 to take care when we see overlapping source and dest registers. */
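/* For instance (an illustrative case), a DImode load whose address register
   is also the low word of the destination, "mov.d @r4,r4", must load the
   second word first, roughly

	mov.l	@(4,r4),r5
	mov.l	@r4,r4

   so that the pointer is still intact for the second access; that is the
   dreg == ptrreg case below.  */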
2252
2253 const char *
2254 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2255 enum machine_mode mode)
2256 {
2257 rtx dst = operands[0];
2258 rtx src = operands[1];
2259
2260 if (MEM_P (dst)
2261 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2262 return "mov.l %T1,%0\n\tmov.l %1,%0";
2263
2264 if (register_operand (dst, mode)
2265 && register_operand (src, mode))
2266 {
2267 if (REGNO (src) == MACH_REG)
2268 return "sts mach,%S0\n\tsts macl,%R0";
2269
2270 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2271 when mov.d r1,r0 do r1->r0 then r2->r1. */
2272
2273 if (REGNO (src) + 1 == REGNO (dst))
2274 return "mov %T1,%T0\n\tmov %1,%0";
2275 else
2276 return "mov %1,%0\n\tmov %T1,%T0";
2277 }
2278 else if (CONST_INT_P (src))
2279 {
2280 if (INTVAL (src) < 0)
2281 output_asm_insn ("mov #-1,%S0", operands);
2282 else
2283 output_asm_insn ("mov #0,%S0", operands);
2284
2285 return "mov %1,%R0";
2286 }
2287 else if (MEM_P (src))
2288 {
2289 int ptrreg = -1;
2290 int dreg = REGNO (dst);
2291 rtx inside = XEXP (src, 0);
2292
2293 switch (GET_CODE (inside))
2294 {
2295 case REG:
2296 ptrreg = REGNO (inside);
2297 break;
2298
2299 case SUBREG:
2300 ptrreg = subreg_regno (inside);
2301 break;
2302
2303 case PLUS:
2304 ptrreg = REGNO (XEXP (inside, 0));
2305 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2306 an offsettable address. Unfortunately, offsettable addresses use
2307 QImode to check the offset, and a QImode offsettable address
2308 requires r0 for the other operand, which is not currently
2309 supported, so we can't use the 'o' constraint.
2310 Thus we must check for and handle r0+REG addresses here.
2311 We punt for now, since this is likely very rare. */
2312 gcc_assert (!REG_P (XEXP (inside, 1)));
2313 break;
2314
2315 case LABEL_REF:
2316 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2317 case POST_INC:
2318 return "mov.l %1,%0\n\tmov.l %1,%T0";
2319 default:
2320 gcc_unreachable ();
2321 }
2322
2323 /* Work out the safe way to copy. Copy into the second half first. */
2324 if (dreg == ptrreg)
2325 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2326 }
2327
2328 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2329 }
2330
2331 /* Print an instruction which would have gone into a delay slot after
2332 another instruction, but couldn't because the other instruction expanded
2333 into a sequence where putting the slot insn at the end wouldn't work. */
2334
2335 static void
2336 print_slot (rtx insn)
2337 {
2338 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2339
2340 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2341 }
2342
2343 const char *
2344 output_far_jump (rtx insn, rtx op)
2345 {
2346 struct { rtx lab, reg, op; } this_jmp;
2347 rtx braf_base_lab = NULL_RTX;
2348 const char *jump;
2349 int far;
2350 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2351 rtx prev;
2352
2353 this_jmp.lab = gen_label_rtx ();
2354
2355 if (TARGET_SH2
2356 && offset >= -32764
2357 && offset - get_attr_length (insn) <= 32766)
2358 {
2359 far = 0;
2360 jump = "mov.w %O0,%1; braf %1";
2361 }
2362 else
2363 {
2364 far = 1;
2365 if (flag_pic)
2366 {
2367 if (TARGET_SH2)
2368 jump = "mov.l %O0,%1; braf %1";
2369 else
2370 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2371 }
2372 else
2373 jump = "mov.l %O0,%1; jmp @%1";
2374 }
2375 /* If we have a scratch register available, use it. */
2376 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2377 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2378 {
2379 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2380 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2381 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2382 output_asm_insn (jump, &this_jmp.lab);
2383 if (dbr_sequence_length ())
2384 print_slot (final_sequence);
2385 else
2386 output_asm_insn ("nop", 0);
2387 }
2388 else
2389 {
2390 /* Output the delay slot insn first if any. */
2391 if (dbr_sequence_length ())
2392 print_slot (final_sequence);
2393
2394 this_jmp.reg = gen_rtx_REG (SImode, 13);
2395 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2396 Fortunately, MACL is fixed and call-clobbered, and we never
2397 need its value across jumps, so save r13 in it instead of on
2398 the stack. */
2399 if (TARGET_SH5)
2400 output_asm_insn ("lds r13, macl", 0);
2401 else
2402 output_asm_insn ("mov.l r13,@-r15", 0);
2403 output_asm_insn (jump, &this_jmp.lab);
2404 if (TARGET_SH5)
2405 output_asm_insn ("sts macl, r13", 0);
2406 else
2407 output_asm_insn ("mov.l @r15+,r13", 0);
2408 }
2409 if (far && flag_pic && TARGET_SH2)
2410 {
2411 braf_base_lab = gen_label_rtx ();
2412 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2413 CODE_LABEL_NUMBER (braf_base_lab));
2414 }
2415 if (far)
2416 output_asm_insn (".align 2", 0);
2417 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2418 this_jmp.op = op;
2419 if (far && flag_pic)
2420 {
2421 if (TARGET_SH2)
2422 this_jmp.lab = braf_base_lab;
2423 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2424 }
2425 else
2426 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2427 return "";
2428 }
2429
2430 /* Local label counter, used for constants in the pool and inside
2431 pattern branches. */
2432
2433 static int lf = 100;
2434
2435 /* Output code for ordinary branches. */
2436
2437 const char *
2438 output_branch (int logic, rtx insn, rtx *operands)
2439 {
2440 switch (get_attr_length (insn))
2441 {
2442 case 6:
2443 /* This can happen if filling the delay slot has caused a forward
2444 branch to exceed its range (we could reverse it, but only
2445 when we know we won't overextend other branches; this should
2446 best be handled by relaxation).
2447 It can also happen when other condbranches hoist delay slot insns
2448 from their destination, thus leading to code size increase.
2449 But the branch will still be in the range -4092..+4098 bytes. */
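/* For instance (roughly), an out-of-range "bt .Ltarget" is emitted here as
   an inverted short branch around an unconditional bra:

	bf	.LF100
	bra	.Ltarget
	nop
   .LF100:
 */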
2450
2451 if (! TARGET_RELAX)
2452 {
2453 int label = lf++;
2454 /* The call to print_slot will clobber the operands. */
2455 rtx op0 = operands[0];
2456
2457 /* If the instruction in the delay slot is annulled (true), then
2458 there is no delay slot where we can put it now. The only safe
2459 place for it is after the label. final will do that by default. */
2460
2461 if (final_sequence
2462 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2463 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2464 {
2465 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2466 ASSEMBLER_DIALECT ? "/" : ".", label);
2467 print_slot (final_sequence);
2468 }
2469 else
2470 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2471
2472 output_asm_insn ("bra\t%l0", &op0);
2473 fprintf (asm_out_file, "\tnop\n");
2474 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2475
2476 return "";
2477 }
2478 /* When relaxing, handle this like a short branch. The linker
2479 will fix it up if it still doesn't fit after relaxation. */
2480 case 2:
2481 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2482
2483 /* These are for SH2e, in which we have to account for the
2484 extra nop because of the hardware bug in annulled branches. */
2485 case 8:
2486 if (! TARGET_RELAX)
2487 {
2488 int label = lf++;
2489
2490 gcc_assert (!final_sequence
2491 || !(INSN_ANNULLED_BRANCH_P
2492 (XVECEXP (final_sequence, 0, 0))));
2493 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2494 logic ? "f" : "t",
2495 ASSEMBLER_DIALECT ? "/" : ".", label);
2496 fprintf (asm_out_file, "\tnop\n");
2497 output_asm_insn ("bra\t%l0", operands);
2498 fprintf (asm_out_file, "\tnop\n");
2499 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2500
2501 return "";
2502 }
2503 /* When relaxing, fall through. */
2504 case 4:
2505 {
2506 char buffer[10];
2507
2508 sprintf (buffer, "b%s%ss\t%%l0",
2509 logic ? "t" : "f",
2510 ASSEMBLER_DIALECT ? "/" : ".");
2511 output_asm_insn (buffer, &operands[0]);
2512 return "nop";
2513 }
2514
2515 default:
2516 /* There should be no longer branches now - that would
2517 indicate that something has destroyed the branches set
2518 up in machine_dependent_reorg. */
2519 gcc_unreachable ();
2520 }
2521 }
2522
2523 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2524 fill in operands 9 as a label to the successor insn.
2525 We try to use jump threading where possible.
2526 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2527 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2528 follow jmp and bt, if the address is in range. */
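/* Roughly: for the ieee_ccmpeq template below ("bt %l9; fcmp/eq %1,%0"),
   operands[9] is normally a fresh label placed just past the insn; if the
   next insn is a conditional branch on a matching condition and its target
   is in range, the internal branch is aimed straight at that target, and if
   the condition is the opposite one, the label is placed past that branch
   instead.  */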
2529 const char *
2530 output_branchy_insn (enum rtx_code code, const char *templ,
2531 rtx insn, rtx *operands)
2532 {
2533 rtx next_insn = NEXT_INSN (insn);
2534
2535 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2536 {
2537 rtx src = SET_SRC (PATTERN (next_insn));
2538 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2539 {
2540 /* Following branch not taken */
2541 operands[9] = gen_label_rtx ();
2542 emit_label_after (operands[9], next_insn);
2543 INSN_ADDRESSES_NEW (operands[9],
2544 INSN_ADDRESSES (INSN_UID (next_insn))
2545 + get_attr_length (next_insn));
2546 return templ;
2547 }
2548 else
2549 {
2550 int offset = (branch_dest (next_insn)
2551 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2552 if (offset >= -252 && offset <= 258)
2553 {
2554 if (GET_CODE (src) == IF_THEN_ELSE)
2555 /* branch_true */
2556 src = XEXP (src, 1);
2557 operands[9] = src;
2558 return templ;
2559 }
2560 }
2561 }
2562 operands[9] = gen_label_rtx ();
2563 emit_label_after (operands[9], insn);
2564 INSN_ADDRESSES_NEW (operands[9],
2565 INSN_ADDRESSES (INSN_UID (insn))
2566 + get_attr_length (insn));
2567 return templ;
2568 }
2569
2570 const char *
2571 output_ieee_ccmpeq (rtx insn, rtx *operands)
2572 {
2573 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2574 insn, operands);
2575 }
2576 \f
2577 /* Output the start of the assembler file. */
2578
2579 static void
2580 sh_file_start (void)
2581 {
2582 default_file_start ();
2583
2584 #ifdef SYMBIAN
2585 /* Declare the .directive section before it is used. */
2586 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2587 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2588 #endif
2589
2590 if (TARGET_ELF)
2591 /* We need to show the text section with the proper
2592 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2593 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2594 will complain. We can teach GAS specifically about the
2595 default attributes for our choice of text section, but
2596 then we would have to change GAS again if/when we change
2597 the text section name. */
2598 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2599 else
2600 /* Switch to the data section so that the coffsem symbol
2601 isn't in the text section. */
2602 switch_to_section (data_section);
2603
2604 if (TARGET_LITTLE_ENDIAN)
2605 fputs ("\t.little\n", asm_out_file);
2606
2607 if (!TARGET_ELF)
2608 {
2609 if (TARGET_SHCOMPACT)
2610 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2611 else if (TARGET_SHMEDIA)
2612 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2613 TARGET_SHMEDIA64 ? 64 : 32);
2614 }
2615 }
2616 \f
2617 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2618
2619 static bool
2620 unspec_caller_rtx_p (rtx pat)
2621 {
2622 rtx base, offset;
2623 int i;
2624
2625 split_const (pat, &base, &offset);
2626 if (GET_CODE (base) == UNSPEC)
2627 {
2628 if (XINT (base, 1) == UNSPEC_CALLER)
2629 return true;
2630 for (i = 0; i < XVECLEN (base, 0); i++)
2631 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2632 return true;
2633 }
2634 return false;
2635 }
2636
2637 /* Indicate that INSN cannot be duplicated. This is true for an insn
2638 that generates a unique label. */
2639
2640 static bool
2641 sh_cannot_copy_insn_p (rtx insn)
2642 {
2643 rtx pat;
2644
2645 if (!reload_completed || !flag_pic)
2646 return false;
2647
2648 if (!NONJUMP_INSN_P (insn))
2649 return false;
2650 if (asm_noperands (insn) >= 0)
2651 return false;
2652
2653 pat = PATTERN (insn);
2654 if (GET_CODE (pat) != SET)
2655 return false;
2656 pat = SET_SRC (pat);
2657
2658 if (unspec_caller_rtx_p (pat))
2659 return true;
2660
2661 return false;
2662 }
2663 \f
2664 /* Actual number of instructions used to make a shift by N. */
2665 static const char ashiftrt_insns[] =
2666 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2667
2668 /* Left shift and logical right shift are the same. */
2669 static const char shift_insns[] =
2670 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2671
2672 /* Individual shift amounts needed to get the above length sequences.
2673 One bit right shifts clobber the T bit, so when possible, put one bit
2674 shifts in the middle of the sequence, so the ends are eligible for
2675 branch delay slots. */
2676 static const short shift_amounts[32][5] = {
2677 {0}, {1}, {2}, {2, 1},
2678 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2679 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2680 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2681 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2682 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2683 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2684 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
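/* For illustration: the SH has constant shift instructions only for counts
   of 1, 2, 8 and 16 (and arithmetic right shifts only by 1), so e.g. a
   constant left shift by 14 uses shift_amounts[14] = {8, -2, 8}, roughly

	shll8	rn
	shlr2	rn
	shll8	rn

   three insns instead of fourteen single-bit shifts; a negative entry means
   one shift in the opposite direction (see gen_ashift below).  */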
2685
2686 /* Likewise, but for shift amounts < 16, up to three highmost bits
2687 might be clobbered. This is typically used when combined with some
2688 kind of sign or zero extension. */
2689
2690 static const char ext_shift_insns[] =
2691 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2692
2693 static const short ext_shift_amounts[32][4] = {
2694 {0}, {1}, {2}, {2, 1},
2695 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2696 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2697 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2698 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2699 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2700 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2701 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2702
2703 /* Assuming we have a value that has been sign-extended by at least one bit,
2704 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2705 to shift it by N without data loss, and quicker than by other means? */
2706 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
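/* (n | 8) == 15 holds exactly for n == 7 and n == 15, the two counts whose
   ext_shift_amounts sequences ({8, -1} and {16, -1}) end in a one-bit right
   shift, which can then be made arithmetic to restore the sign bit.  */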
2707
2708 /* This is used in length attributes in sh.md to help compute the length
2709 of arbitrary constant shift instructions. */
2710
2711 int
2712 shift_insns_rtx (rtx insn)
2713 {
2714 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2715 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2716 enum rtx_code shift_code = GET_CODE (set_src);
2717
2718 switch (shift_code)
2719 {
2720 case ASHIFTRT:
2721 return ashiftrt_insns[shift_count];
2722 case LSHIFTRT:
2723 case ASHIFT:
2724 return shift_insns[shift_count];
2725 default:
2726 gcc_unreachable ();
2727 }
2728 }
2729
2730 /* Return the cost of a shift. */
2731
2732 static inline int
2733 shiftcosts (rtx x)
2734 {
2735 int value;
2736
2737 if (TARGET_SHMEDIA)
2738 return 1;
2739
2740 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2741 {
2742 if (GET_MODE (x) == DImode
2743 && CONST_INT_P (XEXP (x, 1))
2744 && INTVAL (XEXP (x, 1)) == 1)
2745 return 2;
2746
2747 /* Everything else is invalid, because there is no pattern for it. */
2748 return MAX_COST;
2749 }
2750 /* If shifting by a non-constant, this will be expensive. */
2751 if (!CONST_INT_P (XEXP (x, 1)))
2752 return SH_DYNAMIC_SHIFT_COST;
2753
2754 /* Otherwise, return the true cost in instructions. Cope with out of range
2755 shift counts more or less arbitrarily. */
2756 value = INTVAL (XEXP (x, 1)) & 31;
2757
2758 if (GET_CODE (x) == ASHIFTRT)
2759 {
2760 int cost = ashiftrt_insns[value];
2761 /* If SH3, then we put the constant in a reg and use shad. */
2762 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2763 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2764 return cost;
2765 }
2766 else
2767 return shift_insns[value];
2768 }
2769
2770 /* Return the cost of an AND operation. */
2771
2772 static inline int
2773 andcosts (rtx x)
2774 {
2775 int i;
2776
2777 /* Anding with a register is a single cycle and instruction. */
2778 if (!CONST_INT_P (XEXP (x, 1)))
2779 return 1;
2780
2781 i = INTVAL (XEXP (x, 1));
2782
2783 if (TARGET_SHMEDIA)
2784 {
2785 if (satisfies_constraint_I10 (XEXP (x, 1))
2786 || satisfies_constraint_J16 (XEXP (x, 1)))
2787 return 1;
2788 else
2789 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2790 }
2791
2792 /* These constants are single cycle extu.[bw] instructions. */
2793 if (i == 0xff || i == 0xffff)
2794 return 1;
2795 /* Constants that can be used in an and immediate instruction in a single
2796 cycle, but this requires r0, so make it a little more expensive. */
2797 if (CONST_OK_FOR_K08 (i))
2798 return 2;
2799 /* Constants that can be loaded with a mov immediate and an and.
2800 This case is probably unnecessary. */
2801 if (CONST_OK_FOR_I08 (i))
2802 return 2;
2803 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2804 This case is probably unnecessary. */
2805 return 3;
2806 }
2807
2808 /* Return the cost of an addition or a subtraction. */
2809
2810 static inline int
2811 addsubcosts (rtx x)
2812 {
2813 /* Adding a register is a single cycle insn. */
2814 if (REG_P (XEXP (x, 1))
2815 || GET_CODE (XEXP (x, 1)) == SUBREG)
2816 return 1;
2817
2818 /* Likewise for small constants. */
2819 if (CONST_INT_P (XEXP (x, 1))
2820 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2821 return 1;
2822
2823 if (TARGET_SHMEDIA)
2824 switch (GET_CODE (XEXP (x, 1)))
2825 {
2826 case CONST:
2827 case LABEL_REF:
2828 case SYMBOL_REF:
2829 return TARGET_SHMEDIA64 ? 5 : 3;
2830
2831 case CONST_INT:
2832 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2833 return 2;
2834 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2835 return 3;
2836 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2837 return 4;
2838
2839 /* Fall through. */
2840 default:
2841 return 5;
2842 }
2843
2844 /* Any other constant requires a 2 cycle pc-relative load plus an
2845 addition. */
2846 return 3;
2847 }
2848
2849 /* Return the cost of a multiply. */
2850 static inline int
2851 multcosts (rtx x ATTRIBUTE_UNUSED)
2852 {
2853 if (sh_multcost >= 0)
2854 return sh_multcost;
2855 if (TARGET_SHMEDIA)
2856 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2857 accept constants. Ideally, we would use a cost of one or two and
2858 add the cost of the operand, but disregard the latter when inside loops
2859 and loop invariant code motion is still to follow.
2860 Using a multiply first and splitting it later if it's a loss
2861 doesn't work because of different sign / zero extension semantics
2862 of multiplies vs. shifts. */
2863 return TARGET_SMALLCODE ? 2 : 3;
2864
2865 if (TARGET_SH2)
2866 {
2867 /* We have a mul insn, so we can never take more than the mul and the
2868 read of the mac reg, but count more because of the latency and extra
2869 reg usage. */
2870 if (TARGET_SMALLCODE)
2871 return 2;
2872 return 3;
2873 }
2874
2875 /* If we're aiming at small code, then just count the number of
2876 insns in a multiply call sequence. */
2877 if (TARGET_SMALLCODE)
2878 return 5;
2879
2880 /* Otherwise count all the insns in the routine we'd be calling too. */
2881 return 20;
2882 }
2883
2884 /* Compute a (partial) cost for rtx X. Return true if the complete
2885 cost has been computed, and false if subexpressions should be
2886 scanned. In either case, *TOTAL contains the cost result. */
2887
2888 static bool
2889 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2890 bool speed ATTRIBUTE_UNUSED)
2891 {
2892 switch (code)
2893 {
2894 case CONST_INT:
2895 if (TARGET_SHMEDIA)
2896 {
2897 if (INTVAL (x) == 0)
2898 *total = 0;
2899 else if (outer_code == AND && and_operand ((x), DImode))
2900 *total = 0;
2901 else if ((outer_code == IOR || outer_code == XOR
2902 || outer_code == PLUS)
2903 && CONST_OK_FOR_I10 (INTVAL (x)))
2904 *total = 0;
2905 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2906 *total = COSTS_N_INSNS (outer_code != SET);
2907 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2908 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2909 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2910 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2911 else
2912 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2913 return true;
2914 }
2915 if (CONST_OK_FOR_I08 (INTVAL (x)))
2916 *total = 0;
2917 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2918 && CONST_OK_FOR_K08 (INTVAL (x)))
2919 *total = 1;
2920 /* prepare_cmp_insn will force costly constants into registers before
2921 the cbranch[sd]i4 patterns can see them, so preserve potentially
2922 interesting ones not covered by I08 above. */
2923 else if (outer_code == COMPARE
2924 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2925 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2926 || INTVAL (x) == 0x7fffffff
2927 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2928 *total = 1;
2929 else
2930 *total = 8;
2931 return true;
2932
2933 case CONST:
2934 case LABEL_REF:
2935 case SYMBOL_REF:
2936 if (TARGET_SHMEDIA64)
2937 *total = COSTS_N_INSNS (4);
2938 else if (TARGET_SHMEDIA32)
2939 *total = COSTS_N_INSNS (2);
2940 else
2941 *total = 5;
2942 return true;
2943
2944 case CONST_DOUBLE:
2945 if (TARGET_SHMEDIA)
2946 *total = COSTS_N_INSNS (4);
2947 /* prepare_cmp_insn will force costly constants into registers before
2948 the cbranchdi4 pattern can see them, so preserve potentially
2949 interesting ones. */
2950 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2951 *total = 1;
2952 else
2953 *total = 10;
2954 return true;
2955 case CONST_VECTOR:
2956 if (x == CONST0_RTX (GET_MODE (x)))
2957 *total = 0;
2958 else if (sh_1el_vec (x, VOIDmode))
2959 *total = outer_code != SET;
2960 if (sh_rep_vec (x, VOIDmode))
2961 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2962 + (outer_code != SET));
2963 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2964 return true;
2965
2966 case PLUS:
2967 case MINUS:
2968 *total = COSTS_N_INSNS (addsubcosts (x));
2969 return true;
2970
2971 case AND:
2972 *total = COSTS_N_INSNS (andcosts (x));
2973 return true;
2974
2975 case MULT:
2976 *total = COSTS_N_INSNS (multcosts (x));
2977 return true;
2978
2979 case ASHIFT:
2980 case ASHIFTRT:
2981 case LSHIFTRT:
2982 *total = COSTS_N_INSNS (shiftcosts (x));
2983 return true;
2984
2985 case DIV:
2986 case UDIV:
2987 case MOD:
2988 case UMOD:
2989 *total = COSTS_N_INSNS (20);
2990 return true;
2991
2992 case PARALLEL:
2993 if (sh_1el_vec (x, VOIDmode))
2994 *total = outer_code != SET;
2995 if (sh_rep_vec (x, VOIDmode))
2996 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2997 + (outer_code != SET));
2998 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2999 return true;
3000
3001 case FLOAT:
3002 case FIX:
3003 *total = 100;
3004 return true;
3005
3006 default:
3007 return false;
3008 }
3009 }
3010
3011 /* Compute the cost of an address. For the SH, all valid addresses are
3012 the same cost. Use a slightly higher cost for reg + reg addressing,
3013 since it increases pressure on r0. */
3014
3015 static int
3016 sh_address_cost (rtx X,
3017 bool speed ATTRIBUTE_UNUSED)
3018 {
3019 return (GET_CODE (X) == PLUS
3020 && ! CONSTANT_P (XEXP (X, 1))
3021 && ! TARGET_SHMEDIA ? 1 : 0);
3022 }
3023
3024 /* Code to expand a shift. */
3025
3026 void
3027 gen_ashift (int type, int n, rtx reg)
3028 {
3029 /* Negative values here come from the shift_amounts array. */
3030 if (n < 0)
3031 {
3032 if (type == ASHIFT)
3033 type = LSHIFTRT;
3034 else
3035 type = ASHIFT;
3036 n = -n;
3037 }
3038
3039 switch (type)
3040 {
3041 case ASHIFTRT:
3042 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3043 break;
3044 case LSHIFTRT:
3045 if (n == 1)
3046 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3047 else
3048 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3049 break;
3050 case ASHIFT:
3051 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3052 break;
3053 }
3054 }
3055
3056 /* Same for HImode */
3057
3058 void
3059 gen_ashift_hi (int type, int n, rtx reg)
3060 {
3061 /* Negative values here come from the shift_amounts array. */
3062 if (n < 0)
3063 {
3064 if (type == ASHIFT)
3065 type = LSHIFTRT;
3066 else
3067 type = ASHIFT;
3068 n = -n;
3069 }
3070
3071 switch (type)
3072 {
3073 case ASHIFTRT:
3074 case LSHIFTRT:
3075 /* We don't have HImode right shift operations because using the
3076 ordinary 32 bit shift instructions for that doesn't generate proper
3077 zero/sign extension.
3078 gen_ashift_hi is only called in contexts where we know that the
3079 sign extension works out correctly. */
3080 {
3081 int offset = 0;
3082 if (GET_CODE (reg) == SUBREG)
3083 {
3084 offset = SUBREG_BYTE (reg);
3085 reg = SUBREG_REG (reg);
3086 }
3087 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3088 break;
3089 }
3090 case ASHIFT:
3091 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3092 break;
3093 }
3094 }
3095
3096 /* Output RTL to split a constant shift into its component SH constant
3097 shift instructions. */
3098
3099 void
3100 gen_shifty_op (int code, rtx *operands)
3101 {
3102 int value = INTVAL (operands[2]);
3103 int max, i;
3104
3105 /* Truncate the shift count in case it is out of bounds. */
3106 value = value & 31;
3107
3108 if (value == 31)
3109 {
3110 if (code == LSHIFTRT)
3111 {
3112 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3113 emit_insn (gen_movt (operands[0]));
3114 return;
3115 }
3116 else if (code == ASHIFT)
3117 {
3118 /* There is a two instruction sequence for 31 bit left shifts,
3119 but it requires r0. */
3120 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3121 {
3122 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3123 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3124 return;
3125 }
3126 }
3127 }
3128 else if (value == 0)
3129 {
3130 /* This can happen even when optimizing, if there were subregs before
3131 reload. Don't output a nop here, as this is never optimized away;
3132 use a no-op move instead. */
3133 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3134 return;
3135 }
3136
3137 max = shift_insns[value];
3138 for (i = 0; i < max; i++)
3139 gen_ashift (code, shift_amounts[value][i], operands[0]);
3140 }
3141
3142 /* Same as above, but optimized for values where the topmost bits don't
3143 matter. */
3144
3145 void
3146 gen_shifty_hi_op (int code, rtx *operands)
3147 {
3148 int value = INTVAL (operands[2]);
3149 int max, i;
3150 void (*gen_fun) (int, int, rtx);
3151
3152 /* This operation is used by and_shl for SImode values with a few
3153 high bits known to be cleared. */
3154 value &= 31;
3155 if (value == 0)
3156 {
3157 emit_insn (gen_nop ());
3158 return;
3159 }
3160
3161 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3162 if (code == ASHIFT)
3163 {
3164 max = ext_shift_insns[value];
3165 for (i = 0; i < max; i++)
3166 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3167 }
3168 else
3169 /* When shifting right, emit the shifts in reverse order, so that
3170 solitary negative values come first. */
3171 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3172 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3173 }
3174
3175 /* Output RTL for an arithmetic right shift. */
3176
3177 /* ??? Rewrite to use super-optimizer sequences. */
3178
3179 int
3180 expand_ashiftrt (rtx *operands)
3181 {
3182 rtx wrk;
3183 char func[18];
3184 int value;
3185
3186 if (TARGET_SH3)
3187 {
3188 if (!CONST_INT_P (operands[2]))
3189 {
3190 rtx count = copy_to_mode_reg (SImode, operands[2]);
3191 emit_insn (gen_negsi2 (count, count));
3192 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3193 return 1;
3194 }
3195 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3196 > 1 + SH_DYNAMIC_SHIFT_COST)
3197 {
3198 rtx count
3199 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3200 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3201 return 1;
3202 }
3203 }
3204 if (!CONST_INT_P (operands[2]))
3205 return 0;
3206
3207 value = INTVAL (operands[2]) & 31;
3208
3209 if (value == 31)
3210 {
3211 /* If we are called from abs expansion, arrange things so that we
3212 can use a single MT instruction that doesn't clobber the source,
3213 if LICM can hoist out the load of the constant zero. */
3214 if (currently_expanding_to_rtl)
3215 {
3216 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3217 operands[1]));
3218 emit_insn (gen_mov_neg_si_t (operands[0]));
3219 return 1;
3220 }
3221 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3222 return 1;
3223 }
3224 else if (value >= 16 && value <= 19)
3225 {
3226 wrk = gen_reg_rtx (SImode);
3227 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3228 value -= 16;
3229 while (value--)
3230 gen_ashift (ASHIFTRT, 1, wrk);
3231 emit_move_insn (operands[0], wrk);
3232 return 1;
3233 }
3234 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3235 else if (value <= 5)
3236 {
3237 wrk = gen_reg_rtx (SImode);
3238 emit_move_insn (wrk, operands[1]);
3239 while (value--)
3240 gen_ashift (ASHIFTRT, 1, wrk);
3241 emit_move_insn (operands[0], wrk);
3242 return 1;
3243 }
3244
3245 wrk = gen_reg_rtx (Pmode);
3246
3247 /* Load the value into an arg reg and call a helper. */
3248 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3249 sprintf (func, "__ashiftrt_r4_%d", value);
3250 function_symbol (wrk, func, SFUNC_STATIC);
3251 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3252 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3253 return 1;
3254 }
3255
3256 int
3257 sh_dynamicalize_shift_p (rtx count)
3258 {
3259 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3260 }
3261
3262 /* Try to find a good way to implement the combiner pattern
3263 [(set (match_operand:SI 0 "register_operand" "r")
3264 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3265 (match_operand:SI 2 "const_int_operand" "n"))
3266 (match_operand:SI 3 "const_int_operand" "n"))) .
3267 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3268 return 0 for simple right / left or left/right shift combination.
3269 return 1 for a combination of shifts with zero_extend.
3270 return 2 for a combination of shifts with an AND that needs r0.
3271 return 3 for a combination of shifts with an AND that needs an extra
3272 scratch register, when the three highmost bits of the AND mask are clear.
3273 return 4 for a combination of shifts with an AND that needs an extra
3274 scratch register, when any of the three highmost bits of the AND mask
3275 is set.
3276 If ATTRP is set, store an initial right shift width in ATTRP[0],
3277 and the instruction length in ATTRP[1]. These values are not valid
3278 when returning 0.
3279 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3280 shift_amounts for the last shift value that is to be used before the
3281 sign extend. */
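/* For instance (an illustrative case), (x << 1) & 0x1fe, i.e.
   ((x & 0xff) << 1), is cheapest as a zero extension followed by a single
   shift, roughly extu.b then shll, so shl_and_kind returns 1 with a
   length of 2.  */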
3282 int
3283 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3284 {
3285 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3286 int left = INTVAL (left_rtx), right;
3287 int best = 0;
3288 int cost, best_cost = 10000;
3289 int best_right = 0, best_len = 0;
3290 int i;
3291 int can_ext;
3292
3293 if (left < 0 || left > 31)
3294 return 0;
3295 if (CONST_INT_P (mask_rtx))
3296 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3297 else
3298 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3299 /* Can this be expressed as a right shift / left shift pair? */
3300 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3301 right = exact_log2 (lsb);
3302 mask2 = ~(mask + lsb - 1);
3303 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3304 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
3305 if (! mask2)
3306 best_cost = shift_insns[right] + shift_insns[right + left];
3307 /* mask has no trailing zeroes <==> ! right */
3308 else if (! right && mask2 == ~(lsb2 - 1))
3309 {
3310 int late_right = exact_log2 (lsb2);
3311 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3312 }
3313 /* Try to use zero extend. */
3314 if (mask2 == ~(lsb2 - 1))
3315 {
3316 int width, first;
3317
3318 for (width = 8; width <= 16; width += 8)
3319 {
3320 /* Can we zero-extend right away? */
3321 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3322 {
3323 cost
3324 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3325 if (cost < best_cost)
3326 {
3327 best = 1;
3328 best_cost = cost;
3329 best_right = right;
3330 best_len = cost;
3331 if (attrp)
3332 attrp[2] = -1;
3333 }
3334 continue;
3335 }
3336 /* ??? Could try to put zero extend into initial right shift,
3337 or even shift a bit left before the right shift. */
3338 /* Determine value of first part of left shift, to get to the
3339 zero extend cut-off point. */
3340 first = width - exact_log2 (lsb2) + right;
3341 if (first >= 0 && right + left - first >= 0)
3342 {
3343 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3344 + ext_shift_insns[right + left - first];
3345 if (cost < best_cost)
3346 {
3347 best = 1;
3348 best_cost = cost;
3349 best_right = right;
3350 best_len = cost;
3351 if (attrp)
3352 attrp[2] = first;
3353 }
3354 }
3355 }
3356 }
3357 /* Try to use r0 AND pattern */
3358 for (i = 0; i <= 2; i++)
3359 {
3360 if (i > right)
3361 break;
3362 if (! CONST_OK_FOR_K08 (mask >> i))
3363 continue;
3364 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3365 if (cost < best_cost)
3366 {
3367 best = 2;
3368 best_cost = cost;
3369 best_right = i;
3370 best_len = cost - 1;
3371 }
3372 }
3373 /* Try to use a scratch register to hold the AND operand. */
3374 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3375 for (i = 0; i <= 2; i++)
3376 {
3377 if (i > right)
3378 break;
3379 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3380 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3381 if (cost < best_cost)
3382 {
3383 best = 4 - can_ext;
3384 best_cost = cost;
3385 best_right = i;
3386 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3387 }
3388 }
3389
3390 if (attrp)
3391 {
3392 attrp[0] = best_right;
3393 attrp[1] = best_len;
3394 }
3395 return best;
3396 }
3397
3398 /* This is used in length attributes of the unnamed instructions
3399 corresponding to shl_and_kind return values of 1 and 2. */
3400 int
3401 shl_and_length (rtx insn)
3402 {
3403 rtx set_src, left_rtx, mask_rtx;
3404 int attributes[3];
3405
3406 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3407 left_rtx = XEXP (XEXP (set_src, 0), 1);
3408 mask_rtx = XEXP (set_src, 1);
3409 shl_and_kind (left_rtx, mask_rtx, attributes);
3410 return attributes[1];
3411 }
3412
3413 /* This is used in the length attribute of the and_shl_scratch instruction. */
3414
3415 int
3416 shl_and_scr_length (rtx insn)
3417 {
3418 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3419 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3420 rtx op = XEXP (set_src, 0);
3421 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3422 op = XEXP (XEXP (op, 0), 0);
3423 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3424 }
3425
3426 /* Generate rtl for instructions for which shl_and_kind advised a particular
3427 method of generating them, i.e. returned nonzero. */
3428
3429 int
3430 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3431 {
3432 int attributes[3];
3433 unsigned HOST_WIDE_INT mask;
3434 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3435 int right, total_shift;
3436 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3437
3438 right = attributes[0];
3439 total_shift = INTVAL (left_rtx) + right;
3440 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3441 switch (kind)
3442 {
3443 default:
3444 return -1;
3445 case 1:
3446 {
3447 int first = attributes[2];
3448 rtx operands[3];
3449
3450 if (first < 0)
3451 {
3452 emit_insn ((mask << right) <= 0xff
3453 ? gen_zero_extendqisi2 (dest,
3454 gen_lowpart (QImode, source))
3455 : gen_zero_extendhisi2 (dest,
3456 gen_lowpart (HImode, source)));
3457 source = dest;
3458 }
3459 if (source != dest)
3460 emit_insn (gen_movsi (dest, source));
3461 operands[0] = dest;
3462 if (right)
3463 {
3464 operands[2] = GEN_INT (right);
3465 gen_shifty_hi_op (LSHIFTRT, operands);
3466 }
3467 if (first > 0)
3468 {
3469 operands[2] = GEN_INT (first);
3470 gen_shifty_hi_op (ASHIFT, operands);
3471 total_shift -= first;
3472 mask <<= first;
3473 }
3474 if (first >= 0)
3475 emit_insn (mask <= 0xff
3476 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3477 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3478 if (total_shift > 0)
3479 {
3480 operands[2] = GEN_INT (total_shift);
3481 gen_shifty_hi_op (ASHIFT, operands);
3482 }
3483 break;
3484 }
3485 case 4:
3486 shift_gen_fun = gen_shifty_op;
3487 case 3:
3488 /* If the topmost bit that matters is set, set the topmost bits
3489 that don't matter. This way, we might be able to get a shorter
3490 signed constant. */
3491 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3492 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3493 case 2:
3494 /* Don't expand fine-grained when combining, because that will
3495 make the pattern fail. */
3496 if (currently_expanding_to_rtl
3497 || reload_in_progress || reload_completed)
3498 {
3499 rtx operands[3];
3500
3501 /* Cases 3 and 4 should be handled by this split
3502 only while combining */
3503 gcc_assert (kind <= 2);
3504 if (right)
3505 {
3506 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3507 source = dest;
3508 }
3509 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3510 if (total_shift)
3511 {
3512 operands[0] = dest;
3513 operands[1] = dest;
3514 operands[2] = GEN_INT (total_shift);
3515 shift_gen_fun (ASHIFT, operands);
3516 }
3517 break;
3518 }
3519 else
3520 {
3521 int neg = 0;
3522 if (kind != 4 && total_shift < 16)
3523 {
3524 neg = -ext_shift_amounts[total_shift][1];
3525 if (neg > 0)
3526 neg -= ext_shift_amounts[total_shift][2];
3527 else
3528 neg = 0;
3529 }
3530 emit_insn (gen_and_shl_scratch (dest, source,
3531 GEN_INT (right),
3532 GEN_INT (mask),
3533 GEN_INT (total_shift + neg),
3534 GEN_INT (neg)));
3535 emit_insn (gen_movsi (dest, dest));
3536 break;
3537 }
3538 }
3539 return 0;
3540 }
3541
3542 /* Try to find a good way to implement the combiner pattern
3543 [(set (match_operand:SI 0 "register_operand" "=r")
3544 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3545 (match_operand:SI 2 "const_int_operand" "n")
3546 (match_operand:SI 3 "const_int_operand" "n")
3547 (const_int 0)))
3548 (clobber (reg:SI T_REG))]
3549 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3550 return 0 for simple left / right shift combination.
3551 return 1 for left shift / 8 bit sign extend / left shift.
3552 return 2 for left shift / 16 bit sign extend / left shift.
3553 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3554 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3555 return 5 for left shift / 16 bit sign extend / right shift
3556 return 6 for < 8 bit sign extend / left shift.
3557 return 7 for < 8 bit sign extend / left shift / single right shift.
3558 If COSTP is nonzero, assign the calculated cost to *COSTP. */
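/* For instance (an illustrative case), LEFT = 8 and SIZE = 16, i.e.
   sign-extending the 16-bit field that (x << 8) leaves in the low half,
   is cheapest as kind 5: roughly shll8 followed by exts.w, two insns,
   against four for the plain left shift / right shift pair.  */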
3559
3560 int
3561 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3562 {
3563 int left, size, insize, ext;
3564 int cost = 0, best_cost;
3565 int kind;
3566
3567 left = INTVAL (left_rtx);
3568 size = INTVAL (size_rtx);
3569 insize = size - left;
3570 gcc_assert (insize > 0);
3571 /* Default to left / right shift. */
3572 kind = 0;
3573 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3574 if (size <= 16)
3575 {
3576 /* 16 bit shift / sign extend / 16 bit shift */
3577 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3578 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3579 below, by alternative 3 or something even better. */
3580 if (cost < best_cost)
3581 {
3582 kind = 5;
3583 best_cost = cost;
3584 }
3585 }
3586 /* Try a plain sign extend between two shifts. */
3587 for (ext = 16; ext >= insize; ext -= 8)
3588 {
3589 if (ext <= size)
3590 {
3591 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3592 if (cost < best_cost)
3593 {
3594 kind = ext / (unsigned) 8;
3595 best_cost = cost;
3596 }
3597 }
3598 /* Check if we can do a sloppy shift with a final signed shift
3599 restoring the sign. */
3600 if (EXT_SHIFT_SIGNED (size - ext))
3601 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3602 /* If not, maybe it's still cheaper to do the second shift sloppy,
3603 and do a final sign extend? */
3604 else if (size <= 16)
3605 cost = ext_shift_insns[ext - insize] + 1
3606 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3607 else
3608 continue;
3609 if (cost < best_cost)
3610 {
3611 kind = ext / (unsigned) 8 + 2;
3612 best_cost = cost;
3613 }
3614 }
3615 /* Check if we can sign extend in r0 */
3616 if (insize < 8)
3617 {
3618 cost = 3 + shift_insns[left];
3619 if (cost < best_cost)
3620 {
3621 kind = 6;
3622 best_cost = cost;
3623 }
3624 /* Try the same with a final signed shift. */
3625 if (left < 31)
3626 {
3627 cost = 3 + ext_shift_insns[left + 1] + 1;
3628 if (cost < best_cost)
3629 {
3630 kind = 7;
3631 best_cost = cost;
3632 }
3633 }
3634 }
3635 if (TARGET_SH3)
3636 {
3637 /* Try to use a dynamic shift. */
3638 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3639 if (cost < best_cost)
3640 {
3641 kind = 0;
3642 best_cost = cost;
3643 }
3644 }
3645 if (costp)
3646 *costp = cost;
3647 return kind;
3648 }
3649
3650 /* Function to be used in the length attribute of the instructions
3651 implementing this pattern. */
3652
3653 int
3654 shl_sext_length (rtx insn)
3655 {
3656 rtx set_src, left_rtx, size_rtx;
3657 int cost;
3658
3659 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3660 left_rtx = XEXP (XEXP (set_src, 0), 1);
3661 size_rtx = XEXP (set_src, 1);
3662 shl_sext_kind (left_rtx, size_rtx, &cost);
3663 return cost;
3664 }
3665
3666 /* Generate rtl for this pattern */
3667
3668 int
3669 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3670 {
3671 int kind;
3672 int left, size, insize, cost;
3673 rtx operands[3];
3674
3675 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3676 left = INTVAL (left_rtx);
3677 size = INTVAL (size_rtx);
3678 insize = size - left;
3679 switch (kind)
3680 {
3681 case 1:
3682 case 2:
3683 case 3:
3684 case 4:
3685 {
3686 int ext = kind & 1 ? 8 : 16;
3687 int shift2 = size - ext;
3688
3689 /* Don't expand fine-grained when combining, because that will
3690 make the pattern fail. */
3691 if (! currently_expanding_to_rtl
3692 && ! reload_in_progress && ! reload_completed)
3693 {
3694 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3695 emit_insn (gen_movsi (dest, source));
3696 break;
3697 }
3698 if (dest != source)
3699 emit_insn (gen_movsi (dest, source));
3700 operands[0] = dest;
3701 if (ext - insize)
3702 {
3703 operands[2] = GEN_INT (ext - insize);
3704 gen_shifty_hi_op (ASHIFT, operands);
3705 }
3706 emit_insn (kind & 1
3707 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3708 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3709 if (kind <= 2)
3710 {
3711 if (shift2)
3712 {
3713 operands[2] = GEN_INT (shift2);
3714 gen_shifty_op (ASHIFT, operands);
3715 }
3716 }
3717 else
3718 {
3719 if (shift2 > 0)
3720 {
3721 if (EXT_SHIFT_SIGNED (shift2))
3722 {
3723 operands[2] = GEN_INT (shift2 + 1);
3724 gen_shifty_op (ASHIFT, operands);
3725 operands[2] = const1_rtx;
3726 gen_shifty_op (ASHIFTRT, operands);
3727 break;
3728 }
3729 operands[2] = GEN_INT (shift2);
3730 gen_shifty_hi_op (ASHIFT, operands);
3731 }
3732 else if (shift2)
3733 {
3734 operands[2] = GEN_INT (-shift2);
3735 gen_shifty_hi_op (LSHIFTRT, operands);
3736 }
3737 emit_insn (size <= 8
3738 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3739 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3740 }
3741 break;
3742 }
3743 case 5:
3744 {
3745 int i = 16 - size;
3746 if (! currently_expanding_to_rtl
3747 && ! reload_in_progress && ! reload_completed)
3748 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3749 else
3750 {
3751 operands[0] = dest;
3752 operands[2] = GEN_INT (16 - insize);
3753 gen_shifty_hi_op (ASHIFT, operands);
3754 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3755 }
3756 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3757 while (--i >= 0)
3758 gen_ashift (ASHIFTRT, 1, dest);
3759 break;
3760 }
3761 case 6:
3762 case 7:
3763 /* Don't expand fine-grained when combining, because that will
3764 make the pattern fail. */
3765 if (! currently_expanding_to_rtl
3766 && ! reload_in_progress && ! reload_completed)
3767 {
3768 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3769 emit_insn (gen_movsi (dest, source));
3770 break;
3771 }
3772 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3773 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3774 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3775 operands[0] = dest;
3776 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3777 gen_shifty_op (ASHIFT, operands);
3778 if (kind == 7)
3779 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3780 break;
3781 default:
3782 return -1;
3783 }
3784 return 0;
3785 }
3786
3787 /* Prefix a symbol_ref name with "datalabel". */
3788
3789 rtx
3790 gen_datalabel_ref (rtx sym)
3791 {
3792 const char *str;
3793
3794 if (GET_CODE (sym) == LABEL_REF)
3795 return gen_rtx_CONST (GET_MODE (sym),
3796 gen_rtx_UNSPEC (GET_MODE (sym),
3797 gen_rtvec (1, sym),
3798 UNSPEC_DATALABEL));
3799
3800 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3801
3802 str = XSTR (sym, 0);
3803 /* Share all SYMBOL_REF strings with the same value - that is important
3804 for cse. */
3805 str = IDENTIFIER_POINTER (get_identifier (str));
3806 XSTR (sym, 0) = str;
3807
3808 return sym;
3809 }
3810
3811 \f
3812 static alloc_pool label_ref_list_pool;
3813
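/* A singly linked list of label rtxes, allocated from label_ref_list_pool
   above.  It is used for the "end of window" (WEND) lists that are attached
   to the constant pool entries built below.  */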
3814 typedef struct label_ref_list_d
3815 {
3816 rtx label;
3817 struct label_ref_list_d *next;
3818 } *label_ref_list_t;
3819
3820 /* The SH cannot load a large constant into a register; constants have to
3821 come from a pc relative load. The reference of a pc relative load
3822 instruction must be less than 1k in front of the instruction. This
3823 means that we often have to dump a constant inside a function, and
3824 generate code to branch around it.
3825
3826 It is important to minimize this, since the branches will slow things
3827 down and make things bigger.
3828
3829 Worst case code looks like:
3830
3831 mov.l L1,rn
3832 bra L2
3833 nop
3834 align
3835 L1: .long value
3836 L2:
3837 ..
3838
3839 mov.l L3,rn
3840 bra L4
3841 nop
3842 align
3843 L3: .long value
3844 L4:
3845 ..
3846
3847 We fix this by performing a scan before scheduling, which notices which
3848 instructions need to have their operands fetched from the constant table
3849 and builds the table.
3850
3851 The algorithm is:
3852
3853 Scan until we find an instruction which needs a pcrel move. Look forward
3854 to find the last barrier which is within MAX_COUNT bytes of the requirement.
3855 If there isn't one, make one. Process all the instructions between
3856 the found instruction and the barrier.
3857
3858 In the above example, we can tell that L3 is within 1k of L1, so
3859 the first move can be shrunk from the 3 insn+constant sequence into
3860 just 1 insn, and the constant moved to L3 to make:
3861
3862 mov.l L1,rn
3863 ..
3864 mov.l L3,rn
3865 bra L4
3866 nop
3867 align
3868 L3:.long value
3869 L4:.long value
3870
3871 Then the second move becomes the target for the shortening process. */
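/* Roughly, add_constant below records the needed constants in pool_vector,
   find_barrier locates (or creates) a barrier within range, and dump_table
   emits the accumulated table after that barrier.  */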
3872
3873 typedef struct
3874 {
3875 rtx value; /* Value in table. */
3876 rtx label; /* Label of value. */
3877 label_ref_list_t wend; /* End of window. */
3878 enum machine_mode mode; /* Mode of value. */
3879
3880 /* True if this constant is accessed as part of a post-increment
3881 sequence. Note that HImode constants are never accessed in this way. */
3882 bool part_of_sequence_p;
3883 } pool_node;
3884
3885 /* The maximum number of constants that can fit into one pool, since
3886 constants in the range 0..510 are at least 2 bytes long, and in the
3887 range from there to 1018 at least 4 bytes. */
3888
3889 #define MAX_POOL_SIZE 372
3890 static pool_node pool_vector[MAX_POOL_SIZE];
3891 static int pool_size;
3892 static rtx pool_window_label;
3893 static int pool_window_last;
3894
3895 static int max_labelno_before_reorg;
3896
3897 /* ??? If we need a constant in HImode which is the truncated value of a
3898 constant we need in SImode, we could combine the two entries thus saving
3899 two bytes. Is this common enough to be worth the effort of implementing
3900 it? */
3901
3902 /* ??? This stuff should be done at the same time that we shorten branches.
3903 As it is now, we must assume that all branches are the maximum size, and
3904 this causes us to almost always output constant pools sooner than
3905 necessary. */
3906
3907 /* Add a constant to the pool and return its label. */
3908
3909 static rtx
3910 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3911 {
3912 int i;
3913 rtx lab, new_rtx;
3914 label_ref_list_t ref, newref;
3915
3916 /* First see if we've already got it. */
3917 for (i = 0; i < pool_size; i++)
3918 {
3919 if (x->code == pool_vector[i].value->code
3920 && mode == pool_vector[i].mode)
3921 {
3922 if (x->code == CODE_LABEL)
3923 {
3924 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3925 continue;
3926 }
3927 if (rtx_equal_p (x, pool_vector[i].value))
3928 {
3929 lab = new_rtx = 0;
3930 if (! last_value
3931 || ! i
3932 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3933 {
3934 new_rtx = gen_label_rtx ();
3935 LABEL_REFS (new_rtx) = pool_vector[i].label;
3936 pool_vector[i].label = lab = new_rtx;
3937 }
3938 if (lab && pool_window_label)
3939 {
3940 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3941 newref->label = pool_window_label;
3942 ref = pool_vector[pool_window_last].wend;
3943 newref->next = ref;
3944 pool_vector[pool_window_last].wend = newref;
3945 }
3946 if (new_rtx)
3947 pool_window_label = new_rtx;
3948 pool_window_last = i;
3949 return lab;
3950 }
3951 }
3952 }
3953
3954 /* Need a new one. */
3955 pool_vector[pool_size].value = x;
3956 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3957 {
3958 lab = 0;
3959 pool_vector[pool_size - 1].part_of_sequence_p = true;
3960 }
3961 else
3962 lab = gen_label_rtx ();
3963 pool_vector[pool_size].mode = mode;
3964 pool_vector[pool_size].label = lab;
3965 pool_vector[pool_size].wend = NULL;
3966 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3967 if (lab && pool_window_label)
3968 {
3969 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3970 newref->label = pool_window_label;
3971 ref = pool_vector[pool_window_last].wend;
3972 newref->next = ref;
3973 pool_vector[pool_window_last].wend = newref;
3974 }
3975 if (lab)
3976 pool_window_label = lab;
3977 pool_window_last = pool_size;
3978 pool_size++;
3979 return lab;
3980 }
3981
3982 /* Output the literal table. START, if nonzero, is the first instruction
3983 this table is needed for, and also indicates that there is at least one
3984 casesi_worker_2 instruction; we have to emit the operand3 labels from
3985 these insns at a 4-byte aligned position. BARRIER is the barrier
3986 after which we are to place the table. */
3987
3988 static void
3989 dump_table (rtx start, rtx barrier)
3990 {
3991 rtx scan = barrier;
3992 int i;
3993 int need_align = 1;
3994 rtx lab;
3995 label_ref_list_t ref;
3996 int have_df = 0;
3997
3998 /* Do two passes, first time dump out the HI sized constants. */
3999
4000 for (i = 0; i < pool_size; i++)
4001 {
4002 pool_node *p = &pool_vector[i];
4003
4004 if (p->mode == HImode)
4005 {
4006 if (need_align)
4007 {
4008 scan = emit_insn_after (gen_align_2 (), scan);
4009 need_align = 0;
4010 }
4011 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4012 scan = emit_label_after (lab, scan);
4013 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4014 scan);
4015 for (ref = p->wend; ref; ref = ref->next)
4016 {
4017 lab = ref->label;
4018 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4019 }
4020 }
4021 else if (p->mode == DFmode)
4022 have_df = 1;
4023 }
4024
4025 need_align = 1;
4026
4027 if (start)
4028 {
4029 scan = emit_insn_after (gen_align_4 (), scan);
4030 need_align = 0;
4031 for (; start != barrier; start = NEXT_INSN (start))
4032 if (NONJUMP_INSN_P (start)
4033 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4034 {
4035 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4036 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4037
4038 scan = emit_label_after (lab, scan);
4039 }
4040 }
4041 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4042 {
4043 rtx align_insn = NULL_RTX;
4044
4045 scan = emit_label_after (gen_label_rtx (), scan);
4046 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4047 need_align = 0;
4048
4049 for (i = 0; i < pool_size; i++)
4050 {
4051 pool_node *p = &pool_vector[i];
4052
4053 switch (p->mode)
4054 {
4055 case HImode:
4056 break;
4057 case SImode:
4058 case SFmode:
4059 if (align_insn && !p->part_of_sequence_p)
4060 {
4061 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4062 emit_label_before (lab, align_insn);
4063 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4064 align_insn);
4065 for (ref = p->wend; ref; ref = ref->next)
4066 {
4067 lab = ref->label;
4068 emit_insn_before (gen_consttable_window_end (lab),
4069 align_insn);
4070 }
4071 delete_insn (align_insn);
4072 align_insn = NULL_RTX;
4073 continue;
4074 }
4075 else
4076 {
4077 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4078 scan = emit_label_after (lab, scan);
4079 scan = emit_insn_after (gen_consttable_4 (p->value,
4080 const0_rtx), scan);
4081 need_align = ! need_align;
4082 }
4083 break;
4084 case DFmode:
4085 if (need_align)
4086 {
4087 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4088 align_insn = scan;
4089 need_align = 0;
4090 }
4091 case DImode:
4092 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4093 scan = emit_label_after (lab, scan);
4094 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4095 scan);
4096 break;
4097 default:
4098 gcc_unreachable ();
4099 }
4100
4101 if (p->mode != HImode)
4102 {
4103 for (ref = p->wend; ref; ref = ref->next)
4104 {
4105 lab = ref->label;
4106 scan = emit_insn_after (gen_consttable_window_end (lab),
4107 scan);
4108 }
4109 }
4110 }
4111
4112 pool_size = 0;
4113 }
4114
4115 for (i = 0; i < pool_size; i++)
4116 {
4117 pool_node *p = &pool_vector[i];
4118
4119 switch (p->mode)
4120 {
4121 case HImode:
4122 break;
4123 case SImode:
4124 case SFmode:
4125 if (need_align)
4126 {
4127 need_align = 0;
4128 scan = emit_label_after (gen_label_rtx (), scan);
4129 scan = emit_insn_after (gen_align_4 (), scan);
4130 }
4131 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4132 scan = emit_label_after (lab, scan);
4133 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4134 scan);
4135 break;
4136 case DFmode:
4137 case DImode:
4138 if (need_align)
4139 {
4140 need_align = 0;
4141 scan = emit_label_after (gen_label_rtx (), scan);
4142 scan = emit_insn_after (gen_align_4 (), scan);
4143 }
4144 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4145 scan = emit_label_after (lab, scan);
4146 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4147 scan);
4148 break;
4149 default:
4150 gcc_unreachable ();
4151 }
4152
4153 if (p->mode != HImode)
4154 {
4155 for (ref = p->wend; ref; ref = ref->next)
4156 {
4157 lab = ref->label;
4158 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4159 }
4160 }
4161 }
4162
4163 scan = emit_insn_after (gen_consttable_end (), scan);
4164 scan = emit_barrier_after (scan);
4165 pool_size = 0;
4166 pool_window_label = NULL_RTX;
4167 pool_window_last = 0;
4168 }
4169
4170 /* Return nonzero if constant would be an ok source for a
4171 mov.w instead of a mov.l. */
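/* For instance, (const_int 1000) qualifies, while (const_int 0x8000) does
   not, since mov.w sign-extends its 16-bit operand.  */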
4172
4173 static int
4174 hi_const (rtx src)
4175 {
4176 return (CONST_INT_P (src)
4177 && INTVAL (src) >= -32768
4178 && INTVAL (src) <= 32767);
4179 }
4180
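/* Extract the label (or constant) reference operand from the UNSPEC_MOVA
   source of a mova instruction.  */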
4181 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4182
4183 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4184
4185 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4186 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4187 need to fix it if the input value is CONST_OK_FOR_I08. */
4188
4189 static int
4190 broken_move (rtx insn)
4191 {
4192 if (NONJUMP_INSN_P (insn))
4193 {
4194 rtx pat = PATTERN (insn);
4195 if (GET_CODE (pat) == PARALLEL)
4196 pat = XVECEXP (pat, 0, 0);
4197 if (GET_CODE (pat) == SET
4198 /* We can load any 8-bit value if we don't care what the high
4199 order bits end up as. */
4200 && GET_MODE (SET_DEST (pat)) != QImode
4201 && (CONSTANT_P (SET_SRC (pat))
4202 /* Match mova_const. */
4203 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4204 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4205 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4206 && ! (TARGET_SH2E
4207 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4208 && (fp_zero_operand (SET_SRC (pat))
4209 || fp_one_operand (SET_SRC (pat)))
4210 /* In general we don't know the current setting of fpscr, so disable fldi.
4211 There is an exception if this was a register-register move
4212 before reload - and hence it was ascertained that we have
4213 single precision setting - and in a post-reload optimization
4214 we changed this to do a constant load. In that case
4215 we don't have an r0 clobber, hence we must use fldi. */
4216 && (TARGET_FMOVD
4217 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4218 == SCRATCH))
4219 && REG_P (SET_DEST (pat))
4220 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4221 && ! (TARGET_SH2A
4222 && GET_MODE (SET_DEST (pat)) == SImode
4223 && (satisfies_constraint_I20 (SET_SRC (pat))
4224 || satisfies_constraint_I28 (SET_SRC (pat))))
4225 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4226 return 1;
4227 }
4228
4229 return 0;
4230 }
4231
4232 static int
4233 mova_p (rtx insn)
4234 {
4235 return (NONJUMP_INSN_P (insn)
4236 && GET_CODE (PATTERN (insn)) == SET
4237 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4238 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4239 /* Don't match mova_const. */
4240 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4241 }
4242
4243 /* Fix up a mova from a switch that went out of range. */
4244 static void
4245 fixup_mova (rtx mova)
4246 {
4247 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4248 if (! flag_pic)
4249 {
4250 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4251 INSN_CODE (mova) = -1;
4252 }
4253 else
4254 {
4255 rtx worker = mova;
4256 rtx lab = gen_label_rtx ();
4257 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4258
4259 do
4260 {
4261 worker = NEXT_INSN (worker);
4262 gcc_assert (worker
4263 && !LABEL_P (worker)
4264 && !JUMP_P (worker));
4265 } while (NOTE_P (worker)
4266 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4267 wpat = PATTERN (worker);
4268 wpat0 = XVECEXP (wpat, 0, 0);
4269 wpat1 = XVECEXP (wpat, 0, 1);
4270 wsrc = SET_SRC (wpat0);
4271 PATTERN (worker) = (gen_casesi_worker_2
4272 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4273 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4274 XEXP (wpat1, 0)));
4275 INSN_CODE (worker) = -1;
4276 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4277 base = gen_rtx_LABEL_REF (Pmode, lab);
4278 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4279 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4280 INSN_CODE (mova) = -1;
4281 }
4282 }
4283
4284 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4285 *num_mova, and check if the new mova is not nested within the first one.
4286 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4287 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4288 static int
4289 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4290 {
4291 int n_addr = 0; /* Initialization to shut up spurious warning. */
4292 int f_target, n_target = 0; /* Likewise. */
4293
4294 if (optimize)
4295 {
4296 /* If NEW_MOVA has no address yet, it will be handled later. */
4297 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4298 return -1;
4299
4300 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4301 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4302 if (n_addr > n_target || n_addr + 1022 < n_target)
4303 {
4304 /* Change the mova into a load.
4305 broken_move will then return true for it. */
4306 fixup_mova (new_mova);
4307 return 1;
4308 }
4309 }
4310 if (!(*num_mova)++)
4311 {
4312 *first_mova = new_mova;
4313 return 2;
4314 }
4315 if (!optimize
4316 || ((f_target
4317 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4318 >= n_target))
4319 return -1;
4320
4321 (*num_mova)--;
4322 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4323 > n_target - n_addr)
4324 {
4325 fixup_mova (*first_mova);
4326 return 0;
4327 }
4328 else
4329 {
4330 fixup_mova (new_mova);
4331 return 1;
4332 }
4333 }
4334
4335 /* Find the last barrier from insn FROM which is close enough to hold the
4336 constant pool. If we can't find one, then create one near the end of
4337 the range. */
4338
4339 static rtx
4340 find_barrier (int num_mova, rtx mova, rtx from)
4341 {
4342 int count_si = 0;
4343 int count_hi = 0;
4344 int found_hi = 0;
4345 int found_si = 0;
4346 int found_di = 0;
4347 int hi_align = 2;
4348 int si_align = 2;
4349 int leading_mova = num_mova;
4350 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4351 int si_limit;
4352 int hi_limit;
4353 rtx orig = from;
4354
4355 /* For HImode: range is 510, add 4 because pc counts from address of
4356 second instruction after this one, subtract 2 for the jump instruction
4357 that we may need to emit before the table, subtract 2 for the instruction
4358 that fills the jump delay slot (in very rare cases, reorg will take an
4359 instruction from after the constant pool or will leave the delay slot
4360 empty). This gives 510.
4361 For SImode: range is 1020, add 4 because pc counts from address of
4362 second instruction after this one, subtract 2 in case pc is 2 byte
4363 aligned, subtract 2 for the jump instruction that we may need to emit
4364 before the table, subtract 2 for the instruction that fills the jump
4365 delay slot. This gives 1018. */
4366
4367 /* The branch will always be shortened now that the reference address for
4368 forward branches is the successor address, thus we need no longer make
4369 adjustments to the [sh]i_limit for -O0. */
4370
4371 si_limit = 1018;
4372 hi_limit = 510;
4373
4374 while (from && count_si < si_limit && count_hi < hi_limit)
4375 {
4376 int inc = get_attr_length (from);
4377 int new_align = 1;
4378
4379 /* If this is a label that existed at the time of the compute_alignments
4380 call, determine the alignment. N.B. When find_barrier recurses for
4381 an out-of-reach mova, we might see labels at the start of previously
4382 inserted constant tables. */
4383 if (LABEL_P (from)
4384 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4385 {
4386 if (optimize)
4387 new_align = 1 << label_to_alignment (from);
4388 else if (BARRIER_P (prev_nonnote_insn (from)))
4389 new_align = 1 << barrier_align (from);
4390 else
4391 new_align = 1;
4392 inc = 0;
4393 }
4394 /* In case we are scanning a constant table because of recursion, check
4395 for explicit alignments. If the table is long, we might be forced
4396 to emit the new table in front of it; the length of the alignment
4397 might be the last straw. */
4398 else if (NONJUMP_INSN_P (from)
4399 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4400 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4401 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4402 /* When we find the end of a constant table, paste the new constant
4403 at the end. That is better than putting it in front because
4404 this way, we don't need extra alignment for adding a 4-byte-aligned
4405 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4406 else if (NONJUMP_INSN_P (from)
4407 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4408 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4409 return from;
4410
4411 if (BARRIER_P (from))
4412 {
4413 rtx next;
4414
4415 found_barrier = from;
4416
4417 /* If we are at the end of the function, or in front of an alignment
4418 instruction, we need not insert an extra alignment. We prefer
4419 this kind of barrier. */
4420 if (barrier_align (from) > 2)
4421 good_barrier = from;
4422
4423 /* If we are at the end of a hot/cold block, dump the constants
4424 here. */
4425 next = NEXT_INSN (from);
4426 if (next
4427 && NOTE_P (next)
4428 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4429 break;
4430 }
4431
4432 if (broken_move (from))
4433 {
4434 rtx pat, src, dst;
4435 enum machine_mode mode;
4436
4437 pat = PATTERN (from);
4438 if (GET_CODE (pat) == PARALLEL)
4439 pat = XVECEXP (pat, 0, 0);
4440 src = SET_SRC (pat);
4441 dst = SET_DEST (pat);
4442 mode = GET_MODE (dst);
4443
4444 /* We must explicitly check the mode, because sometimes the
4445 front end will generate code to load unsigned constants into
4446 HImode targets without properly sign extending them. */
4447 if (mode == HImode
4448 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4449 {
4450 found_hi += 2;
4451 /* We put the short constants before the long constants, so
4452 we must count the length of short constants in the range
4453 for the long constants. */
4454 /* ??? This isn't optimal, but is easy to do. */
4455 si_limit -= 2;
4456 }
4457 else
4458 {
4459 /* We dump DF/DI constants before SF/SI ones, because
4460 the limit is the same, but the alignment requirements
4461 are higher. We may waste up to 4 additional bytes
4462 for alignment, and the DF/DI constant may have
4463 another SF/SI constant placed before it. */
4464 if (TARGET_SHCOMPACT
4465 && ! found_di
4466 && (mode == DFmode || mode == DImode))
4467 {
4468 found_di = 1;
4469 si_limit -= 8;
4470 }
4471 while (si_align > 2 && found_si + si_align - 2 > count_si)
4472 si_align >>= 1;
4473 if (found_si > count_si)
4474 count_si = found_si;
4475 found_si += GET_MODE_SIZE (mode);
4476 if (num_mova)
4477 si_limit -= GET_MODE_SIZE (mode);
4478 }
4479 }
4480
4481 if (mova_p (from))
4482 {
4483 switch (untangle_mova (&num_mova, &mova, from))
4484 {
4485 case 0: return find_barrier (0, 0, mova);
4486 case 2:
4487 {
4488 leading_mova = 0;
4489 barrier_before_mova
4490 = good_barrier ? good_barrier : found_barrier;
4491 }
4492 default: break;
4493 }
4494 if (found_si > count_si)
4495 count_si = found_si;
4496 }
4497 else if (JUMP_TABLE_DATA_P (from))
4498 {
4499 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4500 || (num_mova
4501 && (prev_nonnote_insn (from)
4502 == XEXP (MOVA_LABELREF (mova), 0))))
4503 num_mova--;
4504 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4505 {
4506 /* We have just passed the barrier in front of the
4507 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4508 the ADDR_DIFF_VEC is accessed as data, just like our pool
4509 constants, this is a good opportunity to accommodate what
4510 we have gathered so far.
4511 If we waited any longer, we could end up at a barrier in
4512 front of code, which gives worse cache usage for separated
4513 instruction / data caches. */
4514 good_barrier = found_barrier;
4515 break;
4516 }
4517 else
4518 {
4519 rtx body = PATTERN (from);
4520 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4521 }
4522 }
4523 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4524 else if (JUMP_P (from)
4525 && ! TARGET_SH2
4526 && ! TARGET_SMALLCODE)
4527 new_align = 4;
4528
4529 if (found_si)
4530 {
4531 count_si += inc;
4532 if (new_align > si_align)
4533 {
4534 si_limit -= (count_si - 1) & (new_align - si_align);
4535 si_align = new_align;
4536 }
4537 count_si = (count_si + new_align - 1) & -new_align;
4538 }
4539 if (found_hi)
4540 {
4541 count_hi += inc;
4542 if (new_align > hi_align)
4543 {
4544 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4545 hi_align = new_align;
4546 }
4547 count_hi = (count_hi + new_align - 1) & -new_align;
4548 }
4549 from = NEXT_INSN (from);
4550 }
4551
4552 if (num_mova)
4553 {
4554 if (leading_mova)
4555 {
4556 /* Try as we might, the leading mova is out of range. Change
4557 it into a load (which will become a pcload) and retry. */
4558 fixup_mova (mova);
4559 return find_barrier (0, 0, mova);
4560 }
4561 else
4562 {
4563 /* Insert the constant pool table before the mova instruction,
4564 to prevent the mova label reference from going out of range. */
4565 from = mova;
4566 good_barrier = found_barrier = barrier_before_mova;
4567 }
4568 }
4569
4570 if (found_barrier)
4571 {
4572 if (good_barrier && next_real_insn (found_barrier))
4573 found_barrier = good_barrier;
4574 }
4575 else
4576 {
4577 /* We didn't find a barrier in time to dump our stuff,
4578 so we'll make one. */
4579 rtx label = gen_label_rtx ();
4580
4581 /* If we exceeded the range, then we must back up over the last
4582 instruction we looked at. Otherwise, we just need to undo the
4583 NEXT_INSN at the end of the loop. */
4584 if (PREV_INSN (from) != orig
4585 && (count_hi > hi_limit || count_si > si_limit))
4586 from = PREV_INSN (PREV_INSN (from));
4587 else
4588 from = PREV_INSN (from);
4589
4590 /* Walk back to be just before any jump or label.
4591 Putting it before a label reduces the number of times the branch
4592 around the constant pool table will be hit. Putting it before
4593 a jump makes it more likely that the bra delay slot will be
4594 filled. */
4595 while (NOTE_P (from) || JUMP_P (from)
4596 || LABEL_P (from))
4597 from = PREV_INSN (from);
4598
4599 from = emit_jump_insn_after (gen_jump (label), from);
4600 JUMP_LABEL (from) = label;
4601 LABEL_NUSES (label) = 1;
4602 found_barrier = emit_barrier_after (from);
4603 emit_label_after (label, found_barrier);
4604 }
4605
4606 return found_barrier;
4607 }
4608
4609 /* If the instruction INSN is implemented by a special function, and we can
4610 positively find the register that is used to call the sfunc, and this
4611 register is not used anywhere else in this instruction - except as the
4612 destination of a set, return this register; else, return 0. */
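/* Schematically - this is an illustration, not a pattern taken from the
   machine description - an sfunc insn might look like
     (parallel [(set (reg:SI 0) (...))
                (clobber (reg:SI PR_REG))
                (use (reg:SI 2))])
   where the (use (reg:SI 2)) names the register holding the called address;
   this function would return (reg:SI 2), provided r2 is not mentioned
   anywhere else in the pattern except as the destination of a set.  */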
4613 rtx
4614 sfunc_uses_reg (rtx insn)
4615 {
4616 int i;
4617 rtx pattern, part, reg_part, reg;
4618
4619 if (!NONJUMP_INSN_P (insn))
4620 return 0;
4621 pattern = PATTERN (insn);
4622 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4623 return 0;
4624
4625 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4626 {
4627 part = XVECEXP (pattern, 0, i);
4628 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4629 reg_part = part;
4630 }
4631 if (! reg_part)
4632 return 0;
4633 reg = XEXP (reg_part, 0);
4634 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4635 {
4636 part = XVECEXP (pattern, 0, i);
4637 if (part == reg_part || GET_CODE (part) == CLOBBER)
4638 continue;
4639 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4640 && REG_P (SET_DEST (part)))
4641 ? SET_SRC (part) : part)))
4642 return 0;
4643 }
4644 return reg;
4645 }
4646
4647 /* See if the only way in which INSN uses REG is by calling it, or by
4648 setting it while calling it. Set *SET to a SET rtx if the register
4649 is set by INSN. */
4650
4651 static int
4652 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4653 {
4654 rtx pattern, reg2;
4655
4656 *set = NULL_RTX;
4657
4658 reg2 = sfunc_uses_reg (insn);
4659 if (reg2 && REGNO (reg2) == REGNO (reg))
4660 {
4661 pattern = single_set (insn);
4662 if (pattern
4663 && REG_P (SET_DEST (pattern))
4664 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4665 *set = pattern;
4666 return 0;
4667 }
4668 if (!CALL_P (insn))
4669 {
4670 /* We don't use rtx_equal_p because we don't care if the mode is
4671 different. */
4672 pattern = single_set (insn);
4673 if (pattern
4674 && REG_P (SET_DEST (pattern))
4675 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4676 {
4677 rtx par, part;
4678 int i;
4679
4680 *set = pattern;
4681 par = PATTERN (insn);
4682 if (GET_CODE (par) == PARALLEL)
4683 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4684 {
4685 part = XVECEXP (par, 0, i);
4686 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4687 return 1;
4688 }
4689 return reg_mentioned_p (reg, SET_SRC (pattern));
4690 }
4691
4692 return 1;
4693 }
4694
4695 pattern = PATTERN (insn);
4696
4697 if (GET_CODE (pattern) == PARALLEL)
4698 {
4699 int i;
4700
4701 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4702 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4703 return 1;
4704 pattern = XVECEXP (pattern, 0, 0);
4705 }
4706
4707 if (GET_CODE (pattern) == SET)
4708 {
4709 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4710 {
4711 /* We don't use rtx_equal_p, because we don't care if the
4712 mode is different. */
4713 if (!REG_P (SET_DEST (pattern))
4714 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4715 return 1;
4716
4717 *set = pattern;
4718 }
4719
4720 pattern = SET_SRC (pattern);
4721 }
4722
4723 if (GET_CODE (pattern) != CALL
4724 || !MEM_P (XEXP (pattern, 0))
4725 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4726 return 1;
4727
4728 return 0;
4729 }
4730
4731 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4732 general registers. Bits 0..15 mean that the respective registers
4733 are used as inputs in the instruction. Bits 16..31 mean that the
4734 registers 0..15, respectively, are used as outputs, or are clobbered.
4735 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
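/* For example, assuming an SImode value occupies a single hard register:
   for (set (reg:SI 3) (plus:SI (reg:SI 1) (reg:SI 2))) this returns
   0x80006, i.e. bits 1 and 2 for the r1 / r2 inputs and bit 19 (16 + 3)
   for the r3 output.  */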
4736 int
4737 regs_used (rtx x, int is_dest)
4738 {
4739 enum rtx_code code;
4740 const char *fmt;
4741 int i, used = 0;
4742
4743 if (! x)
4744 return used;
4745 code = GET_CODE (x);
4746 switch (code)
4747 {
4748 case REG:
4749 if (REGNO (x) < 16)
4750 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4751 << (REGNO (x) + is_dest));
4752 return 0;
4753 case SUBREG:
4754 {
4755 rtx y = SUBREG_REG (x);
4756
4757 if (!REG_P (y))
4758 break;
4759 if (REGNO (y) < 16)
4760 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4761 << (REGNO (y) +
4762 subreg_regno_offset (REGNO (y),
4763 GET_MODE (y),
4764 SUBREG_BYTE (x),
4765 GET_MODE (x)) + is_dest));
4766 return 0;
4767 }
4768 case SET:
4769 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4770 case RETURN:
4771 /* If there was a return value, it must have been indicated with USE. */
4772 return 0x00ffff00;
4773 case CLOBBER:
4774 is_dest = 1;
4775 break;
4776 case MEM:
4777 is_dest = 0;
4778 break;
4779 case CALL:
4780 used |= 0x00ff00f0;
4781 break;
4782 default:
4783 break;
4784 }
4785
4786 fmt = GET_RTX_FORMAT (code);
4787
4788 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4789 {
4790 if (fmt[i] == 'E')
4791 {
4792 register int j;
4793 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4794 used |= regs_used (XVECEXP (x, i, j), is_dest);
4795 }
4796 else if (fmt[i] == 'e')
4797 used |= regs_used (XEXP (x, i), is_dest);
4798 }
4799 return used;
4800 }
4801
4802 /* Create an instruction that prevents redirection of a conditional branch
4803 to the destination of the JUMP with address ADDR.
4804 If the branch needs to be implemented as an indirect jump, try to find
4805 a scratch register for it.
4806 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4807 If any preceding insn that doesn't fit into a delay slot is good enough,
4808 pass 1. Pass 2 if a definite blocking insn is needed.
4809 -1 is used internally to avoid deep recursion.
4810 If a blocking instruction is made or recognized, return it. */
4811
4812 static rtx
4813 gen_block_redirect (rtx jump, int addr, int need_block)
4814 {
4815 int dead = 0;
4816 rtx prev = prev_nonnote_insn (jump);
4817 rtx dest;
4818
4819 /* First, check if we already have an instruction that satisfies our need. */
4820 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4821 {
4822 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4823 return prev;
4824 if (GET_CODE (PATTERN (prev)) == USE
4825 || GET_CODE (PATTERN (prev)) == CLOBBER
4826 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4827 prev = jump;
4828 else if ((need_block &= ~1) < 0)
4829 return prev;
4830 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4831 need_block = 0;
4832 }
4833 if (GET_CODE (PATTERN (jump)) == RETURN)
4834 {
4835 if (! need_block)
4836 return prev;
4837 /* Reorg even does nasty things with return insns that cause branches
4838 to go out of range - see find_end_label and callers. */
4839 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4840 }
4841 /* We can't use JUMP_LABEL here because it might be undefined
4842 when not optimizing. */
4843 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4844 /* If the branch is out of range, try to find a scratch register for it. */
4845 if (optimize
4846 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4847 > 4092 + 4098))
4848 {
4849 rtx scan;
4850 /* Don't look for the stack pointer as a scratch register,
4851 it would cause trouble if an interrupt occurred. */
4852 unsigned attempt = 0x7fff, used;
4853 int jump_left = flag_expensive_optimizations + 1;
4854
4855 /* It is likely that the most recent eligible instruction is wanted for
4856 the delay slot. Therefore, find out which registers it uses, and
4857 try to avoid using them. */
4858
4859 for (scan = jump; (scan = PREV_INSN (scan)); )
4860 {
4861 enum rtx_code code;
4862
4863 if (INSN_DELETED_P (scan))
4864 continue;
4865 code = GET_CODE (scan);
4866 if (code == CODE_LABEL || code == JUMP_INSN)
4867 break;
4868 if (code == INSN
4869 && GET_CODE (PATTERN (scan)) != USE
4870 && GET_CODE (PATTERN (scan)) != CLOBBER
4871 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4872 {
4873 attempt &= ~regs_used (PATTERN (scan), 0);
4874 break;
4875 }
4876 }
4877 for (used = dead = 0, scan = JUMP_LABEL (jump);
4878 (scan = NEXT_INSN (scan)); )
4879 {
4880 enum rtx_code code;
4881
4882 if (INSN_DELETED_P (scan))
4883 continue;
4884 code = GET_CODE (scan);
4885 if (INSN_P (scan))
4886 {
4887 used |= regs_used (PATTERN (scan), 0);
4888 if (code == CALL_INSN)
4889 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4890 dead |= (used >> 16) & ~used;
4891 if (dead & attempt)
4892 {
4893 dead &= attempt;
4894 break;
4895 }
4896 if (code == JUMP_INSN)
4897 {
4898 if (jump_left-- && simplejump_p (scan))
4899 scan = JUMP_LABEL (scan);
4900 else
4901 break;
4902 }
4903 }
4904 }
4905 /* Mask out the stack pointer again, in case it was
4906 the only 'free' register we have found. */
4907 dead &= 0x7fff;
4908 }
4909 /* If the immediate destination is still in range, check for possible
4910 threading with a jump beyond the delay slot insn.
4911 Don't check if we are called recursively; the jump has been or will be
4912 checked in a different invocation then. */
4913
4914 else if (optimize && need_block >= 0)
4915 {
4916 rtx next = next_active_insn (next_active_insn (dest));
4917 if (next && JUMP_P (next)
4918 && GET_CODE (PATTERN (next)) == SET
4919 && recog_memoized (next) == CODE_FOR_jump_compact)
4920 {
4921 dest = JUMP_LABEL (next);
4922 if (dest
4923 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4924 > 4092 + 4098))
4925 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4926 }
4927 }
4928
4929 if (dead)
4930 {
4931 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4932
4933 /* It would be nice if we could convert the jump into an indirect
4934 jump / far branch right now, and thus exposing all constituent
4935 instructions to further optimization. However, reorg uses
4936 simplejump_p to determine if there is an unconditional jump where
4937 it should try to schedule instructions from the target of the
4938 branch; simplejump_p fails for indirect jumps even if they have
4939 a JUMP_LABEL. */
4940 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4941 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4942 , jump);
4943 /* ??? We would like this to have the scope of the jump, but that
4944 scope will change when a delay slot insn of an inner scope is added.
4945 Hence, after delay slot scheduling, we'll have to expect
4946 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4947 the jump. */
4948
4949 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4950 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4951 return insn;
4952 }
4953 else if (need_block)
4954 /* We can't use JUMP_LABEL here because it might be undefined
4955 when not optimizing. */
4956 return emit_insn_before (gen_block_branch_redirect
4957 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4958 , jump);
4959 return prev;
4960 }
4961
4962 #define CONDJUMP_MIN -252
4963 #define CONDJUMP_MAX 262
4964 struct far_branch
4965 {
4966 /* A label (to be placed) in front of the jump
4967 that jumps to our ultimate destination. */
4968 rtx near_label;
4969 /* Where we are going to insert it if we cannot move the jump any farther,
4970 or the jump itself if we have picked up an existing jump. */
4971 rtx insert_place;
4972 /* The ultimate destination. */
4973 rtx far_label;
4974 struct far_branch *prev;
4975 /* If the branch has already been created, its address;
4976 else the address of its first prospective user. */
4977 int address;
4978 };
4979
4980 static void gen_far_branch (struct far_branch *);
4981 enum mdep_reorg_phase_e mdep_reorg_phase;
4982 static void
4983 gen_far_branch (struct far_branch *bp)
4984 {
4985 rtx insn = bp->insert_place;
4986 rtx jump;
4987 rtx label = gen_label_rtx ();
4988 int ok;
4989
4990 emit_label_after (label, insn);
4991 if (bp->far_label)
4992 {
4993 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4994 LABEL_NUSES (bp->far_label)++;
4995 }
4996 else
4997 jump = emit_jump_insn_after (gen_return (), insn);
4998 /* Emit a barrier so that reorg knows that any following instructions
4999 are not reachable via a fall-through path.
5000 But don't do this when not optimizing, since we wouldn't suppress the
5001 alignment for the barrier then, and could end up with out-of-range
5002 pc-relative loads. */
5003 if (optimize)
5004 emit_barrier_after (jump);
5005 emit_label_after (bp->near_label, insn);
5006 JUMP_LABEL (jump) = bp->far_label;
5007 ok = invert_jump (insn, label, 1);
5008 gcc_assert (ok);
5009
5010 /* If we are branching around a jump (rather than a return), prevent
5011 reorg from using an insn from the jump target as the delay slot insn -
5012 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5013 and it could cause branches to go out of range. */
5014 if (bp->far_label)
5015 (emit_insn_after
5016 (gen_stuff_delay_slot
5017 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
5018 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5019 insn));
5020 /* Prevent reorg from undoing our splits. */
5021 gen_block_redirect (jump, bp->address += 2, 2);
5022 }
5023
5024 /* Fix up ADDR_DIFF_VECs. */
5025 void
5026 fixup_addr_diff_vecs (rtx first)
5027 {
5028 rtx insn;
5029
5030 for (insn = first; insn; insn = NEXT_INSN (insn))
5031 {
5032 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5033
5034 if (!JUMP_P (insn)
5035 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5036 continue;
5037 pat = PATTERN (insn);
5038 vec_lab = XEXP (XEXP (pat, 0), 0);
5039
5040 /* Search the matching casesi_jump_2. */
5041 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5042 {
5043 if (!JUMP_P (prev))
5044 continue;
5045 prevpat = PATTERN (prev);
5046 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5047 continue;
5048 x = XVECEXP (prevpat, 0, 1);
5049 if (GET_CODE (x) != USE)
5050 continue;
5051 x = XEXP (x, 0);
5052 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5053 break;
5054 }
5055 /* FIXME: This is a bug in the optimizer, but it seems harmless
5056 to just avoid panicking. */
5057 if (!prev)
5058 continue;
5059
5060 /* Emit the reference label of the braf where it belongs, right after
5061 the casesi_jump_2 (i.e. braf). */
5062 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5063 emit_label_after (braf_label, prev);
5064
5065 /* Fix up the ADDR_DIF_VEC to be relative
5066 to the reference address of the braf. */
5067 XEXP (XEXP (pat, 0), 0) = braf_label;
5068 }
5069 }
5070
5071 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5072 a barrier. Return the base 2 logarithm of the desired alignment. */
5073 int
5074 barrier_align (rtx barrier_or_label)
5075 {
5076 rtx next = next_real_insn (barrier_or_label), pat, prev;
5077 int slot, credit, jump_to_next = 0;
5078
5079 if (! next)
5080 return 0;
5081
5082 pat = PATTERN (next);
5083
5084 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5085 return 2;
5086
5087 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5088 /* This is a barrier in front of a constant table. */
5089 return 0;
5090
5091 prev = prev_real_insn (barrier_or_label);
5092 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5093 {
5094 pat = PATTERN (prev);
5095 /* If this is a very small table, we want to keep the alignment after
5096 the table to the minimum for proper code alignment. */
5097 return ((TARGET_SMALLCODE
5098 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5099 <= (unsigned) 1 << (CACHE_LOG - 2)))
5100 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5101 }
5102
5103 if (TARGET_SMALLCODE)
5104 return 0;
5105
5106 if (! TARGET_SH2 || ! optimize)
5107 return align_jumps_log;
5108
5109 /* When fixing up pcloads, a constant table might be inserted just before
5110 the basic block that ends with the barrier. Thus, we can't trust the
5111 instruction lengths before that. */
5112 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5113 {
5114 /* Check if there is an immediately preceding branch to the insn beyond
5115 the barrier. We must weight the cost of discarding useful information
5116 from the current cache line when executing this branch and there is
5117 an alignment, against that of fetching unneeded insn in front of the
5118 branch target when there is no alignment. */
5119
5120 /* There are two delay_slot cases to consider. One is the simple case
5121 where the preceding branch is to the insn beyond the barrier (simple
5122 delay slot filling), and the other is where the preceding branch has
5123 a delay slot that is a duplicate of the insn after the barrier
5124 (fill_eager_delay_slots) and the branch is to the insn after the insn
5125 after the barrier. */
5126
5127 /* PREV is presumed to be the JUMP_INSN for the barrier under
5128 investigation. Skip to the insn before it. */
5129 prev = prev_real_insn (prev);
5130
5131 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5132 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5133 prev = prev_real_insn (prev))
5134 {
5135 jump_to_next = 0;
5136 if (GET_CODE (PATTERN (prev)) == USE
5137 || GET_CODE (PATTERN (prev)) == CLOBBER)
5138 continue;
5139 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5140 {
5141 prev = XVECEXP (PATTERN (prev), 0, 1);
5142 if (INSN_UID (prev) == INSN_UID (next))
5143 {
5144 /* Delay slot was filled with insn at jump target. */
5145 jump_to_next = 1;
5146 continue;
5147 }
5148 }
5149
5150 if (slot &&
5151 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5152 slot = 0;
5153 credit -= get_attr_length (prev);
5154 }
5155 if (prev
5156 && JUMP_P (prev)
5157 && JUMP_LABEL (prev))
5158 {
5159 rtx x;
5160 if (jump_to_next
5161 || next_real_insn (JUMP_LABEL (prev)) == next
5162 /* If relax_delay_slots() decides NEXT was redundant
5163 with some previous instruction, it will have
5164 redirected PREV's jump to the following insn. */
5165 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5166 /* There is no upper bound on redundant instructions
5167 that might have been skipped, but we must not put an
5168 alignment where none had been before. */
5169 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5170 (INSN_P (x)
5171 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5172 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5173 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5174 {
5175 rtx pat = PATTERN (prev);
5176 if (GET_CODE (pat) == PARALLEL)
5177 pat = XVECEXP (pat, 0, 0);
5178 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5179 return 0;
5180 }
5181 }
5182 }
5183
5184 return align_jumps_log;
5185 }
5186
5187 /* If we are inside a phony loop, almost any kind of label can turn up as the
5188 first one in the loop. Aligning a braf label causes incorrect switch
5189 destination addresses; we can detect braf labels because they are
5190 followed by a BARRIER.
5191 Applying loop alignment to small constant or switch tables is a waste
5192 of space, so we suppress this too. */
5193 int
5194 sh_loop_align (rtx label)
5195 {
5196 rtx next = label;
5197
5198 do
5199 next = next_nonnote_insn (next);
5200 while (next && LABEL_P (next));
5201
5202 if (! next
5203 || ! INSN_P (next)
5204 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5205 || recog_memoized (next) == CODE_FOR_consttable_2)
5206 return 0;
5207
5208 return align_loops_log;
5209 }
5210
5211 /* Do a final pass over the function, just before delayed branch
5212 scheduling. */
5213
5214 static void
5215 sh_reorg (void)
5216 {
5217 rtx first, insn, mova = NULL_RTX;
5218 int num_mova;
5219 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5220 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5221
5222 first = get_insns ();
5223 max_labelno_before_reorg = max_label_num ();
5224
5225 /* We must split call insns before introducing `mova's. If we're
5226 optimizing, they'll have already been split. Otherwise, make
5227 sure we don't split them too late. */
5228 if (! optimize)
5229 split_all_insns_noflow ();
5230
5231 if (TARGET_SHMEDIA)
5232 return;
5233
5234 /* If relaxing, generate pseudo-ops to associate function calls with
5235 the symbols they call. It does no harm to not generate these
5236 pseudo-ops. However, when we can generate them, it enables the
5237 linker to potentially relax the jsr to a bsr, and eliminate the
5238 register load and, possibly, the constant pool entry. */
5239
5240 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5241 if (TARGET_RELAX)
5242 {
5243 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5244 own purposes. This works because none of the remaining passes
5245 need to look at them.
5246
5247 ??? But it may break in the future. We should use a machine
5248 dependent REG_NOTE, or some other approach entirely. */
5249 for (insn = first; insn; insn = NEXT_INSN (insn))
5250 {
5251 if (INSN_P (insn))
5252 {
5253 rtx note;
5254
5255 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5256 NULL_RTX)) != 0)
5257 remove_note (insn, note);
5258 }
5259 }
5260
5261 for (insn = first; insn; insn = NEXT_INSN (insn))
5262 {
5263 rtx pattern, reg, link, set, scan, dies, label;
5264 int rescan = 0, foundinsn = 0;
5265
5266 if (CALL_P (insn))
5267 {
5268 pattern = PATTERN (insn);
5269
5270 if (GET_CODE (pattern) == PARALLEL)
5271 pattern = XVECEXP (pattern, 0, 0);
5272 if (GET_CODE (pattern) == SET)
5273 pattern = SET_SRC (pattern);
5274
5275 if (GET_CODE (pattern) != CALL
5276 || !MEM_P (XEXP (pattern, 0)))
5277 continue;
5278
5279 reg = XEXP (XEXP (pattern, 0), 0);
5280 }
5281 else
5282 {
5283 reg = sfunc_uses_reg (insn);
5284 if (! reg)
5285 continue;
5286 }
5287
5288 if (!REG_P (reg))
5289 continue;
5290
5291 /* Try scanning backward to find where the register is set. */
5292 link = NULL;
5293 for (scan = PREV_INSN (insn);
5294 scan && !LABEL_P (scan);
5295 scan = PREV_INSN (scan))
5296 {
5297 if (! INSN_P (scan))
5298 continue;
5299
5300 if (! reg_mentioned_p (reg, scan))
5301 continue;
5302
5303 if (noncall_uses_reg (reg, scan, &set))
5304 break;
5305
5306 if (set)
5307 {
5308 link = scan;
5309 break;
5310 }
5311 }
5312
5313 if (! link)
5314 continue;
5315
5316 /* The register is set at LINK. */
5317
5318 /* We can only optimize the function call if the register is
5319 being set to a symbol. In theory, we could sometimes
5320 optimize calls to a constant location, but the assembler
5321 and linker do not support that at present. */
5322 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5323 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5324 continue;
5325
5326 /* Scan forward from LINK to the place where REG dies, and
5327 make sure that the only insns which use REG are
5328 themselves function calls. */
5329
5330 /* ??? This doesn't work for call targets that were allocated
5331 by reload, since there may not be a REG_DEAD note for the
5332 register. */
5333
5334 dies = NULL_RTX;
5335 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5336 {
5337 rtx scanset;
5338
5339 /* Don't try to trace forward past a CODE_LABEL if we haven't
5340 seen INSN yet. Ordinarily, we will only find the setting insn
5341 if it is in the same basic block. However,
5342 cross-jumping can insert code labels in between the load and
5343 the call, and can result in situations where a single call
5344 insn may have two targets depending on where we came from. */
5345
5346 if (LABEL_P (scan) && ! foundinsn)
5347 break;
5348
5349 if (! INSN_P (scan))
5350 continue;
5351
5352 /* Don't try to trace forward past a JUMP. To optimize
5353 safely, we would have to check that all the
5354 instructions at the jump destination did not use REG. */
5355
5356 if (JUMP_P (scan))
5357 break;
5358
5359 if (! reg_mentioned_p (reg, scan))
5360 continue;
5361
5362 if (noncall_uses_reg (reg, scan, &scanset))
5363 break;
5364
5365 if (scan == insn)
5366 foundinsn = 1;
5367
5368 if (scan != insn
5369 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5370 {
5371 /* There is a function call to this register other
5372 than the one we are checking. If we optimize
5373 this call, we need to rescan again below. */
5374 rescan = 1;
5375 }
5376
5377 /* ??? We shouldn't have to worry about SCANSET here.
5378 We should just be able to check for a REG_DEAD note
5379 on a function call. However, the REG_DEAD notes are
5380 apparently not dependable around libcalls; c-torture
5381 execute/920501-2 is a test case. If SCANSET is set,
5382 then this insn sets the register, so it must have
5383 died earlier. Unfortunately, this will only handle
5384 the cases in which the register is, in fact, set in a
5385 later insn. */
5386
5387 /* ??? We shouldn't have to use FOUNDINSN here.
5388 This dates back to when we used LOG_LINKS to find
5389 the most recent insn which sets the register. */
5390
5391 if (foundinsn
5392 && (scanset
5393 || find_reg_note (scan, REG_DEAD, reg)))
5394 {
5395 dies = scan;
5396 break;
5397 }
5398 }
5399
5400 if (! dies)
5401 {
5402 /* Either there was a branch, or some insn used REG
5403 other than as a function call address. */
5404 continue;
5405 }
5406
5407 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5408 on the insn which sets the register, and on each call insn
5409 which uses the register. In final_prescan_insn we look for
5410 the REG_LABEL_OPERAND notes, and output the appropriate label
5411 or pseudo-op. */
5412
5413 label = gen_label_rtx ();
5414 add_reg_note (link, REG_LABEL_OPERAND, label);
5415 add_reg_note (insn, REG_LABEL_OPERAND, label);
5416 if (rescan)
5417 {
5418 scan = link;
5419 do
5420 {
5421 rtx reg2;
5422
5423 scan = NEXT_INSN (scan);
5424 if (scan != insn
5425 && ((CALL_P (scan)
5426 && reg_mentioned_p (reg, scan))
5427 || ((reg2 = sfunc_uses_reg (scan))
5428 && REGNO (reg2) == REGNO (reg))))
5429 add_reg_note (scan, REG_LABEL_OPERAND, label);
5430 }
5431 while (scan != dies);
5432 }
5433 }
5434 }
5435
5436 if (TARGET_SH2)
5437 fixup_addr_diff_vecs (first);
5438
5439 if (optimize)
5440 {
5441 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5442 shorten_branches (first);
5443 }
5444
5445 /* Scan the function looking for move instructions which have to be
5446 changed to pc-relative loads and insert the literal tables. */
5447 label_ref_list_pool = create_alloc_pool ("label references list",
5448 sizeof (struct label_ref_list_d),
5449 30);
5450 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5451 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5452 {
5453 if (mova_p (insn))
5454 {
5455 /* ??? basic block reordering can move a switch table dispatch
5456 below the switch table. Check if that has happened.
5457 We only have the addresses available when optimizing; but then,
5458 this check shouldn't be needed when not optimizing. */
5459 if (!untangle_mova (&num_mova, &mova, insn))
5460 {
5461 insn = mova;
5462 num_mova = 0;
5463 }
5464 }
5465 else if (JUMP_P (insn)
5466 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5467 && num_mova
5468 /* ??? loop invariant motion can also move a mova out of a
5469 loop. Since loop does this code motion anyway, maybe we
5470 should wrap UNSPEC_MOVA into a CONST, so that reload can
5471 move it back. */
5472 && ((num_mova > 1
5473 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5474 || (prev_nonnote_insn (insn)
5475 == XEXP (MOVA_LABELREF (mova), 0))))
5476 {
5477 rtx scan;
5478 int total;
5479
5480 num_mova--;
5481
5482 /* Some code might have been inserted between the mova and
5483 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5484 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5485 total += get_attr_length (scan);
5486
5487 /* The range of a mova is 1020 bytes; add 4 because the pc counts from the
5488 address of the second instruction after this one, and subtract 2 in case
5489 the pc is 2-byte aligned: 1020 + 4 - 2 = 1022, the limit tested below.
5490 Alignment needed for the ADDR_DIFF_VEC cancels out with that of the mova. */
5491 if (total > 1022)
5492 {
5493 /* Change the mova into a load, and restart scanning
5494 there. broken_move will then return true for mova. */
5495 fixup_mova (mova);
5496 insn = mova;
5497 }
5498 }
5499 if (broken_move (insn)
5500 || (NONJUMP_INSN_P (insn)
5501 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5502 {
5503 rtx scan;
5504 /* Scan ahead looking for a barrier to stick the constant table
5505 behind. */
5506 rtx barrier = find_barrier (num_mova, mova, insn);
5507 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5508 int need_aligned_label = 0;
5509
5510 if (num_mova && ! mova_p (mova))
5511 {
5512 /* find_barrier had to change the first mova into a
5513 pcload; thus, we have to start with this new pcload. */
5514 insn = mova;
5515 num_mova = 0;
5516 }
5517 /* Now find all the moves between the points and modify them. */
5518 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5519 {
5520 if (LABEL_P (scan))
5521 last_float = 0;
5522 if (NONJUMP_INSN_P (scan)
5523 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5524 need_aligned_label = 1;
5525 if (broken_move (scan))
5526 {
5527 rtx *patp = &PATTERN (scan), pat = *patp;
5528 rtx src, dst;
5529 rtx lab;
5530 rtx newsrc;
5531 enum machine_mode mode;
5532
5533 if (GET_CODE (pat) == PARALLEL)
5534 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5535 src = SET_SRC (pat);
5536 dst = SET_DEST (pat);
5537 mode = GET_MODE (dst);
5538
5539 if (mode == SImode && hi_const (src)
5540 && REGNO (dst) != FPUL_REG)
5541 {
5542 int offset = 0;
5543
5544 mode = HImode;
5545 while (GET_CODE (dst) == SUBREG)
5546 {
5547 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5548 GET_MODE (SUBREG_REG (dst)),
5549 SUBREG_BYTE (dst),
5550 GET_MODE (dst));
5551 dst = SUBREG_REG (dst);
5552 }
5553 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5554 }
5555 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5556 {
5557 /* This must be an insn that clobbers r0. */
5558 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5559 XVECLEN (PATTERN (scan), 0)
5560 - 1);
5561 rtx clobber = *clobberp;
5562
5563 gcc_assert (GET_CODE (clobber) == CLOBBER
5564 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5565
5566 if (last_float
5567 && reg_set_between_p (r0_rtx, last_float_move, scan))
5568 last_float = 0;
5569 if (last_float
5570 && TARGET_SHCOMPACT
5571 && GET_MODE_SIZE (mode) != 4
5572 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5573 last_float = 0;
5574 lab = add_constant (src, mode, last_float);
5575 if (lab)
5576 emit_insn_before (gen_mova (lab), scan);
5577 else
5578 {
5579 /* There will be a REG_UNUSED note for r0 on
5580 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5581 otherwise reorg's mark_target_live_regs will not
5582 consider r0 to be used, and we would end up with a
5583 delay slot insn in front of SCAN that clobbers r0. */
5584 rtx note
5585 = find_regno_note (last_float_move, REG_UNUSED, 0);
5586
5587 /* If we are not optimizing, then there may not be
5588 a note. */
5589 if (note)
5590 PUT_REG_NOTE_KIND (note, REG_INC);
5591
5592 *last_float_addr = r0_inc_rtx;
5593 }
5594 last_float_move = scan;
5595 last_float = src;
5596 newsrc = gen_const_mem (mode,
5597 (((TARGET_SH4 && ! TARGET_FMOVD)
5598 || REGNO (dst) == FPUL_REG)
5599 ? r0_inc_rtx
5600 : r0_rtx));
5601 last_float_addr = &XEXP (newsrc, 0);
5602
5603 /* Remove the clobber of r0. */
5604 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5605 gen_rtx_SCRATCH (Pmode));
5606 }
5607 /* This is a mova needing a label. Create it. */
5608 else if (GET_CODE (src) == UNSPEC
5609 && XINT (src, 1) == UNSPEC_MOVA
5610 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5611 {
5612 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5613 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5614 newsrc = gen_rtx_UNSPEC (SImode,
5615 gen_rtvec (1, newsrc),
5616 UNSPEC_MOVA);
5617 }
5618 else
5619 {
5620 lab = add_constant (src, mode, 0);
5621 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5622 newsrc = gen_const_mem (mode, newsrc);
5623 }
5624 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5625 INSN_CODE (scan) = -1;
5626 }
5627 }
5628 dump_table (need_aligned_label ? insn : 0, barrier);
5629 insn = barrier;
5630 }
5631 }
5632 free_alloc_pool (label_ref_list_pool);
5633 for (insn = first; insn; insn = NEXT_INSN (insn))
5634 PUT_MODE (insn, VOIDmode);
5635
5636 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5637 INSN_ADDRESSES_FREE ();
5638 split_branches (first);
5639
5640 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5641 also has an effect on the register that holds the address of the sfunc.
5642 Insert an extra dummy insn in front of each sfunc that pretends to
5643 use this register. */
5644 if (flag_delayed_branch)
5645 {
5646 for (insn = first; insn; insn = NEXT_INSN (insn))
5647 {
5648 rtx reg = sfunc_uses_reg (insn);
5649
5650 if (! reg)
5651 continue;
5652 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5653 }
5654 }
5655 #if 0
5656 /* fpscr is not actually a user variable, but we pretend it is for the
5657 sake of the previous optimization passes, since we want it handled like
5658 one. However, we don't have any debugging information for it, so turn
5659 it into a non-user variable now. */
5660 if (TARGET_SH4)
5661 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5662 #endif
5663 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5664 }
5665
5666 int
5667 get_dest_uid (rtx label, int max_uid)
5668 {
5669 rtx dest = next_real_insn (label);
5670 int dest_uid;
5671 if (! dest)
5672 /* This can happen for an undefined label. */
5673 return 0;
5674 dest_uid = INSN_UID (dest);
5675 /* If this is a newly created branch redirection blocking instruction,
5676 we cannot index the branch_uid or insn_addresses arrays with its
5677 uid. But then, we won't need to, because the actual destination is
5678 the following branch. */
5679 while (dest_uid >= max_uid)
5680 {
5681 dest = NEXT_INSN (dest);
5682 dest_uid = INSN_UID (dest);
5683 }
5684 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5685 return 0;
5686 return dest_uid;
5687 }
5688
5689 /* Split condbranches that are out of range. Also add clobbers for
5690 scratch registers that are needed in far jumps.
5691 We do this before delay slot scheduling, so that it can take our
5692 newly created instructions into account. It also allows us to
5693 find branches with common targets more easily. */
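/* Roughly, the rewrite performed here is (assembly sketch; labels and the
   exact sequence are illustrative): an out-of-range conditional branch
   such as "bt .Lfar" is redirected to a nearby label placed in front of
   an unconditional branch with a larger range,
       bt      .Lnear
       ...
   .Lnear:
       bra     .Lfar
   and gen_block_redirect / gen_far_branch below arrange the labels and
   any scratch-register clobbers this needs.  */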
5694
5695 static void
5696 split_branches (rtx first)
5697 {
5698 rtx insn;
5699 struct far_branch **uid_branch, *far_branch_list = 0;
5700 int max_uid = get_max_uid ();
5701 int ok;
5702
5703 /* Find out which branches are out of range. */
5704 shorten_branches (first);
5705
5706 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5707 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5708
5709 for (insn = first; insn; insn = NEXT_INSN (insn))
5710 if (! INSN_P (insn))
5711 continue;
5712 else if (INSN_DELETED_P (insn))
5713 {
5714 /* Shorten_branches would split this instruction again,
5715 so transform it into a note. */
5716 SET_INSN_DELETED (insn);
5717 }
5718 else if (JUMP_P (insn)
5719 /* Don't mess with ADDR_DIFF_VECs. */
5720 && (GET_CODE (PATTERN (insn)) == SET
5721 || GET_CODE (PATTERN (insn)) == RETURN))
5722 {
5723 enum attr_type type = get_attr_type (insn);
5724 if (type == TYPE_CBRANCH)
5725 {
5726 rtx next, beyond;
5727
5728 if (get_attr_length (insn) > 4)
5729 {
5730 rtx src = SET_SRC (PATTERN (insn));
5731 rtx olabel = XEXP (XEXP (src, 1), 0);
5732 int addr = INSN_ADDRESSES (INSN_UID (insn));
5733 rtx label = 0;
5734 int dest_uid = get_dest_uid (olabel, max_uid);
5735 struct far_branch *bp = uid_branch[dest_uid];
5736
5737 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5738 the label if the LABEL_NUSES count drops to zero. There is
5739 always a jump_optimize pass that sets these values, but it
5740 proceeds to delete unreferenced code, and then if not
5741 optimizing, to un-delete the deleted instructions, thus
5742 leaving labels with use counts that are too low. */
5743 if (! optimize)
5744 {
5745 JUMP_LABEL (insn) = olabel;
5746 LABEL_NUSES (olabel)++;
5747 }
5748 if (! bp)
5749 {
5750 bp = (struct far_branch *) alloca (sizeof *bp);
5751 uid_branch[dest_uid] = bp;
5752 bp->prev = far_branch_list;
5753 far_branch_list = bp;
5754 bp->far_label
5755 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5756 LABEL_NUSES (bp->far_label)++;
5757 }
5758 else
5759 {
5760 label = bp->near_label;
5761 if (! label && bp->address - addr >= CONDJUMP_MIN)
5762 {
5763 rtx block = bp->insert_place;
5764
5765 if (GET_CODE (PATTERN (block)) == RETURN)
5766 block = PREV_INSN (block);
5767 else
5768 block = gen_block_redirect (block,
5769 bp->address, 2);
5770 label = emit_label_after (gen_label_rtx (),
5771 PREV_INSN (block));
5772 bp->near_label = label;
5773 }
5774 else if (label && ! NEXT_INSN (label))
5775 {
5776 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5777 bp->insert_place = insn;
5778 else
5779 gen_far_branch (bp);
5780 }
5781 }
5782 if (! label
5783 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5784 {
5785 bp->near_label = label = gen_label_rtx ();
5786 bp->insert_place = insn;
5787 bp->address = addr;
5788 }
5789 ok = redirect_jump (insn, label, 0);
5790 gcc_assert (ok);
5791 }
5792 else
5793 {
5794 /* get_attr_length (insn) == 2 */
5795 /* Check if we have a pattern where reorg wants to redirect
5796 the branch to a label from an unconditional branch that
5797 is too far away. */
5798 /* We can't use JUMP_LABEL here because it might be undefined
5799 when not optimizing. */
5800 /* A syntax error might cause beyond to be NULL_RTX. */
5801 beyond
5802 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5803 0));
5804
5805 if (beyond
5806 && (JUMP_P (beyond)
5807 || ((beyond = next_active_insn (beyond))
5808 && JUMP_P (beyond)))
5809 && GET_CODE (PATTERN (beyond)) == SET
5810 && recog_memoized (beyond) == CODE_FOR_jump_compact
5811 && ((INSN_ADDRESSES
5812 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5813 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5814 > 252 + 258 + 2))
5815 gen_block_redirect (beyond,
5816 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5817 }
5818
5819 next = next_active_insn (insn);
5820
5821 if ((JUMP_P (next)
5822 || ((next = next_active_insn (next))
5823 && JUMP_P (next)))
5824 && GET_CODE (PATTERN (next)) == SET
5825 && recog_memoized (next) == CODE_FOR_jump_compact
5826 && ((INSN_ADDRESSES
5827 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5828 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5829 > 252 + 258 + 2))
5830 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5831 }
5832 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5833 {
5834 int addr = INSN_ADDRESSES (INSN_UID (insn));
5835 rtx far_label = 0;
5836 int dest_uid = 0;
5837 struct far_branch *bp;
5838
5839 if (type == TYPE_JUMP)
5840 {
5841 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5842 dest_uid = get_dest_uid (far_label, max_uid);
5843 if (! dest_uid)
5844 {
5845 /* Parse errors can lead to labels outside
5846 the insn stream. */
5847 if (! NEXT_INSN (far_label))
5848 continue;
5849
5850 if (! optimize)
5851 {
5852 JUMP_LABEL (insn) = far_label;
5853 LABEL_NUSES (far_label)++;
5854 }
5855 redirect_jump (insn, NULL_RTX, 1);
5856 far_label = 0;
5857 }
5858 }
5859 bp = uid_branch[dest_uid];
5860 if (! bp)
5861 {
5862 bp = (struct far_branch *) alloca (sizeof *bp);
5863 uid_branch[dest_uid] = bp;
5864 bp->prev = far_branch_list;
5865 far_branch_list = bp;
5866 bp->near_label = 0;
5867 bp->far_label = far_label;
5868 if (far_label)
5869 LABEL_NUSES (far_label)++;
5870 }
5871 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5872 if (addr - bp->address <= CONDJUMP_MAX)
5873 emit_label_after (bp->near_label, PREV_INSN (insn));
5874 else
5875 {
5876 gen_far_branch (bp);
5877 bp->near_label = 0;
5878 }
5879 else
5880 bp->near_label = 0;
5881 bp->address = addr;
5882 bp->insert_place = insn;
5883 if (! far_label)
5884 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5885 else
5886 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5887 }
5888 }
5889 /* Generate all pending far branches,
5890 and free our references to the far labels. */
5891 while (far_branch_list)
5892 {
5893 if (far_branch_list->near_label
5894 && ! NEXT_INSN (far_branch_list->near_label))
5895 gen_far_branch (far_branch_list);
5896 if (optimize
5897 && far_branch_list->far_label
5898 && ! --LABEL_NUSES (far_branch_list->far_label))
5899 delete_insn (far_branch_list->far_label);
5900 far_branch_list = far_branch_list->prev;
5901 }
5902
5903 /* Instruction length information is no longer valid due to the new
5904 instructions that have been generated. */
5905 init_insn_lengths ();
5906 }
5907
5908 /* Dump out instruction addresses, which is useful for debugging the
5909 constant pool table stuff.
5910
5911 If relaxing, output the label and pseudo-ops used to link together
5912 calls and the instruction which set the registers. */
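/* A minimal sketch of the -mrelax annotations this emits (label number
   and local-label prefix are assumptions, not actual output): an internal
   label such as ".L42:" is printed at the insn that sets the call-address
   register, and "\t.uses .L42" is printed at each call insn carrying the
   matching REG_LABEL_OPERAND note, so the linker can relax the pair.  */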
5913
5914 /* ??? The addresses printed by this routine for insns are nonsense for
5915 insns which are inside of a sequence where none of the inner insns have
5916 variable length. This is because the second pass of shorten_branches
5917 does not bother to update them. */
5918
5919 void
5920 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5921 int noperands ATTRIBUTE_UNUSED)
5922 {
5923 if (TARGET_DUMPISIZE)
5924 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5925
5926 if (TARGET_RELAX)
5927 {
5928 rtx note;
5929
5930 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5931 if (note)
5932 {
5933 rtx pattern;
5934
5935 pattern = PATTERN (insn);
5936 if (GET_CODE (pattern) == PARALLEL)
5937 pattern = XVECEXP (pattern, 0, 0);
5938 switch (GET_CODE (pattern))
5939 {
5940 case SET:
5941 if (GET_CODE (SET_SRC (pattern)) != CALL
5942 && get_attr_type (insn) != TYPE_SFUNC)
5943 {
5944 targetm.asm_out.internal_label
5945 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5946 break;
5947 }
5948 /* else FALLTHROUGH */
5949 case CALL:
5950 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5951 CODE_LABEL_NUMBER (XEXP (note, 0)));
5952 break;
5953
5954 default:
5955 gcc_unreachable ();
5956 }
5957 }
5958 }
5959 }
5960
5961 /* Dump out any constants accumulated in the final pass. These will
5962 only be labels. */
5963
5964 const char *
5965 output_jump_label_table (void)
5966 {
5967 int i;
5968
5969 if (pool_size)
5970 {
5971 fprintf (asm_out_file, "\t.align 2\n");
5972 for (i = 0; i < pool_size; i++)
5973 {
5974 pool_node *p = &pool_vector[i];
5975
5976 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5977 CODE_LABEL_NUMBER (p->label));
5978 output_asm_insn (".long %O0", &p->value);
5979 }
5980 pool_size = 0;
5981 }
5982
5983 return "";
5984 }
5985 \f
5986 /* A full frame looks like:
5987
5988 arg-5
5989 arg-4
5990 [ if current_function_anonymous_args
5991 arg-3
5992 arg-2
5993 arg-1
5994 arg-0 ]
5995 saved-fp
5996 saved-r10
5997 saved-r11
5998 saved-r12
5999 saved-pr
6000 local-n
6001 ..
6002 local-1
6003 local-0 <- fp points here. */
6004
6005 /* Number of bytes pushed for anonymous args, used to pass information
6006 between expand_prologue and expand_epilogue. */
6007
6008 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6009 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6010 for an epilogue and a negative value means that it's for a sibcall
6011 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6012 all the registers that are about to be restored, and hence dead. */
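/* Typical call shapes, as used by the prologue and epilogue code below
   (the concrete byte count is illustrative only):
       output_stack_adjust (-16, stack_pointer_rtx, 0, NULL);
           -- prologue: allocate 16 bytes; no live-register info needed.
       output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
           -- epilogue: release the frame, with e == 1, or e == -1 for a
              sibcall epilogue, and the set of registers about to be
              restored.  */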
6013
6014 static void
6015 output_stack_adjust (int size, rtx reg, int epilogue_p,
6016 HARD_REG_SET *live_regs_mask)
6017 {
6018 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
6019 if (size)
6020 {
6021 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6022
6023 /* This test is bogus, as output_stack_adjust is used to re-align the
6024 stack. */
6025 #if 0
6026 gcc_assert (!(size % align));
6027 #endif
6028
6029 if (CONST_OK_FOR_ADD (size))
6030 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6031 /* Try to do it with two partial adjustments; however, we must make
6032 sure that the stack is properly aligned at all times, in case
6033 an interrupt occurs between the two partial adjustments. */
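/* Illustrative split (the size and the signed 8-bit add range of
   non-SHmedia targets are assumptions for this example): with align == 8
   and size == -192, a single add does not fit, but size / 2 & -align == -96
   and size - (-96) == -96 both do, and each partial step keeps the stack
   8-byte aligned.  */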
6034 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6035 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6036 {
6037 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6038 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6039 }
6040 else
6041 {
6042 rtx const_reg;
6043 rtx insn;
6044 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6045 int i;
6046
6047 /* If TEMP is invalid, we could temporarily save a general
6048 register to MACL. However, there is currently no need
6049 to handle this case, so just die when we see it. */
6050 if (epilogue_p < 0
6051 || current_function_interrupt
6052 || ! call_really_used_regs[temp] || fixed_regs[temp])
6053 temp = -1;
6054 if (temp < 0 && ! current_function_interrupt
6055 && (TARGET_SHMEDIA || epilogue_p >= 0))
6056 {
6057 HARD_REG_SET temps;
6058 COPY_HARD_REG_SET (temps, call_used_reg_set);
6059 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6060 if (epilogue_p > 0)
6061 {
6062 int nreg = 0;
6063 if (crtl->return_rtx)
6064 {
6065 enum machine_mode mode;
6066 mode = GET_MODE (crtl->return_rtx);
6067 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6068 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6069 }
6070 for (i = 0; i < nreg; i++)
6071 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6072 if (crtl->calls_eh_return)
6073 {
6074 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6075 for (i = 0; i <= 3; i++)
6076 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6077 }
6078 }
6079 if (TARGET_SHMEDIA && epilogue_p < 0)
6080 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6081 CLEAR_HARD_REG_BIT (temps, i);
6082 if (epilogue_p <= 0)
6083 {
6084 for (i = FIRST_PARM_REG;
6085 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6086 CLEAR_HARD_REG_BIT (temps, i);
6087 if (cfun->static_chain_decl != NULL)
6088 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6089 }
6090 temp = scavenge_reg (&temps);
6091 }
6092 if (temp < 0 && live_regs_mask)
6093 {
6094 HARD_REG_SET temps;
6095
6096 COPY_HARD_REG_SET (temps, *live_regs_mask);
6097 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6098 temp = scavenge_reg (&temps);
6099 }
6100 if (temp < 0)
6101 {
6102 rtx adj_reg, tmp_reg, mem;
6103
6104 /* If we reached here, the most likely case is the (sibcall)
6105 epilogue for non-SHmedia. Put a special push/pop sequence
6106 for such a case as a last resort. This looks lengthy, but it
6107 should not be a problem because this case seems to be very
6108 rare. */
6109
6110 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6111
6112
6113 /* ??? There is still the slight possibility that r4 or
6114 r5 have been reserved as fixed registers or assigned
6115 as global registers, and they change during an
6116 interrupt. There are possible ways to handle this:
6117
6118 - If we are adjusting the frame pointer (r14), we can do
6119 with a single temp register and an ordinary push / pop
6120 on the stack.
6121 - Grab any call-used or call-saved registers (i.e. not
6122 fixed or globals) for the temps we need. We might
6123 also grab r14 if we are adjusting the stack pointer.
6124 If we can't find enough available registers, issue
6125 a diagnostic and die - the user must have reserved
6126 way too many registers.
6127 But since all this is rather unlikely to happen and
6128 would require extra testing, we just die if r4 / r5
6129 are not available. */
6130 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6131 && !global_regs[4] && !global_regs[5]);
6132
6133 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6134 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6135 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6136 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6137 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6138 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6139 emit_move_insn (mem, tmp_reg);
6140 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6141 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6142 emit_move_insn (mem, tmp_reg);
6143 emit_move_insn (reg, adj_reg);
6144 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6145 emit_move_insn (adj_reg, mem);
6146 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6147 emit_move_insn (tmp_reg, mem);
6148 /* Tell flow the insns that pop r4/r5 aren't dead. */
6149 emit_use (tmp_reg);
6150 emit_use (adj_reg);
6151 return;
6152 }
6153 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6154
6155 /* If SIZE is negative, subtract the positive value.
6156 This sometimes allows a constant pool entry to be shared
6157 between prologue and epilogue code. */
6158 if (size < 0)
6159 {
6160 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6161 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6162 }
6163 else
6164 {
6165 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6166 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6167 }
6168 if (! epilogue_p)
6169 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6170 gen_rtx_SET (VOIDmode, reg,
6171 gen_rtx_PLUS (SImode, reg,
6172 GEN_INT (size))));
6173 }
6174 }
6175 }
6176
6177 static rtx
6178 frame_insn (rtx x)
6179 {
6180 x = emit_insn (x);
6181 RTX_FRAME_RELATED_P (x) = 1;
6182 return x;
6183 }
6184
6185 /* Output RTL to push register RN onto the stack. */
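/* A sketch of what push (rn) emits for an ordinary general register
   (the exact pattern lives in sh.md and is only paraphrased here):
   roughly
       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI rn))
   wrapped by frame_insn so it is marked RTX_FRAME_RELATED_P, plus a
   REG_INC note for the stack pointer, as added below.  */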
6186
6187 static rtx
6188 push (int rn)
6189 {
6190 rtx x;
6191 if (rn == FPUL_REG)
6192 x = gen_push_fpul ();
6193 else if (rn == FPSCR_REG)
6194 x = gen_push_fpscr ();
6195 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6196 && FP_OR_XD_REGISTER_P (rn))
6197 {
6198 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6199 return NULL_RTX;
6200 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6201 }
6202 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6203 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6204 else
6205 x = gen_push (gen_rtx_REG (SImode, rn));
6206
6207 x = frame_insn (x);
6208 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6209 return x;
6210 }
6211
6212 /* Output RTL to pop register RN from the stack. */
6213
6214 static void
6215 pop (int rn)
6216 {
6217 rtx x;
6218 if (rn == FPUL_REG)
6219 x = gen_pop_fpul ();
6220 else if (rn == FPSCR_REG)
6221 x = gen_pop_fpscr ();
6222 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6223 && FP_OR_XD_REGISTER_P (rn))
6224 {
6225 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6226 return;
6227 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6228 }
6229 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6230 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6231 else
6232 x = gen_pop (gen_rtx_REG (SImode, rn));
6233
6234 x = emit_insn (x);
6235 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6236 }
6237
6238 /* Generate code to push the regs specified in the mask. */
6239
6240 static void
6241 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6242 {
6243 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6244 int skip_fpscr = 0;
6245
6246 /* Push PR last; this gives better latencies after the prologue, and it
6247 provides candidates for the return delay slot when no general
6248 registers are pushed. */
6249 for (; i < FIRST_PSEUDO_REGISTER; i++)
6250 {
6251 /* If this is an interrupt handler, and the SZ bit varies,
6252 and we have to push any floating point register, we need
6253 to switch to the correct precision first. */
6254 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6255 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6256 {
6257 HARD_REG_SET unsaved;
6258
6259 push (FPSCR_REG);
6260 COMPL_HARD_REG_SET (unsaved, *mask);
6261 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6262 skip_fpscr = 1;
6263 }
6264 if (i != PR_REG
6265 && (i != FPSCR_REG || ! skip_fpscr)
6266 && TEST_HARD_REG_BIT (*mask, i))
6267 {
6268 /* If the ISR has the RESBANK attribute assigned, don't push any of
6269 the following registers: R0-R14, MACH, MACL and GBR. */
6270 if (! (sh_cfun_resbank_handler_p ()
6271 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6272 || i == MACH_REG
6273 || i == MACL_REG
6274 || i == GBR_REG)))
6275 push (i);
6276 }
6277 }
6278
6279 /* Push banked registers last to improve delay slot opportunities. */
6280 if (interrupt_handler)
6281 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6282 if (TEST_HARD_REG_BIT (*mask, i))
6283 push (i);
6284
6285 /* Don't push the PR register for an ISR with the RESBANK attribute assigned. */
6286 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6287 push (PR_REG);
6288 }
6289
6290 /* Calculate how much extra space is needed to save all callee-saved
6291 target registers.
6292 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6293
6294 static int
6295 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6296 {
6297 int reg;
6298 int stack_space = 0;
6299 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6300
6301 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6302 if ((! call_really_used_regs[reg] || interrupt_handler)
6303 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6304 /* Leave space to save this target register on the stack,
6305 in case target register allocation wants to use it. */
6306 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6307 return stack_space;
6308 }
6309
6310 /* Decide whether we should reserve space for callee-save target registers,
6311 in case target register allocation wants to use them. REGS_SAVED is
6312 the space, in bytes, that is already required for register saves.
6313 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6314
6315 static int
6316 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6317 HARD_REG_SET *live_regs_mask)
6318 {
6319 if (optimize_size)
6320 return 0;
6321 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6322 }
6323
6324 /* Decide how much space to reserve for callee-save target registers
6325 in case target register allocation wants to use them.
6326 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6327
6328 static int
6329 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6330 {
6331 if (shmedia_space_reserved_for_target_registers)
6332 return shmedia_target_regs_stack_space (live_regs_mask);
6333 else
6334 return 0;
6335 }
6336
6337 /* Work out the registers which need to be saved, both as a mask and a
6338 count of saved bytes. Return the count.
6339
6340 If doing a pragma interrupt function, then push all regs used by the
6341 function, and if we call another function (we can tell by looking at PR),
6342 make sure that all the regs it clobbers are safe too. */
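/* Illustrative accounting (the register choice is hypothetical): each
   saved register contributes GET_MODE_SIZE of its natural mode to the
   returned count, e.g. an SImode general register adds 4 bytes and a
   DFmode register pair adds 8, so a function saving only r8, r9 and pr
   would return 12.  */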
6343
6344 static int
6345 calc_live_regs (HARD_REG_SET *live_regs_mask)
6346 {
6347 unsigned int reg;
6348 int count;
6349 tree attrs;
6350 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6351 bool nosave_low_regs;
6352 int pr_live, has_call;
6353
6354 attrs = DECL_ATTRIBUTES (current_function_decl);
6355 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6356 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6357 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6358 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6359
6360 CLEAR_HARD_REG_SET (*live_regs_mask);
6361 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6362 && df_regs_ever_live_p (FPSCR_REG))
6363 target_flags &= ~MASK_FPU_SINGLE;
6364 /* If we can save a lot of saves by switching to double mode, do that. */
6365 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6366 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6367 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6368 && (! call_really_used_regs[reg]
6369 || interrupt_handler)
6370 && ++count > 2)
6371 {
6372 target_flags &= ~MASK_FPU_SINGLE;
6373 break;
6374 }
6375 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6376 knows how to use it. That means the pseudo originally allocated for
6377 the initial value can become the PR_MEDIA_REG hard register, as seen for
6378 execute/20010122-1.c:test9. */
6379 if (TARGET_SHMEDIA)
6380 /* ??? this function is called from initial_elimination_offset, hence we
6381 can't use the result of sh_media_register_for_return here. */
6382 pr_live = sh_pr_n_sets ();
6383 else
6384 {
6385 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6386 pr_live = (pr_initial
6387 ? (!REG_P (pr_initial)
6388 || REGNO (pr_initial) != (PR_REG))
6389 : df_regs_ever_live_p (PR_REG));
6390 /* For SHcompact, if not optimizing, we end up with a memory reference
6391 using the return address pointer for __builtin_return_address even
6392 though there is no actual need to put the PR register on the stack. */
6393 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6394 }
6395 /* Force PR to be live if the prologue has to call the SHmedia
6396 argument decoder or register saver. */
6397 if (TARGET_SHCOMPACT
6398 && ((crtl->args.info.call_cookie
6399 & ~ CALL_COOKIE_RET_TRAMP (1))
6400 || crtl->saves_all_registers))
6401 pr_live = 1;
6402 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6403 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6404 {
6405 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6406 ? pr_live
6407 : interrupt_handler
6408 ? (/* Need to save all the regs ever live. */
6409 (df_regs_ever_live_p (reg)
6410 || (call_really_used_regs[reg]
6411 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6412 || reg == PIC_OFFSET_TABLE_REGNUM)
6413 && has_call)
6414 || (TARGET_SHMEDIA && has_call
6415 && REGISTER_NATURAL_MODE (reg) == SImode
6416 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6417 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6418 && reg != RETURN_ADDRESS_POINTER_REGNUM
6419 && reg != T_REG && reg != GBR_REG
6420 /* Push fpscr only on targets which have an FPU. */
6421 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6422 : (/* Only push those regs which are used and need to be saved. */
6423 (TARGET_SHCOMPACT
6424 && flag_pic
6425 && crtl->args.info.call_cookie
6426 && reg == PIC_OFFSET_TABLE_REGNUM)
6427 || (df_regs_ever_live_p (reg)
6428 && ((!call_really_used_regs[reg]
6429 && !(reg != PIC_OFFSET_TABLE_REGNUM
6430 && fixed_regs[reg] && call_used_regs[reg]))
6431 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6432 || (crtl->calls_eh_return
6433 && (reg == EH_RETURN_DATA_REGNO (0)
6434 || reg == EH_RETURN_DATA_REGNO (1)
6435 || reg == EH_RETURN_DATA_REGNO (2)
6436 || reg == EH_RETURN_DATA_REGNO (3)))
6437 || ((reg == MACL_REG || reg == MACH_REG)
6438 && df_regs_ever_live_p (reg)
6439 && sh_cfun_attr_renesas_p ())
6440 ))
6441 {
6442 SET_HARD_REG_BIT (*live_regs_mask, reg);
6443 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6444
6445 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6446 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6447 {
6448 if (FP_REGISTER_P (reg))
6449 {
6450 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6451 {
6452 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6453 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6454 }
6455 }
6456 else if (XD_REGISTER_P (reg))
6457 {
6458 /* Must switch to double mode to access these registers. */
6459 target_flags &= ~MASK_FPU_SINGLE;
6460 }
6461 }
6462 }
6463 if (nosave_low_regs && reg == R8_REG)
6464 break;
6465 }
6466 /* If we have a target register optimization pass after prologue / epilogue
6467 threading, we need to assume all target registers will be live even if
6468 they aren't now. */
6469 if (flag_branch_target_load_optimize2
6470 && TARGET_SAVE_ALL_TARGET_REGS
6471 && shmedia_space_reserved_for_target_registers)
6472 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6473 if ((! call_really_used_regs[reg] || interrupt_handler)
6474 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6475 {
6476 SET_HARD_REG_BIT (*live_regs_mask, reg);
6477 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6478 }
6479 /* If this is an interrupt handler, we don't have any call-clobbered
6480 registers we can conveniently use for target register save/restore.
6481 Make sure we save at least one general purpose register when we need
6482 to save target registers. */
6483 if (interrupt_handler
6484 && hard_reg_set_intersect_p (*live_regs_mask,
6485 reg_class_contents[TARGET_REGS])
6486 && ! hard_reg_set_intersect_p (*live_regs_mask,
6487 reg_class_contents[GENERAL_REGS]))
6488 {
6489 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6490 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6491 }
6492
6493 return count;
6494 }
6495
6496 /* Code to generate prologue and epilogue sequences */
6497
6498 /* PUSHED is the number of bytes that are being pushed on the
6499 stack for register saves. Return the frame size, padded
6500 appropriately so that the stack stays properly aligned. */
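/* Worked example (all values assumed for illustration): with an 8-byte
   STACK_BOUNDARY, get_frame_size () == 20 and PUSHED == 12, the result
   is ((20 + 12 + 7) & -8) - 12 == 32 - 12 == 20, so locals plus saved
   registers together occupy a multiple of 8 bytes.  */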
6501 static HOST_WIDE_INT
6502 rounded_frame_size (int pushed)
6503 {
6504 HOST_WIDE_INT size = get_frame_size ();
6505 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6506
6507 return ((size + pushed + align - 1) & -align) - pushed;
6508 }
6509
6510 /* Choose a call-clobbered target-branch register that remains
6511 unchanged along the whole function. We set it up as the return
6512 value in the prologue. */
6513 int
6514 sh_media_register_for_return (void)
6515 {
6516 int regno;
6517 int tr0_used;
6518
6519 if (! current_function_is_leaf)
6520 return -1;
6521 if (lookup_attribute ("interrupt_handler",
6522 DECL_ATTRIBUTES (current_function_decl)))
6523 return -1;
6524 if (sh_cfun_interrupt_handler_p ())
6525 return -1;
6526
6527 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6528
6529 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6530 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6531 return regno;
6532
6533 return -1;
6534 }
6535
6536 /* The maximum registers we need to save are:
6537 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6538 - 32 floating point registers (for each pair, we save none,
6539 one single precision value, or a double precision value).
6540 - 8 target registers
6541 - add 1 entry for a delimiter. */
6542 #define MAX_SAVED_REGS (62+32+8)
6543
6544 typedef struct save_entry_s
6545 {
6546 unsigned char reg;
6547 unsigned char mode;
6548 short offset;
6549 } save_entry;
6550
6551 #define MAX_TEMPS 4
6552
6553 /* There will be a delimiter entry with VOIDmode both at the start and the
6554 end of a filled in schedule. The end delimiter has the offset of the
6555 save with the smallest (i.e. most negative) offset. */
6556 typedef struct save_schedule_s
6557 {
6558 save_entry entries[MAX_SAVED_REGS + 2];
6559 int temps[MAX_TEMPS+1];
6560 } save_schedule;
6561
6562 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6563 use reverse order. Returns the last entry written to (not counting
6564 the delimiter). OFFSET_BASE is a number to be added to all offset
6565 entries. */
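/* Illustrative schedule (registers and offsets are hypothetical): with
   OFFSET_BASE == 0, one DFmode save and one SImode save, the filled-in
   entries are
       { -1, VOIDmode,   0 }   start delimiter
       { fr, DFmode,    -8 }   saved in the 8-byte aligned pass
       { gr, SImode,   -12 }   saved in the unaligned pass
       { -1, VOIDmode, -12 }   end delimiter (smallest offset)
   which matches the delimiter convention described above.  */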
6566
6567 static save_entry *
6568 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6569 int offset_base)
6570 {
6571 int align, i;
6572 save_entry *entry = schedule->entries;
6573 int tmpx = 0;
6574 int offset;
6575
6576 if (! current_function_interrupt)
6577 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6578 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6579 && ! FUNCTION_ARG_REGNO_P (i)
6580 && i != FIRST_RET_REG
6581 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6582 && ! (crtl->calls_eh_return
6583 && (i == EH_RETURN_STACKADJ_REGNO
6584 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6585 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6586 schedule->temps[tmpx++] = i;
6587 entry->reg = -1;
6588 entry->mode = VOIDmode;
6589 entry->offset = offset_base;
6590 entry++;
6591 /* We loop twice: first, we save 8-byte aligned registers at the
6592 higher addresses, which are known to be aligned. Then, we
6593 proceed to saving 32-bit registers that don't need 8-byte
6594 alignment.
6595 If this is an interrupt function, all registers that need saving
6596 need to be saved in full. Moreover, we need to postpone saving
6597 target registers until we have saved some general purpose registers
6598 that we can then use as scratch registers. */
6599 offset = offset_base;
6600 for (align = 1; align >= 0; align--)
6601 {
6602 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6603 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6604 {
6605 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6606 int reg = i;
6607
6608 if (current_function_interrupt)
6609 {
6610 if (TARGET_REGISTER_P (i))
6611 continue;
6612 if (GENERAL_REGISTER_P (i))
6613 mode = DImode;
6614 }
6615 if (mode == SFmode && (i % 2) == 1
6616 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6617 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6618 {
6619 mode = DFmode;
6620 i--;
6621 reg--;
6622 }
6623
6624 /* If we're doing the aligned pass and this is not aligned,
6625 or we're doing the unaligned pass and this is aligned,
6626 skip it. */
6627 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6628 != align)
6629 continue;
6630
6631 if (current_function_interrupt
6632 && GENERAL_REGISTER_P (i)
6633 && tmpx < MAX_TEMPS)
6634 schedule->temps[tmpx++] = i;
6635
6636 offset -= GET_MODE_SIZE (mode);
6637 entry->reg = i;
6638 entry->mode = mode;
6639 entry->offset = offset;
6640 entry++;
6641 }
6642 if (align && current_function_interrupt)
6643 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6644 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6645 {
6646 offset -= GET_MODE_SIZE (DImode);
6647 entry->reg = i;
6648 entry->mode = DImode;
6649 entry->offset = offset;
6650 entry++;
6651 }
6652 }
6653 entry->reg = -1;
6654 entry->mode = VOIDmode;
6655 entry->offset = offset;
6656 schedule->temps[tmpx] = -1;
6657 return entry - 1;
6658 }
6659
6660 void
6661 sh_expand_prologue (void)
6662 {
6663 HARD_REG_SET live_regs_mask;
6664 int d, i;
6665 int d_rounding = 0;
6666 int save_flags = target_flags;
6667 int pretend_args;
6668 tree sp_switch_attr
6669 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6670
6671 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6672
6673 /* We have pretend args if we had an object sent partially in registers
6674 and partially on the stack, e.g. a large structure. */
6675 pretend_args = crtl->args.pretend_args_size;
6676 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6677 && (NPARM_REGS(SImode)
6678 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6679 pretend_args = 0;
6680 output_stack_adjust (-pretend_args
6681 - crtl->args.info.stack_regs * 8,
6682 stack_pointer_rtx, 0, NULL);
6683
6684 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6685 /* We're going to use the PIC register to load the address of the
6686 incoming-argument decoder and/or of the return trampoline from
6687 the GOT, so make sure the PIC register is preserved and
6688 initialized. */
6689 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6690
6691 if (TARGET_SHCOMPACT
6692 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6693 {
6694 int reg;
6695
6696 /* First, make all registers with incoming arguments that will
6697 be pushed onto the stack live, so that register renaming
6698 doesn't overwrite them. */
6699 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6700 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6701 >= NPARM_REGS (SImode) - reg)
6702 for (; reg < NPARM_REGS (SImode); reg++)
6703 emit_insn (gen_shcompact_preserve_incoming_args
6704 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6705 else if (CALL_COOKIE_INT_REG_GET
6706 (crtl->args.info.call_cookie, reg) == 1)
6707 emit_insn (gen_shcompact_preserve_incoming_args
6708 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6709
6710 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6711 stack_pointer_rtx);
6712 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6713 GEN_INT (crtl->args.info.call_cookie));
6714 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6715 gen_rtx_REG (SImode, R0_REG));
6716 }
6717 else if (TARGET_SHMEDIA)
6718 {
6719 int tr = sh_media_register_for_return ();
6720
6721 if (tr >= 0)
6722 emit_move_insn (gen_rtx_REG (DImode, tr),
6723 gen_rtx_REG (DImode, PR_MEDIA_REG));
6724 }
6725
6726 /* Emit the code for SETUP_VARARGS. */
6727 if (cfun->stdarg)
6728 {
6729 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6730 {
6731 /* Push arg regs as if they'd been provided by caller in stack. */
6732 for (i = 0; i < NPARM_REGS(SImode); i++)
6733 {
6734 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6735 rtx insn;
6736
6737 if (i >= (NPARM_REGS(SImode)
6738 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6739 ))
6740 break;
6741 insn = push (rn);
6742 }
6743 }
6744 }
6745
6746 /* If we're supposed to switch stacks at function entry, do so now. */
6747 if (sp_switch_attr)
6748 {
6749 rtx lab, newsrc;
6750 /* The argument specifies a variable holding the address of the
6751 stack the interrupt function should switch to/from at entry/exit. */
6752 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6753 const char *s
6754 = ggc_strdup (TREE_STRING_POINTER (arg));
6755 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6756
6757 lab = add_constant (sp_switch, SImode, 0);
6758 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6759 newsrc = gen_const_mem (SImode, newsrc);
6760
6761 emit_insn (gen_sp_switch_1 (newsrc));
6762 }
6763
6764 d = calc_live_regs (&live_regs_mask);
6765 /* ??? Maybe we could save some switching if we can move a mode switch
6766 that already happens to be at the function start into the prologue. */
6767 if (target_flags != save_flags && ! current_function_interrupt)
6768 emit_insn (gen_toggle_sz ());
6769
6770 if (TARGET_SH5)
6771 {
6772 int offset_base, offset;
6773 rtx r0 = NULL_RTX;
6774 int offset_in_r0 = -1;
6775 int sp_in_r0 = 0;
6776 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6777 int total_size, save_size;
6778 save_schedule schedule;
6779 save_entry *entry;
6780 int *tmp_pnt;
6781
6782 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6783 && ! current_function_interrupt)
6784 r0 = gen_rtx_REG (Pmode, R0_REG);
6785
6786 /* D is the actual number of bytes that we need for saving registers;
6787 however, in initial_elimination_offset we have committed to using
6788 an additional TREGS_SPACE amount of bytes - in order to keep both
6789 addresses to arguments supplied by the caller and local variables
6790 valid, we must keep this gap. Place it between the incoming
6791 arguments and the actually saved registers in a bid to optimize
6792 locality of reference. */
6793 total_size = d + tregs_space;
6794 total_size += rounded_frame_size (total_size);
6795 save_size = total_size - rounded_frame_size (d);
6796 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6797 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6798 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6799
6800 /* If adjusting the stack in a single step costs nothing extra, do so.
6801 I.e. either if a single addi is enough, or we need a movi anyway,
6802 and we don't exceed the maximum offset range (the test for the
6803 latter is conservative for simplicity). */
6804 if (TARGET_SHMEDIA
6805 && (CONST_OK_FOR_I10 (-total_size)
6806 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6807 && total_size <= 2044)))
6808 d_rounding = total_size - save_size;
6809
6810 offset_base = d + d_rounding;
6811
6812 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6813 0, NULL);
6814
6815 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6816 tmp_pnt = schedule.temps;
6817 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6818 {
6819 enum machine_mode mode = (enum machine_mode) entry->mode;
6820 unsigned int reg = entry->reg;
6821 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6822 rtx orig_reg_rtx;
6823
6824 offset = entry->offset;
6825
6826 reg_rtx = gen_rtx_REG (mode, reg);
6827
6828 mem_rtx = gen_frame_mem (mode,
6829 gen_rtx_PLUS (Pmode,
6830 stack_pointer_rtx,
6831 GEN_INT (offset)));
6832
6833 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6834 {
6835 gcc_assert (r0);
6836 mem_rtx = NULL_RTX;
6837 }
6838
6839 if (HAVE_PRE_DECREMENT
6840 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6841 || mem_rtx == NULL_RTX
6842 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6843 {
6844 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6845
6846 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6847 pre_dec = NULL_RTX;
6848 else
6849 {
6850 mem_rtx = NULL_RTX;
6851 offset += GET_MODE_SIZE (mode);
6852 }
6853 }
6854
6855 if (mem_rtx != NULL_RTX)
6856 goto addr_ok;
6857
6858 if (offset_in_r0 == -1)
6859 {
6860 emit_move_insn (r0, GEN_INT (offset));
6861 offset_in_r0 = offset;
6862 }
6863 else if (offset != offset_in_r0)
6864 {
6865 emit_move_insn (r0,
6866 gen_rtx_PLUS
6867 (Pmode, r0,
6868 GEN_INT (offset - offset_in_r0)));
6869 offset_in_r0 += offset - offset_in_r0;
6870 }
6871
6872 if (pre_dec != NULL_RTX)
6873 {
6874 if (! sp_in_r0)
6875 {
6876 emit_move_insn (r0,
6877 gen_rtx_PLUS
6878 (Pmode, r0, stack_pointer_rtx));
6879 sp_in_r0 = 1;
6880 }
6881
6882 offset -= GET_MODE_SIZE (mode);
6883 offset_in_r0 -= GET_MODE_SIZE (mode);
6884
6885 mem_rtx = pre_dec;
6886 }
6887 else if (sp_in_r0)
6888 mem_rtx = gen_frame_mem (mode, r0);
6889 else
6890 mem_rtx = gen_frame_mem (mode,
6891 gen_rtx_PLUS (Pmode,
6892 stack_pointer_rtx,
6893 r0));
6894
6895 /* We must not use an r0-based address for target-branch
6896 registers or for special registers without pre-dec
6897 memory addresses, since we store their values in r0
6898 first. */
6899 gcc_assert (!TARGET_REGISTER_P (reg)
6900 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6901 || mem_rtx == pre_dec));
6902
6903 addr_ok:
6904 orig_reg_rtx = reg_rtx;
6905 if (TARGET_REGISTER_P (reg)
6906 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6907 && mem_rtx != pre_dec))
6908 {
6909 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6910
6911 emit_move_insn (tmp_reg, reg_rtx);
6912
6913 if (REGNO (tmp_reg) == R0_REG)
6914 {
6915 offset_in_r0 = -1;
6916 sp_in_r0 = 0;
6917 gcc_assert (!refers_to_regno_p
6918 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6919 }
6920
6921 if (*++tmp_pnt <= 0)
6922 tmp_pnt = schedule.temps;
6923
6924 reg_rtx = tmp_reg;
6925 }
6926 {
6927 rtx insn;
6928
6929 /* Mark as interesting for the dwarf cfi generator. */
6930 insn = emit_move_insn (mem_rtx, reg_rtx);
6931 RTX_FRAME_RELATED_P (insn) = 1;
6932 /* If we use an intermediate register for the save, we can't
6933 describe this exactly in cfi as a copy of the to-be-saved
6934 register into the temporary register and then a store of the
6935 temporary register to the stack, because the temporary register
6936 can have a different natural size than the to-be-saved register.
6937 Thus, we gloss over the intermediate copy and pretend we do
6938 a direct save from the to-be-saved register. */
6939 if (REGNO (reg_rtx) != reg)
6940 {
6941 rtx set;
6942
6943 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6944 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6945 }
6946
6947 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6948 {
6949 rtx reg_rtx = gen_rtx_REG (mode, reg);
6950 rtx set;
6951 rtx mem_rtx = gen_frame_mem (mode,
6952 gen_rtx_PLUS (Pmode,
6953 stack_pointer_rtx,
6954 GEN_INT (offset)));
6955
6956 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6957 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6958 }
6959 }
6960 }
6961
6962 gcc_assert (entry->offset == d_rounding);
6963 }
6964 else
6965 push_regs (&live_regs_mask, current_function_interrupt);
6966
6967 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6968 emit_insn (gen_GOTaddr2picreg ());
6969
6970 if (SHMEDIA_REGS_STACK_ADJUST ())
6971 {
6972 /* This must NOT go through the PLT, otherwise mach and macl
6973 may be clobbered. */
6974 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6975 (TARGET_FPU_ANY
6976 ? "__GCC_push_shmedia_regs"
6977 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6978 emit_insn (gen_shmedia_save_restore_regs_compact
6979 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6980 }
6981
6982 if (target_flags != save_flags && ! current_function_interrupt)
6983 emit_insn (gen_toggle_sz ());
6984
6985 target_flags = save_flags;
6986
6987 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6988 stack_pointer_rtx, 0, NULL);
6989
6990 if (frame_pointer_needed)
6991 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6992
6993 if (TARGET_SHCOMPACT
6994 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6995 {
6996 /* This must NOT go through the PLT, otherwise mach and macl
6997 may be clobbered. */
6998 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6999 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7000 emit_insn (gen_shcompact_incoming_args ());
7001 }
7002 }
7003
7004 void
7005 sh_expand_epilogue (bool sibcall_p)
7006 {
7007 HARD_REG_SET live_regs_mask;
7008 int d, i;
7009 int d_rounding = 0;
7010
7011 int save_flags = target_flags;
7012 int frame_size, save_size;
7013 int fpscr_deferred = 0;
7014 int e = sibcall_p ? -1 : 1;
7015
7016 d = calc_live_regs (&live_regs_mask);
7017
7018 save_size = d;
7019 frame_size = rounded_frame_size (d);
7020
7021 if (TARGET_SH5)
7022 {
7023 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7024 int total_size;
7025 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7026 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7027 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7028
7029 total_size = d + tregs_space;
7030 total_size += rounded_frame_size (total_size);
7031 save_size = total_size - frame_size;
7032
7033 /* If adjusting the stack in a single step costs nothing extra, do so.
7034 I.e. either if a single addi is enough, or we need a movi anyway,
7035 and we don't exceed the maximum offset range (the test for the
7036 latter is conservative for simplicity). */
7037 if (TARGET_SHMEDIA
7038 && ! frame_pointer_needed
7039 && (CONST_OK_FOR_I10 (total_size)
7040 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7041 && total_size <= 2044)))
7042 d_rounding = frame_size;
7043
7044 frame_size -= d_rounding;
7045 }
7046
7047 if (frame_pointer_needed)
7048 {
7049 /* We must avoid scheduling the epilogue with previous basic blocks.
7050 See PR/18032 and PR/40313. */
7051 emit_insn (gen_blockage ());
7052 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7053 &live_regs_mask);
7054
7055 /* We must avoid moving the stack pointer adjustment past code
7056 which reads from the local frame, else an interrupt could
7057 occur after the SP adjustment and clobber data in the local
7058 frame. */
7059 emit_insn (gen_blockage ());
7060 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7061 }
7062 else if (frame_size)
7063 {
7064 /* We must avoid moving the stack pointer adjustment past code
7065 which reads from the local frame, else an interrupt could
7066 occur after the SP adjustment and clobber data in the local
7067 frame. */
7068 emit_insn (gen_blockage ());
7069 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
7070 }
7071
7072 if (SHMEDIA_REGS_STACK_ADJUST ())
7073 {
7074 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7075 (TARGET_FPU_ANY
7076 ? "__GCC_pop_shmedia_regs"
7077 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7078 /* This must NOT go through the PLT, otherwise mach and macl
7079 may be clobbered. */
7080 emit_insn (gen_shmedia_save_restore_regs_compact
7081 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7082 }
7083
7084 /* Pop all the registers. */
7085
7086 if (target_flags != save_flags && ! current_function_interrupt)
7087 emit_insn (gen_toggle_sz ());
7088 if (TARGET_SH5)
7089 {
7090 int offset_base, offset;
7091 int offset_in_r0 = -1;
7092 int sp_in_r0 = 0;
7093 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7094 save_schedule schedule;
7095 save_entry *entry;
7096 int *tmp_pnt;
7097
7098 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7099 offset_base = -entry[1].offset + d_rounding;
7100 tmp_pnt = schedule.temps;
7101 for (; entry->mode != VOIDmode; entry--)
7102 {
7103 enum machine_mode mode = (enum machine_mode) entry->mode;
7104 int reg = entry->reg;
7105 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7106
7107 offset = offset_base + entry->offset;
7108 reg_rtx = gen_rtx_REG (mode, reg);
7109
7110 mem_rtx = gen_frame_mem (mode,
7111 gen_rtx_PLUS (Pmode,
7112 stack_pointer_rtx,
7113 GEN_INT (offset)));
7114
7115 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7116 mem_rtx = NULL_RTX;
7117
7118 if (HAVE_POST_INCREMENT
7119 && (offset == offset_in_r0
7120 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7121 && mem_rtx == NULL_RTX)
7122 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7123 {
7124 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7125
7126 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7127 post_inc = NULL_RTX;
7128 else
7129 mem_rtx = NULL_RTX;
7130 }
7131
7132 if (mem_rtx != NULL_RTX)
7133 goto addr_ok;
7134
7135 if (offset_in_r0 == -1)
7136 {
7137 emit_move_insn (r0, GEN_INT (offset));
7138 offset_in_r0 = offset;
7139 }
7140 else if (offset != offset_in_r0)
7141 {
7142 emit_move_insn (r0,
7143 gen_rtx_PLUS
7144 (Pmode, r0,
7145 GEN_INT (offset - offset_in_r0)));
7146 offset_in_r0 += offset - offset_in_r0;
7147 }
7148
7149 if (post_inc != NULL_RTX)
7150 {
7151 if (! sp_in_r0)
7152 {
7153 emit_move_insn (r0,
7154 gen_rtx_PLUS
7155 (Pmode, r0, stack_pointer_rtx));
7156 sp_in_r0 = 1;
7157 }
7158
7159 mem_rtx = post_inc;
7160
7161 offset_in_r0 += GET_MODE_SIZE (mode);
7162 }
7163 else if (sp_in_r0)
7164 mem_rtx = gen_frame_mem (mode, r0);
7165 else
7166 mem_rtx = gen_frame_mem (mode,
7167 gen_rtx_PLUS (Pmode,
7168 stack_pointer_rtx,
7169 r0));
7170
7171 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7172 || mem_rtx == post_inc);
7173
7174 addr_ok:
7175 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7176 && mem_rtx != post_inc)
7177 {
7178 insn = emit_move_insn (r0, mem_rtx);
7179 mem_rtx = r0;
7180 }
7181 else if (TARGET_REGISTER_P (reg))
7182 {
7183 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7184
7185 /* Give the scheduler a bit of freedom by using up to
7186 MAX_TEMPS registers in a round-robin fashion. */
7187 insn = emit_move_insn (tmp_reg, mem_rtx);
7188 mem_rtx = tmp_reg;
7189 if (*++tmp_pnt < 0)
7190 tmp_pnt = schedule.temps;
7191 }
7192
7193 insn = emit_move_insn (reg_rtx, mem_rtx);
7194 }
7195
7196 gcc_assert (entry->offset + offset_base == d + d_rounding);
7197 }
7198 else /* ! TARGET_SH5 */
7199 {
7200 int last_reg;
7201
7202 save_size = 0;
7203 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
7204 register. */
7205 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7206 && !sh_cfun_resbank_handler_p ())
7207 {
7208 if (!frame_pointer_needed)
7209 emit_insn (gen_blockage ());
7210 pop (PR_REG);
7211 }
7212
7213 /* Banked registers are popped first to avoid being scheduled in the
7214 delay slot. RTE switches banks before the delay slot instruction. */
7215 if (current_function_interrupt)
7216 {
7217 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7218 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7219 pop (LAST_BANKED_REG - i);
7220
7221 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7222 }
7223 else
7224 last_reg = FIRST_PSEUDO_REGISTER;
7225
7226 for (i = 0; i < last_reg; i++)
7227 {
7228 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7229
7230 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7231 && hard_reg_set_intersect_p (live_regs_mask,
7232 reg_class_contents[DF_REGS]))
7233 fpscr_deferred = 1;
7234 /* For an ISR with the RESBANK attribute assigned, don't pop the
7235 following registers: R0-R14, MACH, MACL and GBR. */
7236 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7237 && ! (sh_cfun_resbank_handler_p ()
7238 && ((j >= FIRST_GENERAL_REG
7239 && j < LAST_GENERAL_REG)
7240 || j == MACH_REG
7241 || j == MACL_REG
7242 || j == GBR_REG)))
7243 pop (j);
7244
7245 if (j == FIRST_FP_REG && fpscr_deferred)
7246 pop (FPSCR_REG);
7247 }
7248 }
7249 if (target_flags != save_flags && ! current_function_interrupt)
7250 emit_insn (gen_toggle_sz ());
7251 target_flags = save_flags;
7252
7253 output_stack_adjust (crtl->args.pretend_args_size
7254 + save_size + d_rounding
7255 + crtl->args.info.stack_regs * 8,
7256 stack_pointer_rtx, e, NULL);
7257
7258 if (crtl->calls_eh_return)
7259 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7260 EH_RETURN_STACKADJ_RTX));
7261
7262 /* Switch back to the normal stack if necessary. */
7263 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7264 emit_insn (gen_sp_switch_2 ());
7265
7266 /* Tell flow the insn that pops PR isn't dead. */
7267 /* PR_REG will never be live in SHmedia mode, and we don't need to
7268 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7269 by the return pattern. */
7270 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7271 emit_use (gen_rtx_REG (SImode, PR_REG));
7272 }
7273
7274 static int sh_need_epilogue_known = 0;
7275
7276 int
7277 sh_need_epilogue (void)
7278 {
7279 if (! sh_need_epilogue_known)
7280 {
7281 rtx epilogue;
7282
7283 start_sequence ();
7284 sh_expand_epilogue (0);
7285 epilogue = get_insns ();
7286 end_sequence ();
7287 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7288 }
7289 return sh_need_epilogue_known > 0;
7290 }
7291
7292 /* Emit code to change the current function's return address to RA.
7293 TMP is available as a scratch register, if needed. */
7294
7295 void
7296 sh_set_return_address (rtx ra, rtx tmp)
7297 {
7298 HARD_REG_SET live_regs_mask;
7299 int d;
7300 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7301 int pr_offset;
7302
7303 d = calc_live_regs (&live_regs_mask);
7304
7305 /* If pr_reg isn't live, we can set it (or the register given in
7306 sh_media_register_for_return) directly. */
7307 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7308 {
7309 rtx rr;
7310
7311 if (TARGET_SHMEDIA)
7312 {
7313 int rr_regno = sh_media_register_for_return ();
7314
7315 if (rr_regno < 0)
7316 rr_regno = pr_reg;
7317
7318 rr = gen_rtx_REG (DImode, rr_regno);
7319 }
7320 else
7321 rr = gen_rtx_REG (SImode, pr_reg);
7322
7323 emit_insn (GEN_MOV (rr, ra));
7324 /* Tell flow the register for return isn't dead. */
7325 emit_use (rr);
7326 return;
7327 }
7328
7329 if (TARGET_SH5)
7330 {
7331 int offset;
7332 save_schedule schedule;
7333 save_entry *entry;
7334
7335 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7336 offset = entry[1].offset;
7337 for (; entry->mode != VOIDmode; entry--)
7338 if (entry->reg == pr_reg)
7339 goto found;
7340
7341 /* We can't find the PR register. */
7342 gcc_unreachable ();
7343
7344 found:
7345 offset = entry->offset - offset;
7346 pr_offset = (rounded_frame_size (d) + offset
7347 + SHMEDIA_REGS_STACK_ADJUST ());
7348 }
7349 else
7350 pr_offset = rounded_frame_size (d);
7351
7352 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7353 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7354
7355 tmp = gen_frame_mem (Pmode, tmp);
7356 emit_insn (GEN_MOV (tmp, ra));
7357 /* Tell flow this store isn't dead. */
7358 emit_use (tmp);
7359 }
7360
7361 /* Clear variables at function end. */
7362
7363 static void
7364 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7365 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7366 {
7367 sh_need_epilogue_known = 0;
7368 }
7369
7370 static rtx
7371 sh_builtin_saveregs (void)
7372 {
7373 /* First unnamed integer register. */
7374 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7375 /* Number of integer registers we need to save. */
7376 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7377 /* First unnamed SFmode float reg */
7378 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7379 /* Number of SFmode float regs to save. */
7380 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7381 rtx regbuf, fpregs;
7382 int bufsize, regno;
7383 alias_set_type alias_set;
7384
7385 if (TARGET_SH5)
7386 {
7387 if (n_intregs)
7388 {
7389 int pushregs = n_intregs;
7390
7391 while (pushregs < NPARM_REGS (SImode) - 1
7392 && (CALL_COOKIE_INT_REG_GET
7393 (crtl->args.info.call_cookie,
7394 NPARM_REGS (SImode) - pushregs)
7395 == 1))
7396 {
7397 crtl->args.info.call_cookie
7398 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7399 - pushregs, 1);
7400 pushregs++;
7401 }
7402
7403 if (pushregs == NPARM_REGS (SImode))
7404 crtl->args.info.call_cookie
7405 |= (CALL_COOKIE_INT_REG (0, 1)
7406 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7407 else
7408 crtl->args.info.call_cookie
7409 |= CALL_COOKIE_STACKSEQ (pushregs);
7410
7411 crtl->args.pretend_args_size += 8 * n_intregs;
7412 }
7413 if (TARGET_SHCOMPACT)
7414 return const0_rtx;
7415 }
7416
7417 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7418 {
7419 error ("__builtin_saveregs not supported by this subtarget");
7420 return const0_rtx;
7421 }
7422
7423 if (TARGET_SHMEDIA)
7424 n_floatregs = 0;
7425
7426 /* Allocate block of memory for the regs. */
7427 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7428 Or can assign_stack_local accept a 0 SIZE argument? */
7429 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7430
7431 if (TARGET_SHMEDIA)
7432 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7433 else if (n_floatregs & 1)
7434 {
7435 rtx addr;
7436
7437 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7438 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7439 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7440 regbuf = change_address (regbuf, BLKmode, addr);
7441 }
7442 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7443 {
7444 rtx addr, mask;
7445
7446 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7447 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7448 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7449 emit_insn (gen_andsi3 (addr, addr, mask));
7450 regbuf = change_address (regbuf, BLKmode, addr);
7451 }
7452 else
7453 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7454 alias_set = get_varargs_alias_set ();
7455 set_mem_alias_set (regbuf, alias_set);
7456
7457 /* Save int args.
7458 This is optimized to only save the regs that are necessary. Explicitly
7459 named args need not be saved. */
7460 if (n_intregs > 0)
7461 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7462 adjust_address (regbuf, BLKmode,
7463 n_floatregs * UNITS_PER_WORD),
7464 n_intregs);
7465
7466 if (TARGET_SHMEDIA)
7467 /* Return the address of the regbuf. */
7468 return XEXP (regbuf, 0);
7469
7470 /* Save float args.
7471 This is optimized to only save the regs that are necessary. Explicitly
7472 named args need not be saved.
7473 We explicitly build a pointer to the buffer because it halves the insn
7474 count when not optimizing (otherwise the pointer is built for each reg
7475 saved).
7476 We emit the moves in reverse order so that we can use predecrement. */
7477
7478 fpregs = copy_to_mode_reg (Pmode,
7479 plus_constant (XEXP (regbuf, 0),
7480 n_floatregs * UNITS_PER_WORD));
7481 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7482 {
7483 rtx mem;
7484 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7485 {
7486 emit_insn (gen_addsi3 (fpregs, fpregs,
7487 GEN_INT (-2 * UNITS_PER_WORD)));
7488 mem = change_address (regbuf, DFmode, fpregs);
7489 emit_move_insn (mem,
7490 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7491 }
7492 regno = first_floatreg;
7493 if (regno & 1)
7494 {
7495 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7496 mem = change_address (regbuf, SFmode, fpregs);
7497 emit_move_insn (mem,
7498 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7499 - (TARGET_LITTLE_ENDIAN != 0)));
7500 }
7501 }
7502 else
7503 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7504 {
7505 rtx mem;
7506
7507 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7508 mem = change_address (regbuf, SFmode, fpregs);
7509 emit_move_insn (mem,
7510 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7511 }
7512
7513 /* Return the address of the regbuf. */
7514 return XEXP (regbuf, 0);
7515 }
7516
7517 /* Define the `__builtin_va_list' type for the ABI. */
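/* For illustration only (a sketch, not a declaration the compiler emits),
   the record built below corresponds roughly to:

     struct __builtin_va_list
     {
       void *__va_next_o;        -- next integer arg in the register save area
       void *__va_next_o_limit;  -- end of the integer register save area
       void *__va_next_fp;       -- next FP arg in the register save area
       void *__va_next_fp_limit; -- end of the FP register save area
       void *__va_next_stack;    -- next argument passed on the stack
     };

   Targets that take the early return below just use a plain pointer.  */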
7518
7519 static tree
7520 sh_build_builtin_va_list (void)
7521 {
7522 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7523 tree record;
7524
7525 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7526 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7527 return ptr_type_node;
7528
7529 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7530
7531 f_next_o = build_decl (BUILTINS_LOCATION,
7532 FIELD_DECL, get_identifier ("__va_next_o"),
7533 ptr_type_node);
7534 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7535 FIELD_DECL,
7536 get_identifier ("__va_next_o_limit"),
7537 ptr_type_node);
7538 f_next_fp = build_decl (BUILTINS_LOCATION,
7539 FIELD_DECL, get_identifier ("__va_next_fp"),
7540 ptr_type_node);
7541 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7542 FIELD_DECL,
7543 get_identifier ("__va_next_fp_limit"),
7544 ptr_type_node);
7545 f_next_stack = build_decl (BUILTINS_LOCATION,
7546 FIELD_DECL, get_identifier ("__va_next_stack"),
7547 ptr_type_node);
7548
7549 DECL_FIELD_CONTEXT (f_next_o) = record;
7550 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7551 DECL_FIELD_CONTEXT (f_next_fp) = record;
7552 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7553 DECL_FIELD_CONTEXT (f_next_stack) = record;
7554
7555 TYPE_FIELDS (record) = f_next_o;
7556 TREE_CHAIN (f_next_o) = f_next_o_limit;
7557 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7558 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7559 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7560
7561 layout_type (record);
7562
7563 return record;
7564 }
7565
7566 /* Implement `va_start' for varargs and stdarg. */
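/* For the SH2E/SH4 case handled below, this amounts to roughly the
   following (an illustrative sketch, with buf being the block returned by
   __builtin_saveregs and UNITS_PER_WORD being 4 here):

     ap.__va_next_fp       = buf;
     ap.__va_next_fp_limit = buf + 4 * (8 - <number of named FP args>);
     ap.__va_next_o        = ap.__va_next_fp_limit;
     ap.__va_next_o_limit  = ap.__va_next_o + 4 * (4 - <number of named int args>);
     ap.__va_next_stack    = nextarg;  */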
7567
7568 static void
7569 sh_va_start (tree valist, rtx nextarg)
7570 {
7571 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7572 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7573 tree t, u;
7574 int nfp, nint;
7575
7576 if (TARGET_SH5)
7577 {
7578 expand_builtin_saveregs ();
7579 std_expand_builtin_va_start (valist, nextarg);
7580 return;
7581 }
7582
7583 if ((! TARGET_SH2E && ! TARGET_SH4)
7584 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7585 {
7586 std_expand_builtin_va_start (valist, nextarg);
7587 return;
7588 }
7589
7590 f_next_o = TYPE_FIELDS (va_list_type_node);
7591 f_next_o_limit = TREE_CHAIN (f_next_o);
7592 f_next_fp = TREE_CHAIN (f_next_o_limit);
7593 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7594 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7595
7596 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7597 NULL_TREE);
7598 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7599 valist, f_next_o_limit, NULL_TREE);
7600 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7601 NULL_TREE);
7602 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7603 valist, f_next_fp_limit, NULL_TREE);
7604 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7605 valist, f_next_stack, NULL_TREE);
7606
7607 /* Call __builtin_saveregs. */
7608 u = make_tree (sizetype, expand_builtin_saveregs ());
7609 u = fold_convert (ptr_type_node, u);
7610 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7611 TREE_SIDE_EFFECTS (t) = 1;
7612 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7613
7614 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7615 if (nfp < 8)
7616 nfp = 8 - nfp;
7617 else
7618 nfp = 0;
7619 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7620 size_int (UNITS_PER_WORD * nfp));
7621 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7622 TREE_SIDE_EFFECTS (t) = 1;
7623 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7624
7625 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7626 TREE_SIDE_EFFECTS (t) = 1;
7627 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7628
7629 nint = crtl->args.info.arg_count[SH_ARG_INT];
7630 if (nint < 4)
7631 nint = 4 - nint;
7632 else
7633 nint = 0;
7634 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7635 size_int (UNITS_PER_WORD * nint));
7636 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7637 TREE_SIDE_EFFECTS (t) = 1;
7638 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7639
7640 u = make_tree (ptr_type_node, nextarg);
7641 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7642 TREE_SIDE_EFFECTS (t) = 1;
7643 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7644 }
7645
7646 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7647 member, return it. */
7648 static tree
7649 find_sole_member (tree type)
7650 {
7651 tree field, member = NULL_TREE;
7652
7653 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7654 {
7655 if (TREE_CODE (field) != FIELD_DECL)
7656 continue;
7657 if (!DECL_SIZE (field))
7658 return NULL_TREE;
7659 if (integer_zerop (DECL_SIZE (field)))
7660 continue;
7661 if (member)
7662 return NULL_TREE;
7663 member = field;
7664 }
7665 return member;
7666 }
7667 /* Implement `va_arg'. */
7668
7669 static tree
7670 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7671 gimple_seq *post_p ATTRIBUTE_UNUSED)
7672 {
7673 HOST_WIDE_INT size, rsize;
7674 tree tmp, pptr_type_node;
7675 tree addr, lab_over = NULL, result = NULL;
7676 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7677 tree eff_type;
7678
7679 if (pass_by_ref)
7680 type = build_pointer_type (type);
7681
7682 size = int_size_in_bytes (type);
7683 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7684 pptr_type_node = build_pointer_type (ptr_type_node);
7685
7686 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7687 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7688 {
7689 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7690 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7691 int pass_as_float;
7692 tree lab_false;
7693 tree member;
7694
7695 f_next_o = TYPE_FIELDS (va_list_type_node);
7696 f_next_o_limit = TREE_CHAIN (f_next_o);
7697 f_next_fp = TREE_CHAIN (f_next_o_limit);
7698 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7699 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7700
7701 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7702 NULL_TREE);
7703 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7704 valist, f_next_o_limit, NULL_TREE);
7705 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7706 valist, f_next_fp, NULL_TREE);
7707 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7708 valist, f_next_fp_limit, NULL_TREE);
7709 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7710 valist, f_next_stack, NULL_TREE);
7711
7712 /* Structures with a single member with a distinct mode are passed
7713 like their member. This is relevant if the latter has a REAL_TYPE
7714 or COMPLEX_TYPE type. */
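/* E.g. (an illustrative case) "struct { float f; }" or
   "struct { struct { double d; } s; }" is handled here like a plain
   "float" or "double" argument, respectively.  */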
7715 eff_type = type;
7716 while (TREE_CODE (eff_type) == RECORD_TYPE
7717 && (member = find_sole_member (eff_type))
7718 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7719 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7720 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7721 {
7722 tree field_type = TREE_TYPE (member);
7723
7724 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7725 eff_type = field_type;
7726 else
7727 {
7728 gcc_assert ((TYPE_ALIGN (eff_type)
7729 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7730 || (TYPE_ALIGN (eff_type)
7731 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7732 break;
7733 }
7734 }
7735
7736 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7737 {
7738 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7739 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7740 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7741 && size <= 16));
7742 }
7743 else
7744 {
7745 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7746 }
7747
7748 addr = create_tmp_var (pptr_type_node, NULL);
7749 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7750 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7751
7752 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7753
7754 if (pass_as_float)
7755 {
7756 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7757 tree cmp;
7758 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7759
7760 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7761 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7762
7763 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7764 tmp = next_fp_limit;
7765 if (size > 4 && !is_double)
7766 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7767 unshare_expr (tmp), size_int (4 - size));
7768 tmp = build2 (GE_EXPR, boolean_type_node,
7769 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7770 cmp = build3 (COND_EXPR, void_type_node, tmp,
7771 build1 (GOTO_EXPR, void_type_node,
7772 unshare_expr (lab_false)), NULL_TREE);
7773 if (!is_double)
7774 gimplify_and_add (cmp, pre_p);
7775
7776 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7777 || (is_double || size == 16))
7778 {
7779 tmp = fold_convert (sizetype, next_fp_tmp);
7780 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7781 size_int (UNITS_PER_WORD));
7782 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7783 unshare_expr (next_fp_tmp), tmp);
7784 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7785 }
7786 if (is_double)
7787 gimplify_and_add (cmp, pre_p);
7788
7789 #ifdef FUNCTION_ARG_SCmode_WART
7790 if (TYPE_MODE (eff_type) == SCmode
7791 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7792 {
7793 tree subtype = TREE_TYPE (eff_type);
7794 tree real, imag;
7795
7796 imag
7797 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7798 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7799
7800 real
7801 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7802 real = get_initialized_tmp_var (real, pre_p, NULL);
7803
7804 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7805 if (type != eff_type)
7806 result = build1 (VIEW_CONVERT_EXPR, type, result);
7807 result = get_initialized_tmp_var (result, pre_p, NULL);
7808 }
7809 #endif /* FUNCTION_ARG_SCmode_WART */
7810
7811 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7812 gimplify_and_add (tmp, pre_p);
7813
7814 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7815 gimplify_and_add (tmp, pre_p);
7816
7817 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7818 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7819 gimplify_assign (unshare_expr (next_fp_tmp),
7820 unshare_expr (valist), pre_p);
7821
7822 gimplify_assign (unshare_expr (valist),
7823 unshare_expr (next_fp_tmp), post_p);
7824 valist = next_fp_tmp;
7825 }
7826 else
7827 {
7828 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7829 unshare_expr (next_o), size_int (rsize));
7830 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7831 unshare_expr (next_o_limit));
7832 tmp = build3 (COND_EXPR, void_type_node, tmp,
7833 build1 (GOTO_EXPR, void_type_node,
7834 unshare_expr (lab_false)),
7835 NULL_TREE);
7836 gimplify_and_add (tmp, pre_p);
7837
7838 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7839 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7840
7841 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7842 gimplify_and_add (tmp, pre_p);
7843
7844 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7845 gimplify_and_add (tmp, pre_p);
7846
7847 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7848 gimplify_assign (unshare_expr (next_o),
7849 unshare_expr (next_o_limit), pre_p);
7850
7851 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7852 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7853 }
7854
7855 if (!result)
7856 {
7857 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7858 gimplify_and_add (tmp, pre_p);
7859 }
7860 }
7861
7862 /* ??? In va-sh.h, there had been code to make values larger than
7863 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7864
7865 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7866 if (result)
7867 {
7868 gimplify_assign (result, tmp, pre_p);
7869 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7870 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7871 gimplify_and_add (tmp, pre_p);
7872 }
7873 else
7874 result = tmp;
7875
7876 if (pass_by_ref)
7877 result = build_va_arg_indirect_ref (result);
7878
7879 return result;
7880 }
7881
7882 /* 64-bit floating point memory transfers are paired single precision loads
7883 or stores. So the DWARF information needs fixing in little endian (unless
7884 PR=SZ=1 in FPSCR). */
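/* E.g. (an illustrative case) a DFmode value living in the pair fr0/fr1 is
   described to DWARF as the two SFmode pieces fr1 then fr0, which matches
   the order in which the paired single-precision transfers hit memory.  */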
7885 rtx
7886 sh_dwarf_register_span (rtx reg)
7887 {
7888 unsigned regno = REGNO (reg);
7889
7890 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7891 return NULL_RTX;
7892
7893 return
7894 gen_rtx_PARALLEL (VOIDmode,
7895 gen_rtvec (2,
7896 gen_rtx_REG (SFmode,
7897 DBX_REGISTER_NUMBER (regno+1)),
7898 gen_rtx_REG (SFmode,
7899 DBX_REGISTER_NUMBER (regno))));
7900 }
7901
7902 static enum machine_mode
7903 sh_promote_function_mode (const_tree type, enum machine_mode mode,
7904 int *punsignedp, const_tree funtype, int for_return)
7905 {
7906 if (sh_promote_prototypes (funtype))
7907 return promote_mode (type, mode, punsignedp);
7908 else
7909 return mode;
7910 }
7911
7912 bool
7913 sh_promote_prototypes (const_tree type)
7914 {
7915 if (TARGET_HITACHI)
7916 return 0;
7917 if (! type)
7918 return 1;
7919 return ! sh_attr_renesas_p (type);
7920 }
7921
7922 /* Whether an argument must be passed by reference. On SHcompact, we
7923 pretend arguments wider than 32-bits that would have been passed in
7924 registers are passed by reference, so that an SHmedia trampoline
7925 loads them into the full 64-bits registers. */
7926
7927 static int
7928 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7929 const_tree type, bool named)
7930 {
7931 unsigned HOST_WIDE_INT size;
7932
7933 if (type)
7934 size = int_size_in_bytes (type);
7935 else
7936 size = GET_MODE_SIZE (mode);
7937
7938 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7939 && (!named
7940 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7941 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7942 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7943 && size > 4
7944 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7945 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7946 return size;
7947 else
7948 return 0;
7949 }
7950
7951 static bool
7952 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7953 const_tree type, bool named)
7954 {
7955 if (targetm.calls.must_pass_in_stack (mode, type))
7956 return true;
7957
7958 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7959 wants to know about pass-by-reference semantics for incoming
7960 arguments. */
7961 if (! cum)
7962 return false;
7963
7964 if (TARGET_SHCOMPACT)
7965 {
7966 cum->byref = shcompact_byref (cum, mode, type, named);
7967 return cum->byref != 0;
7968 }
7969
7970 return false;
7971 }
7972
7973 static bool
7974 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7975 const_tree type, bool named ATTRIBUTE_UNUSED)
7976 {
7977 /* ??? How can it possibly be correct to return true only on the
7978 caller side of the equation? Is there someplace else in the
7979 sh backend that's magically producing the copies? */
7980 return (cum->outgoing
7981 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7982 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7983 }
7984
7985 static int
7986 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7987 tree type, bool named ATTRIBUTE_UNUSED)
7988 {
7989 int words = 0;
7990
7991 if (!TARGET_SH5
7992 && PASS_IN_REG_P (*cum, mode, type)
7993 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7994 && (ROUND_REG (*cum, mode)
7995 + (mode != BLKmode
7996 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7997 : ROUND_ADVANCE (int_size_in_bytes (type)))
7998 > NPARM_REGS (mode)))
7999 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8000
8001 else if (!TARGET_SHCOMPACT
8002 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8003 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8004
8005 return words * UNITS_PER_WORD;
8006 }
8007
8008
8009 /* Define where to put the arguments to a function.
8010 Value is zero to push the argument on the stack,
8011 or a hard register in which to store the argument.
8012
8013 MODE is the argument's machine mode.
8014 TYPE is the data type of the argument (as a tree).
8015 This is null for libcalls where that information may
8016 not be available.
8017 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8018 the preceding args and about the function being called.
8019 NAMED is nonzero if this argument is a named parameter
8020 (otherwise it is an extra parameter matching an ellipsis).
8021
8022 On SH the first args are normally in registers
8023 and the rest are pushed. Any arg that starts within the first
8024 NPARM_REGS words is at least partially passed in a register unless
8025 its data type forbids. */
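/* For example (an illustrative summary; the exact registers come from
   BASE_ARG_REG and NPARM_REGS), on SH-4 with the default GCC ABI the first
   four integer arguments normally arrive in r4..r7 and the first eight
   SFmode arguments in fr4..fr11; anything beyond that goes on the stack.  */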
8026
8027
8028 rtx
8029 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8030 tree type, int named)
8031 {
8032 if (! TARGET_SH5 && mode == VOIDmode)
8033 return GEN_INT (ca->renesas_abi ? 1 : 0);
8034
8035 if (! TARGET_SH5
8036 && PASS_IN_REG_P (*ca, mode, type)
8037 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8038 {
8039 int regno;
8040
8041 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8042 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8043 {
8044 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8045 gen_rtx_REG (SFmode,
8046 BASE_ARG_REG (mode)
8047 + (ROUND_REG (*ca, mode) ^ 1)),
8048 const0_rtx);
8049 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8050 gen_rtx_REG (SFmode,
8051 BASE_ARG_REG (mode)
8052 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8053 GEN_INT (4));
8054 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8055 }
8056
8057 /* If the alignment of a DF value causes an SF register to be
8058 skipped, we will use that skipped register for the next SF
8059 value. */
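/* E.g. with "f (float a, double b, float c)" under the Renesas/Hitachi
   convention (an illustrative sketch of the bookkeeping done via
   free_single_fp_reg): "a" takes one SF register, "b" needs an aligned
   register pair so one SF register is skipped, and "c" then reuses the
   skipped register.  */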
8060 if ((TARGET_HITACHI || ca->renesas_abi)
8061 && ca->free_single_fp_reg
8062 && mode == SFmode)
8063 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8064
8065 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8066 ^ (mode == SFmode && TARGET_SH4
8067 && TARGET_LITTLE_ENDIAN != 0
8068 && ! TARGET_HITACHI && ! ca->renesas_abi);
8069 return gen_rtx_REG (mode, regno);
8070
8071 }
8072
8073 if (TARGET_SH5)
8074 {
8075 if (mode == VOIDmode && TARGET_SHCOMPACT)
8076 return GEN_INT (ca->call_cookie);
8077
8078 /* The following test assumes unnamed arguments are promoted to
8079 DFmode. */
8080 if (mode == SFmode && ca->free_single_fp_reg)
8081 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8082
8083 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8084 && (named || ! ca->prototype_p)
8085 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8086 {
8087 if (! ca->prototype_p && TARGET_SHMEDIA)
8088 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8089
8090 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8091 FIRST_FP_PARM_REG
8092 + ca->arg_count[(int) SH_ARG_FLOAT]);
8093 }
8094
8095 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8096 && (! TARGET_SHCOMPACT
8097 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8098 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8099 type, named))))
8100 {
8101 return gen_rtx_REG (mode, (FIRST_PARM_REG
8102 + ca->arg_count[(int) SH_ARG_INT]));
8103 }
8104
8105 return 0;
8106 }
8107
8108 return 0;
8109 }
8110
8111 /* Update the data in CUM to advance over an argument
8112 of mode MODE and data type TYPE.
8113 (TYPE is null for libcalls where that information may not be
8114 available.) */
8115
8116 void
8117 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8118 tree type, int named)
8119 {
8120 if (ca->force_mem)
8121 ca->force_mem = 0;
8122 else if (TARGET_SH5)
8123 {
8124 tree type2 = (ca->byref && type
8125 ? TREE_TYPE (type)
8126 : type);
8127 enum machine_mode mode2 = (ca->byref && type
8128 ? TYPE_MODE (type2)
8129 : mode);
8130 int dwords = ((ca->byref
8131 ? ca->byref
8132 : mode2 == BLKmode
8133 ? int_size_in_bytes (type2)
8134 : GET_MODE_SIZE (mode2)) + 7) / 8;
8135 int numregs = MIN (dwords, NPARM_REGS (SImode)
8136 - ca->arg_count[(int) SH_ARG_INT]);
8137
8138 if (numregs)
8139 {
8140 ca->arg_count[(int) SH_ARG_INT] += numregs;
8141 if (TARGET_SHCOMPACT
8142 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8143 {
8144 ca->call_cookie
8145 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8146 - numregs, 1);
8147 /* N.B. We want this also for outgoing. */
8148 ca->stack_regs += numregs;
8149 }
8150 else if (ca->byref)
8151 {
8152 if (! ca->outgoing)
8153 ca->stack_regs += numregs;
8154 ca->byref_regs += numregs;
8155 ca->byref = 0;
8156 do
8157 ca->call_cookie
8158 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8159 - numregs, 2);
8160 while (--numregs);
8161 ca->call_cookie
8162 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8163 - 1, 1);
8164 }
8165 else if (dwords > numregs)
8166 {
8167 int pushregs = numregs;
8168
8169 if (TARGET_SHCOMPACT)
8170 ca->stack_regs += numregs;
8171 while (pushregs < NPARM_REGS (SImode) - 1
8172 && (CALL_COOKIE_INT_REG_GET
8173 (ca->call_cookie,
8174 NPARM_REGS (SImode) - pushregs)
8175 == 1))
8176 {
8177 ca->call_cookie
8178 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8179 - pushregs, 1);
8180 pushregs++;
8181 }
8182 if (numregs == NPARM_REGS (SImode))
8183 ca->call_cookie
8184 |= CALL_COOKIE_INT_REG (0, 1)
8185 | CALL_COOKIE_STACKSEQ (numregs - 1);
8186 else
8187 ca->call_cookie
8188 |= CALL_COOKIE_STACKSEQ (numregs);
8189 }
8190 }
8191 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8192 && (named || ! ca->prototype_p))
8193 {
8194 if (mode2 == SFmode && ca->free_single_fp_reg)
8195 ca->free_single_fp_reg = 0;
8196 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8197 < NPARM_REGS (SFmode))
8198 {
8199 int numfpregs
8200 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8201 NPARM_REGS (SFmode)
8202 - ca->arg_count[(int) SH_ARG_FLOAT]);
8203
8204 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8205
8206 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8207 {
8208 if (ca->outgoing && numregs > 0)
8209 do
8210 {
8211 ca->call_cookie
8212 |= (CALL_COOKIE_INT_REG
8213 (ca->arg_count[(int) SH_ARG_INT]
8214 - numregs + ((numfpregs - 2) / 2),
8215 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8216 - numfpregs) / 2));
8217 }
8218 while (numfpregs -= 2);
8219 }
8220 else if (mode2 == SFmode && (named)
8221 && (ca->arg_count[(int) SH_ARG_FLOAT]
8222 < NPARM_REGS (SFmode)))
8223 ca->free_single_fp_reg
8224 = FIRST_FP_PARM_REG - numfpregs
8225 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8226 }
8227 }
8228 return;
8229 }
8230
8231 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8232 {
8233 /* Note that we've used the skipped register. */
8234 if (mode == SFmode && ca->free_single_fp_reg)
8235 {
8236 ca->free_single_fp_reg = 0;
8237 return;
8238 }
8239 /* When we have a DF after an SF, there's an SF register that gets
8240 skipped in order to align the DF value. We note this skipped
8241 register, because the next SF value will use it, and not the
8242 SF that follows the DF. */
8243 if (mode == DFmode
8244 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8245 {
8246 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8247 + BASE_ARG_REG (mode));
8248 }
8249 }
8250
8251 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8252 || PASS_IN_REG_P (*ca, mode, type))
8253 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8254 = (ROUND_REG (*ca, mode)
8255 + (mode == BLKmode
8256 ? ROUND_ADVANCE (int_size_in_bytes (type))
8257 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8258 }
8259
8260 /* The Renesas calling convention doesn't quite fit into this scheme since
8261 the address is passed like an invisible argument, but one that is always
8262 passed in memory. */
8263 static rtx
8264 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8265 {
8266 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8267 return 0;
8268 return gen_rtx_REG (Pmode, 2);
8269 }
8270
8271 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8272
8273 static bool
8274 sh_return_in_memory (const_tree type, const_tree fndecl)
8275 {
8276 if (TARGET_SH5)
8277 {
8278 if (TYPE_MODE (type) == BLKmode)
8279 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8280 else
8281 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8282 }
8283 else
8284 {
8285 return (TYPE_MODE (type) == BLKmode
8286 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8287 && TREE_CODE (type) == RECORD_TYPE));
8288 }
8289 }
8290
8291 /* We actually emit the code in sh_expand_prologue. We used to use
8292 a static variable to flag that we need to emit this code, but that
8293 doesn't work when inlining, when functions are deferred and then emitted
8294 later. Fortunately, we already have two flags that are part of struct
8295 function that tell if a function uses varargs or stdarg. */
8296 static void
8297 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8298 enum machine_mode mode,
8299 tree type,
8300 int *pretend_arg_size,
8301 int second_time ATTRIBUTE_UNUSED)
8302 {
8303 gcc_assert (cfun->stdarg);
8304 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8305 {
8306 int named_parm_regs, anon_parm_regs;
8307
8308 named_parm_regs = (ROUND_REG (*ca, mode)
8309 + (mode == BLKmode
8310 ? ROUND_ADVANCE (int_size_in_bytes (type))
8311 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8312 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8313 if (anon_parm_regs > 0)
8314 *pretend_arg_size = anon_parm_regs * 4;
8315 }
8316 }
8317
8318 static bool
8319 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8320 {
8321 return TARGET_SH5;
8322 }
8323
8324 static bool
8325 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8326 {
8327 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8328 }
8329
8330
8331 /* Define the offset between two registers, one to be eliminated, and
8332 the other its replacement, at the start of a routine. */
8333
8334 int
8335 initial_elimination_offset (int from, int to)
8336 {
8337 int regs_saved;
8338 int regs_saved_rounding = 0;
8339 int total_saved_regs_space;
8340 int total_auto_space;
8341 int save_flags = target_flags;
8342 int copy_flags;
8343 HARD_REG_SET live_regs_mask;
8344
8345 shmedia_space_reserved_for_target_registers = false;
8346 regs_saved = calc_live_regs (&live_regs_mask);
8347 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8348
8349 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8350 {
8351 shmedia_space_reserved_for_target_registers = true;
8352 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8353 }
8354
8355 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8356 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8357 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8358
8359 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8360 copy_flags = target_flags;
8361 target_flags = save_flags;
8362
8363 total_saved_regs_space = regs_saved + regs_saved_rounding;
8364
8365 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8366 return total_saved_regs_space + total_auto_space
8367 + crtl->args.info.byref_regs * 8;
8368
8369 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8370 return total_saved_regs_space + total_auto_space
8371 + crtl->args.info.byref_regs * 8;
8372
8373 /* Initial gap between fp and sp is 0. */
8374 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8375 return 0;
8376
8377 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8378 return rounded_frame_size (0);
8379
8380 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8381 return rounded_frame_size (0);
8382
8383 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8384 && (to == HARD_FRAME_POINTER_REGNUM
8385 || to == STACK_POINTER_REGNUM));
8386 if (TARGET_SH5)
8387 {
8388 int n = total_saved_regs_space;
8389 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8390 save_schedule schedule;
8391 save_entry *entry;
8392
8393 n += total_auto_space;
8394
8395 /* If it wasn't saved, there's not much we can do. */
8396 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8397 return n;
8398
8399 target_flags = copy_flags;
8400
8401 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8402 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8403 if (entry->reg == pr_reg)
8404 {
8405 target_flags = save_flags;
8406 return entry->offset;
8407 }
8408 gcc_unreachable ();
8409 }
8410 else
8411 return total_auto_space;
8412 }
8413
8414 /* Parse the -mfixed-range= option string. */
8415 void
8416 sh_fix_range (const char *const_str)
8417 {
8418 int i, first, last;
8419 char *str, *dash, *comma;
8420
8421 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8422 REG2 are either register names or register numbers. The effect
8423 of this option is to mark the registers in the range from REG1 to
8424 REG2 as ``fixed'' so they won't be used by the compiler. */
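/* E.g. "-mfixed-range=r8-r10" (an illustrative value) marks r8, r9 and
   r10 as fixed, i.e. unavailable to the register allocator.  */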
8425
8426 i = strlen (const_str);
8427 str = (char *) alloca (i + 1);
8428 memcpy (str, const_str, i + 1);
8429
8430 while (1)
8431 {
8432 dash = strchr (str, '-');
8433 if (!dash)
8434 {
8435 warning (0, "value of -mfixed-range must have form REG1-REG2");
8436 return;
8437 }
8438 *dash = '\0';
8439 comma = strchr (dash + 1, ',');
8440 if (comma)
8441 *comma = '\0';
8442
8443 first = decode_reg_name (str);
8444 if (first < 0)
8445 {
8446 warning (0, "unknown register name: %s", str);
8447 return;
8448 }
8449
8450 last = decode_reg_name (dash + 1);
8451 if (last < 0)
8452 {
8453 warning (0, "unknown register name: %s", dash + 1);
8454 return;
8455 }
8456
8457 *dash = '-';
8458
8459 if (first > last)
8460 {
8461 warning (0, "%s-%s is an empty range", str, dash + 1);
8462 return;
8463 }
8464
8465 for (i = first; i <= last; ++i)
8466 fixed_regs[i] = call_used_regs[i] = 1;
8467
8468 if (!comma)
8469 break;
8470
8471 *comma = ',';
8472 str = comma + 1;
8473 }
8474 }
8475 \f
8476 /* Insert any deferred function attributes from earlier pragmas. */
8477 static void
8478 sh_insert_attributes (tree node, tree *attributes)
8479 {
8480 tree attrs;
8481
8482 if (TREE_CODE (node) != FUNCTION_DECL)
8483 return;
8484
8485 /* We are only interested in declarations. */
8486 if (!DECL_P (node))
8487 return;
8488
8489 /* Append the attributes to the deferred attributes. */
8490 *sh_deferred_function_attributes_tail = *attributes;
8491 attrs = sh_deferred_function_attributes;
8492 if (!attrs)
8493 return;
8494
8495 /* Some attributes imply or require the interrupt attribute. */
8496 if (!lookup_attribute ("interrupt_handler", attrs)
8497 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8498 {
8499 /* If we have a trapa_handler, but no interrupt_handler attribute,
8500 insert an interrupt_handler attribute. */
8501 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8502 /* We can't use sh_pr_interrupt here because that's not in the
8503 java frontend. */
8504 attrs
8505 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8506 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8507 if the interrupt attribute is missing, we ignore the attribute
8508 and warn. */
8509 else if (lookup_attribute ("sp_switch", attrs)
8510 || lookup_attribute ("trap_exit", attrs)
8511 || lookup_attribute ("nosave_low_regs", attrs)
8512 || lookup_attribute ("resbank", attrs))
8513 {
8514 tree *tail;
8515
8516 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8517 {
8518 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8519 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8520 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8521 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8522 warning (OPT_Wattributes,
8523 "%qE attribute only applies to interrupt functions",
8524 TREE_PURPOSE (attrs));
8525 else
8526 {
8527 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8528 NULL_TREE);
8529 tail = &TREE_CHAIN (*tail);
8530 }
8531 }
8532 attrs = *attributes;
8533 }
8534 }
8535
8536 /* Install the processed list. */
8537 *attributes = attrs;
8538
8539 /* Clear deferred attributes. */
8540 sh_deferred_function_attributes = NULL_TREE;
8541 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8542
8543 return;
8544 }
8545
8546 /* Supported attributes:
8547
8548 interrupt_handler -- specifies this function is an interrupt handler.
8549
8550 trapa_handler -- like above, but don't save all registers.
8551
8552 sp_switch -- specifies an alternate stack for an interrupt handler
8553 to run on.
8554
8555 trap_exit -- use a trapa to exit an interrupt function instead of
8556 an rte instruction.
8557
8558 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
8559 This is useful on SH3 and upwards,
8560 which have a separate set of low regs for User and Supervisor modes.
8561 This should only be used for the lowest level of interrupts. Higher levels
8562 of interrupts must save the registers in case they themselves are
8563 interrupted.
8564
8565 renesas -- use Renesas calling/layout conventions (functions and
8566 structures).
8567
8568 resbank -- In case of an ISR, use a register bank to save registers
8569 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8570 */
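/* For illustration, typical uses of these attributes look like the
   following (a sketch; the function names are made up):

     void __attribute__ ((interrupt_handler)) isr (void);
     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                          trap_exit (11))) timer_isr (void);
     void __attribute__ ((interrupt_handler, nosave_low_regs)) fast_isr (void);
     void __attribute__ ((interrupt_handler, resbank)) sh2a_isr (void);
     int __attribute__ ((renesas)) callback (int);  */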
8571
8572 /* Handle a 'resbank' attribute. */
8573 static tree
8574 sh_handle_resbank_handler_attribute (tree * node, tree name,
8575 tree args ATTRIBUTE_UNUSED,
8576 int flags ATTRIBUTE_UNUSED,
8577 bool * no_add_attrs)
8578 {
8579 if (!TARGET_SH2A)
8580 {
8581 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8582 name);
8583 *no_add_attrs = true;
8584 }
8585 if (TREE_CODE (*node) != FUNCTION_DECL)
8586 {
8587 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8588 name);
8589 *no_add_attrs = true;
8590 }
8591
8592 return NULL_TREE;
8593 }
8594
8595 /* Handle an "interrupt_handler" attribute; arguments as in
8596 struct attribute_spec.handler. */
8597 static tree
8598 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8599 tree args ATTRIBUTE_UNUSED,
8600 int flags ATTRIBUTE_UNUSED,
8601 bool *no_add_attrs)
8602 {
8603 if (TREE_CODE (*node) != FUNCTION_DECL)
8604 {
8605 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8606 name);
8607 *no_add_attrs = true;
8608 }
8609 else if (TARGET_SHCOMPACT)
8610 {
8611 error ("attribute interrupt_handler is not compatible with -m5-compact");
8612 *no_add_attrs = true;
8613 }
8614
8615 return NULL_TREE;
8616 }
8617
8618 /* Handle a 'function_vector' attribute; arguments as in
8619 struct attribute_spec.handler. */
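/* For illustration (a sketch; the function name is made up), on SH2A a
   function is placed in the TBR-relative call table with e.g.:

     void __attribute__ ((function_vector (18))) isr (void);

   where the argument selects the table entry and, as checked below, must
   be an integer constant in the range 0 to 255.  */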
8620 static tree
8621 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8622 tree args ATTRIBUTE_UNUSED,
8623 int flags ATTRIBUTE_UNUSED,
8624 bool * no_add_attrs)
8625 {
8626 if (!TARGET_SH2A)
8627 {
8628 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8629 name);
8630 *no_add_attrs = true;
8631 }
8632 else if (TREE_CODE (*node) != FUNCTION_DECL)
8633 {
8634 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8635 name);
8636 *no_add_attrs = true;
8637 }
8638 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8639 {
8640 /* The argument must be a constant integer. */
8641 warning (OPT_Wattributes,
8642 "%qE attribute argument not an integer constant",
8643 name);
8644 *no_add_attrs = true;
8645 }
8646 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8647 {
8648 /* The argument value must be between 0 and 255. */
8649 warning (OPT_Wattributes,
8650 "%qE attribute argument should be between 0 and 255",
8651 name);
8652 *no_add_attrs = true;
8653 }
8654 return NULL_TREE;
8655 }
8656
8657 /* Returns 1 if the function referenced by X has been assigned the
8658 attribute 'function_vector'. */
8659 int
8660 sh2a_is_function_vector_call (rtx x)
8661 {
8662 if (GET_CODE (x) == SYMBOL_REF
8663 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8664 {
8665 tree tr = SYMBOL_REF_DECL (x);
8666
8667 if (sh2a_function_vector_p (tr))
8668 return 1;
8669 }
8670
8671 return 0;
8672 }
8673
8674 /* Returns the function vector number if the attribute
8675 'function_vector' is assigned, otherwise returns zero. */
8676 int
8677 sh2a_get_function_vector_number (rtx x)
8678 {
8679 int num;
8680 tree list, t;
8681
8682 if ((GET_CODE (x) == SYMBOL_REF)
8683 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8684 {
8685 t = SYMBOL_REF_DECL (x);
8686
8687 if (TREE_CODE (t) != FUNCTION_DECL)
8688 return 0;
8689
8690 list = SH_ATTRIBUTES (t);
8691 while (list)
8692 {
8693 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8694 {
8695 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8696 return num;
8697 }
8698
8699 list = TREE_CHAIN (list);
8700 }
8701
8702 return 0;
8703 }
8704 else
8705 return 0;
8706 }
8707
8708 /* Handle an "sp_switch" attribute; arguments as in
8709 struct attribute_spec.handler. */
8710 static tree
8711 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8712 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8713 {
8714 if (TREE_CODE (*node) != FUNCTION_DECL)
8715 {
8716 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8717 name);
8718 *no_add_attrs = true;
8719 }
8720 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8721 {
8722 /* The argument must be a constant string. */
8723 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8724 name);
8725 *no_add_attrs = true;
8726 }
8727
8728 return NULL_TREE;
8729 }
8730
8731 /* Handle an "trap_exit" attribute; arguments as in
8732 struct attribute_spec.handler. */
8733 static tree
8734 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8735 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8736 {
8737 if (TREE_CODE (*node) != FUNCTION_DECL)
8738 {
8739 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8740 name);
8741 *no_add_attrs = true;
8742 }
8743 /* The argument specifies a trap number to be used in a trapa instruction
8744 at function exit (instead of an rte instruction). */
8745 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8746 {
8747 /* The argument must be a constant integer. */
8748 warning (OPT_Wattributes, "%qE attribute argument not an "
8749 "integer constant", name);
8750 *no_add_attrs = true;
8751 }
8752
8753 return NULL_TREE;
8754 }
8755
8756 static tree
8757 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8758 tree name ATTRIBUTE_UNUSED,
8759 tree args ATTRIBUTE_UNUSED,
8760 int flags ATTRIBUTE_UNUSED,
8761 bool *no_add_attrs ATTRIBUTE_UNUSED)
8762 {
8763 return NULL_TREE;
8764 }
8765
8766 /* True if __attribute__((renesas)) or -mrenesas. */
8767 int
8768 sh_attr_renesas_p (const_tree td)
8769 {
8770 if (TARGET_HITACHI)
8771 return 1;
8772 if (td == 0)
8773 return 0;
8774 if (DECL_P (td))
8775 td = TREE_TYPE (td);
8776 if (td == error_mark_node)
8777 return 0;
8778 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8779 != NULL_TREE);
8780 }
8781
8782 /* True if __attribute__((renesas)) or -mrenesas, for the current
8783 function. */
8784 int
8785 sh_cfun_attr_renesas_p (void)
8786 {
8787 return sh_attr_renesas_p (current_function_decl);
8788 }
8789
8790 int
8791 sh_cfun_interrupt_handler_p (void)
8792 {
8793 return (lookup_attribute ("interrupt_handler",
8794 DECL_ATTRIBUTES (current_function_decl))
8795 != NULL_TREE);
8796 }
8797
8798 /* Returns 1 if FUNC has been assigned the attribute
8799 "function_vector". */
8800 int
8801 sh2a_function_vector_p (tree func)
8802 {
8803 tree list;
8804 if (TREE_CODE (func) != FUNCTION_DECL)
8805 return 0;
8806
8807 list = SH_ATTRIBUTES (func);
8808 while (list)
8809 {
8810 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8811 return 1;
8812
8813 list = TREE_CHAIN (list);
8814 }
8815 return 0;
8816 }
8817
8818 /* Returns TRUE if the current function has the "resbank" and
   "interrupt_handler" attributes and the target is SH2A. */
8819
8820 int
8821 sh_cfun_resbank_handler_p (void)
8822 {
8823 return ((lookup_attribute ("resbank",
8824 DECL_ATTRIBUTES (current_function_decl))
8825 != NULL_TREE)
8826 && (lookup_attribute ("interrupt_handler",
8827 DECL_ATTRIBUTES (current_function_decl))
8828 != NULL_TREE) && TARGET_SH2A);
8829 }
8830
8831 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8832
8833 static const char *
8834 sh_check_pch_target_flags (int old_flags)
8835 {
8836 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8837 | MASK_SH_E | MASK_HARD_SH4
8838 | MASK_FPU_SINGLE | MASK_SH4))
8839 return _("created and used with different architectures / ABIs");
8840 if ((old_flags ^ target_flags) & MASK_HITACHI)
8841 return _("created and used with different ABIs");
8842 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8843 return _("created and used with different endianness");
8844 return NULL;
8845 }
8846 \f
8847 /* Predicates used by the templates. */
8848
8849 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8850 Used only in general_movsrc_operand. */
8851
8852 int
8853 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8854 {
8855 switch (REGNO (op))
8856 {
8857 case PR_REG:
8858 case MACL_REG:
8859 case MACH_REG:
8860 return 1;
8861 }
8862 return 0;
8863 }
8864
8865 /* Nonzero if OP is a floating point value with value 0.0. */
8866
8867 int
8868 fp_zero_operand (rtx op)
8869 {
8870 REAL_VALUE_TYPE r;
8871
8872 if (GET_MODE (op) != SFmode)
8873 return 0;
8874
8875 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8876 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8877 }
8878
8879 /* Nonzero if OP is a floating point value with value 1.0. */
8880
8881 int
8882 fp_one_operand (rtx op)
8883 {
8884 REAL_VALUE_TYPE r;
8885
8886 if (GET_MODE (op) != SFmode)
8887 return 0;
8888
8889 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8890 return REAL_VALUES_EQUAL (r, dconst1);
8891 }
8892
8893 /* In general, mode switching is used. If we are
8894 compiling without -mfmovd, movsf_ie isn't taken into account for
8895 mode switching. We could check in machine_dependent_reorg for
8896 cases where we know we are in single precision mode, but there is no
8897 interface to find that out during reload, so we must avoid
8898 choosing an fldi alternative during reload and thus failing to
8899 allocate a scratch register for the constant loading. */
8900 int
8901 fldi_ok (void)
8902 {
8903 return 1;
8904 }
8905
8906 int
8907 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8908 {
8909 enum rtx_code code = GET_CODE (op);
8910 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8911 }
8912
8913 /* Return the TLS type for TLS symbols, TLS_MODEL_NONE otherwise. */
8914 enum tls_model
8915 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8916 {
8917 if (GET_CODE (op) != SYMBOL_REF)
8918 return TLS_MODEL_NONE;
8919 return SYMBOL_REF_TLS_MODEL (op);
8920 }
8921 \f
8922 /* Return the destination address of a branch. */
8923
8924 static int
8925 branch_dest (rtx branch)
8926 {
8927 rtx dest = SET_SRC (PATTERN (branch));
8928 int dest_uid;
8929
8930 if (GET_CODE (dest) == IF_THEN_ELSE)
8931 dest = XEXP (dest, 1);
8932 dest = XEXP (dest, 0);
8933 dest_uid = INSN_UID (dest);
8934 return INSN_ADDRESSES (dest_uid);
8935 }
8936 \f
8937 /* Return nonzero if REG is not used after INSN.
8938 We assume REG is a reload reg, and therefore does
8939 not live past labels. It may live past calls or jumps though. */
8940 int
8941 reg_unused_after (rtx reg, rtx insn)
8942 {
8943 enum rtx_code code;
8944 rtx set;
8945
8946 /* If the reg is set by this instruction, then it is safe for our
8947 case. Disregard the case where this is a store to memory, since
8948 we are checking a register used in the store address. */
8949 set = single_set (insn);
8950 if (set && !MEM_P (SET_DEST (set))
8951 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8952 return 1;
8953
8954 while ((insn = NEXT_INSN (insn)))
8955 {
8956 rtx set;
8957 if (!INSN_P (insn))
8958 continue;
8959
8960 code = GET_CODE (insn);
8961
8962 #if 0
8963 /* If this is a label that existed before reload, then the register
8964 is dead here. However, if this is a label added by reorg, then
8965 the register may still be live here. We can't tell the difference,
8966 so we just ignore labels completely. */
8967 if (code == CODE_LABEL)
8968 return 1;
8969 /* else */
8970 #endif
8971
8972 if (code == JUMP_INSN)
8973 return 0;
8974
8975 /* If this is a sequence, we must handle them all at once.
8976 We could have for instance a call that sets the target register,
8977 and an insn in a delay slot that uses the register. In this case,
8978 we must return 0. */
8979 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8980 {
8981 int i;
8982 int retval = 0;
8983
8984 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8985 {
8986 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8987 rtx set = single_set (this_insn);
8988
8989 if (CALL_P (this_insn))
8990 code = CALL_INSN;
8991 else if (JUMP_P (this_insn))
8992 {
8993 if (INSN_ANNULLED_BRANCH_P (this_insn))
8994 return 0;
8995 code = JUMP_INSN;
8996 }
8997
8998 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8999 return 0;
9000 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9001 {
9002 if (!MEM_P (SET_DEST (set)))
9003 retval = 1;
9004 else
9005 return 0;
9006 }
9007 if (set == 0
9008 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9009 return 0;
9010 }
9011 if (retval == 1)
9012 return 1;
9013 else if (code == JUMP_INSN)
9014 return 0;
9015 }
9016
9017 set = single_set (insn);
9018 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9019 return 0;
9020 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9021 return !MEM_P (SET_DEST (set));
9022 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9023 return 0;
9024
9025 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9026 return 1;
9027 }
9028 return 1;
9029 }
9030 \f
9031 #include "ggc.h"
9032
9033 static GTY(()) rtx fpscr_rtx;
9034 rtx
9035 get_fpscr_rtx (void)
9036 {
9037 if (! fpscr_rtx)
9038 {
9039 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9040 REG_USERVAR_P (fpscr_rtx) = 1;
9041 mark_user_reg (fpscr_rtx);
9042 }
9043 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9044 mark_user_reg (fpscr_rtx);
9045 return fpscr_rtx;
9046 }
9047
9048 static GTY(()) tree fpscr_values;
9049
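/* Switch the FP precision mode by loading FPSCR from the external table
   __fpscr_values. Conceptually (an illustrative sketch, assuming the
   run-time library provides the table):

     extern int __fpscr_values[2];
     FPSCR = __fpscr_values[index];

   SCRATCH is only used to form the address once pseudo registers can no
   longer be created.  */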
9050 static void
9051 emit_fpu_switch (rtx scratch, int index)
9052 {
9053 rtx dst, src;
9054
9055 if (fpscr_values == NULL)
9056 {
9057 tree t;
9058
9059 t = build_index_type (integer_one_node);
9060 t = build_array_type (integer_type_node, t);
9061 t = build_decl (BUILTINS_LOCATION,
9062 VAR_DECL, get_identifier ("__fpscr_values"), t);
9063 DECL_ARTIFICIAL (t) = 1;
9064 DECL_IGNORED_P (t) = 1;
9065 DECL_EXTERNAL (t) = 1;
9066 TREE_STATIC (t) = 1;
9067 TREE_PUBLIC (t) = 1;
9068 TREE_USED (t) = 1;
9069
9070 fpscr_values = t;
9071 }
9072
9073 src = DECL_RTL (fpscr_values);
9074 if (!can_create_pseudo_p ())
9075 {
9076 emit_move_insn (scratch, XEXP (src, 0));
9077 if (index != 0)
9078 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9079 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9080 }
9081 else
9082 src = adjust_address (src, PSImode, index * 4);
9083
9084 dst = get_fpscr_rtx ();
9085 emit_move_insn (dst, src);
9086 }
9087
9088 void
9089 emit_sf_insn (rtx pat)
9090 {
9091 emit_insn (pat);
9092 }
9093
9094 void
9095 emit_df_insn (rtx pat)
9096 {
9097 emit_insn (pat);
9098 }
9099
9100 void
9101 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9102 {
9103 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9104 }
9105
9106 void
9107 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9108 {
9109 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9110 get_fpscr_rtx ()));
9111 }
9112
9113 void
9114 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9115 {
9116 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9117 }
9118
9119 void
9120 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9121 {
9122 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9123 get_fpscr_rtx ()));
9124 }
9125 \f
9126 static rtx get_free_reg (HARD_REG_SET);
9127
9128 /* This function returns a register to use for loading the address from
9129 which to load the fpscr. Currently it always returns r1 or r7, but when
9130 we are able to use pseudo registers after combine, or have a better
9131 mechanism for choosing a register, it should be done here. */
9132 /* REGS_LIVE is the liveness information for the point for which we
9133 need this allocation. In some bare-bones exit blocks, r1 is live at the
9134 start. We can even have all of r0..r3 being live:
9135 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9136 The insn before which new insns are placed will clobber the register
9137 we return. If a basic block consists only of setting the return value
9138 register to a pseudo and using that register, the return value is not
9139 live before or after this block, yet we'll insert our insns right in
9140 the middle. */
9141
9142 static rtx
9143 get_free_reg (HARD_REG_SET regs_live)
9144 {
9145 if (! TEST_HARD_REG_BIT (regs_live, 1))
9146 return gen_rtx_REG (Pmode, 1);
9147
9148 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
9149 there shouldn't be anything but a jump before the function end. */
9150 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9151 return gen_rtx_REG (Pmode, 7);
9152 }
9153
9154 /* This function will set the fpscr from memory.
9155 MODE is the mode we are setting it to. */
9156 void
9157 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9158 {
9159 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9160 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9161 rtx addr_reg;
9162
9163 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9164 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9165 }
9166
9167 /* Is the given character a logical line separator for the assembler? */
9168 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9169 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9170 #endif
9171
9172 int
9173 sh_insn_length_adjustment (rtx insn)
9174 {
9175 /* Instructions with unfilled delay slots take up an extra two bytes for
9176 the nop in the delay slot. */
9177 if (((NONJUMP_INSN_P (insn)
9178 && GET_CODE (PATTERN (insn)) != USE
9179 && GET_CODE (PATTERN (insn)) != CLOBBER)
9180 || CALL_P (insn)
9181 || (JUMP_P (insn)
9182 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
9183 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
9184 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9185 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9186 return 2;
9187
9188 /* SH2e has a bug that prevents the use of annulled branches, so if
9189 the delay slot is not filled, we'll have to put a NOP in it. */
9190 if (sh_cpu_attr == CPU_SH2E
9191 && JUMP_P (insn)
9192 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
9193 && GET_CODE (PATTERN (insn)) != ADDR_VEC
9194 && get_attr_type (insn) == TYPE_CBRANCH
9195 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9196 return 2;
9197
9198 /* sh-dsp parallel processing insns take four bytes instead of two. */
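/* Illustrative example: in an inline asm template, an insn such as
   "padd x0,y0,a0" is detected below as a parallel-processing insn and
   gets a 2-byte length adjustment (4 bytes total), while a "repeat"
   pseudo-insn gets a 4-byte adjustment (6 bytes total).  */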
9199
9200 if (NONJUMP_INSN_P (insn))
9201 {
9202 int sum = 0;
9203 rtx body = PATTERN (insn);
9204 const char *templ;
9205 char c;
9206 int maybe_label = 1;
9207
9208 if (GET_CODE (body) == ASM_INPUT)
9209 templ = XSTR (body, 0);
9210 else if (asm_noperands (body) >= 0)
9211 templ
9212 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9213 else
9214 return 0;
9215 do
9216 {
9217 int ppi_adjust = 0;
9218
9219 do
9220 c = *templ++;
9221 while (c == ' ' || c == '\t');
9222 /* All sh-dsp parallel-processing insns start with p.
9223 The only non-ppi sh insn starting with p is pref.
9224 The only ppi starting with pr is prnd. */
9225 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9226 ppi_adjust = 2;
9227 /* The repeat pseudo-insn expands to three insns, a total of
9228 six bytes in size. */
9229 else if ((c == 'r' || c == 'R')
9230 && ! strncasecmp ("epeat", templ, 5))
9231 ppi_adjust = 4;
9232 while (c && c != '\n'
9233 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9234 {
9235 /* If this is a label, it is obviously not a ppi insn. */
9236 if (c == ':' && maybe_label)
9237 {
9238 ppi_adjust = 0;
9239 break;
9240 }
9241 else if (c == '\'' || c == '"')
9242 maybe_label = 0;
9243 c = *templ++;
9244 }
9245 sum += ppi_adjust;
9246 maybe_label = c != ':';
9247 }
9248 while (c);
9249 return sum;
9250 }
9251 return 0;
9252 }
9253 \f
9254 /* Return TRUE for a valid displacement for the REG+disp addressing
9255 with MODE. */
9256
9257 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9258 into the FRx registers. We implement this by setting the maximum offset
9259 to zero when the value is SFmode. This also restricts loading of SFmode
9260 values into the integer registers, but that can't be helped. */
9261
9262 /* The SH allows a displacement in a QI or HI mode address, but only when
9263 the other operand is R0. GCC doesn't handle this very well, so we forget
9264 all of that.
9265
9266 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9267 DI can be any number 0..60. */
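/* For illustration: for a plain SImode access the displacement must be a
   multiple of 4 below 64, so @(60,r4) is acceptable to
   sh_legitimate_index_p while @(62,r4) and @(64,r4) are not and have to
   be legitimized or reloaded some other way.  */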
9268
9269 bool
9270 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9271 {
9272 if (CONST_INT_P (op))
9273 {
9274 if (TARGET_SHMEDIA)
9275 {
9276 int size;
9277
9278 /* Check if this is the address of an unaligned load / store. */
9279 if (mode == VOIDmode)
9280 return CONST_OK_FOR_I06 (INTVAL (op));
9281
9282 size = GET_MODE_SIZE (mode);
9283 return (!(INTVAL (op) & (size - 1))
9284 && INTVAL (op) >= -512 * size
9285 && INTVAL (op) < 512 * size);
9286 }
9287
9288 if (TARGET_SH2A)
9289 {
9290 if (GET_MODE_SIZE (mode) == 1
9291 && (unsigned) INTVAL (op) < 4096)
9292 return true;
9293 }
9294
9295 if ((GET_MODE_SIZE (mode) == 4
9296 && (unsigned) INTVAL (op) < 64
9297 && !(INTVAL (op) & 3)
9298 && !(TARGET_SH2E && mode == SFmode))
9299 || (GET_MODE_SIZE (mode) == 4
9300 && (unsigned) INTVAL (op) < 16383
9301 && !(INTVAL (op) & 3) && TARGET_SH2A))
9302 return true;
9303
9304 if ((GET_MODE_SIZE (mode) == 8
9305 && (unsigned) INTVAL (op) < 60
9306 && !(INTVAL (op) & 3)
9307 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9308 || ((GET_MODE_SIZE (mode) == 8)
9309 && (unsigned) INTVAL (op) < 8192
9310 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9311 && (TARGET_SH2A && mode == DFmode)))
9312 return true;
9313 }
9314
9315 return false;
9316 }
9317
9318 /* Recognize an RTL expression that is a valid memory address for
9319 an instruction.
9320 The MODE argument is the machine mode for the MEM expression
9321 that wants to use this address.
9322 Allow REG
9323 REG+disp
9324 REG+r0
9325 REG++
9326 --REG */
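/* In SH assembly syntax these correspond to @r4, @(8,r4), @(r0,r4),
   @r4+ and @-r4 respectively (register numbers chosen for illustration
   only).  */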
9327
9328 static bool
9329 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9330 {
9331 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9332 return true;
9333 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9334 && ! TARGET_SHMEDIA
9335 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9336 return true;
9337 else if (GET_CODE (x) == PLUS
9338 && (mode != PSImode || reload_completed))
9339 {
9340 rtx xop0 = XEXP (x, 0);
9341 rtx xop1 = XEXP (x, 1);
9342
9343 if (GET_MODE_SIZE (mode) <= 8
9344 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9345 && sh_legitimate_index_p (mode, xop1))
9346 return true;
9347
9348 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9349 || ((xop0 == stack_pointer_rtx
9350 || xop0 == hard_frame_pointer_rtx)
9351 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9352 || ((xop1 == stack_pointer_rtx
9353 || xop1 == hard_frame_pointer_rtx)
9354 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9355 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9356 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9357 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9358 && TARGET_FMOVD && mode == DFmode)))
9359 {
9360 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9361 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9362 return true;
9363 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9364 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9365 return true;
9366 }
9367 }
9368
9369 return false;
9370 }
9371 \f
9372 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9373 isn't protected by a PIC unspec. */
9374 int
9375 nonpic_symbol_mentioned_p (rtx x)
9376 {
9377 register const char *fmt;
9378 register int i;
9379
9380 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9381 || GET_CODE (x) == PC)
9382 return 1;
9383
9384 /* We don't want to look into the possible MEM location of a
9385 CONST_DOUBLE, since we're not going to use it, in general. */
9386 if (GET_CODE (x) == CONST_DOUBLE)
9387 return 0;
9388
9389 if (GET_CODE (x) == UNSPEC
9390 && (XINT (x, 1) == UNSPEC_PIC
9391 || XINT (x, 1) == UNSPEC_GOT
9392 || XINT (x, 1) == UNSPEC_GOTOFF
9393 || XINT (x, 1) == UNSPEC_GOTPLT
9394 || XINT (x, 1) == UNSPEC_GOTTPOFF
9395 || XINT (x, 1) == UNSPEC_DTPOFF
9396 || XINT (x, 1) == UNSPEC_PLT
9397 || XINT (x, 1) == UNSPEC_SYMOFF
9398 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9399 return 0;
9400
9401 fmt = GET_RTX_FORMAT (GET_CODE (x));
9402 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9403 {
9404 if (fmt[i] == 'E')
9405 {
9406 register int j;
9407
9408 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9409 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9410 return 1;
9411 }
9412 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9413 return 1;
9414 }
9415
9416 return 0;
9417 }
9418
9419 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9420 @GOTOFF in `reg'. */
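/* Roughly speaking: labels and symbols known to be local to this module
   are reached via a @GOTOFF offset from the GOT base, while other symbols
   go through a @GOT slot so that they remain overridable at link/load
   time.  TLS symbols are left alone here and handled elsewhere.  */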
9421 rtx
9422 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9423 rtx reg)
9424 {
9425 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9426 return orig;
9427
9428 if (GET_CODE (orig) == LABEL_REF
9429 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9430 {
9431 if (reg == 0)
9432 reg = gen_reg_rtx (Pmode);
9433
9434 emit_insn (gen_symGOTOFF2reg (reg, orig));
9435 return reg;
9436 }
9437 else if (GET_CODE (orig) == SYMBOL_REF)
9438 {
9439 if (reg == 0)
9440 reg = gen_reg_rtx (Pmode);
9441
9442 emit_insn (gen_symGOT2reg (reg, orig));
9443 return reg;
9444 }
9445 return orig;
9446 }
9447
9448 /* Try machine-dependent ways of modifying an illegitimate address
9449 to be legitimate. If we find one, return the new, valid address.
9450 Otherwise, return X.
9451
9452 For the SH, if X is almost suitable for indexing, but the offset is
9453 out of range, convert it into a normal form so that CSE has a chance
9454 of reducing the number of address registers used. */
9455
9456 static rtx
9457 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9458 {
9459 if (flag_pic)
9460 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9461
9462 if (GET_CODE (x) == PLUS
9463 && (GET_MODE_SIZE (mode) == 4
9464 || GET_MODE_SIZE (mode) == 8)
9465 && CONST_INT_P (XEXP (x, 1))
9466 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9467 && ! TARGET_SHMEDIA
9468 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9469 && ! (TARGET_SH2E && mode == SFmode))
9470 {
9471 rtx index_rtx = XEXP (x, 1);
9472 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9473 rtx sum;
9474
9475 /* On rare occasions, we might get an unaligned pointer
9476 that is indexed in a way to give an aligned address.
9477 Therefore, keep the lower two bits in offset_base. */
9478 /* Instead of offset_base 128..131 use 124..127, so that
9479 a simple add suffices. */
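/* Worked example: for offset 128 the rounding below gives offset_base 124,
   so the address becomes (reg + 124) plus a residual displacement of 4,
   which the REG+disp form can still encode.  */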
9480 if (offset > 127)
9481 offset_base = ((offset + 4) & ~60) - 4;
9482 else
9483 offset_base = offset & ~60;
9484
9485 /* Sometimes the normal form does not suit DImode. We
9486 could avoid that by using smaller ranges, but that
9487 would give less optimized code when SImode is
9488 prevalent. */
9489 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9490 {
9491 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9492 GEN_INT (offset_base), NULL_RTX, 0,
9493 OPTAB_LIB_WIDEN);
9494
9495 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9496 }
9497 }
9498
9499 return x;
9500 }
9501
9502 /* Mark the use of a constant in the literal table. If the constant
9503 has multiple labels, make it unique. */
9504 static rtx
9505 mark_constant_pool_use (rtx x)
9506 {
9507 rtx insn, lab, pattern;
9508
9509 if (x == NULL)
9510 return x;
9511
9512 switch (GET_CODE (x))
9513 {
9514 case LABEL_REF:
9515 x = XEXP (x, 0);
9516 case CODE_LABEL:
9517 break;
9518 default:
9519 return x;
9520 }
9521
9522 /* Get the first label in the list of labels for the same constant
9523 and delete the other labels in the list. */
9524 lab = x;
9525 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9526 {
9527 if (!LABEL_P (insn)
9528 || LABEL_REFS (insn) != NEXT_INSN (insn))
9529 break;
9530 lab = insn;
9531 }
9532
9533 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9534 INSN_DELETED_P (insn) = 1;
9535
9536 /* Mark constants in a window. */
9537 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9538 {
9539 if (!NONJUMP_INSN_P (insn))
9540 continue;
9541
9542 pattern = PATTERN (insn);
9543 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9544 continue;
9545
9546 switch (XINT (pattern, 1))
9547 {
9548 case UNSPECV_CONST2:
9549 case UNSPECV_CONST4:
9550 case UNSPECV_CONST8:
9551 XVECEXP (pattern, 0, 1) = const1_rtx;
9552 break;
9553 case UNSPECV_WINDOW_END:
9554 if (XVECEXP (pattern, 0, 0) == x)
9555 return lab;
9556 break;
9557 case UNSPECV_CONST_END:
9558 return lab;
9559 default:
9560 break;
9561 }
9562 }
9563
9564 return lab;
9565 }
9566 \f
9567 /* Return true if it's possible to redirect BRANCH1 to the destination
9568 of an unconditional jump BRANCH2. We only want to do this if the
9569 resulting branch will have a short displacement. */
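/* A short SH conditional branch has an 8-bit displacement scaled by 2,
   i.e. a reach of roughly 256 bytes in either direction, which is where
   the distance limit used below comes from.  */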
9570 int
9571 sh_can_redirect_branch (rtx branch1, rtx branch2)
9572 {
9573 if (flag_expensive_optimizations && simplejump_p (branch2))
9574 {
9575 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9576 rtx insn;
9577 int distance;
9578
9579 for (distance = 0, insn = NEXT_INSN (branch1);
9580 insn && distance < 256;
9581 insn = PREV_INSN (insn))
9582 {
9583 if (insn == dest)
9584 return 1;
9585 else
9586 distance += get_attr_length (insn);
9587 }
9588 for (distance = 0, insn = NEXT_INSN (branch1);
9589 insn && distance < 256;
9590 insn = NEXT_INSN (insn))
9591 {
9592 if (insn == dest)
9593 return 1;
9594 else
9595 distance += get_attr_length (insn);
9596 }
9597 }
9598 return 0;
9599 }
9600
9601 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9602 int
9603 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9604 unsigned int new_reg)
9605 {
9606 /* Interrupt functions can only use registers that have already been
9607 saved by the prologue, even if they would normally be
9608 call-clobbered. */
9609
9610 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9611 return 0;
9612
9613 return 1;
9614 }
9615
9616 /* Function to update the integer COST
9617 based on the relationship between INSN that is dependent on
9618 DEP_INSN through the dependence LINK. The default is to make no
9619 adjustment to COST. This can be used for example to specify to
9620 the scheduler that an output- or anti-dependence does not incur
9621 the same cost as a data-dependence. The return value should be
9622 the new value for COST. */
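/* For example: for an ordinary call, only the function address is
   timing-critical, so the cost of dependences on the other call inputs is
   reduced below by 6 cycles on SH4 (3 on SH4-300); likewise sfunc
   argument inputs get a 4-cycle (1-cycle) reduction.  */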
9623 static int
9624 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9625 {
9626 rtx reg, use_pat;
9627
9628 if (TARGET_SHMEDIA)
9629 {
9630 /* On SHmedia, if the dependence is an anti-dependence or
9631 output-dependence, there is no cost. */
9632 if (REG_NOTE_KIND (link) != 0)
9633 {
9634 /* However, dependencies between target register loads and
9635 uses of the register in a subsequent block that are separated
9636 by a conditional branch are not modelled - we have to make do with
9637 the anti-dependency between the target register load and the
9638 conditional branch that ends the current block. */
9639 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9640 && GET_CODE (PATTERN (dep_insn)) == SET
9641 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9642 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9643 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9644 {
9645 int orig_cost = cost;
9646 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9647 rtx target = ((! note
9648 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9649 ? insn : JUMP_LABEL (insn));
9650 /* On the likely path, the branch costs 1, on the unlikely path,
9651 it costs 3. */
9652 cost--;
9653 do
9654 target = next_active_insn (target);
9655 while (target && ! flow_dependent_p (target, dep_insn)
9656 && --cost > 0);
9657 /* If two branches are executed in immediate succession, with the
9658 first branch properly predicted, this causes a stall at the
9659 second branch, hence we won't need the target for the
9660 second branch for two cycles after the launch of the first
9661 branch. */
9662 if (cost > orig_cost - 2)
9663 cost = orig_cost - 2;
9664 }
9665 else
9666 cost = 0;
9667 }
9668
9669 else if (get_attr_is_mac_media (insn)
9670 && get_attr_is_mac_media (dep_insn))
9671 cost = 1;
9672
9673 else if (! reload_completed
9674 && GET_CODE (PATTERN (insn)) == SET
9675 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9676 && GET_CODE (PATTERN (dep_insn)) == SET
9677 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9678 && cost < 4)
9679 cost = 4;
9680 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9681 that is needed at the target. */
9682 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9683 && ! flow_dependent_p (insn, dep_insn))
9684 cost--;
9685 }
9686 else if (REG_NOTE_KIND (link) == 0)
9687 {
9688 enum attr_type type;
9689 rtx dep_set;
9690
9691 if (recog_memoized (insn) < 0
9692 || recog_memoized (dep_insn) < 0)
9693 return cost;
9694
9695 dep_set = single_set (dep_insn);
9696
9697 /* The latency that we specify in the scheduling description refers
9698 to the actual output, not to an auto-increment register; for that,
9699 the latency is one. */
9700 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9701 {
9702 rtx set = single_set (insn);
9703
9704 if (set
9705 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9706 && (!MEM_P (SET_DEST (set))
9707 || !reg_mentioned_p (SET_DEST (dep_set),
9708 XEXP (SET_DEST (set), 0))))
9709 cost = 1;
9710 }
9711 /* The only input for a call that is timing-critical is the
9712 function's address. */
9713 if (CALL_P (insn))
9714 {
9715 rtx call = PATTERN (insn);
9716
9717 if (GET_CODE (call) == PARALLEL)
9718 call = XVECEXP (call, 0, 0);
9719 if (GET_CODE (call) == SET)
9720 call = SET_SRC (call);
9721 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9722 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9723 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9724 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9725 cost -= TARGET_SH4_300 ? 3 : 6;
9726 }
9727 /* Likewise, the most timing-critical input for an sfunc call
9728 is the function address. However, sfuncs typically start
9729 using their arguments pretty quickly.
9730 Assume a four cycle delay for SH4 before they are needed.
9731 Cached ST40-300 calls are quicker, so assume only a one
9732 cycle delay there.
9733 ??? Maybe we should encode the delays till input registers
9734 are needed by sfuncs into the sfunc call insn. */
9735 /* All sfunc calls are parallels with at least four components.
9736 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9737 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9738 && XVECLEN (PATTERN (insn), 0) >= 4
9739 && (reg = sfunc_uses_reg (insn)))
9740 {
9741 if (! reg_set_p (reg, dep_insn))
9742 cost -= TARGET_SH4_300 ? 1 : 4;
9743 }
9744 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9745 {
9746 enum attr_type dep_type = get_attr_type (dep_insn);
9747
9748 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9749 cost--;
9750 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9751 && (type = get_attr_type (insn)) != TYPE_CALL
9752 && type != TYPE_SFUNC)
9753 cost--;
9754 /* When the preceding instruction loads the shift amount of
9755 the following SHAD/SHLD, the latency of the load is increased
9756 by 1 cycle. */
9757 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9758 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9759 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9760 XEXP (SET_SRC (single_set (insn)),
9761 1)))
9762 cost++;
9763 /* When an LS group instruction with a latency of less than
9764 3 cycles is followed by a double-precision floating-point
9765 instruction, FIPR, or FTRV, the latency of the first
9766 instruction is increased to 3 cycles. */
9767 else if (cost < 3
9768 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9769 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9770 cost = 3;
9771 /* The lsw register of a double-precision computation is ready one
9772 cycle earlier. */
9773 else if (reload_completed
9774 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9775 && (use_pat = single_set (insn))
9776 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9777 SET_SRC (use_pat)))
9778 cost -= 1;
9779
9780 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9781 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9782 cost -= 1;
9783 }
9784 else if (TARGET_SH4_300)
9785 {
9786 /* Stores need their input register two cycles later. */
9787 if (dep_set && cost >= 1
9788 && ((type = get_attr_type (insn)) == TYPE_STORE
9789 || type == TYPE_PSTORE
9790 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9791 {
9792 rtx set = single_set (insn);
9793
9794 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9795 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9796 {
9797 cost -= 2;
9798 /* But don't reduce the cost below 1 if the address depends
9799 on a side effect of dep_insn. */
9800 if (cost < 1
9801 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9802 cost = 1;
9803 }
9804 }
9805 }
9806 }
9807 /* An anti-dependence penalty of two applies if the first insn is a double
9808 precision fadd / fsub / fmul. */
9809 else if (!TARGET_SH4_300
9810 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9811 && recog_memoized (dep_insn) >= 0
9812 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9813 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9814 /* A lot of alleged anti-flow dependences are fake,
9815 so check this one is real. */
9816 && flow_dependent_p (dep_insn, insn))
9817 cost = 2;
9818
9819 return cost;
9820 }
9821
9822 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9823 if DEP_INSN is anti-flow dependent on INSN. */
9824 static int
9825 flow_dependent_p (rtx insn, rtx dep_insn)
9826 {
9827 rtx tmp = PATTERN (insn);
9828
9829 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9830 return tmp == NULL_RTX;
9831 }
9832
9833 /* A helper function for flow_dependent_p called through note_stores. */
9834 static void
9835 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9836 {
9837 rtx * pinsn = (rtx *) data;
9838
9839 if (*pinsn && reg_referenced_p (x, *pinsn))
9840 *pinsn = NULL_RTX;
9841 }
9842
9843 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9844 'special function' patterns (type sfunc) that clobber pr, but that
9845 do not look like function calls to leaf_function_p. Hence we must
9846 do this extra check. */
9847 static int
9848 sh_pr_n_sets (void)
9849 {
9850 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9851 }
9852
9853 /* Return where to allocate a pseudo for a given hard register initial
9854 value. */
9855 static rtx
9856 sh_allocate_initial_value (rtx hard_reg)
9857 {
9858 rtx x;
9859
9860 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9861 {
9862 if (current_function_is_leaf
9863 && ! sh_pr_n_sets ()
9864 && ! (TARGET_SHCOMPACT
9865 && ((crtl->args.info.call_cookie
9866 & ~ CALL_COOKIE_RET_TRAMP (1))
9867 || crtl->saves_all_registers)))
9868 x = hard_reg;
9869 else
9870 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9871 }
9872 else
9873 x = NULL_RTX;
9874
9875 return x;
9876 }
9877
9878 /* This function returns "2" to indicate dual issue for the SH4
9879 processor. To be used by the DFA pipeline description. */
9880 static int
9881 sh_issue_rate (void)
9882 {
9883 if (TARGET_SUPERSCALAR)
9884 return 2;
9885 else
9886 return 1;
9887 }
9888
9889 /* Functions for ready queue reordering for sched1. */
9890
9891 /* Get the regmode weight of MODE for the set X. */
9892 static short
9893 find_set_regmode_weight (rtx x, enum machine_mode mode)
9894 {
9895 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9896 return 1;
9897 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9898 {
9899 if (REG_P (SET_DEST (x)))
9900 {
9901 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9902 return 1;
9903 else
9904 return 0;
9905 }
9906 return 1;
9907 }
9908 return 0;
9909 }
9910
9911 /* Get regmode weight for insn. */
9912 static short
9913 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9914 {
9915 short reg_weight = 0;
9916 rtx x;
9917
9918 /* Increment weight for each register born here. */
9919 x = PATTERN (insn);
9920 reg_weight += find_set_regmode_weight (x, mode);
9921 if (GET_CODE (x) == PARALLEL)
9922 {
9923 int j;
9924 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9925 {
9926 x = XVECEXP (PATTERN (insn), 0, j);
9927 reg_weight += find_set_regmode_weight (x, mode);
9928 }
9929 }
9930 /* Decrement weight for each register that dies here. */
9931 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9932 {
9933 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9934 {
9935 rtx note = XEXP (x, 0);
9936 if (REG_P (note) && GET_MODE (note) == mode)
9937 reg_weight--;
9938 }
9939 }
9940 return reg_weight;
9941 }
9942
9943 /* Calculate regmode weights for all insns of a basic block. */
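/* A DFmode value occupies two SFmode registers and a DImode value two
   SImode registers, which is why the DFmode / DImode weights are counted
   twice below.  */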
9944 static void
9945 find_regmode_weight (basic_block b, enum machine_mode mode)
9946 {
9947 rtx insn, next_tail, head, tail;
9948
9949 get_ebb_head_tail (b, b, &head, &tail);
9950 next_tail = NEXT_INSN (tail);
9951
9952 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9953 {
9954 /* Handle register life information. */
9955 if (!INSN_P (insn))
9956 continue;
9957
9958 if (mode == SFmode)
9959 INSN_REGMODE_WEIGHT (insn, mode) =
9960 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9961 else if (mode == SImode)
9962 INSN_REGMODE_WEIGHT (insn, mode) =
9963 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9964 }
9965 }
9966
9967 /* Comparison function for ready queue sorting. */
9968 static int
9969 rank_for_reorder (const void *x, const void *y)
9970 {
9971 rtx tmp = *(const rtx *) y;
9972 rtx tmp2 = *(const rtx *) x;
9973
9974 /* The insn in a schedule group should be issued first. */
9975 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9976 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9977
9978 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9979 minimizes instruction movement, thus minimizing sched's effect on
9980 register pressure. */
9981 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9982 }
9983
9984 /* Resort the array A in which only the element at index N may be out of order. */
9985 static void
9986 swap_reorder (rtx *a, int n)
9987 {
9988 rtx insn = a[n - 1];
9989 int i = n - 2;
9990
9991 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9992 {
9993 a[i + 1] = a[i];
9994 i -= 1;
9995 }
9996 a[i + 1] = insn;
9997 }
9998
9999 #define SCHED_REORDER(READY, N_READY) \
10000 do \
10001 { \
10002 if ((N_READY) == 2) \
10003 swap_reorder (READY, N_READY); \
10004 else if ((N_READY) > 2) \
10005 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10006 } \
10007 while (0)
10008
10009 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10010 macro. */
10011 static void
10012 ready_reorder (rtx *ready, int nready)
10013 {
10014 SCHED_REORDER (ready, nready);
10015 }
10016
10017 /* Count life regions of r0 for a block. */
10018 static int
10019 find_r0_life_regions (basic_block b)
10020 {
10021 rtx end, insn;
10022 rtx pset;
10023 rtx r0_reg;
10024 int live;
10025 int set;
10026 int death = 0;
10027
10028 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10029 {
10030 set = 1;
10031 live = 1;
10032 }
10033 else
10034 {
10035 set = 0;
10036 live = 0;
10037 }
10038
10039 insn = BB_HEAD (b);
10040 end = BB_END (b);
10041 r0_reg = gen_rtx_REG (SImode, R0_REG);
10042 while (1)
10043 {
10044 if (INSN_P (insn))
10045 {
10046 if (find_regno_note (insn, REG_DEAD, R0_REG))
10047 {
10048 death++;
10049 live = 0;
10050 }
10051 if (!live
10052 && (pset = single_set (insn))
10053 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10054 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10055 {
10056 set++;
10057 live = 1;
10058 }
10059 }
10060 if (insn == end)
10061 break;
10062 insn = NEXT_INSN (insn);
10063 }
10064 return set - death;
10065 }
10066
10067 /* Calculate regmode weights for all insns of all basic blocks. */
10068 static void
10069 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10070 int verbose ATTRIBUTE_UNUSED,
10071 int old_max_uid)
10072 {
10073 basic_block b;
10074
10075 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10076 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10077 r0_life_regions = 0;
10078
10079 FOR_EACH_BB_REVERSE (b)
10080 {
10081 find_regmode_weight (b, SImode);
10082 find_regmode_weight (b, SFmode);
10083 if (!reload_completed)
10084 r0_life_regions += find_r0_life_regions (b);
10085 }
10086
10087 CURR_REGMODE_PRESSURE (SImode) = 0;
10088 CURR_REGMODE_PRESSURE (SFmode) = 0;
10089
10090 }
10091
10092 /* Cleanup. */
10093 static void
10094 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10095 int verbose ATTRIBUTE_UNUSED)
10096 {
10097 if (regmode_weight[0])
10098 {
10099 free (regmode_weight[0]);
10100 regmode_weight[0] = NULL;
10101 }
10102 if (regmode_weight[1])
10103 {
10104 free (regmode_weight[1]);
10105 regmode_weight[1] = NULL;
10106 }
10107 }
10108
10109 /* The scalar modes supported differ from the default version in TImode
10110 for 32-bit SHMEDIA. */
10111 static bool
10112 sh_scalar_mode_supported_p (enum machine_mode mode)
10113 {
10114 if (TARGET_SHMEDIA32 && mode == TImode)
10115 return false;
10116
10117 return default_scalar_mode_supported_p (mode);
10118 }
10119
10120 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10121 keep count of register pressures on SImode and SFmode. */
10122 static int
10123 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10124 int sched_verbose ATTRIBUTE_UNUSED,
10125 rtx insn,
10126 int can_issue_more)
10127 {
10128 if (GET_CODE (PATTERN (insn)) != USE
10129 && GET_CODE (PATTERN (insn)) != CLOBBER)
10130 cached_can_issue_more = can_issue_more - 1;
10131 else
10132 cached_can_issue_more = can_issue_more;
10133
10134 if (reload_completed)
10135 return cached_can_issue_more;
10136
10137 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10138 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10139
10140 return cached_can_issue_more;
10141 }
10142
10143 static void
10144 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10145 int verbose ATTRIBUTE_UNUSED,
10146 int veclen ATTRIBUTE_UNUSED)
10147 {
10148 CURR_REGMODE_PRESSURE (SImode) = 0;
10149 CURR_REGMODE_PRESSURE (SFmode) = 0;
10150 }
10151
10152 /* Some magic numbers. */
10153 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10154 functions that already have high pressure on r0. */
10155 #define R0_MAX_LIFE_REGIONS 2
10156 /* Register Pressure thresholds for SImode and SFmode registers. */
10157 #define SIMODE_MAX_WEIGHT 5
10158 #define SFMODE_MAX_WEIGHT 10
10159
10160 /* Return true if the pressure is high for MODE. */
10161 static short
10162 high_pressure (enum machine_mode mode)
10163 {
10164 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10165 functions that already have high pressure on r0. */
10166 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10167 return 1;
10168
10169 if (mode == SFmode)
10170 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10171 else
10172 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10173 }
10174
10175 /* Reorder ready queue if register pressure is high. */
10176 static int
10177 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10178 int sched_verbose ATTRIBUTE_UNUSED,
10179 rtx *ready,
10180 int *n_readyp,
10181 int clock_var ATTRIBUTE_UNUSED)
10182 {
10183 if (reload_completed)
10184 return sh_issue_rate ();
10185
10186 if (high_pressure (SFmode) || high_pressure (SImode))
10187 {
10188 ready_reorder (ready, *n_readyp);
10189 }
10190
10191 return sh_issue_rate ();
10192 }
10193
10194 /* Skip cycles if the current register pressure is high. */
10195 static int
10196 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10197 int sched_verbose ATTRIBUTE_UNUSED,
10198 rtx *ready ATTRIBUTE_UNUSED,
10199 int *n_readyp ATTRIBUTE_UNUSED,
10200 int clock_var ATTRIBUTE_UNUSED)
10201 {
10202 if (reload_completed)
10203 return cached_can_issue_more;
10204
10205 if (high_pressure (SFmode) || high_pressure (SImode))
10206 skip_cycles = 1;
10207
10208 return cached_can_issue_more;
10209 }
10210
10211 /* Skip cycles without sorting the ready queue. This will move insns from
10212 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
10213 queue by sh_reorder. */
10214
10215 /* Generally, skipping this many cycles is sufficient for all insns to move
10216 from Q -> R. */
10217 #define MAX_SKIPS 8
10218
10219 static int
10220 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10221 int sched_verbose ATTRIBUTE_UNUSED,
10222 rtx insn ATTRIBUTE_UNUSED,
10223 int last_clock_var,
10224 int clock_var,
10225 int *sort_p)
10226 {
10227 if (reload_completed)
10228 return 0;
10229
10230 if (skip_cycles)
10231 {
10232 if ((clock_var - last_clock_var) < MAX_SKIPS)
10233 {
10234 *sort_p = 0;
10235 return 1;
10236 }
10237 /* If this is the last cycle we are skipping, allow reordering of R. */
10238 if ((clock_var - last_clock_var) == MAX_SKIPS)
10239 {
10240 *sort_p = 1;
10241 return 1;
10242 }
10243 }
10244
10245 skip_cycles = 0;
10246
10247 return 0;
10248 }
10249
10250 /* SHmedia requires registers for branches, so we can't generate new
10251 branches past reload. */
10252 static bool
10253 sh_cannot_modify_jumps_p (void)
10254 {
10255 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10256 }
10257
10258 static enum reg_class
10259 sh_target_reg_class (void)
10260 {
10261 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10262 }
10263
10264 static bool
10265 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10266 {
10267 HARD_REG_SET dummy;
10268 #if 0
10269 rtx insn;
10270 #endif
10271
10272 if (! shmedia_space_reserved_for_target_registers)
10273 return 0;
10274 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10275 return 0;
10276 if (calc_live_regs (&dummy) >= 6 * 8)
10277 return 1;
10278 return 0;
10279 }
10280
10281 static bool
10282 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10283 {
10284 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10285 }
10286 \f
10287 /*
10288 On the SH1..SH4, the trampoline looks like
10289 2 0002 D202 mov.l l2,r2
10290 1 0000 D301 mov.l l1,r3
10291 3 0004 422B jmp @r2
10292 4 0006 0009 nop
10293 5 0008 00000000 l1: .long area
10294 6 000c 00000000 l2: .long function
10295
10296 SH5 (compact) uses r1 instead of r3 for the static chain. */
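/* For the plain SH1..SH4 case handled at the end of
   sh_initialize_trampoline, the four fixed 16-bit opcodes above are
   stored as two SImode words (in an order depending on endianness), and
   the static chain and function address are then written into the l1 and
   l2 slots at offsets 8 and 12.  */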
10297
10298
10299 /* Emit RTL insns to initialize the variable parts of a trampoline.
10300 FNADDR is an RTX for the address of the function's pure code.
10301 CXT is an RTX for the static chain value for the function. */
10302
10303 void
10304 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
10305 {
10306 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
10307
10308 if (TARGET_SHMEDIA64)
10309 {
10310 rtx tramp_templ;
10311 int fixed_len;
10312
10313 rtx movi1 = GEN_INT (0xcc000010);
10314 rtx shori1 = GEN_INT (0xc8000010);
10315 rtx src, dst;
10316
10317 /* The following trampoline works within a +- 128 KB range for cxt:
10318 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10319 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10320 gettr tr1,r1; blink tr0,r63 */
10321 /* Address rounding makes it hard to compute the exact bounds of the
10322 offset for this trampoline, but we have a rather generous offset
10323 range, so frame_offset should do fine as an upper bound. */
10324 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10325 {
10326 /* ??? could optimize this trampoline initialization
10327 by writing DImode words with two insns each. */
10328 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10329 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10330 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10331 insn = gen_rtx_AND (DImode, insn, mask);
10332 /* Or in ptb/u .,tr1 pattern */
10333 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10334 insn = force_operand (insn, NULL_RTX);
10335 insn = gen_lowpart (SImode, insn);
10336 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10337 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10338 insn = gen_rtx_AND (DImode, insn, mask);
10339 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10340 insn = gen_lowpart (SImode, insn);
10341 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10342 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10343 insn = gen_rtx_AND (DImode, insn, mask);
10344 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10345 insn = gen_lowpart (SImode, insn);
10346 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10347 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10348 insn = gen_rtx_AND (DImode, insn, mask);
10349 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10350 insn = gen_lowpart (SImode, insn);
10351 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10352 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10353 insn = gen_rtx_AND (DImode, insn, mask);
10354 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10355 insn = gen_lowpart (SImode, insn);
10356 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10357 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10358 GEN_INT (0x6bf10600));
10359 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10360 GEN_INT (0x4415fc10));
10361 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10362 GEN_INT (0x4401fff0));
10363 emit_insn (gen_ic_invalidate_line (tramp));
10364 return;
10365 }
10366 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
10367 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10368
10369 tramp_templ = gen_datalabel_ref (tramp_templ);
10370 dst = tramp_mem;
10371 src = gen_const_mem (BLKmode, tramp_templ);
10372 set_mem_align (dst, 256);
10373 set_mem_align (src, 64);
10374 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10375
10376 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10377 emit_move_insn (adjust_address (tramp_mem, Pmode,
10378 fixed_len + GET_MODE_SIZE (Pmode)),
10379 cxt);
10380 emit_insn (gen_ic_invalidate_line (tramp));
10381 return;
10382 }
10383 else if (TARGET_SHMEDIA)
10384 {
10385 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10386 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10387 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10388 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10389 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10390 rotated right by 10, with the higher 16 bits of every 32 selected. */
10391 rtx movishori
10392 = force_reg (V2HImode, (simplify_gen_subreg
10393 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10394 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10395 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10396
10397 tramp = force_reg (Pmode, tramp);
10398 fnaddr = force_reg (SImode, fnaddr);
10399 cxt = force_reg (SImode, cxt);
10400 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10401 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10402 movishori));
10403 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10404 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10405 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10406 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10407 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10408 gen_rtx_SUBREG (V2HImode, cxt, 0),
10409 movishori));
10410 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10411 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10412 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10413 if (TARGET_LITTLE_ENDIAN)
10414 {
10415 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10416 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10417 }
10418 else
10419 {
10420 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10421 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10422 }
10423 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10424 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10425 emit_insn (gen_ic_invalidate_line (tramp));
10426 return;
10427 }
10428 else if (TARGET_SHCOMPACT)
10429 {
10430 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10431 return;
10432 }
10433 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10434 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10435 SImode));
10436 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10437 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10438 SImode));
10439 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10440 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10441 if (TARGET_HARVARD)
10442 {
10443 if (!TARGET_INLINE_IC_INVALIDATE
10444 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10445 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10446 FUNCTION_ORDINARY),
10447 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10448 else
10449 emit_insn (gen_ic_invalidate_line (tramp));
10450 }
10451 }
10452
10453 /* FIXME: This is overly conservative. A SHcompact function that
10454 receives arguments ``by reference'' will have them stored in its
10455 own stack frame, so it must not pass pointers or references to
10456 these arguments to other functions by means of sibling calls. */
10457 /* If PIC, we cannot make sibling calls to global functions
10458 because the PLT requires r12 to be live. */
10459 static bool
10460 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10461 {
10462 return (1
10463 && (! TARGET_SHCOMPACT
10464 || crtl->args.info.stack_regs == 0)
10465 && ! sh_cfun_interrupt_handler_p ()
10466 && (! flag_pic
10467 || (decl && ! TREE_PUBLIC (decl))
10468 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10469 }
10470 \f
10471 /* Machine specific built-in functions. */
10472
10473 struct builtin_description
10474 {
10475 const enum insn_code icode;
10476 const char *const name;
10477 int signature;
10478 };
10479
10480 /* Describe the number and signedness of arguments; arg[0] == result
10481 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10482 /* 9: 64-bit pointer, 10: 32-bit pointer */
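/* For example, SH_BLTIN_SH_HI below is { 4, 4, 1 }: the result and first
   argument have don't-care signedness, while the second argument (the
   shift count) is unsigned.  */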
10483 static const char signature_args[][4] =
10484 {
10485 #define SH_BLTIN_V2SI2 0
10486 { 4, 4 },
10487 #define SH_BLTIN_V4HI2 1
10488 { 4, 4 },
10489 #define SH_BLTIN_V2SI3 2
10490 { 4, 4, 4 },
10491 #define SH_BLTIN_V4HI3 3
10492 { 4, 4, 4 },
10493 #define SH_BLTIN_V8QI3 4
10494 { 4, 4, 4 },
10495 #define SH_BLTIN_MAC_HISI 5
10496 { 1, 4, 4, 1 },
10497 #define SH_BLTIN_SH_HI 6
10498 { 4, 4, 1 },
10499 #define SH_BLTIN_SH_SI 7
10500 { 4, 4, 1 },
10501 #define SH_BLTIN_V4HI2V2SI 8
10502 { 4, 4, 4 },
10503 #define SH_BLTIN_V4HI2V8QI 9
10504 { 4, 4, 4 },
10505 #define SH_BLTIN_SISF 10
10506 { 4, 2 },
10507 #define SH_BLTIN_LDUA_L 11
10508 { 2, 10 },
10509 #define SH_BLTIN_LDUA_Q 12
10510 { 1, 10 },
10511 #define SH_BLTIN_STUA_L 13
10512 { 0, 10, 2 },
10513 #define SH_BLTIN_STUA_Q 14
10514 { 0, 10, 1 },
10515 #define SH_BLTIN_LDUA_L64 15
10516 { 2, 9 },
10517 #define SH_BLTIN_LDUA_Q64 16
10518 { 1, 9 },
10519 #define SH_BLTIN_STUA_L64 17
10520 { 0, 9, 2 },
10521 #define SH_BLTIN_STUA_Q64 18
10522 { 0, 9, 1 },
10523 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10524 #define SH_BLTIN_2 19
10525 #define SH_BLTIN_SU 19
10526 { 1, 2 },
10527 #define SH_BLTIN_3 20
10528 #define SH_BLTIN_SUS 20
10529 { 2, 2, 1 },
10530 #define SH_BLTIN_PSSV 21
10531 { 0, 8, 2, 2 },
10532 #define SH_BLTIN_XXUU 22
10533 #define SH_BLTIN_UUUU 22
10534 { 1, 1, 1, 1 },
10535 #define SH_BLTIN_PV 23
10536 { 0, 8 },
10537 };
10538 /* mcmv: operands considered unsigned. */
10539 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10540 /* mperm: control value considered unsigned int. */
10541 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10542 /* mshards_q: returns signed short. */
10543 /* nsb: takes long long arg, returns unsigned char. */
10544 static const struct builtin_description bdesc[] =
10545 {
10546 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
10547 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
10548 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
10549 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
10550 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
10551 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
10552 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
10553 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
10554 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
10555 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
10556 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
10557 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
10558 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
10559 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
10560 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
10561 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
10562 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
10563 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
10564 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
10565 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
10566 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
10567 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
10568 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
10569 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
10570 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
10571 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
10572 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
10573 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
10574 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
10575 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
10576 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
10577 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
10578 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
10579 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
10580 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
10581 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
10582 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
10583 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
10584 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
10585 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
10586 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
10587 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
10588 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
10589 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
10590 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
10591 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
10592 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
10593 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
10594 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
10595 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
10596 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
10597 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
10598 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
10599 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
10600 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
10601 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
10602 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
10603 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
10604 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
10605 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
10606 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
10607 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
10608 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
10609 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
10610 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
10611 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
10612 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
10613 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
10614 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
10615 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
10616 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
10617 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
10618 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
10619 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
10620 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
10621 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
10622 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
10623 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
10624 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
10625 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
10626 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
10627 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
10628 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
10629 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
10630 };
10631
10632 static void
10633 sh_media_init_builtins (void)
10634 {
10635 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10636 const struct builtin_description *d;
10637
10638 memset (shared, 0, sizeof shared);
10639 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10640 {
10641 tree type, arg_type = 0;
10642 int signature = d->signature;
10643 int i;
10644
10645 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10646 type = shared[signature];
10647 else
10648 {
10649 int has_result = signature_args[signature][0] != 0;
10650
10651 if ((signature_args[signature][1] & 8)
10652 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10653 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10654 continue;
10655 if (! TARGET_FPU_ANY
10656 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10657 continue;
10658 type = void_list_node;
10659 for (i = 3; ; i--)
10660 {
10661 int arg = signature_args[signature][i];
10662 int opno = i - 1 + has_result;
10663
10664 if (arg & 8)
10665 arg_type = ptr_type_node;
10666 else if (arg)
10667 arg_type = (*lang_hooks.types.type_for_mode)
10668 (insn_data[d->icode].operand[opno].mode,
10669 (arg & 1));
10670 else if (i)
10671 continue;
10672 else
10673 arg_type = void_type_node;
10674 if (i == 0)
10675 break;
10676 type = tree_cons (NULL_TREE, arg_type, type);
10677 }
10678 type = build_function_type (arg_type, type);
10679 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10680 shared[signature] = type;
10681 }
10682 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10683 NULL, NULL_TREE);
10684 }
10685 }
10686
10687 /* Implements target hook vector_mode_supported_p. */
10688 bool
10689 sh_vector_mode_supported_p (enum machine_mode mode)
10690 {
10691 if (TARGET_FPU_ANY
10692 && ((mode == V2SFmode)
10693 || (mode == V4SFmode)
10694 || (mode == V16SFmode)))
10695 return true;
10696
10697 else if (TARGET_SHMEDIA
10698 && ((mode == V8QImode)
10699 || (mode == V2HImode)
10700 || (mode == V4HImode)
10701 || (mode == V2SImode)))
10702 return true;
10703
10704 return false;
10705 }
10706
10707 /* Implements target hook dwarf_calling_convention. Return an enum
10708 of dwarf_calling_convention. */
10709 int
10710 sh_dwarf_calling_convention (const_tree func)
10711 {
10712 if (sh_attr_renesas_p (func))
10713 return DW_CC_GNU_renesas_sh;
10714
10715 return DW_CC_normal;
10716 }
10717
10718 static void
10719 sh_init_builtins (void)
10720 {
10721 if (TARGET_SHMEDIA)
10722 sh_media_init_builtins ();
10723 }
10724
10725 /* Expand an expression EXP that calls a built-in function,
10726 with result going to TARGET if that's convenient
10727 (and in mode MODE if that's convenient).
10728 SUBTARGET may be used as the target for computing one of EXP's operands.
10729 IGNORE is nonzero if the value is to be ignored. */
10730
10731 static rtx
10732 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10733 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10734 {
10735 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10736 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10737 const struct builtin_description *d = &bdesc[fcode];
10738 enum insn_code icode = d->icode;
10739 int signature = d->signature;
10740 enum machine_mode tmode = VOIDmode;
10741 int nop = 0, i;
10742 rtx op[4];
10743 rtx pat = 0;
10744
10745 if (signature_args[signature][0])
10746 {
10747 if (ignore)
10748 return 0;
10749
10750 tmode = insn_data[icode].operand[0].mode;
10751 if (! target
10752 || GET_MODE (target) != tmode
10753 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10754 target = gen_reg_rtx (tmode);
10755 op[nop++] = target;
10756 }
10757 else
10758 target = 0;
10759
10760 for (i = 1; i <= 3; i++, nop++)
10761 {
10762 tree arg;
10763 enum machine_mode opmode, argmode;
10764 tree optype;
10765
10766 if (! signature_args[signature][i])
10767 break;
10768 arg = CALL_EXPR_ARG (exp, i - 1);
10769 if (arg == error_mark_node)
10770 return const0_rtx;
10771 if (signature_args[signature][i] & 8)
10772 {
10773 opmode = ptr_mode;
10774 optype = ptr_type_node;
10775 }
10776 else
10777 {
10778 opmode = insn_data[icode].operand[nop].mode;
10779 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10780 }
10781 argmode = TYPE_MODE (TREE_TYPE (arg));
10782 if (argmode != opmode)
10783 arg = build1 (NOP_EXPR, optype, arg);
10784 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10785 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10786 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10787 }
10788
10789 switch (nop)
10790 {
10791 case 1:
10792 pat = (*insn_data[d->icode].genfun) (op[0]);
10793 break;
10794 case 2:
10795 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10796 break;
10797 case 3:
10798 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10799 break;
10800 case 4:
10801 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10802 break;
10803 default:
10804 gcc_unreachable ();
10805 }
10806 if (! pat)
10807 return 0;
10808 emit_insn (pat);
10809 return target;
10810 }
10811
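/* Expand a V2SF unary operation: apply CODE in SFmode to each of the two
   elements of OP1 and place the results in OP0.  */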
10812 void
10813 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10814 {
10815 rtx sel0 = const0_rtx;
10816 rtx sel1 = const1_rtx;
10817 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10818 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10819
10820 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10821 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
10822 }
10823
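/* Expand a V2SF binary operation: apply CODE in SFmode element-wise to
   OP1 and OP2 and place the results in OP0.  */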
10824 void
10825 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10826 {
10827 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10828
10829 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10830 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
10831 }
10832
10833 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
10834 We can allow any mode in any general register. The special registers
10835 only allow SImode, as does the PR register.
10836
10837 We cannot hold DCmode values in the XD registers because alter_reg
10838 handles subregs of them incorrectly. We could work around this by
10839 spacing the XD registers like the DR registers, but this would require
10840 additional memory in every compilation to hold larger register vectors.
10841 We could hold SFmode / SCmode values in XD registers, but that
10842 would require a tertiary reload when reloading from / to memory,
10843 and a secondary reload to reload from / to general regs; that
10844 seems to be a losing proposition.
10845
10846 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10847 it won't be ferried through GP registers first. */
10848
10849 bool
10850 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
10851 {
10852 if (SPECIAL_REGISTER_P (regno))
10853 return mode == SImode;
10854
10855 if (regno == FPUL_REG)
10856 return (mode == SImode || mode == SFmode);
10857
10858 if (FP_REGISTER_P (regno) && mode == SFmode)
10859 return true;
10860
10861 if (mode == V2SFmode)
10862 {
10863 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10864 || GENERAL_REGISTER_P (regno)))
10865 return true;
10866 else
10867 return false;
10868 }
10869
10870 if (mode == V4SFmode)
10871 {
10872 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10873 || GENERAL_REGISTER_P (regno))
10874 return true;
10875 else
10876 return false;
10877 }
10878
10879 if (mode == V16SFmode)
10880 {
10881 if (TARGET_SHMEDIA)
10882 {
10883 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
10884 return true;
10885 else
10886 return false;
10887 }
10888 else
10889 return regno == FIRST_XD_REG;
10890 }
10891
10892 if (FP_REGISTER_P (regno))
10893 {
10894 if (mode == SFmode
10895 || mode == SImode
10896 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
10897 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10898 || mode == DCmode
10899 || (TARGET_SHMEDIA
10900 && (mode == DFmode || mode == DImode
10901 || mode == V2SFmode || mode == TImode)))
10902 && ((regno - FIRST_FP_REG) & 1) == 0)
10903 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
10904 && ((regno - FIRST_FP_REG) & 3) == 0))
10905 return true;
10906 else
10907 return false;
10908 }
10909
10910 if (XD_REGISTER_P (regno))
10911 return mode == DFmode;
10912
10913 if (TARGET_REGISTER_P (regno))
10914 return (mode == DImode || mode == SImode || mode == PDImode);
10915
10916 if (regno == PR_REG)
10917 return mode == SImode;
10918
10919 if (regno == FPSCR_REG)
10920 return mode == PSImode;
10921
10922 /* FIXME. This works around PR target/37633 for -O0. */
10923 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
10924 {
10925 unsigned int n = GET_MODE_SIZE (mode) / 8;
10926
10927 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
10928 && regno <= FIRST_GENERAL_REG + 14)
10929 return false;
10930 }
10931
10932 return true;
10933 }
10934
10935 /* Return true if a change from mode FROM to mode TO is invalid for a
10936 register of class RCLASS.  */
10937 bool
10938 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10939 enum reg_class rclass)
10940 {
10941 /* We want to enable the use of SUBREGs as a means to
10942 VEC_SELECT a single element of a vector. */
10943 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10944 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
10945
10946 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10947 {
10948 if (TARGET_LITTLE_ENDIAN)
10949 {
10950 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10951 return reg_classes_intersect_p (DF_REGS, rclass);
10952 }
10953 else
10954 {
10955 if (GET_MODE_SIZE (from) < 8)
10956 return reg_classes_intersect_p (DF_HI_REGS, rclass);
10957 }
10958 }
10959 return 0;
10960 }
10961
10962
10963 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10964 that label is used. */
10965
10966 void
10967 sh_mark_label (rtx address, int nuses)
10968 {
10969 if (GOTOFF_P (address))
10970 {
10971 /* Extract the label or symbol. */
10972 address = XEXP (address, 0);
10973 if (GET_CODE (address) == PLUS)
10974 address = XEXP (address, 0);
10975 address = XVECEXP (address, 0, 0);
10976 }
10977 if (GET_CODE (address) == LABEL_REF
10978 && LABEL_P (XEXP (address, 0)))
10979 LABEL_NUSES (XEXP (address, 0)) += nuses;
10980 }
10981
10982 /* Compute extra cost of moving data between one register class
10983 and another. */
10984
10985 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10986 uses this information. Hence, the general register <-> floating point
10987 register information here is not used for SFmode. */
10988
10989 int
10990 sh_register_move_cost (enum machine_mode mode,
10991 enum reg_class srcclass, enum reg_class dstclass)
10992 {
10993 if (dstclass == T_REGS || dstclass == PR_REGS)
10994 return 10;
10995
10996 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10997 return 4;
10998
10999 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11000 && REGCLASS_HAS_FP_REG (srcclass)
11001 && REGCLASS_HAS_FP_REG (dstclass))
11002 return 4;
11003
11004 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11005 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11006
11007 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11008 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11009 return 9;
11010
11011 if ((REGCLASS_HAS_FP_REG (dstclass)
11012 && REGCLASS_HAS_GENERAL_REG (srcclass))
11013 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11014 && REGCLASS_HAS_FP_REG (srcclass)))
11015 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11016 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11017
11018 if ((dstclass == FPUL_REGS
11019 && REGCLASS_HAS_GENERAL_REG (srcclass))
11020 || (srcclass == FPUL_REGS
11021 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11022 return 5;
11023
11024 if ((dstclass == FPUL_REGS
11025 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11026 || (srcclass == FPUL_REGS
11027 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11028 return 7;
11029
11030 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11031 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11032 return 20;
11033
11034 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11035 if (TARGET_SHMEDIA
11036 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11037 {
11038 if (sh_gettrcost >= 0)
11039 return sh_gettrcost;
11040 else if (!TARGET_PT_FIXED)
11041 return 100;
11042 }
11043
11044 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11045 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11046 return 4;
11047
11048 if (TARGET_SHMEDIA
11049 || (TARGET_FMOVD
11050 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11051 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11052 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11053
11054 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11055 }
11056
11057 static rtx emit_load_ptr (rtx, rtx);
11058
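/* Emit an insn loading the ptr_mode value at address ADDR into REG,
   sign-extending it to Pmode when Pmode is wider than ptr_mode.  */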
11059 static rtx
11060 emit_load_ptr (rtx reg, rtx addr)
11061 {
11062 rtx mem = gen_const_mem (ptr_mode, addr);
11063
11064 if (Pmode != ptr_mode)
11065 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11066 return emit_move_insn (reg, mem);
11067 }
11068
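/* Output the thunk for FUNCTION: adjust the incoming "this" pointer by
   DELTA and, if VCALL_OFFSET is nonzero, by the value found at that offset
   in the vtable, then tail-call FUNCTION.  */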
11069 static void
11070 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11071 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11072 tree function)
11073 {
11074 CUMULATIVE_ARGS cum;
11075 int structure_value_byref = 0;
11076 rtx this_rtx, this_value, sibcall, insns, funexp;
11077 tree funtype = TREE_TYPE (function);
11078 int simple_add = CONST_OK_FOR_ADD (delta);
11079 int did_load = 0;
11080 rtx scratch0, scratch1, scratch2;
11081 unsigned i;
11082
11083 reload_completed = 1;
11084 epilogue_completed = 1;
11085 current_function_uses_only_leaf_regs = 1;
11086
11087 emit_note (NOTE_INSN_PROLOGUE_END);
11088
11089 /* Find the "this" pointer. We have such a wide range of ABIs for the
11090 SH that it's best to do this completely machine independently.
11091 "this" is passed as first argument, unless a structure return pointer
11092 comes first, in which case "this" comes second. */
11093 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11094 #ifndef PCC_STATIC_STRUCT_RETURN
11095 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11096 structure_value_byref = 1;
11097 #endif /* not PCC_STATIC_STRUCT_RETURN */
11098 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11099 {
11100 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11101
11102 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11103 }
11104 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11105
11106 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11107 static chain pointer (even if you can't have nested virtual functions
11108 right now, someone might implement them sometime), and the rest of the
11109 registers are used for argument passing, are callee-saved, or reserved. */
11110 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11111 -ffixed-reg has been used. */
11112 if (! call_used_regs[0] || fixed_regs[0])
11113 error ("r0 needs to be available as a call-clobbered register");
11114 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11115 if (! TARGET_SH5)
11116 {
11117 if (call_used_regs[1] && ! fixed_regs[1])
11118 scratch1 = gen_rtx_REG (ptr_mode, 1);
11119 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11120 to the location where struct values are to be returned. */
11121 if (call_used_regs[3] && ! fixed_regs[3])
11122 scratch2 = gen_rtx_REG (Pmode, 3);
11123 }
11124 else if (TARGET_SHMEDIA)
11125 {
11126 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11127 if (i != REGNO (scratch0) &&
11128 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11129 {
11130 scratch1 = gen_rtx_REG (ptr_mode, i);
11131 break;
11132 }
11133 if (scratch1 == scratch0)
11134 error ("need a second call-clobbered general purpose register");
11135 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11136 if (call_used_regs[i] && ! fixed_regs[i])
11137 {
11138 scratch2 = gen_rtx_REG (Pmode, i);
11139 break;
11140 }
11141 if (scratch2 == scratch0)
11142 error ("need a call-clobbered target register");
11143 }
11144
11145 this_value = plus_constant (this_rtx, delta);
11146 if (vcall_offset
11147 && (simple_add || scratch0 != scratch1)
11148 && strict_memory_address_p (ptr_mode, this_value))
11149 {
11150 emit_load_ptr (scratch0, this_value);
11151 did_load = 1;
11152 }
11153
11154 if (!delta)
11155 ; /* Do nothing. */
11156 else if (simple_add)
11157 emit_move_insn (this_rtx, this_value);
11158 else
11159 {
11160 emit_move_insn (scratch1, GEN_INT (delta));
11161 emit_insn (gen_add2_insn (this_rtx, scratch1));
11162 }
11163
11164 if (vcall_offset)
11165 {
11166 rtx offset_addr;
11167
11168 if (!did_load)
11169 emit_load_ptr (scratch0, this_rtx);
11170
11171 offset_addr = plus_constant (scratch0, vcall_offset);
11172 if (strict_memory_address_p (ptr_mode, offset_addr))
11173 ; /* Do nothing. */
11174 else if (! TARGET_SH5 && scratch0 != scratch1)
11175 {
11176 /* scratch0 != scratch1, and we have indexed loads.  Get a better
11177 schedule by loading the offset into r1 and using an indexed
11178 load - then the load of r1 can issue before the load from
11179 (this_rtx + delta) finishes. */
11180 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11181 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11182 }
11183 else if (CONST_OK_FOR_ADD (vcall_offset))
11184 {
11185 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11186 offset_addr = scratch0;
11187 }
11188 else if (scratch0 != scratch1)
11189 {
11190 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11191 emit_insn (gen_add2_insn (scratch0, scratch1));
11192 offset_addr = scratch0;
11193 }
11194 else
11195 gcc_unreachable (); /* FIXME */
11196 emit_load_ptr (scratch0, offset_addr);
11197
11198 if (Pmode != ptr_mode)
11199 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11200 emit_insn (gen_add2_insn (this_rtx, scratch0));
11201 }
11202
11203 /* Generate a tail call to the target function. */
11204 if (! TREE_USED (function))
11205 {
11206 assemble_external (function);
11207 TREE_USED (function) = 1;
11208 }
11209 funexp = XEXP (DECL_RTL (function), 0);
11210 /* If the function is overridden, so is the thunk, hence we don't
11211 need GOT addressing even if this is a public symbol. */
11212 #if 0
11213 if (TARGET_SH1 && ! flag_weak)
11214 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11215 else
11216 #endif
11217 if (TARGET_SH2 && flag_pic)
11218 {
11219 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11220 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11221 }
11222 else
11223 {
11224 if (TARGET_SHMEDIA && flag_pic)
11225 {
11226 funexp = gen_sym2PIC (funexp);
11227 PUT_MODE (funexp, Pmode);
11228 }
11229 emit_move_insn (scratch2, funexp);
11230 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11231 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11232 }
11233 sibcall = emit_call_insn (sibcall);
11234 SIBLING_CALL_P (sibcall) = 1;
11235 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11236 emit_barrier ();
11237
11238 /* Run just enough of rest_of_compilation to do scheduling and get
11239 the insns emitted. Note that use_thunk calls
11240 assemble_start_function and assemble_end_function. */
11241
11242 insn_locators_alloc ();
11243 insns = get_insns ();
11244
11245 if (optimize > 0)
11246 {
11247 if (! cfun->cfg)
11248 init_flow (cfun);
11249 split_all_insns_noflow ();
11250 }
11251
11252 sh_reorg ();
11253
11254 if (optimize > 0 && flag_delayed_branch)
11255 dbr_schedule (insns);
11256
11257 shorten_branches (insns);
11258 final_start_function (insns, file, 1);
11259 final (insns, file, 1);
11260 final_end_function ();
11261 free_after_compilation (cfun);
11262
11263 reload_completed = 0;
11264 epilogue_completed = 0;
11265 }
11266
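/* Return an rtx for the address of the function NAME of kind KIND,
   emitting whatever PIC load is needed.  If TARGET is nonnull it is used
   as the destination register.  */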
11267 rtx
11268 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11269 {
11270 rtx sym;
11271
11272 /* If this is not an ordinary function, the name usually comes from a
11273 string literal or an sprintf buffer. Make sure we use the same
11274 string consistently, so that cse will be able to unify address loads. */
11275 if (kind != FUNCTION_ORDINARY)
11276 name = IDENTIFIER_POINTER (get_identifier (name));
11277 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11278 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11279 if (flag_pic)
11280 switch (kind)
11281 {
11282 case FUNCTION_ORDINARY:
11283 break;
11284 case SFUNC_GOT:
11285 {
11286 rtx reg = target ? target : gen_reg_rtx (Pmode);
11287
11288 emit_insn (gen_symGOT2reg (reg, sym));
11289 sym = reg;
11290 break;
11291 }
11292 case SFUNC_STATIC:
11293 {
11294 /* ??? To allow cse to work, we use GOTOFF relocations.
11295 We could add combiner patterns to transform this into
11296 straight pc-relative calls with sym2PIC / bsrf when
11297 label load and function call are still 1:1 and in the
11298 same basic block during combine. */
11299 rtx reg = target ? target : gen_reg_rtx (Pmode);
11300
11301 emit_insn (gen_symGOTOFF2reg (reg, sym));
11302 sym = reg;
11303 break;
11304 }
11305 }
11306 if (target && sym != target)
11307 {
11308 emit_move_insn (target, sym);
11309 return target;
11310 }
11311 return sym;
11312 }
11313
11314 /* Find the number of a general purpose register in S. */
11315 static int
11316 scavenge_reg (HARD_REG_SET *s)
11317 {
11318 int r;
11319 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11320 if (TEST_HARD_REG_BIT (*s, r))
11321 return r;
11322 return -1;
11323 }
11324
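/* Return an rtx for the value that the PR register held on entry to the
   current function.  */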
11325 rtx
11326 sh_get_pr_initial_val (void)
11327 {
11328 rtx val;
11329
11330 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11331 PR register on SHcompact, because it might be clobbered by the prologue.
11332 We check first if that is known to be the case. */
11333 if (TARGET_SHCOMPACT
11334 && ((crtl->args.info.call_cookie
11335 & ~ CALL_COOKIE_RET_TRAMP (1))
11336 || crtl->saves_all_registers))
11337 return gen_frame_mem (SImode, return_address_pointer_rtx);
11338
11339 /* If we haven't finished rtl generation, there might be a nonlocal label
11340 that we haven't seen yet.
11341 ??? get_hard_reg_initial_val fails if it is called after register
11342 allocation has started, unless it has been called before for the
11343 same register. And even then, we end in trouble if we didn't use
11344 the register in the same basic block before. So call
11345 get_hard_reg_initial_val now and wrap it in an unspec if we might
11346 need to replace it. */
11347 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11348 combine can put the pseudo returned by get_hard_reg_initial_val into
11349 instructions that need a general purpose register, which will fail to
11350 be recognized when the pseudo becomes allocated to PR. */
11351 val
11352 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11353 if (TARGET_SH1)
11354 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11355 return val;
11356 }
11357
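/* Expand an scc pattern whose compared operand is the T bit:
   OPERANDS[0] is the target, OPERANDS[1] the comparison code and
   OPERANDS[2] / OPERANDS[3] the values compared.  Return nonzero if the
   comparison was handled here, zero otherwise.  */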
11358 int
11359 sh_expand_t_scc (rtx operands[])
11360 {
11361 enum rtx_code code = GET_CODE (operands[1]);
11362 rtx target = operands[0];
11363 rtx op0 = operands[2];
11364 rtx op1 = operands[3];
11365 rtx result = target;
11366 HOST_WIDE_INT val;
11367
11368 if (!REG_P (op0) || REGNO (op0) != T_REG
11369 || !CONST_INT_P (op1))
11370 return 0;
11371 if (!REG_P (result))
11372 result = gen_reg_rtx (SImode);
11373 val = INTVAL (op1);
11374 if ((code == EQ && val == 1) || (code == NE && val == 0))
11375 emit_insn (gen_movt (result));
11376 else if (TARGET_SH2A && ((code == EQ && val == 0)
11377 || (code == NE && val == 1)))
11378 emit_insn (gen_xorsi3_movrt (result));
11379 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11380 {
11381 emit_clobber (result);
11382 emit_insn (gen_subc (result, result, result));
11383 emit_insn (gen_addsi3 (result, result, const1_rtx));
11384 }
11385 else if (code == EQ || code == NE)
11386 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11387 else
11388 return 0;
11389 if (result != target)
11390 emit_move_insn (target, result);
11391 return 1;
11392 }
11393
11394 /* INSN is an sfunc; return the rtx that describes the address used. */
11395 static rtx
11396 extract_sfunc_addr (rtx insn)
11397 {
11398 rtx pattern, part = NULL_RTX;
11399 int len, i;
11400
11401 pattern = PATTERN (insn);
11402 len = XVECLEN (pattern, 0);
11403 for (i = 0; i < len; i++)
11404 {
11405 part = XVECEXP (pattern, 0, i);
11406 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11407 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11408 return XEXP (part, 0);
11409 }
11410 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11411 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11412 }
11413
11414 /* Verify that the register in use_sfunc_addr still agrees with the address
11415 used in the sfunc. This prevents fill_slots_from_thread from changing
11416 use_sfunc_addr.
11417 INSN is the use_sfunc_addr instruction, and REG is the register it
11418 guards. */
11419 int
11420 check_use_sfunc_addr (rtx insn, rtx reg)
11421 {
11422 /* Search for the sfunc. It should really come right after INSN. */
11423 while ((insn = NEXT_INSN (insn)))
11424 {
11425 if (LABEL_P (insn) || JUMP_P (insn))
11426 break;
11427 if (! INSN_P (insn))
11428 continue;
11429
11430 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11431 insn = XVECEXP (PATTERN (insn), 0, 0);
11432 if (GET_CODE (PATTERN (insn)) != PARALLEL
11433 || get_attr_type (insn) != TYPE_SFUNC)
11434 continue;
11435 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11436 }
11437 gcc_unreachable ();
11438 }
11439
11440 /* This function returns a constant rtx that represents 2**15 / pi in
11441 SFmode.  It's used to scale SFmode angles, in radians, to a
11442 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11443 maps to 0x10000).  */
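/* Numerically, 2**15 / pi = 32768 / 3.14159265... ~= 10430.3784, which is
   the literal used below.  */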
11444
11445 static GTY(()) rtx sh_fsca_sf2int_rtx;
11446
11447 rtx
11448 sh_fsca_sf2int (void)
11449 {
11450 if (! sh_fsca_sf2int_rtx)
11451 {
11452 REAL_VALUE_TYPE rv;
11453
11454 real_from_string (&rv, "10430.378350470453");
11455 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11456 }
11457
11458 return sh_fsca_sf2int_rtx;
11459 }
11460
11461 /* This function returns a constant rtx that represents 2**15 / pi in
11462 DFmode.  It's used to scale DFmode angles, in radians, to a
11463 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11464 maps to 0x10000).  */
11465
11466 static GTY(()) rtx sh_fsca_df2int_rtx;
11467
11468 rtx
11469 sh_fsca_df2int (void)
11470 {
11471 if (! sh_fsca_df2int_rtx)
11472 {
11473 REAL_VALUE_TYPE rv;
11474
11475 real_from_string (&rv, "10430.378350470453");
11476 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11477 }
11478
11479 return sh_fsca_df2int_rtx;
11480 }
11481
11482 /* This function returns a constant rtx that represents pi / 2**15 in
11483 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
11484 of a full circle back to an SFmode value (i.e., 0x10000 maps to
11485 2*pi).  */
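/* Numerically, pi / 2**15 = 3.14159265... / 32768 ~= 9.5874e-05, which is
   the literal used below.  */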
11486
11487 static GTY(()) rtx sh_fsca_int2sf_rtx;
11488
11489 rtx
11490 sh_fsca_int2sf (void)
11491 {
11492 if (! sh_fsca_int2sf_rtx)
11493 {
11494 REAL_VALUE_TYPE rv;
11495
11496 real_from_string (&rv, "9.587379924285257e-5");
11497 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11498 }
11499
11500 return sh_fsca_int2sf_rtx;
11501 }
11502
11503 /* Initialize the CUMULATIVE_ARGS structure. */
11504
11505 void
11506 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11507 tree fntype,
11508 rtx libname ATTRIBUTE_UNUSED,
11509 tree fndecl,
11510 signed int n_named_args,
11511 enum machine_mode mode)
11512 {
11513 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11514 pcum->free_single_fp_reg = 0;
11515 pcum->stack_regs = 0;
11516 pcum->byref_regs = 0;
11517 pcum->byref = 0;
11518 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11519
11520 /* XXX - Should we check TARGET_HITACHI here ??? */
11521 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11522
11523 if (fntype)
11524 {
11525 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11526 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11527 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11528 pcum->arg_count [(int) SH_ARG_INT]
11529 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11530
11531 pcum->call_cookie
11532 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11533 && pcum->arg_count [(int) SH_ARG_INT] == 0
11534 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11535 ? int_size_in_bytes (TREE_TYPE (fntype))
11536 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11537 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11538 == FIRST_RET_REG));
11539 }
11540 else
11541 {
11542 pcum->arg_count [(int) SH_ARG_INT] = 0;
11543 pcum->prototype_p = FALSE;
11544 if (mode != VOIDmode)
11545 {
11546 pcum->call_cookie =
11547 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11548 && GET_MODE_SIZE (mode) > 4
11549 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11550
11551 /* If the default ABI is the Renesas ABI then all library
11552 calls must assume that the library will be using the
11553 Renesas ABI. So if the function would return its result
11554 in memory then we must force the address of this memory
11555 block onto the stack. Ideally we would like to call
11556 targetm.calls.return_in_memory() here but we do not have
11557 the TYPE or the FNDECL available so we synthesize the
11558 contents of that function as best we can. */
11559 pcum->force_mem =
11560 (TARGET_DEFAULT & MASK_HITACHI)
11561 && (mode == BLKmode
11562 || (GET_MODE_SIZE (mode) > 4
11563 && !(mode == DFmode
11564 && TARGET_FPU_DOUBLE)));
11565 }
11566 else
11567 {
11568 pcum->call_cookie = 0;
11569 pcum->force_mem = FALSE;
11570 }
11571 }
11572 }
11573
11574 /* Replace any occurrence of FROM(n) in X with TO(n).  The function does
11575 not look inside CONST_DOUBLEs when replacing.
11576
11577 Note that copying is not done so X must not be shared unless all copies
11578 are to be modified.
11579
11580 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11581 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11582 replacements[n*2+1] - and that we take mode changes into account.
11583
11584 If a replacement is ambiguous, return NULL_RTX.
11585
11586 If MODIFY is zero, don't modify any rtl in place,
11587 just return zero or nonzero for failure / success. */
11588
11589 rtx
11590 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11591 {
11592 int i, j;
11593 const char *fmt;
11594
11595 /* The following prevents infinite loops when we would change a MEM
11596 inside a CONST_DOUBLE into the same CONST_DOUBLE.  */
11597 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11598 return x;
11599
11600 for (i = n_replacements - 1; i >= 0 ; i--)
11601 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11602 return replacements[i*2+1];
11603
11604 /* Allow this function to make replacements in EXPR_LISTs. */
11605 if (x == 0)
11606 return 0;
11607
11608 if (GET_CODE (x) == SUBREG)
11609 {
11610 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11611 n_replacements, modify);
11612
11613 if (CONST_INT_P (new_rtx))
11614 {
11615 x = simplify_subreg (GET_MODE (x), new_rtx,
11616 GET_MODE (SUBREG_REG (x)),
11617 SUBREG_BYTE (x));
11618 if (! x)
11619 abort ();
11620 }
11621 else if (modify)
11622 SUBREG_REG (x) = new_rtx;
11623
11624 return x;
11625 }
11626 else if (REG_P (x))
11627 {
11628 unsigned regno = REGNO (x);
11629 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11630 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11631 rtx result = NULL_RTX;
11632
11633 for (i = n_replacements - 1; i >= 0; i--)
11634 {
11635 rtx from = replacements[i*2];
11636 rtx to = replacements[i*2+1];
11637 unsigned from_regno, from_nregs, to_regno, new_regno;
11638
11639 if (!REG_P (from))
11640 continue;
11641 from_regno = REGNO (from);
11642 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11643 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11644 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11645 {
11646 if (regno < from_regno
11647 || regno + nregs > from_regno + from_nregs
11648 || !REG_P (to)
11649 || result)
11650 return NULL_RTX;
11651 to_regno = REGNO (to);
11652 if (to_regno < FIRST_PSEUDO_REGISTER)
11653 {
11654 new_regno = regno + to_regno - from_regno;
11655 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11656 != nregs)
11657 return NULL_RTX;
11658 result = gen_rtx_REG (GET_MODE (x), new_regno);
11659 }
11660 else if (GET_MODE (x) <= GET_MODE (to))
11661 result = gen_lowpart_common (GET_MODE (x), to);
11662 else
11663 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11664 }
11665 }
11666 return result ? result : x;
11667 }
11668 else if (GET_CODE (x) == ZERO_EXTEND)
11669 {
11670 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11671 n_replacements, modify);
11672
11673 if (CONST_INT_P (new_rtx))
11674 {
11675 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11676 new_rtx, GET_MODE (XEXP (x, 0)));
11677 if (! x)
11678 abort ();
11679 }
11680 else if (modify)
11681 XEXP (x, 0) = new_rtx;
11682
11683 return x;
11684 }
11685
11686 fmt = GET_RTX_FORMAT (GET_CODE (x));
11687 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11688 {
11689 rtx new_rtx;
11690
11691 if (fmt[i] == 'e')
11692 {
11693 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11694 n_replacements, modify);
11695 if (!new_rtx)
11696 return NULL_RTX;
11697 if (modify)
11698 XEXP (x, i) = new_rtx;
11699 }
11700 else if (fmt[i] == 'E')
11701 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11702 {
11703 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11704 n_replacements, modify);
11705 if (!new_rtx)
11706 return NULL_RTX;
11707 if (modify)
11708 XVECEXP (x, i, j) = new_rtx;
11709 }
11710 }
11711
11712 return x;
11713 }
11714
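/* Return an rtx for the low MODE part of X.  When X is a sign or zero
   extension of a narrower value, reuse the inner operand where possible;
   if NEED_SIGN_EXT is nonzero, an extension narrower than MODE is only
   reused when it is a SIGN_EXTEND.  */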
11715 rtx
11716 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11717 {
11718 enum rtx_code code = TRUNCATE;
11719
11720 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11721 {
11722 rtx inner = XEXP (x, 0);
11723 enum machine_mode inner_mode = GET_MODE (inner);
11724
11725 if (inner_mode == mode)
11726 return inner;
11727 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11728 x = inner;
11729 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11730 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11731 {
11732 code = GET_CODE (x);
11733 x = inner;
11734 }
11735 }
11736 return gen_rtx_fmt_e (code, mode, x);
11737 }
11738
11739 /* Called via for_each_rtx after reload, to clean up truncates of
11740 registers that span multiple actual hard registers. */
11741 int
11742 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11743 {
11744 rtx x = *p, reg;
11745
11746 if (GET_CODE (x) != TRUNCATE)
11747 return 0;
11748 reg = XEXP (x, 0);
11749 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
11750 {
11751 enum machine_mode reg_mode = GET_MODE (reg);
11752 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11753 subreg_lowpart_offset (DImode, reg_mode));
11754 *(int*) n_changes += 1;
11755 return -1;
11756 }
11757 return 0;
11758 }
11759
11760 /* Load and store depend on the highpart of the address. However,
11761 set_attr_alternative does not give well-defined results before reload,
11762 so we must look at the rtl ourselves to see if any of the feeding
11763 registers is used in a memref. */
11764
11765 /* Called by sh_contains_memref_p via for_each_rtx. */
11766 static int
11767 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11768 {
11769 return (MEM_P (*loc));
11770 }
11771
11772 /* Return nonzero iff INSN contains a MEM. */
11773 int
11774 sh_contains_memref_p (rtx insn)
11775 {
11776 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11777 }
11778
11779 /* Return nonzero iff INSN loads a banked register. */
11780 int
11781 sh_loads_bankedreg_p (rtx insn)
11782 {
11783 if (GET_CODE (PATTERN (insn)) == SET)
11784 {
11785 rtx op = SET_DEST (PATTERN(insn));
11786 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11787 return 1;
11788 }
11789
11790 return 0;
11791 }
11792
11793 /* FNADDR is the MEM expression from a call expander. Return an address
11794 to use in an SHmedia insn pattern. */
11795 rtx
11796 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11797 {
11798 int is_sym;
11799
11800 fnaddr = XEXP (fnaddr, 0);
11801 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11802 if (flag_pic && is_sym)
11803 {
11804 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11805 {
11806 rtx reg = gen_reg_rtx (Pmode);
11807
11808 /* We must not use GOTPLT for sibcalls, because PIC_REG
11809 must be restored before the PLT code gets to run. */
11810 if (is_sibcall)
11811 emit_insn (gen_symGOT2reg (reg, fnaddr));
11812 else
11813 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11814 fnaddr = reg;
11815 }
11816 else
11817 {
11818 fnaddr = gen_sym2PIC (fnaddr);
11819 PUT_MODE (fnaddr, Pmode);
11820 }
11821 }
11822 /* If ptabs might trap, make this visible to the rest of the compiler.
11823 We generally assume that symbols pertain to valid locations, but
11824 it is possible to generate invalid symbols with asm or linker tricks.
11825 In a list of functions where each returns its successor, an invalid
11826 symbol might denote an empty list. */
11827 if (!TARGET_PT_FIXED
11828 && (!is_sym || TARGET_INVALID_SYMBOLS)
11829 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11830 {
11831 rtx tr = gen_reg_rtx (PDImode);
11832
11833 emit_insn (gen_ptabs (tr, fnaddr));
11834 fnaddr = tr;
11835 }
11836 else if (! target_reg_operand (fnaddr, Pmode))
11837 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11838 return fnaddr;
11839 }
11840
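/* Return the register class needed as an intermediate when moving X into
   (IN_P nonzero) or out of a register of class RCLASS in MODE, or NO_REGS
   when none is needed; SRI->icode may instead be set to a special reload
   pattern.  */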
11841 enum reg_class
11842 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
11843 enum machine_mode mode, secondary_reload_info *sri)
11844 {
11845 if (in_p)
11846 {
11847 if (REGCLASS_HAS_FP_REG (rclass)
11848 && ! TARGET_SHMEDIA
11849 && immediate_operand ((x), mode)
11850 && ! ((fp_zero_operand (x) || fp_one_operand (x))
11851 && mode == SFmode && fldi_ok ()))
11852 switch (mode)
11853 {
11854 case SFmode:
11855 sri->icode = CODE_FOR_reload_insf__frn;
11856 return NO_REGS;
11857 case DFmode:
11858 sri->icode = CODE_FOR_reload_indf__frn;
11859 return NO_REGS;
11860 case SImode:
11861 /* ??? If we knew that we are in the appropriate mode -
11862 single precision - we could use a reload pattern directly. */
11863 return FPUL_REGS;
11864 default:
11865 abort ();
11866 }
11867 if (rclass == FPUL_REGS
11868 && ((REG_P (x)
11869 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11870 || REGNO (x) == T_REG))
11871 || GET_CODE (x) == PLUS))
11872 return GENERAL_REGS;
11873 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11874 {
11875 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11876 return GENERAL_REGS;
11877 else if (mode == SFmode)
11878 return FP_REGS;
11879 sri->icode = CODE_FOR_reload_insi__i_fpul;
11880 return NO_REGS;
11881 }
11882 if (rclass == FPSCR_REGS
11883 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11884 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11885 return GENERAL_REGS;
11886 if (REGCLASS_HAS_FP_REG (rclass)
11887 && TARGET_SHMEDIA
11888 && immediate_operand (x, mode)
11889 && x != CONST0_RTX (GET_MODE (x))
11890 && GET_MODE (x) != V4SFmode)
11891 return GENERAL_REGS;
11892 if ((mode == QImode || mode == HImode)
11893 && TARGET_SHMEDIA && inqhi_operand (x, mode))
11894 {
11895 sri->icode = ((mode == QImode)
11896 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
11897 return NO_REGS;
11898 }
11899 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
11900 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
11901 return TARGET_REGS;
11902 } /* end of input-only processing. */
11903
11904 if (((REGCLASS_HAS_FP_REG (rclass)
11905 && (REG_P (x)
11906 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11907 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11908 && TARGET_FMOVD))))
11909 || (REGCLASS_HAS_GENERAL_REG (rclass)
11910 && REG_P (x)
11911 && FP_REGISTER_P (REGNO (x))))
11912 && ! TARGET_SHMEDIA
11913 && (mode == SFmode || mode == SImode))
11914 return FPUL_REGS;
11915 if ((rclass == FPUL_REGS
11916 || (REGCLASS_HAS_FP_REG (rclass)
11917 && ! TARGET_SHMEDIA && mode == SImode))
11918 && (MEM_P (x)
11919 || (REG_P (x)
11920 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11921 || REGNO (x) == T_REG
11922 || system_reg_operand (x, VOIDmode)))))
11923 {
11924 if (rclass == FPUL_REGS)
11925 return GENERAL_REGS;
11926 return FPUL_REGS;
11927 }
11928 if ((rclass == TARGET_REGS
11929 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
11930 && !satisfies_constraint_Csy (x)
11931 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
11932 return GENERAL_REGS;
11933 if ((rclass == MAC_REGS || rclass == PR_REGS)
11934 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11935 && rclass != REGNO_REG_CLASS (REGNO (x)))
11936 return GENERAL_REGS;
11937 if (rclass != GENERAL_REGS && REG_P (x)
11938 && TARGET_REGISTER_P (REGNO (x)))
11939 return GENERAL_REGS;
11940 return NO_REGS;
11941 }
11942
11943 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11944
11945 #include "gt-sh.h"