1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "toplev.h"
41 #include "recog.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "df.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "params.h"
54 #include "ggc.h"
55 #include "gimple.h"
56 #include "cfgloop.h"
57 #include "alloc-pool.h"
58 #include "tm-constrs.h"
59
60
61 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
62
63 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
64 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
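/* A quick illustration (derived from the two defines above): for a DImode
   value split into two SImode words, word number LSW is the least
   significant half and word number MSW the most significant one.  On a
   little-endian target LSW is word 0 and MSW is word 1, and vice versa on
   big-endian, so e.g. operand_subword (x, LSW, 0, DImode) always yields the
   low 32 bits regardless of endianness.  */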
65
66 /* These are some macros to abstract register modes. */
67 #define CONST_OK_FOR_ADD(size) \
68 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
69 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
70 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
71 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
72
73 /* Used to simplify the logic below. Find the attributes wherever
74 they may be. */
75 #define SH_ATTRIBUTES(decl) \
76 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
77 : DECL_ATTRIBUTES (decl) \
78 ? (DECL_ATTRIBUTES (decl)) \
79 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
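/* For example, SH_ATTRIBUTES (fndecl) on a FUNCTION_DECL yields its
   DECL_ATTRIBUTES list when one is present and otherwise falls back to the
   attributes of the decl's type, while SH_ATTRIBUTES (type) simply yields
   TYPE_ATTRIBUTES (type).  */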
80
81 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
82 int current_function_interrupt;
83
84 tree sh_deferred_function_attributes;
85 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
86
87 /* Global variables for machine-dependent things. */
88
 89 /* Which CPU we are scheduling for.  */
90 enum processor_type sh_cpu;
91
92 /* Definitions used in ready queue reordering for first scheduling pass. */
93
94 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
95 static short *regmode_weight[2];
96
97 /* Total SFmode and SImode weights of scheduled insns. */
98 static int curr_regmode_pressure[2];
99
100 /* Number of r0 life regions. */
101 static int r0_life_regions;
102
103 /* If true, skip cycles for Q -> R movement. */
104 static int skip_cycles = 0;
105
106 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
107 and returned from sh_reorder2. */
108 static short cached_can_issue_more;
109
110 /* Unique number for UNSPEC_BBR pattern. */
111 static unsigned int unspec_bbr_uid = 1;
112
 113 /* Provides the class number of the smallest class containing
 114 a given register number.  */
115
116 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
117 {
118 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
155 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
156 GENERAL_REGS, GENERAL_REGS,
157 };
158
159 char sh_register_names[FIRST_PSEUDO_REGISTER] \
160 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
161
162 char sh_additional_register_names[ADDREGNAMES_SIZE] \
163 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
164 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
165
166 int assembler_dialect;
167
168 static bool shmedia_space_reserved_for_target_registers;
169
170 static bool sh_handle_option (size_t, const char *, int);
171 static void split_branches (rtx);
172 static int branch_dest (rtx);
173 static void force_into (rtx, rtx);
174 static void print_slot (rtx);
175 static rtx add_constant (rtx, enum machine_mode, rtx);
176 static void dump_table (rtx, rtx);
177 static int hi_const (rtx);
178 static int broken_move (rtx);
179 static int mova_p (rtx);
180 static rtx find_barrier (int, rtx, rtx);
181 static int noncall_uses_reg (rtx, rtx, rtx *);
182 static rtx gen_block_redirect (rtx, int, int);
183 static void sh_reorg (void);
184 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
185 static rtx frame_insn (rtx);
186 static rtx push (int);
187 static void pop (int);
188 static void push_regs (HARD_REG_SET *, int);
189 static int calc_live_regs (HARD_REG_SET *);
190 static HOST_WIDE_INT rounded_frame_size (int);
191 static bool sh_frame_pointer_required (void);
192 static rtx mark_constant_pool_use (rtx);
193 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
194 static tree sh_handle_resbank_handler_attribute (tree *, tree,
195 tree, int, bool *);
196 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
197 tree, int, bool *);
198 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
199 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
201 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
202 static void sh_insert_attributes (tree, tree *);
203 static const char *sh_check_pch_target_flags (int);
204 static int sh_adjust_cost (rtx, rtx, rtx, int);
205 static int sh_issue_rate (void);
206 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
207 static short find_set_regmode_weight (rtx, enum machine_mode);
208 static short find_insn_regmode_weight (rtx, enum machine_mode);
209 static void find_regmode_weight (basic_block, enum machine_mode);
210 static int find_r0_life_regions (basic_block);
211 static void sh_md_init_global (FILE *, int, int);
212 static void sh_md_finish_global (FILE *, int);
213 static int rank_for_reorder (const void *, const void *);
214 static void swap_reorder (rtx *, int);
215 static void ready_reorder (rtx *, int);
216 static short high_pressure (enum machine_mode);
217 static int sh_reorder (FILE *, int, rtx *, int *, int);
218 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
219 static void sh_md_init (FILE *, int, int);
220 static int sh_variable_issue (FILE *, int, rtx, int);
221
222 static bool sh_function_ok_for_sibcall (tree, tree);
223
224 static bool sh_cannot_modify_jumps_p (void);
225 static enum reg_class sh_target_reg_class (void);
226 static bool sh_optimize_target_register_callee_saved (bool);
227 static bool sh_ms_bitfield_layout_p (const_tree);
228
229 static void sh_init_builtins (void);
230 static tree sh_builtin_decl (unsigned, bool);
231 static void sh_media_init_builtins (void);
232 static tree sh_media_builtin_decl (unsigned, bool);
233 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
234 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
235 static void sh_file_start (void);
236 static int flow_dependent_p (rtx, rtx);
237 static void flow_dependent_p_1 (rtx, const_rtx, void *);
238 static int shiftcosts (rtx);
239 static int andcosts (rtx);
240 static int addsubcosts (rtx);
241 static int multcosts (rtx);
242 static bool unspec_caller_rtx_p (rtx);
243 static bool sh_cannot_copy_insn_p (rtx);
244 static bool sh_rtx_costs (rtx, int, int, int *, bool);
245 static int sh_address_cost (rtx, bool);
246 static int sh_pr_n_sets (void);
247 static rtx sh_allocate_initial_value (rtx);
248 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
249 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
257
258 static rtx sh_struct_value_rtx (tree, int);
259 static rtx sh_function_value (const_tree, const_tree, bool);
260 static rtx sh_libcall_value (enum machine_mode, const_rtx);
261 static bool sh_return_in_memory (const_tree, const_tree);
262 static rtx sh_builtin_saveregs (void);
263 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
264 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
265 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
266 static tree sh_build_builtin_va_list (void);
267 static void sh_va_start (tree, rtx);
268 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
269 static bool sh_promote_prototypes (const_tree);
270 static enum machine_mode sh_promote_function_mode (const_tree type,
271 enum machine_mode,
272 int *punsignedp,
273 const_tree funtype,
274 int for_return);
275 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
276 const_tree, bool);
277 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
278 const_tree, bool);
279 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
280 tree, bool);
281 static bool sh_scalar_mode_supported_p (enum machine_mode);
282 static int sh_dwarf_calling_convention (const_tree);
283 static void sh_encode_section_info (tree, rtx, int);
284 static int sh2a_function_vector_p (tree);
285 static void sh_trampoline_init (rtx, tree, rtx);
286 static rtx sh_trampoline_adjust_address (rtx);
287 \f
288 static const struct attribute_spec sh_attribute_table[] =
289 {
290 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
291 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
292 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
293 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
294 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
295 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
296 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
297 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
298 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
299 #ifdef SYMBIAN
 300 /* Symbian support adds the following attributes:
301 dllexport - for exporting a function/variable that will live in a dll
302 dllimport - for importing a function/variable from a dll
303
304 Microsoft allows multiple declspecs in one __declspec, separating
305 them with spaces. We do NOT support this. Instead, use __declspec
306 multiple times. */
307 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
308 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
309 #endif
310 { NULL, 0, 0, false, false, false, NULL }
311 };
312 \f
313 /* Initialize the GCC target structure. */
314 #undef TARGET_ATTRIBUTE_TABLE
315 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
316
317 /* The next two are used for debug info when compiling with -gdwarf. */
318 #undef TARGET_ASM_UNALIGNED_HI_OP
319 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
320 #undef TARGET_ASM_UNALIGNED_SI_OP
321 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
322
323 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
324 #undef TARGET_ASM_UNALIGNED_DI_OP
325 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
326 #undef TARGET_ASM_ALIGNED_DI_OP
327 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
328
329 #undef TARGET_ASM_FUNCTION_EPILOGUE
330 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
331
332 #undef TARGET_ASM_OUTPUT_MI_THUNK
333 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
334
335 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
336 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
337
338 #undef TARGET_ASM_FILE_START
339 #define TARGET_ASM_FILE_START sh_file_start
340 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
341 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
342
343 #undef TARGET_DEFAULT_TARGET_FLAGS
344 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
345 #undef TARGET_HANDLE_OPTION
346 #define TARGET_HANDLE_OPTION sh_handle_option
347
348 #undef TARGET_INSERT_ATTRIBUTES
349 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
350
351 #undef TARGET_SCHED_ADJUST_COST
352 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
353
354 #undef TARGET_SCHED_ISSUE_RATE
355 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
356
 357 /* The following hooks have been implemented to re-enable sched1.  With the
 358 help of these macros we limit the movement of insns in sched1 to
 359 reduce the register pressure.  The overall idea is to keep count of the SImode
 360 and SFmode regs required by already scheduled insns.  When these counts
 361 cross certain threshold values, we give priority to insns that free registers.
 362 The insn that frees registers is most likely to be the insn with the lowest
 363 LUID (original insn order), but such an insn might sit in the stalled
 364 queue (Q) instead of the ready queue (R).  To solve this, we skip cycles,
 365 up to a maximum of 8, so that such insns may move from Q -> R.
 366
 367 The hooks are described below:
 368
 369 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
 370 scheduler; it is called inside the sched_init function just after the
 371 find_insn_reg_weights function call.  It is used to calculate the SImode
 372 and SFmode weights of the insns of basic blocks, much like what
 373 find_insn_reg_weights does.
374 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
375
376 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
377 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
378 (Q)->(R).
379
 380 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
 381 high, reorder the ready queue so that the insn with the lowest LUID will be
 382 issued next.
383
384 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
385 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
386
387 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
388 can be returned from TARGET_SCHED_REORDER2.
389
390 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
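/* A rough picture of how the hooks below cooperate (a sketch based on the
   description above; see the individual functions for details):

     sh_md_init_global   - compute the SImode/SFmode weights of the insns
     sh_md_init          - reset the register pressure counters
     sh_variable_issue   - cache can_issue_more for sh_reorder2
     sh_reorder          - when pressure is high, sort the ready queue by LUID
     sh_reorder2         - flag high pressure so that ...
     sh_dfa_new_cycle    - ... up to 8 cycles are skipped, letting stalled
                           insns move from Q to R
     sh_md_finish_global - corresponding cleanup.  */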
391
392 #undef TARGET_SCHED_DFA_NEW_CYCLE
393 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
394
395 #undef TARGET_SCHED_INIT_GLOBAL
396 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
397
398 #undef TARGET_SCHED_FINISH_GLOBAL
399 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
400
401 #undef TARGET_SCHED_VARIABLE_ISSUE
402 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
403
404 #undef TARGET_SCHED_REORDER
405 #define TARGET_SCHED_REORDER sh_reorder
406
407 #undef TARGET_SCHED_REORDER2
408 #define TARGET_SCHED_REORDER2 sh_reorder2
409
410 #undef TARGET_SCHED_INIT
411 #define TARGET_SCHED_INIT sh_md_init
412
413 #undef TARGET_LEGITIMIZE_ADDRESS
414 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
415
416 #undef TARGET_CANNOT_MODIFY_JUMPS_P
417 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
418 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
419 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
420 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
421 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
422 sh_optimize_target_register_callee_saved
423
424 #undef TARGET_MS_BITFIELD_LAYOUT_P
425 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
426
427 #undef TARGET_INIT_BUILTINS
428 #define TARGET_INIT_BUILTINS sh_init_builtins
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL sh_builtin_decl
431 #undef TARGET_EXPAND_BUILTIN
432 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
433
434 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
435 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
436
437 #undef TARGET_CANNOT_COPY_INSN_P
438 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
439 #undef TARGET_RTX_COSTS
440 #define TARGET_RTX_COSTS sh_rtx_costs
441 #undef TARGET_ADDRESS_COST
442 #define TARGET_ADDRESS_COST sh_address_cost
443 #undef TARGET_ALLOCATE_INITIAL_VALUE
444 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
445
446 #undef TARGET_MACHINE_DEPENDENT_REORG
447 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
448
449 #undef TARGET_DWARF_REGISTER_SPAN
450 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
451
452 #ifdef HAVE_AS_TLS
453 #undef TARGET_HAVE_TLS
454 #define TARGET_HAVE_TLS true
455 #endif
456
457 #undef TARGET_PROMOTE_PROTOTYPES
458 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
459 #undef TARGET_PROMOTE_FUNCTION_MODE
460 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
461
462 #undef TARGET_FUNCTION_VALUE
463 #define TARGET_FUNCTION_VALUE sh_function_value
464 #undef TARGET_LIBCALL_VALUE
465 #define TARGET_LIBCALL_VALUE sh_libcall_value
466 #undef TARGET_STRUCT_VALUE_RTX
467 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
468 #undef TARGET_RETURN_IN_MEMORY
469 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
470
471 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
472 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
475 #undef TARGET_STRICT_ARGUMENT_NAMING
476 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
477 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
478 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
479 #undef TARGET_MUST_PASS_IN_STACK
480 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
481 #undef TARGET_PASS_BY_REFERENCE
482 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
483 #undef TARGET_CALLEE_COPIES
484 #define TARGET_CALLEE_COPIES sh_callee_copies
485 #undef TARGET_ARG_PARTIAL_BYTES
486 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
487
488 #undef TARGET_BUILD_BUILTIN_VA_LIST
489 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
490 #undef TARGET_EXPAND_BUILTIN_VA_START
491 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
492 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
493 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
494
495 #undef TARGET_SCALAR_MODE_SUPPORTED_P
496 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
497 #undef TARGET_VECTOR_MODE_SUPPORTED_P
498 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
499
500 #undef TARGET_CHECK_PCH_TARGET_FLAGS
501 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
502
503 #undef TARGET_DWARF_CALLING_CONVENTION
504 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
505
506 #undef TARGET_FRAME_POINTER_REQUIRED
507 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
508
509 /* Return regmode weight for insn. */
510 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
511
512 /* Return current register pressure for regmode. */
513 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
514
515 #undef TARGET_ENCODE_SECTION_INFO
516 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
517
518 #ifdef SYMBIAN
519
520 #undef TARGET_ENCODE_SECTION_INFO
521 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
522 #undef TARGET_STRIP_NAME_ENCODING
523 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
524 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
525 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
526
527 #endif /* SYMBIAN */
528
529 #undef TARGET_SECONDARY_RELOAD
530 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
531
532 #undef TARGET_LEGITIMATE_ADDRESS_P
533 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
534
535 #undef TARGET_TRAMPOLINE_INIT
536 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
537 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
538 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
539
540 /* Machine-specific symbol_ref flags. */
541 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
542
543 struct gcc_target targetm = TARGET_INITIALIZER;
544 \f
545 /* Implement TARGET_HANDLE_OPTION. */
546
547 static bool
548 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
549 int value ATTRIBUTE_UNUSED)
550 {
551 switch (code)
552 {
553 case OPT_m1:
554 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
555 return true;
556
557 case OPT_m2:
558 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
559 return true;
560
561 case OPT_m2a:
562 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
563 return true;
564
565 case OPT_m2a_nofpu:
566 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
567 return true;
568
569 case OPT_m2a_single:
570 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
571 return true;
572
573 case OPT_m2a_single_only:
574 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
575 return true;
576
577 case OPT_m2e:
578 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
579 return true;
580
581 case OPT_m3:
582 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
583 return true;
584
585 case OPT_m3e:
586 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
587 return true;
588
589 case OPT_m4:
590 case OPT_m4_100:
591 case OPT_m4_200:
592 case OPT_m4_300:
593 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
594 return true;
595
596 case OPT_m4_nofpu:
597 case OPT_m4_100_nofpu:
598 case OPT_m4_200_nofpu:
599 case OPT_m4_300_nofpu:
600 case OPT_m4_340:
601 case OPT_m4_400:
602 case OPT_m4_500:
603 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
604 return true;
605
606 case OPT_m4_single:
607 case OPT_m4_100_single:
608 case OPT_m4_200_single:
609 case OPT_m4_300_single:
610 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
611 return true;
612
613 case OPT_m4_single_only:
614 case OPT_m4_100_single_only:
615 case OPT_m4_200_single_only:
616 case OPT_m4_300_single_only:
617 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
618 return true;
619
620 case OPT_m4a:
621 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
622 return true;
623
624 case OPT_m4a_nofpu:
625 case OPT_m4al:
626 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
627 return true;
628
629 case OPT_m4a_single:
630 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
631 return true;
632
633 case OPT_m4a_single_only:
634 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
635 return true;
636
637 case OPT_m5_32media:
638 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
639 return true;
640
641 case OPT_m5_32media_nofpu:
642 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
643 return true;
644
645 case OPT_m5_64media:
646 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
647 return true;
648
649 case OPT_m5_64media_nofpu:
650 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
651 return true;
652
653 case OPT_m5_compact:
654 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
655 return true;
656
657 case OPT_m5_compact_nofpu:
658 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
659 return true;
660
661 default:
662 return true;
663 }
664 }
665 \f
666 /* Set default optimization options. */
667 void
668 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
669 {
670 if (level)
671 {
672 if (!size)
673 sh_div_str = "inv:minlat";
674 }
675 if (size)
676 {
677 target_flags |= MASK_SMALLCODE;
678 sh_div_str = SH_DIV_STR_FOR_SIZE ;
679 }
680 else
681 TARGET_CBRANCHDI4 = 1;
682 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
683 haven't been parsed yet, hence we'd read only the default.
684 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
685 it's OK to always set flag_branch_target_load_optimize. */
686 if (level > 1)
687 {
688 flag_branch_target_load_optimize = 1;
689 if (!size)
690 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
691 }
692 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
693 here, so leave it to OVERRIDE_OPTIONS to set
694 flag_finite_math_only. We set it to 2 here so we know if the user
695 explicitly requested this to be on or off. */
696 flag_finite_math_only = 2;
697 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
698 the user explicitly requested this to be on or off. */
699 if (flag_schedule_insns > 0)
700 flag_schedule_insns = 2;
701
702 set_param_value ("simultaneous-prefetches", 2);
703 }
704
705 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
706 options, and do some machine dependent initialization. */
707 void
708 sh_override_options (void)
709 {
710 int regno;
711
712 SUBTARGET_OVERRIDE_OPTIONS;
713 if (flag_finite_math_only == 2)
714 flag_finite_math_only
715 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
716 if (TARGET_SH2E && !flag_finite_math_only)
717 target_flags |= MASK_IEEE;
718 sh_cpu = PROCESSOR_SH1;
719 assembler_dialect = 0;
720 if (TARGET_SH2)
721 sh_cpu = PROCESSOR_SH2;
722 if (TARGET_SH2E)
723 sh_cpu = PROCESSOR_SH2E;
724 if (TARGET_SH2A)
725 sh_cpu = PROCESSOR_SH2A;
726 if (TARGET_SH3)
727 sh_cpu = PROCESSOR_SH3;
728 if (TARGET_SH3E)
729 sh_cpu = PROCESSOR_SH3E;
730 if (TARGET_SH4)
731 {
732 assembler_dialect = 1;
733 sh_cpu = PROCESSOR_SH4;
734 }
735 if (TARGET_SH4A_ARCH)
736 {
737 assembler_dialect = 1;
738 sh_cpu = PROCESSOR_SH4A;
739 }
740 if (TARGET_SH5)
741 {
742 sh_cpu = PROCESSOR_SH5;
743 target_flags |= MASK_ALIGN_DOUBLE;
744 if (TARGET_SHMEDIA_FPU)
745 target_flags |= MASK_FMOVD;
746 if (TARGET_SHMEDIA)
747 {
748 /* There are no delay slots on SHmedia. */
749 flag_delayed_branch = 0;
750 /* Relaxation isn't yet supported for SHmedia */
751 target_flags &= ~MASK_RELAX;
 752 /* After reload, if-conversion does little good but can cause
 753 ICEs:
754 - find_if_block doesn't do anything for SH because we don't
755 have conditional execution patterns. (We use conditional
756 move patterns, which are handled differently, and only
757 before reload).
758 - find_cond_trap doesn't do anything for the SH because we
759 don't have conditional traps.
760 - find_if_case_1 uses redirect_edge_and_branch_force in
761 the only path that does an optimization, and this causes
762 an ICE when branch targets are in registers.
763 - find_if_case_2 doesn't do anything for the SHmedia after
764 reload except when it can redirect a tablejump - and
765 that's rather rare. */
766 flag_if_conversion2 = 0;
767 if (! strcmp (sh_div_str, "call"))
768 sh_div_strategy = SH_DIV_CALL;
769 else if (! strcmp (sh_div_str, "call2"))
770 sh_div_strategy = SH_DIV_CALL2;
771 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
772 sh_div_strategy = SH_DIV_FP;
773 else if (! strcmp (sh_div_str, "inv"))
774 sh_div_strategy = SH_DIV_INV;
775 else if (! strcmp (sh_div_str, "inv:minlat"))
776 sh_div_strategy = SH_DIV_INV_MINLAT;
777 else if (! strcmp (sh_div_str, "inv20u"))
778 sh_div_strategy = SH_DIV_INV20U;
779 else if (! strcmp (sh_div_str, "inv20l"))
780 sh_div_strategy = SH_DIV_INV20L;
781 else if (! strcmp (sh_div_str, "inv:call2"))
782 sh_div_strategy = SH_DIV_INV_CALL2;
783 else if (! strcmp (sh_div_str, "inv:call"))
784 sh_div_strategy = SH_DIV_INV_CALL;
785 else if (! strcmp (sh_div_str, "inv:fp"))
786 {
787 if (TARGET_FPU_ANY)
788 sh_div_strategy = SH_DIV_INV_FP;
789 else
790 sh_div_strategy = SH_DIV_INV;
791 }
792 TARGET_CBRANCHDI4 = 0;
793 /* Assembler CFI isn't yet fully supported for SHmedia. */
794 flag_dwarf2_cfi_asm = 0;
795 }
796 }
797 else
798 {
 799 /* Only the sh64-elf assembler supports .quad properly.  */
800 targetm.asm_out.aligned_op.di = NULL;
801 targetm.asm_out.unaligned_op.di = NULL;
802 }
803 if (TARGET_SH1)
804 {
805 if (! strcmp (sh_div_str, "call-div1"))
806 sh_div_strategy = SH_DIV_CALL_DIV1;
807 else if (! strcmp (sh_div_str, "call-fp")
808 && (TARGET_FPU_DOUBLE
809 || (TARGET_HARD_SH4 && TARGET_SH2E)
810 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
811 sh_div_strategy = SH_DIV_CALL_FP;
812 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
813 sh_div_strategy = SH_DIV_CALL_TABLE;
814 else
815 /* Pick one that makes most sense for the target in general.
816 It is not much good to use different functions depending
817 on -Os, since then we'll end up with two different functions
818 when some of the code is compiled for size, and some for
819 speed. */
820
821 /* SH4 tends to emphasize speed. */
822 if (TARGET_HARD_SH4)
823 sh_div_strategy = SH_DIV_CALL_TABLE;
824 /* These have their own way of doing things. */
825 else if (TARGET_SH2A)
826 sh_div_strategy = SH_DIV_INTRINSIC;
827 /* ??? Should we use the integer SHmedia function instead? */
828 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
829 sh_div_strategy = SH_DIV_CALL_FP;
830 /* SH1 .. SH3 cores often go into small-footprint systems, so
831 default to the smallest implementation available. */
832 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
833 sh_div_strategy = SH_DIV_CALL_TABLE;
834 else
835 sh_div_strategy = SH_DIV_CALL_DIV1;
836 }
837 if (!TARGET_SH1)
838 TARGET_PRETEND_CMOVE = 0;
839 if (sh_divsi3_libfunc[0])
840 ; /* User supplied - leave it alone. */
841 else if (TARGET_DIVIDE_CALL_FP)
842 sh_divsi3_libfunc = "__sdivsi3_i4";
843 else if (TARGET_DIVIDE_CALL_TABLE)
844 sh_divsi3_libfunc = "__sdivsi3_i4i";
845 else if (TARGET_SH5)
846 sh_divsi3_libfunc = "__sdivsi3_1";
847 else
848 sh_divsi3_libfunc = "__sdivsi3";
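/* If no branch cost was given on the command line, pick one: 1 for SH5,
   2 when there is no SH2 support or when targeting hardware SH4, and 1
   otherwise (a restatement of the conditional below).  */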
849 if (sh_branch_cost == -1)
850 sh_branch_cost
851 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
852
853 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
854 if (! VALID_REGISTER_P (regno))
855 sh_register_names[regno][0] = '\0';
856
857 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
858 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
859 sh_additional_register_names[regno][0] = '\0';
860
861 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
862
863 if ((flag_pic && ! TARGET_PREFERGOT)
864 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
865 flag_no_function_cse = 1;
866
 867 if (targetm.small_register_classes_for_mode_p (VOIDmode))
868 {
869 /* Never run scheduling before reload, since that can
870 break global alloc, and generates slower code anyway due
871 to the pressure on R0. */
 872 /* Enable sched1 for SH4 if the user explicitly requests it.
 873 When sched1 is enabled, the ready queue will be reordered by
 874 the target hooks if pressure is high.  We cannot do this for
 875 PIC, SH3 and lower, as they give spill failures for R0.  */
876 if (!TARGET_HARD_SH4 || flag_pic)
877 flag_schedule_insns = 0;
878 /* ??? Current exception handling places basic block boundaries
 879 after call_insns.  This causes high pressure on R0 and gives
880 spill failures for R0 in reload. See PR 22553 and the thread
881 on gcc-patches
882 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
883 else if (flag_exceptions)
884 {
885 if (flag_schedule_insns == 1)
886 warning (0, "ignoring -fschedule-insns because of exception handling bug");
887 flag_schedule_insns = 0;
888 }
889 else if (flag_schedule_insns == 2)
890 flag_schedule_insns = 0;
891 }
892
893 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
894 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
895
896 /* Unwind info is not correct around the CFG unless either a frame
897 pointer is present or M_A_O_A is set. Fixing this requires rewriting
898 unwind info generation to be aware of the CFG and propagating states
899 around edges. */
900 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
901 || flag_exceptions || flag_non_call_exceptions)
902 && flag_omit_frame_pointer
903 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
904 {
905 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
906 warning (0, "unwind tables currently require either a frame pointer "
907 "or -maccumulate-outgoing-args for correctness");
908 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
909 }
910
911 /* Unwinding with -freorder-blocks-and-partition does not work on this
 912 architecture, because it requires far jumps to labels crossing between
 913 hot/cold sections, which are rejected on this architecture.  */
914 if (flag_reorder_blocks_and_partition)
915 {
916 if (flag_exceptions)
917 {
918 inform (input_location,
919 "-freorder-blocks-and-partition does not work with "
920 "exceptions on this architecture");
921 flag_reorder_blocks_and_partition = 0;
922 flag_reorder_blocks = 1;
923 }
924 else if (flag_unwind_tables)
925 {
926 inform (input_location,
927 "-freorder-blocks-and-partition does not support unwind "
928 "info on this architecture");
929 flag_reorder_blocks_and_partition = 0;
930 flag_reorder_blocks = 1;
931 }
932 }
933
934 if (align_loops == 0)
935 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
936 if (align_jumps == 0)
937 align_jumps = 1 << CACHE_LOG;
938 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
939 align_jumps = TARGET_SHMEDIA ? 4 : 2;
940
941 /* Allocation boundary (in *bytes*) for the code of a function.
942 SH1: 32 bit alignment is faster, because instructions are always
943 fetched as a pair from a longword boundary.
944 SH2 .. SH5 : align to cache line start. */
945 if (align_functions == 0)
946 align_functions
947 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
948 /* The linker relaxation code breaks when a function contains
949 alignments that are larger than that at the start of a
950 compilation unit. */
951 if (TARGET_RELAX)
952 {
953 int min_align
954 = align_loops > align_jumps ? align_loops : align_jumps;
955
 956 /* Also take possible .long constants / mova tables into account.  */
957 if (min_align < 4)
958 min_align = 4;
959 if (align_functions < min_align)
960 align_functions = min_align;
961 }
962
963 if (sh_fixed_range_str)
964 sh_fix_range (sh_fixed_range_str);
965 }
966 \f
967 /* Print the operand address in x to the stream. */
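/* For reference, the address forms handled below and the assembler syntax
   emitted for each (for the register+register form, one of the two
   registers is expected to be r0):

     (reg rN)                  ->  @rN
     (plus rN (const_int d))   ->  @(d,rN)
     (plus rN r0)              ->  @(r0,rN)
     (pre_dec rN)              ->  @-rN
     (post_inc rN)             ->  @rN+

   Anything else is printed as a constant address.  */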
968
969 void
970 print_operand_address (FILE *stream, rtx x)
971 {
972 switch (GET_CODE (x))
973 {
974 case REG:
975 case SUBREG:
976 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
977 break;
978
979 case PLUS:
980 {
981 rtx base = XEXP (x, 0);
982 rtx index = XEXP (x, 1);
983
984 switch (GET_CODE (index))
985 {
986 case CONST_INT:
987 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
988 reg_names[true_regnum (base)]);
989 break;
990
991 case REG:
992 case SUBREG:
993 {
994 int base_num = true_regnum (base);
995 int index_num = true_regnum (index);
996
997 fprintf (stream, "@(r0,%s)",
998 reg_names[MAX (base_num, index_num)]);
999 break;
1000 }
1001
1002 default:
1003 gcc_unreachable ();
1004 }
1005 }
1006 break;
1007
1008 case PRE_DEC:
1009 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1010 break;
1011
1012 case POST_INC:
1013 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1014 break;
1015
1016 default:
1017 x = mark_constant_pool_use (x);
1018 output_addr_const (stream, x);
1019 break;
1020 }
1021 }
1022
1023 /* Print operand x (an rtx) in assembler syntax to file stream
1024 according to modifier code.
1025
1026 '.' print a .s if insn needs delay slot
1027 ',' print LOCAL_LABEL_PREFIX
 1028 '@' print trap, rte or rts depending upon the function's interrupt attributes
1029 '#' output a nop if there is nothing to put in the delay slot
1030 ''' print likelihood suffix (/u for unlikely).
1031 '>' print branch target if -fverbose-asm
1032 'O' print a constant without the #
1033 'R' print the LSW of a dp value - changes if in little endian
1034 'S' print the MSW of a dp value - changes if in little endian
1035 'T' print the next word of a dp value - same as 'R' in big endian mode.
1036 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1037 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1038 'N' print 'r63' if the operand is (const_int 0).
1039 'd' print a V2SF reg as dN instead of fpN.
1040 'm' print a pair `base,offset' or `base,index', for LD and ST.
1041 'U' Likewise for {LD,ST}{HI,LO}.
1042 'V' print the position of a single bit set.
1043 'W' print the position of a single bit cleared.
1044 't' print a memory address which is a register.
1045 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1046 'o' output an operator. */
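/* A worked example for the %R / %S pair, derived from the code below: with
   a DFmode value in general registers r2/r3 on a little-endian target, %S
   prints "r3" (the most significant word) and %R prints "r2"; for the same
   value in the FP register pair dr4 = fr4/fr5, which is always big-endian,
   %S prints "fr4" and %R prints "fr5".  */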
1047
1048 void
1049 print_operand (FILE *stream, rtx x, int code)
1050 {
1051 int regno;
1052 enum machine_mode mode;
1053
1054 switch (code)
1055 {
1056 tree trapa_attr;
1057
1058 case '.':
1059 if (final_sequence
1060 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1061 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1062 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1063 break;
1064 case ',':
1065 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1066 break;
1067 case '@':
1068 trapa_attr = lookup_attribute ("trap_exit",
1069 DECL_ATTRIBUTES (current_function_decl));
1070 if (trapa_attr)
1071 fprintf (stream, "trapa #%ld",
1072 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1073 else if (sh_cfun_interrupt_handler_p ())
1074 {
1075 if (sh_cfun_resbank_handler_p ())
1076 fprintf (stream, "resbank\n");
1077 fprintf (stream, "rte");
1078 }
1079 else
1080 fprintf (stream, "rts");
1081 break;
1082 case '#':
1083 /* Output a nop if there's nothing in the delay slot. */
1084 if (dbr_sequence_length () == 0)
1085 fprintf (stream, "\n\tnop");
1086 break;
1087 case '\'':
1088 {
1089 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1090
1091 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1092 fputs ("/u", stream);
1093 break;
1094 }
1095 case '>':
1096 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1097 {
1098 fputs ("\t! target: ", stream);
1099 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1100 }
1101 break;
1102 case 'O':
1103 x = mark_constant_pool_use (x);
1104 output_addr_const (stream, x);
1105 break;
1106 /* N.B.: %R / %S / %T adjust memory addresses by four.
1107 For SHMEDIA, that means they can be used to access the first and
1108 second 32 bit part of a 64 bit (or larger) value that
1109 might be held in floating point registers or memory.
1110 While they can be used to access 64 bit parts of a larger value
1111 held in general purpose registers, that won't work with memory -
 1112 nor for fp registers, since the frxx names are used.  */
1113 case 'R':
1114 if (REG_P (x) || GET_CODE (x) == SUBREG)
1115 {
1116 regno = true_regnum (x);
1117 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1118 fputs (reg_names[regno], (stream));
1119 }
1120 else if (MEM_P (x))
1121 {
1122 x = adjust_address (x, SImode, 4 * LSW);
1123 print_operand_address (stream, XEXP (x, 0));
1124 }
1125 else
1126 {
1127 rtx sub = NULL_RTX;
1128
1129 mode = GET_MODE (x);
1130 if (mode == VOIDmode)
1131 mode = DImode;
1132 if (GET_MODE_SIZE (mode) >= 8)
1133 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1134 if (sub)
1135 print_operand (stream, sub, 0);
1136 else
1137 output_operand_lossage ("invalid operand to %%R");
1138 }
1139 break;
1140 case 'S':
1141 if (REG_P (x) || GET_CODE (x) == SUBREG)
1142 {
1143 regno = true_regnum (x);
1144 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1145 fputs (reg_names[regno], (stream));
1146 }
1147 else if (MEM_P (x))
1148 {
1149 x = adjust_address (x, SImode, 4 * MSW);
1150 print_operand_address (stream, XEXP (x, 0));
1151 }
1152 else
1153 {
1154 rtx sub = NULL_RTX;
1155
1156 mode = GET_MODE (x);
1157 if (mode == VOIDmode)
1158 mode = DImode;
1159 if (GET_MODE_SIZE (mode) >= 8)
1160 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1161 if (sub)
1162 print_operand (stream, sub, 0);
1163 else
1164 output_operand_lossage ("invalid operand to %%S");
1165 }
1166 break;
1167 case 'T':
1168 /* Next word of a double. */
1169 switch (GET_CODE (x))
1170 {
1171 case REG:
1172 fputs (reg_names[REGNO (x) + 1], (stream));
1173 break;
1174 case MEM:
1175 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1176 && GET_CODE (XEXP (x, 0)) != POST_INC)
1177 x = adjust_address (x, SImode, 4);
1178 print_operand_address (stream, XEXP (x, 0));
1179 break;
1180 default:
1181 break;
1182 }
1183 break;
1184
1185 case 't':
1186 gcc_assert (MEM_P (x));
1187 x = XEXP (x, 0);
1188 switch (GET_CODE (x))
1189 {
1190 case REG:
1191 case SUBREG:
1192 print_operand (stream, x, 0);
1193 break;
1194 default:
1195 break;
1196 }
1197 break;
1198
1199 case 'o':
1200 switch (GET_CODE (x))
1201 {
1202 case PLUS: fputs ("add", stream); break;
1203 case MINUS: fputs ("sub", stream); break;
1204 case MULT: fputs ("mul", stream); break;
1205 case DIV: fputs ("div", stream); break;
1206 case EQ: fputs ("eq", stream); break;
1207 case NE: fputs ("ne", stream); break;
1208 case GT: case LT: fputs ("gt", stream); break;
1209 case GE: case LE: fputs ("ge", stream); break;
1210 case GTU: case LTU: fputs ("gtu", stream); break;
1211 case GEU: case LEU: fputs ("geu", stream); break;
1212 default:
1213 break;
1214 }
1215 break;
1216 case 'M':
1217 if (TARGET_SHMEDIA)
1218 {
1219 if (MEM_P (x)
1220 && GET_CODE (XEXP (x, 0)) == PLUS
1221 && (REG_P (XEXP (XEXP (x, 0), 1))
1222 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1223 fputc ('x', stream);
1224 }
1225 else
1226 {
1227 if (MEM_P (x))
1228 {
1229 switch (GET_MODE (x))
1230 {
1231 case QImode: fputs (".b", stream); break;
1232 case HImode: fputs (".w", stream); break;
1233 case SImode: fputs (".l", stream); break;
1234 case SFmode: fputs (".s", stream); break;
1235 case DFmode: fputs (".d", stream); break;
1236 default: gcc_unreachable ();
1237 }
1238 }
1239 }
1240 break;
1241
1242 case 'm':
1243 gcc_assert (MEM_P (x));
1244 x = XEXP (x, 0);
1245 /* Fall through. */
1246 case 'U':
1247 switch (GET_CODE (x))
1248 {
1249 case REG:
1250 case SUBREG:
1251 print_operand (stream, x, 0);
1252 fputs (", 0", stream);
1253 break;
1254
1255 case PLUS:
1256 print_operand (stream, XEXP (x, 0), 0);
1257 fputs (", ", stream);
1258 print_operand (stream, XEXP (x, 1), 0);
1259 break;
1260
1261 default:
1262 gcc_unreachable ();
1263 }
1264 break;
1265
1266 case 'V':
1267 {
1268 int num = exact_log2 (INTVAL (x));
1269 gcc_assert (num >= 0);
1270 fprintf (stream, "#%d", num);
1271 }
1272 break;
1273
1274 case 'W':
1275 {
1276 int num = exact_log2 (~INTVAL (x));
1277 gcc_assert (num >= 0);
1278 fprintf (stream, "#%d", num);
1279 }
1280 break;
1281
1282 case 'd':
1283 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1284
1285 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1286 break;
1287
1288 case 'N':
1289 if (x == CONST0_RTX (GET_MODE (x)))
1290 {
1291 fprintf ((stream), "r63");
1292 break;
1293 }
1294 goto default_output;
1295 case 'u':
1296 if (CONST_INT_P (x))
1297 {
1298 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1299 break;
1300 }
1301 /* Fall through. */
1302
1303 default_output:
1304 default:
1305 regno = 0;
1306 mode = GET_MODE (x);
1307
1308 switch (GET_CODE (x))
1309 {
1310 case TRUNCATE:
1311 {
1312 rtx inner = XEXP (x, 0);
1313 int offset = 0;
1314 enum machine_mode inner_mode;
1315
1316 /* We might see SUBREGs with vector mode registers inside. */
1317 if (GET_CODE (inner) == SUBREG
1318 && (GET_MODE_SIZE (GET_MODE (inner))
1319 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1320 && subreg_lowpart_p (inner))
1321 inner = SUBREG_REG (inner);
1322 if (CONST_INT_P (inner))
1323 {
1324 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1325 goto default_output;
1326 }
1327 inner_mode = GET_MODE (inner);
1328 if (GET_CODE (inner) == SUBREG
1329 && (GET_MODE_SIZE (GET_MODE (inner))
1330 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1331 && REG_P (SUBREG_REG (inner)))
1332 {
1333 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1334 GET_MODE (SUBREG_REG (inner)),
1335 SUBREG_BYTE (inner),
1336 GET_MODE (inner));
1337 inner = SUBREG_REG (inner);
1338 }
1339 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1340 abort ();
1341 /* Floating point register pairs are always big endian;
1342 general purpose registers are 64 bit wide. */
1343 regno = REGNO (inner);
1344 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1345 - HARD_REGNO_NREGS (regno, mode))
1346 + offset;
1347 x = inner;
1348 goto reg;
1349 }
1350 case SIGN_EXTEND:
1351 x = XEXP (x, 0);
1352 goto reg;
1353 /* FIXME: We need this on SHmedia32 because reload generates
1354 some sign-extended HI or QI loads into DImode registers
1355 but, because Pmode is SImode, the address ends up with a
1356 subreg:SI of the DImode register. Maybe reload should be
1357 fixed so as to apply alter_subreg to such loads? */
1358 case IF_THEN_ELSE:
1359 gcc_assert (trapping_target_operand (x, VOIDmode));
1360 x = XEXP (XEXP (x, 2), 0);
1361 goto default_output;
1362 case SUBREG:
1363 gcc_assert (SUBREG_BYTE (x) == 0
1364 && REG_P (SUBREG_REG (x)));
1365
1366 x = SUBREG_REG (x);
1367 /* Fall through. */
1368
1369 reg:
1370 case REG:
1371 regno += REGNO (x);
1372 if (FP_REGISTER_P (regno)
1373 && mode == V16SFmode)
1374 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1375 else if (FP_REGISTER_P (REGNO (x))
1376 && mode == V4SFmode)
1377 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1378 else if (REG_P (x)
1379 && mode == V2SFmode)
1380 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1381 else if (FP_REGISTER_P (REGNO (x))
1382 && GET_MODE_SIZE (mode) > 4)
1383 fprintf ((stream), "d%s", reg_names[regno] + 1);
1384 else
1385 fputs (reg_names[regno], (stream));
1386 break;
1387
1388 case MEM:
1389 output_address (XEXP (x, 0));
1390 break;
1391
1392 default:
1393 if (TARGET_SH1)
1394 fputc ('#', stream);
1395 output_addr_const (stream, x);
1396 break;
1397 }
1398 break;
1399 }
1400 }
1401 \f
1402
1403 /* Encode symbol attributes of a SYMBOL_REF into its
1404 SYMBOL_REF_FLAGS. */
1405 static void
1406 sh_encode_section_info (tree decl, rtx rtl, int first)
1407 {
1408 default_encode_section_info (decl, rtl, first);
1409
1410 if (TREE_CODE (decl) == FUNCTION_DECL
1411 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1412 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1413 }
1414
1415 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1416 static void
1417 force_into (rtx value, rtx target)
1418 {
1419 value = force_operand (value, target);
1420 if (! rtx_equal_p (value, target))
1421 emit_insn (gen_move_insn (target, value));
1422 }
1423
1424 /* Emit code to perform a block move. Choose the best method.
1425
1426 OPERANDS[0] is the destination.
1427 OPERANDS[1] is the source.
1428 OPERANDS[2] is the size.
1429 OPERANDS[3] is the alignment safe to use. */
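/* A rough map of the strategy chosen below: on SH4A, a word-aligned
   destination with a less aligned source is copied with movua.l loads; on
   hardware SH4 the __movmemSI12_i4 and __movmem_i4_odd/_even library
   routines are used; otherwise short constant sizes go through
   __movmemSI<bytes> and larger ones through the generic __movmem helper.
   Returning 0 makes the caller fall back to the default expansion.  */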
1430
1431 int
1432 expand_block_move (rtx *operands)
1433 {
1434 int align = INTVAL (operands[3]);
1435 int constp = (CONST_INT_P (operands[2]));
1436 int bytes = (constp ? INTVAL (operands[2]) : 0);
1437
1438 if (! constp)
1439 return 0;
1440
1441 /* If we could use mov.l to move words and dest is word-aligned, we
1442 can use movua.l for loads and still generate a relatively short
1443 and efficient sequence. */
1444 if (TARGET_SH4A_ARCH && align < 4
1445 && MEM_ALIGN (operands[0]) >= 32
1446 && can_move_by_pieces (bytes, 32))
1447 {
1448 rtx dest = copy_rtx (operands[0]);
1449 rtx src = copy_rtx (operands[1]);
1450 /* We could use different pseudos for each copied word, but
1451 since movua can only load into r0, it's kind of
1452 pointless. */
1453 rtx temp = gen_reg_rtx (SImode);
1454 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1455 int copied = 0;
1456
1457 while (copied + 4 <= bytes)
1458 {
1459 rtx to = adjust_address (dest, SImode, copied);
1460 rtx from = adjust_automodify_address (src, BLKmode,
1461 src_addr, copied);
1462
1463 set_mem_size (from, GEN_INT (4));
1464 emit_insn (gen_movua (temp, from));
1465 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1466 emit_move_insn (to, temp);
1467 copied += 4;
1468 }
1469
1470 if (copied < bytes)
1471 move_by_pieces (adjust_address (dest, BLKmode, copied),
1472 adjust_automodify_address (src, BLKmode,
1473 src_addr, copied),
1474 bytes - copied, align, 0);
1475
1476 return 1;
1477 }
1478
1479 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1480 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1481 if (align < 4 || (bytes % 4 != 0))
1482 return 0;
1483
1484 if (TARGET_HARD_SH4)
1485 {
1486 if (bytes < 12)
1487 return 0;
1488 else if (bytes == 12)
1489 {
1490 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1491 rtx r4 = gen_rtx_REG (SImode, 4);
1492 rtx r5 = gen_rtx_REG (SImode, 5);
1493
1494 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1495 force_into (XEXP (operands[0], 0), r4);
1496 force_into (XEXP (operands[1], 0), r5);
1497 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1498 return 1;
1499 }
1500 else if (! TARGET_SMALLCODE)
1501 {
1502 const char *entry_name;
1503 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1504 int dwords;
1505 rtx r4 = gen_rtx_REG (SImode, 4);
1506 rtx r5 = gen_rtx_REG (SImode, 5);
1507 rtx r6 = gen_rtx_REG (SImode, 6);
1508
1509 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1510 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1511 force_into (XEXP (operands[0], 0), r4);
1512 force_into (XEXP (operands[1], 0), r5);
1513
1514 dwords = bytes >> 3;
1515 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1516 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1517 return 1;
1518 }
1519 else
1520 return 0;
1521 }
1522 if (bytes < 64)
1523 {
1524 char entry[30];
1525 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1526 rtx r4 = gen_rtx_REG (SImode, 4);
1527 rtx r5 = gen_rtx_REG (SImode, 5);
1528
1529 sprintf (entry, "__movmemSI%d", bytes);
1530 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1531 force_into (XEXP (operands[0], 0), r4);
1532 force_into (XEXP (operands[1], 0), r5);
1533 emit_insn (gen_block_move_real (func_addr_rtx));
1534 return 1;
1535 }
1536
 1537 /* This is the same number of bytes as a memcpy call, but to a different,
1538 less common function name, so this will occasionally use more space. */
1539 if (! TARGET_SMALLCODE)
1540 {
1541 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1542 int final_switch, while_loop;
1543 rtx r4 = gen_rtx_REG (SImode, 4);
1544 rtx r5 = gen_rtx_REG (SImode, 5);
1545 rtx r6 = gen_rtx_REG (SImode, 6);
1546
1547 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1548 force_into (XEXP (operands[0], 0), r4);
1549 force_into (XEXP (operands[1], 0), r5);
1550
1551 /* r6 controls the size of the move. 16 is decremented from it
1552 for each 64 bytes moved. Then the negative bit left over is used
1553 as an index into a list of move instructions. e.g., a 72 byte move
1554 would be set up with size(r6) = 14, for one iteration through the
1555 big while loop, and a switch of -2 for the last part. */
1556
1557 final_switch = 16 - ((bytes / 4) % 16);
1558 while_loop = ((bytes / 4) / 16 - 1) * 16;
1559 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1560 emit_insn (gen_block_lump_real (func_addr_rtx));
1561 return 1;
1562 }
1563
1564 return 0;
1565 }
1566
1567 /* Prepare operands for a move define_expand; specifically, one of the
1568 operands must be in a register. */
1569
1570 int
1571 prepare_move_operands (rtx operands[], enum machine_mode mode)
1572 {
1573 if ((mode == SImode || mode == DImode)
1574 && flag_pic
1575 && ! ((mode == Pmode || mode == ptr_mode)
1576 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1577 {
1578 rtx temp;
1579 if (SYMBOLIC_CONST_P (operands[1]))
1580 {
1581 if (MEM_P (operands[0]))
1582 operands[1] = force_reg (Pmode, operands[1]);
1583 else if (TARGET_SHMEDIA
1584 && GET_CODE (operands[1]) == LABEL_REF
1585 && target_reg_operand (operands[0], mode))
1586 /* It's ok. */;
1587 else
1588 {
1589 temp = (!can_create_pseudo_p ()
1590 ? operands[0]
1591 : gen_reg_rtx (Pmode));
1592 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1593 }
1594 }
1595 else if (GET_CODE (operands[1]) == CONST
1596 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1597 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1598 {
1599 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1600 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1601 mode, temp);
1602 operands[1] = expand_binop (mode, add_optab, temp,
1603 XEXP (XEXP (operands[1], 0), 1),
1604 (!can_create_pseudo_p ()
1605 ? temp
1606 : gen_reg_rtx (Pmode)),
1607 0, OPTAB_LIB_WIDEN);
1608 }
1609 }
1610
1611 if (! reload_in_progress && ! reload_completed)
1612 {
 1613 /* Copy the source to a register if neither operand is a register.  */
1614 if (! register_operand (operands[0], mode)
1615 && ! sh_register_operand (operands[1], mode))
1616 operands[1] = copy_to_mode_reg (mode, operands[1]);
1617
1618 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1619 {
 1620 /* This is like change_address_1 (operands[0], mode, 0, 1),
1621 except that we can't use that function because it is static. */
1622 rtx new_rtx = change_address (operands[0], mode, 0);
1623 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1624 operands[0] = new_rtx;
1625 }
1626
1627 /* This case can happen while generating code to move the result
1628 of a library call to the target. Reject `st r0,@(rX,rY)' because
1629 reload will fail to find a spill register for rX, since r0 is already
1630 being used for the source. */
1631 else if (TARGET_SH1
1632 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1633 && MEM_P (operands[0])
1634 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1635 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1636 operands[1] = copy_to_mode_reg (mode, operands[1]);
1637 }
1638
1639 if (mode == Pmode || mode == ptr_mode)
1640 {
1641 rtx op0, op1, opc;
1642 enum tls_model tls_kind;
1643
1644 op0 = operands[0];
1645 op1 = operands[1];
1646 if (GET_CODE (op1) == CONST
1647 && GET_CODE (XEXP (op1, 0)) == PLUS
1648 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1649 != TLS_MODEL_NONE))
1650 {
1651 opc = XEXP (XEXP (op1, 0), 1);
1652 op1 = XEXP (XEXP (op1, 0), 0);
1653 }
1654 else
1655 opc = NULL_RTX;
1656
1657 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1658 {
1659 rtx tga_op1, tga_ret, tmp, tmp2;
1660
1661 switch (tls_kind)
1662 {
1663 case TLS_MODEL_GLOBAL_DYNAMIC:
1664 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1665 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1666 op1 = tga_ret;
1667 break;
1668
1669 case TLS_MODEL_LOCAL_DYNAMIC:
1670 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1671 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1672
1673 tmp = gen_reg_rtx (Pmode);
1674 emit_move_insn (tmp, tga_ret);
1675
1676 if (register_operand (op0, Pmode))
1677 tmp2 = op0;
1678 else
1679 tmp2 = gen_reg_rtx (Pmode);
1680
1681 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1682 op1 = tmp2;
1683 break;
1684
1685 case TLS_MODEL_INITIAL_EXEC:
1686 if (! flag_pic)
1687 {
1688 /* Don't schedule insns for getting GOT address when
1689 the first scheduling is enabled, to avoid spill
1690 failures for R0. */
1691 if (flag_schedule_insns)
1692 emit_insn (gen_blockage ());
1693 emit_insn (gen_GOTaddr2picreg ());
1694 emit_use (gen_rtx_REG (SImode, PIC_REG));
1695 if (flag_schedule_insns)
1696 emit_insn (gen_blockage ());
1697 }
1698 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1699 tmp = gen_sym2GOTTPOFF (op1);
1700 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1701 op1 = tga_op1;
1702 break;
1703
1704 case TLS_MODEL_LOCAL_EXEC:
1705 tmp2 = gen_reg_rtx (Pmode);
1706 emit_insn (gen_load_gbr (tmp2));
1707 tmp = gen_reg_rtx (Pmode);
1708 emit_insn (gen_symTPOFF2reg (tmp, op1));
1709
1710 if (register_operand (op0, Pmode))
1711 op1 = op0;
1712 else
1713 op1 = gen_reg_rtx (Pmode);
1714
1715 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1716 break;
1717
1718 default:
1719 gcc_unreachable ();
1720 }
1721 if (opc)
1722 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1723 operands[1] = op1;
1724 }
1725 }
1726
1727 return 0;
1728 }
1729
1730 enum rtx_code
1731 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1732 enum rtx_code comparison)
1733 {
1734 rtx op1;
1735 rtx scratch = NULL_RTX;
1736
1737 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1738 comparison = GET_CODE (operands[0]);
1739 else
1740 scratch = operands[4];
1741 if (CONST_INT_P (operands[1])
1742 && !CONST_INT_P (operands[2]))
1743 {
1744 rtx tmp = operands[1];
1745
1746 operands[1] = operands[2];
1747 operands[2] = tmp;
1748 comparison = swap_condition (comparison);
1749 }
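  /* A couple of worked instances of the constant adjustments below
     (illustrative, not exhaustive): `x > -1' becomes `x >= 0', and the
     unsigned test `x < 1' becomes `x == 0', so that the constant can be
     encoded cheaply or dropped entirely.  */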
1750 if (CONST_INT_P (operands[2]))
1751 {
1752 HOST_WIDE_INT val = INTVAL (operands[2]);
1753 if ((val == -1 || val == -0x81)
1754 && (comparison == GT || comparison == LE))
1755 {
1756 comparison = (comparison == GT) ? GE : LT;
1757 operands[2] = gen_int_mode (val + 1, mode);
1758 }
1759 else if ((val == 1 || val == 0x80)
1760 && (comparison == GE || comparison == LT))
1761 {
1762 comparison = (comparison == GE) ? GT : LE;
1763 operands[2] = gen_int_mode (val - 1, mode);
1764 }
1765 else if (val == 1 && (comparison == GEU || comparison == LTU))
1766 {
1767 comparison = (comparison == GEU) ? NE : EQ;
1768 operands[2] = CONST0_RTX (mode);
1769 }
1770 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1771 {
1772 comparison = (comparison == GEU) ? GTU : LEU;
1773 operands[2] = gen_int_mode (val - 1, mode);
1774 }
1775 else if (val == 0 && (comparison == GTU || comparison == LEU))
1776 comparison = (comparison == GTU) ? NE : EQ;
1777 else if (mode == SImode
1778 && ((val == 0x7fffffff
1779 && (comparison == GTU || comparison == LEU))
1780 || ((unsigned HOST_WIDE_INT) val
1781 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1782 && (comparison == GEU || comparison == LTU))))
1783 {
1784 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1785 operands[2] = CONST0_RTX (mode);
1786 }
1787 }
1788 op1 = operands[1];
1789 if (can_create_pseudo_p ())
1790 operands[1] = force_reg (mode, op1);
1791 /* When we are handling DImode comparisons, we want to keep constants so
1792 that we can optimize the component comparisons; however, memory loads
1793 are better issued as a whole so that they can be scheduled well.
1794 SImode equality comparisons allow I08 constants, but only when they
1795 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1796 into a register, that register might as well be r0, and we allow the
1797 constant. If it is already in a register, this is likely to be
1798 allocated to a different hard register, thus we load the constant into
1799 a register unless it is zero. */
1800 if (!REG_P (operands[2])
1801 && (!CONST_INT_P (operands[2])
1802 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1803 && ((comparison != EQ && comparison != NE)
1804 || (REG_P (op1) && REGNO (op1) != R0_REG)
1805 || !satisfies_constraint_I08 (operands[2])))))
1806 {
1807 if (scratch && GET_MODE (scratch) == mode)
1808 {
1809 emit_move_insn (scratch, operands[2]);
1810 operands[2] = scratch;
1811 }
1812 else if (can_create_pseudo_p ())
1813 operands[2] = force_reg (mode, operands[2]);
1814 }
1815 return comparison;
1816 }
1817
1818 void
1819 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1820 {
1821 rtx (*branch_expander) (rtx) = gen_branch_true;
1822 rtx jump;
1823
1824 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1825 switch (comparison)
1826 {
1827 case NE: case LT: case LE: case LTU: case LEU:
1828 comparison = reverse_condition (comparison);
1829 branch_expander = gen_branch_false;
1830 default: ;
1831 }
1832 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1833 gen_rtx_fmt_ee (comparison, SImode,
1834 operands[1], operands[2])));
1835 jump = emit_jump_insn (branch_expander (operands[3]));
1836 if (probability >= 0)
1837 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1838
1839 }
1840
1841 /* ??? How should we distribute probabilities when more than one branch
1842 is generated? So far we only have some ad-hoc observations:
1843 - If the operands are random, they are likely to differ in both parts.
1844 - If comparing items in a hash chain, the operands are random or equal;
1845 operation should be EQ or NE.
1846 - If items are searched in an ordered tree from the root, we can expect
1847 the highpart to be unequal about half of the time; operation should be
1848 an inequality comparison, operands non-constant, and overall probability
1849 about 50%. Likewise for quicksort.
1850 - Range checks will often be made against constants. Even if we assume for
1851 simplicity an even distribution of the non-constant operand over a
1852 sub-range here, the same probability could be generated with differently
1853 wide sub-ranges - as long as the ratio of the part of the subrange that
1854 is before the threshold to the part that comes after the threshold stays
1855 the same. Thus, we can't really tell anything here;
1856 assuming random distribution is at least simple.
1857 */
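/* One concrete consequence in the DImode expansion below (illustrative):
   for an EQ comparison, once the high words have compared equal, the code
   gives the low-word branch (nearly) the full branch probability, i.e. it
   assumes that equal high words usually mean equal values.  */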
1858
1859 bool
1860 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1861 {
1862 enum rtx_code msw_taken, msw_skip, lsw_taken;
1863 rtx skip_label = NULL_RTX;
1864 rtx op1h, op1l, op2h, op2l;
1865 int num_branches;
1866 int prob, rev_prob;
1867 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1868 rtx scratch = operands[4];
1869
1870 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1871 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1872 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1873 op1l = gen_lowpart (SImode, operands[1]);
1874 op2l = gen_lowpart (SImode, operands[2]);
1875 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1876 prob = split_branch_probability;
1877 rev_prob = REG_BR_PROB_BASE - prob;
1878 switch (comparison)
1879 {
1880 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1881 That costs 1 cycle more when the first branch can be predicted taken,
1882 but saves us mispredicts because only one branch needs prediction.
1883 It also enables generating the cmpeqdi_t-1 pattern. */
1884 case EQ:
1885 if (TARGET_CMPEQDI_T)
1886 {
1887 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1888 emit_jump_insn (gen_branch_true (operands[3]));
1889 return true;
1890 }
1891 msw_skip = NE;
1892 lsw_taken = EQ;
1893 if (prob >= 0)
1894 {
1895 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1896 */
1897 msw_skip_prob = rev_prob;
1898 if (REG_BR_PROB_BASE <= 65535)
1899 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1900 else
1901 {
1902 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1903 lsw_taken_prob
1904 = (prob
1905 ? (REG_BR_PROB_BASE
1906 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1907 / ((HOST_WIDEST_INT) prob << 32)))
1908 : 0);
1909 }
1910 }
1911 break;
1912 case NE:
1913 if (TARGET_CMPEQDI_T)
1914 {
1915 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1916 emit_jump_insn (gen_branch_false (operands[3]));
1917 return true;
1918 }
1919 msw_taken = NE;
1920 msw_taken_prob = prob;
1921 lsw_taken = NE;
1922 lsw_taken_prob = 0;
1923 break;
1924 case GTU: case GT:
1925 msw_taken = comparison;
1926 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1927 break;
1928 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1929 msw_skip = swap_condition (msw_taken);
1930 lsw_taken = GTU;
1931 break;
1932 case GEU: case GE:
1933 if (op2l == CONST0_RTX (SImode))
1934 msw_taken = comparison;
1935 else
1936 {
1937 msw_taken = comparison == GE ? GT : GTU;
1938 msw_skip = swap_condition (msw_taken);
1939 lsw_taken = GEU;
1940 }
1941 break;
1942 case LTU: case LT:
1943 msw_taken = comparison;
1944 if (op2l == CONST0_RTX (SImode))
1945 break;
1946 msw_skip = swap_condition (msw_taken);
1947 lsw_taken = LTU;
1948 break;
1949 case LEU: case LE:
1950 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1951 msw_taken = comparison;
1952 else
1953 {
1954 lsw_taken = LEU;
1955 if (comparison == LE)
1956 msw_taken = LT;
1957 else if (op2h != CONST0_RTX (SImode))
1958 msw_taken = LTU;
1959 else
1960 break;
1961 msw_skip = swap_condition (msw_taken);
1962 }
1963 break;
1964 default: return false;
1965 }
1966 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1967 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1968 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1969 if (comparison != EQ && comparison != NE && num_branches > 1)
1970 {
1971 if (!CONSTANT_P (operands[2])
1972 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1973 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1974 {
1975 msw_taken_prob = prob / 2U;
1976 msw_skip_prob
1977 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1978 lsw_taken_prob = prob;
1979 }
1980 else
1981 {
1982 msw_taken_prob = prob;
1983 msw_skip_prob = REG_BR_PROB_BASE;
1984 /* ??? If we have a constant op2h, should we use that when
1985 calculating lsw_taken_prob? */
1986 lsw_taken_prob = prob;
1987 }
1988 }
1989 operands[1] = op1h;
1990 operands[2] = op2h;
1991 operands[4] = NULL_RTX;
1992 if (reload_completed
1993 && ! arith_reg_or_0_operand (op2h, SImode)
1994 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1995 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1996 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1997 {
1998 emit_move_insn (scratch, operands[2]);
1999 operands[2] = scratch;
2000 }
2001 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2002 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2003 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2004 {
2005 rtx taken_label = operands[3];
2006
2007 /* Operands were possibly modified, but msw_skip doesn't expect this.
2008 Always use the original ones. */
2009 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2010 {
2011 operands[1] = op1h;
2012 operands[2] = op2h;
2013 }
2014
2015 operands[3] = skip_label = gen_label_rtx ();
2016 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2017 operands[3] = taken_label;
2018 }
2019 operands[1] = op1l;
2020 operands[2] = op2l;
2021 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2022 {
2023 if (reload_completed
2024 && ! arith_reg_or_0_operand (op2l, SImode)
2025 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2026 {
2027 emit_move_insn (scratch, operands[2]);
2028 operands[2] = scratch;
2029 }
2030 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2031 }
2032 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2033 emit_label (skip_label);
2034 return true;
2035 }
2036
2037 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2038
2039 static void
2040 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2041 {
2042 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2043 {
2044 insn = gen_rtx_PARALLEL (VOIDmode,
2045 gen_rtvec (2, insn,
2046 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2047 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2048 }
2049 else
2050 emit_insn (insn);
2051 }
2052
2053 /* Prepare the operands for an scc instruction; make sure that the
2054 compare has been done and the result is in T_REG. */
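/* For example (illustrative): the SH compare instructions only set T for
   equality and "greater"-style tests, so a request for LT below is emitted
   as GT with the two operands swapped.  */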
2055 void
2056 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2057 {
2058 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2059 enum rtx_code oldcode = code;
2060 enum machine_mode mode;
2061
2062 /* First need a compare insn. */
2063 switch (code)
2064 {
2065 case NE:
2066 /* It isn't possible to handle this case. */
2067 gcc_unreachable ();
2068 case LT:
2069 code = GT;
2070 break;
2071 case LE:
2072 code = GE;
2073 break;
2074 case LTU:
2075 code = GTU;
2076 break;
2077 case LEU:
2078 code = GEU;
2079 break;
2080 default:
2081 break;
2082 }
2083 if (code != oldcode)
2084 {
2085 rtx tmp = op0;
2086 op0 = op1;
2087 op1 = tmp;
2088 }
2089
2090 mode = GET_MODE (op0);
2091 if (mode == VOIDmode)
2092 mode = GET_MODE (op1);
2093
2094 op0 = force_reg (mode, op0);
2095 if ((code != EQ && code != NE
2096 && (op1 != const0_rtx
2097 || code == GTU || code == GEU || code == LTU || code == LEU))
2098 || (mode == DImode && op1 != const0_rtx)
2099 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2100 op1 = force_reg (mode, op1);
2101
2102 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2103 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2104 mode);
2105 }
2106
2107 rtx
2108 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2109 rtx op0, rtx op1)
2110 {
2111 rtx target = gen_reg_rtx (SImode);
2112 rtx tmp;
2113
2114 gcc_assert (TARGET_SHMEDIA);
2115 switch (code)
2116 {
2117 case EQ:
2118 case GT:
2119 case LT:
2120 case UNORDERED:
2121 case GTU:
2122 case LTU:
2123 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2124 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2125 code = NE;
2126 break;
2127
2128 case NE:
2129 case GE:
2130 case LE:
2131 case ORDERED:
2132 case GEU:
2133 case LEU:
2134 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2135 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2136 code = EQ;
2137 break;
2138
2139 case UNEQ:
2140 case UNGE:
2141 case UNGT:
2142 case UNLE:
2143 case UNLT:
2144 case LTGT:
2145 return NULL_RTX;
2146
2147 default:
2148 gcc_unreachable ();
2149 }
2150
2151 if (mode == DImode)
2152 {
2153 rtx t2 = gen_reg_rtx (DImode);
2154 emit_insn (gen_extendsidi2 (t2, target));
2155 target = t2;
2156 }
2157
2158 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2159 }
2160
2161 /* Called from the md file, set up the operands of a compare instruction. */
2162
2163 void
2164 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2165 {
2166 enum rtx_code code = GET_CODE (operands[0]);
2167 enum rtx_code branch_code;
2168 rtx op0 = operands[1];
2169 rtx op1 = operands[2];
2170 rtx insn, tem;
2171 bool need_ccmpeq = false;
2172
2173 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2174 {
2175 op0 = force_reg (mode, op0);
2176 op1 = force_reg (mode, op1);
2177 }
2178 else
2179 {
2180 if (code != EQ || mode == DImode)
2181 {
2182 /* Force args into regs, since we can't use constants here. */
2183 op0 = force_reg (mode, op0);
2184 if (op1 != const0_rtx || code == GTU || code == GEU)
2185 op1 = force_reg (mode, op1);
2186 }
2187 }
2188
2189 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2190 {
2191 if (code == LT
2192 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2193 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2194 {
2195 tem = op0, op0 = op1, op1 = tem;
2196 code = swap_condition (code);
2197 }
2198
2199 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2200 if (code == GE)
2201 {
2202 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2203 need_ccmpeq = true;
2204 code = GT;
2205 }
2206
2207 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2208 to EQ/GT respectively. */
2209 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2210 }
2211
2212 switch (code)
2213 {
2214 case EQ:
2215 case GT:
2216 case GE:
2217 case GTU:
2218 case GEU:
2219 branch_code = code;
2220 break;
2221 case NE:
2222 case LT:
2223 case LE:
2224 case LTU:
2225 case LEU:
2226 branch_code = reverse_condition (code);
2227 break;
2228 default:
2229 gcc_unreachable ();
2230 }
2231
2232 insn = gen_rtx_SET (VOIDmode,
2233 gen_rtx_REG (SImode, T_REG),
2234 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2235
2236 sh_emit_set_t_insn (insn, mode);
2237 if (need_ccmpeq)
2238 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2239
2240 if (branch_code == code)
2241 emit_jump_insn (gen_branch_true (operands[3]));
2242 else
2243 emit_jump_insn (gen_branch_false (operands[3]));
2244 }
2245
2246 void
2247 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2248 {
2249 enum rtx_code code = GET_CODE (operands[1]);
2250 rtx op0 = operands[2];
2251 rtx op1 = operands[3];
2252 rtx lab = NULL_RTX;
2253 bool invert = false;
2254 rtx tem;
2255
2256 op0 = force_reg (mode, op0);
2257 if ((code != EQ && code != NE
2258 && (op1 != const0_rtx
2259 || code == GTU || code == GEU || code == LTU || code == LEU))
2260 || (mode == DImode && op1 != const0_rtx)
2261 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2262 op1 = force_reg (mode, op1);
2263
2264 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2265 {
2266 if (code == LT || code == LE)
2267 {
2268 code = swap_condition (code);
2269 tem = op0, op0 = op1, op1 = tem;
2270 }
2271 if (code == GE)
2272 {
2273 if (TARGET_IEEE)
2274 {
2275 lab = gen_label_rtx ();
2276 sh_emit_scc_to_t (EQ, op0, op1);
2277 emit_jump_insn (gen_branch_true (lab));
2278 code = GT;
2279 }
2280 else
2281 {
2282 code = LT;
2283 invert = true;
2284 }
2285 }
2286 }
2287
2288 if (code == NE)
2289 {
2290 code = EQ;
2291 invert = true;
2292 }
2293
2294 sh_emit_scc_to_t (code, op0, op1);
2295 if (lab)
2296 emit_label (lab);
2297 if (invert)
2298 emit_insn (gen_movnegt (operands[0]));
2299 else
2300 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2301 }
2302 \f
2303 /* Functions to output assembly code. */
2304
2305 /* Return a sequence of instructions to perform DI or DF move.
2306
2307 Since the SH cannot move a DI or DF in one instruction, we have
2308 to take care when we see overlapping source and dest registers. */
2309
2310 const char *
2311 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2312 enum machine_mode mode)
2313 {
2314 rtx dst = operands[0];
2315 rtx src = operands[1];
2316
2317 if (MEM_P (dst)
2318 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2319 return "mov.l %T1,%0\n\tmov.l %1,%0";
2320
2321 if (register_operand (dst, mode)
2322 && register_operand (src, mode))
2323 {
2324 if (REGNO (src) == MACH_REG)
2325 return "sts mach,%S0\n\tsts macl,%R0";
2326
2327 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2328 when mov.d r1,r0 do r1->r0 then r2->r1. */
2329
2330 if (REGNO (src) + 1 == REGNO (dst))
2331 return "mov %T1,%T0\n\tmov %1,%0";
2332 else
2333 return "mov %1,%0\n\tmov %T1,%T0";
2334 }
2335 else if (CONST_INT_P (src))
2336 {
2337 if (INTVAL (src) < 0)
2338 output_asm_insn ("mov #-1,%S0", operands);
2339 else
2340 output_asm_insn ("mov #0,%S0", operands);
2341
2342 return "mov %1,%R0";
2343 }
2344 else if (MEM_P (src))
2345 {
2346 int ptrreg = -1;
2347 int dreg = REGNO (dst);
2348 rtx inside = XEXP (src, 0);
2349
2350 switch (GET_CODE (inside))
2351 {
2352 case REG:
2353 ptrreg = REGNO (inside);
2354 break;
2355
2356 case SUBREG:
2357 ptrreg = subreg_regno (inside);
2358 break;
2359
2360 case PLUS:
2361 ptrreg = REGNO (XEXP (inside, 0));
2362 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2363 an offsettable address. Unfortunately, offsettable addresses use
2364 QImode to check the offset, and a QImode offsettable address
2365 requires r0 for the other operand, which is not currently
2366 supported, so we can't use the 'o' constraint.
2367 Thus we must check for and handle r0+REG addresses here.
2368 We punt for now, since this is likely very rare. */
2369 gcc_assert (!REG_P (XEXP (inside, 1)));
2370 break;
2371
2372 case LABEL_REF:
2373 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2374 case POST_INC:
2375 return "mov.l %1,%0\n\tmov.l %1,%T0";
2376 default:
2377 gcc_unreachable ();
2378 }
2379
2380 /* Work out the safe way to copy. Copy into the second half first. */
2381 if (dreg == ptrreg)
2382 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2383 }
2384
2385 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2386 }
2387
2388 /* Print an instruction which would have gone into a delay slot after
2389 another instruction, but couldn't because the other instruction expanded
2390 into a sequence where putting the slot insn at the end wouldn't work. */
2391
2392 static void
2393 print_slot (rtx insn)
2394 {
2395 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2396
2397 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2398 }
2399
2400 const char *
2401 output_far_jump (rtx insn, rtx op)
2402 {
2403 struct { rtx lab, reg, op; } this_jmp;
2404 rtx braf_base_lab = NULL_RTX;
2405 const char *jump;
2406 int far;
2407 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2408 rtx prev;
2409
2410 this_jmp.lab = gen_label_rtx ();
2411
2412 if (TARGET_SH2
2413 && offset >= -32764
2414 && offset - get_attr_length (insn) <= 32766)
2415 {
2416 far = 0;
2417 jump = "mov.w %O0,%1; braf %1";
2418 }
2419 else
2420 {
2421 far = 1;
2422 if (flag_pic)
2423 {
2424 if (TARGET_SH2)
2425 jump = "mov.l %O0,%1; braf %1";
2426 else
2427 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2428 }
2429 else
2430 jump = "mov.l %O0,%1; jmp @%1";
2431 }
2432 /* If we have a scratch register available, use it. */
2433 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2434 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2435 {
2436 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2437 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2438 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2439 output_asm_insn (jump, &this_jmp.lab);
2440 if (dbr_sequence_length ())
2441 print_slot (final_sequence);
2442 else
2443 output_asm_insn ("nop", 0);
2444 }
2445 else
2446 {
2447 /* Output the delay slot insn first if any. */
2448 if (dbr_sequence_length ())
2449 print_slot (final_sequence);
2450
2451 this_jmp.reg = gen_rtx_REG (SImode, 13);
2452 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2453 Fortunately, MACL is fixed and call-clobbered, and we never
2454 need its value across jumps, so save r13 in it instead of in
2455 the stack. */
2456 if (TARGET_SH5)
2457 output_asm_insn ("lds r13, macl", 0);
2458 else
2459 output_asm_insn ("mov.l r13,@-r15", 0);
2460 output_asm_insn (jump, &this_jmp.lab);
2461 if (TARGET_SH5)
2462 output_asm_insn ("sts macl, r13", 0);
2463 else
2464 output_asm_insn ("mov.l @r15+,r13", 0);
2465 }
2466 if (far && flag_pic && TARGET_SH2)
2467 {
2468 braf_base_lab = gen_label_rtx ();
2469 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2470 CODE_LABEL_NUMBER (braf_base_lab));
2471 }
2472 if (far)
2473 output_asm_insn (".align 2", 0);
2474 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2475 this_jmp.op = op;
2476 if (far && flag_pic)
2477 {
2478 if (TARGET_SH2)
2479 this_jmp.lab = braf_base_lab;
2480 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2481 }
2482 else
2483 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2484 return "";
2485 }
2486
2487 /* Local label counter, used for constants in the pool and inside
2488 pattern branches. */
2489
2490 static int lf = 100;
2491
2492 /* Output code for ordinary branches. */
2493
2494 const char *
2495 output_branch (int logic, rtx insn, rtx *operands)
2496 {
2497 switch (get_attr_length (insn))
2498 {
2499 case 6:
2500 /* This can happen if filling the delay slot has caused a forward
2501 branch to exceed its range (we could reverse it, but only
2502 when we know we won't overextend other branches; this should
2503 best be handled by relaxation).
2504 It can also happen when other condbranches hoist delay slot insn
2505 from their destination, thus leading to code size increase.
2506 But the branch will still be in the range -4092..+4098 bytes. */
2507
2508 if (! TARGET_RELAX)
2509 {
2510 int label = lf++;
2511 /* The call to print_slot will clobber the operands. */
2512 rtx op0 = operands[0];
2513
2514 /* If the instruction in the delay slot is annulled (true), then
2515 there is no delay slot where we can put it now. The only safe
2516 place for it is after the label. final will do that by default. */
2517
2518 if (final_sequence
2519 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2520 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2521 {
2522 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2523 ASSEMBLER_DIALECT ? "/" : ".", label);
2524 print_slot (final_sequence);
2525 }
2526 else
2527 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2528
2529 output_asm_insn ("bra\t%l0", &op0);
2530 fprintf (asm_out_file, "\tnop\n");
2531 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2532
2533 return "";
2534 }
2535 /* When relaxing, handle this like a short branch. The linker
2536 will fix it up if it still doesn't fit after relaxation. */
2537 case 2:
2538 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2539
2540 /* These are for SH2e, in which we have to account for the
2541 extra nop because of the hardware bug in annulled branches. */
2542 case 8:
2543 if (! TARGET_RELAX)
2544 {
2545 int label = lf++;
2546
2547 gcc_assert (!final_sequence
2548 || !(INSN_ANNULLED_BRANCH_P
2549 (XVECEXP (final_sequence, 0, 0))));
2550 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2551 logic ? "f" : "t",
2552 ASSEMBLER_DIALECT ? "/" : ".", label);
2553 fprintf (asm_out_file, "\tnop\n");
2554 output_asm_insn ("bra\t%l0", operands);
2555 fprintf (asm_out_file, "\tnop\n");
2556 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2557
2558 return "";
2559 }
2560 /* When relaxing, fall through. */
2561 case 4:
2562 {
2563 char buffer[10];
2564
2565 sprintf (buffer, "b%s%ss\t%%l0",
2566 logic ? "t" : "f",
2567 ASSEMBLER_DIALECT ? "/" : ".");
2568 output_asm_insn (buffer, &operands[0]);
2569 return "nop";
2570 }
2571
2572 default:
2573 /* There should be no longer branches now - that would
2574 indicate that something has destroyed the branches set
2575 up in machine_dependent_reorg. */
2576 gcc_unreachable ();
2577 }
2578 }
2579
2580 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2581 fill in operand 9 as a label to the successor insn.
2582 We try to use jump threading where possible.
2583 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2584 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2585 follow jmp and bt, if the address is in range. */
2586 const char *
2587 output_branchy_insn (enum rtx_code code, const char *templ,
2588 rtx insn, rtx *operands)
2589 {
2590 rtx next_insn = NEXT_INSN (insn);
2591
2592 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2593 {
2594 rtx src = SET_SRC (PATTERN (next_insn));
2595 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2596 {
2597 /* Following branch not taken */
2598 operands[9] = gen_label_rtx ();
2599 emit_label_after (operands[9], next_insn);
2600 INSN_ADDRESSES_NEW (operands[9],
2601 INSN_ADDRESSES (INSN_UID (next_insn))
2602 + get_attr_length (next_insn));
2603 return templ;
2604 }
2605 else
2606 {
2607 int offset = (branch_dest (next_insn)
2608 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2609 if (offset >= -252 && offset <= 258)
2610 {
2611 if (GET_CODE (src) == IF_THEN_ELSE)
2612 /* branch_true */
2613 src = XEXP (src, 1);
2614 operands[9] = src;
2615 return templ;
2616 }
2617 }
2618 }
2619 operands[9] = gen_label_rtx ();
2620 emit_label_after (operands[9], insn);
2621 INSN_ADDRESSES_NEW (operands[9],
2622 INSN_ADDRESSES (INSN_UID (insn))
2623 + get_attr_length (insn));
2624 return templ;
2625 }
2626
2627 const char *
2628 output_ieee_ccmpeq (rtx insn, rtx *operands)
2629 {
2630 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2631 insn, operands);
2632 }
2633 \f
2634 /* Output the start of the assembler file. */
2635
2636 static void
2637 sh_file_start (void)
2638 {
2639 default_file_start ();
2640
2641 #ifdef SYMBIAN
2642 /* Declare the .directive section before it is used. */
2643 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2644 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2645 #endif
2646
2647 if (TARGET_ELF)
2648 /* We need to show the text section with the proper
2649 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2650 emits it without attributes, or else GAS
2651 will complain. We can teach GAS specifically about the
2652 default attributes for our choice of text section, but
2653 then we would have to change GAS again if/when we change
2654 the text section name. */
2655 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2656 else
2657 /* Switch to the data section so that the coffsem symbol
2658 isn't in the text section. */
2659 switch_to_section (data_section);
2660
2661 if (TARGET_LITTLE_ENDIAN)
2662 fputs ("\t.little\n", asm_out_file);
2663
2664 if (!TARGET_ELF)
2665 {
2666 if (TARGET_SHCOMPACT)
2667 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2668 else if (TARGET_SHMEDIA)
2669 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2670 TARGET_SHMEDIA64 ? 64 : 32);
2671 }
2672 }
2673 \f
2674 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2675
2676 static bool
2677 unspec_caller_rtx_p (rtx pat)
2678 {
2679 rtx base, offset;
2680 int i;
2681
2682 split_const (pat, &base, &offset);
2683 if (GET_CODE (base) == UNSPEC)
2684 {
2685 if (XINT (base, 1) == UNSPEC_CALLER)
2686 return true;
2687 for (i = 0; i < XVECLEN (base, 0); i++)
2688 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2689 return true;
2690 }
2691 return false;
2692 }
2693
2694 /* Indicate that INSN cannot be duplicated. This is true for insns
2695 that generate a unique label. */
2696
2697 static bool
2698 sh_cannot_copy_insn_p (rtx insn)
2699 {
2700 rtx pat;
2701
2702 if (!reload_completed || !flag_pic)
2703 return false;
2704
2705 if (!NONJUMP_INSN_P (insn))
2706 return false;
2707 if (asm_noperands (insn) >= 0)
2708 return false;
2709
2710 pat = PATTERN (insn);
2711 if (GET_CODE (pat) != SET)
2712 return false;
2713 pat = SET_SRC (pat);
2714
2715 if (unspec_caller_rtx_p (pat))
2716 return true;
2717
2718 return false;
2719 }
2720 \f
2721 /* Actual number of instructions used to make a shift by N. */
2722 static const char ashiftrt_insns[] =
2723 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
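/* Illustrative reading of the table above: the entries 2,3,4,5 at indices
   16..19 account for expand_ashiftrt below handling those shift counts with
   gen_ashrsi2_16 followed by single-bit shifts.  */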
2724
2725 /* Left shift and logical right shift are the same. */
2726 static const char shift_insns[] =
2727 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2728
2729 /* Individual shift amounts needed to get the above length sequences.
2730 One bit right shifts clobber the T bit, so when possible, put one bit
2731 shifts in the middle of the sequence, so the ends are eligible for
2732 branch delay slots. */
2733 static const short shift_amounts[32][5] = {
2734 {0}, {1}, {2}, {2, 1},
2735 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2736 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2737 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2738 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2739 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2740 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2741 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
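/* A worked reading of the two tables above (illustrative): a left shift by
   13 uses shift_amounts[13] == {8, 2, 1, 2}, four partial shifts, matching
   shift_insns[13] == 4; a shift by 30 uses {16, -2, 16}, where the negative
   entry is a 2-bit shift in the opposite direction (see gen_ashift), for
   shift_insns[30] == 3 insns.  */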
2742
2743 /* Likewise, but for shift amounts < 16, up to three highmost bits
2744 might be clobbered. This is typically used when combined with some
2745 kind of sign or zero extension. */
2746
2747 static const char ext_shift_insns[] =
2748 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2749
2750 static const short ext_shift_amounts[32][4] = {
2751 {0}, {1}, {2}, {2, 1},
2752 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2753 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2754 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2755 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2756 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2757 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2758 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2759
2760 /* Assuming we have a value that has been sign-extended by at least one bit,
2761 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2762 to shift it by N without data loss, and quicker than by other means? */
2763 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
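/* I.e. EXT_SHIFT_SIGNED holds exactly for n == 7 and n == 15 (illustrative
   expansion of the macro above).  */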
2764
2765 /* This is used in length attributes in sh.md to help compute the length
2766 of arbitrary constant shift instructions. */
2767
2768 int
2769 shift_insns_rtx (rtx insn)
2770 {
2771 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2772 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2773 enum rtx_code shift_code = GET_CODE (set_src);
2774
2775 switch (shift_code)
2776 {
2777 case ASHIFTRT:
2778 return ashiftrt_insns[shift_count];
2779 case LSHIFTRT:
2780 case ASHIFT:
2781 return shift_insns[shift_count];
2782 default:
2783 gcc_unreachable ();
2784 }
2785 }
2786
2787 /* Return the cost of a shift. */
2788
2789 static inline int
2790 shiftcosts (rtx x)
2791 {
2792 int value;
2793
2794 if (TARGET_SHMEDIA)
2795 return 1;
2796
2797 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2798 {
2799 if (GET_MODE (x) == DImode
2800 && CONST_INT_P (XEXP (x, 1))
2801 && INTVAL (XEXP (x, 1)) == 1)
2802 return 2;
2803
2804 /* Everything else is invalid, because there is no pattern for it. */
2805 return MAX_COST;
2806 }
2807 /* If shift by a non constant, then this will be expensive. */
2808 if (!CONST_INT_P (XEXP (x, 1)))
2809 return SH_DYNAMIC_SHIFT_COST;
2810
2811 /* Otherwise, return the true cost in instructions. Cope with out of range
2812 shift counts more or less arbitrarily. */
2813 value = INTVAL (XEXP (x, 1)) & 31;
2814
2815 if (GET_CODE (x) == ASHIFTRT)
2816 {
2817 int cost = ashiftrt_insns[value];
2818 /* If SH3, then we put the constant in a reg and use shad. */
2819 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2820 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2821 return cost;
2822 }
2823 else
2824 return shift_insns[value];
2825 }
2826
2827 /* Return the cost of an AND operation. */
2828
2829 static inline int
2830 andcosts (rtx x)
2831 {
2832 int i;
2833
2834 /* Anding with a register is a single cycle and instruction. */
2835 if (!CONST_INT_P (XEXP (x, 1)))
2836 return 1;
2837
2838 i = INTVAL (XEXP (x, 1));
2839
2840 if (TARGET_SHMEDIA)
2841 {
2842 if (satisfies_constraint_I10 (XEXP (x, 1))
2843 || satisfies_constraint_J16 (XEXP (x, 1)))
2844 return 1;
2845 else
2846 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2847 }
2848
2849 /* These constants are single cycle extu.[bw] instructions. */
2850 if (i == 0xff || i == 0xffff)
2851 return 1;
2852 /* Constants that can be used in an and immediate instruction in a single
2853 cycle, but this requires r0, so make it a little more expensive. */
2854 if (CONST_OK_FOR_K08 (i))
2855 return 2;
2856 /* Constants that can be loaded with a mov immediate and an and.
2857 This case is probably unnecessary. */
2858 if (CONST_OK_FOR_I08 (i))
2859 return 2;
2860 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2861 This case is probably unnecessary. */
2862 return 3;
2863 }
2864
2865 /* Return the cost of an addition or a subtraction. */
2866
2867 static inline int
2868 addsubcosts (rtx x)
2869 {
2870 /* Adding a register is a single cycle insn. */
2871 if (REG_P (XEXP (x, 1))
2872 || GET_CODE (XEXP (x, 1)) == SUBREG)
2873 return 1;
2874
2875 /* Likewise for small constants. */
2876 if (CONST_INT_P (XEXP (x, 1))
2877 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2878 return 1;
2879
2880 if (TARGET_SHMEDIA)
2881 switch (GET_CODE (XEXP (x, 1)))
2882 {
2883 case CONST:
2884 case LABEL_REF:
2885 case SYMBOL_REF:
2886 return TARGET_SHMEDIA64 ? 5 : 3;
2887
2888 case CONST_INT:
2889 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2890 return 2;
2891 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2892 return 3;
2893 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2894 return 4;
2895
2896 /* Fall through. */
2897 default:
2898 return 5;
2899 }
2900
2901 /* Any other constant requires a 2 cycle pc-relative load plus an
2902 addition. */
2903 return 3;
2904 }
2905
2906 /* Return the cost of a multiply. */
2907 static inline int
2908 multcosts (rtx x ATTRIBUTE_UNUSED)
2909 {
2910 if (sh_multcost >= 0)
2911 return sh_multcost;
2912 if (TARGET_SHMEDIA)
2913 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2914 accept constants. Ideally, we would use a cost of one or two and
2915 add the cost of the operand, but disregard the latter when inside loops
2916 and loop invariant code motion is still to follow.
2917 Using a multiply first and splitting it later if it's a loss
2918 doesn't work because of different sign / zero extension semantics
2919 of multiplies vs. shifts. */
2920 return TARGET_SMALLCODE ? 2 : 3;
2921
2922 if (TARGET_SH2)
2923 {
2924 /* We have a mul insn, so we can never take more than the mul and the
2925 read of the mac reg, but count more because of the latency and extra
2926 reg usage. */
2927 if (TARGET_SMALLCODE)
2928 return 2;
2929 return 3;
2930 }
2931
2932 /* If we're aiming at small code, then just count the number of
2933 insns in a multiply call sequence. */
2934 if (TARGET_SMALLCODE)
2935 return 5;
2936
2937 /* Otherwise count all the insns in the routine we'd be calling too. */
2938 return 20;
2939 }
2940
2941 /* Compute a (partial) cost for rtx X. Return true if the complete
2942 cost has been computed, and false if subexpressions should be
2943 scanned. In either case, *TOTAL contains the cost result. */
2944
2945 static bool
2946 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2947 bool speed ATTRIBUTE_UNUSED)
2948 {
2949 switch (code)
2950 {
2951 case CONST_INT:
2952 if (TARGET_SHMEDIA)
2953 {
2954 if (INTVAL (x) == 0)
2955 *total = 0;
2956 else if (outer_code == AND && and_operand ((x), DImode))
2957 *total = 0;
2958 else if ((outer_code == IOR || outer_code == XOR
2959 || outer_code == PLUS)
2960 && CONST_OK_FOR_I10 (INTVAL (x)))
2961 *total = 0;
2962 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2963 *total = COSTS_N_INSNS (outer_code != SET);
2964 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2965 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2966 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2967 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2968 else
2969 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2970 return true;
2971 }
2972 if (CONST_OK_FOR_I08 (INTVAL (x)))
2973 *total = 0;
2974 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2975 && CONST_OK_FOR_K08 (INTVAL (x)))
2976 *total = 1;
2977 /* prepare_cmp_insn will force costly constants into registers before
2978 the cbranch[sd]i4 patterns can see them, so preserve potentially
2979 interesting ones not covered by I08 above. */
2980 else if (outer_code == COMPARE
2981 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2982 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2983 || INTVAL (x) == 0x7fffffff
2984 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2985 *total = 1;
2986 else
2987 *total = 8;
2988 return true;
2989
2990 case CONST:
2991 case LABEL_REF:
2992 case SYMBOL_REF:
2993 if (TARGET_SHMEDIA64)
2994 *total = COSTS_N_INSNS (4);
2995 else if (TARGET_SHMEDIA32)
2996 *total = COSTS_N_INSNS (2);
2997 else
2998 *total = 5;
2999 return true;
3000
3001 case CONST_DOUBLE:
3002 if (TARGET_SHMEDIA)
3003 *total = COSTS_N_INSNS (4);
3004 /* prepare_cmp_insn will force costly constants into registers before
3005 the cbranchdi4 pattern can see them, so preserve potentially
3006 interesting ones. */
3007 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3008 *total = 1;
3009 else
3010 *total = 10;
3011 return true;
3012 case CONST_VECTOR:
3013 if (x == CONST0_RTX (GET_MODE (x)))
3014 *total = 0;
3015 else if (sh_1el_vec (x, VOIDmode))
3016 *total = outer_code != SET;
3017 if (sh_rep_vec (x, VOIDmode))
3018 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3019 + (outer_code != SET));
3020 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3021 return true;
3022
3023 case PLUS:
3024 case MINUS:
3025 *total = COSTS_N_INSNS (addsubcosts (x));
3026 return true;
3027
3028 case AND:
3029 *total = COSTS_N_INSNS (andcosts (x));
3030 return true;
3031
3032 case MULT:
3033 *total = COSTS_N_INSNS (multcosts (x));
3034 return true;
3035
3036 case ASHIFT:
3037 case ASHIFTRT:
3038 case LSHIFTRT:
3039 *total = COSTS_N_INSNS (shiftcosts (x));
3040 return true;
3041
3042 case DIV:
3043 case UDIV:
3044 case MOD:
3045 case UMOD:
3046 *total = COSTS_N_INSNS (20);
3047 return true;
3048
3049 case PARALLEL:
3050 if (sh_1el_vec (x, VOIDmode))
3051 *total = outer_code != SET;
3052 if (sh_rep_vec (x, VOIDmode))
3053 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3054 + (outer_code != SET));
3055 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3056 return true;
3057
3058 case FLOAT:
3059 case FIX:
3060 *total = 100;
3061 return true;
3062
3063 default:
3064 return false;
3065 }
3066 }
3067
3068 /* Compute the cost of an address. For the SH, all valid addresses are
3069 the same cost. Use a slightly higher cost for reg + reg addressing,
3070 since it increases pressure on r0. */
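/* For example (illustrative): a reg + reg address corresponds to the
   @(r0,Rn) addressing mode, which ties up r0.  */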
3071
3072 static int
3073 sh_address_cost (rtx X,
3074 bool speed ATTRIBUTE_UNUSED)
3075 {
3076 return (GET_CODE (X) == PLUS
3077 && ! CONSTANT_P (XEXP (X, 1))
3078 && ! TARGET_SHMEDIA ? 1 : 0);
3079 }
3080
3081 /* Code to expand a shift. */
3082
3083 void
3084 gen_ashift (int type, int n, rtx reg)
3085 {
3086 /* Negative values here come from the shift_amounts array. */
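  /* E.g. gen_ashift (ASHIFT, -2, reg) emits a 2-bit logical right shift
     (illustrative).  */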
3087 if (n < 0)
3088 {
3089 if (type == ASHIFT)
3090 type = LSHIFTRT;
3091 else
3092 type = ASHIFT;
3093 n = -n;
3094 }
3095
3096 switch (type)
3097 {
3098 case ASHIFTRT:
3099 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3100 break;
3101 case LSHIFTRT:
3102 if (n == 1)
3103 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3104 else
3105 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3106 break;
3107 case ASHIFT:
3108 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3109 break;
3110 }
3111 }
3112
3113 /* Same for HImode */
3114
3115 void
3116 gen_ashift_hi (int type, int n, rtx reg)
3117 {
3118 /* Negative values here come from the shift_amounts array. */
3119 if (n < 0)
3120 {
3121 if (type == ASHIFT)
3122 type = LSHIFTRT;
3123 else
3124 type = ASHIFT;
3125 n = -n;
3126 }
3127
3128 switch (type)
3129 {
3130 case ASHIFTRT:
3131 case LSHIFTRT:
3132 /* We don't have HImode right shift operations because using the
3133 ordinary 32 bit shift instructions for that doesn't generate proper
3134 zero/sign extension.
3135 gen_ashift_hi is only called in contexts where we know that the
3136 sign extension works out correctly. */
3137 {
3138 int offset = 0;
3139 if (GET_CODE (reg) == SUBREG)
3140 {
3141 offset = SUBREG_BYTE (reg);
3142 reg = SUBREG_REG (reg);
3143 }
3144 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3145 break;
3146 }
3147 case ASHIFT:
3148 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3149 break;
3150 }
3151 }
3152
3153 /* Output RTL to split a constant shift into its component SH constant
3154 shift instructions. */
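/* For instance (illustrative), a constant left shift by 5 is emitted as the
   three partial shifts 2, 1, 2 from shift_amounts[5].  */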
3155
3156 void
3157 gen_shifty_op (int code, rtx *operands)
3158 {
3159 int value = INTVAL (operands[2]);
3160 int max, i;
3161
3162 /* Truncate the shift count in case it is out of bounds. */
3163 value = value & 31;
3164
3165 if (value == 31)
3166 {
3167 if (code == LSHIFTRT)
3168 {
3169 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3170 emit_insn (gen_movt (operands[0]));
3171 return;
3172 }
3173 else if (code == ASHIFT)
3174 {
3175 /* There is a two instruction sequence for 31 bit left shifts,
3176 but it requires r0. */
3177 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3178 {
3179 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3180 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3181 return;
3182 }
3183 }
3184 }
3185 else if (value == 0)
3186 {
3187 /* This can happen even when optimizing, if there were subregs before
3188 reload. Don't output a nop here, as this is never optimized away;
3189 use a no-op move instead. */
3190 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3191 return;
3192 }
3193
3194 max = shift_insns[value];
3195 for (i = 0; i < max; i++)
3196 gen_ashift (code, shift_amounts[value][i], operands[0]);
3197 }
3198
3199 /* Same as above, but optimized for values where the topmost bits don't
3200 matter. */
3201
3202 void
3203 gen_shifty_hi_op (int code, rtx *operands)
3204 {
3205 int value = INTVAL (operands[2]);
3206 int max, i;
3207 void (*gen_fun) (int, int, rtx);
3208
3209 /* This operation is used by and_shl for SImode values with a few
3210 high bits known to be cleared. */
3211 value &= 31;
3212 if (value == 0)
3213 {
3214 emit_insn (gen_nop ());
3215 return;
3216 }
3217
3218 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3219 if (code == ASHIFT)
3220 {
3221 max = ext_shift_insns[value];
3222 for (i = 0; i < max; i++)
3223 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3224 }
3225 else
3226 /* When shifting right, emit the shifts in reverse order, so that
3227 solitary negative values come first. */
3228 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3229 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3230 }
3231
3232 /* Output RTL for an arithmetic right shift. */
3233
3234 /* ??? Rewrite to use super-optimizer sequences. */
3235
3236 int
3237 expand_ashiftrt (rtx *operands)
3238 {
3239 rtx wrk;
3240 char func[18];
3241 int value;
3242
3243 if (TARGET_SH3)
3244 {
3245 if (!CONST_INT_P (operands[2]))
3246 {
3247 rtx count = copy_to_mode_reg (SImode, operands[2]);
3248 emit_insn (gen_negsi2 (count, count));
3249 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3250 return 1;
3251 }
3252 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3253 > 1 + SH_DYNAMIC_SHIFT_COST)
3254 {
3255 rtx count
3256 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3257 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3258 return 1;
3259 }
3260 }
3261 if (!CONST_INT_P (operands[2]))
3262 return 0;
3263
3264 value = INTVAL (operands[2]) & 31;
3265
3266 if (value == 31)
3267 {
3268 /* If we are called from abs expansion, arrange things so that we
3269 can use a single MT instruction that doesn't clobber the source,
3270 if LICM can hoist out the load of the constant zero. */
3271 if (currently_expanding_to_rtl)
3272 {
3273 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3274 operands[1]));
3275 emit_insn (gen_mov_neg_si_t (operands[0]));
3276 return 1;
3277 }
3278 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3279 return 1;
3280 }
3281 else if (value >= 16 && value <= 19)
3282 {
3283 wrk = gen_reg_rtx (SImode);
3284 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3285 value -= 16;
3286 while (value--)
3287 gen_ashift (ASHIFTRT, 1, wrk);
3288 emit_move_insn (operands[0], wrk);
3289 return 1;
3290 }
3291 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3292 else if (value <= 5)
3293 {
3294 wrk = gen_reg_rtx (SImode);
3295 emit_move_insn (wrk, operands[1]);
3296 while (value--)
3297 gen_ashift (ASHIFTRT, 1, wrk);
3298 emit_move_insn (operands[0], wrk);
3299 return 1;
3300 }
3301
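  /* Illustrative: for a shift count of 20 the code below ends up calling
     the helper __ashiftrt_r4_20, passing the operand and receiving the
     result in r4.  */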
3302 wrk = gen_reg_rtx (Pmode);
3303
3304 /* Load the value into an arg reg and call a helper. */
3305 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3306 sprintf (func, "__ashiftrt_r4_%d", value);
3307 function_symbol (wrk, func, SFUNC_STATIC);
3308 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3309 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3310 return 1;
3311 }
3312
3313 int
3314 sh_dynamicalize_shift_p (rtx count)
3315 {
3316 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3317 }
3318
3319 /* Try to find a good way to implement the combiner pattern
3320 [(set (match_operand:SI 0 "register_operand" "r")
3321 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3322 (match_operand:SI 2 "const_int_operand" "n"))
3323 (match_operand:SI 3 "const_int_operand" "n"))) .
3324 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3325 return 0 for simple right / left or left/right shift combination.
3326 return 1 for a combination of shifts with zero_extend.
3327 return 2 for a combination of shifts with an AND that needs r0.
3328 return 3 for a combination of shifts with an AND that needs an extra
3329 scratch register, when the three highmost bits of the AND mask are clear.
3330 return 4 for a combination of shifts with an AND that needs an extra
3331 scratch register, when any of the three highmost bits of the AND mask
3332 is set.
3333 If ATTRP is set, store an initial right shift width in ATTRP[0],
3334 and the instruction length in ATTRP[1] . These values are not valid
3335 when returning 0.
3336 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3337 shift_amounts for the last shift value that is to be used before the
3338 sign extend. */
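/* A small worked instance (illustrative): LEFT_RTX == 0 and MASK_RTX == 0xff
   yield kind 1 with a best cost of one insn -- the whole and/shift collapses
   to a single zero extend.  */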
3339 int
3340 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3341 {
3342 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3343 int left = INTVAL (left_rtx), right;
3344 int best = 0;
3345 int cost, best_cost = 10000;
3346 int best_right = 0, best_len = 0;
3347 int i;
3348 int can_ext;
3349
3350 if (left < 0 || left > 31)
3351 return 0;
3352 if (CONST_INT_P (mask_rtx))
3353 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3354 else
3355 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3356 /* Can this be expressed as a right shift / left shift pair? */
3357 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3358 right = exact_log2 (lsb);
3359 mask2 = ~(mask + lsb - 1);
3360 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3361 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
3362 if (! mask2)
3363 best_cost = shift_insns[right] + shift_insns[right + left];
3364 /* mask has no trailing zeroes <==> ! right */
3365 else if (! right && mask2 == ~(lsb2 - 1))
3366 {
3367 int late_right = exact_log2 (lsb2);
3368 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3369 }
3370 /* Try to use zero extend. */
3371 if (mask2 == ~(lsb2 - 1))
3372 {
3373 int width, first;
3374
3375 for (width = 8; width <= 16; width += 8)
3376 {
3377 /* Can we zero-extend right away? */
3378 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3379 {
3380 cost
3381 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3382 if (cost < best_cost)
3383 {
3384 best = 1;
3385 best_cost = cost;
3386 best_right = right;
3387 best_len = cost;
3388 if (attrp)
3389 attrp[2] = -1;
3390 }
3391 continue;
3392 }
3393 /* ??? Could try to put zero extend into initial right shift,
3394 or even shift a bit left before the right shift. */
3395 /* Determine value of first part of left shift, to get to the
3396 zero extend cut-off point. */
3397 first = width - exact_log2 (lsb2) + right;
3398 if (first >= 0 && right + left - first >= 0)
3399 {
3400 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3401 + ext_shift_insns[right + left - first];
3402 if (cost < best_cost)
3403 {
3404 best = 1;
3405 best_cost = cost;
3406 best_right = right;
3407 best_len = cost;
3408 if (attrp)
3409 attrp[2] = first;
3410 }
3411 }
3412 }
3413 }
3414 /* Try to use r0 AND pattern */
3415 for (i = 0; i <= 2; i++)
3416 {
3417 if (i > right)
3418 break;
3419 if (! CONST_OK_FOR_K08 (mask >> i))
3420 continue;
3421 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3422 if (cost < best_cost)
3423 {
3424 best = 2;
3425 best_cost = cost;
3426 best_right = i;
3427 best_len = cost - 1;
3428 }
3429 }
3430 /* Try to use a scratch register to hold the AND operand. */
3431 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3432 for (i = 0; i <= 2; i++)
3433 {
3434 if (i > right)
3435 break;
3436 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3437 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3438 if (cost < best_cost)
3439 {
3440 best = 4 - can_ext;
3441 best_cost = cost;
3442 best_right = i;
3443 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3444 }
3445 }
3446
3447 if (attrp)
3448 {
3449 attrp[0] = best_right;
3450 attrp[1] = best_len;
3451 }
3452 return best;
3453 }
3454
3455 /* This is used in length attributes of the unnamed instructions
3456 corresponding to shl_and_kind return values of 1 and 2. */
3457 int
3458 shl_and_length (rtx insn)
3459 {
3460 rtx set_src, left_rtx, mask_rtx;
3461 int attributes[3];
3462
3463 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3464 left_rtx = XEXP (XEXP (set_src, 0), 1);
3465 mask_rtx = XEXP (set_src, 1);
3466 shl_and_kind (left_rtx, mask_rtx, attributes);
3467 return attributes[1];
3468 }
3469
3470 /* This is used in length attribute of the and_shl_scratch instruction. */
3471
3472 int
3473 shl_and_scr_length (rtx insn)
3474 {
3475 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3476 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3477 rtx op = XEXP (set_src, 0);
3478 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3479 op = XEXP (XEXP (op, 0), 0);
3480 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3481 }
3482
3483 /* Generate rtl for instructions for which shl_and_kind advised a particular
3484 method of generating them, i.e. returned zero. */
3485
3486 int
3487 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3488 {
3489 int attributes[3];
3490 unsigned HOST_WIDE_INT mask;
3491 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3492 int right, total_shift;
3493 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3494
3495 right = attributes[0];
3496 total_shift = INTVAL (left_rtx) + right;
3497 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3498 switch (kind)
3499 {
3500 default:
3501 return -1;
3502 case 1:
3503 {
3504 int first = attributes[2];
3505 rtx operands[3];
3506
3507 if (first < 0)
3508 {
3509 emit_insn ((mask << right) <= 0xff
3510 ? gen_zero_extendqisi2 (dest,
3511 gen_lowpart (QImode, source))
3512 : gen_zero_extendhisi2 (dest,
3513 gen_lowpart (HImode, source)));
3514 source = dest;
3515 }
3516 if (source != dest)
3517 emit_insn (gen_movsi (dest, source));
3518 operands[0] = dest;
3519 if (right)
3520 {
3521 operands[2] = GEN_INT (right);
3522 gen_shifty_hi_op (LSHIFTRT, operands);
3523 }
3524 if (first > 0)
3525 {
3526 operands[2] = GEN_INT (first);
3527 gen_shifty_hi_op (ASHIFT, operands);
3528 total_shift -= first;
3529 mask <<= first;
3530 }
3531 if (first >= 0)
3532 emit_insn (mask <= 0xff
3533 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3534 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3535 if (total_shift > 0)
3536 {
3537 operands[2] = GEN_INT (total_shift);
3538 gen_shifty_hi_op (ASHIFT, operands);
3539 }
3540 break;
3541 }
3542 case 4:
3543 shift_gen_fun = gen_shifty_op;
3544 case 3:
3545 /* If the topmost bit that matters is set, set the topmost bits
3546 that don't matter. This way, we might be able to get a shorter
3547 signed constant. */
3548 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3549 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3550 case 2:
3551 /* Don't expand fine-grained when combining, because that will
3552 make the pattern fail. */
3553 if (currently_expanding_to_rtl
3554 || reload_in_progress || reload_completed)
3555 {
3556 rtx operands[3];
3557
3558 /* Cases 3 and 4 should be handled by this split
3559 only while combining */
3560 gcc_assert (kind <= 2);
3561 if (right)
3562 {
3563 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3564 source = dest;
3565 }
3566 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3567 if (total_shift)
3568 {
3569 operands[0] = dest;
3570 operands[1] = dest;
3571 operands[2] = GEN_INT (total_shift);
3572 shift_gen_fun (ASHIFT, operands);
3573 }
3574 break;
3575 }
3576 else
3577 {
3578 int neg = 0;
3579 if (kind != 4 && total_shift < 16)
3580 {
3581 neg = -ext_shift_amounts[total_shift][1];
3582 if (neg > 0)
3583 neg -= ext_shift_amounts[total_shift][2];
3584 else
3585 neg = 0;
3586 }
3587 emit_insn (gen_and_shl_scratch (dest, source,
3588 GEN_INT (right),
3589 GEN_INT (mask),
3590 GEN_INT (total_shift + neg),
3591 GEN_INT (neg)));
3592 emit_insn (gen_movsi (dest, dest));
3593 break;
3594 }
3595 }
3596 return 0;
3597 }
3598
3599 /* Try to find a good way to implement the combiner pattern
3600 [(set (match_operand:SI 0 "register_operand" "=r")
3601 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3602 (match_operand:SI 2 "const_int_operand" "n")
3603 (match_operand:SI 3 "const_int_operand" "n")
3604 (const_int 0)))
3605 (clobber (reg:SI T_REG))]
3606 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3607 return 0 for simple left / right shift combination.
3608 return 1 for left shift / 8 bit sign extend / left shift.
3609 return 2 for left shift / 16 bit sign extend / left shift.
3610 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3611 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3612 return 5 for left shift / 16 bit sign extend / right shift
3613 return 6 for < 8 bit sign extend / left shift.
3614 return 7 for < 8 bit sign extend / left shift / single right shift.
3615 If COSTP is nonzero, assign the calculated cost to *COSTP. */
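/* For example, with LEFT == 2 and SIZE == 10 (so INSIZE == 8), kind 1 is
normally the cheapest choice: no initial shift, an 8-bit sign extend
(exts.b), then a left shift by two (shll2). */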
3616
3617 int
3618 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3619 {
3620 int left, size, insize, ext;
3621 int cost = 0, best_cost;
3622 int kind;
3623
3624 left = INTVAL (left_rtx);
3625 size = INTVAL (size_rtx);
3626 insize = size - left;
3627 gcc_assert (insize > 0);
3628 /* Default to left / right shift. */
3629 kind = 0;
3630 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3631 if (size <= 16)
3632 {
3633 /* 16 bit shift / sign extend / 16 bit shift */
3634 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3635 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3636 below, by alternative 3 or something even better. */
3637 if (cost < best_cost)
3638 {
3639 kind = 5;
3640 best_cost = cost;
3641 }
3642 }
3643 /* Try a plain sign extend between two shifts. */
3644 for (ext = 16; ext >= insize; ext -= 8)
3645 {
3646 if (ext <= size)
3647 {
3648 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3649 if (cost < best_cost)
3650 {
3651 kind = ext / (unsigned) 8;
3652 best_cost = cost;
3653 }
3654 }
3655 /* Check if we can do a sloppy shift with a final signed shift
3656 restoring the sign. */
3657 if (EXT_SHIFT_SIGNED (size - ext))
3658 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3659 /* If not, maybe it's still cheaper to do the second shift sloppy,
3660 and do a final sign extend? */
3661 else if (size <= 16)
3662 cost = ext_shift_insns[ext - insize] + 1
3663 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3664 else
3665 continue;
3666 if (cost < best_cost)
3667 {
3668 kind = ext / (unsigned) 8 + 2;
3669 best_cost = cost;
3670 }
3671 }
3672 /* Check if we can sign extend in r0. */
3673 if (insize < 8)
3674 {
3675 cost = 3 + shift_insns[left];
3676 if (cost < best_cost)
3677 {
3678 kind = 6;
3679 best_cost = cost;
3680 }
3681 /* Try the same with a final signed shift. */
3682 if (left < 31)
3683 {
3684 cost = 3 + ext_shift_insns[left + 1] + 1;
3685 if (cost < best_cost)
3686 {
3687 kind = 7;
3688 best_cost = cost;
3689 }
3690 }
3691 }
3692 if (TARGET_SH3)
3693 {
3694 /* Try to use a dynamic shift. */
3695 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3696 if (cost < best_cost)
3697 {
3698 kind = 0;
3699 best_cost = cost;
3700 }
3701 }
3702 if (costp)
3703 *costp = cost;
3704 return kind;
3705 }
3706
3707 /* Function to be used in the length attribute of the instructions
3708 implementing this pattern. */
3709
3710 int
3711 shl_sext_length (rtx insn)
3712 {
3713 rtx set_src, left_rtx, size_rtx;
3714 int cost;
3715
3716 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3717 left_rtx = XEXP (XEXP (set_src, 0), 1);
3718 size_rtx = XEXP (set_src, 1);
3719 shl_sext_kind (left_rtx, size_rtx, &cost);
3720 return cost;
3721 }
3722
3723 /* Generate rtl for this pattern. */
3724
3725 int
3726 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3727 {
3728 int kind;
3729 int left, size, insize, cost;
3730 rtx operands[3];
3731
3732 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3733 left = INTVAL (left_rtx);
3734 size = INTVAL (size_rtx);
3735 insize = size - left;
3736 switch (kind)
3737 {
3738 case 1:
3739 case 2:
3740 case 3:
3741 case 4:
3742 {
3743 int ext = kind & 1 ? 8 : 16;
3744 int shift2 = size - ext;
3745
3746 /* Don't expand fine-grained when combining, because that will
3747 make the pattern fail. */
3748 if (! currently_expanding_to_rtl
3749 && ! reload_in_progress && ! reload_completed)
3750 {
3751 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3752 emit_insn (gen_movsi (dest, source));
3753 break;
3754 }
3755 if (dest != source)
3756 emit_insn (gen_movsi (dest, source));
3757 operands[0] = dest;
3758 if (ext - insize)
3759 {
3760 operands[2] = GEN_INT (ext - insize);
3761 gen_shifty_hi_op (ASHIFT, operands);
3762 }
3763 emit_insn (kind & 1
3764 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3765 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3766 if (kind <= 2)
3767 {
3768 if (shift2)
3769 {
3770 operands[2] = GEN_INT (shift2);
3771 gen_shifty_op (ASHIFT, operands);
3772 }
3773 }
3774 else
3775 {
3776 if (shift2 > 0)
3777 {
3778 if (EXT_SHIFT_SIGNED (shift2))
3779 {
3780 operands[2] = GEN_INT (shift2 + 1);
3781 gen_shifty_op (ASHIFT, operands);
3782 operands[2] = const1_rtx;
3783 gen_shifty_op (ASHIFTRT, operands);
3784 break;
3785 }
3786 operands[2] = GEN_INT (shift2);
3787 gen_shifty_hi_op (ASHIFT, operands);
3788 }
3789 else if (shift2)
3790 {
3791 operands[2] = GEN_INT (-shift2);
3792 gen_shifty_hi_op (LSHIFTRT, operands);
3793 }
3794 emit_insn (size <= 8
3795 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3796 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3797 }
3798 break;
3799 }
3800 case 5:
3801 {
3802 int i = 16 - size;
3803 if (! currently_expanding_to_rtl
3804 && ! reload_in_progress && ! reload_completed)
3805 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3806 else
3807 {
3808 operands[0] = dest;
3809 operands[2] = GEN_INT (16 - insize);
3810 gen_shifty_hi_op (ASHIFT, operands);
3811 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3812 }
3813 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3814 while (--i >= 0)
3815 gen_ashift (ASHIFTRT, 1, dest);
3816 break;
3817 }
3818 case 6:
3819 case 7:
3820 /* Don't expand fine-grained when combining, because that will
3821 make the pattern fail. */
3822 if (! currently_expanding_to_rtl
3823 && ! reload_in_progress && ! reload_completed)
3824 {
3825 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3826 emit_insn (gen_movsi (dest, source));
3827 break;
3828 }
3829 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3830 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3831 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3832 operands[0] = dest;
3833 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3834 gen_shifty_op (ASHIFT, operands);
3835 if (kind == 7)
3836 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3837 break;
3838 default:
3839 return -1;
3840 }
3841 return 0;
3842 }
3843
3844 /* Prefix a symbol_ref name with "datalabel". */
3845
3846 rtx
3847 gen_datalabel_ref (rtx sym)
3848 {
3849 const char *str;
3850
3851 if (GET_CODE (sym) == LABEL_REF)
3852 return gen_rtx_CONST (GET_MODE (sym),
3853 gen_rtx_UNSPEC (GET_MODE (sym),
3854 gen_rtvec (1, sym),
3855 UNSPEC_DATALABEL));
3856
3857 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3858
3859 str = XSTR (sym, 0);
3860 /* Share all SYMBOL_REF strings with the same value - that is important
3861 for cse. */
3862 str = IDENTIFIER_POINTER (get_identifier (str));
3863 XSTR (sym, 0) = str;
3864
3865 return sym;
3866 }
3867
3868 \f
3869 static alloc_pool label_ref_list_pool;
3870
3871 typedef struct label_ref_list_d
3872 {
3873 rtx label;
3874 struct label_ref_list_d *next;
3875 } *label_ref_list_t;
3876
3877 /* The SH cannot load a large constant into a register; constants have to
3878 come from a pc relative load. The reference of a pc relative load
3879 instruction must be less than 1k in front of the instruction. This
3880 means that we often have to dump a constant inside a function, and
3881 generate code to branch around it.
3882
3883 It is important to minimize this, since the branches will slow things
3884 down and make things bigger.
3885
3886 Worst case code looks like:
3887
3888 mov.l L1,rn
3889 bra L2
3890 nop
3891 align
3892 L1: .long value
3893 L2:
3894 ..
3895
3896 mov.l L3,rn
3897 bra L4
3898 nop
3899 align
3900 L3: .long value
3901 L4:
3902 ..
3903
3904 We fix this by performing a scan before scheduling, which notices which
3905 instructions need to have their operands fetched from the constant table
3906 and builds the table.
3907
3908 The algorithm is:
3909
3910 scan, find an instruction which needs a pcrel move. Look forward, find the
3911 last barrier which is within MAX_COUNT bytes of the requirement.
3912 If there isn't one, make one. Process all the instructions between
3913 the find and the barrier.
3914
3915 In the above example, we can tell that L3 is within 1k of L1, so
3916 the first move can be shrunk from the 3 insn+constant sequence into
3917 just 1 insn, and the constant moved to L3 to make:
3918
3919 mov.l L1,rn
3920 ..
3921 mov.l L3,rn
3922 bra L4
3923 nop
3924 align
3925 L3:.long value
3926 L4:.long value
3927
3928 Then the second move becomes the target for the shortening process. */
3929
3930 typedef struct
3931 {
3932 rtx value; /* Value in table. */
3933 rtx label; /* Label of value. */
3934 label_ref_list_t wend; /* End of window. */
3935 enum machine_mode mode; /* Mode of value. */
3936
3937 /* True if this constant is accessed as part of a post-increment
3938 sequence. Note that HImode constants are never accessed in this way. */
3939 bool part_of_sequence_p;
3940 } pool_node;
3941
3942 /* The maximum number of constants that can fit into one pool, since
3943 constants in the range 0..510 are at least 2 bytes long, and in the
3944 range from there to 1018 at least 4 bytes. */
3945
3946 #define MAX_POOL_SIZE 372
3947 static pool_node pool_vector[MAX_POOL_SIZE];
3948 static int pool_size;
3949 static rtx pool_window_label;
3950 static int pool_window_last;
3951
3952 static int max_labelno_before_reorg;
3953
3954 /* ??? If we need a constant in HImode which is the truncated value of a
3955 constant we need in SImode, we could combine the two entries thus saving
3956 two bytes. Is this common enough to be worth the effort of implementing
3957 it? */
3958
3959 /* ??? This stuff should be done at the same time that we shorten branches.
3960 As it is now, we must assume that all branches are the maximum size, and
3961 this causes us to almost always output constant pools sooner than
3962 necessary. */
3963
3964 /* Add a constant to the pool and return its label. */
3965
3966 static rtx
3967 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3968 {
3969 int i;
3970 rtx lab, new_rtx;
3971 label_ref_list_t ref, newref;
3972
3973 /* First see if we've already got it. */
3974 for (i = 0; i < pool_size; i++)
3975 {
3976 if (x->code == pool_vector[i].value->code
3977 && mode == pool_vector[i].mode)
3978 {
3979 if (x->code == CODE_LABEL)
3980 {
3981 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3982 continue;
3983 }
3984 if (rtx_equal_p (x, pool_vector[i].value))
3985 {
3986 lab = new_rtx = 0;
3987 if (! last_value
3988 || ! i
3989 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3990 {
3991 new_rtx = gen_label_rtx ();
3992 LABEL_REFS (new_rtx) = pool_vector[i].label;
3993 pool_vector[i].label = lab = new_rtx;
3994 }
3995 if (lab && pool_window_label)
3996 {
3997 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3998 newref->label = pool_window_label;
3999 ref = pool_vector[pool_window_last].wend;
4000 newref->next = ref;
4001 pool_vector[pool_window_last].wend = newref;
4002 }
4003 if (new_rtx)
4004 pool_window_label = new_rtx;
4005 pool_window_last = i;
4006 return lab;
4007 }
4008 }
4009 }
4010
4011 /* Need a new one. */
4012 pool_vector[pool_size].value = x;
4013 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4014 {
4015 lab = 0;
4016 pool_vector[pool_size - 1].part_of_sequence_p = true;
4017 }
4018 else
4019 lab = gen_label_rtx ();
4020 pool_vector[pool_size].mode = mode;
4021 pool_vector[pool_size].label = lab;
4022 pool_vector[pool_size].wend = NULL;
4023 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4024 if (lab && pool_window_label)
4025 {
4026 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4027 newref->label = pool_window_label;
4028 ref = pool_vector[pool_window_last].wend;
4029 newref->next = ref;
4030 pool_vector[pool_window_last].wend = newref;
4031 }
4032 if (lab)
4033 pool_window_label = lab;
4034 pool_window_last = pool_size;
4035 pool_size++;
4036 return lab;
4037 }
4038
4039 /* Output the literal table. START, if nonzero, is the first instruction
4040 this table is needed for, and also indicates that there is at least one
4041 casesi_worker_2 instruction; we have to emit the operand3 labels from
4042 these insns at a 4-byte aligned position. BARRIER is the barrier
4043 after which we are to place the table. */
4044
4045 static void
4046 dump_table (rtx start, rtx barrier)
4047 {
4048 rtx scan = barrier;
4049 int i;
4050 int need_align = 1;
4051 rtx lab;
4052 label_ref_list_t ref;
4053 int have_df = 0;
4054
4055 /* Do two passes, first time dump out the HI sized constants. */
4056
4057 for (i = 0; i < pool_size; i++)
4058 {
4059 pool_node *p = &pool_vector[i];
4060
4061 if (p->mode == HImode)
4062 {
4063 if (need_align)
4064 {
4065 scan = emit_insn_after (gen_align_2 (), scan);
4066 need_align = 0;
4067 }
4068 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4069 scan = emit_label_after (lab, scan);
4070 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4071 scan);
4072 for (ref = p->wend; ref; ref = ref->next)
4073 {
4074 lab = ref->label;
4075 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4076 }
4077 }
4078 else if (p->mode == DFmode)
4079 have_df = 1;
4080 }
4081
4082 need_align = 1;
4083
4084 if (start)
4085 {
4086 scan = emit_insn_after (gen_align_4 (), scan);
4087 need_align = 0;
4088 for (; start != barrier; start = NEXT_INSN (start))
4089 if (NONJUMP_INSN_P (start)
4090 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4091 {
4092 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4093 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4094
4095 scan = emit_label_after (lab, scan);
4096 }
4097 }
4098 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4099 {
4100 rtx align_insn = NULL_RTX;
4101
4102 scan = emit_label_after (gen_label_rtx (), scan);
4103 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4104 need_align = 0;
4105
4106 for (i = 0; i < pool_size; i++)
4107 {
4108 pool_node *p = &pool_vector[i];
4109
4110 switch (p->mode)
4111 {
4112 case HImode:
4113 break;
4114 case SImode:
4115 case SFmode:
4116 if (align_insn && !p->part_of_sequence_p)
4117 {
4118 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4119 emit_label_before (lab, align_insn);
4120 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4121 align_insn);
4122 for (ref = p->wend; ref; ref = ref->next)
4123 {
4124 lab = ref->label;
4125 emit_insn_before (gen_consttable_window_end (lab),
4126 align_insn);
4127 }
4128 delete_insn (align_insn);
4129 align_insn = NULL_RTX;
4130 continue;
4131 }
4132 else
4133 {
4134 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4135 scan = emit_label_after (lab, scan);
4136 scan = emit_insn_after (gen_consttable_4 (p->value,
4137 const0_rtx), scan);
4138 need_align = ! need_align;
4139 }
4140 break;
4141 case DFmode:
4142 if (need_align)
4143 {
4144 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4145 align_insn = scan;
4146 need_align = 0;
4147 }
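/* Fall through. */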
4148 case DImode:
4149 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4150 scan = emit_label_after (lab, scan);
4151 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4152 scan);
4153 break;
4154 default:
4155 gcc_unreachable ();
4156 }
4157
4158 if (p->mode != HImode)
4159 {
4160 for (ref = p->wend; ref; ref = ref->next)
4161 {
4162 lab = ref->label;
4163 scan = emit_insn_after (gen_consttable_window_end (lab),
4164 scan);
4165 }
4166 }
4167 }
4168
4169 pool_size = 0;
4170 }
4171
4172 for (i = 0; i < pool_size; i++)
4173 {
4174 pool_node *p = &pool_vector[i];
4175
4176 switch (p->mode)
4177 {
4178 case HImode:
4179 break;
4180 case SImode:
4181 case SFmode:
4182 if (need_align)
4183 {
4184 need_align = 0;
4185 scan = emit_label_after (gen_label_rtx (), scan);
4186 scan = emit_insn_after (gen_align_4 (), scan);
4187 }
4188 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4189 scan = emit_label_after (lab, scan);
4190 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4191 scan);
4192 break;
4193 case DFmode:
4194 case DImode:
4195 if (need_align)
4196 {
4197 need_align = 0;
4198 scan = emit_label_after (gen_label_rtx (), scan);
4199 scan = emit_insn_after (gen_align_4 (), scan);
4200 }
4201 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4202 scan = emit_label_after (lab, scan);
4203 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4204 scan);
4205 break;
4206 default:
4207 gcc_unreachable ();
4208 }
4209
4210 if (p->mode != HImode)
4211 {
4212 for (ref = p->wend; ref; ref = ref->next)
4213 {
4214 lab = ref->label;
4215 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4216 }
4217 }
4218 }
4219
4220 scan = emit_insn_after (gen_consttable_end (), scan);
4221 scan = emit_barrier_after (scan);
4222 pool_size = 0;
4223 pool_window_label = NULL_RTX;
4224 pool_window_last = 0;
4225 }
4226
4227 /* Return nonzero if constant would be an ok source for a
4228 mov.w instead of a mov.l. */
4229
4230 static int
4231 hi_const (rtx src)
4232 {
4233 return (CONST_INT_P (src)
4234 && INTVAL (src) >= -32768
4235 && INTVAL (src) <= 32767);
4236 }
4237
4238 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4239
4240 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4241
4242 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4243 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4244 need to fix it if the input value is CONST_OK_FOR_I08. */
4245
4246 static int
4247 broken_move (rtx insn)
4248 {
4249 if (NONJUMP_INSN_P (insn))
4250 {
4251 rtx pat = PATTERN (insn);
4252 if (GET_CODE (pat) == PARALLEL)
4253 pat = XVECEXP (pat, 0, 0);
4254 if (GET_CODE (pat) == SET
4255 /* We can load any 8-bit value if we don't care what the high
4256 order bits end up as. */
4257 && GET_MODE (SET_DEST (pat)) != QImode
4258 && (CONSTANT_P (SET_SRC (pat))
4259 /* Match mova_const. */
4260 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4261 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4262 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4263 && ! (TARGET_SH2E
4264 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4265 && (fp_zero_operand (SET_SRC (pat))
4266 || fp_one_operand (SET_SRC (pat)))
4267 /* In general we don't know the current setting of fpscr, so disable fldi.
4268 There is an exception if this was a register-register move
4269 before reload - and hence it was ascertained that we have
4270 single precision setting - and in a post-reload optimization
4271 we changed this to do a constant load. In that case
4272 we don't have an r0 clobber, hence we must use fldi. */
4273 && (TARGET_FMOVD
4274 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4275 == SCRATCH))
4276 && REG_P (SET_DEST (pat))
4277 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4278 && ! (TARGET_SH2A
4279 && GET_MODE (SET_DEST (pat)) == SImode
4280 && (satisfies_constraint_I20 (SET_SRC (pat))
4281 || satisfies_constraint_I28 (SET_SRC (pat))))
4282 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4283 return 1;
4284 }
4285
4286 return 0;
4287 }
4288
4289 static int
4290 mova_p (rtx insn)
4291 {
4292 return (NONJUMP_INSN_P (insn)
4293 && GET_CODE (PATTERN (insn)) == SET
4294 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4295 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4296 /* Don't match mova_const. */
4297 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4298 }
4299
4300 /* Fix up a mova from a switch that went out of range. */
4301 static void
4302 fixup_mova (rtx mova)
4303 {
4304 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4305 if (! flag_pic)
4306 {
4307 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4308 INSN_CODE (mova) = -1;
4309 }
4310 else
4311 {
4312 rtx worker = mova;
4313 rtx lab = gen_label_rtx ();
4314 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4315
4316 do
4317 {
4318 worker = NEXT_INSN (worker);
4319 gcc_assert (worker
4320 && !LABEL_P (worker)
4321 && !JUMP_P (worker));
4322 } while (NOTE_P (worker)
4323 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4324 wpat = PATTERN (worker);
4325 wpat0 = XVECEXP (wpat, 0, 0);
4326 wpat1 = XVECEXP (wpat, 0, 1);
4327 wsrc = SET_SRC (wpat0);
4328 PATTERN (worker) = (gen_casesi_worker_2
4329 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4330 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4331 XEXP (wpat1, 0)));
4332 INSN_CODE (worker) = -1;
4333 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4334 base = gen_rtx_LABEL_REF (Pmode, lab);
4335 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4336 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4337 INSN_CODE (mova) = -1;
4338 }
4339 }
4340
4341 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4342 *num_mova, and check that the new mova is not nested within the first one.
4343 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4344 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4345 static int
4346 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4347 {
4348 int n_addr = 0; /* Initialization to shut up spurious warning. */
4349 int f_target, n_target = 0; /* Likewise. */
4350
4351 if (optimize)
4352 {
4353 /* If NEW_MOVA has no address yet, it will be handled later. */
4354 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4355 return -1;
4356
4357 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4358 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4359 if (n_addr > n_target || n_addr + 1022 < n_target)
4360 {
4361 /* Change the mova into a load.
4362 broken_move will then return true for it. */
4363 fixup_mova (new_mova);
4364 return 1;
4365 }
4366 }
4367 if (!(*num_mova)++)
4368 {
4369 *first_mova = new_mova;
4370 return 2;
4371 }
4372 if (!optimize
4373 || ((f_target
4374 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4375 >= n_target))
4376 return -1;
4377
4378 (*num_mova)--;
4379 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4380 > n_target - n_addr)
4381 {
4382 fixup_mova (*first_mova);
4383 return 0;
4384 }
4385 else
4386 {
4387 fixup_mova (new_mova);
4388 return 1;
4389 }
4390 }
4391
4392 /* Find the last barrier from insn FROM which is close enough to hold the
4393 constant pool. If we can't find one, then create one near the end of
4394 the range. */
4395
4396 static rtx
4397 find_barrier (int num_mova, rtx mova, rtx from)
4398 {
4399 int count_si = 0;
4400 int count_hi = 0;
4401 int found_hi = 0;
4402 int found_si = 0;
4403 int found_di = 0;
4404 int hi_align = 2;
4405 int si_align = 2;
4406 int leading_mova = num_mova;
4407 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4408 int si_limit;
4409 int hi_limit;
4410 rtx orig = from;
4411 rtx last_got = NULL_RTX;
4412 rtx last_symoff = NULL_RTX;
4413
4414 /* For HImode: range is 510, add 4 because pc counts from address of
4415 second instruction after this one, subtract 2 for the jump instruction
4416 that we may need to emit before the table, subtract 2 for the instruction
4417 that fills the jump delay slot (in very rare cases, reorg will take an
4418 instruction from after the constant pool or will leave the delay slot
4419 empty). This gives 510.
4420 For SImode: range is 1020, add 4 because pc counts from address of
4421 second instruction after this one, subtract 2 in case pc is 2 byte
4422 aligned, subtract 2 for the jump instruction that we may need to emit
4423 before the table, subtract 2 for the instruction that fills the jump
4424 delay slot. This gives 1018. */
4425
4426 /* The branch will always be shortened now that the reference address for
4427 forward branches is the successor address, thus we need no longer make
4428 adjustments to the [sh]i_limit for -O0. */
4429
4430 si_limit = 1018;
4431 hi_limit = 510;
4432
4433 while (from && count_si < si_limit && count_hi < hi_limit)
4434 {
4435 int inc = get_attr_length (from);
4436 int new_align = 1;
4437
4438 /* If this is a label that existed at the time of the compute_alignments
4439 call, determine the alignment. N.B. When find_barrier recurses for
4440 an out-of-reach mova, we might see labels at the start of previously
4441 inserted constant tables. */
4442 if (LABEL_P (from)
4443 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4444 {
4445 if (optimize)
4446 new_align = 1 << label_to_alignment (from);
4447 else if (BARRIER_P (prev_nonnote_insn (from)))
4448 new_align = 1 << barrier_align (from);
4449 else
4450 new_align = 1;
4451 inc = 0;
4452 }
4453 /* In case we are scanning a constant table because of recursion, check
4454 for explicit alignments. If the table is long, we might be forced
4455 to emit the new table in front of it; the length of the alignment
4456 might be the last straw. */
4457 else if (NONJUMP_INSN_P (from)
4458 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4459 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4460 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4461 /* When we find the end of a constant table, paste the new constant
4462 at the end. That is better than putting it in front because
4463 this way, we don't need extra alignment for adding a 4-byte-aligned
4464 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4465 else if (NONJUMP_INSN_P (from)
4466 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4467 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4468 return from;
4469
4470 if (BARRIER_P (from))
4471 {
4472 rtx next;
4473
4474 found_barrier = from;
4475
4476 /* If we are at the end of the function, or in front of an alignment
4477 instruction, we need not insert an extra alignment. We prefer
4478 this kind of barrier. */
4479 if (barrier_align (from) > 2)
4480 good_barrier = from;
4481
4482 /* If we are at the end of a hot/cold block, dump the constants
4483 here. */
4484 next = NEXT_INSN (from);
4485 if (next
4486 && NOTE_P (next)
4487 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4488 break;
4489 }
4490
4491 if (broken_move (from))
4492 {
4493 rtx pat, src, dst;
4494 enum machine_mode mode;
4495
4496 pat = PATTERN (from);
4497 if (GET_CODE (pat) == PARALLEL)
4498 pat = XVECEXP (pat, 0, 0);
4499 src = SET_SRC (pat);
4500 dst = SET_DEST (pat);
4501 mode = GET_MODE (dst);
4502
4503 /* GOT pc-relative setting comes in a pair of
4504 mova .L8,r0
4505 mov.l .L8,r12
4506 instructions. (plus add r0,r12).
4507 Remember if we see one without the other. */
4508 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4509 last_got = last_got ? NULL_RTX : from;
4510 else if (PIC_ADDR_P (src))
4511 last_got = last_got ? NULL_RTX : from;
4512
4513 /* We must explicitly check the mode, because sometimes the
4514 front end will generate code to load unsigned constants into
4515 HImode targets without properly sign extending them. */
4516 if (mode == HImode
4517 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4518 {
4519 found_hi += 2;
4520 /* We put the short constants before the long constants, so
4521 we must count the length of short constants in the range
4522 for the long constants. */
4523 /* ??? This isn't optimal, but is easy to do. */
4524 si_limit -= 2;
4525 }
4526 else
4527 {
4528 /* We dump DF/DI constants before SF/SI ones, because
4529 the limit is the same, but the alignment requirements
4530 are higher. We may waste up to 4 additional bytes
4531 for alignment, and the DF/DI constant may have
4532 another SF/SI constant placed before it. */
4533 if (TARGET_SHCOMPACT
4534 && ! found_di
4535 && (mode == DFmode || mode == DImode))
4536 {
4537 found_di = 1;
4538 si_limit -= 8;
4539 }
4540 while (si_align > 2 && found_si + si_align - 2 > count_si)
4541 si_align >>= 1;
4542 if (found_si > count_si)
4543 count_si = found_si;
4544 found_si += GET_MODE_SIZE (mode);
4545 if (num_mova)
4546 si_limit -= GET_MODE_SIZE (mode);
4547 }
4548 }
4549
4550 if (mova_p (from))
4551 {
4552 switch (untangle_mova (&num_mova, &mova, from))
4553 {
4554 case 1:
4555 if (flag_pic)
4556 {
4557 rtx src = SET_SRC (PATTERN (from));
4558 if (GET_CODE (src) == CONST
4559 && GET_CODE (XEXP (src, 0)) == UNSPEC
4560 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4561 last_symoff = from;
4562 }
4563 break;
4564 case 0: return find_barrier (0, 0, mova);
4565 case 2:
4566 {
4567 leading_mova = 0;
4568 barrier_before_mova
4569 = good_barrier ? good_barrier : found_barrier;
4570 }
4571 default: break;
4572 }
4573 if (found_si > count_si)
4574 count_si = found_si;
4575 }
4576 else if (JUMP_TABLE_DATA_P (from))
4577 {
4578 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4579 || (num_mova
4580 && (prev_nonnote_insn (from)
4581 == XEXP (MOVA_LABELREF (mova), 0))))
4582 num_mova--;
4583 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4584 {
4585 /* We have just passed the barrier in front of the
4586 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4587 the ADDR_DIFF_VEC is accessed as data, just like our pool
4588 constants, this is a good opportunity to accommodate what
4589 we have gathered so far.
4590 If we waited any longer, we could end up at a barrier in
4591 front of code, which gives worse cache usage for separated
4592 instruction / data caches. */
4593 good_barrier = found_barrier;
4594 break;
4595 }
4596 else
4597 {
4598 rtx body = PATTERN (from);
4599 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4600 }
4601 }
4602 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4603 else if (JUMP_P (from)
4604 && ! TARGET_SH2
4605 && ! TARGET_SMALLCODE)
4606 new_align = 4;
4607
4608 /* There is a possibility that a bf is transformed into a bf/s by the
4609 delay slot scheduler. */
4610 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4611 && get_attr_type (from) == TYPE_CBRANCH
4612 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4613 inc += 2;
4614
4615 if (found_si)
4616 {
4617 count_si += inc;
4618 if (new_align > si_align)
4619 {
4620 si_limit -= (count_si - 1) & (new_align - si_align);
4621 si_align = new_align;
4622 }
4623 count_si = (count_si + new_align - 1) & -new_align;
4624 }
4625 if (found_hi)
4626 {
4627 count_hi += inc;
4628 if (new_align > hi_align)
4629 {
4630 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4631 hi_align = new_align;
4632 }
4633 count_hi = (count_hi + new_align - 1) & -new_align;
4634 }
4635 from = NEXT_INSN (from);
4636 }
4637
4638 if (num_mova)
4639 {
4640 if (leading_mova)
4641 {
4642 /* Try as we might, the leading mova is out of range. Change
4643 it into a load (which will become a pcload) and retry. */
4644 fixup_mova (mova);
4645 return find_barrier (0, 0, mova);
4646 }
4647 else
4648 {
4649 /* Insert the constant pool table before the mova instruction,
4650 to prevent the mova label reference from going out of range. */
4651 from = mova;
4652 good_barrier = found_barrier = barrier_before_mova;
4653 }
4654 }
4655
4656 if (found_barrier)
4657 {
4658 if (good_barrier && next_real_insn (found_barrier))
4659 found_barrier = good_barrier;
4660 }
4661 else
4662 {
4663 /* We didn't find a barrier in time to dump our stuff,
4664 so we'll make one. */
4665 rtx label = gen_label_rtx ();
4666
4667 /* Don't emit a constant table in the middle of insns for
4668 casesi_worker_2. This is a bit of overkill, but it is enough
4669 because casesi_worker_2 does not appear very often. */
4670 if (last_symoff)
4671 from = last_symoff;
4672
4673 /* If we exceeded the range, then we must back up over the last
4674 instruction we looked at. Otherwise, we just need to undo the
4675 NEXT_INSN at the end of the loop. */
4676 if (PREV_INSN (from) != orig
4677 && (count_hi > hi_limit || count_si > si_limit))
4678 from = PREV_INSN (PREV_INSN (from));
4679 else
4680 from = PREV_INSN (from);
4681
4682 /* Don't emit a constant table in the middle of global pointer setting,
4683 since that would move the addressing base GOT into another table.
4684 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4685 in the pool anyway, so just move up the whole constant pool. */
4686 if (last_got)
4687 from = PREV_INSN (last_got);
4688
4689 /* Don't insert the constant pool table at the position which
4690 may be the landing pad. */
4691 if (flag_exceptions
4692 && CALL_P (from)
4693 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4694 from = PREV_INSN (from);
4695
4696 /* Walk back to be just before any jump or label.
4697 Putting it before a label reduces the number of times the branch
4698 around the constant pool table will be hit. Putting it before
4699 a jump makes it more likely that the bra delay slot will be
4700 filled. */
4701 while (NOTE_P (from) || JUMP_P (from)
4702 || LABEL_P (from))
4703 from = PREV_INSN (from);
4704
4705 from = emit_jump_insn_after (gen_jump (label), from);
4706 JUMP_LABEL (from) = label;
4707 LABEL_NUSES (label) = 1;
4708 found_barrier = emit_barrier_after (from);
4709 emit_label_after (label, found_barrier);
4710 }
4711
4712 return found_barrier;
4713 }
4714
4715 /* If the instruction INSN is implemented by a special function, and we can
4716 positively find the register that is used to call the sfunc, and this
4717 register is not used anywhere else in this instruction - except as the
4718 destination of a set, return this register; else, return 0. */
4719 rtx
4720 sfunc_uses_reg (rtx insn)
4721 {
4722 int i;
4723 rtx pattern, part, reg_part, reg;
4724
4725 if (!NONJUMP_INSN_P (insn))
4726 return 0;
4727 pattern = PATTERN (insn);
4728 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4729 return 0;
4730
4731 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4732 {
4733 part = XVECEXP (pattern, 0, i);
4734 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4735 reg_part = part;
4736 }
4737 if (! reg_part)
4738 return 0;
4739 reg = XEXP (reg_part, 0);
4740 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4741 {
4742 part = XVECEXP (pattern, 0, i);
4743 if (part == reg_part || GET_CODE (part) == CLOBBER)
4744 continue;
4745 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4746 && REG_P (SET_DEST (part)))
4747 ? SET_SRC (part) : part)))
4748 return 0;
4749 }
4750 return reg;
4751 }
4752
4753 /* See if the only way in which INSN uses REG is by calling it, or by
4754 setting it while calling it. Set *SET to a SET rtx if the register
4755 is set by INSN. */
4756
4757 static int
4758 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4759 {
4760 rtx pattern, reg2;
4761
4762 *set = NULL_RTX;
4763
4764 reg2 = sfunc_uses_reg (insn);
4765 if (reg2 && REGNO (reg2) == REGNO (reg))
4766 {
4767 pattern = single_set (insn);
4768 if (pattern
4769 && REG_P (SET_DEST (pattern))
4770 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4771 *set = pattern;
4772 return 0;
4773 }
4774 if (!CALL_P (insn))
4775 {
4776 /* We don't use rtx_equal_p because we don't care if the mode is
4777 different. */
4778 pattern = single_set (insn);
4779 if (pattern
4780 && REG_P (SET_DEST (pattern))
4781 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4782 {
4783 rtx par, part;
4784 int i;
4785
4786 *set = pattern;
4787 par = PATTERN (insn);
4788 if (GET_CODE (par) == PARALLEL)
4789 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4790 {
4791 part = XVECEXP (par, 0, i);
4792 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4793 return 1;
4794 }
4795 return reg_mentioned_p (reg, SET_SRC (pattern));
4796 }
4797
4798 return 1;
4799 }
4800
4801 pattern = PATTERN (insn);
4802
4803 if (GET_CODE (pattern) == PARALLEL)
4804 {
4805 int i;
4806
4807 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4808 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4809 return 1;
4810 pattern = XVECEXP (pattern, 0, 0);
4811 }
4812
4813 if (GET_CODE (pattern) == SET)
4814 {
4815 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4816 {
4817 /* We don't use rtx_equal_p, because we don't care if the
4818 mode is different. */
4819 if (!REG_P (SET_DEST (pattern))
4820 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4821 return 1;
4822
4823 *set = pattern;
4824 }
4825
4826 pattern = SET_SRC (pattern);
4827 }
4828
4829 if (GET_CODE (pattern) != CALL
4830 || !MEM_P (XEXP (pattern, 0))
4831 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4832 return 1;
4833
4834 return 0;
4835 }
4836
4837 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4838 general registers. Bits 0..15 mean that the respective registers
4839 are used as inputs in the instruction. Bits 16..31 mean that the
4840 registers 0..15, respectively, are used as outputs, or are clobbered.
4841 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
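/* For example, regs_used ((set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))), 0)
returns 0x0002000c: bits 2 and 3 for the input registers, and bit 17
(16 + 1) for the output register. */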
4842 int
4843 regs_used (rtx x, int is_dest)
4844 {
4845 enum rtx_code code;
4846 const char *fmt;
4847 int i, used = 0;
4848
4849 if (! x)
4850 return used;
4851 code = GET_CODE (x);
4852 switch (code)
4853 {
4854 case REG:
4855 if (REGNO (x) < 16)
4856 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4857 << (REGNO (x) + is_dest));
4858 return 0;
4859 case SUBREG:
4860 {
4861 rtx y = SUBREG_REG (x);
4862
4863 if (!REG_P (y))
4864 break;
4865 if (REGNO (y) < 16)
4866 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4867 << (REGNO (y) +
4868 subreg_regno_offset (REGNO (y),
4869 GET_MODE (y),
4870 SUBREG_BYTE (x),
4871 GET_MODE (x)) + is_dest));
4872 return 0;
4873 }
4874 case SET:
4875 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4876 case RETURN:
4877 /* If there was a return value, it must have been indicated with USE. */
4878 return 0x00ffff00;
4879 case CLOBBER:
4880 is_dest = 1;
4881 break;
4882 case MEM:
4883 is_dest = 0;
4884 break;
4885 case CALL:
4886 used |= 0x00ff00f0;
4887 break;
4888 default:
4889 break;
4890 }
4891
4892 fmt = GET_RTX_FORMAT (code);
4893
4894 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4895 {
4896 if (fmt[i] == 'E')
4897 {
4898 register int j;
4899 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4900 used |= regs_used (XVECEXP (x, i, j), is_dest);
4901 }
4902 else if (fmt[i] == 'e')
4903 used |= regs_used (XEXP (x, i), is_dest);
4904 }
4905 return used;
4906 }
4907
4908 /* Create an instruction that prevents redirection of a conditional branch
4909 to the destination of the JUMP with address ADDR.
4910 If the branch needs to be implemented as an indirect jump, try to find
4911 a scratch register for it.
4912 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4913 If any preceding insn that doesn't fit into a delay slot is good enough,
4914 pass 1. Pass 2 if a definite blocking insn is needed.
4915 -1 is used internally to avoid deep recursion.
4916 If a blocking instruction is made or recognized, return it. */
4917
4918 static rtx
4919 gen_block_redirect (rtx jump, int addr, int need_block)
4920 {
4921 int dead = 0;
4922 rtx prev = prev_nonnote_insn (jump);
4923 rtx dest;
4924
4925 /* First, check if we already have an instruction that satisfies our need. */
4926 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4927 {
4928 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4929 return prev;
4930 if (GET_CODE (PATTERN (prev)) == USE
4931 || GET_CODE (PATTERN (prev)) == CLOBBER
4932 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4933 prev = jump;
4934 else if ((need_block &= ~1) < 0)
4935 return prev;
4936 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4937 need_block = 0;
4938 }
4939 if (GET_CODE (PATTERN (jump)) == RETURN)
4940 {
4941 if (! need_block)
4942 return prev;
4943 /* Reorg even does nasty things with return insns that cause branches
4944 to go out of range - see find_end_label and callers. */
4945 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4946 }
4947 /* We can't use JUMP_LABEL here because it might be undefined
4948 when not optimizing. */
4949 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4950 /* If the branch is out of range, try to find a scratch register for it. */
4951 if (optimize
4952 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4953 > 4092 + 4098))
4954 {
4955 rtx scan;
4956 /* Don't look for the stack pointer as a scratch register,
4957 it would cause trouble if an interrupt occurred. */
4958 unsigned attempt = 0x7fff, used;
4959 int jump_left = flag_expensive_optimizations + 1;
4960
4961 /* It is likely that the most recent eligible instruction is wanted for
4962 the delay slot. Therefore, find out which registers it uses, and
4963 try to avoid using them. */
4964
4965 for (scan = jump; (scan = PREV_INSN (scan)); )
4966 {
4967 enum rtx_code code;
4968
4969 if (INSN_DELETED_P (scan))
4970 continue;
4971 code = GET_CODE (scan);
4972 if (code == CODE_LABEL || code == JUMP_INSN)
4973 break;
4974 if (code == INSN
4975 && GET_CODE (PATTERN (scan)) != USE
4976 && GET_CODE (PATTERN (scan)) != CLOBBER
4977 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4978 {
4979 attempt &= ~regs_used (PATTERN (scan), 0);
4980 break;
4981 }
4982 }
4983 for (used = dead = 0, scan = JUMP_LABEL (jump);
4984 (scan = NEXT_INSN (scan)); )
4985 {
4986 enum rtx_code code;
4987
4988 if (INSN_DELETED_P (scan))
4989 continue;
4990 code = GET_CODE (scan);
4991 if (INSN_P (scan))
4992 {
4993 used |= regs_used (PATTERN (scan), 0);
4994 if (code == CALL_INSN)
4995 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4996 dead |= (used >> 16) & ~used;
4997 if (dead & attempt)
4998 {
4999 dead &= attempt;
5000 break;
5001 }
5002 if (code == JUMP_INSN)
5003 {
5004 if (jump_left-- && simplejump_p (scan))
5005 scan = JUMP_LABEL (scan);
5006 else
5007 break;
5008 }
5009 }
5010 }
5011 /* Mask out the stack pointer again, in case it was
5012 the only 'free' register we have found. */
5013 dead &= 0x7fff;
5014 }
5015 /* If the immediate destination is still in range, check for possible
5016 threading with a jump beyond the delay slot insn.
5017 Don't check if we are called recursively; the jump has been or will be
5018 checked in a different invocation then. */
5019
5020 else if (optimize && need_block >= 0)
5021 {
5022 rtx next = next_active_insn (next_active_insn (dest));
5023 if (next && JUMP_P (next)
5024 && GET_CODE (PATTERN (next)) == SET
5025 && recog_memoized (next) == CODE_FOR_jump_compact)
5026 {
5027 dest = JUMP_LABEL (next);
5028 if (dest
5029 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5030 > 4092 + 4098))
5031 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5032 }
5033 }
5034
5035 if (dead)
5036 {
5037 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5038
5039 /* It would be nice if we could convert the jump into an indirect
5040 jump / far branch right now, and thus exposing all constituent
5041 instructions to further optimization. However, reorg uses
5042 simplejump_p to determine if there is an unconditional jump where
5043 it should try to schedule instructions from the target of the
5044 branch; simplejump_p fails for indirect jumps even if they have
5045 a JUMP_LABEL. */
5046 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5047 (reg, GEN_INT (unspec_bbr_uid++)),
5048 jump);
5049 /* ??? We would like this to have the scope of the jump, but that
5050 scope will change when a delay slot insn of an inner scope is added.
5051 Hence, after delay slot scheduling, we'll have to expect
5052 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5053 the jump. */
5054
5055 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5056 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5057 return insn;
5058 }
5059 else if (need_block)
5060 /* We can't use JUMP_LABEL here because it might be undefined
5061 when not optimizing. */
5062 return emit_insn_before (gen_block_branch_redirect
5063 (GEN_INT (unspec_bbr_uid++)),
5064 jump);
5065 return prev;
5066 }
5067
5068 #define CONDJUMP_MIN -252
5069 #define CONDJUMP_MAX 262
5070 struct far_branch
5071 {
5072 /* A label (to be placed) in front of the jump
5073 that jumps to our ultimate destination. */
5074 rtx near_label;
5075 /* Where we are going to insert it if we cannot move the jump any farther,
5076 or the jump itself if we have picked up an existing jump. */
5077 rtx insert_place;
5078 /* The ultimate destination. */
5079 rtx far_label;
5080 struct far_branch *prev;
5081 /* If the branch has already been created, its address;
5082 else the address of its first prospective user. */
5083 int address;
5084 };
5085
5086 static void gen_far_branch (struct far_branch *);
5087 enum mdep_reorg_phase_e mdep_reorg_phase;
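/* Emit the far branch sequence described by BP. Schematically (an
illustrative sketch), an out-of-range conditional branch "bt .Ldistant"
ends up as:
bf .L1 ! inverted condition, short range
.Lnear: ! bp->near_label; other far branches can be redirected here
bra .Ldistant
nop ! delay slot
.L1: */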
5088 static void
5089 gen_far_branch (struct far_branch *bp)
5090 {
5091 rtx insn = bp->insert_place;
5092 rtx jump;
5093 rtx label = gen_label_rtx ();
5094 int ok;
5095
5096 emit_label_after (label, insn);
5097 if (bp->far_label)
5098 {
5099 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5100 LABEL_NUSES (bp->far_label)++;
5101 }
5102 else
5103 jump = emit_jump_insn_after (gen_return (), insn);
5104 /* Emit a barrier so that reorg knows that any following instructions
5105 are not reachable via a fall-through path.
5106 But don't do this when not optimizing, since we wouldn't suppress the
5107 alignment for the barrier then, and could end up with out-of-range
5108 pc-relative loads. */
5109 if (optimize)
5110 emit_barrier_after (jump);
5111 emit_label_after (bp->near_label, insn);
5112 JUMP_LABEL (jump) = bp->far_label;
5113 ok = invert_jump (insn, label, 1);
5114 gcc_assert (ok);
5115
5116 /* If we are branching around a jump (rather than a return), prevent
5117 reorg from using an insn from the jump target as the delay slot insn -
5118 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5119 and it could cause branches to go out of range. */
5120 if (bp->far_label)
5121 (emit_insn_after
5122 (gen_stuff_delay_slot
5123 (GEN_INT (unspec_bbr_uid++),
5124 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5125 insn));
5126 /* Prevent reorg from undoing our splits. */
5127 gen_block_redirect (jump, bp->address += 2, 2);
5128 }
5129
5130 /* Fix up ADDR_DIFF_VECs. */
5131 void
5132 fixup_addr_diff_vecs (rtx first)
5133 {
5134 rtx insn;
5135
5136 for (insn = first; insn; insn = NEXT_INSN (insn))
5137 {
5138 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5139
5140 if (!JUMP_P (insn)
5141 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5142 continue;
5143 pat = PATTERN (insn);
5144 vec_lab = XEXP (XEXP (pat, 0), 0);
5145
5146 /* Search the matching casesi_jump_2. */
5147 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5148 {
5149 if (!JUMP_P (prev))
5150 continue;
5151 prevpat = PATTERN (prev);
5152 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5153 continue;
5154 x = XVECEXP (prevpat, 0, 1);
5155 if (GET_CODE (x) != USE)
5156 continue;
5157 x = XEXP (x, 0);
5158 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5159 break;
5160 }
5161 /* FIXME: This is a bug in the optimizer, but it seems harmless
5162 to just avoid panicking. */
5163 if (!prev)
5164 continue;
5165
5166 /* Emit the reference label of the braf where it belongs, right after
5167 the casesi_jump_2 (i.e. braf). */
5168 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5169 emit_label_after (braf_label, prev);
5170
5171 /* Fix up the ADDR_DIF_VEC to be relative
5172 to the reference address of the braf. */
5173 XEXP (XEXP (pat, 0), 0) = braf_label;
5174 }
5175 }
5176
5177 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5178 a barrier. Return the base 2 logarithm of the desired alignment. */
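/* E.g. a return value of 2 requests a 2**2 == 4 byte boundary; 0 requests
no extra alignment. */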
5179 int
5180 barrier_align (rtx barrier_or_label)
5181 {
5182 rtx next = next_real_insn (barrier_or_label), pat, prev;
5183 int slot, credit, jump_to_next = 0;
5184
5185 if (! next)
5186 return 0;
5187
5188 pat = PATTERN (next);
5189
5190 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5191 return 2;
5192
5193 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5194 /* This is a barrier in front of a constant table. */
5195 return 0;
5196
5197 prev = prev_real_insn (barrier_or_label);
5198 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5199 {
5200 pat = PATTERN (prev);
5201 /* If this is a very small table, we want to keep the alignment after
5202 the table to the minimum for proper code alignment. */
5203 return ((TARGET_SMALLCODE
5204 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5205 <= (unsigned) 1 << (CACHE_LOG - 2)))
5206 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5207 }
5208
5209 if (TARGET_SMALLCODE)
5210 return 0;
5211
5212 if (! TARGET_SH2 || ! optimize)
5213 return align_jumps_log;
5214
5215 /* When fixing up pcloads, a constant table might be inserted just before
5216 the basic block that ends with the barrier. Thus, we can't trust the
5217 instruction lengths before that. */
5218 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5219 {
5220 /* Check if there is an immediately preceding branch to the insn beyond
5221 the barrier. We must weight the cost of discarding useful information
5222 from the current cache line when executing this branch and there is
5223 an alignment, against that of fetching unneeded insn in front of the
5224 branch target when there is no alignment. */
5225
5226 /* There are two delay_slot cases to consider. One is the simple case
5227 where the preceding branch is to the insn beyond the barrier (simple
5228 delay slot filling), and the other is where the preceding branch has
5229 a delay slot that is a duplicate of the insn after the barrier
5230 (fill_eager_delay_slots) and the branch is to the insn after the insn
5231 after the barrier. */
5232
5233 /* PREV is presumed to be the JUMP_INSN for the barrier under
5234 investigation. Skip to the insn before it. */
5235 prev = prev_real_insn (prev);
5236
5237 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5238 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5239 prev = prev_real_insn (prev))
5240 {
5241 jump_to_next = 0;
5242 if (GET_CODE (PATTERN (prev)) == USE
5243 || GET_CODE (PATTERN (prev)) == CLOBBER)
5244 continue;
5245 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5246 {
5247 prev = XVECEXP (PATTERN (prev), 0, 1);
5248 if (INSN_UID (prev) == INSN_UID (next))
5249 {
5250 /* Delay slot was filled with insn at jump target. */
5251 jump_to_next = 1;
5252 continue;
5253 }
5254 }
5255
5256 if (slot &&
5257 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5258 slot = 0;
5259 credit -= get_attr_length (prev);
5260 }
5261 if (prev
5262 && JUMP_P (prev)
5263 && JUMP_LABEL (prev))
5264 {
5265 rtx x;
5266 if (jump_to_next
5267 || next_real_insn (JUMP_LABEL (prev)) == next
5268 /* If relax_delay_slots() decides NEXT was redundant
5269 with some previous instruction, it will have
5270 redirected PREV's jump to the following insn. */
5271 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5272 /* There is no upper bound on redundant instructions
5273 that might have been skipped, but we must not put an
5274 alignment where none had been before. */
5275 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5276 (INSN_P (x)
5277 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5278 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5279 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5280 {
5281 rtx pat = PATTERN (prev);
5282 if (GET_CODE (pat) == PARALLEL)
5283 pat = XVECEXP (pat, 0, 0);
5284 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5285 return 0;
5286 }
5287 }
5288 }
5289
5290 return align_jumps_log;
5291 }
5292
5293 /* If we are inside a phony loop, almost any kind of label can turn up as the
5294 first one in the loop. Aligning a braf label causes incorrect switch
5295 destination addresses; we can detect braf labels because they are
5296 followed by a BARRIER.
5297 Applying loop alignment to small constant or switch tables is a waste
5298 of space, so we suppress this too. */
5299 int
5300 sh_loop_align (rtx label)
5301 {
5302 rtx next = label;
5303
5304 do
5305 next = next_nonnote_insn (next);
5306 while (next && LABEL_P (next));
5307
5308 if (! next
5309 || ! INSN_P (next)
5310 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5311 || recog_memoized (next) == CODE_FOR_consttable_2)
5312 return 0;
5313
5314 return align_loops_log;
5315 }
5316
5317 /* Do a final pass over the function, just before delayed branch
5318 scheduling. */
5319
5320 static void
5321 sh_reorg (void)
5322 {
5323 rtx first, insn, mova = NULL_RTX;
5324 int num_mova;
5325 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5326 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5327
5328 first = get_insns ();
5329 max_labelno_before_reorg = max_label_num ();
5330
5331 /* We must split call insns before introducing `mova's. If we're
5332 optimizing, they'll have already been split. Otherwise, make
5333 sure we don't split them too late. */
5334 if (! optimize)
5335 split_all_insns_noflow ();
5336
5337 if (TARGET_SHMEDIA)
5338 return;
5339
5340 /* If relaxing, generate pseudo-ops to associate function calls with
5341 the symbols they call. It does no harm to not generate these
5342 pseudo-ops. However, when we can generate them, it enables the
5343 linker to potentially relax the jsr to a bsr, and eliminate the
5344 register load and, possibly, the constant pool entry. */
5345
5346 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5347 if (TARGET_RELAX)
5348 {
5349 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5350 own purposes. This works because none of the remaining passes
5351 need to look at them.
5352
5353 ??? But it may break in the future. We should use a machine
5354 dependent REG_NOTE, or some other approach entirely. */
5355 for (insn = first; insn; insn = NEXT_INSN (insn))
5356 {
5357 if (INSN_P (insn))
5358 {
5359 rtx note;
5360
5361 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5362 NULL_RTX)) != 0)
5363 remove_note (insn, note);
5364 }
5365 }
5366
5367 for (insn = first; insn; insn = NEXT_INSN (insn))
5368 {
5369 rtx pattern, reg, link, set, scan, dies, label;
5370 int rescan = 0, foundinsn = 0;
5371
5372 if (CALL_P (insn))
5373 {
5374 pattern = PATTERN (insn);
5375
5376 if (GET_CODE (pattern) == PARALLEL)
5377 pattern = XVECEXP (pattern, 0, 0);
5378 if (GET_CODE (pattern) == SET)
5379 pattern = SET_SRC (pattern);
5380
5381 if (GET_CODE (pattern) != CALL
5382 || !MEM_P (XEXP (pattern, 0)))
5383 continue;
5384
5385 reg = XEXP (XEXP (pattern, 0), 0);
5386 }
5387 else
5388 {
5389 reg = sfunc_uses_reg (insn);
5390 if (! reg)
5391 continue;
5392 }
5393
5394 if (!REG_P (reg))
5395 continue;
5396
5397 /* Try scanning backward to find where the register is set. */
5398 link = NULL;
5399 for (scan = PREV_INSN (insn);
5400 scan && !LABEL_P (scan);
5401 scan = PREV_INSN (scan))
5402 {
5403 if (! INSN_P (scan))
5404 continue;
5405
5406 if (! reg_mentioned_p (reg, scan))
5407 continue;
5408
5409 if (noncall_uses_reg (reg, scan, &set))
5410 break;
5411
5412 if (set)
5413 {
5414 link = scan;
5415 break;
5416 }
5417 }
5418
5419 if (! link)
5420 continue;
5421
5422 /* The register is set at LINK. */
5423
5424 /* We can only optimize the function call if the register is
5425 being set to a symbol. In theory, we could sometimes
5426 optimize calls to a constant location, but the assembler
5427 and linker do not support that at present. */
5428 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5429 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5430 continue;
5431
5432 /* Scan forward from LINK to the place where REG dies, and
5433 make sure that the only insns which use REG are
5434 themselves function calls. */
5435
5436 /* ??? This doesn't work for call targets that were allocated
5437 by reload, since there may not be a REG_DEAD note for the
5438 register. */
5439
5440 dies = NULL_RTX;
5441 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5442 {
5443 rtx scanset;
5444
5445 /* Don't try to trace forward past a CODE_LABEL if we haven't
5446 seen INSN yet. Ordinarily, we will only find the setting insn
5447 if it is in the same basic block. However,
5448 cross-jumping can insert code labels in between the load and
5449 the call, and can result in situations where a single call
5450 insn may have two targets depending on where we came from. */
5451
5452 if (LABEL_P (scan) && ! foundinsn)
5453 break;
5454
5455 if (! INSN_P (scan))
5456 continue;
5457
5458 /* Don't try to trace forward past a JUMP. To optimize
5459 safely, we would have to check that all the
5460 instructions at the jump destination did not use REG. */
5461
5462 if (JUMP_P (scan))
5463 break;
5464
5465 if (! reg_mentioned_p (reg, scan))
5466 continue;
5467
5468 if (noncall_uses_reg (reg, scan, &scanset))
5469 break;
5470
5471 if (scan == insn)
5472 foundinsn = 1;
5473
5474 if (scan != insn
5475 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5476 {
5477 /* There is a function call to this register other
5478 than the one we are checking. If we optimize
5479 this call, we need to rescan again below. */
5480 rescan = 1;
5481 }
5482
5483 /* ??? We shouldn't have to worry about SCANSET here.
5484 We should just be able to check for a REG_DEAD note
5485 on a function call. However, the REG_DEAD notes are
5486 apparently not dependable around libcalls; c-torture
5487 execute/920501-2 is a test case. If SCANSET is set,
5488 then this insn sets the register, so it must have
5489 died earlier. Unfortunately, this will only handle
5490 the cases in which the register is, in fact, set in a
5491 later insn. */
5492
5493 /* ??? We shouldn't have to use FOUNDINSN here.
5494 This dates back to when we used LOG_LINKS to find
5495 the most recent insn which sets the register. */
5496
5497 if (foundinsn
5498 && (scanset
5499 || find_reg_note (scan, REG_DEAD, reg)))
5500 {
5501 dies = scan;
5502 break;
5503 }
5504 }
5505
5506 if (! dies)
5507 {
5508 /* Either there was a branch, or some insn used REG
5509 other than as a function call address. */
5510 continue;
5511 }
5512
5513 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5514 on the insn which sets the register, and on each call insn
5515 which uses the register. In final_prescan_insn we look for
5516 the REG_LABEL_OPERAND notes, and output the appropriate label
5517 or pseudo-op. */
5518
5519 label = gen_label_rtx ();
5520 add_reg_note (link, REG_LABEL_OPERAND, label);
5521 add_reg_note (insn, REG_LABEL_OPERAND, label);
5522 if (rescan)
5523 {
5524 scan = link;
5525 do
5526 {
5527 rtx reg2;
5528
5529 scan = NEXT_INSN (scan);
5530 if (scan != insn
5531 && ((CALL_P (scan)
5532 && reg_mentioned_p (reg, scan))
5533 || ((reg2 = sfunc_uses_reg (scan))
5534 && REGNO (reg2) == REGNO (reg))))
5535 add_reg_note (scan, REG_LABEL_OPERAND, label);
5536 }
5537 while (scan != dies);
5538 }
5539 }
5540 }
5541
5542 if (TARGET_SH2)
5543 fixup_addr_diff_vecs (first);
5544
5545 if (optimize)
5546 {
5547 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5548 shorten_branches (first);
5549 }
5550
5551 /* Scan the function looking for move instructions which have to be
5552 changed to pc-relative loads and insert the literal tables. */
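/* A hedged illustration, not from the original sources: a move of a
   constant that does not fit the 8-bit "mov #imm,Rn" form is rewritten
   as a pc-relative load from a literal table that dump_table emits
   after a nearby barrier, roughly

	mov.l	.L10,r4
	...
	.align	2
   .L10:	.long	0x12345678

   The label name and exact layout are only for illustration.  */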
5553 label_ref_list_pool = create_alloc_pool ("label references list",
5554 sizeof (struct label_ref_list_d),
5555 30);
5556 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5557 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5558 {
5559 if (mova_p (insn))
5560 {
5561 /* ??? basic block reordering can move a switch table dispatch
5562 below the switch table. Check if that has happened.
5563 We only have the addresses available when optimizing; but then,
5564 this check shouldn't be needed when not optimizing. */
5565 if (!untangle_mova (&num_mova, &mova, insn))
5566 {
5567 insn = mova;
5568 num_mova = 0;
5569 }
5570 }
5571 else if (JUMP_P (insn)
5572 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5573 && num_mova
5574 /* ??? loop invariant motion can also move a mova out of a
5575 loop. Since loop does this code motion anyway, maybe we
5576 should wrap UNSPEC_MOVA into a CONST, so that reload can
5577 move it back. */
5578 && ((num_mova > 1
5579 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5580 || (prev_nonnote_insn (insn)
5581 == XEXP (MOVA_LABELREF (mova), 0))))
5582 {
5583 rtx scan;
5584 int total;
5585
5586 num_mova--;
5587
5588 /* Some code might have been inserted between the mova and
5589 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5590 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5591 total += get_attr_length (scan);
5592
5593 /* The range of mova is 1020; add 4 because the pc counts from the
5594 address of the second instruction after this one, and subtract 2 in
5595 case the pc is 2-byte aligned. Possible alignment needed for the
5596 ADDR_DIFF_VEC cancels out with the alignment effects of the mova itself. */
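/* Worked out (illustrative): 1020 + 4 - 2 = 1022, hence the test
   against 1022 below.  */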
5597 if (total > 1022)
5598 {
5599 /* Change the mova into a load, and restart scanning
5600 there. broken_move will then return true for mova. */
5601 fixup_mova (mova);
5602 insn = mova;
5603 }
5604 }
5605 if (broken_move (insn)
5606 || (NONJUMP_INSN_P (insn)
5607 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5608 {
5609 rtx scan;
5610 /* Scan ahead looking for a barrier to stick the constant table
5611 behind. */
5612 rtx barrier = find_barrier (num_mova, mova, insn);
5613 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5614 int need_aligned_label = 0;
5615
5616 if (num_mova && ! mova_p (mova))
5617 {
5618 /* find_barrier had to change the first mova into a
5619 pcload; thus, we have to start with this new pcload. */
5620 insn = mova;
5621 num_mova = 0;
5622 }
5623 /* Now find all the moves between the points and modify them. */
5624 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5625 {
5626 if (LABEL_P (scan))
5627 last_float = 0;
5628 if (NONJUMP_INSN_P (scan)
5629 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5630 need_aligned_label = 1;
5631 if (broken_move (scan))
5632 {
5633 rtx *patp = &PATTERN (scan), pat = *patp;
5634 rtx src, dst;
5635 rtx lab;
5636 rtx newsrc;
5637 enum machine_mode mode;
5638
5639 if (GET_CODE (pat) == PARALLEL)
5640 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5641 src = SET_SRC (pat);
5642 dst = SET_DEST (pat);
5643 mode = GET_MODE (dst);
5644
5645 if (mode == SImode && hi_const (src)
5646 && REGNO (dst) != FPUL_REG)
5647 {
5648 int offset = 0;
5649
5650 mode = HImode;
5651 while (GET_CODE (dst) == SUBREG)
5652 {
5653 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5654 GET_MODE (SUBREG_REG (dst)),
5655 SUBREG_BYTE (dst),
5656 GET_MODE (dst));
5657 dst = SUBREG_REG (dst);
5658 }
5659 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5660 }
5661 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5662 {
5663 /* This must be an insn that clobbers r0. */
5664 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5665 XVECLEN (PATTERN (scan), 0)
5666 - 1);
5667 rtx clobber = *clobberp;
5668
5669 gcc_assert (GET_CODE (clobber) == CLOBBER
5670 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5671
5672 if (last_float
5673 && reg_set_between_p (r0_rtx, last_float_move, scan))
5674 last_float = 0;
5675 if (last_float
5676 && TARGET_SHCOMPACT
5677 && GET_MODE_SIZE (mode) != 4
5678 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5679 last_float = 0;
5680 lab = add_constant (src, mode, last_float);
5681 if (lab)
5682 emit_insn_before (gen_mova (lab), scan);
5683 else
5684 {
5685 /* There will be a REG_UNUSED note for r0 on
5686 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5687 otherwise reorg:mark_target_live_regs will not
5688 consider r0 to be used, and we could end up with a delay
5689 slot insn in front of SCAN that clobbers r0. */
5690 rtx note
5691 = find_regno_note (last_float_move, REG_UNUSED, 0);
5692
5693 /* If we are not optimizing, then there may not be
5694 a note. */
5695 if (note)
5696 PUT_REG_NOTE_KIND (note, REG_INC);
5697
5698 *last_float_addr = r0_inc_rtx;
5699 }
5700 last_float_move = scan;
5701 last_float = src;
5702 newsrc = gen_const_mem (mode,
5703 (((TARGET_SH4 && ! TARGET_FMOVD)
5704 || REGNO (dst) == FPUL_REG)
5705 ? r0_inc_rtx
5706 : r0_rtx));
5707 last_float_addr = &XEXP (newsrc, 0);
5708
5709 /* Remove the clobber of r0. */
5710 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5711 gen_rtx_SCRATCH (Pmode));
5712 }
5713 /* This is a mova needing a label. Create it. */
5714 else if (GET_CODE (src) == UNSPEC
5715 && XINT (src, 1) == UNSPEC_MOVA
5716 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5717 {
5718 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5719 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5720 newsrc = gen_rtx_UNSPEC (SImode,
5721 gen_rtvec (1, newsrc),
5722 UNSPEC_MOVA);
5723 }
5724 else
5725 {
5726 lab = add_constant (src, mode, 0);
5727 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5728 newsrc = gen_const_mem (mode, newsrc);
5729 }
5730 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5731 INSN_CODE (scan) = -1;
5732 }
5733 }
5734 dump_table (need_aligned_label ? insn : 0, barrier);
5735 insn = barrier;
5736 }
5737 }
5738 free_alloc_pool (label_ref_list_pool);
5739 for (insn = first; insn; insn = NEXT_INSN (insn))
5740 PUT_MODE (insn, VOIDmode);
5741
5742 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5743 INSN_ADDRESSES_FREE ();
5744 split_branches (first);
5745
5746 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5747 also has an effect on the register that holds the address of the sfunc.
5748 Insert an extra dummy insn in front of each sfunc that pretends to
5749 use this register. */
5750 if (flag_delayed_branch)
5751 {
5752 for (insn = first; insn; insn = NEXT_INSN (insn))
5753 {
5754 rtx reg = sfunc_uses_reg (insn);
5755
5756 if (! reg)
5757 continue;
5758 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5759 }
5760 }
5761 #if 0
5762 /* fpscr is not actually a user variable, but we pretend it is for the
5763 sake of the previous optimization passes, since we want it handled like
5764 one. However, we don't have any debugging information for it, so turn
5765 it into a non-user variable now. */
5766 if (TARGET_SH4)
5767 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5768 #endif
5769 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5770 }
5771
5772 int
5773 get_dest_uid (rtx label, int max_uid)
5774 {
5775 rtx dest = next_real_insn (label);
5776 int dest_uid;
5777 if (! dest)
5778 /* This can happen for an undefined label. */
5779 return 0;
5780 dest_uid = INSN_UID (dest);
5781 /* If this is a newly created branch redirection blocking instruction,
5782 we cannot index the branch_uid or insn_addresses arrays with its
5783 uid. But then, we won't need to, because the actual destination is
5784 the following branch. */
5785 while (dest_uid >= max_uid)
5786 {
5787 dest = NEXT_INSN (dest);
5788 dest_uid = INSN_UID (dest);
5789 }
5790 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5791 return 0;
5792 return dest_uid;
5793 }
5794
5795 /* Split condbranches that are out of range. Also add clobbers for
5796 scratch registers that are needed in far jumps.
5797 We do this before delay slot scheduling, so that it can take our
5798 newly created instructions into account. It also allows us to
5799 find branches with common targets more easily. */
5800
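/* An illustrative sketch, not taken from the original sources: a
   conditional branch whose target is out of range, e.g.

	bt	.Lfar

   is split roughly into

	bf	.Lskip
	bra	.Lfar
	 nop
   .Lskip:

   and a scratch register clobber is added when even bra cannot reach
   the target.  The mnemonics are assumptions for illustration only.  */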
5801 static void
5802 split_branches (rtx first)
5803 {
5804 rtx insn;
5805 struct far_branch **uid_branch, *far_branch_list = 0;
5806 int max_uid = get_max_uid ();
5807 int ok;
5808
5809 /* Find out which branches are out of range. */
5810 shorten_branches (first);
5811
5812 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5813 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5814
5815 for (insn = first; insn; insn = NEXT_INSN (insn))
5816 if (! INSN_P (insn))
5817 continue;
5818 else if (INSN_DELETED_P (insn))
5819 {
5820 /* Shorten_branches would split this instruction again,
5821 so transform it into a note. */
5822 SET_INSN_DELETED (insn);
5823 }
5824 else if (JUMP_P (insn)
5825 /* Don't mess with ADDR_DIFF_VEC. */
5826 && (GET_CODE (PATTERN (insn)) == SET
5827 || GET_CODE (PATTERN (insn)) == RETURN))
5828 {
5829 enum attr_type type = get_attr_type (insn);
5830 if (type == TYPE_CBRANCH)
5831 {
5832 rtx next, beyond;
5833
5834 if (get_attr_length (insn) > 4)
5835 {
5836 rtx src = SET_SRC (PATTERN (insn));
5837 rtx olabel = XEXP (XEXP (src, 1), 0);
5838 int addr = INSN_ADDRESSES (INSN_UID (insn));
5839 rtx label = 0;
5840 int dest_uid = get_dest_uid (olabel, max_uid);
5841 struct far_branch *bp = uid_branch[dest_uid];
5842
5843 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5844 the label if the LABEL_NUSES count drops to zero. There is
5845 always a jump_optimize pass that sets these values, but it
5846 proceeds to delete unreferenced code, and then if not
5847 optimizing, to un-delete the deleted instructions, thus
5848 leaving labels with use counts that are too low. */
5849 if (! optimize)
5850 {
5851 JUMP_LABEL (insn) = olabel;
5852 LABEL_NUSES (olabel)++;
5853 }
5854 if (! bp)
5855 {
5856 bp = (struct far_branch *) alloca (sizeof *bp);
5857 uid_branch[dest_uid] = bp;
5858 bp->prev = far_branch_list;
5859 far_branch_list = bp;
5860 bp->far_label
5861 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5862 LABEL_NUSES (bp->far_label)++;
5863 }
5864 else
5865 {
5866 label = bp->near_label;
5867 if (! label && bp->address - addr >= CONDJUMP_MIN)
5868 {
5869 rtx block = bp->insert_place;
5870
5871 if (GET_CODE (PATTERN (block)) == RETURN)
5872 block = PREV_INSN (block);
5873 else
5874 block = gen_block_redirect (block,
5875 bp->address, 2);
5876 label = emit_label_after (gen_label_rtx (),
5877 PREV_INSN (block));
5878 bp->near_label = label;
5879 }
5880 else if (label && ! NEXT_INSN (label))
5881 {
5882 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5883 bp->insert_place = insn;
5884 else
5885 gen_far_branch (bp);
5886 }
5887 }
5888 if (! label
5889 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5890 {
5891 bp->near_label = label = gen_label_rtx ();
5892 bp->insert_place = insn;
5893 bp->address = addr;
5894 }
5895 ok = redirect_jump (insn, label, 0);
5896 gcc_assert (ok);
5897 }
5898 else
5899 {
5900 /* get_attr_length (insn) == 2 */
5901 /* Check if we have a pattern where reorg wants to redirect
5902 the branch to a label from an unconditional branch that
5903 is too far away. */
5904 /* We can't use JUMP_LABEL here because it might be undefined
5905 when not optimizing. */
5906 /* A syntax error might cause beyond to be NULL_RTX. */
5907 beyond
5908 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5909 0));
5910
5911 if (beyond
5912 && (JUMP_P (beyond)
5913 || ((beyond = next_active_insn (beyond))
5914 && JUMP_P (beyond)))
5915 && GET_CODE (PATTERN (beyond)) == SET
5916 && recog_memoized (beyond) == CODE_FOR_jump_compact
5917 && ((INSN_ADDRESSES
5918 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5919 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5920 > 252 + 258 + 2))
5921 gen_block_redirect (beyond,
5922 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5923 }
5924
5925 next = next_active_insn (insn);
5926
5927 if (next
5928 && (JUMP_P (next)
5929 || ((next = next_active_insn (next))
5930 && JUMP_P (next)))
5931 && GET_CODE (PATTERN (next)) == SET
5932 && recog_memoized (next) == CODE_FOR_jump_compact
5933 && ((INSN_ADDRESSES
5934 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5935 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5936 > 252 + 258 + 2))
5937 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5938 }
5939 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5940 {
5941 int addr = INSN_ADDRESSES (INSN_UID (insn));
5942 rtx far_label = 0;
5943 int dest_uid = 0;
5944 struct far_branch *bp;
5945
5946 if (type == TYPE_JUMP)
5947 {
5948 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5949 dest_uid = get_dest_uid (far_label, max_uid);
5950 if (! dest_uid)
5951 {
5952 /* Parse errors can lead to labels outside
5953 the insn stream. */
5954 if (! NEXT_INSN (far_label))
5955 continue;
5956
5957 if (! optimize)
5958 {
5959 JUMP_LABEL (insn) = far_label;
5960 LABEL_NUSES (far_label)++;
5961 }
5962 redirect_jump (insn, NULL_RTX, 1);
5963 far_label = 0;
5964 }
5965 }
5966 bp = uid_branch[dest_uid];
5967 if (! bp)
5968 {
5969 bp = (struct far_branch *) alloca (sizeof *bp);
5970 uid_branch[dest_uid] = bp;
5971 bp->prev = far_branch_list;
5972 far_branch_list = bp;
5973 bp->near_label = 0;
5974 bp->far_label = far_label;
5975 if (far_label)
5976 LABEL_NUSES (far_label)++;
5977 }
5978 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5979 if (addr - bp->address <= CONDJUMP_MAX)
5980 emit_label_after (bp->near_label, PREV_INSN (insn));
5981 else
5982 {
5983 gen_far_branch (bp);
5984 bp->near_label = 0;
5985 }
5986 else
5987 bp->near_label = 0;
5988 bp->address = addr;
5989 bp->insert_place = insn;
5990 if (! far_label)
5991 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5992 else
5993 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5994 }
5995 }
5996 /* Generate all pending far branches,
5997 and free our references to the far labels. */
5998 while (far_branch_list)
5999 {
6000 if (far_branch_list->near_label
6001 && ! NEXT_INSN (far_branch_list->near_label))
6002 gen_far_branch (far_branch_list);
6003 if (optimize
6004 && far_branch_list->far_label
6005 && ! --LABEL_NUSES (far_branch_list->far_label))
6006 delete_insn (far_branch_list->far_label);
6007 far_branch_list = far_branch_list->prev;
6008 }
6009
6010 /* Instruction length information is no longer valid due to the new
6011 instructions that have been generated. */
6012 init_insn_lengths ();
6013 }
6014
6015 /* Dump out instruction addresses, which is useful for debugging the
6016 constant pool table stuff.
6017
6018 If relaxing, output the label and pseudo-ops used to link together
6019 calls and the instructions which set the registers. */
6020
6021 /* ??? The addresses printed by this routine for insns are nonsense for
6022 insns which are inside of a sequence where none of the inner insns have
6023 variable length. This is because the second pass of shorten_branches
6024 does not bother to update them. */
6025
6026 void
6027 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6028 int noperands ATTRIBUTE_UNUSED)
6029 {
6030 if (TARGET_DUMPISIZE)
6031 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6032
6033 if (TARGET_RELAX)
6034 {
6035 rtx note;
6036
6037 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6038 if (note)
6039 {
6040 rtx pattern;
6041
6042 pattern = PATTERN (insn);
6043 if (GET_CODE (pattern) == PARALLEL)
6044 pattern = XVECEXP (pattern, 0, 0);
6045 switch (GET_CODE (pattern))
6046 {
6047 case SET:
6048 if (GET_CODE (SET_SRC (pattern)) != CALL
6049 && get_attr_type (insn) != TYPE_SFUNC)
6050 {
6051 targetm.asm_out.internal_label
6052 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6053 break;
6054 }
6055 /* else FALLTHROUGH */
6056 case CALL:
6057 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6058 CODE_LABEL_NUMBER (XEXP (note, 0)));
6059 break;
6060
6061 default:
6062 gcc_unreachable ();
6063 }
6064 }
6065 }
6066 }
6067
6068 /* Dump out any constants accumulated in the final pass. These will
6069 only be labels. */
6070
6071 const char *
6072 output_jump_label_table (void)
6073 {
6074 int i;
6075
6076 if (pool_size)
6077 {
6078 fprintf (asm_out_file, "\t.align 2\n");
6079 for (i = 0; i < pool_size; i++)
6080 {
6081 pool_node *p = &pool_vector[i];
6082
6083 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6084 CODE_LABEL_NUMBER (p->label));
6085 output_asm_insn (".long %O0", &p->value);
6086 }
6087 pool_size = 0;
6088 }
6089
6090 return "";
6091 }
6092 \f
6093 /* A full frame looks like:
6094
6095 arg-5
6096 arg-4
6097 [ if current_function_anonymous_args
6098 arg-3
6099 arg-2
6100 arg-1
6101 arg-0 ]
6102 saved-fp
6103 saved-r10
6104 saved-r11
6105 saved-r12
6106 saved-pr
6107 local-n
6108 ..
6109 local-1
6110 local-0 <- fp points here. */
6111
6112 /* Number of bytes pushed for anonymous args, used to pass information
6113 between expand_prologue and expand_epilogue. */
6114
6115 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6116 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6117 for an epilogue and a negative value means that it's for a sibcall
6118 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6119 all the registers that are about to be restored, and hence dead. */
6120
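/* For illustration (call shapes taken from the uses later in this
   file): the prologue shrinks the stack with a negative SIZE, e.g.
   output_stack_adjust (-rounded_frame_size (d), stack_pointer_rtx, 0,
   NULL, true), while the epilogue grows it back, e.g.
   output_stack_adjust (frame_size, stack_pointer_rtx, e,
   &live_regs_mask, false).  */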
6121 static void
6122 output_stack_adjust (int size, rtx reg, int epilogue_p,
6123 HARD_REG_SET *live_regs_mask, bool frame_p)
6124 {
6125 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6126 if (size)
6127 {
6128 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6129
6130 /* This test is bogus, as output_stack_adjust is used to re-align the
6131 stack. */
6132 #if 0
6133 gcc_assert (!(size % align));
6134 #endif
6135
6136 if (CONST_OK_FOR_ADD (size))
6137 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6138 /* Try to do it with two partial adjustments; however, we must make
6139 sure that the stack is properly aligned at all times, in case
6140 an interrupt occurs between the two partial adjustments. */
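/* Worked example (illustrative, assuming an 8-bit signed add
   immediate): size == 160 and align == 4 give a first step of
   160 / 2 & -4 == 80 and a second step of 160 - 80 == 80; each step
   fits the immediate range and keeps the stack 4-byte aligned.  */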
6141 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6142 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6143 {
6144 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6145 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6146 }
6147 else
6148 {
6149 rtx const_reg;
6150 rtx insn;
6151 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6152 int i;
6153
6154 /* If TEMP is invalid, we could temporarily save a general
6155 register to MACL. However, there is currently no need
6156 to handle this case, so just die when we see it. */
6157 if (epilogue_p < 0
6158 || current_function_interrupt
6159 || ! call_really_used_regs[temp] || fixed_regs[temp])
6160 temp = -1;
6161 if (temp < 0 && ! current_function_interrupt
6162 && (TARGET_SHMEDIA || epilogue_p >= 0))
6163 {
6164 HARD_REG_SET temps;
6165 COPY_HARD_REG_SET (temps, call_used_reg_set);
6166 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6167 if (epilogue_p > 0)
6168 {
6169 int nreg = 0;
6170 if (crtl->return_rtx)
6171 {
6172 enum machine_mode mode;
6173 mode = GET_MODE (crtl->return_rtx);
6174 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6175 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6176 }
6177 for (i = 0; i < nreg; i++)
6178 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6179 if (crtl->calls_eh_return)
6180 {
6181 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6182 for (i = 0; i <= 3; i++)
6183 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6184 }
6185 }
6186 if (TARGET_SHMEDIA && epilogue_p < 0)
6187 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6188 CLEAR_HARD_REG_BIT (temps, i);
6189 if (epilogue_p <= 0)
6190 {
6191 for (i = FIRST_PARM_REG;
6192 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6193 CLEAR_HARD_REG_BIT (temps, i);
6194 if (cfun->static_chain_decl != NULL)
6195 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6196 }
6197 temp = scavenge_reg (&temps);
6198 }
6199 if (temp < 0 && live_regs_mask)
6200 {
6201 HARD_REG_SET temps;
6202
6203 COPY_HARD_REG_SET (temps, *live_regs_mask);
6204 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6205 temp = scavenge_reg (&temps);
6206 }
6207 if (temp < 0)
6208 {
6209 rtx adj_reg, tmp_reg, mem;
6210
6211 /* If we reach here, the most likely case is the (sibcall)
6212 epilogue for non-SHmedia. Emit a special push/pop sequence
6213 for such a case as a last resort. This looks lengthy, but
6214 it should not be a problem because it seems to be very
6215 rare. */
6216
6217 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6218
6219
6220 /* ??? There is still the slight possibility that r4 or
6221 r5 have been reserved as fixed registers or assigned
6222 as global registers, and they change during an
6223 interrupt. There are possible ways to handle this:
6224
6225 - If we are adjusting the frame pointer (r14), we can do
6226 with a single temp register and an ordinary push / pop
6227 on the stack.
6228 - Grab any call-used or call-saved registers (i.e. not
6229 fixed or globals) for the temps we need. We might
6230 also grab r14 if we are adjusting the stack pointer.
6231 If we can't find enough available registers, issue
6232 a diagnostic and die - the user must have reserved
6233 way too many registers.
6234 But since all this is rather unlikely to happen and
6235 would require extra testing, we just die if r4 / r5
6236 are not available. */
6237 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6238 && !global_regs[4] && !global_regs[5]);
6239
6240 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6241 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6242 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6243 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6244 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6245 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6246 emit_move_insn (mem, tmp_reg);
6247 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6248 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6249 emit_move_insn (mem, tmp_reg);
6250 emit_move_insn (reg, adj_reg);
6251 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6252 emit_move_insn (adj_reg, mem);
6253 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6254 emit_move_insn (tmp_reg, mem);
6255 /* Tell flow the insns that pop r4/r5 aren't dead. */
6256 emit_use (tmp_reg);
6257 emit_use (adj_reg);
6258 return;
6259 }
6260 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6261
6262 /* If SIZE is negative, subtract the positive value.
6263 This sometimes allows a constant pool entry to be shared
6264 between prologue and epilogue code. */
6265 if (size < 0)
6266 {
6267 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6268 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6269 }
6270 else
6271 {
6272 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6273 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6274 }
6275 if (! epilogue_p)
6276 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6277 gen_rtx_SET (VOIDmode, reg,
6278 gen_rtx_PLUS (SImode, reg,
6279 GEN_INT (size))));
6280 }
6281 }
6282 }
6283
6284 static rtx
6285 frame_insn (rtx x)
6286 {
6287 x = emit_insn (x);
6288 RTX_FRAME_RELATED_P (x) = 1;
6289 return x;
6290 }
6291
6292 /* Output RTL to push register RN onto the stack. */
6293
6294 static rtx
6295 push (int rn)
6296 {
6297 rtx x;
6298 if (rn == FPUL_REG)
6299 x = gen_push_fpul ();
6300 else if (rn == FPSCR_REG)
6301 x = gen_push_fpscr ();
6302 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6303 && FP_OR_XD_REGISTER_P (rn))
6304 {
6305 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6306 return NULL_RTX;
6307 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6308 }
6309 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6310 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6311 else
6312 x = gen_push (gen_rtx_REG (SImode, rn));
6313
6314 x = frame_insn (x);
6315 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6316 return x;
6317 }
6318
6319 /* Output RTL to pop register RN from the stack. */
6320
6321 static void
6322 pop (int rn)
6323 {
6324 rtx x;
6325 if (rn == FPUL_REG)
6326 x = gen_pop_fpul ();
6327 else if (rn == FPSCR_REG)
6328 x = gen_pop_fpscr ();
6329 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6330 && FP_OR_XD_REGISTER_P (rn))
6331 {
6332 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6333 return;
6334 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6335 }
6336 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6337 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6338 else
6339 x = gen_pop (gen_rtx_REG (SImode, rn));
6340
6341 x = emit_insn (x);
6342 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6343 }
6344
6345 /* Generate code to push the regs specified in the mask. */
6346
6347 static void
6348 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6349 {
6350 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6351 int skip_fpscr = 0;
6352
6353 /* Push PR last; this gives better latencies after the prologue, and
6354 candidates for the return delay slot when there are no general
6355 registers pushed. */
6356 for (; i < FIRST_PSEUDO_REGISTER; i++)
6357 {
6358 /* If this is an interrupt handler, and the SZ bit varies,
6359 and we have to push any floating point register, we need
6360 to switch to the correct precision first. */
6361 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6362 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6363 {
6364 HARD_REG_SET unsaved;
6365
6366 push (FPSCR_REG);
6367 COMPL_HARD_REG_SET (unsaved, *mask);
6368 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6369 skip_fpscr = 1;
6370 }
6371 if (i != PR_REG
6372 && (i != FPSCR_REG || ! skip_fpscr)
6373 && TEST_HARD_REG_BIT (*mask, i))
6374 {
6375 /* If the ISR has RESBANK attribute assigned, don't push any of
6376 the following registers - R0-R14, MACH, MACL and GBR. */
6377 if (! (sh_cfun_resbank_handler_p ()
6378 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6379 || i == MACH_REG
6380 || i == MACL_REG
6381 || i == GBR_REG)))
6382 push (i);
6383 }
6384 }
6385
6386 /* Push banked registers last to improve delay slot opportunities. */
6387 if (interrupt_handler)
6388 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6389 if (TEST_HARD_REG_BIT (*mask, i))
6390 push (i);
6391
6392 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6393 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6394 push (PR_REG);
6395 }
6396
6397 /* Calculate how much extra space is needed to save all callee-saved
6398 target registers.
6399 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6400
6401 static int
6402 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6403 {
6404 int reg;
6405 int stack_space = 0;
6406 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6407
6408 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6409 if ((! call_really_used_regs[reg] || interrupt_handler)
6410 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6411 /* Leave space to save this target register on the stack,
6412 in case target register allocation wants to use it. */
6413 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6414 return stack_space;
6415 }
6416
6417 /* Decide whether we should reserve space for callee-save target registers,
6418 in case target register allocation wants to use them. REGS_SAVED is
6419 the space, in bytes, that is already required for register saves.
6420 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6421
6422 static int
6423 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6424 HARD_REG_SET *live_regs_mask)
6425 {
6426 if (optimize_size)
6427 return 0;
6428 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6429 }
6430
6431 /* Decide how much space to reserve for callee-save target registers
6432 in case target register allocation wants to use them.
6433 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6434
6435 static int
6436 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6437 {
6438 if (shmedia_space_reserved_for_target_registers)
6439 return shmedia_target_regs_stack_space (live_regs_mask);
6440 else
6441 return 0;
6442 }
6443
6444 /* Work out the registers which need to be saved, both as a mask and a
6445 count of saved words. Return the count.
6446
6447 If doing a pragma interrupt function, then push all regs used by the
6448 function, and if we call another function (we can tell by looking at PR),
6449 make sure that all the regs it clobbers are safe too. */
6450
6451 static int
6452 calc_live_regs (HARD_REG_SET *live_regs_mask)
6453 {
6454 unsigned int reg;
6455 int count;
6456 tree attrs;
6457 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6458 bool nosave_low_regs;
6459 int pr_live, has_call;
6460
6461 attrs = DECL_ATTRIBUTES (current_function_decl);
6462 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6463 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6464 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6465 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6466
6467 CLEAR_HARD_REG_SET (*live_regs_mask);
6468 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6469 && df_regs_ever_live_p (FPSCR_REG))
6470 target_flags &= ~MASK_FPU_SINGLE;
6471 /* If we can avoid a lot of saves by switching to double mode, do that. */
6472 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6473 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6474 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6475 && (! call_really_used_regs[reg]
6476 || interrupt_handler)
6477 && ++count > 2)
6478 {
6479 target_flags &= ~MASK_FPU_SINGLE;
6480 break;
6481 }
6482 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6483 knows how to use it. That means the pseudo originally allocated for
6484 the initial value can become the PR_MEDIA_REG hard register, as seen for
6485 execute/20010122-1.c:test9. */
6486 if (TARGET_SHMEDIA)
6487 /* ??? this function is called from initial_elimination_offset, hence we
6488 can't use the result of sh_media_register_for_return here. */
6489 pr_live = sh_pr_n_sets ();
6490 else
6491 {
6492 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6493 pr_live = (pr_initial
6494 ? (!REG_P (pr_initial)
6495 || REGNO (pr_initial) != (PR_REG))
6496 : df_regs_ever_live_p (PR_REG));
6497 /* For SHcompact, if not optimizing, we end up with a memory reference
6498 using the return address pointer for __builtin_return_address even
6499 though there is no actual need to put the PR register on the stack. */
6500 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6501 }
6502 /* Force PR to be live if the prologue has to call the SHmedia
6503 argument decoder or register saver. */
6504 if (TARGET_SHCOMPACT
6505 && ((crtl->args.info.call_cookie
6506 & ~ CALL_COOKIE_RET_TRAMP (1))
6507 || crtl->saves_all_registers))
6508 pr_live = 1;
6509 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6510 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6511 {
6512 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6513 ? pr_live
6514 : interrupt_handler
6515 ? (/* Need to save all the regs ever live. */
6516 (df_regs_ever_live_p (reg)
6517 || (call_really_used_regs[reg]
6518 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6519 || reg == PIC_OFFSET_TABLE_REGNUM)
6520 && has_call)
6521 || (TARGET_SHMEDIA && has_call
6522 && REGISTER_NATURAL_MODE (reg) == SImode
6523 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6524 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6525 && reg != RETURN_ADDRESS_POINTER_REGNUM
6526 && reg != T_REG && reg != GBR_REG
6527 /* Push fpscr only on targets which have an FPU. */
6528 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6529 : (/* Only push those regs which are used and need to be saved. */
6530 (TARGET_SHCOMPACT
6531 && flag_pic
6532 && crtl->args.info.call_cookie
6533 && reg == PIC_OFFSET_TABLE_REGNUM)
6534 || (df_regs_ever_live_p (reg)
6535 && ((!call_really_used_regs[reg]
6536 && !(reg != PIC_OFFSET_TABLE_REGNUM
6537 && fixed_regs[reg] && call_used_regs[reg]))
6538 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6539 || (crtl->calls_eh_return
6540 && (reg == EH_RETURN_DATA_REGNO (0)
6541 || reg == EH_RETURN_DATA_REGNO (1)
6542 || reg == EH_RETURN_DATA_REGNO (2)
6543 || reg == EH_RETURN_DATA_REGNO (3)))
6544 || ((reg == MACL_REG || reg == MACH_REG)
6545 && df_regs_ever_live_p (reg)
6546 && sh_cfun_attr_renesas_p ())
6547 ))
6548 {
6549 SET_HARD_REG_BIT (*live_regs_mask, reg);
6550 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6551
6552 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6553 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6554 {
6555 if (FP_REGISTER_P (reg))
6556 {
6557 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6558 {
6559 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6560 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6561 }
6562 }
6563 else if (XD_REGISTER_P (reg))
6564 {
6565 /* Must switch to double mode to access these registers. */
6566 target_flags &= ~MASK_FPU_SINGLE;
6567 }
6568 }
6569 }
6570 if (nosave_low_regs && reg == R8_REG)
6571 break;
6572 }
6573 /* If we have a target register optimization pass after prologue / epilogue
6574 threading, we need to assume all target registers will be live even if
6575 they aren't now. */
6576 if (flag_branch_target_load_optimize2
6577 && TARGET_SAVE_ALL_TARGET_REGS
6578 && shmedia_space_reserved_for_target_registers)
6579 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6580 if ((! call_really_used_regs[reg] || interrupt_handler)
6581 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6582 {
6583 SET_HARD_REG_BIT (*live_regs_mask, reg);
6584 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6585 }
6586 /* If this is an interrupt handler, we don't have any call-clobbered
6587 registers we can conveniently use for target register save/restore.
6588 Make sure we save at least one general purpose register when we need
6589 to save target registers. */
6590 if (interrupt_handler
6591 && hard_reg_set_intersect_p (*live_regs_mask,
6592 reg_class_contents[TARGET_REGS])
6593 && ! hard_reg_set_intersect_p (*live_regs_mask,
6594 reg_class_contents[GENERAL_REGS]))
6595 {
6596 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6597 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6598 }
6599
6600 return count;
6601 }
6602
6603 /* Code to generate prologue and epilogue sequences */
6604
6605 /* PUSHED is the number of bytes that are being pushed on the
6606 stack for register saves. Return the frame size, padded
6607 appropriately so that the stack stays properly aligned. */
6608 static HOST_WIDE_INT
6609 rounded_frame_size (int pushed)
6610 {
6611 HOST_WIDE_INT size = get_frame_size ();
6612 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6613
6614 if (ACCUMULATE_OUTGOING_ARGS)
6615 size += crtl->outgoing_args_size;
6616
6617 return ((size + pushed + align - 1) & -align) - pushed;
6618 }
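#if 0
/* A minimal sketch, not part of the original sources, showing the
   rounding arithmetic above with an assumed 4-byte STACK_BOUNDARY:
   a raw frame of 10 bytes on top of 20 pushed bytes is padded so the
   total becomes 32, i.e. the function returns 12.  */
static HOST_WIDE_INT
rounded_frame_size_example (HOST_WIDE_INT size, HOST_WIDE_INT pushed,
			    HOST_WIDE_INT align)
{
  /* rounded_frame_size_example (10, 20, 4) == 12; 20 + 12 == 32.  */
  return ((size + pushed + align - 1) & -align) - pushed;
}
#endif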
6619
6620 /* Choose a call-clobbered target-branch register that remains
6621 unchanged along the whole function. We set it up as the return
6622 value in the prologue. */
6623 int
6624 sh_media_register_for_return (void)
6625 {
6626 int regno;
6627 int tr0_used;
6628
6629 if (! current_function_is_leaf)
6630 return -1;
6631 if (lookup_attribute ("interrupt_handler",
6632 DECL_ATTRIBUTES (current_function_decl)))
6633 return -1;
6634 if (sh_cfun_interrupt_handler_p ())
6635 return -1;
6636
6637 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6638
6639 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6640 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6641 return regno;
6642
6643 return -1;
6644 }
6645
6646 /* The maximum registers we need to save are:
6647 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6648 - 32 floating point registers (for each pair, we save none,
6649 one single precision value, or a double precision value).
6650 - 8 target registers
6651 - add 1 entry for a delimiter. */
6652 #define MAX_SAVED_REGS (62+32+8)
6653
6654 typedef struct save_entry_s
6655 {
6656 unsigned char reg;
6657 unsigned char mode;
6658 short offset;
6659 } save_entry;
6660
6661 #define MAX_TEMPS 4
6662
6663 /* There will be a delimiter entry with VOIDmode both at the start and the
6664 end of a filled in schedule. The end delimiter has the offset of the
6665 save with the smallest (i.e. most negative) offset. */
6666 typedef struct save_schedule_s
6667 {
6668 save_entry entries[MAX_SAVED_REGS + 2];
6669 int temps[MAX_TEMPS+1];
6670 } save_schedule;
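/* Illustrative only (not from the original sources): with OFFSET_BASE
   of 0, saving one DImode register and then one SImode register fills
   in entries with offsets 0 (start delimiter), -8, -12 and a final
   VOIDmode delimiter whose offset is -12, the most negative save.  */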
6671
6672 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6673 use reverse order. Returns the last entry written to (not counting
6674 the delimiter). OFFSET_BASE is a number to be added to all offset
6675 entries. */
6676
6677 static save_entry *
6678 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6679 int offset_base)
6680 {
6681 int align, i;
6682 save_entry *entry = schedule->entries;
6683 int tmpx = 0;
6684 int offset;
6685
6686 if (! current_function_interrupt)
6687 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6688 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6689 && ! FUNCTION_ARG_REGNO_P (i)
6690 && i != FIRST_RET_REG
6691 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6692 && ! (crtl->calls_eh_return
6693 && (i == EH_RETURN_STACKADJ_REGNO
6694 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6695 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6696 schedule->temps[tmpx++] = i;
6697 entry->reg = -1;
6698 entry->mode = VOIDmode;
6699 entry->offset = offset_base;
6700 entry++;
6701 /* We loop twice: first, we save 8-byte aligned registers at the
6702 higher addresses, which are known to be aligned. Then, we
6703 proceed to saving 32-bit registers that don't need 8-byte
6704 alignment.
6705 If this is an interrupt function, all registers that need saving
6706 need to be saved in full. Moreover, we need to postpone saving
6707 target registers until we have saved some general purpose registers
6708 we can then use as scratch registers. */
6709 offset = offset_base;
6710 for (align = 1; align >= 0; align--)
6711 {
6712 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6713 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6714 {
6715 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6716 int reg = i;
6717
6718 if (current_function_interrupt)
6719 {
6720 if (TARGET_REGISTER_P (i))
6721 continue;
6722 if (GENERAL_REGISTER_P (i))
6723 mode = DImode;
6724 }
6725 if (mode == SFmode && (i % 2) == 1
6726 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6727 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6728 {
6729 mode = DFmode;
6730 i--;
6731 reg--;
6732 }
6733
6734 /* If we're doing the aligned pass and this is not aligned,
6735 or we're doing the unaligned pass and this is aligned,
6736 skip it. */
6737 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6738 != align)
6739 continue;
6740
6741 if (current_function_interrupt
6742 && GENERAL_REGISTER_P (i)
6743 && tmpx < MAX_TEMPS)
6744 schedule->temps[tmpx++] = i;
6745
6746 offset -= GET_MODE_SIZE (mode);
6747 entry->reg = i;
6748 entry->mode = mode;
6749 entry->offset = offset;
6750 entry++;
6751 }
6752 if (align && current_function_interrupt)
6753 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6754 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6755 {
6756 offset -= GET_MODE_SIZE (DImode);
6757 entry->reg = i;
6758 entry->mode = DImode;
6759 entry->offset = offset;
6760 entry++;
6761 }
6762 }
6763 entry->reg = -1;
6764 entry->mode = VOIDmode;
6765 entry->offset = offset;
6766 schedule->temps[tmpx] = -1;
6767 return entry - 1;
6768 }
6769
6770 void
6771 sh_expand_prologue (void)
6772 {
6773 HARD_REG_SET live_regs_mask;
6774 int d, i;
6775 int d_rounding = 0;
6776 int save_flags = target_flags;
6777 int pretend_args;
6778 tree sp_switch_attr
6779 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6780
6781 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6782
6783 /* We have pretend args if we had an object sent partially in registers
6784 and partially on the stack, e.g. a large structure. */
6785 pretend_args = crtl->args.pretend_args_size;
6786 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6787 && (NPARM_REGS(SImode)
6788 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6789 pretend_args = 0;
6790 /* The dwarf2 module doesn't expect frame-related insns here. */
6791 output_stack_adjust (-pretend_args
6792 - crtl->args.info.stack_regs * 8,
6793 stack_pointer_rtx, 0, NULL, false);
6794
6795 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6796 /* We're going to use the PIC register to load the address of the
6797 incoming-argument decoder and/or of the return trampoline from
6798 the GOT, so make sure the PIC register is preserved and
6799 initialized. */
6800 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6801
6802 if (TARGET_SHCOMPACT
6803 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6804 {
6805 int reg;
6806
6807 /* First, make all registers with incoming arguments that will
6808 be pushed onto the stack live, so that register renaming
6809 doesn't overwrite them. */
6810 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6811 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6812 >= NPARM_REGS (SImode) - reg)
6813 for (; reg < NPARM_REGS (SImode); reg++)
6814 emit_insn (gen_shcompact_preserve_incoming_args
6815 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6816 else if (CALL_COOKIE_INT_REG_GET
6817 (crtl->args.info.call_cookie, reg) == 1)
6818 emit_insn (gen_shcompact_preserve_incoming_args
6819 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6820
6821 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6822 stack_pointer_rtx);
6823 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6824 GEN_INT (crtl->args.info.call_cookie));
6825 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6826 gen_rtx_REG (SImode, R0_REG));
6827 }
6828 else if (TARGET_SHMEDIA)
6829 {
6830 int tr = sh_media_register_for_return ();
6831
6832 if (tr >= 0)
6833 emit_move_insn (gen_rtx_REG (DImode, tr),
6834 gen_rtx_REG (DImode, PR_MEDIA_REG));
6835 }
6836
6837 /* Emit the code for SETUP_VARARGS. */
6838 if (cfun->stdarg)
6839 {
6840 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6841 {
6842 /* Push arg regs as if they'd been provided by the caller on the stack. */
6843 for (i = 0; i < NPARM_REGS(SImode); i++)
6844 {
6845 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6846 rtx insn;
6847
6848 if (i >= (NPARM_REGS(SImode)
6849 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6850 ))
6851 break;
6852 insn = push (rn);
6853 }
6854 }
6855 }
6856
6857 /* If we're supposed to switch stacks at function entry, do so now. */
6858 if (sp_switch_attr)
6859 {
6860 rtx lab, newsrc;
6861 /* The argument specifies a variable holding the address of the
6862 stack the interrupt function should switch to/from at entry/exit. */
6863 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6864 const char *s
6865 = ggc_strdup (TREE_STRING_POINTER (arg));
6866 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6867
6868 lab = add_constant (sp_switch, SImode, 0);
6869 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6870 newsrc = gen_const_mem (SImode, newsrc);
6871
6872 emit_insn (gen_sp_switch_1 (newsrc));
6873 }
6874
6875 d = calc_live_regs (&live_regs_mask);
6876 /* ??? Maybe we could save some switching if we can move a mode switch
6877 that already happens to be at the function start into the prologue. */
6878 if (target_flags != save_flags && ! current_function_interrupt)
6879 emit_insn (gen_toggle_sz ());
6880
6881 if (TARGET_SH5)
6882 {
6883 int offset_base, offset;
6884 rtx r0 = NULL_RTX;
6885 int offset_in_r0 = -1;
6886 int sp_in_r0 = 0;
6887 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6888 int total_size, save_size;
6889 save_schedule schedule;
6890 save_entry *entry;
6891 int *tmp_pnt;
6892
6893 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6894 && ! current_function_interrupt)
6895 r0 = gen_rtx_REG (Pmode, R0_REG);
6896
6897 /* D is the actual number of bytes that we need for saving registers;
6898 however, in initial_elimination_offset we have committed to using
6899 an additional TREGS_SPACE amount of bytes - in order to keep both
6900 addresses to arguments supplied by the caller and local variables
6901 valid, we must keep this gap. Place it between the incoming
6902 arguments and the actually saved registers in a bid to optimize
6903 locality of reference. */
6904 total_size = d + tregs_space;
6905 total_size += rounded_frame_size (total_size);
6906 save_size = total_size - rounded_frame_size (d);
6907 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6908 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6909 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6910
6911 /* If adjusting the stack in a single step costs nothing extra, do so.
6912 I.e. either if a single addi is enough, or we need a movi anyway,
6913 and we don't exceed the maximum offset range (the test for the
6914 latter is conservative for simplicity). */
6915 if (TARGET_SHMEDIA
6916 && (CONST_OK_FOR_I10 (-total_size)
6917 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6918 && total_size <= 2044)))
6919 d_rounding = total_size - save_size;
6920
6921 offset_base = d + d_rounding;
6922
6923 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6924 0, NULL, true);
6925
6926 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6927 tmp_pnt = schedule.temps;
6928 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6929 {
6930 enum machine_mode mode = (enum machine_mode) entry->mode;
6931 unsigned int reg = entry->reg;
6932 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6933 rtx orig_reg_rtx;
6934
6935 offset = entry->offset;
6936
6937 reg_rtx = gen_rtx_REG (mode, reg);
6938
6939 mem_rtx = gen_frame_mem (mode,
6940 gen_rtx_PLUS (Pmode,
6941 stack_pointer_rtx,
6942 GEN_INT (offset)));
6943
6944 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6945 {
6946 gcc_assert (r0);
6947 mem_rtx = NULL_RTX;
6948 }
6949
6950 if (HAVE_PRE_DECREMENT
6951 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6952 || mem_rtx == NULL_RTX
6953 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6954 {
6955 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6956
6957 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6958 pre_dec = NULL_RTX;
6959 else
6960 {
6961 mem_rtx = NULL_RTX;
6962 offset += GET_MODE_SIZE (mode);
6963 }
6964 }
6965
6966 if (mem_rtx != NULL_RTX)
6967 goto addr_ok;
6968
6969 if (offset_in_r0 == -1)
6970 {
6971 emit_move_insn (r0, GEN_INT (offset));
6972 offset_in_r0 = offset;
6973 }
6974 else if (offset != offset_in_r0)
6975 {
6976 emit_move_insn (r0,
6977 gen_rtx_PLUS
6978 (Pmode, r0,
6979 GEN_INT (offset - offset_in_r0)));
6980 offset_in_r0 += offset - offset_in_r0;
6981 }
6982
6983 if (pre_dec != NULL_RTX)
6984 {
6985 if (! sp_in_r0)
6986 {
6987 emit_move_insn (r0,
6988 gen_rtx_PLUS
6989 (Pmode, r0, stack_pointer_rtx));
6990 sp_in_r0 = 1;
6991 }
6992
6993 offset -= GET_MODE_SIZE (mode);
6994 offset_in_r0 -= GET_MODE_SIZE (mode);
6995
6996 mem_rtx = pre_dec;
6997 }
6998 else if (sp_in_r0)
6999 mem_rtx = gen_frame_mem (mode, r0);
7000 else
7001 mem_rtx = gen_frame_mem (mode,
7002 gen_rtx_PLUS (Pmode,
7003 stack_pointer_rtx,
7004 r0));
7005
7006 /* We must not use an r0-based address for target-branch
7007 registers or for special registers without pre-dec
7008 memory addresses, since we store their values in r0
7009 first. */
7010 gcc_assert (!TARGET_REGISTER_P (reg)
7011 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7012 || mem_rtx == pre_dec));
7013
7014 addr_ok:
7015 orig_reg_rtx = reg_rtx;
7016 if (TARGET_REGISTER_P (reg)
7017 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7018 && mem_rtx != pre_dec))
7019 {
7020 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7021
7022 emit_move_insn (tmp_reg, reg_rtx);
7023
7024 if (REGNO (tmp_reg) == R0_REG)
7025 {
7026 offset_in_r0 = -1;
7027 sp_in_r0 = 0;
7028 gcc_assert (!refers_to_regno_p
7029 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7030 }
7031
7032 if (*++tmp_pnt <= 0)
7033 tmp_pnt = schedule.temps;
7034
7035 reg_rtx = tmp_reg;
7036 }
7037 {
7038 rtx insn;
7039
7040 /* Mark as interesting for dwarf cfi generator */
7041 insn = emit_move_insn (mem_rtx, reg_rtx);
7042 RTX_FRAME_RELATED_P (insn) = 1;
7043 /* If we use an intermediate register for the save, we can't
7044 describe this exactly in cfi as a copy of the to-be-saved
7045 register into the temporary register followed by a store of the
7046 temporary register to the stack, because the temporary register can
7047 have a different natural size than the to-be-saved register.
7048 Thus, we gloss over the intermediate copy and pretend we do
7049 a direct save from the to-be-saved register. */
7050 if (REGNO (reg_rtx) != reg)
7051 {
7052 rtx set;
7053
7054 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7055 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7056 }
7057
7058 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7059 {
7060 rtx reg_rtx = gen_rtx_REG (mode, reg);
7061 rtx set;
7062 rtx mem_rtx = gen_frame_mem (mode,
7063 gen_rtx_PLUS (Pmode,
7064 stack_pointer_rtx,
7065 GEN_INT (offset)));
7066
7067 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7068 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7069 }
7070 }
7071 }
7072
7073 gcc_assert (entry->offset == d_rounding);
7074 }
7075 else
7076 push_regs (&live_regs_mask, current_function_interrupt);
7077
7078 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7079 emit_insn (gen_GOTaddr2picreg ());
7080
7081 if (SHMEDIA_REGS_STACK_ADJUST ())
7082 {
7083 /* This must NOT go through the PLT, otherwise mach and macl
7084 may be clobbered. */
7085 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7086 (TARGET_FPU_ANY
7087 ? "__GCC_push_shmedia_regs"
7088 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7089 emit_insn (gen_shmedia_save_restore_regs_compact
7090 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7091 }
7092
7093 if (target_flags != save_flags && ! current_function_interrupt)
7094 emit_insn (gen_toggle_sz ());
7095
7096 target_flags = save_flags;
7097
7098 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7099 stack_pointer_rtx, 0, NULL, true);
7100
7101 if (frame_pointer_needed)
7102 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7103
7104 if (TARGET_SHCOMPACT
7105 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7106 {
7107 /* This must NOT go through the PLT, otherwise mach and macl
7108 may be clobbered. */
7109 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7110 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7111 emit_insn (gen_shcompact_incoming_args ());
7112 }
7113 }
7114
7115 void
7116 sh_expand_epilogue (bool sibcall_p)
7117 {
7118 HARD_REG_SET live_regs_mask;
7119 int d, i;
7120 int d_rounding = 0;
7121
7122 int save_flags = target_flags;
7123 int frame_size, save_size;
7124 int fpscr_deferred = 0;
7125 int e = sibcall_p ? -1 : 1;
7126
7127 d = calc_live_regs (&live_regs_mask);
7128
7129 save_size = d;
7130 frame_size = rounded_frame_size (d);
7131
7132 if (TARGET_SH5)
7133 {
7134 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7135 int total_size;
7136 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7137 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7138 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7139
7140 total_size = d + tregs_space;
7141 total_size += rounded_frame_size (total_size);
7142 save_size = total_size - frame_size;
7143
7144 /* If adjusting the stack in a single step costs nothing extra, do so.
7145 I.e. either if a single addi is enough, or we need a movi anyway,
7146 and we don't exceed the maximum offset range (the test for the
7147 latter is conservative for simplicity). */
7148 if (TARGET_SHMEDIA
7149 && ! frame_pointer_needed
7150 && (CONST_OK_FOR_I10 (total_size)
7151 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7152 && total_size <= 2044)))
7153 d_rounding = frame_size;
7154
7155 frame_size -= d_rounding;
7156 }
7157
7158 if (frame_pointer_needed)
7159 {
7160 /* We must avoid scheduling the epilogue with previous basic blocks.
7161 See PR/18032 and PR/40313. */
7162 emit_insn (gen_blockage ());
7163 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7164 &live_regs_mask, false);
7165
7166 /* We must avoid moving the stack pointer adjustment past code
7167 which reads from the local frame, else an interrupt could
7168 occur after the SP adjustment and clobber data in the local
7169 frame. */
7170 emit_insn (gen_blockage ());
7171 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7172 }
7173 else if (frame_size)
7174 {
7175 /* We must avoid moving the stack pointer adjustment past code
7176 which reads from the local frame, else an interrupt could
7177 occur after the SP adjustment and clobber data in the local
7178 frame. */
7179 emit_insn (gen_blockage ());
7180 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7181 &live_regs_mask, false);
7182 }
7183
7184 if (SHMEDIA_REGS_STACK_ADJUST ())
7185 {
7186 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7187 (TARGET_FPU_ANY
7188 ? "__GCC_pop_shmedia_regs"
7189 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7190 /* This must NOT go through the PLT, otherwise mach and macl
7191 may be clobbered. */
7192 emit_insn (gen_shmedia_save_restore_regs_compact
7193 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7194 }
7195
7196 /* Pop all the registers. */
7197
7198 if (target_flags != save_flags && ! current_function_interrupt)
7199 emit_insn (gen_toggle_sz ());
7200 if (TARGET_SH5)
7201 {
7202 int offset_base, offset;
7203 int offset_in_r0 = -1;
7204 int sp_in_r0 = 0;
7205 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7206 save_schedule schedule;
7207 save_entry *entry;
7208 int *tmp_pnt;
7209
7210 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7211 offset_base = -entry[1].offset + d_rounding;
7212 tmp_pnt = schedule.temps;
7213 for (; entry->mode != VOIDmode; entry--)
7214 {
7215 enum machine_mode mode = (enum machine_mode) entry->mode;
7216 int reg = entry->reg;
7217 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7218
7219 offset = offset_base + entry->offset;
7220 reg_rtx = gen_rtx_REG (mode, reg);
7221
7222 mem_rtx = gen_frame_mem (mode,
7223 gen_rtx_PLUS (Pmode,
7224 stack_pointer_rtx,
7225 GEN_INT (offset)));
7226
7227 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7228 mem_rtx = NULL_RTX;
7229
7230 if (HAVE_POST_INCREMENT
7231 && (offset == offset_in_r0
7232 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7233 && mem_rtx == NULL_RTX)
7234 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7235 {
7236 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7237
7238 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7239 post_inc = NULL_RTX;
7240 else
7241 mem_rtx = NULL_RTX;
7242 }
7243
7244 if (mem_rtx != NULL_RTX)
7245 goto addr_ok;
7246
7247 if (offset_in_r0 == -1)
7248 {
7249 emit_move_insn (r0, GEN_INT (offset));
7250 offset_in_r0 = offset;
7251 }
7252 else if (offset != offset_in_r0)
7253 {
7254 emit_move_insn (r0,
7255 gen_rtx_PLUS
7256 (Pmode, r0,
7257 GEN_INT (offset - offset_in_r0)));
7258 offset_in_r0 += offset - offset_in_r0;
7259 }
7260
7261 if (post_inc != NULL_RTX)
7262 {
7263 if (! sp_in_r0)
7264 {
7265 emit_move_insn (r0,
7266 gen_rtx_PLUS
7267 (Pmode, r0, stack_pointer_rtx));
7268 sp_in_r0 = 1;
7269 }
7270
7271 mem_rtx = post_inc;
7272
7273 offset_in_r0 += GET_MODE_SIZE (mode);
7274 }
7275 else if (sp_in_r0)
7276 mem_rtx = gen_frame_mem (mode, r0);
7277 else
7278 mem_rtx = gen_frame_mem (mode,
7279 gen_rtx_PLUS (Pmode,
7280 stack_pointer_rtx,
7281 r0));
7282
7283 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7284 || mem_rtx == post_inc);
7285
7286 addr_ok:
7287 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7288 && mem_rtx != post_inc)
7289 {
7290 insn = emit_move_insn (r0, mem_rtx);
7291 mem_rtx = r0;
7292 }
7293 else if (TARGET_REGISTER_P (reg))
7294 {
7295 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7296
7297 /* Give the scheduler a bit of freedom by using up to
7298 MAX_TEMPS registers in a round-robin fashion. */
7299 insn = emit_move_insn (tmp_reg, mem_rtx);
7300 mem_rtx = tmp_reg;
7301 if (*++tmp_pnt < 0)
7302 tmp_pnt = schedule.temps;
7303 }
7304
7305 insn = emit_move_insn (reg_rtx, mem_rtx);
7306 }
7307
7308 gcc_assert (entry->offset + offset_base == d + d_rounding);
7309 }
7310 else /* ! TARGET_SH5 */
7311 {
7312 int last_reg;
7313
7314 save_size = 0;
7315 /* For an ISR with RESBANK attribute assigned, don't pop the PR
7316 register. */
7317 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7318 && !sh_cfun_resbank_handler_p ())
7319 {
7320 if (!frame_pointer_needed)
7321 emit_insn (gen_blockage ());
7322 pop (PR_REG);
7323 }
7324
7325 /* Banked registers are popped first to avoid being scheduled in the
7326 delay slot. RTE switches banks before the delay slot instruction. */
7327 if (current_function_interrupt)
7328 {
7329 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7330 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7331 pop (i);
7332
7333 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7334 }
7335 else
7336 last_reg = FIRST_PSEUDO_REGISTER;
7337
7338 for (i = 0; i < last_reg; i++)
7339 {
7340 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7341
7342 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7343 && hard_reg_set_intersect_p (live_regs_mask,
7344 reg_class_contents[DF_REGS]))
7345 fpscr_deferred = 1;
7346 /* For an ISR with RESBANK attribute assigned, don't pop
7347 the following registers: R0-R14, MACH, MACL and GBR. */
7348 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7349 && ! (sh_cfun_resbank_handler_p ()
7350 && ((j >= FIRST_GENERAL_REG
7351 && j < LAST_GENERAL_REG)
7352 || j == MACH_REG
7353 || j == MACL_REG
7354 || j == GBR_REG)))
7355 pop (j);
7356
7357 if (j == FIRST_FP_REG && fpscr_deferred)
7358 pop (FPSCR_REG);
7359 }
7360 }
7361 if (target_flags != save_flags && ! current_function_interrupt)
7362 emit_insn (gen_toggle_sz ());
7363 target_flags = save_flags;
7364
7365 output_stack_adjust (crtl->args.pretend_args_size
7366 + save_size + d_rounding
7367 + crtl->args.info.stack_regs * 8,
7368 stack_pointer_rtx, e, NULL, false);
7369
7370 if (crtl->calls_eh_return)
7371 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7372 EH_RETURN_STACKADJ_RTX));
7373
7374 /* Switch back to the normal stack if necessary. */
7375 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7376 emit_insn (gen_sp_switch_2 ());
7377
7378 /* Tell flow the insn that pops PR isn't dead. */
7379 /* PR_REG will never be live in SHmedia mode, and we don't need to
7380 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7381 by the return pattern. */
7382 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7383 emit_use (gen_rtx_REG (SImode, PR_REG));
7384 }
7385
7386 static int sh_need_epilogue_known = 0;
7387
7388 int
7389 sh_need_epilogue (void)
7390 {
7391 if (! sh_need_epilogue_known)
7392 {
7393 rtx epilogue;
7394
7395 start_sequence ();
7396 sh_expand_epilogue (0);
7397 epilogue = get_insns ();
7398 end_sequence ();
7399 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7400 }
7401 return sh_need_epilogue_known > 0;
7402 }
7403
7404 /* Emit code to change the current function's return address to RA.
7405 TMP is available as a scratch register, if needed. */
7406
7407 void
7408 sh_set_return_address (rtx ra, rtx tmp)
7409 {
7410 HARD_REG_SET live_regs_mask;
7411 int d;
7412 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7413 int pr_offset;
7414
7415 d = calc_live_regs (&live_regs_mask);
7416
7417 /* If pr_reg isn't live, we can set it (or the register given in
7418 sh_media_register_for_return) directly. */
7419 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7420 {
7421 rtx rr;
7422
7423 if (TARGET_SHMEDIA)
7424 {
7425 int rr_regno = sh_media_register_for_return ();
7426
7427 if (rr_regno < 0)
7428 rr_regno = pr_reg;
7429
7430 rr = gen_rtx_REG (DImode, rr_regno);
7431 }
7432 else
7433 rr = gen_rtx_REG (SImode, pr_reg);
7434
7435 emit_insn (GEN_MOV (rr, ra));
7436 /* Tell flow the register for return isn't dead. */
7437 emit_use (rr);
7438 return;
7439 }
7440
7441 if (TARGET_SH5)
7442 {
7443 int offset;
7444 save_schedule schedule;
7445 save_entry *entry;
7446
7447 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7448 offset = entry[1].offset;
7449 for (; entry->mode != VOIDmode; entry--)
7450 if (entry->reg == pr_reg)
7451 goto found;
7452
7453 /* We can't find the PR register. */
7454 gcc_unreachable ();
7455
7456 found:
7457 offset = entry->offset - offset;
7458 pr_offset = (rounded_frame_size (d) + offset
7459 + SHMEDIA_REGS_STACK_ADJUST ());
7460 }
7461 else
7462 pr_offset = rounded_frame_size (d);
7463
7464 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7465
7466 if (frame_pointer_needed)
7467 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7468 else
7469 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7470
7471 tmp = gen_frame_mem (Pmode, tmp);
7472 emit_insn (GEN_MOV (tmp, ra));
7473 /* Tell flow this store isn't dead. */
7474 emit_use (tmp);
7475 }
7476
7477 /* Clear variables at function end. */
7478
7479 static void
7480 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7481 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7482 {
7483 sh_need_epilogue_known = 0;
7484 }
7485
7486 static rtx
7487 sh_builtin_saveregs (void)
7488 {
7489 /* First unnamed integer register. */
7490 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7491 /* Number of integer registers we need to save. */
7492 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7493 /* First unnamed SFmode float reg */
7494 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7495 /* Number of SFmode float regs to save. */
7496 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7497 rtx regbuf, fpregs;
7498 int bufsize, regno;
7499 alias_set_type alias_set;
7500
7501 if (TARGET_SH5)
7502 {
7503 if (n_intregs)
7504 {
7505 int pushregs = n_intregs;
7506
7507 while (pushregs < NPARM_REGS (SImode) - 1
7508 && (CALL_COOKIE_INT_REG_GET
7509 (crtl->args.info.call_cookie,
7510 NPARM_REGS (SImode) - pushregs)
7511 == 1))
7512 {
7513 crtl->args.info.call_cookie
7514 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7515 - pushregs, 1);
7516 pushregs++;
7517 }
7518
7519 if (pushregs == NPARM_REGS (SImode))
7520 crtl->args.info.call_cookie
7521 |= (CALL_COOKIE_INT_REG (0, 1)
7522 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7523 else
7524 crtl->args.info.call_cookie
7525 |= CALL_COOKIE_STACKSEQ (pushregs);
7526
7527 crtl->args.pretend_args_size += 8 * n_intregs;
7528 }
7529 if (TARGET_SHCOMPACT)
7530 return const0_rtx;
7531 }
7532
7533 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7534 {
7535 error ("__builtin_saveregs not supported by this subtarget");
7536 return const0_rtx;
7537 }
7538
7539 if (TARGET_SHMEDIA)
7540 n_floatregs = 0;
7541
7542 /* Allocate block of memory for the regs. */
7543 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7544 Or can assign_stack_local accept a 0 SIZE argument? */
7545 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7546
7547 if (TARGET_SHMEDIA)
7548 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7549 else if (n_floatregs & 1)
7550 {
7551 rtx addr;
7552
7553 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7554 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7555 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7556 regbuf = change_address (regbuf, BLKmode, addr);
7557 }
7558 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7559 {
7560 rtx addr, mask;
7561
7562 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7563 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7564 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7565 emit_insn (gen_andsi3 (addr, addr, mask));
7566 regbuf = change_address (regbuf, BLKmode, addr);
7567 }
7568 else
7569 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7570 alias_set = get_varargs_alias_set ();
7571 set_mem_alias_set (regbuf, alias_set);
7572
7573 /* Save int args.
7574 This is optimized to only save the regs that are necessary. Explicitly
7575 named args need not be saved. */
7576 if (n_intregs > 0)
7577 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7578 adjust_address (regbuf, BLKmode,
7579 n_floatregs * UNITS_PER_WORD),
7580 n_intregs);
7581
7582 if (TARGET_SHMEDIA)
7583 /* Return the address of the regbuf. */
7584 return XEXP (regbuf, 0);
7585
7586 /* Save float args.
7587 This is optimized to only save the regs that are necessary. Explicitly
7588 named args need not be saved.
7589 We explicitly build a pointer to the buffer because it halves the insn
7590 count when not optimizing (otherwise the pointer is built for each reg
7591 saved).
7592 We emit the moves in reverse order so that we can use predecrement. */
7593
7594 fpregs = copy_to_mode_reg (Pmode,
7595 plus_constant (XEXP (regbuf, 0),
7596 n_floatregs * UNITS_PER_WORD));
7597 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7598 {
7599 rtx mem;
7600 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7601 {
7602 emit_insn (gen_addsi3 (fpregs, fpregs,
7603 GEN_INT (-2 * UNITS_PER_WORD)));
7604 mem = change_address (regbuf, DFmode, fpregs);
7605 emit_move_insn (mem,
7606 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7607 }
7608 regno = first_floatreg;
7609 if (regno & 1)
7610 {
7611 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7612 mem = change_address (regbuf, SFmode, fpregs);
7613 emit_move_insn (mem,
7614 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7615 - (TARGET_LITTLE_ENDIAN != 0)));
7616 }
7617 }
7618 else
7619 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7620 {
7621 rtx mem;
7622
7623 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7624 mem = change_address (regbuf, SFmode, fpregs);
7625 emit_move_insn (mem,
7626 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7627 }
7628
7629 /* Return the address of the regbuf. */
7630 return XEXP (regbuf, 0);
7631 }
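
/* For illustration only: on a non-SH5 target with UNITS_PER_WORD == 4 and
   every argument register unnamed, the buffer allocated above is laid out
   roughly like the hypothetical structure below -- the SFmode registers
   come first (filled from the top down via predecrement) with the SImode
   registers above them.  The type and field names are invented for this
   sketch.  */
#if 0
struct sh_builtin_saveregs_layout
{
  float fp_save[8];	/* fr4..fr11, stored in reverse order.  */
  int gp_save[4];	/* r4..r7, stored by move_block_from_reg.  */
};
#endif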
7632
7633 /* Define the `__builtin_va_list' type for the ABI. */
7634
7635 static tree
7636 sh_build_builtin_va_list (void)
7637 {
7638 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7639 tree record;
7640
7641 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7642 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7643 return ptr_type_node;
7644
7645 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7646
7647 f_next_o = build_decl (BUILTINS_LOCATION,
7648 FIELD_DECL, get_identifier ("__va_next_o"),
7649 ptr_type_node);
7650 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7651 FIELD_DECL,
7652 get_identifier ("__va_next_o_limit"),
7653 ptr_type_node);
7654 f_next_fp = build_decl (BUILTINS_LOCATION,
7655 FIELD_DECL, get_identifier ("__va_next_fp"),
7656 ptr_type_node);
7657 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7658 FIELD_DECL,
7659 get_identifier ("__va_next_fp_limit"),
7660 ptr_type_node);
7661 f_next_stack = build_decl (BUILTINS_LOCATION,
7662 FIELD_DECL, get_identifier ("__va_next_stack"),
7663 ptr_type_node);
7664
7665 DECL_FIELD_CONTEXT (f_next_o) = record;
7666 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7667 DECL_FIELD_CONTEXT (f_next_fp) = record;
7668 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7669 DECL_FIELD_CONTEXT (f_next_stack) = record;
7670
7671 TYPE_FIELDS (record) = f_next_o;
7672 TREE_CHAIN (f_next_o) = f_next_o_limit;
7673 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7674 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7675 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7676
7677 layout_type (record);
7678
7679 return record;
7680 }
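
/* Seen from user code, the record built above corresponds roughly to the
   following structure (sketch only; the real type is constructed through
   the tree machinery above rather than declared in C):  */
#if 0
typedef struct
{
  void *__va_next_o;		/* next integer-register argument slot */
  void *__va_next_o_limit;	/* end of the integer save area */
  void *__va_next_fp;		/* next FP-register argument slot */
  void *__va_next_fp_limit;	/* end of the FP save area */
  void *__va_next_stack;	/* next stack-passed argument */
} __builtin_va_list;
#endif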
7681
7682 /* Implement `va_start' for varargs and stdarg. */
7683
7684 static void
7685 sh_va_start (tree valist, rtx nextarg)
7686 {
7687 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7688 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7689 tree t, u;
7690 int nfp, nint;
7691
7692 if (TARGET_SH5)
7693 {
7694 expand_builtin_saveregs ();
7695 std_expand_builtin_va_start (valist, nextarg);
7696 return;
7697 }
7698
7699 if ((! TARGET_SH2E && ! TARGET_SH4)
7700 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7701 {
7702 std_expand_builtin_va_start (valist, nextarg);
7703 return;
7704 }
7705
7706 f_next_o = TYPE_FIELDS (va_list_type_node);
7707 f_next_o_limit = TREE_CHAIN (f_next_o);
7708 f_next_fp = TREE_CHAIN (f_next_o_limit);
7709 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7710 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7711
7712 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7713 NULL_TREE);
7714 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7715 valist, f_next_o_limit, NULL_TREE);
7716 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7717 NULL_TREE);
7718 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7719 valist, f_next_fp_limit, NULL_TREE);
7720 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7721 valist, f_next_stack, NULL_TREE);
7722
7723 /* Call __builtin_saveregs. */
7724 u = make_tree (sizetype, expand_builtin_saveregs ());
7725 u = fold_convert (ptr_type_node, u);
7726 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7727 TREE_SIDE_EFFECTS (t) = 1;
7728 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7729
7730 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7731 if (nfp < 8)
7732 nfp = 8 - nfp;
7733 else
7734 nfp = 0;
7735 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7736 size_int (UNITS_PER_WORD * nfp));
7737 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7738 TREE_SIDE_EFFECTS (t) = 1;
7739 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7740
7741 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7742 TREE_SIDE_EFFECTS (t) = 1;
7743 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7744
7745 nint = crtl->args.info.arg_count[SH_ARG_INT];
7746 if (nint < 4)
7747 nint = 4 - nint;
7748 else
7749 nint = 0;
7750 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7751 size_int (UNITS_PER_WORD * nint));
7752 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7753 TREE_SIDE_EFFECTS (t) = 1;
7754 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7755
7756 u = make_tree (ptr_type_node, nextarg);
7757 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7758 TREE_SIDE_EFFECTS (t) = 1;
7759 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7760 }
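
/* In C terms the expansion above behaves roughly as follows (sketch only;
   NFP and NINT stand for the counts of leftover unnamed FP and integer
   argument registers computed above, and the struct is the illustrative
   __builtin_va_list layout sketched after sh_build_builtin_va_list):  */
#if 0
void
sh_va_start_sketch (__builtin_va_list *ap, char *nextarg)
{
  char *regbuf = (char *) __builtin_saveregs ();

  ap->__va_next_fp = regbuf;
  ap->__va_next_fp_limit = regbuf + UNITS_PER_WORD * NFP;
  ap->__va_next_o = ap->__va_next_fp_limit;
  ap->__va_next_o_limit = (char *) ap->__va_next_o + UNITS_PER_WORD * NINT;
  ap->__va_next_stack = nextarg;
}
#endif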
7761
7762 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7763 member, return it. */
7764 static tree
7765 find_sole_member (tree type)
7766 {
7767 tree field, member = NULL_TREE;
7768
7769 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7770 {
7771 if (TREE_CODE (field) != FIELD_DECL)
7772 continue;
7773 if (!DECL_SIZE (field))
7774 return NULL_TREE;
7775 if (integer_zerop (DECL_SIZE (field)))
7776 continue;
7777 if (member)
7778 return NULL_TREE;
7779 member = field;
7780 }
7781 return member;
7782 }
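
/* An example of what find_sole_member is looking for (illustrative): a
   record whose only nonzero-sized field has REAL_TYPE or COMPLEX_TYPE,
   so the record is passed like that field:  */
#if 0
struct wrapped_float { float f; };		/* treated like a plain float */
struct wrapped_complex { _Complex float c; };	/* treated like _Complex float */
#endif
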
7783 /* Implement `va_arg'. */
7784
7785 static tree
7786 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7787 gimple_seq *post_p ATTRIBUTE_UNUSED)
7788 {
7789 HOST_WIDE_INT size, rsize;
7790 tree tmp, pptr_type_node;
7791 tree addr, lab_over = NULL, result = NULL;
7792 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7793 tree eff_type;
7794
7795 if (pass_by_ref)
7796 type = build_pointer_type (type);
7797
7798 size = int_size_in_bytes (type);
7799 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7800 pptr_type_node = build_pointer_type (ptr_type_node);
7801
7802 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7803 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7804 {
7805 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7806 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7807 int pass_as_float;
7808 tree lab_false;
7809 tree member;
7810
7811 f_next_o = TYPE_FIELDS (va_list_type_node);
7812 f_next_o_limit = TREE_CHAIN (f_next_o);
7813 f_next_fp = TREE_CHAIN (f_next_o_limit);
7814 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7815 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7816
7817 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7818 NULL_TREE);
7819 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7820 valist, f_next_o_limit, NULL_TREE);
7821 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7822 valist, f_next_fp, NULL_TREE);
7823 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7824 valist, f_next_fp_limit, NULL_TREE);
7825 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7826 valist, f_next_stack, NULL_TREE);
7827
7828 /* Structures with a single member with a distinct mode are passed
7829 like their member. This is relevant if the latter has a REAL_TYPE
7830 or COMPLEX_TYPE type. */
7831 eff_type = type;
7832 while (TREE_CODE (eff_type) == RECORD_TYPE
7833 && (member = find_sole_member (eff_type))
7834 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7835 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7836 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7837 {
7838 tree field_type = TREE_TYPE (member);
7839
7840 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7841 eff_type = field_type;
7842 else
7843 {
7844 gcc_assert ((TYPE_ALIGN (eff_type)
7845 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7846 || (TYPE_ALIGN (eff_type)
7847 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7848 break;
7849 }
7850 }
7851
7852 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7853 {
7854 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7855 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7856 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7857 && size <= 16));
7858 }
7859 else
7860 {
7861 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7862 }
7863
7864 addr = create_tmp_var (pptr_type_node, NULL);
7865 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7866 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7867
7868 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7869
7870 if (pass_as_float)
7871 {
7872 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7873 tree cmp;
7874 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7875
7876 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7877 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7878
7879 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7880 tmp = next_fp_limit;
7881 if (size > 4 && !is_double)
7882 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7883 unshare_expr (tmp), size_int (4 - size));
7884 tmp = build2 (GE_EXPR, boolean_type_node,
7885 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7886 cmp = build3 (COND_EXPR, void_type_node, tmp,
7887 build1 (GOTO_EXPR, void_type_node,
7888 unshare_expr (lab_false)), NULL_TREE);
7889 if (!is_double)
7890 gimplify_and_add (cmp, pre_p);
7891
7892 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7893 || (is_double || size == 16))
7894 {
7895 tmp = fold_convert (sizetype, next_fp_tmp);
7896 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7897 size_int (UNITS_PER_WORD));
7898 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7899 unshare_expr (next_fp_tmp), tmp);
7900 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7901 }
7902 if (is_double)
7903 gimplify_and_add (cmp, pre_p);
7904
7905 #ifdef FUNCTION_ARG_SCmode_WART
7906 if (TYPE_MODE (eff_type) == SCmode
7907 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7908 {
7909 tree subtype = TREE_TYPE (eff_type);
7910 tree real, imag;
7911
7912 imag
7913 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7914 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7915
7916 real
7917 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7918 real = get_initialized_tmp_var (real, pre_p, NULL);
7919
7920 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7921 if (type != eff_type)
7922 result = build1 (VIEW_CONVERT_EXPR, type, result);
7923 result = get_initialized_tmp_var (result, pre_p, NULL);
7924 }
7925 #endif /* FUNCTION_ARG_SCmode_WART */
7926
7927 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7928 gimplify_and_add (tmp, pre_p);
7929
7930 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7931 gimplify_and_add (tmp, pre_p);
7932
7933 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7934 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7935 gimplify_assign (unshare_expr (next_fp_tmp),
7936 unshare_expr (valist), pre_p);
7937
7938 gimplify_assign (unshare_expr (valist),
7939 unshare_expr (next_fp_tmp), post_p);
7940 valist = next_fp_tmp;
7941 }
7942 else
7943 {
7944 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7945 unshare_expr (next_o), size_int (rsize));
7946 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7947 unshare_expr (next_o_limit));
7948 tmp = build3 (COND_EXPR, void_type_node, tmp,
7949 build1 (GOTO_EXPR, void_type_node,
7950 unshare_expr (lab_false)),
7951 NULL_TREE);
7952 gimplify_and_add (tmp, pre_p);
7953
7954 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7955 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7956
7957 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7958 gimplify_and_add (tmp, pre_p);
7959
7960 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7961 gimplify_and_add (tmp, pre_p);
7962
7963 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7964 gimplify_assign (unshare_expr (next_o),
7965 unshare_expr (next_o_limit), pre_p);
7966
7967 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7968 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7969 }
7970
7971 if (!result)
7972 {
7973 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7974 gimplify_and_add (tmp, pre_p);
7975 }
7976 }
7977
7978 /* ??? In va-sh.h, there had been code to make values larger than
7979 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7980
7981 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7982 if (result)
7983 {
7984 gimplify_assign (result, tmp, pre_p);
7985 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7986 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7987 gimplify_and_add (tmp, pre_p);
7988 }
7989 else
7990 result = tmp;
7991
7992 if (pass_by_ref)
7993 result = build_va_arg_indirect_ref (result);
7994
7995 return result;
7996 }
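
/* Very roughly, the gimple generated above for a non-FP scalar argument
   behaves like the C below (sketch only; alignment handling, the FP path
   and the SH4 SCmode quirk are omitted, and the struct is the illustrative
   va_list layout sketched earlier):  */
#if 0
void *
sh_va_arg_addr_sketch (__builtin_va_list *ap, int rsize)
{
  void **addr;

  if ((char *) ap->__va_next_o + rsize > (char *) ap->__va_next_o_limit)
    addr = &ap->__va_next_stack;	/* overflow: fetch from the stack */
  else
    addr = &ap->__va_next_o;		/* still fits in the register save area */

  /* std_gimplify_va_arg_expr then dereferences *addr and advances it by
     the rounded argument size.  */
  return addr;
}
#endif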
7997
7998 /* 64-bit floating point memory transfers are paired single precision loads
7999 or stores. So the DWARF information needs fixing for little endian (unless
8000 PR=SZ=1 in FPSCR). */
8001 rtx
8002 sh_dwarf_register_span (rtx reg)
8003 {
8004 unsigned regno = REGNO (reg);
8005
8006 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8007 return NULL_RTX;
8008
8009 return
8010 gen_rtx_PARALLEL (VOIDmode,
8011 gen_rtvec (2,
8012 gen_rtx_REG (SFmode,
8013 DBX_REGISTER_NUMBER (regno+1)),
8014 gen_rtx_REG (SFmode,
8015 DBX_REGISTER_NUMBER (regno))));
8016 }
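
/* For example (illustrative): a DFmode value in the first double-precision
   register on a little-endian target is described above as the parallel
   (fr1, fr0), i.e. its two single-precision halves in the order the DWARF
   consumer expects.  */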
8017
8018 static enum machine_mode
8019 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8020 int *punsignedp, const_tree funtype,
8021 int for_return ATTRIBUTE_UNUSED)
8022 {
8023 if (sh_promote_prototypes (funtype))
8024 return promote_mode (type, mode, punsignedp);
8025 else
8026 return mode;
8027 }
8028
8029 static bool
8030 sh_promote_prototypes (const_tree type)
8031 {
8032 if (TARGET_HITACHI)
8033 return 0;
8034 if (! type)
8035 return 1;
8036 return ! sh_attr_renesas_p (type);
8037 }
8038
8039 /* Whether an argument must be passed by reference. On SHcompact, we
8040 pretend arguments wider than 32 bits that would have been passed in
8041 registers are passed by reference, so that an SHmedia trampoline
8042 loads them into the full 64-bit registers. */
8043
8044 static int
8045 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8046 const_tree type, bool named)
8047 {
8048 unsigned HOST_WIDE_INT size;
8049
8050 if (type)
8051 size = int_size_in_bytes (type);
8052 else
8053 size = GET_MODE_SIZE (mode);
8054
8055 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8056 && (!named
8057 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8058 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8059 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8060 && size > 4
8061 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8062 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8063 return size;
8064 else
8065 return 0;
8066 }
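
/* For instance (sketch): a DImode argument that would start in r4 on
   SHcompact is reported by the test above as passed by reference (its size,
   8, is returned), so the caller passes its address and an SHmedia
   trampoline can widen it into a full 64-bit register.  */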
8067
8068 static bool
8069 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8070 const_tree type, bool named)
8071 {
8072 if (targetm.calls.must_pass_in_stack (mode, type))
8073 return true;
8074
8075 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8076 wants to know about pass-by-reference semantics for incoming
8077 arguments. */
8078 if (! cum)
8079 return false;
8080
8081 if (TARGET_SHCOMPACT)
8082 {
8083 cum->byref = shcompact_byref (cum, mode, type, named);
8084 return cum->byref != 0;
8085 }
8086
8087 return false;
8088 }
8089
8090 static bool
8091 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8092 const_tree type, bool named ATTRIBUTE_UNUSED)
8093 {
8094 /* ??? How can it possibly be correct to return true only on the
8095 caller side of the equation? Is there someplace else in the
8096 sh backend that's magically producing the copies? */
8097 return (cum->outgoing
8098 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8099 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8100 }
8101
8102 static int
8103 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8104 tree type, bool named ATTRIBUTE_UNUSED)
8105 {
8106 int words = 0;
8107
8108 if (!TARGET_SH5
8109 && PASS_IN_REG_P (*cum, mode, type)
8110 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8111 && (ROUND_REG (*cum, mode)
8112 + (mode != BLKmode
8113 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8114 : ROUND_ADVANCE (int_size_in_bytes (type)))
8115 > NPARM_REGS (mode)))
8116 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8117
8118 else if (!TARGET_SHCOMPACT
8119 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8120 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8121
8122 return words * UNITS_PER_WORD;
8123 }
8124
8125
8126 /* Define where to put the arguments to a function.
8127 Value is zero to push the argument on the stack,
8128 or a hard register in which to store the argument.
8129
8130 MODE is the argument's machine mode.
8131 TYPE is the data type of the argument (as a tree).
8132 This is null for libcalls where that information may
8133 not be available.
8134 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8135 the preceding args and about the function being called.
8136 NAMED is nonzero if this argument is a named parameter
8137 (otherwise it is an extra parameter matching an ellipsis).
8138
8139 On SH the first args are normally in registers
8140 and the rest are pushed. Any arg that starts within the first
8141 NPARM_REGS words is at least partially passed in a register unless
8142 its data type forbids. */
8143
8144
8145 rtx
8146 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8147 tree type, int named)
8148 {
8149 if (! TARGET_SH5 && mode == VOIDmode)
8150 return GEN_INT (ca->renesas_abi ? 1 : 0);
8151
8152 if (! TARGET_SH5
8153 && PASS_IN_REG_P (*ca, mode, type)
8154 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8155 {
8156 int regno;
8157
8158 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8159 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8160 {
8161 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8162 gen_rtx_REG (SFmode,
8163 BASE_ARG_REG (mode)
8164 + (ROUND_REG (*ca, mode) ^ 1)),
8165 const0_rtx);
8166 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8167 gen_rtx_REG (SFmode,
8168 BASE_ARG_REG (mode)
8169 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8170 GEN_INT (4));
8171 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8172 }
8173
8174 /* If the alignment of a DF value causes an SF register to be
8175 skipped, we will use that skipped register for the next SF
8176 value. */
8177 if ((TARGET_HITACHI || ca->renesas_abi)
8178 && ca->free_single_fp_reg
8179 && mode == SFmode)
8180 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8181
8182 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8183 ^ (mode == SFmode && TARGET_SH4
8184 && TARGET_LITTLE_ENDIAN != 0
8185 && ! TARGET_HITACHI && ! ca->renesas_abi);
8186 return gen_rtx_REG (mode, regno);
8187
8188 }
8189
8190 if (TARGET_SH5)
8191 {
8192 if (mode == VOIDmode && TARGET_SHCOMPACT)
8193 return GEN_INT (ca->call_cookie);
8194
8195 /* The following test assumes unnamed arguments are promoted to
8196 DFmode. */
8197 if (mode == SFmode && ca->free_single_fp_reg)
8198 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8199
8200 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8201 && (named || ! ca->prototype_p)
8202 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8203 {
8204 if (! ca->prototype_p && TARGET_SHMEDIA)
8205 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8206
8207 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8208 FIRST_FP_PARM_REG
8209 + ca->arg_count[(int) SH_ARG_FLOAT]);
8210 }
8211
8212 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8213 && (! TARGET_SHCOMPACT
8214 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8215 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8216 type, named))))
8217 {
8218 return gen_rtx_REG (mode, (FIRST_PARM_REG
8219 + ca->arg_count[(int) SH_ARG_INT]));
8220 }
8221
8222 return 0;
8223 }
8224
8225 return 0;
8226 }
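
/* For illustration (non-SH5, non-Renesas): the first NPARM_REGS (SImode)
   integer arguments are returned above as r4..r7 and the first
   NPARM_REGS (SFmode) float arguments as fr4..fr11 (with the even/odd
   pairing adjustment for little-endian SH4 handled by the regno XOR);
   anything that fails PASS_IN_REG_P falls through, returns 0, and is
   pushed on the stack.  */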
8227
8228 /* Update the data in CUM to advance over an argument
8229 of mode MODE and data type TYPE.
8230 (TYPE is null for libcalls where that information may not be
8231 available.) */
8232
8233 void
8234 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8235 tree type, int named)
8236 {
8237 if (ca->force_mem)
8238 ca->force_mem = 0;
8239 else if (TARGET_SH5)
8240 {
8241 tree type2 = (ca->byref && type
8242 ? TREE_TYPE (type)
8243 : type);
8244 enum machine_mode mode2 = (ca->byref && type
8245 ? TYPE_MODE (type2)
8246 : mode);
8247 int dwords = ((ca->byref
8248 ? ca->byref
8249 : mode2 == BLKmode
8250 ? int_size_in_bytes (type2)
8251 : GET_MODE_SIZE (mode2)) + 7) / 8;
8252 int numregs = MIN (dwords, NPARM_REGS (SImode)
8253 - ca->arg_count[(int) SH_ARG_INT]);
8254
8255 if (numregs)
8256 {
8257 ca->arg_count[(int) SH_ARG_INT] += numregs;
8258 if (TARGET_SHCOMPACT
8259 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8260 {
8261 ca->call_cookie
8262 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8263 - numregs, 1);
8264 /* N.B. We want this also for outgoing. */
8265 ca->stack_regs += numregs;
8266 }
8267 else if (ca->byref)
8268 {
8269 if (! ca->outgoing)
8270 ca->stack_regs += numregs;
8271 ca->byref_regs += numregs;
8272 ca->byref = 0;
8273 do
8274 ca->call_cookie
8275 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8276 - numregs, 2);
8277 while (--numregs);
8278 ca->call_cookie
8279 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8280 - 1, 1);
8281 }
8282 else if (dwords > numregs)
8283 {
8284 int pushregs = numregs;
8285
8286 if (TARGET_SHCOMPACT)
8287 ca->stack_regs += numregs;
8288 while (pushregs < NPARM_REGS (SImode) - 1
8289 && (CALL_COOKIE_INT_REG_GET
8290 (ca->call_cookie,
8291 NPARM_REGS (SImode) - pushregs)
8292 == 1))
8293 {
8294 ca->call_cookie
8295 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8296 - pushregs, 1);
8297 pushregs++;
8298 }
8299 if (numregs == NPARM_REGS (SImode))
8300 ca->call_cookie
8301 |= CALL_COOKIE_INT_REG (0, 1)
8302 | CALL_COOKIE_STACKSEQ (numregs - 1);
8303 else
8304 ca->call_cookie
8305 |= CALL_COOKIE_STACKSEQ (numregs);
8306 }
8307 }
8308 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8309 && (named || ! ca->prototype_p))
8310 {
8311 if (mode2 == SFmode && ca->free_single_fp_reg)
8312 ca->free_single_fp_reg = 0;
8313 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8314 < NPARM_REGS (SFmode))
8315 {
8316 int numfpregs
8317 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8318 NPARM_REGS (SFmode)
8319 - ca->arg_count[(int) SH_ARG_FLOAT]);
8320
8321 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8322
8323 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8324 {
8325 if (ca->outgoing && numregs > 0)
8326 do
8327 {
8328 ca->call_cookie
8329 |= (CALL_COOKIE_INT_REG
8330 (ca->arg_count[(int) SH_ARG_INT]
8331 - numregs + ((numfpregs - 2) / 2),
8332 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8333 - numfpregs) / 2));
8334 }
8335 while (numfpregs -= 2);
8336 }
8337 else if (mode2 == SFmode && (named)
8338 && (ca->arg_count[(int) SH_ARG_FLOAT]
8339 < NPARM_REGS (SFmode)))
8340 ca->free_single_fp_reg
8341 = FIRST_FP_PARM_REG - numfpregs
8342 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8343 }
8344 }
8345 return;
8346 }
8347
8348 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8349 {
8350 /* Note that we've used the skipped register. */
8351 if (mode == SFmode && ca->free_single_fp_reg)
8352 {
8353 ca->free_single_fp_reg = 0;
8354 return;
8355 }
8356 /* When we have a DF after an SF, there's an SF register that gets
8357 skipped in order to align the DF value. We note this skipped
8358 register, because the next SF value will use it, and not the
8359 SF that follows the DF. */
8360 if (mode == DFmode
8361 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8362 {
8363 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8364 + BASE_ARG_REG (mode));
8365 }
8366 }
8367
8368 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8369 || PASS_IN_REG_P (*ca, mode, type))
8370 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8371 = (ROUND_REG (*ca, mode)
8372 + (mode == BLKmode
8373 ? ROUND_ADVANCE (int_size_in_bytes (type))
8374 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8375 }
8376
8377 /* The Renesas calling convention doesn't quite fit into this scheme since
8378 the address is passed like an invisible argument, but one that is always
8379 passed in memory. */
8380 static rtx
8381 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8382 {
8383 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8384 return 0;
8385 return gen_rtx_REG (Pmode, 2);
8386 }
8387
8388 /* Worker function for TARGET_FUNCTION_VALUE.
8389
8390 For the SH, this is like LIBCALL_VALUE, except that we must change the
8391 mode like PROMOTE_MODE does.
8392 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8393 tested here has to be kept in sync with the one in explow.c:promote_mode.
8394 */
8395
8396 static rtx
8397 sh_function_value (const_tree valtype,
8398 const_tree fn_decl_or_type,
8399 bool outgoing ATTRIBUTE_UNUSED)
8400 {
8401 if (fn_decl_or_type
8402 && !DECL_P (fn_decl_or_type))
8403 fn_decl_or_type = NULL;
8404
8405 return gen_rtx_REG (
8406 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8407 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8408 && (TREE_CODE (valtype) == INTEGER_TYPE
8409 || TREE_CODE (valtype) == ENUMERAL_TYPE
8410 || TREE_CODE (valtype) == BOOLEAN_TYPE
8411 || TREE_CODE (valtype) == REAL_TYPE
8412 || TREE_CODE (valtype) == OFFSET_TYPE))
8413 && sh_promote_prototypes (fn_decl_or_type)
8414 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8415 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8416 }
8417
8418 /* Worker function for TARGET_LIBCALL_VALUE. */
8419
8420 static rtx
8421 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8422 {
8423 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8424 }
8425
8426 /* Worker function for FUNCTION_VALUE_REGNO_P. */
8427
8428 bool
8429 sh_function_value_regno_p (const unsigned int regno)
8430 {
8431 return ((regno) == FIRST_RET_REG
8432 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8433 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8434 }
8435
8436 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8437
8438 static bool
8439 sh_return_in_memory (const_tree type, const_tree fndecl)
8440 {
8441 if (TARGET_SH5)
8442 {
8443 if (TYPE_MODE (type) == BLKmode)
8444 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8445 else
8446 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8447 }
8448 else
8449 {
8450 return (TYPE_MODE (type) == BLKmode
8451 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8452 && TREE_CODE (type) == RECORD_TYPE));
8453 }
8454 }
8455
8456 /* We actually emit the code in sh_expand_prologue. We used to use
8457 a static variable to flag that we need to emit this code, but that
8458 doesn't work when inlining, when functions are deferred and then emitted
8459 later. Fortunately, we already have two flags that are part of struct
8460 function that tell if a function uses varargs or stdarg. */
8461 static void
8462 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8463 enum machine_mode mode,
8464 tree type,
8465 int *pretend_arg_size,
8466 int second_time ATTRIBUTE_UNUSED)
8467 {
8468 gcc_assert (cfun->stdarg);
8469 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8470 {
8471 int named_parm_regs, anon_parm_regs;
8472
8473 named_parm_regs = (ROUND_REG (*ca, mode)
8474 + (mode == BLKmode
8475 ? ROUND_ADVANCE (int_size_in_bytes (type))
8476 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8477 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8478 if (anon_parm_regs > 0)
8479 *pretend_arg_size = anon_parm_regs * 4;
8480 }
8481 }
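
/* Example (sketch): for `int f (int a, int b, ...)' on a non-SH5 target the
   two named SImode arguments occupy r4 and r5, so named_parm_regs is 2,
   anon_parm_regs is 2 and *pretend_arg_size becomes 8 -- room for the
   prologue to flush r6 and r7 into the pretend-args area.  */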
8482
8483 static bool
8484 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8485 {
8486 return TARGET_SH5;
8487 }
8488
8489 static bool
8490 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8491 {
8492 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8493 }
8494
8495
8496 /* Define the offset between two registers, one to be eliminated, and
8497 the other its replacement, at the start of a routine. */
8498
8499 int
8500 initial_elimination_offset (int from, int to)
8501 {
8502 int regs_saved;
8503 int regs_saved_rounding = 0;
8504 int total_saved_regs_space;
8505 int total_auto_space;
8506 int save_flags = target_flags;
8507 int copy_flags;
8508 HARD_REG_SET live_regs_mask;
8509
8510 shmedia_space_reserved_for_target_registers = false;
8511 regs_saved = calc_live_regs (&live_regs_mask);
8512 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8513
8514 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8515 {
8516 shmedia_space_reserved_for_target_registers = true;
8517 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8518 }
8519
8520 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8521 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8522 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8523
8524 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8525 copy_flags = target_flags;
8526 target_flags = save_flags;
8527
8528 total_saved_regs_space = regs_saved + regs_saved_rounding;
8529
8530 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8531 return total_saved_regs_space + total_auto_space
8532 + crtl->args.info.byref_regs * 8;
8533
8534 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8535 return total_saved_regs_space + total_auto_space
8536 + crtl->args.info.byref_regs * 8;
8537
8538 /* Initial gap between fp and sp is 0. */
8539 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8540 return 0;
8541
8542 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8543 return rounded_frame_size (0);
8544
8545 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8546 return rounded_frame_size (0);
8547
8548 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8549 && (to == HARD_FRAME_POINTER_REGNUM
8550 || to == STACK_POINTER_REGNUM));
8551 if (TARGET_SH5)
8552 {
8553 int n = total_saved_regs_space;
8554 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8555 save_schedule schedule;
8556 save_entry *entry;
8557
8558 n += total_auto_space;
8559
8560 /* If it wasn't saved, there's not much we can do. */
8561 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8562 return n;
8563
8564 target_flags = copy_flags;
8565
8566 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8567 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8568 if (entry->reg == pr_reg)
8569 {
8570 target_flags = save_flags;
8571 return entry->offset;
8572 }
8573 gcc_unreachable ();
8574 }
8575 else
8576 return total_auto_space;
8577 }
8578
8579 /* Parse the -mfixed-range= option string. */
8580 void
8581 sh_fix_range (const char *const_str)
8582 {
8583 int i, first, last;
8584 char *str, *dash, *comma;
8585
8586 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8587 REG2 are either register names or register numbers. The effect
8588 of this option is to mark the registers in the range from REG1 to
8589 REG2 as ``fixed'' so they won't be used by the compiler. */
8590
8591 i = strlen (const_str);
8592 str = (char *) alloca (i + 1);
8593 memcpy (str, const_str, i + 1);
8594
8595 while (1)
8596 {
8597 dash = strchr (str, '-');
8598 if (!dash)
8599 {
8600 warning (0, "value of -mfixed-range must have form REG1-REG2");
8601 return;
8602 }
8603 *dash = '\0';
8604 comma = strchr (dash + 1, ',');
8605 if (comma)
8606 *comma = '\0';
8607
8608 first = decode_reg_name (str);
8609 if (first < 0)
8610 {
8611 warning (0, "unknown register name: %s", str);
8612 return;
8613 }
8614
8615 last = decode_reg_name (dash + 1);
8616 if (last < 0)
8617 {
8618 warning (0, "unknown register name: %s", dash + 1);
8619 return;
8620 }
8621
8622 *dash = '-';
8623
8624 if (first > last)
8625 {
8626 warning (0, "%s-%s is an empty range", str, dash + 1);
8627 return;
8628 }
8629
8630 for (i = first; i <= last; ++i)
8631 fixed_regs[i] = call_used_regs[i] = 1;
8632
8633 if (!comma)
8634 break;
8635
8636 *comma = ',';
8637 str = comma + 1;
8638 }
8639 }
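
/* Example (illustrative): a command line containing
     -mfixed-range=r4-r6,fr12-fr15
   is parsed above into the ranges r4..r6 and fr12..fr15, and every register
   in them is marked both fixed and call-used so the allocator never
   touches it.  */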
8640 \f
8641 /* Insert any deferred function attributes from earlier pragmas. */
8642 static void
8643 sh_insert_attributes (tree node, tree *attributes)
8644 {
8645 tree attrs;
8646
8647 if (TREE_CODE (node) != FUNCTION_DECL)
8648 return;
8649
8650 /* We are only interested in fields. */
8651 if (!DECL_P (node))
8652 return;
8653
8654 /* Append the attributes to the deferred attributes. */
8655 *sh_deferred_function_attributes_tail = *attributes;
8656 attrs = sh_deferred_function_attributes;
8657 if (!attrs)
8658 return;
8659
8660 /* Some attributes imply or require the interrupt attribute. */
8661 if (!lookup_attribute ("interrupt_handler", attrs)
8662 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8663 {
8664 /* If we have a trapa_handler, but no interrupt_handler attribute,
8665 insert an interrupt_handler attribute. */
8666 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8667 /* We can't use sh_pr_interrupt here because that's not in the
8668 java frontend. */
8669 attrs
8670 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8671 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8672 if the interrupt attribute is missing, we ignore the attribute
8673 and warn. */
8674 else if (lookup_attribute ("sp_switch", attrs)
8675 || lookup_attribute ("trap_exit", attrs)
8676 || lookup_attribute ("nosave_low_regs", attrs)
8677 || lookup_attribute ("resbank", attrs))
8678 {
8679 tree *tail;
8680
8681 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8682 {
8683 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8684 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8685 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8686 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8687 warning (OPT_Wattributes,
8688 "%qE attribute only applies to interrupt functions",
8689 TREE_PURPOSE (attrs));
8690 else
8691 {
8692 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8693 NULL_TREE);
8694 tail = &TREE_CHAIN (*tail);
8695 }
8696 }
8697 attrs = *attributes;
8698 }
8699 }
8700
8701 /* Install the processed list. */
8702 *attributes = attrs;
8703
8704 /* Clear deferred attributes. */
8705 sh_deferred_function_attributes = NULL_TREE;
8706 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8707
8708 return;
8709 }
8710
8711 /* Supported attributes:
8712
8713 interrupt_handler -- specifies this function is an interrupt handler.
8714
8715 trapa_handler -- like above, but don't save all registers.
8716
8717 sp_switch -- specifies an alternate stack for an interrupt handler
8718 to run on.
8719
8720 trap_exit -- use a trapa to exit an interrupt function instead of
8721 an rte instruction.
8722
8723 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
8724 This is useful on the SH3 and upwards,
8725 which have a separate set of low regs for User and Supervisor modes.
8726 This should only be used for the lowest level of interrupts. Higher levels
8727 of interrupts must save the registers in case they themselves are
8728 interrupted.
8729
8730 renesas -- use Renesas calling/layout conventions (functions and
8731 structures).
8732
8733 resbank -- In case of an ISR, use a register bank to save registers
8734 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8735 */
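
/* Typical uses of these attributes look like the following (illustrative
   only; `alt_stack' is a hypothetical global naming the alternate stack):  */
#if 0
void isr (void) __attribute__ ((interrupt_handler));
void fast_isr (void) __attribute__ ((interrupt_handler, nosave_low_regs));
void switching_isr (void) __attribute__ ((interrupt_handler,
					  sp_switch ("alt_stack"),
					  trap_exit (11)));
void banked_isr (void) __attribute__ ((interrupt_handler, resbank)); /* SH2A */
#endif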
8736
8737 /* Handle a 'resbank' attribute. */
8738 static tree
8739 sh_handle_resbank_handler_attribute (tree * node, tree name,
8740 tree args ATTRIBUTE_UNUSED,
8741 int flags ATTRIBUTE_UNUSED,
8742 bool * no_add_attrs)
8743 {
8744 if (!TARGET_SH2A)
8745 {
8746 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8747 name);
8748 *no_add_attrs = true;
8749 }
8750 if (TREE_CODE (*node) != FUNCTION_DECL)
8751 {
8752 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8753 name);
8754 *no_add_attrs = true;
8755 }
8756
8757 return NULL_TREE;
8758 }
8759
8760 /* Handle an "interrupt_handler" attribute; arguments as in
8761 struct attribute_spec.handler. */
8762 static tree
8763 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8764 tree args ATTRIBUTE_UNUSED,
8765 int flags ATTRIBUTE_UNUSED,
8766 bool *no_add_attrs)
8767 {
8768 if (TREE_CODE (*node) != FUNCTION_DECL)
8769 {
8770 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8771 name);
8772 *no_add_attrs = true;
8773 }
8774 else if (TARGET_SHCOMPACT)
8775 {
8776 error ("attribute interrupt_handler is not compatible with -m5-compact");
8777 *no_add_attrs = true;
8778 }
8779
8780 return NULL_TREE;
8781 }
8782
8783 /* Handle a 'function_vector' attribute; arguments as in
8784 struct attribute_spec.handler. */
8785 static tree
8786 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8787 tree args ATTRIBUTE_UNUSED,
8788 int flags ATTRIBUTE_UNUSED,
8789 bool * no_add_attrs)
8790 {
8791 if (!TARGET_SH2A)
8792 {
8793 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8794 name);
8795 *no_add_attrs = true;
8796 }
8797 else if (TREE_CODE (*node) != FUNCTION_DECL)
8798 {
8799 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8800 name);
8801 *no_add_attrs = true;
8802 }
8803 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8804 {
8805 /* The argument must be a constant integer. */
8806 warning (OPT_Wattributes,
8807 "%qE attribute argument not an integer constant",
8808 name);
8809 *no_add_attrs = true;
8810 }
8811 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8812 {
8813 /* The argument value must be between 0 and 255. */
8814 warning (OPT_Wattributes,
8815 "%qE attribute argument should be between 0 and 255",
8816 name);
8817 *no_add_attrs = true;
8818 }
8819 return NULL_TREE;
8820 }
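
/* Illustrative use of the attribute validated above (SH2A only; the vector
   number must be an integer constant in the range 0..255):  */
#if 0
void vect_handler (void) __attribute__ ((function_vector (18)));
#endif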
8821
8822 /* Returns 1 if X is a SYMBOL_REF for a function that has been assigned
8823 the attribute 'function_vector'. */
8824 int
8825 sh2a_is_function_vector_call (rtx x)
8826 {
8827 if (GET_CODE (x) == SYMBOL_REF
8828 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8829 {
8830 tree tr = SYMBOL_REF_DECL (x);
8831
8832 if (sh2a_function_vector_p (tr))
8833 return 1;
8834 }
8835
8836 return 0;
8837 }
8838
8839 /* Returns the function vector number, if the attribute
8840 'function_vector' is assigned, otherwise returns zero. */
8841 int
8842 sh2a_get_function_vector_number (rtx x)
8843 {
8844 int num;
8845 tree list, t;
8846
8847 if ((GET_CODE (x) == SYMBOL_REF)
8848 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8849 {
8850 t = SYMBOL_REF_DECL (x);
8851
8852 if (TREE_CODE (t) != FUNCTION_DECL)
8853 return 0;
8854
8855 list = SH_ATTRIBUTES (t);
8856 while (list)
8857 {
8858 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8859 {
8860 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8861 return num;
8862 }
8863
8864 list = TREE_CHAIN (list);
8865 }
8866
8867 return 0;
8868 }
8869 else
8870 return 0;
8871 }
8872
8873 /* Handle an "sp_switch" attribute; arguments as in
8874 struct attribute_spec.handler. */
8875 static tree
8876 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8877 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8878 {
8879 if (TREE_CODE (*node) != FUNCTION_DECL)
8880 {
8881 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8882 name);
8883 *no_add_attrs = true;
8884 }
8885 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8886 {
8887 /* The argument must be a constant string. */
8888 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8889 name);
8890 *no_add_attrs = true;
8891 }
8892
8893 return NULL_TREE;
8894 }
8895
8896 /* Handle a "trap_exit" attribute; arguments as in
8897 struct attribute_spec.handler. */
8898 static tree
8899 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8900 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8901 {
8902 if (TREE_CODE (*node) != FUNCTION_DECL)
8903 {
8904 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8905 name);
8906 *no_add_attrs = true;
8907 }
8908 /* The argument specifies a trap number to be used in a trapa instruction
8909 at function exit (instead of an rte instruction). */
8910 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8911 {
8912 /* The argument must be a constant integer. */
8913 warning (OPT_Wattributes, "%qE attribute argument not an "
8914 "integer constant", name);
8915 *no_add_attrs = true;
8916 }
8917
8918 return NULL_TREE;
8919 }
8920
8921 static tree
8922 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8923 tree name ATTRIBUTE_UNUSED,
8924 tree args ATTRIBUTE_UNUSED,
8925 int flags ATTRIBUTE_UNUSED,
8926 bool *no_add_attrs ATTRIBUTE_UNUSED)
8927 {
8928 return NULL_TREE;
8929 }
8930
8931 /* True if __attribute__((renesas)) or -mrenesas. */
8932 int
8933 sh_attr_renesas_p (const_tree td)
8934 {
8935 if (TARGET_HITACHI)
8936 return 1;
8937 if (td == 0)
8938 return 0;
8939 if (DECL_P (td))
8940 td = TREE_TYPE (td);
8941 if (td == error_mark_node)
8942 return 0;
8943 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8944 != NULL_TREE);
8945 }
8946
8947 /* True if __attribute__((renesas)) or -mrenesas, for the current
8948 function. */
8949 int
8950 sh_cfun_attr_renesas_p (void)
8951 {
8952 return sh_attr_renesas_p (current_function_decl);
8953 }
8954
8955 int
8956 sh_cfun_interrupt_handler_p (void)
8957 {
8958 return (lookup_attribute ("interrupt_handler",
8959 DECL_ATTRIBUTES (current_function_decl))
8960 != NULL_TREE);
8961 }
8962
8963 /* Returns 1 if FUNC has been assigned the attribute
8964 "function_vector". */
8965 int
8966 sh2a_function_vector_p (tree func)
8967 {
8968 tree list;
8969 if (TREE_CODE (func) != FUNCTION_DECL)
8970 return 0;
8971
8972 list = SH_ATTRIBUTES (func);
8973 while (list)
8974 {
8975 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8976 return 1;
8977
8978 list = TREE_CHAIN (list);
8979 }
8980 return 0;
8981 }
8982
8983 /* Returns TRUE if the current function has the "resbank" attribute and is an SH2A interrupt handler. */
8984
8985 int
8986 sh_cfun_resbank_handler_p (void)
8987 {
8988 return ((lookup_attribute ("resbank",
8989 DECL_ATTRIBUTES (current_function_decl))
8990 != NULL_TREE)
8991 && (lookup_attribute ("interrupt_handler",
8992 DECL_ATTRIBUTES (current_function_decl))
8993 != NULL_TREE) && TARGET_SH2A);
8994 }
8995
8996 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8997
8998 static const char *
8999 sh_check_pch_target_flags (int old_flags)
9000 {
9001 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9002 | MASK_SH_E | MASK_HARD_SH4
9003 | MASK_FPU_SINGLE | MASK_SH4))
9004 return _("created and used with different architectures / ABIs");
9005 if ((old_flags ^ target_flags) & MASK_HITACHI)
9006 return _("created and used with different ABIs");
9007 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9008 return _("created and used with different endianness");
9009 return NULL;
9010 }
9011 \f
9012 /* Predicates used by the templates. */
9013
9014 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9015 Used only in general_movsrc_operand. */
9016
9017 int
9018 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9019 {
9020 switch (REGNO (op))
9021 {
9022 case PR_REG:
9023 case MACL_REG:
9024 case MACH_REG:
9025 return 1;
9026 }
9027 return 0;
9028 }
9029
9030 /* Nonzero if OP is a floating point value with value 0.0. */
9031
9032 int
9033 fp_zero_operand (rtx op)
9034 {
9035 REAL_VALUE_TYPE r;
9036
9037 if (GET_MODE (op) != SFmode)
9038 return 0;
9039
9040 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9041 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9042 }
9043
9044 /* Nonzero if OP is a floating point value with value 1.0. */
9045
9046 int
9047 fp_one_operand (rtx op)
9048 {
9049 REAL_VALUE_TYPE r;
9050
9051 if (GET_MODE (op) != SFmode)
9052 return 0;
9053
9054 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9055 return REAL_VALUES_EQUAL (r, dconst1);
9056 }
9057
9058 /* In general mode switching is used. If we are
9059 compiling without -mfmovd, movsf_ie isn't taken into account for
9060 mode switching. We could check in machine_dependent_reorg for
9061 cases where we know we are in single precision mode, but there is
9062 no interface to find that out during reload, so we must avoid
9063 choosing an fldi alternative during reload and thus failing to
9064 allocate a scratch register for the constant loading. */
9065 int
9066 fldi_ok (void)
9067 {
9068 return 1;
9069 }
9070
9071 int
9072 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9073 {
9074 enum rtx_code code = GET_CODE (op);
9075 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9076 }
9077
9078 /* Return the TLS type for TLS symbols; TLS_MODEL_NONE otherwise. */
9079 enum tls_model
9080 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9081 {
9082 if (GET_CODE (op) != SYMBOL_REF)
9083 return TLS_MODEL_NONE;
9084 return SYMBOL_REF_TLS_MODEL (op);
9085 }
9086 \f
9087 /* Return the destination address of a branch. */
9088
9089 static int
9090 branch_dest (rtx branch)
9091 {
9092 rtx dest = SET_SRC (PATTERN (branch));
9093 int dest_uid;
9094
9095 if (GET_CODE (dest) == IF_THEN_ELSE)
9096 dest = XEXP (dest, 1);
9097 dest = XEXP (dest, 0);
9098 dest_uid = INSN_UID (dest);
9099 return INSN_ADDRESSES (dest_uid);
9100 }
9101 \f
9102 /* Return nonzero if REG is not used after INSN.
9103 We assume REG is a reload reg, and therefore does
9104 not live past labels. It may live past calls or jumps though. */
9105 int
9106 reg_unused_after (rtx reg, rtx insn)
9107 {
9108 enum rtx_code code;
9109 rtx set;
9110
9111 /* If the reg is set by this instruction, then it is safe for our
9112 case. Disregard the case where this is a store to memory, since
9113 we are checking a register used in the store address. */
9114 set = single_set (insn);
9115 if (set && !MEM_P (SET_DEST (set))
9116 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9117 return 1;
9118
9119 while ((insn = NEXT_INSN (insn)))
9120 {
9121 rtx set;
9122 if (!INSN_P (insn))
9123 continue;
9124
9125 code = GET_CODE (insn);
9126
9127 #if 0
9128 /* If this is a label that existed before reload, then the register
9129 is dead here. However, if this is a label added by reorg, then
9130 the register may still be live here. We can't tell the difference,
9131 so we just ignore labels completely. */
9132 if (code == CODE_LABEL)
9133 return 1;
9134 /* else */
9135 #endif
9136
9137 if (code == JUMP_INSN)
9138 return 0;
9139
9140 /* If this is a sequence, we must handle them all at once.
9141 We could have for instance a call that sets the target register,
9142 and an insn in a delay slot that uses the register. In this case,
9143 we must return 0. */
9144 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9145 {
9146 int i;
9147 int retval = 0;
9148
9149 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9150 {
9151 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9152 rtx set = single_set (this_insn);
9153
9154 if (CALL_P (this_insn))
9155 code = CALL_INSN;
9156 else if (JUMP_P (this_insn))
9157 {
9158 if (INSN_ANNULLED_BRANCH_P (this_insn))
9159 return 0;
9160 code = JUMP_INSN;
9161 }
9162
9163 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9164 return 0;
9165 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9166 {
9167 if (!MEM_P (SET_DEST (set)))
9168 retval = 1;
9169 else
9170 return 0;
9171 }
9172 if (set == 0
9173 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9174 return 0;
9175 }
9176 if (retval == 1)
9177 return 1;
9178 else if (code == JUMP_INSN)
9179 return 0;
9180 }
9181
9182 set = single_set (insn);
9183 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9184 return 0;
9185 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9186 return !MEM_P (SET_DEST (set));
9187 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9188 return 0;
9189
9190 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9191 return 1;
9192 }
9193 return 1;
9194 }
9195 \f
9196 #include "ggc.h"
9197
9198 static GTY(()) rtx fpscr_rtx;
9199 rtx
9200 get_fpscr_rtx (void)
9201 {
9202 if (! fpscr_rtx)
9203 {
9204 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9205 REG_USERVAR_P (fpscr_rtx) = 1;
9206 mark_user_reg (fpscr_rtx);
9207 }
9208 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9209 mark_user_reg (fpscr_rtx);
9210 return fpscr_rtx;
9211 }
9212
9213 static GTY(()) tree fpscr_values;
9214
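/* Load entry INDEX of the __fpscr_values array into the FPSCR register,
   using SCRATCH for the address computation when no new pseudo can be
   created.  The array is declared here as an external two-element int
   array and is presumably supplied by the runtime library, with one FPSCR
   setting per FP precision mode.  */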
9215 static void
9216 emit_fpu_switch (rtx scratch, int index)
9217 {
9218 rtx dst, src;
9219
9220 if (fpscr_values == NULL)
9221 {
9222 tree t;
9223
9224 t = build_index_type (integer_one_node);
9225 t = build_array_type (integer_type_node, t);
9226 t = build_decl (BUILTINS_LOCATION,
9227 VAR_DECL, get_identifier ("__fpscr_values"), t);
9228 DECL_ARTIFICIAL (t) = 1;
9229 DECL_IGNORED_P (t) = 1;
9230 DECL_EXTERNAL (t) = 1;
9231 TREE_STATIC (t) = 1;
9232 TREE_PUBLIC (t) = 1;
9233 TREE_USED (t) = 1;
9234
9235 fpscr_values = t;
9236 }
9237
9238 src = DECL_RTL (fpscr_values);
9239 if (!can_create_pseudo_p ())
9240 {
9241 emit_move_insn (scratch, XEXP (src, 0));
9242 if (index != 0)
9243 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9244 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9245 }
9246 else
9247 src = adjust_address (src, PSImode, index * 4);
9248
9249 dst = get_fpscr_rtx ();
9250 emit_move_insn (dst, src);
9251 }
9252
9253 void
9254 emit_sf_insn (rtx pat)
9255 {
9256 emit_insn (pat);
9257 }
9258
9259 void
9260 emit_df_insn (rtx pat)
9261 {
9262 emit_insn (pat);
9263 }
9264
9265 void
9266 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9267 {
9268 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9269 }
9270
9271 void
9272 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9273 {
9274 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9275 get_fpscr_rtx ()));
9276 }
9277
9278 void
9279 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9280 {
9281 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9282 }
9283
9284 void
9285 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9286 {
9287 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9288 get_fpscr_rtx ()));
9289 }
9290 \f
9291 static rtx get_free_reg (HARD_REG_SET);
9292
9293 /* This function returns a register to use for loading the address from which
9294 the fpscr is loaded. Currently it always returns r1 or r7, but when we are
9295 able to use pseudo registers after combine, or have a better mechanism
9296 for choosing a register, it should be done here. */
9297 /* REGS_LIVE is the liveness information for the point for which we
9298 need this allocation. In some bare-bones exit blocks, r1 is live at the
9299 start. We can even have all of r0..r3 being live:
9300 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9301 The insn before which new insns are placed will clobber the register
9302 we return. If a basic block consists only of setting the return value
9303 register to a pseudo and using that register, the return value is not
9304 live before or after this block, yet we'll insert our insns right in
9305 the middle. */
9306
9307 static rtx
9308 get_free_reg (HARD_REG_SET regs_live)
9309 {
9310 if (! TEST_HARD_REG_BIT (regs_live, 1))
9311 return gen_rtx_REG (Pmode, 1);
9312
9313 /* Hard reg 1 is live; since this is a small register classes target,
9314 there shouldn't be anything but a jump before the function end. */
9315 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9316 return gen_rtx_REG (Pmode, 7);
9317 }
9318
9319 /* This function will set the fpscr from memory.
9320 MODE is the mode we are setting it to. */
9321 void
9322 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9323 {
9324 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9325 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9326 rtx addr_reg;
9327
9328 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9329 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9330 }
9331
9332 /* Is the given character a logical line separator for the assembler? */
9333 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9334 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9335 #endif
9336
9337 int
9338 sh_insn_length_adjustment (rtx insn)
9339 {
9340 /* Instructions with unfilled delay slots take up an extra two bytes for
9341 the nop in the delay slot. */
9342 if (((NONJUMP_INSN_P (insn)
9343 && GET_CODE (PATTERN (insn)) != USE
9344 && GET_CODE (PATTERN (insn)) != CLOBBER)
9345 || CALL_P (insn)
9346 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9347 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9348 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9349 return 2;
9350
9351 /* SH2e has a bug that prevents the use of annulled branches, so if
9352 the delay slot is not filled, we'll have to put a NOP in it. */
9353 if (sh_cpu_attr == CPU_SH2E
9354 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9355 && get_attr_type (insn) == TYPE_CBRANCH
9356 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9357 return 2;
9358
9359 /* sh-dsp parallel processing insns take four bytes instead of two. */
9360
9361 if (NONJUMP_INSN_P (insn))
9362 {
9363 int sum = 0;
9364 rtx body = PATTERN (insn);
9365 const char *templ;
9366 char c;
9367 int maybe_label = 1;
9368
9369 if (GET_CODE (body) == ASM_INPUT)
9370 templ = XSTR (body, 0);
9371 else if (asm_noperands (body) >= 0)
9372 templ
9373 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9374 else
9375 return 0;
9376 do
9377 {
9378 int ppi_adjust = 0;
9379
9380 do
9381 c = *templ++;
9382 while (c == ' ' || c == '\t');
9383 /* All sh-dsp parallel-processing insns start with p.
9384 The only non-ppi sh insn starting with p is pref.
9385 The only ppi starting with pr is prnd. */
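/* For example, an asm template starting with "padd" gets the two-byte
   adjustment here, while one starting with "pref" does not.  */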
9386 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9387 ppi_adjust = 2;
9388 /* The repeat pseudo-insn expands to three insns, a total of
9389 six bytes in size. */
9390 else if ((c == 'r' || c == 'R')
9391 && ! strncasecmp ("epeat", templ, 5))
9392 ppi_adjust = 4;
9393 while (c && c != '\n'
9394 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9395 {
9396 /* If this is a label, it is obviously not a ppi insn. */
9397 if (c == ':' && maybe_label)
9398 {
9399 ppi_adjust = 0;
9400 break;
9401 }
9402 else if (c == '\'' || c == '"')
9403 maybe_label = 0;
9404 c = *templ++;
9405 }
9406 sum += ppi_adjust;
9407 maybe_label = c != ':';
9408 }
9409 while (c);
9410 return sum;
9411 }
9412 return 0;
9413 }
9414 \f
9415 /* Return TRUE for a valid displacement for the REG+disp addressing
9416 with MODE. */
9417
9418 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9419 into the FRx registers. We implement this by setting the maximum offset
9420 to zero when the value is SFmode. This also restricts loading of SFmode
9421 values into the integer registers, but that can't be helped. */
9422
9423 /* The SH allows a displacement in a QI or HI addressing mode, but only when
9424 the other operand is R0. GCC doesn't handle this very well, so we forgo
9425 all of that.
9426
9427 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9428 DI can be any number 0..60. */
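/* For instance, for SImode on a non-SH2A target, a displacement of 60 is
   accepted by sh_legitimate_index_p below, while 61 (misaligned) and 64
   (out of range) are rejected.  */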
9429
9430 bool
9431 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9432 {
9433 if (CONST_INT_P (op))
9434 {
9435 if (TARGET_SHMEDIA)
9436 {
9437 int size;
9438
9439 /* Check if this is the address of an unaligned load / store. */
9440 if (mode == VOIDmode)
9441 return CONST_OK_FOR_I06 (INTVAL (op));
9442
9443 size = GET_MODE_SIZE (mode);
9444 return (!(INTVAL (op) & (size - 1))
9445 && INTVAL (op) >= -512 * size
9446 && INTVAL (op) < 512 * size);
9447 }
9448
9449 if (TARGET_SH2A)
9450 {
9451 if (GET_MODE_SIZE (mode) == 1
9452 && (unsigned) INTVAL (op) < 4096)
9453 return true;
9454 }
9455
9456 if ((GET_MODE_SIZE (mode) == 4
9457 && (unsigned) INTVAL (op) < 64
9458 && !(INTVAL (op) & 3)
9459 && !(TARGET_SH2E && mode == SFmode))
9460 || (GET_MODE_SIZE (mode) == 4
9461 && (unsigned) INTVAL (op) < 16383
9462 && !(INTVAL (op) & 3) && TARGET_SH2A))
9463 return true;
9464
9465 if ((GET_MODE_SIZE (mode) == 8
9466 && (unsigned) INTVAL (op) < 60
9467 && !(INTVAL (op) & 3)
9468 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9469 || ((GET_MODE_SIZE (mode)==8)
9470 && (unsigned) INTVAL (op) < 8192
9471 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9472 && (TARGET_SH2A && mode == DFmode)))
9473 return true;
9474 }
9475
9476 return false;
9477 }
9478
9479 /* Recognize an RTL expression that is a valid memory address for
9480 an instruction.
9481 The MODE argument is the machine mode for the MEM expression
9482 that wants to use this address.
9483 Allow REG
9484 REG+disp
9485 REG+r0
9486 REG++
9487 --REG */
9488
9489 static bool
9490 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9491 {
9492 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9493 return true;
9494 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9495 && ! TARGET_SHMEDIA
9496 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9497 return true;
9498 else if (GET_CODE (x) == PLUS
9499 && (mode != PSImode || reload_completed))
9500 {
9501 rtx xop0 = XEXP (x, 0);
9502 rtx xop1 = XEXP (x, 1);
9503
9504 if (GET_MODE_SIZE (mode) <= 8
9505 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9506 && sh_legitimate_index_p (mode, xop1))
9507 return true;
9508
9509 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9510 || ((xop0 == stack_pointer_rtx
9511 || xop0 == hard_frame_pointer_rtx)
9512 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9513 || ((xop1 == stack_pointer_rtx
9514 || xop1 == hard_frame_pointer_rtx)
9515 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9516 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9517 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9518 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9519 && TARGET_FMOVD && mode == DFmode)))
9520 {
9521 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9522 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9523 return true;
9524 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9525 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9526 return true;
9527 }
9528 }
9529
9530 return false;
9531 }
9532 \f
9533 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9534 isn't protected by a PIC unspec. */
9535 int
9536 nonpic_symbol_mentioned_p (rtx x)
9537 {
9538 register const char *fmt;
9539 register int i;
9540
9541 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9542 || GET_CODE (x) == PC)
9543 return 1;
9544
9545 /* We don't want to look into the possible MEM location of a
9546 CONST_DOUBLE, since we're not going to use it, in general. */
9547 if (GET_CODE (x) == CONST_DOUBLE)
9548 return 0;
9549
9550 if (GET_CODE (x) == UNSPEC
9551 && (XINT (x, 1) == UNSPEC_PIC
9552 || XINT (x, 1) == UNSPEC_GOT
9553 || XINT (x, 1) == UNSPEC_GOTOFF
9554 || XINT (x, 1) == UNSPEC_GOTPLT
9555 || XINT (x, 1) == UNSPEC_GOTTPOFF
9556 || XINT (x, 1) == UNSPEC_DTPOFF
9557 || XINT (x, 1) == UNSPEC_TPOFF
9558 || XINT (x, 1) == UNSPEC_PLT
9559 || XINT (x, 1) == UNSPEC_SYMOFF
9560 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9561 return 0;
9562
9563 fmt = GET_RTX_FORMAT (GET_CODE (x));
9564 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9565 {
9566 if (fmt[i] == 'E')
9567 {
9568 register int j;
9569
9570 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9571 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9572 return 1;
9573 }
9574 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9575 return 1;
9576 }
9577
9578 return 0;
9579 }
9580
9581 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9582 @GOTOFF in `reg'. */
9583 rtx
9584 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9585 rtx reg)
9586 {
9587 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9588 return orig;
9589
9590 if (GET_CODE (orig) == LABEL_REF
9591 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9592 {
9593 if (reg == 0)
9594 reg = gen_reg_rtx (Pmode);
9595
9596 emit_insn (gen_symGOTOFF2reg (reg, orig));
9597 return reg;
9598 }
9599 else if (GET_CODE (orig) == SYMBOL_REF)
9600 {
9601 if (reg == 0)
9602 reg = gen_reg_rtx (Pmode);
9603
9604 emit_insn (gen_symGOT2reg (reg, orig));
9605 return reg;
9606 }
9607 return orig;
9608 }
9609
9610 /* Try machine-dependent ways of modifying an illegitimate address
9611 to be legitimate. If we find one, return the new, valid address.
9612 Otherwise, return X.
9613
9614 For the SH, if X is almost suitable for indexing, but the offset is
9615 out of range, convert it into a normal form so that CSE has a chance
9616 of reducing the number of address registers used. */
9617
9618 static rtx
9619 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9620 {
9621 if (flag_pic)
9622 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9623
9624 if (GET_CODE (x) == PLUS
9625 && (GET_MODE_SIZE (mode) == 4
9626 || GET_MODE_SIZE (mode) == 8)
9627 && CONST_INT_P (XEXP (x, 1))
9628 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9629 && ! TARGET_SHMEDIA
9630 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9631 && ! (TARGET_SH2E && mode == SFmode))
9632 {
9633 rtx index_rtx = XEXP (x, 1);
9634 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9635 rtx sum;
9636
9637 /* On rare occasions, we might get an unaligned pointer
9638 that is indexed in a way to give an aligned address.
9639 Therefore, keep the lower two bits in offset_base. */
9640 /* Instead of offset_base 128..131 use 124..127, so that
9641 simple add suffices. */
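/* As a worked example, with SImode an address such as rN + 132 is rebased
   to offset_base 124, giving (rN + 124) + 8, so that the base can be formed
   with a single add and the remaining displacement fits the range accepted
   by sh_legitimate_index_p.  */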
9642 if (offset > 127)
9643 offset_base = ((offset + 4) & ~60) - 4;
9644 else
9645 offset_base = offset & ~60;
9646
9647 /* Sometimes the normal form does not suit DImode. We
9648 could avoid that by using smaller ranges, but that
9649 would give less optimized code when SImode is
9650 prevalent. */
9651 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9652 {
9653 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9654 GEN_INT (offset_base), NULL_RTX, 0,
9655 OPTAB_LIB_WIDEN);
9656
9657 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9658 }
9659 }
9660
9661 return x;
9662 }
9663
9664 /* Attempt to replace *P, which is an address that needs reloading, with
9665 a valid memory address for an operand of mode MODE.
9666 Like for sh_legitimize_address, for the SH we try to get a normal form
9667 of the address. That will allow inheritance of the address reloads. */
9668
9669 bool
9670 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9671 int itype)
9672 {
9673 enum reload_type type = (enum reload_type) itype;
9674
9675 if (GET_CODE (*p) == PLUS
9676 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9677 && CONST_INT_P (XEXP (*p, 1))
9678 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9679 && ! TARGET_SHMEDIA
9680 && ! (TARGET_SH4 && mode == DFmode)
9681 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9682 && (ALLOW_INDEXED_ADDRESS
9683 || XEXP (*p, 0) == stack_pointer_rtx
9684 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9685 {
9686 rtx index_rtx = XEXP (*p, 1);
9687 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9688 rtx sum;
9689
9690 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9691 {
9692 push_reload (*p, NULL_RTX, p, NULL,
9693 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9694 goto win;
9695 }
9696 if (TARGET_SH2E && mode == SFmode)
9697 {
9698 *p = copy_rtx (*p);
9699 push_reload (*p, NULL_RTX, p, NULL,
9700 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9701 goto win;
9702 }
9703 /* Instead of offset_base 128..131 use 124..127, so that
9704 simple add suffices. */
9705 if (offset > 127)
9706 offset_base = ((offset + 4) & ~60) - 4;
9707 else
9708 offset_base = offset & ~60;
9709 /* Sometimes the normal form does not suit DImode. We could avoid
9710 that by using smaller ranges, but that would give less optimized
9711 code when SImode is prevalent. */
9712 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9713 {
9714 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9715 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9716 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9717 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9718 goto win;
9719 }
9720 }
9721 /* We must re-recognize what we created before. */
9722 else if (GET_CODE (*p) == PLUS
9723 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9724 && GET_CODE (XEXP (*p, 0)) == PLUS
9725 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9726 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9727 && CONST_INT_P (XEXP (*p, 1))
9728 && ! TARGET_SHMEDIA
9729 && ! (TARGET_SH2E && mode == SFmode))
9730 {
9731 /* Because this address is so complex, we know it must have
9732 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9733 it is already unshared, and needs no further unsharing. */
9734 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9735 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9736 goto win;
9737 }
9738
9739 return false;
9740
9741 win:
9742 return true;
9743 }
9744
9745 /* Mark the use of a constant in the literal table. If the constant
9746 has multiple labels, make it unique. */
9747 static rtx
9748 mark_constant_pool_use (rtx x)
9749 {
9750 rtx insn, lab, pattern;
9751
9752 if (x == NULL)
9753 return x;
9754
9755 switch (GET_CODE (x))
9756 {
9757 case LABEL_REF:
9758 x = XEXP (x, 0);
9759 case CODE_LABEL:
9760 break;
9761 default:
9762 return x;
9763 }
9764
9765 /* Get the first label in the list of labels for the same constant
9766 and delete the other labels in the list. */
9767 lab = x;
9768 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9769 {
9770 if (!LABEL_P (insn)
9771 || LABEL_REFS (insn) != NEXT_INSN (insn))
9772 break;
9773 lab = insn;
9774 }
9775
9776 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9777 INSN_DELETED_P (insn) = 1;
9778
9779 /* Mark constants in a window. */
9780 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9781 {
9782 if (!NONJUMP_INSN_P (insn))
9783 continue;
9784
9785 pattern = PATTERN (insn);
9786 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9787 continue;
9788
9789 switch (XINT (pattern, 1))
9790 {
9791 case UNSPECV_CONST2:
9792 case UNSPECV_CONST4:
9793 case UNSPECV_CONST8:
9794 XVECEXP (pattern, 0, 1) = const1_rtx;
9795 break;
9796 case UNSPECV_WINDOW_END:
9797 if (XVECEXP (pattern, 0, 0) == x)
9798 return lab;
9799 break;
9800 case UNSPECV_CONST_END:
9801 return lab;
9802 default:
9803 break;
9804 }
9805 }
9806
9807 return lab;
9808 }
9809 \f
9810 /* Return true if it's possible to redirect BRANCH1 to the destination
9811 of an unconditional jump BRANCH2. We only want to do this if the
9812 resulting branch will have a short displacement. */
9813 int
9814 sh_can_redirect_branch (rtx branch1, rtx branch2)
9815 {
9816 if (flag_expensive_optimizations && simplejump_p (branch2))
9817 {
9818 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9819 rtx insn;
9820 int distance;
9821
9822 for (distance = 0, insn = NEXT_INSN (branch1);
9823 insn && distance < 256;
9824 insn = PREV_INSN (insn))
9825 {
9826 if (insn == dest)
9827 return 1;
9828 else
9829 distance += get_attr_length (insn);
9830 }
9831 for (distance = 0, insn = NEXT_INSN (branch1);
9832 insn && distance < 256;
9833 insn = NEXT_INSN (insn))
9834 {
9835 if (insn == dest)
9836 return 1;
9837 else
9838 distance += get_attr_length (insn);
9839 }
9840 }
9841 return 0;
9842 }
9843
9844 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9845 int
9846 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9847 unsigned int new_reg)
9848 {
9849 /* Interrupt functions can only use registers that have already been
9850 saved by the prologue, even if they would normally be
9851 call-clobbered. */
9852
9853 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9854 return 0;
9855
9856 return 1;
9857 }
9858
9859 /* Function to update the integer COST
9860 based on the relationship between INSN that is dependent on
9861 DEP_INSN through the dependence LINK. The default is to make no
9862 adjustment to COST. This can be used for example to specify to
9863 the scheduler that an output- or anti-dependence does not incur
9864 the same cost as a data-dependence. The return value should be
9865 the new value for COST. */
9866 static int
9867 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9868 {
9869 rtx reg, use_pat;
9870
9871 if (TARGET_SHMEDIA)
9872 {
9873 /* On SHmedia, if the dependence is an anti-dependence or
9874 output-dependence, there is no cost. */
9875 if (REG_NOTE_KIND (link) != 0)
9876 {
9877 /* However, dependencies between target register loads and
9878 uses of the register in a subsequent block that are separated
9879 by a conditional branch are not modelled - we have to make do with
9880 the anti-dependency between the target register load and the
9881 conditional branch that ends the current block. */
9882 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9883 && GET_CODE (PATTERN (dep_insn)) == SET
9884 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9885 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9886 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9887 {
9888 int orig_cost = cost;
9889 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9890 rtx target = ((! note
9891 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9892 ? insn : JUMP_LABEL (insn));
9893 /* On the likely path, the branch costs 1, on the unlikely path,
9894 it costs 3. */
9895 cost--;
9896 do
9897 target = next_active_insn (target);
9898 while (target && ! flow_dependent_p (target, dep_insn)
9899 && --cost > 0);
9900 /* If two branches are executed in immediate succession, with the
9901 first branch properly predicted, this causes a stall at the
9902 second branch, hence we won't need the target for the
9903 second branch for two cycles after the launch of the first
9904 branch. */
9905 if (cost > orig_cost - 2)
9906 cost = orig_cost - 2;
9907 }
9908 else
9909 cost = 0;
9910 }
9911
9912 else if (get_attr_is_mac_media (insn)
9913 && get_attr_is_mac_media (dep_insn))
9914 cost = 1;
9915
9916 else if (! reload_completed
9917 && GET_CODE (PATTERN (insn)) == SET
9918 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9919 && GET_CODE (PATTERN (dep_insn)) == SET
9920 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9921 && cost < 4)
9922 cost = 4;
9923 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9924 that is needed at the target. */
9925 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9926 && ! flow_dependent_p (insn, dep_insn))
9927 cost--;
9928 }
9929 else if (REG_NOTE_KIND (link) == 0)
9930 {
9931 enum attr_type type;
9932 rtx dep_set;
9933
9934 if (recog_memoized (insn) < 0
9935 || recog_memoized (dep_insn) < 0)
9936 return cost;
9937
9938 dep_set = single_set (dep_insn);
9939
9940 /* The latency that we specify in the scheduling description refers
9941 to the actual output, not to an auto-increment register; for that,
9942 the latency is one. */
9943 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9944 {
9945 rtx set = single_set (insn);
9946
9947 if (set
9948 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9949 && (!MEM_P (SET_DEST (set))
9950 || !reg_mentioned_p (SET_DEST (dep_set),
9951 XEXP (SET_DEST (set), 0))))
9952 cost = 1;
9953 }
9954 /* The only input for a call that is timing-critical is the
9955 function's address. */
9956 if (CALL_P (insn))
9957 {
9958 rtx call = PATTERN (insn);
9959
9960 if (GET_CODE (call) == PARALLEL)
9961 call = XVECEXP (call, 0, 0);
9962 if (GET_CODE (call) == SET)
9963 call = SET_SRC (call);
9964 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9965 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9966 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9967 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9968 cost -= TARGET_SH4_300 ? 3 : 6;
9969 }
9970 /* Likewise, the most timing-critical input for an sfunc call
9971 is the function address. However, sfuncs typically start
9972 using their arguments pretty quickly.
9973 Assume a four cycle delay for SH4 before they are needed.
9974 Cached ST40-300 calls are quicker, so assume only a one
9975 cycle delay there.
9976 ??? Maybe we should encode the delays till input registers
9977 are needed by sfuncs into the sfunc call insn. */
9978 /* All sfunc calls are parallels with at least four components.
9979 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9980 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9981 && XVECLEN (PATTERN (insn), 0) >= 4
9982 && (reg = sfunc_uses_reg (insn)))
9983 {
9984 if (! reg_set_p (reg, dep_insn))
9985 cost -= TARGET_SH4_300 ? 1 : 4;
9986 }
9987 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9988 {
9989 enum attr_type dep_type = get_attr_type (dep_insn);
9990
9991 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9992 cost--;
9993 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9994 && (type = get_attr_type (insn)) != TYPE_CALL
9995 && type != TYPE_SFUNC)
9996 cost--;
9997 /* When the preceding instruction loads the shift amount of
9998 the following SHAD/SHLD, the latency of the load is increased
9999 by 1 cycle. */
10000 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10001 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10002 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10003 XEXP (SET_SRC (single_set (insn)),
10004 1)))
10005 cost++;
10006 /* When an LS group instruction with a latency of less than
10007 3 cycles is followed by a double-precision floating-point
10008 instruction, FIPR, or FTRV, the latency of the first
10009 instruction is increased to 3 cycles. */
10010 else if (cost < 3
10011 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10012 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10013 cost = 3;
10014 /* The lsw register of a double-precision computation is ready one
10015 cycle earlier. */
10016 else if (reload_completed
10017 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10018 && (use_pat = single_set (insn))
10019 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10020 SET_SRC (use_pat)))
10021 cost -= 1;
10022
10023 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10024 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10025 cost -= 1;
10026 }
10027 else if (TARGET_SH4_300)
10028 {
10029 /* Stores need their input register two cycles later. */
10030 if (dep_set && cost >= 1
10031 && ((type = get_attr_type (insn)) == TYPE_STORE
10032 || type == TYPE_PSTORE
10033 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10034 {
10035 rtx set = single_set (insn);
10036
10037 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10038 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10039 {
10040 cost -= 2;
10041 /* But don't reduce the cost below 1 if the address depends
10042 on a side effect of dep_insn. */
10043 if (cost < 1
10044 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10045 cost = 1;
10046 }
10047 }
10048 }
10049 }
10050 /* An anti-dependence penalty of two applies if the first insn is a double
10051 precision fadd / fsub / fmul. */
10052 else if (!TARGET_SH4_300
10053 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10054 && recog_memoized (dep_insn) >= 0
10055 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10056 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10057 /* A lot of alleged anti-flow dependences are fake,
10058 so check this one is real. */
10059 && flow_dependent_p (dep_insn, insn))
10060 cost = 2;
10061
10062 return cost;
10063 }
10064
10065 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10066 if DEP_INSN is anti-flow dependent on INSN. */
10067 static int
10068 flow_dependent_p (rtx insn, rtx dep_insn)
10069 {
10070 rtx tmp = PATTERN (insn);
10071
10072 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10073 return tmp == NULL_RTX;
10074 }
10075
10076 /* A helper function for flow_dependent_p called through note_stores. */
10077 static void
10078 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10079 {
10080 rtx * pinsn = (rtx *) data;
10081
10082 if (*pinsn && reg_referenced_p (x, *pinsn))
10083 *pinsn = NULL_RTX;
10084 }
10085
10086 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10087 'special function' patterns (type sfunc) that clobber pr, but that
10088 do not look like function calls to leaf_function_p. Hence we must
10089 do this extra check. */
10090 static int
10091 sh_pr_n_sets (void)
10092 {
10093 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10094 }
10095
10096 /* Return where to allocate a pseudo for a given hard register's initial
10097 value. */
10098 static rtx
10099 sh_allocate_initial_value (rtx hard_reg)
10100 {
10101 rtx x;
10102
10103 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10104 {
10105 if (current_function_is_leaf
10106 && ! sh_pr_n_sets ()
10107 && ! (TARGET_SHCOMPACT
10108 && ((crtl->args.info.call_cookie
10109 & ~ CALL_COOKIE_RET_TRAMP (1))
10110 || crtl->saves_all_registers)))
10111 x = hard_reg;
10112 else
10113 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10114 }
10115 else
10116 x = NULL_RTX;
10117
10118 return x;
10119 }
10120
10121 /* Return 2 to indicate dual issue for superscalar processors such as the
10122 SH4, and 1 otherwise. To be used by the DFA pipeline description. */
10123 static int
10124 sh_issue_rate (void)
10125 {
10126 if (TARGET_SUPERSCALAR)
10127 return 2;
10128 else
10129 return 1;
10130 }
10131
10132 /* Functions for ready queue reordering for sched1. */
10133
10134 /* Get weight for mode for a set x. */
10135 static short
10136 find_set_regmode_weight (rtx x, enum machine_mode mode)
10137 {
10138 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10139 return 1;
10140 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10141 {
10142 if (REG_P (SET_DEST (x)))
10143 {
10144 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10145 return 1;
10146 else
10147 return 0;
10148 }
10149 return 1;
10150 }
10151 return 0;
10152 }
10153
10154 /* Get regmode weight for insn. */
10155 static short
10156 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10157 {
10158 short reg_weight = 0;
10159 rtx x;
10160
10161 /* Increment weight for each register born here. */
10162 x = PATTERN (insn);
10163 reg_weight += find_set_regmode_weight (x, mode);
10164 if (GET_CODE (x) == PARALLEL)
10165 {
10166 int j;
10167 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10168 {
10169 x = XVECEXP (PATTERN (insn), 0, j);
10170 reg_weight += find_set_regmode_weight (x, mode);
10171 }
10172 }
10173 /* Decrement weight for each register that dies here. */
10174 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10175 {
10176 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10177 {
10178 rtx note = XEXP (x, 0);
10179 if (REG_P (note) && GET_MODE (note) == mode)
10180 reg_weight--;
10181 }
10182 }
10183 return reg_weight;
10184 }
10185
10186 /* Calculate regmode weights for all insns of a basic block. */
10187 static void
10188 find_regmode_weight (basic_block b, enum machine_mode mode)
10189 {
10190 rtx insn, next_tail, head, tail;
10191
10192 get_ebb_head_tail (b, b, &head, &tail);
10193 next_tail = NEXT_INSN (tail);
10194
10195 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10196 {
10197 /* Handle register life information. */
10198 if (!INSN_P (insn))
10199 continue;
10200
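/* A DFmode value occupies two SFmode registers and a DImode value two
   SImode registers, hence DFmode / DImode weights are counted twice
   toward the SFmode / SImode totals below.  */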
10201 if (mode == SFmode)
10202 INSN_REGMODE_WEIGHT (insn, mode) =
10203 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10204 else if (mode == SImode)
10205 INSN_REGMODE_WEIGHT (insn, mode) =
10206 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10207 }
10208 }
10209
10210 /* Comparison function for ready queue sorting. */
10211 static int
10212 rank_for_reorder (const void *x, const void *y)
10213 {
10214 rtx tmp = *(const rtx *) y;
10215 rtx tmp2 = *(const rtx *) x;
10216
10217 /* The insn in a schedule group should be issued first. */
10218 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10219 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10220
10221 /* If insns are equally good, sort by INSN_LUID (original insn order); this
10222 minimizes instruction movement, thus minimizing sched's effect on
10223 register pressure. */
10224 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10225 }
10226
10227 /* Resort the array A in which only the element at index N - 1 may be out of order. */
10228 static void
10229 swap_reorder (rtx *a, int n)
10230 {
10231 rtx insn = a[n - 1];
10232 int i = n - 2;
10233
10234 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10235 {
10236 a[i + 1] = a[i];
10237 i -= 1;
10238 }
10239 a[i + 1] = insn;
10240 }
10241
10242 #define SCHED_REORDER(READY, N_READY) \
10243 do \
10244 { \
10245 if ((N_READY) == 2) \
10246 swap_reorder (READY, N_READY); \
10247 else if ((N_READY) > 2) \
10248 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10249 } \
10250 while (0)
10251
10252 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10253 macro. */
10254 static void
10255 ready_reorder (rtx *ready, int nready)
10256 {
10257 SCHED_REORDER (ready, nready);
10258 }
10259
10260 /* Count life regions of r0 for a block. */
10261 static int
10262 find_r0_life_regions (basic_block b)
10263 {
10264 rtx end, insn;
10265 rtx pset;
10266 rtx r0_reg;
10267 int live;
10268 int set;
10269 int death = 0;
10270
10271 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10272 {
10273 set = 1;
10274 live = 1;
10275 }
10276 else
10277 {
10278 set = 0;
10279 live = 0;
10280 }
10281
10282 insn = BB_HEAD (b);
10283 end = BB_END (b);
10284 r0_reg = gen_rtx_REG (SImode, R0_REG);
10285 while (1)
10286 {
10287 if (INSN_P (insn))
10288 {
10289 if (find_regno_note (insn, REG_DEAD, R0_REG))
10290 {
10291 death++;
10292 live = 0;
10293 }
10294 if (!live
10295 && (pset = single_set (insn))
10296 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10297 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10298 {
10299 set++;
10300 live = 1;
10301 }
10302 }
10303 if (insn == end)
10304 break;
10305 insn = NEXT_INSN (insn);
10306 }
10307 return set - death;
10308 }
10309
10310 /* Calculate regmode weights for all insns of all basic blocks. */
10311 static void
10312 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10313 int verbose ATTRIBUTE_UNUSED,
10314 int old_max_uid)
10315 {
10316 basic_block b;
10317
10318 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10319 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10320 r0_life_regions = 0;
10321
10322 FOR_EACH_BB_REVERSE (b)
10323 {
10324 find_regmode_weight (b, SImode);
10325 find_regmode_weight (b, SFmode);
10326 if (!reload_completed)
10327 r0_life_regions += find_r0_life_regions (b);
10328 }
10329
10330 CURR_REGMODE_PRESSURE (SImode) = 0;
10331 CURR_REGMODE_PRESSURE (SFmode) = 0;
10332
10333 }
10334
10335 /* Cleanup. */
10336 static void
10337 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10338 int verbose ATTRIBUTE_UNUSED)
10339 {
10340 if (regmode_weight[0])
10341 {
10342 free (regmode_weight[0]);
10343 regmode_weight[0] = NULL;
10344 }
10345 if (regmode_weight[1])
10346 {
10347 free (regmode_weight[1]);
10348 regmode_weight[1] = NULL;
10349 }
10350 }
10351
10352 /* The set of scalar modes supported differs from the default version:
10353 TImode is not supported for 32-bit SHMEDIA. */
10354 static bool
10355 sh_scalar_mode_supported_p (enum machine_mode mode)
10356 {
10357 if (TARGET_SHMEDIA32 && mode == TImode)
10358 return false;
10359
10360 return default_scalar_mode_supported_p (mode);
10361 }
10362
10363 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10364 keep count of register pressures on SImode and SFmode. */
10365 static int
10366 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10367 int sched_verbose ATTRIBUTE_UNUSED,
10368 rtx insn,
10369 int can_issue_more)
10370 {
10371 if (GET_CODE (PATTERN (insn)) != USE
10372 && GET_CODE (PATTERN (insn)) != CLOBBER)
10373 cached_can_issue_more = can_issue_more - 1;
10374 else
10375 cached_can_issue_more = can_issue_more;
10376
10377 if (reload_completed)
10378 return cached_can_issue_more;
10379
10380 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10381 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10382
10383 return cached_can_issue_more;
10384 }
10385
10386 static void
10387 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10388 int verbose ATTRIBUTE_UNUSED,
10389 int veclen ATTRIBUTE_UNUSED)
10390 {
10391 CURR_REGMODE_PRESSURE (SImode) = 0;
10392 CURR_REGMODE_PRESSURE (SFmode) = 0;
10393 }
10394
10395 /* Some magic numbers. */
10396 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10397 functions that already have high pressure on r0. */
10398 #define R0_MAX_LIFE_REGIONS 2
10399 /* Register pressure thresholds for SImode and SFmode registers. */
10400 #define SIMODE_MAX_WEIGHT 5
10401 #define SFMODE_MAX_WEIGHT 10
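/* These thresholds are compared against the running CURR_REGMODE_PRESSURE
   totals by high_pressure below.  When either limit is exceeded, or r0
   already has R0_MAX_LIFE_REGIONS life regions, sh_reorder resorts the
   ready list into roughly source order and sh_reorder2 requests that
   cycles be skipped.  */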
10402
10403 /* Return true if the pressure is high for MODE. */
10404 static short
10405 high_pressure (enum machine_mode mode)
10406 {
10407 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10408 functions that already have high pressure on r0. */
10409 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10410 return 1;
10411
10412 if (mode == SFmode)
10413 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10414 else
10415 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10416 }
10417
10418 /* Reorder ready queue if register pressure is high. */
10419 static int
10420 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10421 int sched_verbose ATTRIBUTE_UNUSED,
10422 rtx *ready,
10423 int *n_readyp,
10424 int clock_var ATTRIBUTE_UNUSED)
10425 {
10426 if (reload_completed)
10427 return sh_issue_rate ();
10428
10429 if (high_pressure (SFmode) || high_pressure (SImode))
10430 {
10431 ready_reorder (ready, *n_readyp);
10432 }
10433
10434 return sh_issue_rate ();
10435 }
10436
10437 /* Skip cycles if the current register pressure is high. */
10438 static int
10439 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10440 int sched_verbose ATTRIBUTE_UNUSED,
10441 rtx *ready ATTRIBUTE_UNUSED,
10442 int *n_readyp ATTRIBUTE_UNUSED,
10443 int clock_var ATTRIBUTE_UNUSED)
10444 {
10445 if (reload_completed)
10446 return cached_can_issue_more;
10447
10448 if (high_pressure (SFmode) || high_pressure (SImode))
10449 skip_cycles = 1;
10450
10451 return cached_can_issue_more;
10452 }
10453
10454 /* Skip cycles without sorting the ready queue. This will move insns from
10455 Q -> R. If this is the last cycle we are skipping, allow sorting of the
10456 ready queue by sh_reorder. */
10457
10458 /* Generally, skipping this many cycles is sufficient for all insns to move
10459 from Q -> R. */
10460 #define MAX_SKIPS 8
10461
10462 static int
10463 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10464 int sched_verbose ATTRIBUTE_UNUSED,
10465 rtx insn ATTRIBUTE_UNUSED,
10466 int last_clock_var,
10467 int clock_var,
10468 int *sort_p)
10469 {
10470 if (reload_completed)
10471 return 0;
10472
10473 if (skip_cycles)
10474 {
10475 if ((clock_var - last_clock_var) < MAX_SKIPS)
10476 {
10477 *sort_p = 0;
10478 return 1;
10479 }
10480 /* If this is the last cycle we are skipping, allow reordering of R. */
10481 if ((clock_var - last_clock_var) == MAX_SKIPS)
10482 {
10483 *sort_p = 1;
10484 return 1;
10485 }
10486 }
10487
10488 skip_cycles = 0;
10489
10490 return 0;
10491 }
10492
10493 /* SHmedia requires registers for branches, so we can't generate new
10494 branches past reload. */
10495 static bool
10496 sh_cannot_modify_jumps_p (void)
10497 {
10498 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10499 }
10500
10501 static enum reg_class
10502 sh_target_reg_class (void)
10503 {
10504 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10505 }
10506
10507 static bool
10508 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10509 {
10510 HARD_REG_SET dummy;
10511 #if 0
10512 rtx insn;
10513 #endif
10514
10515 if (! shmedia_space_reserved_for_target_registers)
10516 return 0;
10517 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10518 return 0;
10519 if (calc_live_regs (&dummy) >= 6 * 8)
10520 return 1;
10521 return 0;
10522 }
10523
10524 static bool
10525 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10526 {
10527 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10528 }
10529 \f
10530 /*
10531 On the SH1..SH4, the trampoline looks like
10532 2 0002 D202 mov.l l2,r2
10533 1 0000 D301 mov.l l1,r3
10534 3 0004 422B jmp @r2
10535 4 0006 0009 nop
10536 5 0008 00000000 l1: .long area
10537 6 000c 00000000 l2: .long function
10538
10539 SH5 (compact) uses r1 instead of r3 for the static chain. */
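/* For the SH1..SH4 layout shown above, the variable parts are the two
   .long words, which sh_trampoline_init below fills with the static chain
   value (l1) and the function address (l2).  */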
10540
10541
10542 /* Emit RTL insns to initialize the variable parts of a trampoline.
10543 FNADDR is an RTX for the address of the function's pure code.
10544 CXT is an RTX for the static chain value for the function. */
10545
10546 static void
10547 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10548 {
10549 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10550 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10551
10552 if (TARGET_SHMEDIA64)
10553 {
10554 rtx tramp_templ;
10555 int fixed_len;
10556
10557 rtx movi1 = GEN_INT (0xcc000010);
10558 rtx shori1 = GEN_INT (0xc8000010);
10559 rtx src, dst;
10560
10561 /* The following trampoline works within a +- 128 KB range for cxt:
10562 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10563 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10564 gettr tr1,r1; blink tr0,r63 */
10565 /* Address rounding makes it hard to compute the exact bounds of the
10566 offset for this trampoline, but we have a rather generous offset
10567 range, so frame_offset should do fine as an upper bound. */
10568 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10569 {
10570 /* ??? could optimize this trampoline initialization
10571 by writing DImode words with two insns each. */
10572 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10573 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10574 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10575 insn = gen_rtx_AND (DImode, insn, mask);
10576 /* Or in ptb/u .,tr1 pattern */
10577 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10578 insn = force_operand (insn, NULL_RTX);
10579 insn = gen_lowpart (SImode, insn);
10580 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10581 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10582 insn = gen_rtx_AND (DImode, insn, mask);
10583 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10584 insn = gen_lowpart (SImode, insn);
10585 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10586 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10587 insn = gen_rtx_AND (DImode, insn, mask);
10588 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10589 insn = gen_lowpart (SImode, insn);
10590 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10591 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10592 insn = gen_rtx_AND (DImode, insn, mask);
10593 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10594 insn = gen_lowpart (SImode, insn);
10595 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10596 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10597 insn = gen_rtx_AND (DImode, insn, mask);
10598 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10599 insn = gen_lowpart (SImode, insn);
10600 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10601 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10602 GEN_INT (0x6bf10600));
10603 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10604 GEN_INT (0x4415fc10));
10605 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10606 GEN_INT (0x4401fff0));
10607 emit_insn (gen_ic_invalidate_line (tramp));
10608 return;
10609 }
10610 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
10611 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10612
10613 tramp_templ = gen_datalabel_ref (tramp_templ);
10614 dst = tramp_mem;
10615 src = gen_const_mem (BLKmode, tramp_templ);
10616 set_mem_align (dst, 256);
10617 set_mem_align (src, 64);
10618 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10619
10620 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10621 emit_move_insn (adjust_address (tramp_mem, Pmode,
10622 fixed_len + GET_MODE_SIZE (Pmode)),
10623 cxt);
10624 emit_insn (gen_ic_invalidate_line (tramp));
10625 return;
10626 }
10627 else if (TARGET_SHMEDIA)
10628 {
10629 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10630 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10631 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10632 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10633 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10634 rotated 10 right, and the higher 16 bits of every 32 selected. */
10635 rtx movishori
10636 = force_reg (V2HImode, (simplify_gen_subreg
10637 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10638 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10639 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10640
10641 fnaddr = force_reg (SImode, fnaddr);
10642 cxt = force_reg (SImode, cxt);
10643 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10644 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10645 movishori));
10646 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10647 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10648 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10649 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10650 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10651 gen_rtx_SUBREG (V2HImode, cxt, 0),
10652 movishori));
10653 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10654 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10655 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10656 if (TARGET_LITTLE_ENDIAN)
10657 {
10658 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10659 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10660 }
10661 else
10662 {
10663 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10664 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10665 }
10666 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10667 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10668 emit_insn (gen_ic_invalidate_line (tramp));
10669 return;
10670 }
10671 else if (TARGET_SHCOMPACT)
10672 {
10673 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10674 return;
10675 }
10676 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10677 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10678 SImode));
10679 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10680 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10681 SImode));
10682 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10683 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10684 if (TARGET_HARVARD)
10685 {
10686 if (!TARGET_INLINE_IC_INVALIDATE
10687 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10688 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10689 FUNCTION_ORDINARY),
10690 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10691 else
10692 emit_insn (gen_ic_invalidate_line (tramp));
10693 }
10694 }
10695
10696 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10697
10698 static rtx
10699 sh_trampoline_adjust_address (rtx tramp)
10700 {
10701 if (TARGET_SHMEDIA)
10702 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10703 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10704 return tramp;
10705 }
10706
10707 /* FIXME: This is overly conservative. A SHcompact function that
10708 receives arguments ``by reference'' will have them stored in its
10709 own stack frame, so it must not pass pointers or references to
10710 these arguments to other functions by means of sibling calls. */
10711 /* If PIC, we cannot make sibling calls to global functions
10712 because the PLT requires r12 to be live. */
10713 static bool
10714 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10715 {
10716 return (1
10717 && (! TARGET_SHCOMPACT
10718 || crtl->args.info.stack_regs == 0)
10719 && ! sh_cfun_interrupt_handler_p ()
10720 && (! flag_pic
10721 || (decl && ! TREE_PUBLIC (decl))
10722 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10723 }
10724 \f
10725 /* Machine specific built-in functions. */
10726
10727 struct builtin_description
10728 {
10729 const enum insn_code icode;
10730 const char *const name;
10731 int signature;
10732 tree fndecl;
10733 };
10734
10735 /* Describe number and signedness of arguments; arg[0] == result
10736 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10737 /* 9: 64-bit pointer, 10: 32-bit pointer */
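/* As an example of how to read the table, SH_BLTIN_MAC_HISI below is
   { 1, 4, 4, 1 }: an unsigned result, two arguments with no particular
   signedness, and a third, unsigned argument.  */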
10738 static const char signature_args[][4] =
10739 {
10740 #define SH_BLTIN_V2SI2 0
10741 { 4, 4 },
10742 #define SH_BLTIN_V4HI2 1
10743 { 4, 4 },
10744 #define SH_BLTIN_V2SI3 2
10745 { 4, 4, 4 },
10746 #define SH_BLTIN_V4HI3 3
10747 { 4, 4, 4 },
10748 #define SH_BLTIN_V8QI3 4
10749 { 4, 4, 4 },
10750 #define SH_BLTIN_MAC_HISI 5
10751 { 1, 4, 4, 1 },
10752 #define SH_BLTIN_SH_HI 6
10753 { 4, 4, 1 },
10754 #define SH_BLTIN_SH_SI 7
10755 { 4, 4, 1 },
10756 #define SH_BLTIN_V4HI2V2SI 8
10757 { 4, 4, 4 },
10758 #define SH_BLTIN_V4HI2V8QI 9
10759 { 4, 4, 4 },
10760 #define SH_BLTIN_SISF 10
10761 { 4, 2 },
10762 #define SH_BLTIN_LDUA_L 11
10763 { 2, 10 },
10764 #define SH_BLTIN_LDUA_Q 12
10765 { 1, 10 },
10766 #define SH_BLTIN_STUA_L 13
10767 { 0, 10, 2 },
10768 #define SH_BLTIN_STUA_Q 14
10769 { 0, 10, 1 },
10770 #define SH_BLTIN_LDUA_L64 15
10771 { 2, 9 },
10772 #define SH_BLTIN_LDUA_Q64 16
10773 { 1, 9 },
10774 #define SH_BLTIN_STUA_L64 17
10775 { 0, 9, 2 },
10776 #define SH_BLTIN_STUA_Q64 18
10777 { 0, 9, 1 },
10778 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10779 #define SH_BLTIN_2 19
10780 #define SH_BLTIN_SU 19
10781 { 1, 2 },
10782 #define SH_BLTIN_3 20
10783 #define SH_BLTIN_SUS 20
10784 { 2, 2, 1 },
10785 #define SH_BLTIN_PSSV 21
10786 { 0, 8, 2, 2 },
10787 #define SH_BLTIN_XXUU 22
10788 #define SH_BLTIN_UUUU 22
10789 { 1, 1, 1, 1 },
10790 #define SH_BLTIN_PV 23
10791 { 0, 8 },
10792 };
10793 /* mcmv: operands considered unsigned. */
10794 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10795 /* mperm: control value considered unsigned int. */
10796 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10797 /* mshards_q: returns signed short. */
10798 /* nsb: takes long long arg, returns unsigned char. */
10799 static struct builtin_description bdesc[] =
10800 {
10801 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10802 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10803 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10804 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10805 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10806 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10807 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10808 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10809 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10810 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10811 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10812 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10813 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10814 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10815 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10816 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10817 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10818 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10819 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10820 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10821 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10822 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10823 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10824 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10825 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10826 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10827 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10828 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10829 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10830 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10831 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10832 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10833 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10834 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10835 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10836 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10837 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10838 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10839 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10840 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10841 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10842 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10843 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10844 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10845 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10846 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10847 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10848 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10849 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10850 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10851 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10852 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10853 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10854 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10855 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10856 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10857 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10858 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10859 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10860 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10861 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10862 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10863 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10864 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10865 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10866 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10867 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10868 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10869 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10870 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10871 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10872 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10873 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
10874 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
10875 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
10876 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
10877 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
10878 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
10879 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
10880 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
10881 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
10882 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
10883 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
10884 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
10885 };
10886
10887 static void
10888 sh_media_init_builtins (void)
10889 {
10890 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10891 struct builtin_description *d;
10892
10893 memset (shared, 0, sizeof shared);
10894 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10895 {
10896 tree type, arg_type = 0;
10897 int signature = d->signature;
10898 int i;
10899
10900 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10901 type = shared[signature];
10902 else
10903 {
10904 int has_result = signature_args[signature][0] != 0;
10905
10906 if ((signature_args[signature][1] & 8)
10907 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10908 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10909 continue;
10910 if (! TARGET_FPU_ANY
10911 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10912 continue;
10913 type = void_list_node;
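/* Walk the signature from the last possible argument slot down to slot 0:
   a slot coded 0 contributes no argument, a slot coded 8 becomes a pointer
   argument, and slot 0 finally leaves the result type in arg_type for the
   build_function_type call below.  */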
10914 for (i = 3; ; i--)
10915 {
10916 int arg = signature_args[signature][i];
10917 int opno = i - 1 + has_result;
10918
10919 if (arg & 8)
10920 arg_type = ptr_type_node;
10921 else if (arg)
10922 arg_type = (*lang_hooks.types.type_for_mode)
10923 (insn_data[d->icode].operand[opno].mode,
10924 (arg & 1));
10925 else if (i)
10926 continue;
10927 else
10928 arg_type = void_type_node;
10929 if (i == 0)
10930 break;
10931 type = tree_cons (NULL_TREE, arg_type, type);
10932 }
10933 type = build_function_type (arg_type, type);
10934 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10935 shared[signature] = type;
10936 }
10937 d->fndecl =
10938 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10939 NULL, NULL_TREE);
10940 }
10941 }
10942
10943 /* Returns the shmedia builtin decl for CODE. */
10944
10945 static tree
10946 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10947 {
10948 if (code >= ARRAY_SIZE (bdesc))
10949 return error_mark_node;
10950
10951 return bdesc[code].fndecl;
10952 }
10953
10954 /* Implements target hook vector_mode_supported_p. */
10955 bool
10956 sh_vector_mode_supported_p (enum machine_mode mode)
10957 {
10958 if (TARGET_FPU_ANY
10959 && ((mode == V2SFmode)
10960 || (mode == V4SFmode)
10961 || (mode == V16SFmode)))
10962 return true;
10963
10964 else if (TARGET_SHMEDIA
10965 && ((mode == V8QImode)
10966 || (mode == V2HImode)
10967 || (mode == V4HImode)
10968 || (mode == V2SImode)))
10969 return true;
10970
10971 return false;
10972 }
10973
10974 bool
10975 sh_frame_pointer_required (void)
10976 {
10977 /* If needed override this in other tm.h files to cope with various OS
10978 lossage requiring a frame pointer. */
10979 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10980 return true;
10981
10982 if (crtl->profile)
10983 return true;
10984
10985 return false;
10986 }
10987
10988 /* Implements target hook dwarf_calling_convention.  Return an enum
10989 dwarf_calling_convention value. */
10990 int
10991 sh_dwarf_calling_convention (const_tree func)
10992 {
10993 if (sh_attr_renesas_p (func))
10994 return DW_CC_GNU_renesas_sh;
10995
10996 return DW_CC_normal;
10997 }
10998
10999 static void
11000 sh_init_builtins (void)
11001 {
11002 if (TARGET_SHMEDIA)
11003 sh_media_init_builtins ();
11004 }
11005
11006 /* Returns the sh builtin decl for CODE. */
11007
11008 static tree
11009 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11010 {
11011 if (TARGET_SHMEDIA)
11012 return sh_media_builtin_decl (code, initialize_p);
11013
11014 return error_mark_node;
11015 }
11016
11017 /* Expand an expression EXP that calls a built-in function,
11018 with result going to TARGET if that's convenient
11019 (and in mode MODE if that's convenient).
11020 SUBTARGET may be used as the target for computing one of EXP's operands.
11021 IGNORE is nonzero if the value is to be ignored. */
11022
11023 static rtx
11024 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11025 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11026 {
11027 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11028 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11029 const struct builtin_description *d = &bdesc[fcode];
11030 enum insn_code icode = d->icode;
11031 int signature = d->signature;
11032 enum machine_mode tmode = VOIDmode;
11033 int nop = 0, i;
11034 rtx op[4];
11035 rtx pat = 0;
11036
11037 if (signature_args[signature][0])
11038 {
11039 if (ignore)
11040 return 0;
11041
11042 tmode = insn_data[icode].operand[0].mode;
11043 if (! target
11044 || GET_MODE (target) != tmode
11045 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11046 target = gen_reg_rtx (tmode);
11047 op[nop++] = target;
11048 }
11049 else
11050 target = 0;
11051
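  /* Gather up to three call arguments, converting each to the mode the
     corresponding insn operand expects (pointer arguments use ptr_mode);
     NOP indexes the insn operands and already accounts for the result
     operand when the signature has one.  */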
11052 for (i = 1; i <= 3; i++, nop++)
11053 {
11054 tree arg;
11055 enum machine_mode opmode, argmode;
11056 tree optype;
11057
11058 if (! signature_args[signature][i])
11059 break;
11060 arg = CALL_EXPR_ARG (exp, i - 1);
11061 if (arg == error_mark_node)
11062 return const0_rtx;
11063 if (signature_args[signature][i] & 8)
11064 {
11065 opmode = ptr_mode;
11066 optype = ptr_type_node;
11067 }
11068 else
11069 {
11070 opmode = insn_data[icode].operand[nop].mode;
11071 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11072 }
11073 argmode = TYPE_MODE (TREE_TYPE (arg));
11074 if (argmode != opmode)
11075 arg = build1 (NOP_EXPR, optype, arg);
11076 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11077 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11078 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11079 }
11080
11081 switch (nop)
11082 {
11083 case 1:
11084 pat = (*insn_data[d->icode].genfun) (op[0]);
11085 break;
11086 case 2:
11087 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11088 break;
11089 case 3:
11090 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11091 break;
11092 case 4:
11093 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11094 break;
11095 default:
11096 gcc_unreachable ();
11097 }
11098 if (! pat)
11099 return 0;
11100 emit_insn (pat);
11101 return target;
11102 }
11103
11104 void
11105 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11106 {
11107 rtx sel0 = const0_rtx;
11108 rtx sel1 = const1_rtx;
11109 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11110 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11111
11112 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11113 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11114 }
11115
11116 void
11117 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11118 {
11119 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11120
11121 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11122 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11123 }
11124
11125 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11126 We can allow any mode in any general register. The special registers
11127 only allow SImode. Don't allow any mode in the PR.
11128
11129 We cannot hold DCmode values in the XD registers because alter_reg
11130 handles subregs of them incorrectly. We could work around this by
11131 spacing the XD registers like the DR registers, but this would require
11132 additional memory in every compilation to hold larger register vectors.
11133 We could hold SFmode / SCmode values in XD registers, but that
11134 would require a tertiary reload when reloading from / to memory,
11135 and a secondary reload to reload from / to general regs; that
11136 seems to be a losing proposition.
11137
11138 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11139 it won't be ferried through GP registers first. */
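/* For example, under the checks below a DFmode value is only allowed in an
   FP register whose offset from FIRST_FP_REG is even, and TImode in the FP
   registers additionally requires that offset to be a multiple of four.  */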
11140
11141 bool
11142 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11143 {
11144 if (SPECIAL_REGISTER_P (regno))
11145 return mode == SImode;
11146
11147 if (regno == FPUL_REG)
11148 return (mode == SImode || mode == SFmode);
11149
11150 if (FP_REGISTER_P (regno) && mode == SFmode)
11151 return true;
11152
11153 if (mode == V2SFmode)
11154 {
11155 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11156 || GENERAL_REGISTER_P (regno)))
11157 return true;
11158 else
11159 return false;
11160 }
11161
11162 if (mode == V4SFmode)
11163 {
11164 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11165 || GENERAL_REGISTER_P (regno))
11166 return true;
11167 else
11168 return false;
11169 }
11170
11171 if (mode == V16SFmode)
11172 {
11173 if (TARGET_SHMEDIA)
11174 {
11175 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11176 return true;
11177 else
11178 return false;
11179 }
11180 else
11181 return regno == FIRST_XD_REG;
11182 }
11183
11184 if (FP_REGISTER_P (regno))
11185 {
11186 if (mode == SFmode
11187 || mode == SImode
11188 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11189 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11190 || mode == DCmode
11191 || (TARGET_SHMEDIA
11192 && (mode == DFmode || mode == DImode
11193 || mode == V2SFmode || mode == TImode)))
11194 && ((regno - FIRST_FP_REG) & 1) == 0)
11195 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11196 && ((regno - FIRST_FP_REG) & 3) == 0))
11197 return true;
11198 else
11199 return false;
11200 }
11201
11202 if (XD_REGISTER_P (regno))
11203 return mode == DFmode;
11204
11205 if (TARGET_REGISTER_P (regno))
11206 return (mode == DImode || mode == SImode || mode == PDImode);
11207
11208 if (regno == PR_REG)
11209 return mode == SImode;
11210
11211 if (regno == FPSCR_REG)
11212 return mode == PSImode;
11213
11214 /* FIXME. This works around PR target/37633 for -O0. */
11215 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11216 {
11217 unsigned int n = GET_MODE_SIZE (mode) / 8;
11218
11219 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11220 && regno <= FIRST_GENERAL_REG + 14)
11221 return false;
11222 }
11223
11224 return true;
11225 }
11226
11227 /* Return true if a mode change from FROM to TO is invalid for registers
11228 in class RCLASS. */
11229 bool
11230 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11231 enum reg_class rclass)
11232 {
11233 /* We want to enable the use of SUBREGs as a means to
11234 VEC_SELECT a single element of a vector. */
11235 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11236 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11237
11238 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11239 {
11240 if (TARGET_LITTLE_ENDIAN)
11241 {
11242 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11243 return reg_classes_intersect_p (DF_REGS, rclass);
11244 }
11245 else
11246 {
11247 if (GET_MODE_SIZE (from) < 8)
11248 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11249 }
11250 }
11251 return 0;
11252 }
11253
11254 /* Return true if registers in machine mode MODE will likely be
11255 allocated to registers in small register classes. */
11256
11257 bool
11258 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11259 {
11260 return (! TARGET_SHMEDIA);
11261 }
11262
11263 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11264 that label is used. */
11265
11266 void
11267 sh_mark_label (rtx address, int nuses)
11268 {
11269 if (GOTOFF_P (address))
11270 {
11271 /* Extract the label or symbol. */
11272 address = XEXP (address, 0);
11273 if (GET_CODE (address) == PLUS)
11274 address = XEXP (address, 0);
11275 address = XVECEXP (address, 0, 0);
11276 }
11277 if (GET_CODE (address) == LABEL_REF
11278 && LABEL_P (XEXP (address, 0)))
11279 LABEL_NUSES (XEXP (address, 0)) += nuses;
11280 }
11281
11282 /* Compute extra cost of moving data between one register class
11283 and another. */
11284
11285 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11286 uses this information. Hence, the general register <-> floating point
11287 register information here is not used for SFmode. */
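/* As a rough illustration of the scale used below: a DFmode move between a
   general register and a floating-point register on a non-SHmedia target
   costs 8 * ((8 + 7) / 8) = 8 when TARGET_FMOVD is set (12 otherwise),
   while the fall-through case at the end of the function charges
   2 * ((GET_MODE_SIZE (mode) + 3) / 4), e.g. 2 for an SImode move between
   general registers.  */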
11288
11289 int
11290 sh_register_move_cost (enum machine_mode mode,
11291 enum reg_class srcclass, enum reg_class dstclass)
11292 {
11293 if (dstclass == T_REGS || dstclass == PR_REGS)
11294 return 10;
11295
11296 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11297 return 4;
11298
11299 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11300 && REGCLASS_HAS_FP_REG (srcclass)
11301 && REGCLASS_HAS_FP_REG (dstclass))
11302 return 4;
11303
11304 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11305 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11306
11307 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11308 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11309 return 9;
11310
11311 if ((REGCLASS_HAS_FP_REG (dstclass)
11312 && REGCLASS_HAS_GENERAL_REG (srcclass))
11313 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11314 && REGCLASS_HAS_FP_REG (srcclass)))
11315 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11316 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11317
11318 if ((dstclass == FPUL_REGS
11319 && REGCLASS_HAS_GENERAL_REG (srcclass))
11320 || (srcclass == FPUL_REGS
11321 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11322 return 5;
11323
11324 if ((dstclass == FPUL_REGS
11325 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11326 || (srcclass == FPUL_REGS
11327 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11328 return 7;
11329
11330 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11331 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11332 return 20;
11333
11334 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11335 if (TARGET_SHMEDIA
11336 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11337 {
11338 if (sh_gettrcost >= 0)
11339 return sh_gettrcost;
11340 else if (!TARGET_PT_FIXED)
11341 return 100;
11342 }
11343
11344 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11345 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11346 return 4;
11347
11348 if (TARGET_SHMEDIA
11349 || (TARGET_FMOVD
11350 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11351 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11352 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11353
11354 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11355 }
11356
11357 static rtx emit_load_ptr (rtx, rtx);
11358
11359 static rtx
11360 emit_load_ptr (rtx reg, rtx addr)
11361 {
11362 rtx mem = gen_const_mem (ptr_mode, addr);
11363
11364 if (Pmode != ptr_mode)
11365 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11366 return emit_move_insn (reg, mem);
11367 }
11368
11369 static void
11370 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11371 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11372 tree function)
11373 {
11374 CUMULATIVE_ARGS cum;
11375 int structure_value_byref = 0;
11376 rtx this_rtx, this_value, sibcall, insns, funexp;
11377 tree funtype = TREE_TYPE (function);
11378 int simple_add = CONST_OK_FOR_ADD (delta);
11379 int did_load = 0;
11380 rtx scratch0, scratch1, scratch2;
11381 unsigned i;
11382
11383 reload_completed = 1;
11384 epilogue_completed = 1;
11385 current_function_uses_only_leaf_regs = 1;
11386
11387 emit_note (NOTE_INSN_PROLOGUE_END);
11388
11389 /* Find the "this" pointer. We have such a wide range of ABIs for the
11390 SH that it's best to do this completely machine independently.
11391 "this" is passed as first argument, unless a structure return pointer
11392 comes first, in which case "this" comes second. */
11393 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11394 #ifndef PCC_STATIC_STRUCT_RETURN
11395 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11396 structure_value_byref = 1;
11397 #endif /* not PCC_STATIC_STRUCT_RETURN */
11398 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11399 {
11400 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11401
11402 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11403 }
11404 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11405
11406 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11407 static chain pointer (even if you can't have nested virtual functions
11408 right now, someone might implement them sometime), and the rest of the
11409 registers are used for argument passing, are callee-saved, or reserved. */
11410 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11411 -ffixed-reg has been used. */
11412 if (! call_used_regs[0] || fixed_regs[0])
11413 error ("r0 needs to be available as a call-clobbered register");
11414 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11415 if (! TARGET_SH5)
11416 {
11417 if (call_used_regs[1] && ! fixed_regs[1])
11418 scratch1 = gen_rtx_REG (ptr_mode, 1);
11419 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11420 indicating where struct values should be returned. */
11421 if (call_used_regs[3] && ! fixed_regs[3])
11422 scratch2 = gen_rtx_REG (Pmode, 3);
11423 }
11424 else if (TARGET_SHMEDIA)
11425 {
11426 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11427 if (i != REGNO (scratch0) &&
11428 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11429 {
11430 scratch1 = gen_rtx_REG (ptr_mode, i);
11431 break;
11432 }
11433 if (scratch1 == scratch0)
11434 error ("Need a second call-clobbered general purpose register");
11435 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11436 if (call_used_regs[i] && ! fixed_regs[i])
11437 {
11438 scratch2 = gen_rtx_REG (Pmode, i);
11439 break;
11440 }
11441 if (scratch2 == scratch0)
11442 error ("Need a call-clobbered target register");
11443 }
11444
11445 this_value = plus_constant (this_rtx, delta);
11446 if (vcall_offset
11447 && (simple_add || scratch0 != scratch1)
11448 && strict_memory_address_p (ptr_mode, this_value))
11449 {
11450 emit_load_ptr (scratch0, this_value);
11451 did_load = 1;
11452 }
11453
11454 if (!delta)
11455 ; /* Do nothing. */
11456 else if (simple_add)
11457 emit_move_insn (this_rtx, this_value);
11458 else
11459 {
11460 emit_move_insn (scratch1, GEN_INT (delta));
11461 emit_insn (gen_add2_insn (this_rtx, scratch1));
11462 }
11463
11464 if (vcall_offset)
11465 {
11466 rtx offset_addr;
11467
11468 if (!did_load)
11469 emit_load_ptr (scratch0, this_rtx);
11470
11471 offset_addr = plus_constant (scratch0, vcall_offset);
11472 if (strict_memory_address_p (ptr_mode, offset_addr))
11473 ; /* Do nothing. */
11474 else if (! TARGET_SH5 && scratch0 != scratch1)
11475 {
11476 /* scratch0 != scratch1, and we have indexed loads. Get better
11477 schedule by loading the offset into r1 and using an indexed
11478 load - then the load of r1 can issue before the load from
11479 (this_rtx + delta) finishes. */
11480 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11481 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11482 }
11483 else if (CONST_OK_FOR_ADD (vcall_offset))
11484 {
11485 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11486 offset_addr = scratch0;
11487 }
11488 else if (scratch0 != scratch1)
11489 {
11490 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11491 emit_insn (gen_add2_insn (scratch0, scratch1));
11492 offset_addr = scratch0;
11493 }
11494 else
11495 gcc_unreachable (); /* FIXME */
11496 emit_load_ptr (scratch0, offset_addr);
11497
11498 if (Pmode != ptr_mode)
11499 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11500 emit_insn (gen_add2_insn (this_rtx, scratch0));
11501 }
11502
11503 /* Generate a tail call to the target function. */
11504 if (! TREE_USED (function))
11505 {
11506 assemble_external (function);
11507 TREE_USED (function) = 1;
11508 }
11509 funexp = XEXP (DECL_RTL (function), 0);
11510 /* If the function is overridden, so is the thunk, hence we don't
11511 need GOT addressing even if this is a public symbol. */
11512 #if 0
11513 if (TARGET_SH1 && ! flag_weak)
11514 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11515 else
11516 #endif
11517 if (TARGET_SH2 && flag_pic)
11518 {
11519 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11520 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11521 }
11522 else
11523 {
11524 if (TARGET_SHMEDIA && flag_pic)
11525 {
11526 funexp = gen_sym2PIC (funexp);
11527 PUT_MODE (funexp, Pmode);
11528 }
11529 emit_move_insn (scratch2, funexp);
11530 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11531 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11532 }
11533 sibcall = emit_call_insn (sibcall);
11534 SIBLING_CALL_P (sibcall) = 1;
11535 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11536 emit_barrier ();
11537
11538 /* Run just enough of rest_of_compilation to do scheduling and get
11539 the insns emitted. Note that use_thunk calls
11540 assemble_start_function and assemble_end_function. */
11541
11542 insn_locators_alloc ();
11543 insns = get_insns ();
11544
11545 if (optimize > 0)
11546 {
11547 if (! cfun->cfg)
11548 init_flow (cfun);
11549 split_all_insns_noflow ();
11550 }
11551
11552 sh_reorg ();
11553
11554 if (optimize > 0 && flag_delayed_branch)
11555 dbr_schedule (insns);
11556
11557 shorten_branches (insns);
11558 final_start_function (insns, file, 1);
11559 final (insns, file, 1);
11560 final_end_function ();
11561
11562 reload_completed = 0;
11563 epilogue_completed = 0;
11564 }
11565
11566 rtx
11567 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11568 {
11569 rtx sym;
11570
11571 /* If this is not an ordinary function, the name usually comes from a
11572 string literal or an sprintf buffer. Make sure we use the same
11573 string consistently, so that cse will be able to unify address loads. */
11574 if (kind != FUNCTION_ORDINARY)
11575 name = IDENTIFIER_POINTER (get_identifier (name));
11576 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11577 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11578 if (flag_pic)
11579 switch (kind)
11580 {
11581 case FUNCTION_ORDINARY:
11582 break;
11583 case SFUNC_GOT:
11584 {
11585 rtx reg = target ? target : gen_reg_rtx (Pmode);
11586
11587 emit_insn (gen_symGOT2reg (reg, sym));
11588 sym = reg;
11589 break;
11590 }
11591 case SFUNC_STATIC:
11592 {
11593 /* ??? To allow cse to work, we use GOTOFF relocations.
11594 We could add combiner patterns to transform this into
11595 straight pc-relative calls with sym2PIC / bsrf when
11596 label load and function call are still 1:1 and in the
11597 same basic block during combine. */
11598 rtx reg = target ? target : gen_reg_rtx (Pmode);
11599
11600 emit_insn (gen_symGOTOFF2reg (reg, sym));
11601 sym = reg;
11602 break;
11603 }
11604 }
11605 if (target && sym != target)
11606 {
11607 emit_move_insn (target, sym);
11608 return target;
11609 }
11610 return sym;
11611 }
11612
11613 /* Find the number of a general purpose register in S. */
11614 static int
11615 scavenge_reg (HARD_REG_SET *s)
11616 {
11617 int r;
11618 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11619 if (TEST_HARD_REG_BIT (*s, r))
11620 return r;
11621 return -1;
11622 }
11623
11624 rtx
11625 sh_get_pr_initial_val (void)
11626 {
11627 rtx val;
11628
11629 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11630 PR register on SHcompact, because it might be clobbered by the prologue.
11631 We check first if that is known to be the case. */
11632 if (TARGET_SHCOMPACT
11633 && ((crtl->args.info.call_cookie
11634 & ~ CALL_COOKIE_RET_TRAMP (1))
11635 || crtl->saves_all_registers))
11636 return gen_frame_mem (SImode, return_address_pointer_rtx);
11637
11638 /* If we haven't finished rtl generation, there might be a nonlocal label
11639 that we haven't seen yet.
11640 ??? get_hard_reg_initial_val fails if it is called after register
11641 allocation has started, unless it has been called before for the
11642 same register. And even then, we end up in trouble if we didn't use
11643 the register in the same basic block before. So call
11644 get_hard_reg_initial_val now and wrap it in an unspec if we might
11645 need to replace it. */
11646 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11647 combine can put the pseudo returned by get_hard_reg_initial_val into
11648 instructions that need a general purpose register, which will fail to
11649 be recognized when the pseudo becomes allocated to PR. */
11650 val
11651 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11652 if (TARGET_SH1)
11653 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11654 return val;
11655 }
11656
11657 int
11658 sh_expand_t_scc (rtx operands[])
11659 {
11660 enum rtx_code code = GET_CODE (operands[1]);
11661 rtx target = operands[0];
11662 rtx op0 = operands[2];
11663 rtx op1 = operands[3];
11664 rtx result = target;
11665 HOST_WIDE_INT val;
11666
11667 if (!REG_P (op0) || REGNO (op0) != T_REG
11668 || !CONST_INT_P (op1))
11669 return 0;
11670 if (!REG_P (result))
11671 result = gen_reg_rtx (SImode);
11672 val = INTVAL (op1);
11673 if ((code == EQ && val == 1) || (code == NE && val == 0))
11674 emit_insn (gen_movt (result));
11675 else if (TARGET_SH2A && ((code == EQ && val == 0)
11676 || (code == NE && val == 1)))
11677 emit_insn (gen_xorsi3_movrt (result));
11678 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11679 {
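      /* Compute the logical negation of the T bit without a branch: subc
	 leaves result = result - result - T, i.e. -T (0 or -1), and adding 1
	 then gives 1 - T, which is 1 exactly when T is clear.  */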
11680 emit_clobber (result);
11681 emit_insn (gen_subc (result, result, result));
11682 emit_insn (gen_addsi3 (result, result, const1_rtx));
11683 }
11684 else if (code == EQ || code == NE)
11685 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11686 else
11687 return 0;
11688 if (result != target)
11689 emit_move_insn (target, result);
11690 return 1;
11691 }
11692
11693 /* INSN is an sfunc; return the rtx that describes the address used. */
11694 static rtx
11695 extract_sfunc_addr (rtx insn)
11696 {
11697 rtx pattern, part = NULL_RTX;
11698 int len, i;
11699
11700 pattern = PATTERN (insn);
11701 len = XVECLEN (pattern, 0);
11702 for (i = 0; i < len; i++)
11703 {
11704 part = XVECEXP (pattern, 0, i);
11705 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11706 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11707 return XEXP (part, 0);
11708 }
11709 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11710 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11711 }
11712
11713 /* Verify that the register in use_sfunc_addr still agrees with the address
11714 used in the sfunc. This prevents fill_slots_from_thread from changing
11715 use_sfunc_addr.
11716 INSN is the use_sfunc_addr instruction, and REG is the register it
11717 guards. */
11718 int
11719 check_use_sfunc_addr (rtx insn, rtx reg)
11720 {
11721 /* Search for the sfunc. It should really come right after INSN. */
11722 while ((insn = NEXT_INSN (insn)))
11723 {
11724 if (LABEL_P (insn) || JUMP_P (insn))
11725 break;
11726 if (! INSN_P (insn))
11727 continue;
11728
11729 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11730 insn = XVECEXP (PATTERN (insn), 0, 0);
11731 if (GET_CODE (PATTERN (insn)) != PARALLEL
11732 || get_attr_type (insn) != TYPE_SFUNC)
11733 continue;
11734 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11735 }
11736 gcc_unreachable ();
11737 }
11738
11739 /* This function returns a constant rtx that represents 2**15 / pi in
11740 SFmode.  It's used to scale SFmode angles, in radians, to a
11741 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11742 maps to 0x10000). */
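/* As a sanity check on the literals used here and in sh_fsca_int2sf below:
   2**15 / pi = 32768 / 3.14159265... is approximately 10430.378350470453,
   and its reciprocal pi / 2**15 is approximately 9.587379924285257e-5.  */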
11743
11744 static GTY(()) rtx sh_fsca_sf2int_rtx;
11745
11746 rtx
11747 sh_fsca_sf2int (void)
11748 {
11749 if (! sh_fsca_sf2int_rtx)
11750 {
11751 REAL_VALUE_TYPE rv;
11752
11753 real_from_string (&rv, "10430.378350470453");
11754 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11755 }
11756
11757 return sh_fsca_sf2int_rtx;
11758 }
11759
11760 /* This function returns a constant rtx that represents 2**15 / pi in
11761 DFmode.  It's used to scale DFmode angles, in radians, to a
11762 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11763 maps to 0x10000). */
11764
11765 static GTY(()) rtx sh_fsca_df2int_rtx;
11766
11767 rtx
11768 sh_fsca_df2int (void)
11769 {
11770 if (! sh_fsca_df2int_rtx)
11771 {
11772 REAL_VALUE_TYPE rv;
11773
11774 real_from_string (&rv, "10430.378350470453");
11775 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11776 }
11777
11778 return sh_fsca_df2int_rtx;
11779 }
11780
11781 /* This function returns a constant rtx that represents pi / 2**15 in
11782 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
11783 of a full circle back to an SFmode value (i.e., 0x10000 maps to
11784 2*pi). */
11785
11786 static GTY(()) rtx sh_fsca_int2sf_rtx;
11787
11788 rtx
11789 sh_fsca_int2sf (void)
11790 {
11791 if (! sh_fsca_int2sf_rtx)
11792 {
11793 REAL_VALUE_TYPE rv;
11794
11795 real_from_string (&rv, "9.587379924285257e-5");
11796 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11797 }
11798
11799 return sh_fsca_int2sf_rtx;
11800 }
11801
11802 /* Initialize the CUMULATIVE_ARGS structure. */
11803
11804 void
11805 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11806 tree fntype,
11807 rtx libname ATTRIBUTE_UNUSED,
11808 tree fndecl,
11809 signed int n_named_args,
11810 enum machine_mode mode)
11811 {
11812 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11813 pcum->free_single_fp_reg = 0;
11814 pcum->stack_regs = 0;
11815 pcum->byref_regs = 0;
11816 pcum->byref = 0;
11817 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11818
11819 /* XXX - Should we check TARGET_HITACHI here ??? */
11820 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11821
11822 if (fntype)
11823 {
11824 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11825 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11826 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11827 pcum->arg_count [(int) SH_ARG_INT]
11828 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11829
11830 pcum->call_cookie
11831 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11832 && pcum->arg_count [(int) SH_ARG_INT] == 0
11833 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11834 ? int_size_in_bytes (TREE_TYPE (fntype))
11835 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11836 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11837 == FIRST_RET_REG));
11838 }
11839 else
11840 {
11841 pcum->arg_count [(int) SH_ARG_INT] = 0;
11842 pcum->prototype_p = FALSE;
11843 if (mode != VOIDmode)
11844 {
11845 pcum->call_cookie =
11846 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11847 && GET_MODE_SIZE (mode) > 4
11848 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11849
11850 /* If the default ABI is the Renesas ABI then all library
11851 calls must assume that the library will be using the
11852 Renesas ABI. So if the function would return its result
11853 in memory then we must force the address of this memory
11854 block onto the stack. Ideally we would like to call
11855 targetm.calls.return_in_memory() here but we do not have
11856 the TYPE or the FNDECL available so we synthesize the
11857 contents of that function as best we can. */
11858 pcum->force_mem =
11859 (TARGET_DEFAULT & MASK_HITACHI)
11860 && (mode == BLKmode
11861 || (GET_MODE_SIZE (mode) > 4
11862 && !(mode == DFmode
11863 && TARGET_FPU_DOUBLE)));
11864 }
11865 else
11866 {
11867 pcum->call_cookie = 0;
11868 pcum->force_mem = FALSE;
11869 }
11870 }
11871 }
11872
11873 /* Replace any occurrence of FROM(n) in X with TO(n).  The function does
11874 not descend into CONST_DOUBLEs when doing the replacement.
11875
11876 Note that copying is not done so X must not be shared unless all copies
11877 are to be modified.
11878
11879 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11880 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11881 replacements[n*2+1] - and that we take mode changes into account.
11882
11883 If a replacement is ambiguous, return NULL_RTX.
11884
11885 If MODIFY is zero, don't modify any rtl in place,
11886 just return zero or nonzero for failure / success. */
11887
11888 rtx
11889 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11890 {
11891 int i, j;
11892 const char *fmt;
11893
11894 /* The following prevents loops from occurring when we change a MEM in a
11895 CONST_DOUBLE into the same CONST_DOUBLE. */
11896 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11897 return x;
11898
11899 for (i = n_replacements - 1; i >= 0 ; i--)
11900 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11901 return replacements[i*2+1];
11902
11903 /* Allow this function to make replacements in EXPR_LISTs. */
11904 if (x == 0)
11905 return 0;
11906
11907 if (GET_CODE (x) == SUBREG)
11908 {
11909 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11910 n_replacements, modify);
11911
11912 if (CONST_INT_P (new_rtx))
11913 {
11914 x = simplify_subreg (GET_MODE (x), new_rtx,
11915 GET_MODE (SUBREG_REG (x)),
11916 SUBREG_BYTE (x));
11917 if (! x)
11918 abort ();
11919 }
11920 else if (modify)
11921 SUBREG_REG (x) = new_rtx;
11922
11923 return x;
11924 }
11925 else if (REG_P (x))
11926 {
11927 unsigned regno = REGNO (x);
11928 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11929 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11930 rtx result = NULL_RTX;
11931
11932 for (i = n_replacements - 1; i >= 0; i--)
11933 {
11934 rtx from = replacements[i*2];
11935 rtx to = replacements[i*2+1];
11936 unsigned from_regno, from_nregs, to_regno, new_regno;
11937
11938 if (!REG_P (from))
11939 continue;
11940 from_regno = REGNO (from);
11941 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11942 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11943 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11944 {
11945 if (regno < from_regno
11946 || regno + nregs > from_regno + nregs
11947 || !REG_P (to)
11948 || result)
11949 return NULL_RTX;
11950 to_regno = REGNO (to);
11951 if (to_regno < FIRST_PSEUDO_REGISTER)
11952 {
11953 new_regno = regno + to_regno - from_regno;
11954 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11955 != nregs)
11956 return NULL_RTX;
11957 result = gen_rtx_REG (GET_MODE (x), new_regno);
11958 }
11959 else if (GET_MODE (x) <= GET_MODE (to))
11960 result = gen_lowpart_common (GET_MODE (x), to);
11961 else
11962 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11963 }
11964 }
11965 return result ? result : x;
11966 }
11967 else if (GET_CODE (x) == ZERO_EXTEND)
11968 {
11969 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11970 n_replacements, modify);
11971
11972 if (CONST_INT_P (new_rtx))
11973 {
11974 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11975 new_rtx, GET_MODE (XEXP (x, 0)));
11976 if (! x)
11977 abort ();
11978 }
11979 else if (modify)
11980 XEXP (x, 0) = new_rtx;
11981
11982 return x;
11983 }
11984
11985 fmt = GET_RTX_FORMAT (GET_CODE (x));
11986 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11987 {
11988 rtx new_rtx;
11989
11990 if (fmt[i] == 'e')
11991 {
11992 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11993 n_replacements, modify);
11994 if (!new_rtx)
11995 return NULL_RTX;
11996 if (modify)
11997 XEXP (x, i) = new_rtx;
11998 }
11999 else if (fmt[i] == 'E')
12000 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12001 {
12002 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12003 n_replacements, modify);
12004 if (!new_rtx)
12005 return NULL_RTX;
12006 if (modify)
12007 XVECEXP (x, i, j) = new_rtx;
12008 }
12009 }
12010
12011 return x;
12012 }
12013
12014 rtx
12015 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12016 {
12017 enum rtx_code code = TRUNCATE;
12018
12019 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12020 {
12021 rtx inner = XEXP (x, 0);
12022 enum machine_mode inner_mode = GET_MODE (inner);
12023
12024 if (inner_mode == mode)
12025 return inner;
12026 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12027 x = inner;
12028 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12029 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12030 {
12031 code = GET_CODE (x);
12032 x = inner;
12033 }
12034 }
12035 return gen_rtx_fmt_e (code, mode, x);
12036 }
12037
12038 /* Called via for_each_rtx after reload, to clean up truncates of
12039 registers that span multiple actual hard registers. */
12040 int
12041 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12042 {
12043 rtx x = *p, reg;
12044
12045 if (GET_CODE (x) != TRUNCATE)
12046 return 0;
12047 reg = XEXP (x, 0);
12048 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12049 {
12050 enum machine_mode reg_mode = GET_MODE (reg);
12051 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12052 subreg_lowpart_offset (DImode, reg_mode));
12053 *(int*) n_changes += 1;
12054 return -1;
12055 }
12056 return 0;
12057 }
12058
12059 /* Load and store depend on the highpart of the address. However,
12060 set_attr_alternative does not give well-defined results before reload,
12061 so we must look at the rtl ourselves to see if any of the feeding
12062 registers is used in a memref. */
12063
12064 /* Called by sh_contains_memref_p via for_each_rtx. */
12065 static int
12066 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12067 {
12068 return (MEM_P (*loc));
12069 }
12070
12071 /* Return nonzero iff INSN contains a MEM. */
12072 int
12073 sh_contains_memref_p (rtx insn)
12074 {
12075 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12076 }
12077
12078 /* Return nonzero iff INSN loads a banked register. */
12079 int
12080 sh_loads_bankedreg_p (rtx insn)
12081 {
12082 if (GET_CODE (PATTERN (insn)) == SET)
12083 {
12084 rtx op = SET_DEST (PATTERN(insn));
12085 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12086 return 1;
12087 }
12088
12089 return 0;
12090 }
12091
12092 /* FNADDR is the MEM expression from a call expander. Return an address
12093 to use in an SHmedia insn pattern. */
12094 rtx
12095 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12096 {
12097 int is_sym;
12098
12099 fnaddr = XEXP (fnaddr, 0);
12100 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12101 if (flag_pic && is_sym)
12102 {
12103 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12104 {
12105 rtx reg = gen_reg_rtx (Pmode);
12106
12107 /* We must not use GOTPLT for sibcalls, because PIC_REG
12108 must be restored before the PLT code gets to run. */
12109 if (is_sibcall)
12110 emit_insn (gen_symGOT2reg (reg, fnaddr));
12111 else
12112 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12113 fnaddr = reg;
12114 }
12115 else
12116 {
12117 fnaddr = gen_sym2PIC (fnaddr);
12118 PUT_MODE (fnaddr, Pmode);
12119 }
12120 }
12121 /* If ptabs might trap, make this visible to the rest of the compiler.
12122 We generally assume that symbols pertain to valid locations, but
12123 it is possible to generate invalid symbols with asm or linker tricks.
12124 In a list of functions where each returns its successor, an invalid
12125 symbol might denote an empty list. */
12126 if (!TARGET_PT_FIXED
12127 && (!is_sym || TARGET_INVALID_SYMBOLS)
12128 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12129 {
12130 rtx tr = gen_reg_rtx (PDImode);
12131
12132 emit_insn (gen_ptabs (tr, fnaddr));
12133 fnaddr = tr;
12134 }
12135 else if (! target_reg_operand (fnaddr, Pmode))
12136 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12137 return fnaddr;
12138 }
12139
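/* Secondary reload hook (a descriptive summary of its contract): given a
   value X that is to be copied into (IN_P true) or out of a register of
   class RCLASS in mode MODE, return the class of an intermediate register
   that the copy must go through, or NO_REGS if none is needed.  SRI->icode
   may instead be set to a dedicated reload pattern.  */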
12140 enum reg_class
12141 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
12142 enum machine_mode mode, secondary_reload_info *sri)
12143 {
12144 if (in_p)
12145 {
12146 if (REGCLASS_HAS_FP_REG (rclass)
12147 && ! TARGET_SHMEDIA
12148 && immediate_operand ((x), mode)
12149 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12150 && mode == SFmode && fldi_ok ()))
12151 switch (mode)
12152 {
12153 case SFmode:
12154 sri->icode = CODE_FOR_reload_insf__frn;
12155 return NO_REGS;
12156 case DFmode:
12157 sri->icode = CODE_FOR_reload_indf__frn;
12158 return NO_REGS;
12159 case SImode:
12160 /* ??? If we knew that we are in the appropriate mode -
12161 single precision - we could use a reload pattern directly. */
12162 return FPUL_REGS;
12163 default:
12164 abort ();
12165 }
12166 if (rclass == FPUL_REGS
12167 && ((REG_P (x)
12168 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12169 || REGNO (x) == T_REG))
12170 || GET_CODE (x) == PLUS))
12171 return GENERAL_REGS;
12172 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12173 {
12174 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12175 return GENERAL_REGS;
12176 else if (mode == SFmode)
12177 return FP_REGS;
12178 sri->icode = CODE_FOR_reload_insi__i_fpul;
12179 return NO_REGS;
12180 }
12181 if (rclass == FPSCR_REGS
12182 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12183 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12184 return GENERAL_REGS;
12185 if (REGCLASS_HAS_FP_REG (rclass)
12186 && TARGET_SHMEDIA
12187 && immediate_operand (x, mode)
12188 && x != CONST0_RTX (GET_MODE (x))
12189 && GET_MODE (x) != V4SFmode)
12190 return GENERAL_REGS;
12191 if ((mode == QImode || mode == HImode)
12192 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12193 {
12194 sri->icode = ((mode == QImode)
12195 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12196 return NO_REGS;
12197 }
12198 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12199 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12200 return TARGET_REGS;
12201 } /* end of input-only processing. */
12202
12203 if (((REGCLASS_HAS_FP_REG (rclass)
12204 && (REG_P (x)
12205 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12206 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12207 && TARGET_FMOVD))))
12208 || (REGCLASS_HAS_GENERAL_REG (rclass)
12209 && REG_P (x)
12210 && FP_REGISTER_P (REGNO (x))))
12211 && ! TARGET_SHMEDIA
12212 && (mode == SFmode || mode == SImode))
12213 return FPUL_REGS;
12214 if ((rclass == FPUL_REGS
12215 || (REGCLASS_HAS_FP_REG (rclass)
12216 && ! TARGET_SHMEDIA && mode == SImode))
12217 && (MEM_P (x)
12218 || (REG_P (x)
12219 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12220 || REGNO (x) == T_REG
12221 || system_reg_operand (x, VOIDmode)))))
12222 {
12223 if (rclass == FPUL_REGS)
12224 return GENERAL_REGS;
12225 return FPUL_REGS;
12226 }
12227 if ((rclass == TARGET_REGS
12228 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12229 && !satisfies_constraint_Csy (x)
12230 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12231 return GENERAL_REGS;
12232 if ((rclass == MAC_REGS || rclass == PR_REGS)
12233 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12234 && rclass != REGNO_REG_CLASS (REGNO (x)))
12235 return GENERAL_REGS;
12236 if (rclass != GENERAL_REGS && REG_P (x)
12237 && TARGET_REGISTER_P (REGNO (x)))
12238 return GENERAL_REGS;
12239 return NO_REGS;
12240 }
12241
12242 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12243
12244 #include "gt-sh.h"