1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "diagnostic-core.h"
41 #include "toplev.h"
42 #include "recog.h"
43 #include "integrate.h"
44 #include "dwarf2.h"
45 #include "tm_p.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "df.h"
51 #include "cfglayout.h"
52 #include "intl.h"
53 #include "sched-int.h"
54 #include "params.h"
55 #include "ggc.h"
56 #include "gimple.h"
57 #include "cfgloop.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
60
61
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
63
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
66
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
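/* For example (illustrative use only), GEN_ADD3 (reg, reg, GEN_INT (-8))
   emits an adddi3 on 64 bit SHmedia targets and an addsi3 everywhere else,
   so callers can adjust Pmode-sized quantities such as the stack pointer
   without caring about the pointer width.  */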
73
74 /* Used to simplify the logic below. Find the attributes wherever
75 they may be. */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
81
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
84
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
87
88 /* Global variables for machine-dependent things. */
89
90 /* Which cpu are we scheduling for. */
91 enum processor_type sh_cpu;
92
93 /* Definitions used in ready queue reordering for first scheduling pass. */
94
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
97
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
100
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
103
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
106
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
110
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
113
114 /* Provides the class number of the smallest class containing
115 each reg number.  */
116
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
118 {
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
158 };
159
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
162
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
166
167 int assembler_dialect;
168
169 static bool shmedia_space_reserved_for_target_registers;
170
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void sh_option_override (void);
186 static void sh_option_optimization (int, int);
187 static void sh_option_default_params (void);
188 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
189 static rtx frame_insn (rtx);
190 static rtx push (int);
191 static void pop (int);
192 static void push_regs (HARD_REG_SET *, int);
193 static int calc_live_regs (HARD_REG_SET *);
194 static HOST_WIDE_INT rounded_frame_size (int);
195 static bool sh_frame_pointer_required (void);
196 static rtx mark_constant_pool_use (rtx);
197 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
198 static tree sh_handle_resbank_handler_attribute (tree *, tree,
199 tree, int, bool *);
200 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
201 tree, int, bool *);
202 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
203 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
204 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
205 static void sh_print_operand (FILE *, rtx, int);
206 static void sh_print_operand_address (FILE *, rtx);
207 static bool sh_print_operand_punct_valid_p (unsigned char code);
208 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
209 static void sh_insert_attributes (tree, tree *);
210 static const char *sh_check_pch_target_flags (int);
211 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
212 static int sh_adjust_cost (rtx, rtx, rtx, int);
213 static int sh_issue_rate (void);
214 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
215 static short find_set_regmode_weight (rtx, enum machine_mode);
216 static short find_insn_regmode_weight (rtx, enum machine_mode);
217 static void find_regmode_weight (basic_block, enum machine_mode);
218 static int find_r0_life_regions (basic_block);
219 static void sh_md_init_global (FILE *, int, int);
220 static void sh_md_finish_global (FILE *, int);
221 static int rank_for_reorder (const void *, const void *);
222 static void swap_reorder (rtx *, int);
223 static void ready_reorder (rtx *, int);
224 static short high_pressure (enum machine_mode);
225 static int sh_reorder (FILE *, int, rtx *, int *, int);
226 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
227 static void sh_md_init (FILE *, int, int);
228 static int sh_variable_issue (FILE *, int, rtx, int);
229
230 static bool sh_function_ok_for_sibcall (tree, tree);
231
232 static bool sh_cannot_modify_jumps_p (void);
233 static reg_class_t sh_target_reg_class (void);
234 static bool sh_optimize_target_register_callee_saved (bool);
235 static bool sh_ms_bitfield_layout_p (const_tree);
236
237 static void sh_init_builtins (void);
238 static tree sh_builtin_decl (unsigned, bool);
239 static void sh_media_init_builtins (void);
240 static tree sh_media_builtin_decl (unsigned, bool);
241 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
242 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
243 static void sh_file_start (void);
244 static int flow_dependent_p (rtx, rtx);
245 static void flow_dependent_p_1 (rtx, const_rtx, void *);
246 static int shiftcosts (rtx);
247 static int andcosts (rtx);
248 static int addsubcosts (rtx);
249 static int multcosts (rtx);
250 static bool unspec_caller_rtx_p (rtx);
251 static bool sh_cannot_copy_insn_p (rtx);
252 static bool sh_rtx_costs (rtx, int, int, int *, bool);
253 static int sh_address_cost (rtx, bool);
254 static int sh_pr_n_sets (void);
255 static rtx sh_allocate_initial_value (rtx);
256 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
257 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
258 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
259 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
260 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
261 static int scavenge_reg (HARD_REG_SET *s);
262 struct save_schedule_s;
263 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
264 struct save_schedule_s *, int);
265
266 static rtx sh_struct_value_rtx (tree, int);
267 static rtx sh_function_value (const_tree, const_tree, bool);
268 static bool sh_function_value_regno_p (const unsigned int);
269 static rtx sh_libcall_value (enum machine_mode, const_rtx);
270 static bool sh_return_in_memory (const_tree, const_tree);
271 static rtx sh_builtin_saveregs (void);
272 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
273 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
274 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
275 static tree sh_build_builtin_va_list (void);
276 static void sh_va_start (tree, rtx);
277 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
278 static bool sh_promote_prototypes (const_tree);
279 static enum machine_mode sh_promote_function_mode (const_tree type,
280 enum machine_mode,
281 int *punsignedp,
282 const_tree funtype,
283 int for_return);
284 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
285 const_tree, bool);
286 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
287 const_tree, bool);
288 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
289 tree, bool);
290 static void sh_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
291 const_tree, bool);
292 static rtx sh_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
293 const_tree, bool);
294 static bool sh_scalar_mode_supported_p (enum machine_mode);
295 static int sh_dwarf_calling_convention (const_tree);
296 static void sh_encode_section_info (tree, rtx, int);
297 static int sh2a_function_vector_p (tree);
298 static void sh_trampoline_init (rtx, tree, rtx);
299 static rtx sh_trampoline_adjust_address (rtx);
300 \f
301 static const struct attribute_spec sh_attribute_table[] =
302 {
303 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
304 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
305 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
306 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
307 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
308 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
309 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
310 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
311 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
312 #ifdef SYMBIAN
313 /* Symbian support adds two new attributes:
314 dllexport - for exporting a function/variable that will live in a dll
315 dllimport - for importing a function/variable from a dll
316
317 Microsoft allows multiple declspecs in one __declspec, separating
318 them with spaces. We do NOT support this. Instead, use __declspec
319 multiple times. */
320 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
321 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
322 #endif
323 { NULL, 0, 0, false, false, false, NULL }
324 };
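/* For reference, user code applies these attributes along the following
   lines (an illustrative example; the GCC manual documents the exact
   syntax and constraints):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (4)));

   where "alt_stack" names a variable holding the alternate stack pointer
   and 4 is the trap number used on exit.  */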
325 \f
326 /* Initialize the GCC target structure. */
327 #undef TARGET_ATTRIBUTE_TABLE
328 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
329
330 /* The next two are used for debug info when compiling with -gdwarf. */
331 #undef TARGET_ASM_UNALIGNED_HI_OP
332 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
333 #undef TARGET_ASM_UNALIGNED_SI_OP
334 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
335
336 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
337 #undef TARGET_ASM_UNALIGNED_DI_OP
338 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
339 #undef TARGET_ASM_ALIGNED_DI_OP
340 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
341
342 #undef TARGET_OPTION_OVERRIDE
343 #define TARGET_OPTION_OVERRIDE sh_option_override
344 #undef TARGET_OPTION_OPTIMIZATION
345 #define TARGET_OPTION_OPTIMIZATION sh_option_optimization
346 #undef TARGET_OPTION_DEFAULT_PARAMS
347 #define TARGET_OPTION_DEFAULT_PARAMS sh_option_default_params
348
349 #undef TARGET_PRINT_OPERAND
350 #define TARGET_PRINT_OPERAND sh_print_operand
351 #undef TARGET_PRINT_OPERAND_ADDRESS
352 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
353 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
354 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
355
356 #undef TARGET_ASM_FUNCTION_EPILOGUE
357 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
358
359 #undef TARGET_ASM_OUTPUT_MI_THUNK
360 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
361
362 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
363 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
364
365 #undef TARGET_ASM_FILE_START
366 #define TARGET_ASM_FILE_START sh_file_start
367 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
368 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
369
370 #undef TARGET_DEFAULT_TARGET_FLAGS
371 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
372 #undef TARGET_HANDLE_OPTION
373 #define TARGET_HANDLE_OPTION sh_handle_option
374
375 #undef TARGET_REGISTER_MOVE_COST
376 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
377
378 #undef TARGET_INSERT_ATTRIBUTES
379 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
380
381 #undef TARGET_SCHED_ADJUST_COST
382 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
383
384 #undef TARGET_SCHED_ISSUE_RATE
385 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
386
387 /* The following hooks have been implemented to re-enable sched1.  With the
388 help of these macros we limit the movement of insns in sched1 in order to
389 reduce the register pressure.  The overall idea is to keep count of the SImode
390 and SFmode regs required by already scheduled insns.  When these counts
391 cross certain threshold values, priority is given to insns that free registers.
392 The insn that frees registers is most likely to be the insn with the lowest
393 LUID (original insn order), but such an insn might be in the stalled
394 queue (Q) instead of the ready queue (R).  To solve this, we skip
395 up to a maximum of 8 cycles so that such insns may move from Q -> R.
396
397 The hooks are described below:
398
399 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
400 scheduler; it is called inside the sched_init function just after
401 the call to find_insn_reg_weights.  It is used to calculate the SImode
402 and SFmode weights of the insns of basic blocks, much like what
403 find_insn_reg_weights does.
404 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
405
406 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
407 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
408 (Q)->(R).
409
410 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
411 high, reorder the ready queue so that the insn with the lowest LUID will
412 be issued next.
413
414 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
415 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
416
417 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
418 can be returned from TARGET_SCHED_REORDER2.
419
420 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
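/* For illustration only (not part of the port; the real hooks, weight
   bookkeeping and thresholds are defined later in this file), a
   self-contained toy model of the pressure-driven choice described above:

     struct toy_insn { int luid; };

     // Pick the index of the insn to issue next: the scheduler's default
     // choice is kept unless pressure is high, in which case the insn
     // with the lowest LUID is preferred, since it is the most likely
     // to free registers.
     static int
     toy_pick_next (const struct toy_insn *ready, int n_ready,
                    int si_pressure, int sf_pressure, int threshold)
     {
       int best = n_ready - 1;
       int i;

       if (si_pressure > threshold || sf_pressure > threshold)
         for (i = 0; i < n_ready; i++)
           if (ready[i].luid < ready[best].luid)
             best = i;
       return best;
     }

   When neither counter exceeds its threshold the default order stands;
   otherwise the lowest-LUID insn is preferred, and TARGET_SCHED_DFA_NEW_CYCLE
   may additionally skip up to 8 cycles so that such an insn can migrate
   from the stalled queue Q into the ready queue R.  */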
421
422 #undef TARGET_SCHED_DFA_NEW_CYCLE
423 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
424
425 #undef TARGET_SCHED_INIT_GLOBAL
426 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
427
428 #undef TARGET_SCHED_FINISH_GLOBAL
429 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
430
431 #undef TARGET_SCHED_VARIABLE_ISSUE
432 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
433
434 #undef TARGET_SCHED_REORDER
435 #define TARGET_SCHED_REORDER sh_reorder
436
437 #undef TARGET_SCHED_REORDER2
438 #define TARGET_SCHED_REORDER2 sh_reorder2
439
440 #undef TARGET_SCHED_INIT
441 #define TARGET_SCHED_INIT sh_md_init
442
443 #undef TARGET_LEGITIMIZE_ADDRESS
444 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
445
446 #undef TARGET_CANNOT_MODIFY_JUMPS_P
447 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
448 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
449 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
450 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
451 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
452 sh_optimize_target_register_callee_saved
453
454 #undef TARGET_MS_BITFIELD_LAYOUT_P
455 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
456
457 #undef TARGET_INIT_BUILTINS
458 #define TARGET_INIT_BUILTINS sh_init_builtins
459 #undef TARGET_BUILTIN_DECL
460 #define TARGET_BUILTIN_DECL sh_builtin_decl
461 #undef TARGET_EXPAND_BUILTIN
462 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
463
464 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
465 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
466
467 #undef TARGET_CANNOT_COPY_INSN_P
468 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
469 #undef TARGET_RTX_COSTS
470 #define TARGET_RTX_COSTS sh_rtx_costs
471 #undef TARGET_ADDRESS_COST
472 #define TARGET_ADDRESS_COST sh_address_cost
473 #undef TARGET_ALLOCATE_INITIAL_VALUE
474 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
475
476 #undef TARGET_MACHINE_DEPENDENT_REORG
477 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
478
479 #undef TARGET_DWARF_REGISTER_SPAN
480 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
481
482 #ifdef HAVE_AS_TLS
483 #undef TARGET_HAVE_TLS
484 #define TARGET_HAVE_TLS true
485 #endif
486
487 #undef TARGET_PROMOTE_PROTOTYPES
488 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
489 #undef TARGET_PROMOTE_FUNCTION_MODE
490 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
491
492 #undef TARGET_FUNCTION_VALUE
493 #define TARGET_FUNCTION_VALUE sh_function_value
494 #undef TARGET_FUNCTION_VALUE_REGNO_P
495 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
496 #undef TARGET_LIBCALL_VALUE
497 #define TARGET_LIBCALL_VALUE sh_libcall_value
498 #undef TARGET_STRUCT_VALUE_RTX
499 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
500 #undef TARGET_RETURN_IN_MEMORY
501 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
502
503 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
504 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
505 #undef TARGET_SETUP_INCOMING_VARARGS
506 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
507 #undef TARGET_STRICT_ARGUMENT_NAMING
508 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
509 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
510 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
511 #undef TARGET_MUST_PASS_IN_STACK
512 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
513 #undef TARGET_PASS_BY_REFERENCE
514 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
515 #undef TARGET_CALLEE_COPIES
516 #define TARGET_CALLEE_COPIES sh_callee_copies
517 #undef TARGET_ARG_PARTIAL_BYTES
518 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
519 #undef TARGET_FUNCTION_ARG
520 #define TARGET_FUNCTION_ARG sh_function_arg
521 #undef TARGET_FUNCTION_ARG_ADVANCE
522 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
523
524 #undef TARGET_BUILD_BUILTIN_VA_LIST
525 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
526 #undef TARGET_EXPAND_BUILTIN_VA_START
527 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
528 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
529 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
530
531 #undef TARGET_SCALAR_MODE_SUPPORTED_P
532 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
533 #undef TARGET_VECTOR_MODE_SUPPORTED_P
534 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
535
536 #undef TARGET_CHECK_PCH_TARGET_FLAGS
537 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
538
539 #undef TARGET_DWARF_CALLING_CONVENTION
540 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
541
542 #undef TARGET_FRAME_POINTER_REQUIRED
543 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
544
545 /* Return regmode weight for insn. */
546 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
547
548 /* Return current register pressure for regmode. */
549 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
550
551 #undef TARGET_ENCODE_SECTION_INFO
552 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
553
554 #ifdef SYMBIAN
555
556 #undef TARGET_ENCODE_SECTION_INFO
557 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
558 #undef TARGET_STRIP_NAME_ENCODING
559 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
560 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
561 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
562
563 #endif /* SYMBIAN */
564
565 #undef TARGET_SECONDARY_RELOAD
566 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
567
568 #undef TARGET_LEGITIMATE_ADDRESS_P
569 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
570
571 #undef TARGET_TRAMPOLINE_INIT
572 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
573 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
574 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
575
576 /* Machine-specific symbol_ref flags. */
577 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
578
579 struct gcc_target targetm = TARGET_INITIALIZER;
580 \f
581 /* Implement TARGET_HANDLE_OPTION. */
582
583 static bool
584 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
585 int value ATTRIBUTE_UNUSED)
586 {
587 switch (code)
588 {
589 case OPT_m1:
590 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
591 return true;
592
593 case OPT_m2:
594 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
595 return true;
596
597 case OPT_m2a:
598 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
599 return true;
600
601 case OPT_m2a_nofpu:
602 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
603 return true;
604
605 case OPT_m2a_single:
606 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
607 return true;
608
609 case OPT_m2a_single_only:
610 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
611 return true;
612
613 case OPT_m2e:
614 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
615 return true;
616
617 case OPT_m3:
618 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
619 return true;
620
621 case OPT_m3e:
622 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
623 return true;
624
625 case OPT_m4:
626 case OPT_m4_100:
627 case OPT_m4_200:
628 case OPT_m4_300:
629 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
630 return true;
631
632 case OPT_m4_nofpu:
633 case OPT_m4_100_nofpu:
634 case OPT_m4_200_nofpu:
635 case OPT_m4_300_nofpu:
636 case OPT_m4_340:
637 case OPT_m4_400:
638 case OPT_m4_500:
639 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
640 return true;
641
642 case OPT_m4_single:
643 case OPT_m4_100_single:
644 case OPT_m4_200_single:
645 case OPT_m4_300_single:
646 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
647 return true;
648
649 case OPT_m4_single_only:
650 case OPT_m4_100_single_only:
651 case OPT_m4_200_single_only:
652 case OPT_m4_300_single_only:
653 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
654 return true;
655
656 case OPT_m4a:
657 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
658 return true;
659
660 case OPT_m4a_nofpu:
661 case OPT_m4al:
662 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
663 return true;
664
665 case OPT_m4a_single:
666 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
667 return true;
668
669 case OPT_m4a_single_only:
670 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
671 return true;
672
673 case OPT_m5_32media:
674 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
675 return true;
676
677 case OPT_m5_32media_nofpu:
678 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
679 return true;
680
681 case OPT_m5_64media:
682 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
683 return true;
684
685 case OPT_m5_64media_nofpu:
686 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
687 return true;
688
689 case OPT_m5_compact:
690 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
691 return true;
692
693 case OPT_m5_compact_nofpu:
694 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
695 return true;
696
697 default:
698 return true;
699 }
700 }
701 \f
702 /* Set default optimization options. */
703 static void
704 sh_option_optimization (int level, int size)
705 {
706 if (level)
707 {
708 if (!size)
709 sh_div_str = "inv:minlat";
710 }
711 if (size)
712 {
713 target_flags |= MASK_SMALLCODE;
714 sh_div_str = SH_DIV_STR_FOR_SIZE ;
715 }
716 else
717 TARGET_CBRANCHDI4 = 1;
718 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
719 haven't been parsed yet, hence we'd read only the default.
720 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
721 it's OK to always set flag_branch_target_load_optimize. */
722 if (level > 1)
723 {
724 flag_branch_target_load_optimize = 1;
725 if (!size)
726 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
727 }
728 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
729 here, so leave it to TARGET_OPTION_OVERRIDE to set
730 flag_finite_math_only. We set it to 2 here so we know if the user
731 explicitly requested this to be on or off. */
732 flag_finite_math_only = 2;
733 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
734 the user explicitly requested this to be on or off. */
735 if (flag_schedule_insns > 0)
736 flag_schedule_insns = 2;
737 }
738
739 /* Implement TARGET_OPTION_DEFAULT_PARAMS. */
740 static void
741 sh_option_default_params (void)
742 {
743 set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 2);
744 }
745
746 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
747 various options, and do some machine dependent initialization. */
748 static void
749 sh_option_override (void)
750 {
751 int regno;
752
753 SUBTARGET_OVERRIDE_OPTIONS;
754 if (flag_finite_math_only == 2)
755 flag_finite_math_only
756 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
757 if (TARGET_SH2E && !flag_finite_math_only)
758 target_flags |= MASK_IEEE;
759 sh_cpu = PROCESSOR_SH1;
760 assembler_dialect = 0;
761 if (TARGET_SH2)
762 sh_cpu = PROCESSOR_SH2;
763 if (TARGET_SH2E)
764 sh_cpu = PROCESSOR_SH2E;
765 if (TARGET_SH2A)
766 sh_cpu = PROCESSOR_SH2A;
767 if (TARGET_SH3)
768 sh_cpu = PROCESSOR_SH3;
769 if (TARGET_SH3E)
770 sh_cpu = PROCESSOR_SH3E;
771 if (TARGET_SH4)
772 {
773 assembler_dialect = 1;
774 sh_cpu = PROCESSOR_SH4;
775 }
776 if (TARGET_SH4A_ARCH)
777 {
778 assembler_dialect = 1;
779 sh_cpu = PROCESSOR_SH4A;
780 }
781 if (TARGET_SH5)
782 {
783 sh_cpu = PROCESSOR_SH5;
784 target_flags |= MASK_ALIGN_DOUBLE;
785 if (TARGET_SHMEDIA_FPU)
786 target_flags |= MASK_FMOVD;
787 if (TARGET_SHMEDIA)
788 {
789 /* There are no delay slots on SHmedia. */
790 flag_delayed_branch = 0;
791 /* Relaxation isn't yet supported for SHmedia.  */
792 target_flags &= ~MASK_RELAX;
793 /* After reload, if-conversion does little good but can cause
794 ICEs:
795 - find_if_block doesn't do anything for SH because we don't
796 have conditional execution patterns. (We use conditional
797 move patterns, which are handled differently, and only
798 before reload).
799 - find_cond_trap doesn't do anything for the SH because we
800 don't have conditional traps.
801 - find_if_case_1 uses redirect_edge_and_branch_force in
802 the only path that does an optimization, and this causes
803 an ICE when branch targets are in registers.
804 - find_if_case_2 doesn't do anything for the SHmedia after
805 reload except when it can redirect a tablejump - and
806 that's rather rare. */
807 flag_if_conversion2 = 0;
808 if (! strcmp (sh_div_str, "call"))
809 sh_div_strategy = SH_DIV_CALL;
810 else if (! strcmp (sh_div_str, "call2"))
811 sh_div_strategy = SH_DIV_CALL2;
812 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
813 sh_div_strategy = SH_DIV_FP;
814 else if (! strcmp (sh_div_str, "inv"))
815 sh_div_strategy = SH_DIV_INV;
816 else if (! strcmp (sh_div_str, "inv:minlat"))
817 sh_div_strategy = SH_DIV_INV_MINLAT;
818 else if (! strcmp (sh_div_str, "inv20u"))
819 sh_div_strategy = SH_DIV_INV20U;
820 else if (! strcmp (sh_div_str, "inv20l"))
821 sh_div_strategy = SH_DIV_INV20L;
822 else if (! strcmp (sh_div_str, "inv:call2"))
823 sh_div_strategy = SH_DIV_INV_CALL2;
824 else if (! strcmp (sh_div_str, "inv:call"))
825 sh_div_strategy = SH_DIV_INV_CALL;
826 else if (! strcmp (sh_div_str, "inv:fp"))
827 {
828 if (TARGET_FPU_ANY)
829 sh_div_strategy = SH_DIV_INV_FP;
830 else
831 sh_div_strategy = SH_DIV_INV;
832 }
833 TARGET_CBRANCHDI4 = 0;
834 /* Assembler CFI isn't yet fully supported for SHmedia. */
835 flag_dwarf2_cfi_asm = 0;
836 }
837 }
838 else
839 {
840 /* Only the sh64-elf assembler fully supports .quad properly. */
841 targetm.asm_out.aligned_op.di = NULL;
842 targetm.asm_out.unaligned_op.di = NULL;
843 }
844 if (TARGET_SH1)
845 {
846 if (! strcmp (sh_div_str, "call-div1"))
847 sh_div_strategy = SH_DIV_CALL_DIV1;
848 else if (! strcmp (sh_div_str, "call-fp")
849 && (TARGET_FPU_DOUBLE
850 || (TARGET_HARD_SH4 && TARGET_SH2E)
851 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
852 sh_div_strategy = SH_DIV_CALL_FP;
853 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
854 sh_div_strategy = SH_DIV_CALL_TABLE;
855 else
856 /* Pick one that makes most sense for the target in general.
857 It is not much good to use different functions depending
858 on -Os, since then we'll end up with two different functions
859 when some of the code is compiled for size, and some for
860 speed. */
861
862 /* SH4 tends to emphasize speed. */
863 if (TARGET_HARD_SH4)
864 sh_div_strategy = SH_DIV_CALL_TABLE;
865 /* These have their own way of doing things. */
866 else if (TARGET_SH2A)
867 sh_div_strategy = SH_DIV_INTRINSIC;
868 /* ??? Should we use the integer SHmedia function instead? */
869 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
870 sh_div_strategy = SH_DIV_CALL_FP;
871 /* SH1 .. SH3 cores often go into small-footprint systems, so
872 default to the smallest implementation available. */
873 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
874 sh_div_strategy = SH_DIV_CALL_TABLE;
875 else
876 sh_div_strategy = SH_DIV_CALL_DIV1;
877 }
878 if (!TARGET_SH1)
879 TARGET_PRETEND_CMOVE = 0;
880 if (sh_divsi3_libfunc[0])
881 ; /* User supplied - leave it alone. */
882 else if (TARGET_DIVIDE_CALL_FP)
883 sh_divsi3_libfunc = "__sdivsi3_i4";
884 else if (TARGET_DIVIDE_CALL_TABLE)
885 sh_divsi3_libfunc = "__sdivsi3_i4i";
886 else if (TARGET_SH5)
887 sh_divsi3_libfunc = "__sdivsi3_1";
888 else
889 sh_divsi3_libfunc = "__sdivsi3";
890 if (sh_branch_cost == -1)
891 sh_branch_cost
892 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
893
894 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
895 if (! VALID_REGISTER_P (regno))
896 sh_register_names[regno][0] = '\0';
897
898 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
899 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
900 sh_additional_register_names[regno][0] = '\0';
901
902 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
903
904 if ((flag_pic && ! TARGET_PREFERGOT)
905 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
906 flag_no_function_cse = 1;
907
908 if (targetm.small_register_classes_for_mode_p (VOIDmode))
909 {
910 /* Never run scheduling before reload, since that can
911 break global alloc, and generates slower code anyway due
912 to the pressure on R0. */
913 /* Enable sched1 for SH4 if the user explicitly requests it.
914 When sched1 is enabled, the ready queue will be reordered by
915 the target hooks if pressure is high.  We cannot do this for
916 PIC, SH3 and lower as they give spill failures for R0.  */
917 if (!TARGET_HARD_SH4 || flag_pic)
918 flag_schedule_insns = 0;
919 /* ??? Current exception handling places basic block boundaries
920 after call_insns.  This causes high pressure on R0 and gives
921 spill failures for R0 in reload. See PR 22553 and the thread
922 on gcc-patches
923 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
924 else if (flag_exceptions)
925 {
926 if (flag_schedule_insns == 1)
927 warning (0, "ignoring -fschedule-insns because of exception handling bug");
928 flag_schedule_insns = 0;
929 }
930 else if (flag_schedule_insns == 2)
931 flag_schedule_insns = 0;
932 }
933
934 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
935 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
936
937 /* Unwind info is not correct around the CFG unless either a frame
938 pointer is present or M_A_O_A is set. Fixing this requires rewriting
939 unwind info generation to be aware of the CFG and propagating states
940 around edges. */
941 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
942 || flag_exceptions || flag_non_call_exceptions)
943 && flag_omit_frame_pointer
944 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
945 {
946 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
947 warning (0, "unwind tables currently require either a frame pointer "
948 "or -maccumulate-outgoing-args for correctness");
949 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
950 }
951
952 /* Unwinding with -freorder-blocks-and-partition does not work on this
953 architecture, because it requires far jumps to labels crossing between
954 hot/cold sections, which are rejected on this architecture.  */
955 if (flag_reorder_blocks_and_partition)
956 {
957 if (flag_exceptions)
958 {
959 inform (input_location,
960 "-freorder-blocks-and-partition does not work with "
961 "exceptions on this architecture");
962 flag_reorder_blocks_and_partition = 0;
963 flag_reorder_blocks = 1;
964 }
965 else if (flag_unwind_tables)
966 {
967 inform (input_location,
968 "-freorder-blocks-and-partition does not support unwind "
969 "info on this architecture");
970 flag_reorder_blocks_and_partition = 0;
971 flag_reorder_blocks = 1;
972 }
973 }
974
975 if (align_loops == 0)
976 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
977 if (align_jumps == 0)
978 align_jumps = 1 << CACHE_LOG;
979 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
980 align_jumps = TARGET_SHMEDIA ? 4 : 2;
981
982 /* Allocation boundary (in *bytes*) for the code of a function.
983 SH1: 32 bit alignment is faster, because instructions are always
984 fetched as a pair from a longword boundary.
985 SH2 .. SH5 : align to cache line start. */
986 if (align_functions == 0)
987 align_functions
988 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
989 /* The linker relaxation code breaks when a function contains
990 alignments that are larger than that at the start of a
991 compilation unit. */
992 if (TARGET_RELAX)
993 {
994 int min_align
995 = align_loops > align_jumps ? align_loops : align_jumps;
996
997 /* Also take possible .long constants / mova tables into account.  */
998 if (min_align < 4)
999 min_align = 4;
1000 if (align_functions < min_align)
1001 align_functions = min_align;
1002 }
1003
1004 if (sh_fixed_range_str)
1005 sh_fix_range (sh_fixed_range_str);
1006
1007 /* This target defaults to strict volatile bitfields. */
1008 if (flag_strict_volatile_bitfields < 0)
1009 flag_strict_volatile_bitfields = 1;
1010 }
1011 \f
1012 /* Print the operand address in x to the stream. */
1013
1014 static void
1015 sh_print_operand_address (FILE *stream, rtx x)
1016 {
1017 switch (GET_CODE (x))
1018 {
1019 case REG:
1020 case SUBREG:
1021 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1022 break;
1023
1024 case PLUS:
1025 {
1026 rtx base = XEXP (x, 0);
1027 rtx index = XEXP (x, 1);
1028
1029 switch (GET_CODE (index))
1030 {
1031 case CONST_INT:
1032 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1033 reg_names[true_regnum (base)]);
1034 break;
1035
1036 case REG:
1037 case SUBREG:
1038 {
1039 int base_num = true_regnum (base);
1040 int index_num = true_regnum (index);
1041
1042 fprintf (stream, "@(r0,%s)",
1043 reg_names[MAX (base_num, index_num)]);
1044 break;
1045 }
1046
1047 default:
1048 gcc_unreachable ();
1049 }
1050 }
1051 break;
1052
1053 case PRE_DEC:
1054 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1055 break;
1056
1057 case POST_INC:
1058 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1059 break;
1060
1061 default:
1062 x = mark_constant_pool_use (x);
1063 output_addr_const (stream, x);
1064 break;
1065 }
1066 }
1067
1068 /* Print operand x (an rtx) in assembler syntax to file stream
1069 according to modifier code.
1070
1071 '.' print a .s if insn needs delay slot
1072 ',' print LOCAL_LABEL_PREFIX
1073 '@' print trap, rte or rts depending upon pragma interruptness
1074 '#' output a nop if there is nothing to put in the delay slot
1075 ''' print likelihood suffix (/u for unlikely).
1076 '>' print branch target if -fverbose-asm
1077 'O' print a constant without the #
1078 'R' print the LSW of a dp value - changes if in little endian
1079 'S' print the MSW of a dp value - changes if in little endian
1080 'T' print the next word of a dp value - same as 'R' in big endian mode.
1081 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1082 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1083 'N' print 'r63' if the operand is (const_int 0).
1084 'd' print a V2SF reg as dN instead of fpN.
1085 'm' print a pair `base,offset' or `base,index', for LD and ST.
1086 'U' Likewise for {LD,ST}{HI,LO}.
1087 'V' print the position of a single bit set.
1088 'W' print the position of a single bit cleared.
1089 't' print a memory address which is a register.
1090 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1091 'o' output an operator. */
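/* As a concrete (purely illustrative) example: if operand 0 is a DFmode
   value held in the general register pair r4/r5 on a little endian
   target (LSW == 0, MSW == 1), then "%R0" prints "r4", the register
   holding the least significant word, and "%S0" prints "r5", the most
   significant word; on a big endian target the two are swapped.  If
   operand 0 is a MEM, "%R0" and "%S0" instead print its address offset
   by 4 * LSW and 4 * MSW bytes, and "%T0" prints the address of the
   following word.  */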
1092
1093 static void
1094 sh_print_operand (FILE *stream, rtx x, int code)
1095 {
1096 int regno;
1097 enum machine_mode mode;
1098
1099 switch (code)
1100 {
1101 tree trapa_attr;
1102
1103 case '.':
1104 if (final_sequence
1105 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1106 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1107 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1108 break;
1109 case ',':
1110 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1111 break;
1112 case '@':
1113 trapa_attr = lookup_attribute ("trap_exit",
1114 DECL_ATTRIBUTES (current_function_decl));
1115 if (trapa_attr)
1116 fprintf (stream, "trapa #%ld",
1117 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1118 else if (sh_cfun_interrupt_handler_p ())
1119 {
1120 if (sh_cfun_resbank_handler_p ())
1121 fprintf (stream, "resbank\n");
1122 fprintf (stream, "rte");
1123 }
1124 else
1125 fprintf (stream, "rts");
1126 break;
1127 case '#':
1128 /* Output a nop if there's nothing in the delay slot. */
1129 if (dbr_sequence_length () == 0)
1130 fprintf (stream, "\n\tnop");
1131 break;
1132 case '\'':
1133 {
1134 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1135
1136 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1137 fputs ("/u", stream);
1138 break;
1139 }
1140 case '>':
1141 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1142 {
1143 fputs ("\t! target: ", stream);
1144 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1145 }
1146 break;
1147 case 'O':
1148 x = mark_constant_pool_use (x);
1149 output_addr_const (stream, x);
1150 break;
1151 /* N.B.: %R / %S / %T adjust memory addresses by four.
1152 For SHMEDIA, that means they can be used to access the first and
1153 second 32 bit part of a 64 bit (or larger) value that
1154 might be held in floating point registers or memory.
1155 While they can be used to access 64 bit parts of a larger value
1156 held in general purpose registers, that won't work with memory,
1157 nor with fp registers, since the frxx names are used.  */
1158 case 'R':
1159 if (REG_P (x) || GET_CODE (x) == SUBREG)
1160 {
1161 regno = true_regnum (x);
1162 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1163 fputs (reg_names[regno], (stream));
1164 }
1165 else if (MEM_P (x))
1166 {
1167 x = adjust_address (x, SImode, 4 * LSW);
1168 sh_print_operand_address (stream, XEXP (x, 0));
1169 }
1170 else
1171 {
1172 rtx sub = NULL_RTX;
1173
1174 mode = GET_MODE (x);
1175 if (mode == VOIDmode)
1176 mode = DImode;
1177 if (GET_MODE_SIZE (mode) >= 8)
1178 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1179 if (sub)
1180 sh_print_operand (stream, sub, 0);
1181 else
1182 output_operand_lossage ("invalid operand to %%R");
1183 }
1184 break;
1185 case 'S':
1186 if (REG_P (x) || GET_CODE (x) == SUBREG)
1187 {
1188 regno = true_regnum (x);
1189 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1190 fputs (reg_names[regno], (stream));
1191 }
1192 else if (MEM_P (x))
1193 {
1194 x = adjust_address (x, SImode, 4 * MSW);
1195 sh_print_operand_address (stream, XEXP (x, 0));
1196 }
1197 else
1198 {
1199 rtx sub = NULL_RTX;
1200
1201 mode = GET_MODE (x);
1202 if (mode == VOIDmode)
1203 mode = DImode;
1204 if (GET_MODE_SIZE (mode) >= 8)
1205 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1206 if (sub)
1207 sh_print_operand (stream, sub, 0);
1208 else
1209 output_operand_lossage ("invalid operand to %%S");
1210 }
1211 break;
1212 case 'T':
1213 /* Next word of a double. */
1214 switch (GET_CODE (x))
1215 {
1216 case REG:
1217 fputs (reg_names[REGNO (x) + 1], (stream));
1218 break;
1219 case MEM:
1220 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1221 && GET_CODE (XEXP (x, 0)) != POST_INC)
1222 x = adjust_address (x, SImode, 4);
1223 sh_print_operand_address (stream, XEXP (x, 0));
1224 break;
1225 default:
1226 break;
1227 }
1228 break;
1229
1230 case 't':
1231 gcc_assert (MEM_P (x));
1232 x = XEXP (x, 0);
1233 switch (GET_CODE (x))
1234 {
1235 case REG:
1236 case SUBREG:
1237 sh_print_operand (stream, x, 0);
1238 break;
1239 default:
1240 break;
1241 }
1242 break;
1243
1244 case 'o':
1245 switch (GET_CODE (x))
1246 {
1247 case PLUS: fputs ("add", stream); break;
1248 case MINUS: fputs ("sub", stream); break;
1249 case MULT: fputs ("mul", stream); break;
1250 case DIV: fputs ("div", stream); break;
1251 case EQ: fputs ("eq", stream); break;
1252 case NE: fputs ("ne", stream); break;
1253 case GT: case LT: fputs ("gt", stream); break;
1254 case GE: case LE: fputs ("ge", stream); break;
1255 case GTU: case LTU: fputs ("gtu", stream); break;
1256 case GEU: case LEU: fputs ("geu", stream); break;
1257 default:
1258 break;
1259 }
1260 break;
1261 case 'M':
1262 if (TARGET_SHMEDIA)
1263 {
1264 if (MEM_P (x)
1265 && GET_CODE (XEXP (x, 0)) == PLUS
1266 && (REG_P (XEXP (XEXP (x, 0), 1))
1267 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1268 fputc ('x', stream);
1269 }
1270 else
1271 {
1272 if (MEM_P (x))
1273 {
1274 switch (GET_MODE (x))
1275 {
1276 case QImode: fputs (".b", stream); break;
1277 case HImode: fputs (".w", stream); break;
1278 case SImode: fputs (".l", stream); break;
1279 case SFmode: fputs (".s", stream); break;
1280 case DFmode: fputs (".d", stream); break;
1281 default: gcc_unreachable ();
1282 }
1283 }
1284 }
1285 break;
1286
1287 case 'm':
1288 gcc_assert (MEM_P (x));
1289 x = XEXP (x, 0);
1290 /* Fall through. */
1291 case 'U':
1292 switch (GET_CODE (x))
1293 {
1294 case REG:
1295 case SUBREG:
1296 sh_print_operand (stream, x, 0);
1297 fputs (", 0", stream);
1298 break;
1299
1300 case PLUS:
1301 sh_print_operand (stream, XEXP (x, 0), 0);
1302 fputs (", ", stream);
1303 sh_print_operand (stream, XEXP (x, 1), 0);
1304 break;
1305
1306 default:
1307 gcc_unreachable ();
1308 }
1309 break;
1310
1311 case 'V':
1312 {
1313 int num = exact_log2 (INTVAL (x));
1314 gcc_assert (num >= 0);
1315 fprintf (stream, "#%d", num);
1316 }
1317 break;
1318
1319 case 'W':
1320 {
1321 int num = exact_log2 (~INTVAL (x));
1322 gcc_assert (num >= 0);
1323 fprintf (stream, "#%d", num);
1324 }
1325 break;
1326
1327 case 'd':
1328 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1329
1330 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1331 break;
1332
1333 case 'N':
1334 if (x == CONST0_RTX (GET_MODE (x)))
1335 {
1336 fprintf ((stream), "r63");
1337 break;
1338 }
1339 goto default_output;
1340 case 'u':
1341 if (CONST_INT_P (x))
1342 {
1343 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1344 break;
1345 }
1346 /* Fall through. */
1347
1348 default_output:
1349 default:
1350 regno = 0;
1351 mode = GET_MODE (x);
1352
1353 switch (GET_CODE (x))
1354 {
1355 case TRUNCATE:
1356 {
1357 rtx inner = XEXP (x, 0);
1358 int offset = 0;
1359 enum machine_mode inner_mode;
1360
1361 /* We might see SUBREGs with vector mode registers inside. */
1362 if (GET_CODE (inner) == SUBREG
1363 && (GET_MODE_SIZE (GET_MODE (inner))
1364 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1365 && subreg_lowpart_p (inner))
1366 inner = SUBREG_REG (inner);
1367 if (CONST_INT_P (inner))
1368 {
1369 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1370 goto default_output;
1371 }
1372 inner_mode = GET_MODE (inner);
1373 if (GET_CODE (inner) == SUBREG
1374 && (GET_MODE_SIZE (GET_MODE (inner))
1375 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1376 && REG_P (SUBREG_REG (inner)))
1377 {
1378 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1379 GET_MODE (SUBREG_REG (inner)),
1380 SUBREG_BYTE (inner),
1381 GET_MODE (inner));
1382 inner = SUBREG_REG (inner);
1383 }
1384 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1385 abort ();
1386 /* Floating point register pairs are always big endian;
1387 general purpose registers are 64 bit wide. */
1388 regno = REGNO (inner);
1389 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1390 - HARD_REGNO_NREGS (regno, mode))
1391 + offset;
1392 x = inner;
1393 goto reg;
1394 }
1395 case SIGN_EXTEND:
1396 x = XEXP (x, 0);
1397 goto reg;
1398 /* FIXME: We need this on SHmedia32 because reload generates
1399 some sign-extended HI or QI loads into DImode registers
1400 but, because Pmode is SImode, the address ends up with a
1401 subreg:SI of the DImode register. Maybe reload should be
1402 fixed so as to apply alter_subreg to such loads? */
1403 case IF_THEN_ELSE:
1404 gcc_assert (trapping_target_operand (x, VOIDmode));
1405 x = XEXP (XEXP (x, 2), 0);
1406 goto default_output;
1407 case SUBREG:
1408 gcc_assert (SUBREG_BYTE (x) == 0
1409 && REG_P (SUBREG_REG (x)));
1410
1411 x = SUBREG_REG (x);
1412 /* Fall through. */
1413
1414 reg:
1415 case REG:
1416 regno += REGNO (x);
1417 if (FP_REGISTER_P (regno)
1418 && mode == V16SFmode)
1419 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1420 else if (FP_REGISTER_P (REGNO (x))
1421 && mode == V4SFmode)
1422 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1423 else if (REG_P (x)
1424 && mode == V2SFmode)
1425 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1426 else if (FP_REGISTER_P (REGNO (x))
1427 && GET_MODE_SIZE (mode) > 4)
1428 fprintf ((stream), "d%s", reg_names[regno] + 1);
1429 else
1430 fputs (reg_names[regno], (stream));
1431 break;
1432
1433 case MEM:
1434 output_address (XEXP (x, 0));
1435 break;
1436
1437 default:
1438 if (TARGET_SH1)
1439 fputc ('#', stream);
1440 output_addr_const (stream, x);
1441 break;
1442 }
1443 break;
1444 }
1445 }
1446
1447 static bool
1448 sh_print_operand_punct_valid_p (unsigned char code)
1449 {
1450 return (code == '.' || code == '#' || code == '@' || code == ','
1451 || code == '$' || code == '\'' || code == '>');
1452 }
1453 \f
1454
1455 /* Encode symbol attributes of a SYMBOL_REF into its
1456 SYMBOL_REF_FLAGS. */
1457 static void
1458 sh_encode_section_info (tree decl, rtx rtl, int first)
1459 {
1460 default_encode_section_info (decl, rtl, first);
1461
1462 if (TREE_CODE (decl) == FUNCTION_DECL
1463 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1464 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1465 }
1466
1467 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1468 static void
1469 force_into (rtx value, rtx target)
1470 {
1471 value = force_operand (value, target);
1472 if (! rtx_equal_p (value, target))
1473 emit_insn (gen_move_insn (target, value));
1474 }
1475
1476 /* Emit code to perform a block move. Choose the best method.
1477
1478 OPERANDS[0] is the destination.
1479 OPERANDS[1] is the source.
1480 OPERANDS[2] is the size.
1481 OPERANDS[3] is the alignment safe to use. */
1482
1483 int
1484 expand_block_move (rtx *operands)
1485 {
1486 int align = INTVAL (operands[3]);
1487 int constp = (CONST_INT_P (operands[2]));
1488 int bytes = (constp ? INTVAL (operands[2]) : 0);
1489
1490 if (! constp)
1491 return 0;
1492
1493 /* If we could use mov.l to move words and dest is word-aligned, we
1494 can use movua.l for loads and still generate a relatively short
1495 and efficient sequence. */
1496 if (TARGET_SH4A_ARCH && align < 4
1497 && MEM_ALIGN (operands[0]) >= 32
1498 && can_move_by_pieces (bytes, 32))
1499 {
1500 rtx dest = copy_rtx (operands[0]);
1501 rtx src = copy_rtx (operands[1]);
1502 /* We could use different pseudos for each copied word, but
1503 since movua can only load into r0, it's kind of
1504 pointless. */
1505 rtx temp = gen_reg_rtx (SImode);
1506 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1507 int copied = 0;
1508
1509 while (copied + 4 <= bytes)
1510 {
1511 rtx to = adjust_address (dest, SImode, copied);
1512 rtx from = adjust_automodify_address (src, BLKmode,
1513 src_addr, copied);
1514
1515 set_mem_size (from, GEN_INT (4));
1516 emit_insn (gen_movua (temp, from));
1517 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1518 emit_move_insn (to, temp);
1519 copied += 4;
1520 }
1521
1522 if (copied < bytes)
1523 move_by_pieces (adjust_address (dest, BLKmode, copied),
1524 adjust_automodify_address (src, BLKmode,
1525 src_addr, copied),
1526 bytes - copied, align, 0);
1527
1528 return 1;
1529 }
1530
1531 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1532 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1533 if (align < 4 || (bytes % 4 != 0))
1534 return 0;
1535
1536 if (TARGET_HARD_SH4)
1537 {
1538 if (bytes < 12)
1539 return 0;
1540 else if (bytes == 12)
1541 {
1542 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1543 rtx r4 = gen_rtx_REG (SImode, 4);
1544 rtx r5 = gen_rtx_REG (SImode, 5);
1545
1546 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1547 force_into (XEXP (operands[0], 0), r4);
1548 force_into (XEXP (operands[1], 0), r5);
1549 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1550 return 1;
1551 }
1552 else if (! TARGET_SMALLCODE)
1553 {
1554 const char *entry_name;
1555 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1556 int dwords;
1557 rtx r4 = gen_rtx_REG (SImode, 4);
1558 rtx r5 = gen_rtx_REG (SImode, 5);
1559 rtx r6 = gen_rtx_REG (SImode, 6);
1560
1561 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1562 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1563 force_into (XEXP (operands[0], 0), r4);
1564 force_into (XEXP (operands[1], 0), r5);
1565
1566 dwords = bytes >> 3;
1567 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1568 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1569 return 1;
1570 }
1571 else
1572 return 0;
1573 }
1574 if (bytes < 64)
1575 {
1576 char entry[30];
1577 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1578 rtx r4 = gen_rtx_REG (SImode, 4);
1579 rtx r5 = gen_rtx_REG (SImode, 5);
1580
1581 sprintf (entry, "__movmemSI%d", bytes);
1582 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1583 force_into (XEXP (operands[0], 0), r4);
1584 force_into (XEXP (operands[1], 0), r5);
1585 emit_insn (gen_block_move_real (func_addr_rtx));
1586 return 1;
1587 }
1588
1589 /* This is the same number of bytes as a memcpy call, but uses a different,
1590 less common function name, so this will occasionally use more space.  */
1591 if (! TARGET_SMALLCODE)
1592 {
1593 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1594 int final_switch, while_loop;
1595 rtx r4 = gen_rtx_REG (SImode, 4);
1596 rtx r5 = gen_rtx_REG (SImode, 5);
1597 rtx r6 = gen_rtx_REG (SImode, 6);
1598
1599 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1600 force_into (XEXP (operands[0], 0), r4);
1601 force_into (XEXP (operands[1], 0), r5);
1602
1603 /* r6 controls the size of the move. 16 is decremented from it
1604 for each 64 bytes moved. Then the negative bit left over is used
1605 as an index into a list of move instructions. e.g., a 72 byte move
1606 would be set up with size(r6) = 14, for one iteration through the
1607 big while loop, and a switch of -2 for the last part. */
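/* Worked example, following the comment above and the formulas below:
   for a 72 byte move, bytes / 4 = 18 words, so final_switch
   = 16 - (18 % 16) = 14 and while_loop = (18 / 16 - 1) * 16 = 0,
   giving r6 = 14.  One pass through the 64 byte loop subtracts 16,
   leaving -2, which selects the switch entry that moves the final
   8 bytes.  */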
1608
1609 final_switch = 16 - ((bytes / 4) % 16);
1610 while_loop = ((bytes / 4) / 16 - 1) * 16;
1611 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1612 emit_insn (gen_block_lump_real (func_addr_rtx));
1613 return 1;
1614 }
1615
1616 return 0;
1617 }
1618
1619 /* Prepare operands for a move define_expand; specifically, one of the
1620 operands must be in a register. */
1621
1622 int
1623 prepare_move_operands (rtx operands[], enum machine_mode mode)
1624 {
1625 if ((mode == SImode || mode == DImode)
1626 && flag_pic
1627 && ! ((mode == Pmode || mode == ptr_mode)
1628 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1629 {
1630 rtx temp;
1631 if (SYMBOLIC_CONST_P (operands[1]))
1632 {
1633 if (MEM_P (operands[0]))
1634 operands[1] = force_reg (Pmode, operands[1]);
1635 else if (TARGET_SHMEDIA
1636 && GET_CODE (operands[1]) == LABEL_REF
1637 && target_reg_operand (operands[0], mode))
1638 /* It's ok. */;
1639 else
1640 {
1641 temp = (!can_create_pseudo_p ()
1642 ? operands[0]
1643 : gen_reg_rtx (Pmode));
1644 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1645 }
1646 }
1647 else if (GET_CODE (operands[1]) == CONST
1648 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1649 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1650 {
1651 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1652 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1653 mode, temp);
1654 operands[1] = expand_binop (mode, add_optab, temp,
1655 XEXP (XEXP (operands[1], 0), 1),
1656 (!can_create_pseudo_p ()
1657 ? temp
1658 : gen_reg_rtx (Pmode)),
1659 0, OPTAB_LIB_WIDEN);
1660 }
1661 }
1662
1663 if (! reload_in_progress && ! reload_completed)
1664 {
1665 /* Copy the source to a register if both operands aren't registers. */
1666 if (! register_operand (operands[0], mode)
1667 && ! sh_register_operand (operands[1], mode))
1668 operands[1] = copy_to_mode_reg (mode, operands[1]);
1669
1670 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1671 {
1672 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1673 except that we can't use that function because it is static. */
1674 rtx new_rtx = change_address (operands[0], mode, 0);
1675 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1676 operands[0] = new_rtx;
1677 }
1678
1679 /* This case can happen while generating code to move the result
1680 of a library call to the target. Reject `st r0,@(rX,rY)' because
1681 reload will fail to find a spill register for rX, since r0 is already
1682 being used for the source. */
1683 else if (TARGET_SH1
1684 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1685 && MEM_P (operands[0])
1686 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1687 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1688 operands[1] = copy_to_mode_reg (mode, operands[1]);
1689 }
1690
1691 if (mode == Pmode || mode == ptr_mode)
1692 {
1693 rtx op0, op1, opc;
1694 enum tls_model tls_kind;
1695
1696 op0 = operands[0];
1697 op1 = operands[1];
1698 if (GET_CODE (op1) == CONST
1699 && GET_CODE (XEXP (op1, 0)) == PLUS
1700 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1701 != TLS_MODEL_NONE))
1702 {
1703 opc = XEXP (XEXP (op1, 0), 1);
1704 op1 = XEXP (XEXP (op1, 0), 0);
1705 }
1706 else
1707 opc = NULL_RTX;
1708
1709 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1710 {
1711 rtx tga_op1, tga_ret, tmp, tmp2;
1712
1713 switch (tls_kind)
1714 {
1715 case TLS_MODEL_GLOBAL_DYNAMIC:
1716 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1717 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1718 op1 = tga_ret;
1719 break;
1720
1721 case TLS_MODEL_LOCAL_DYNAMIC:
1722 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1723 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1724
1725 tmp = gen_reg_rtx (Pmode);
1726 emit_move_insn (tmp, tga_ret);
1727
1728 if (register_operand (op0, Pmode))
1729 tmp2 = op0;
1730 else
1731 tmp2 = gen_reg_rtx (Pmode);
1732
1733 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1734 op1 = tmp2;
1735 break;
1736
1737 case TLS_MODEL_INITIAL_EXEC:
1738 if (! flag_pic)
1739 {
1740 /* Don't schedule insns for getting GOT address when
1741 the first scheduling is enabled, to avoid spill
1742 failures for R0. */
1743 if (flag_schedule_insns)
1744 emit_insn (gen_blockage ());
1745 emit_insn (gen_GOTaddr2picreg ());
1746 emit_use (gen_rtx_REG (SImode, PIC_REG));
1747 if (flag_schedule_insns)
1748 emit_insn (gen_blockage ());
1749 }
1750 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1751 tmp = gen_sym2GOTTPOFF (op1);
1752 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1753 op1 = tga_op1;
1754 break;
1755
1756 case TLS_MODEL_LOCAL_EXEC:
1757 tmp2 = gen_reg_rtx (Pmode);
1758 emit_insn (gen_load_gbr (tmp2));
1759 tmp = gen_reg_rtx (Pmode);
1760 emit_insn (gen_symTPOFF2reg (tmp, op1));
1761
1762 if (register_operand (op0, Pmode))
1763 op1 = op0;
1764 else
1765 op1 = gen_reg_rtx (Pmode);
1766
1767 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1768 break;
1769
1770 default:
1771 gcc_unreachable ();
1772 }
1773 if (opc)
1774 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1775 operands[1] = op1;
1776 }
1777 }
1778
1779 return 0;
1780 }
1781
1782 enum rtx_code
1783 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1784 enum rtx_code comparison)
1785 {
1786 rtx op1;
1787 rtx scratch = NULL_RTX;
1788
1789 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1790 comparison = GET_CODE (operands[0]);
1791 else
1792 scratch = operands[4];
1793 if (CONST_INT_P (operands[1])
1794 && !CONST_INT_P (operands[2]))
1795 {
1796 rtx tmp = operands[1];
1797
1798 operands[1] = operands[2];
1799 operands[2] = tmp;
1800 comparison = swap_condition (comparison);
1801 }
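 /* Canonicalize constant comparisons so that the constant moves into the
 range of a cheaper test where possible. For illustration:
 x > -1 becomes x >= 0 x <= -1 becomes x < 0
 x >= 1 becomes x > 0 x < 1 becomes x <= 0
 x >=u 1 becomes x != 0 x <u 1 becomes x == 0
 x >=u 0x80 becomes x >u 0x7f (0x7f, unlike 0x80, fits an 8-bit
 signed immediate). */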
1802 if (CONST_INT_P (operands[2]))
1803 {
1804 HOST_WIDE_INT val = INTVAL (operands[2]);
1805 if ((val == -1 || val == -0x81)
1806 && (comparison == GT || comparison == LE))
1807 {
1808 comparison = (comparison == GT) ? GE : LT;
1809 operands[2] = gen_int_mode (val + 1, mode);
1810 }
1811 else if ((val == 1 || val == 0x80)
1812 && (comparison == GE || comparison == LT))
1813 {
1814 comparison = (comparison == GE) ? GT : LE;
1815 operands[2] = gen_int_mode (val - 1, mode);
1816 }
1817 else if (val == 1 && (comparison == GEU || comparison == LTU))
1818 {
1819 comparison = (comparison == GEU) ? NE : EQ;
1820 operands[2] = CONST0_RTX (mode);
1821 }
1822 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1823 {
1824 comparison = (comparison == GEU) ? GTU : LEU;
1825 operands[2] = gen_int_mode (val - 1, mode);
1826 }
1827 else if (val == 0 && (comparison == GTU || comparison == LEU))
1828 comparison = (comparison == GTU) ? NE : EQ;
1829 else if (mode == SImode
1830 && ((val == 0x7fffffff
1831 && (comparison == GTU || comparison == LEU))
1832 || ((unsigned HOST_WIDE_INT) val
1833 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1834 && (comparison == GEU || comparison == LTU))))
1835 {
1836 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1837 operands[2] = CONST0_RTX (mode);
1838 }
1839 }
1840 op1 = operands[1];
1841 if (can_create_pseudo_p ())
1842 operands[1] = force_reg (mode, op1);
1843 /* When we are handling DImode comparisons, we want to keep constants so
1844 that we can optimize the component comparisons; however, memory loads
1845 are better issued as a whole so that they can be scheduled well.
1846 SImode equality comparisons allow I08 constants, but only when they
1847 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1848 into a register, that register might as well be r0, and we allow the
1849 constant. If it is already in a register, this is likely to be
1850 allocated to a different hard register, thus we load the constant into
1851 a register unless it is zero. */
1852 if (!REG_P (operands[2])
1853 && (!CONST_INT_P (operands[2])
1854 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1855 && ((comparison != EQ && comparison != NE)
1856 || (REG_P (op1) && REGNO (op1) != R0_REG)
1857 || !satisfies_constraint_I08 (operands[2])))))
1858 {
1859 if (scratch && GET_MODE (scratch) == mode)
1860 {
1861 emit_move_insn (scratch, operands[2]);
1862 operands[2] = scratch;
1863 }
1864 else if (can_create_pseudo_p ())
1865 operands[2] = force_reg (mode, operands[2]);
1866 }
1867 return comparison;
1868 }
1869
1870 void
1871 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1872 {
1873 rtx (*branch_expander) (rtx) = gen_branch_true;
1874 rtx jump;
1875
1876 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1877 switch (comparison)
1878 {
1879 case NE: case LT: case LE: case LTU: case LEU:
1880 comparison = reverse_condition (comparison);
1881 branch_expander = gen_branch_false;
1882 default: ;
1883 }
1884 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1885 gen_rtx_fmt_ee (comparison, SImode,
1886 operands[1], operands[2])));
1887 jump = emit_jump_insn (branch_expander (operands[3]));
1888 if (probability >= 0)
1889 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1890
1891 }
1892
1893 /* ??? How should we distribute probabilities when more than one branch
 1894 is generated?  So far we only have some ad-hoc observations:
1895 - If the operands are random, they are likely to differ in both parts.
1896 - If comparing items in a hash chain, the operands are random or equal;
1897 operation should be EQ or NE.
1898 - If items are searched in an ordered tree from the root, we can expect
1899 the highpart to be unequal about half of the time; operation should be
1900 an inequality comparison, operands non-constant, and overall probability
1901 about 50%. Likewise for quicksort.
 1902 - Range checks will often be made against constants.  Even if we assume for
1903 simplicity an even distribution of the non-constant operand over a
1904 sub-range here, the same probability could be generated with differently
1905 wide sub-ranges - as long as the ratio of the part of the subrange that
1906 is before the threshold to the part that comes after the threshold stays
1907 the same. Thus, we can't really tell anything here;
1908 assuming random distribution is at least simple.
1909 */
1910
1911 bool
1912 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1913 {
1914 enum rtx_code msw_taken, msw_skip, lsw_taken;
1915 rtx skip_label = NULL_RTX;
1916 rtx op1h, op1l, op2h, op2l;
1917 int num_branches;
1918 int prob, rev_prob;
1919 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1920 rtx scratch = operands[4];
1921
1922 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1923 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1924 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1925 op1l = gen_lowpart (SImode, operands[1]);
1926 op2l = gen_lowpart (SImode, operands[2]);
1927 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1928 prob = split_branch_probability;
1929 rev_prob = REG_BR_PROB_BASE - prob;
1930 switch (comparison)
1931 {
1932 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1933 That costs 1 cycle more when the first branch can be predicted taken,
1934 but saves us mispredicts because only one branch needs prediction.
1935 It also enables generating the cmpeqdi_t-1 pattern. */
1936 case EQ:
1937 if (TARGET_CMPEQDI_T)
1938 {
1939 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1940 emit_jump_insn (gen_branch_true (operands[3]));
1941 return true;
1942 }
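 /* Split the 64 bit EQ into two 32 bit tests: branch past the low word
 test if the high words differ, then take the branch if the low words
 are equal. */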
1943 msw_skip = NE;
1944 lsw_taken = EQ;
1945 if (prob >= 0)
1946 {
 1947 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
1949 msw_skip_prob = rev_prob;
1950 if (REG_BR_PROB_BASE <= 65535)
1951 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1952 else
1953 {
1954 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1955 lsw_taken_prob
1956 = (prob
1957 ? (REG_BR_PROB_BASE
1958 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1959 / ((HOST_WIDEST_INT) prob << 32)))
1960 : 0);
1961 }
1962 }
1963 break;
1964 case NE:
1965 if (TARGET_CMPEQDI_T)
1966 {
1967 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1968 emit_jump_insn (gen_branch_false (operands[3]));
1969 return true;
1970 }
1971 msw_taken = NE;
1972 msw_taken_prob = prob;
1973 lsw_taken = NE;
1974 lsw_taken_prob = 0;
1975 break;
1976 case GTU: case GT:
1977 msw_taken = comparison;
1978 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1979 break;
1980 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1981 msw_skip = swap_condition (msw_taken);
1982 lsw_taken = GTU;
1983 break;
1984 case GEU: case GE:
1985 if (op2l == CONST0_RTX (SImode))
1986 msw_taken = comparison;
1987 else
1988 {
1989 msw_taken = comparison == GE ? GT : GTU;
1990 msw_skip = swap_condition (msw_taken);
1991 lsw_taken = GEU;
1992 }
1993 break;
1994 case LTU: case LT:
1995 msw_taken = comparison;
1996 if (op2l == CONST0_RTX (SImode))
1997 break;
1998 msw_skip = swap_condition (msw_taken);
1999 lsw_taken = LTU;
2000 break;
2001 case LEU: case LE:
2002 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2003 msw_taken = comparison;
2004 else
2005 {
2006 lsw_taken = LEU;
2007 if (comparison == LE)
2008 msw_taken = LT;
2009 else if (op2h != CONST0_RTX (SImode))
2010 msw_taken = LTU;
2011 else
2012 break;
2013 msw_skip = swap_condition (msw_taken);
2014 }
2015 break;
2016 default: return false;
2017 }
2018 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2019 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2020 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2021 if (comparison != EQ && comparison != NE && num_branches > 1)
2022 {
2023 if (!CONSTANT_P (operands[2])
2024 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2025 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2026 {
2027 msw_taken_prob = prob / 2U;
2028 msw_skip_prob
2029 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2030 lsw_taken_prob = prob;
2031 }
2032 else
2033 {
2034 msw_taken_prob = prob;
2035 msw_skip_prob = REG_BR_PROB_BASE;
2036 /* ??? If we have a constant op2h, should we use that when
2037 calculating lsw_taken_prob? */
2038 lsw_taken_prob = prob;
2039 }
2040 }
2041 operands[1] = op1h;
2042 operands[2] = op2h;
2043 operands[4] = NULL_RTX;
2044 if (reload_completed
2045 && ! arith_reg_or_0_operand (op2h, SImode)
2046 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2047 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2048 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2049 {
2050 emit_move_insn (scratch, operands[2]);
2051 operands[2] = scratch;
2052 }
2053 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2054 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2055 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2056 {
2057 rtx taken_label = operands[3];
2058
2059 /* Operands were possibly modified, but msw_skip doesn't expect this.
2060 Always use the original ones. */
2061 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2062 {
2063 operands[1] = op1h;
2064 operands[2] = op2h;
2065 }
2066
2067 operands[3] = skip_label = gen_label_rtx ();
2068 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2069 operands[3] = taken_label;
2070 }
2071 operands[1] = op1l;
2072 operands[2] = op2l;
2073 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2074 {
2075 if (reload_completed
2076 && ! arith_reg_or_0_operand (op2l, SImode)
2077 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2078 {
2079 emit_move_insn (scratch, operands[2]);
2080 operands[2] = scratch;
2081 }
2082 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2083 }
2084 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2085 emit_label (skip_label);
2086 return true;
2087 }
2088
2089 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2090
2091 static void
2092 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2093 {
2094 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2095 {
2096 insn = gen_rtx_PARALLEL (VOIDmode,
2097 gen_rtvec (2, insn,
2098 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2099 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2100 }
2101 else
2102 emit_insn (insn);
2103 }
2104
2105 /* Prepare the operands for an scc instruction; make sure that the
2106 compare has been done and the result is in T_REG. */
2107 void
2108 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2109 {
2110 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2111 enum rtx_code oldcode = code;
2112 enum machine_mode mode;
2113
2114 /* First need a compare insn. */
2115 switch (code)
2116 {
2117 case NE:
2118 /* It isn't possible to handle this case. */
2119 gcc_unreachable ();
2120 case LT:
2121 code = GT;
2122 break;
2123 case LE:
2124 code = GE;
2125 break;
2126 case LTU:
2127 code = GTU;
2128 break;
2129 case LEU:
2130 code = GEU;
2131 break;
2132 default:
2133 break;
2134 }
2135 if (code != oldcode)
2136 {
2137 rtx tmp = op0;
2138 op0 = op1;
2139 op1 = tmp;
2140 }
2141
2142 mode = GET_MODE (op0);
2143 if (mode == VOIDmode)
2144 mode = GET_MODE (op1);
2145
2146 op0 = force_reg (mode, op0);
2147 if ((code != EQ && code != NE
2148 && (op1 != const0_rtx
2149 || code == GTU || code == GEU || code == LTU || code == LEU))
2150 || (mode == DImode && op1 != const0_rtx)
2151 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2152 op1 = force_reg (mode, op1);
2153
2154 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2155 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2156 mode);
2157 }
2158
2159 rtx
2160 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2161 rtx op0, rtx op1)
2162 {
2163 rtx target = gen_reg_rtx (SImode);
2164 rtx tmp;
2165
2166 gcc_assert (TARGET_SHMEDIA);
2167 switch (code)
2168 {
2169 case EQ:
2170 case GT:
2171 case LT:
2172 case UNORDERED:
2173 case GTU:
2174 case LTU:
2175 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2176 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2177 code = NE;
2178 break;
2179
2180 case NE:
2181 case GE:
2182 case LE:
2183 case ORDERED:
2184 case GEU:
2185 case LEU:
2186 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2187 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2188 code = EQ;
2189 break;
2190
2191 case UNEQ:
2192 case UNGE:
2193 case UNGT:
2194 case UNLE:
2195 case UNLT:
2196 case LTGT:
2197 return NULL_RTX;
2198
2199 default:
2200 gcc_unreachable ();
2201 }
2202
2203 if (mode == DImode)
2204 {
2205 rtx t2 = gen_reg_rtx (DImode);
2206 emit_insn (gen_extendsidi2 (t2, target));
2207 target = t2;
2208 }
2209
2210 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2211 }
2212
2213 /* Called from the md file, set up the operands of a compare instruction. */
2214
2215 void
2216 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2217 {
2218 enum rtx_code code = GET_CODE (operands[0]);
2219 enum rtx_code branch_code;
2220 rtx op0 = operands[1];
2221 rtx op1 = operands[2];
2222 rtx insn, tem;
2223 bool need_ccmpeq = false;
2224
2225 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2226 {
2227 op0 = force_reg (mode, op0);
2228 op1 = force_reg (mode, op1);
2229 }
2230 else
2231 {
2232 if (code != EQ || mode == DImode)
2233 {
2234 /* Force args into regs, since we can't use constants here. */
2235 op0 = force_reg (mode, op0);
2236 if (op1 != const0_rtx || code == GTU || code == GEU)
2237 op1 = force_reg (mode, op1);
2238 }
2239 }
2240
2241 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2242 {
2243 if (code == LT
2244 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2245 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2246 {
2247 tem = op0, op0 = op1, op1 = tem;
2248 code = swap_condition (code);
2249 }
2250
2251 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2252 if (code == GE)
2253 {
2254 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2255 need_ccmpeq = true;
2256 code = GT;
2257 }
2258
2259 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2260 to EQ/GT respectively. */
2261 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2262 }
2263
2264 switch (code)
2265 {
2266 case EQ:
2267 case GT:
2268 case GE:
2269 case GTU:
2270 case GEU:
2271 branch_code = code;
2272 break;
2273 case NE:
2274 case LT:
2275 case LE:
2276 case LTU:
2277 case LEU:
2278 branch_code = reverse_condition (code);
2279 break;
2280 default:
2281 gcc_unreachable ();
2282 }
2283
2284 insn = gen_rtx_SET (VOIDmode,
2285 gen_rtx_REG (SImode, T_REG),
2286 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2287
2288 sh_emit_set_t_insn (insn, mode);
2289 if (need_ccmpeq)
2290 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2291
2292 if (branch_code == code)
2293 emit_jump_insn (gen_branch_true (operands[3]));
2294 else
2295 emit_jump_insn (gen_branch_false (operands[3]));
2296 }
2297
2298 void
2299 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2300 {
2301 enum rtx_code code = GET_CODE (operands[1]);
2302 rtx op0 = operands[2];
2303 rtx op1 = operands[3];
2304 rtx lab = NULL_RTX;
2305 bool invert = false;
2306 rtx tem;
2307
2308 op0 = force_reg (mode, op0);
2309 if ((code != EQ && code != NE
2310 && (op1 != const0_rtx
2311 || code == GTU || code == GEU || code == LTU || code == LEU))
2312 || (mode == DImode && op1 != const0_rtx)
2313 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2314 op1 = force_reg (mode, op1);
2315
2316 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2317 {
2318 if (code == LT || code == LE)
2319 {
2320 code = swap_condition (code);
2321 tem = op0, op0 = op1, op1 = tem;
2322 }
2323 if (code == GE)
2324 {
2325 if (TARGET_IEEE)
2326 {
2327 lab = gen_label_rtx ();
2328 sh_emit_scc_to_t (EQ, op0, op1);
2329 emit_jump_insn (gen_branch_true (lab));
2330 code = GT;
2331 }
2332 else
2333 {
2334 code = LT;
2335 invert = true;
2336 }
2337 }
2338 }
2339
2340 if (code == NE)
2341 {
2342 code = EQ;
2343 invert = true;
2344 }
2345
2346 sh_emit_scc_to_t (code, op0, op1);
2347 if (lab)
2348 emit_label (lab);
2349 if (invert)
2350 emit_insn (gen_movnegt (operands[0]));
2351 else
2352 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2353 }
2354 \f
2355 /* Functions to output assembly code. */
2356
2357 /* Return a sequence of instructions to perform DI or DF move.
2358
2359 Since the SH cannot move a DI or DF in one instruction, we have
2360 to take care when we see overlapping source and dest registers. */
2361
2362 const char *
2363 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2364 enum machine_mode mode)
2365 {
2366 rtx dst = operands[0];
2367 rtx src = operands[1];
2368
2369 if (MEM_P (dst)
2370 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2371 return "mov.l %T1,%0\n\tmov.l %1,%0";
2372
2373 if (register_operand (dst, mode)
2374 && register_operand (src, mode))
2375 {
2376 if (REGNO (src) == MACH_REG)
2377 return "sts mach,%S0\n\tsts macl,%R0";
2378
2379 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2380 when mov.d r1,r0 do r1->r0 then r2->r1. */
2381
2382 if (REGNO (src) + 1 == REGNO (dst))
2383 return "mov %T1,%T0\n\tmov %1,%0";
2384 else
2385 return "mov %1,%0\n\tmov %T1,%T0";
2386 }
2387 else if (CONST_INT_P (src))
2388 {
2389 if (INTVAL (src) < 0)
2390 output_asm_insn ("mov #-1,%S0", operands);
2391 else
2392 output_asm_insn ("mov #0,%S0", operands);
2393
2394 return "mov %1,%R0";
2395 }
2396 else if (MEM_P (src))
2397 {
2398 int ptrreg = -1;
2399 int dreg = REGNO (dst);
2400 rtx inside = XEXP (src, 0);
2401
2402 switch (GET_CODE (inside))
2403 {
2404 case REG:
2405 ptrreg = REGNO (inside);
2406 break;
2407
2408 case SUBREG:
2409 ptrreg = subreg_regno (inside);
2410 break;
2411
2412 case PLUS:
2413 ptrreg = REGNO (XEXP (inside, 0));
 2414 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2415 an offsettable address. Unfortunately, offsettable addresses use
2416 QImode to check the offset, and a QImode offsettable address
2417 requires r0 for the other operand, which is not currently
2418 supported, so we can't use the 'o' constraint.
2419 Thus we must check for and handle r0+REG addresses here.
2420 We punt for now, since this is likely very rare. */
2421 gcc_assert (!REG_P (XEXP (inside, 1)));
2422 break;
2423
2424 case LABEL_REF:
2425 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2426 case POST_INC:
2427 return "mov.l %1,%0\n\tmov.l %1,%T0";
2428 default:
2429 gcc_unreachable ();
2430 }
2431
2432 /* Work out the safe way to copy. Copy into the second half first. */
2433 if (dreg == ptrreg)
2434 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2435 }
2436
2437 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2438 }
2439
2440 /* Print an instruction which would have gone into a delay slot after
2441 another instruction, but couldn't because the other instruction expanded
2442 into a sequence where putting the slot insn at the end wouldn't work. */
2443
2444 static void
2445 print_slot (rtx insn)
2446 {
2447 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2448
2449 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2450 }
2451
2452 const char *
2453 output_far_jump (rtx insn, rtx op)
2454 {
2455 struct { rtx lab, reg, op; } this_jmp;
2456 rtx braf_base_lab = NULL_RTX;
2457 const char *jump;
2458 int far;
2459 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2460 rtx prev;
2461
2462 this_jmp.lab = gen_label_rtx ();
2463
2464 if (TARGET_SH2
2465 && offset >= -32764
2466 && offset - get_attr_length (insn) <= 32766)
2467 {
2468 far = 0;
2469 jump = "mov.w %O0,%1; braf %1";
2470 }
2471 else
2472 {
2473 far = 1;
2474 if (flag_pic)
2475 {
2476 if (TARGET_SH2)
2477 jump = "mov.l %O0,%1; braf %1";
2478 else
2479 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2480 }
2481 else
2482 jump = "mov.l %O0,%1; jmp @%1";
2483 }
2484 /* If we have a scratch register available, use it. */
2485 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2486 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2487 {
2488 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2489 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2490 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2491 output_asm_insn (jump, &this_jmp.lab);
2492 if (dbr_sequence_length ())
2493 print_slot (final_sequence);
2494 else
2495 output_asm_insn ("nop", 0);
2496 }
2497 else
2498 {
2499 /* Output the delay slot insn first if any. */
2500 if (dbr_sequence_length ())
2501 print_slot (final_sequence);
2502
2503 this_jmp.reg = gen_rtx_REG (SImode, 13);
2504 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2505 Fortunately, MACL is fixed and call-clobbered, and we never
2506 need its value across jumps, so save r13 in it instead of in
2507 the stack. */
2508 if (TARGET_SH5)
2509 output_asm_insn ("lds r13, macl", 0);
2510 else
2511 output_asm_insn ("mov.l r13,@-r15", 0);
2512 output_asm_insn (jump, &this_jmp.lab);
2513 if (TARGET_SH5)
2514 output_asm_insn ("sts macl, r13", 0);
2515 else
2516 output_asm_insn ("mov.l @r15+,r13", 0);
2517 }
2518 if (far && flag_pic && TARGET_SH2)
2519 {
2520 braf_base_lab = gen_label_rtx ();
2521 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2522 CODE_LABEL_NUMBER (braf_base_lab));
2523 }
2524 if (far)
2525 output_asm_insn (".align 2", 0);
2526 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2527 this_jmp.op = op;
2528 if (far && flag_pic)
2529 {
2530 if (TARGET_SH2)
2531 this_jmp.lab = braf_base_lab;
2532 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2533 }
2534 else
2535 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2536 return "";
2537 }
2538
2539 /* Local label counter, used for constants in the pool and inside
2540 pattern branches. */
2541
2542 static int lf = 100;
2543
2544 /* Output code for ordinary branches. */
2545
2546 const char *
2547 output_branch (int logic, rtx insn, rtx *operands)
2548 {
2549 switch (get_attr_length (insn))
2550 {
2551 case 6:
2552 /* This can happen if filling the delay slot has caused a forward
2553 branch to exceed its range (we could reverse it, but only
2554 when we know we won't overextend other branches; this should
2555 best be handled by relaxation).
 2556 It can also happen when other condbranches hoist a delay slot insn
2557 from their destination, thus leading to code size increase.
2558 But the branch will still be in the range -4092..+4098 bytes. */
2559
2560 if (! TARGET_RELAX)
2561 {
2562 int label = lf++;
2563 /* The call to print_slot will clobber the operands. */
2564 rtx op0 = operands[0];
2565
2566 /* If the instruction in the delay slot is annulled (true), then
2567 there is no delay slot where we can put it now. The only safe
2568 place for it is after the label. final will do that by default. */
2569
2570 if (final_sequence
2571 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2572 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2573 {
2574 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2575 ASSEMBLER_DIALECT ? "/" : ".", label);
2576 print_slot (final_sequence);
2577 }
2578 else
2579 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2580
2581 output_asm_insn ("bra\t%l0", &op0);
2582 fprintf (asm_out_file, "\tnop\n");
2583 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2584
2585 return "";
2586 }
2587 /* When relaxing, handle this like a short branch. The linker
2588 will fix it up if it still doesn't fit after relaxation. */
2589 case 2:
2590 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2591
2592 /* These are for SH2e, in which we have to account for the
2593 extra nop because of the hardware bug in annulled branches. */
2594 case 8:
2595 if (! TARGET_RELAX)
2596 {
2597 int label = lf++;
2598
2599 gcc_assert (!final_sequence
2600 || !(INSN_ANNULLED_BRANCH_P
2601 (XVECEXP (final_sequence, 0, 0))));
2602 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2603 logic ? "f" : "t",
2604 ASSEMBLER_DIALECT ? "/" : ".", label);
2605 fprintf (asm_out_file, "\tnop\n");
2606 output_asm_insn ("bra\t%l0", operands);
2607 fprintf (asm_out_file, "\tnop\n");
2608 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2609
2610 return "";
2611 }
2612 /* When relaxing, fall through. */
2613 case 4:
2614 {
2615 char buffer[10];
2616
2617 sprintf (buffer, "b%s%ss\t%%l0",
2618 logic ? "t" : "f",
2619 ASSEMBLER_DIALECT ? "/" : ".");
2620 output_asm_insn (buffer, &operands[0]);
2621 return "nop";
2622 }
2623
2624 default:
2625 /* There should be no longer branches now - that would
2626 indicate that something has destroyed the branches set
2627 up in machine_dependent_reorg. */
2628 gcc_unreachable ();
2629 }
2630 }
2631
2632 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
 2633 fill in operand 9 with a label for the successor insn.
 2634 We try to use jump threading where possible.
 2635 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2636 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2637 follow jmp and bt, if the address is in range. */
2638 const char *
2639 output_branchy_insn (enum rtx_code code, const char *templ,
2640 rtx insn, rtx *operands)
2641 {
2642 rtx next_insn = NEXT_INSN (insn);
2643
2644 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2645 {
2646 rtx src = SET_SRC (PATTERN (next_insn));
2647 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2648 {
2649 /* Following branch not taken */
2650 operands[9] = gen_label_rtx ();
2651 emit_label_after (operands[9], next_insn);
2652 INSN_ADDRESSES_NEW (operands[9],
2653 INSN_ADDRESSES (INSN_UID (next_insn))
2654 + get_attr_length (next_insn));
2655 return templ;
2656 }
2657 else
2658 {
2659 int offset = (branch_dest (next_insn)
2660 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2661 if (offset >= -252 && offset <= 258)
2662 {
2663 if (GET_CODE (src) == IF_THEN_ELSE)
2664 /* branch_true */
2665 src = XEXP (src, 1);
2666 operands[9] = src;
2667 return templ;
2668 }
2669 }
2670 }
2671 operands[9] = gen_label_rtx ();
2672 emit_label_after (operands[9], insn);
2673 INSN_ADDRESSES_NEW (operands[9],
2674 INSN_ADDRESSES (INSN_UID (insn))
2675 + get_attr_length (insn));
2676 return templ;
2677 }
2678
2679 const char *
2680 output_ieee_ccmpeq (rtx insn, rtx *operands)
2681 {
2682 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2683 insn, operands);
2684 }
2685 \f
2686 /* Output the start of the assembler file. */
2687
2688 static void
2689 sh_file_start (void)
2690 {
2691 default_file_start ();
2692
2693 #ifdef SYMBIAN
2694 /* Declare the .directive section before it is used. */
2695 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2696 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2697 #endif
2698
2699 if (TARGET_ELF)
2700 /* We need to show the text section with the proper
2701 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2702 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2703 will complain. We can teach GAS specifically about the
2704 default attributes for our choice of text section, but
2705 then we would have to change GAS again if/when we change
2706 the text section name. */
2707 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2708 else
2709 /* Switch to the data section so that the coffsem symbol
2710 isn't in the text section. */
2711 switch_to_section (data_section);
2712
2713 if (TARGET_LITTLE_ENDIAN)
2714 fputs ("\t.little\n", asm_out_file);
2715
2716 if (!TARGET_ELF)
2717 {
2718 if (TARGET_SHCOMPACT)
2719 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2720 else if (TARGET_SHMEDIA)
2721 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2722 TARGET_SHMEDIA64 ? 64 : 32);
2723 }
2724 }
2725 \f
2726 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2727
2728 static bool
2729 unspec_caller_rtx_p (rtx pat)
2730 {
2731 rtx base, offset;
2732 int i;
2733
2734 split_const (pat, &base, &offset);
2735 if (GET_CODE (base) == UNSPEC)
2736 {
2737 if (XINT (base, 1) == UNSPEC_CALLER)
2738 return true;
2739 for (i = 0; i < XVECLEN (base, 0); i++)
2740 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2741 return true;
2742 }
2743 return false;
2744 }
2745
 2746 /* Indicate that INSN cannot be duplicated.  This is true for an insn
 2747 that generates a unique label. */
2748
2749 static bool
2750 sh_cannot_copy_insn_p (rtx insn)
2751 {
2752 rtx pat;
2753
2754 if (!reload_completed || !flag_pic)
2755 return false;
2756
2757 if (!NONJUMP_INSN_P (insn))
2758 return false;
2759 if (asm_noperands (insn) >= 0)
2760 return false;
2761
2762 pat = PATTERN (insn);
2763 if (GET_CODE (pat) != SET)
2764 return false;
2765 pat = SET_SRC (pat);
2766
2767 if (unspec_caller_rtx_p (pat))
2768 return true;
2769
2770 return false;
2771 }
2772 \f
2773 /* Actual number of instructions used to make a shift by N. */
2774 static const char ashiftrt_insns[] =
2775 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2776
2777 /* Left shift and logical right shift are the same. */
2778 static const char shift_insns[] =
2779 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2780
2781 /* Individual shift amounts needed to get the above length sequences.
2782 One bit right shifts clobber the T bit, so when possible, put one bit
2783 shifts in the middle of the sequence, so the ends are eligible for
2784 branch delay slots. */
2785 static const short shift_amounts[32][5] = {
2786 {0}, {1}, {2}, {2, 1},
2787 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2788 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2789 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2790 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2791 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2792 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2793 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
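 /* A sketch of how these tables are used (values read off the arrays
 above): a constant left shift by 6 takes shift_insns[6] = 3
 instructions with shift_amounts[6] = {2, 2, 2}, i.e. three shll2's;
 a left shift by 10 takes shift_insns[10] = 2 with amounts {8, 2},
 i.e. shll8 followed by shll2. */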
2794
2795 /* Likewise, but for shift amounts < 16, up to three highmost bits
2796 might be clobbered. This is typically used when combined with some
2797 kind of sign or zero extension. */
2798
2799 static const char ext_shift_insns[] =
2800 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2801
2802 static const short ext_shift_amounts[32][4] = {
2803 {0}, {1}, {2}, {2, 1},
2804 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2805 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2806 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2807 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2808 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2809 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2810 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2811
2812 /* Assuming we have a value that has been sign-extended by at least one bit,
2813 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2814 to shift it by N without data loss, and quicker than by other means? */
2815 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
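 /* For illustration: ((n) | 8) == 15 holds only for n == 7 and n == 15,
 where ext_shift_amounts ends with a one-bit right shift ({8, -1} and
 {16, -1}); turning that last shift into an arithmetic shift preserves
 the sign bit. */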
2816
2817 /* This is used in length attributes in sh.md to help compute the length
2818 of arbitrary constant shift instructions. */
2819
2820 int
2821 shift_insns_rtx (rtx insn)
2822 {
2823 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2824 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2825 enum rtx_code shift_code = GET_CODE (set_src);
2826
2827 switch (shift_code)
2828 {
2829 case ASHIFTRT:
2830 return ashiftrt_insns[shift_count];
2831 case LSHIFTRT:
2832 case ASHIFT:
2833 return shift_insns[shift_count];
2834 default:
2835 gcc_unreachable ();
2836 }
2837 }
2838
2839 /* Return the cost of a shift. */
2840
2841 static inline int
2842 shiftcosts (rtx x)
2843 {
2844 int value;
2845
2846 if (TARGET_SHMEDIA)
2847 return 1;
2848
2849 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2850 {
2851 if (GET_MODE (x) == DImode
2852 && CONST_INT_P (XEXP (x, 1))
2853 && INTVAL (XEXP (x, 1)) == 1)
2854 return 2;
2855
2856 /* Everything else is invalid, because there is no pattern for it. */
2857 return MAX_COST;
2858 }
 2859 /* A shift by a non-constant amount will be expensive. */
2860 if (!CONST_INT_P (XEXP (x, 1)))
2861 return SH_DYNAMIC_SHIFT_COST;
2862
2863 /* Otherwise, return the true cost in instructions. Cope with out of range
2864 shift counts more or less arbitrarily. */
2865 value = INTVAL (XEXP (x, 1)) & 31;
2866
2867 if (GET_CODE (x) == ASHIFTRT)
2868 {
2869 int cost = ashiftrt_insns[value];
2870 /* If SH3, then we put the constant in a reg and use shad. */
2871 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2872 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2873 return cost;
2874 }
2875 else
2876 return shift_insns[value];
2877 }
2878
2879 /* Return the cost of an AND operation. */
2880
2881 static inline int
2882 andcosts (rtx x)
2883 {
2884 int i;
2885
2886 /* Anding with a register is a single cycle and instruction. */
2887 if (!CONST_INT_P (XEXP (x, 1)))
2888 return 1;
2889
2890 i = INTVAL (XEXP (x, 1));
2891
2892 if (TARGET_SHMEDIA)
2893 {
2894 if (satisfies_constraint_I10 (XEXP (x, 1))
2895 || satisfies_constraint_J16 (XEXP (x, 1)))
2896 return 1;
2897 else
2898 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2899 }
2900
2901 /* These constants are single cycle extu.[bw] instructions. */
2902 if (i == 0xff || i == 0xffff)
2903 return 1;
2904 /* Constants that can be used in an and immediate instruction in a single
2905 cycle, but this requires r0, so make it a little more expensive. */
2906 if (CONST_OK_FOR_K08 (i))
2907 return 2;
2908 /* Constants that can be loaded with a mov immediate and an and.
2909 This case is probably unnecessary. */
2910 if (CONST_OK_FOR_I08 (i))
2911 return 2;
 2912 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2913 This case is probably unnecessary. */
2914 return 3;
2915 }
2916
2917 /* Return the cost of an addition or a subtraction. */
2918
2919 static inline int
2920 addsubcosts (rtx x)
2921 {
2922 /* Adding a register is a single cycle insn. */
2923 if (REG_P (XEXP (x, 1))
2924 || GET_CODE (XEXP (x, 1)) == SUBREG)
2925 return 1;
2926
2927 /* Likewise for small constants. */
2928 if (CONST_INT_P (XEXP (x, 1))
2929 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2930 return 1;
2931
2932 if (TARGET_SHMEDIA)
2933 switch (GET_CODE (XEXP (x, 1)))
2934 {
2935 case CONST:
2936 case LABEL_REF:
2937 case SYMBOL_REF:
2938 return TARGET_SHMEDIA64 ? 5 : 3;
2939
2940 case CONST_INT:
2941 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2942 return 2;
2943 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2944 return 3;
2945 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2946 return 4;
2947
2948 /* Fall through. */
2949 default:
2950 return 5;
2951 }
2952
2953 /* Any other constant requires a 2 cycle pc-relative load plus an
2954 addition. */
2955 return 3;
2956 }
2957
2958 /* Return the cost of a multiply. */
2959 static inline int
2960 multcosts (rtx x ATTRIBUTE_UNUSED)
2961 {
2962 if (sh_multcost >= 0)
2963 return sh_multcost;
2964 if (TARGET_SHMEDIA)
2965 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2966 accept constants. Ideally, we would use a cost of one or two and
2967 add the cost of the operand, but disregard the latter when inside loops
2968 and loop invariant code motion is still to follow.
2969 Using a multiply first and splitting it later if it's a loss
2970 doesn't work because of different sign / zero extension semantics
2971 of multiplies vs. shifts. */
2972 return TARGET_SMALLCODE ? 2 : 3;
2973
2974 if (TARGET_SH2)
2975 {
2976 /* We have a mul insn, so we can never take more than the mul and the
2977 read of the mac reg, but count more because of the latency and extra
2978 reg usage. */
2979 if (TARGET_SMALLCODE)
2980 return 2;
2981 return 3;
2982 }
2983
2984 /* If we're aiming at small code, then just count the number of
2985 insns in a multiply call sequence. */
2986 if (TARGET_SMALLCODE)
2987 return 5;
2988
2989 /* Otherwise count all the insns in the routine we'd be calling too. */
2990 return 20;
2991 }
2992
2993 /* Compute a (partial) cost for rtx X. Return true if the complete
2994 cost has been computed, and false if subexpressions should be
2995 scanned. In either case, *TOTAL contains the cost result. */
2996
2997 static bool
2998 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2999 bool speed ATTRIBUTE_UNUSED)
3000 {
3001 switch (code)
3002 {
3003 case CONST_INT:
3004 if (TARGET_SHMEDIA)
3005 {
3006 if (INTVAL (x) == 0)
3007 *total = 0;
3008 else if (outer_code == AND && and_operand ((x), DImode))
3009 *total = 0;
3010 else if ((outer_code == IOR || outer_code == XOR
3011 || outer_code == PLUS)
3012 && CONST_OK_FOR_I10 (INTVAL (x)))
3013 *total = 0;
3014 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3015 *total = COSTS_N_INSNS (outer_code != SET);
3016 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3017 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3018 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3019 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3020 else
3021 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3022 return true;
3023 }
3024 if (CONST_OK_FOR_I08 (INTVAL (x)))
3025 *total = 0;
3026 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3027 && CONST_OK_FOR_K08 (INTVAL (x)))
3028 *total = 1;
 3029 /* prepare_cmp_insn will force costly constants into registers before
3030 the cbranch[sd]i4 patterns can see them, so preserve potentially
3031 interesting ones not covered by I08 above. */
3032 else if (outer_code == COMPARE
3033 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3034 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3035 || INTVAL (x) == 0x7fffffff
3036 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3037 *total = 1;
3038 else
3039 *total = 8;
3040 return true;
3041
3042 case CONST:
3043 case LABEL_REF:
3044 case SYMBOL_REF:
3045 if (TARGET_SHMEDIA64)
3046 *total = COSTS_N_INSNS (4);
3047 else if (TARGET_SHMEDIA32)
3048 *total = COSTS_N_INSNS (2);
3049 else
3050 *total = 5;
3051 return true;
3052
3053 case CONST_DOUBLE:
3054 if (TARGET_SHMEDIA)
3055 *total = COSTS_N_INSNS (4);
 3056 /* prepare_cmp_insn will force costly constants into registers before
3057 the cbranchdi4 pattern can see them, so preserve potentially
3058 interesting ones. */
3059 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3060 *total = 1;
3061 else
3062 *total = 10;
3063 return true;
3064 case CONST_VECTOR:
3065 if (x == CONST0_RTX (GET_MODE (x)))
3066 *total = 0;
3067 else if (sh_1el_vec (x, VOIDmode))
3068 *total = outer_code != SET;
 3069 else if (sh_rep_vec (x, VOIDmode))
 3070 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
 3071 + (outer_code != SET));
 3072 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
3073 return true;
3074
3075 case PLUS:
3076 case MINUS:
3077 *total = COSTS_N_INSNS (addsubcosts (x));
3078 return true;
3079
3080 case AND:
3081 *total = COSTS_N_INSNS (andcosts (x));
3082 return true;
3083
3084 case MULT:
3085 *total = COSTS_N_INSNS (multcosts (x));
3086 return true;
3087
3088 case ASHIFT:
3089 case ASHIFTRT:
3090 case LSHIFTRT:
3091 *total = COSTS_N_INSNS (shiftcosts (x));
3092 return true;
3093
3094 case DIV:
3095 case UDIV:
3096 case MOD:
3097 case UMOD:
3098 *total = COSTS_N_INSNS (20);
3099 return true;
3100
3101 case PARALLEL:
3102 if (sh_1el_vec (x, VOIDmode))
3103 *total = outer_code != SET;
 3104 else if (sh_rep_vec (x, VOIDmode))
 3105 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
 3106 + (outer_code != SET));
 3107 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
3108 return true;
3109
3110 case FLOAT:
3111 case FIX:
3112 *total = 100;
3113 return true;
3114
3115 default:
3116 return false;
3117 }
3118 }
3119
3120 /* Compute the cost of an address. For the SH, all valid addresses are
3121 the same cost. Use a slightly higher cost for reg + reg addressing,
3122 since it increases pressure on r0. */
3123
3124 static int
3125 sh_address_cost (rtx X,
3126 bool speed ATTRIBUTE_UNUSED)
3127 {
3128 return (GET_CODE (X) == PLUS
3129 && ! CONSTANT_P (XEXP (X, 1))
3130 && ! TARGET_SHMEDIA ? 1 : 0);
3131 }
3132
3133 /* Code to expand a shift. */
3134
3135 void
3136 gen_ashift (int type, int n, rtx reg)
3137 {
3138 /* Negative values here come from the shift_amounts array. */
3139 if (n < 0)
3140 {
3141 if (type == ASHIFT)
3142 type = LSHIFTRT;
3143 else
3144 type = ASHIFT;
3145 n = -n;
3146 }
3147
3148 switch (type)
3149 {
3150 case ASHIFTRT:
3151 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3152 break;
3153 case LSHIFTRT:
3154 if (n == 1)
3155 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3156 else
3157 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3158 break;
3159 case ASHIFT:
3160 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3161 break;
3162 }
3163 }
3164
3165 /* Same for HImode */
3166
3167 void
3168 gen_ashift_hi (int type, int n, rtx reg)
3169 {
3170 /* Negative values here come from the shift_amounts array. */
3171 if (n < 0)
3172 {
3173 if (type == ASHIFT)
3174 type = LSHIFTRT;
3175 else
3176 type = ASHIFT;
3177 n = -n;
3178 }
3179
3180 switch (type)
3181 {
3182 case ASHIFTRT:
3183 case LSHIFTRT:
3184 /* We don't have HImode right shift operations because using the
3185 ordinary 32 bit shift instructions for that doesn't generate proper
3186 zero/sign extension.
3187 gen_ashift_hi is only called in contexts where we know that the
3188 sign extension works out correctly. */
3189 {
3190 int offset = 0;
3191 if (GET_CODE (reg) == SUBREG)
3192 {
3193 offset = SUBREG_BYTE (reg);
3194 reg = SUBREG_REG (reg);
3195 }
3196 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3197 break;
3198 }
3199 case ASHIFT:
3200 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3201 break;
3202 }
3203 }
3204
3205 /* Output RTL to split a constant shift into its component SH constant
3206 shift instructions. */
3207
3208 void
3209 gen_shifty_op (int code, rtx *operands)
3210 {
3211 int value = INTVAL (operands[2]);
3212 int max, i;
3213
3214 /* Truncate the shift count in case it is out of bounds. */
3215 value = value & 31;
3216
3217 if (value == 31)
3218 {
3219 if (code == LSHIFTRT)
3220 {
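 /* A logical right shift by 31 leaves just the sign bit; rotating left
 by one copies that bit into T, and movt then materializes T as 0/1. */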
3221 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3222 emit_insn (gen_movt (operands[0]));
3223 return;
3224 }
3225 else if (code == ASHIFT)
3226 {
3227 /* There is a two instruction sequence for 31 bit left shifts,
3228 but it requires r0. */
3229 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3230 {
3231 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3232 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3233 return;
3234 }
3235 }
3236 }
3237 else if (value == 0)
3238 {
3239 /* This can happen even when optimizing, if there were subregs before
3240 reload. Don't output a nop here, as this is never optimized away;
3241 use a no-op move instead. */
3242 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3243 return;
3244 }
3245
3246 max = shift_insns[value];
3247 for (i = 0; i < max; i++)
3248 gen_ashift (code, shift_amounts[value][i], operands[0]);
3249 }
3250
3251 /* Same as above, but optimized for values where the topmost bits don't
3252 matter. */
3253
3254 void
3255 gen_shifty_hi_op (int code, rtx *operands)
3256 {
3257 int value = INTVAL (operands[2]);
3258 int max, i;
3259 void (*gen_fun) (int, int, rtx);
3260
3261 /* This operation is used by and_shl for SImode values with a few
3262 high bits known to be cleared. */
3263 value &= 31;
3264 if (value == 0)
3265 {
3266 emit_insn (gen_nop ());
3267 return;
3268 }
3269
3270 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3271 if (code == ASHIFT)
3272 {
3273 max = ext_shift_insns[value];
3274 for (i = 0; i < max; i++)
3275 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3276 }
3277 else
3278 /* When shifting right, emit the shifts in reverse order, so that
3279 solitary negative values come first. */
3280 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3281 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3282 }
3283
3284 /* Output RTL for an arithmetic right shift. */
3285
3286 /* ??? Rewrite to use super-optimizer sequences. */
3287
3288 int
3289 expand_ashiftrt (rtx *operands)
3290 {
3291 rtx wrk;
3292 char func[18];
3293 int value;
3294
3295 if (TARGET_SH3)
3296 {
3297 if (!CONST_INT_P (operands[2]))
3298 {
3299 rtx count = copy_to_mode_reg (SImode, operands[2]);
3300 emit_insn (gen_negsi2 (count, count));
3301 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3302 return 1;
3303 }
3304 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3305 > 1 + SH_DYNAMIC_SHIFT_COST)
3306 {
3307 rtx count
3308 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3309 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3310 return 1;
3311 }
3312 }
3313 if (!CONST_INT_P (operands[2]))
3314 return 0;
3315
3316 value = INTVAL (operands[2]) & 31;
3317
3318 if (value == 31)
3319 {
3320 /* If we are called from abs expansion, arrange things so that we
 3321 can use a single MT instruction that doesn't clobber the source,
3322 if LICM can hoist out the load of the constant zero. */
3323 if (currently_expanding_to_rtl)
3324 {
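 /* T = (0 > op1) is the sign bit of op1; negating T yields 0 or -1,
 which is exactly op1 >> 31 done arithmetically. */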
3325 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3326 operands[1]));
3327 emit_insn (gen_mov_neg_si_t (operands[0]));
3328 return 1;
3329 }
3330 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3331 return 1;
3332 }
3333 else if (value >= 16 && value <= 19)
3334 {
3335 wrk = gen_reg_rtx (SImode);
3336 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3337 value -= 16;
3338 while (value--)
3339 gen_ashift (ASHIFTRT, 1, wrk);
3340 emit_move_insn (operands[0], wrk);
3341 return 1;
3342 }
 3343 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3344 else if (value <= 5)
3345 {
3346 wrk = gen_reg_rtx (SImode);
3347 emit_move_insn (wrk, operands[1]);
3348 while (value--)
3349 gen_ashift (ASHIFTRT, 1, wrk);
3350 emit_move_insn (operands[0], wrk);
3351 return 1;
3352 }
3353
3354 wrk = gen_reg_rtx (Pmode);
3355
3356 /* Load the value into an arg reg and call a helper. */
3357 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3358 sprintf (func, "__ashiftrt_r4_%d", value);
3359 function_symbol (wrk, func, SFUNC_STATIC);
3360 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3361 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3362 return 1;
3363 }
3364
3365 int
3366 sh_dynamicalize_shift_p (rtx count)
3367 {
3368 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3369 }
3370
3371 /* Try to find a good way to implement the combiner pattern
3372 [(set (match_operand:SI 0 "register_operand" "r")
3373 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3374 (match_operand:SI 2 "const_int_operand" "n"))
3375 (match_operand:SI 3 "const_int_operand" "n"))) .
3376 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3377 return 0 for simple right / left or left/right shift combination.
3378 return 1 for a combination of shifts with zero_extend.
3379 return 2 for a combination of shifts with an AND that needs r0.
3380 return 3 for a combination of shifts with an AND that needs an extra
3381 scratch register, when the three highmost bits of the AND mask are clear.
3382 return 4 for a combination of shifts with an AND that needs an extra
3383 scratch register, when any of the three highmost bits of the AND mask
3384 is set.
3385 If ATTRP is set, store an initial right shift width in ATTRP[0],
3386 and the instruction length in ATTRP[1] . These values are not valid
3387 when returning 0.
3388 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3389 shift_amounts for the last shift value that is to be used before the
3390 sign extend. */
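 /* A worked example (illustrative only, values read off the tables above):
 for (x << 4) & 0xff0, LEFT_RTX is 4 and the mask, once shifted right by
 4 below, is 0xff. The zero-extend alternative costs
 1 + ext_shift_insns[0] + ext_shift_insns[4] = 3, which beats the plain
 shift pair and the AND variants, so the function returns 1 and
 gen_shl_and emits extu.b followed by two shll2's, i.e. (x & 0xff) << 4. */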
3391 int
3392 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3393 {
3394 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3395 int left = INTVAL (left_rtx), right;
3396 int best = 0;
3397 int cost, best_cost = 10000;
3398 int best_right = 0, best_len = 0;
3399 int i;
3400 int can_ext;
3401
3402 if (left < 0 || left > 31)
3403 return 0;
3404 if (CONST_INT_P (mask_rtx))
3405 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3406 else
3407 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3408 /* Can this be expressed as a right shift / left shift pair? */
3409 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3410 right = exact_log2 (lsb);
3411 mask2 = ~(mask + lsb - 1);
3412 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3413 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3414 if (! mask2)
3415 best_cost = shift_insns[right] + shift_insns[right + left];
3416 /* mask has no trailing zeroes <==> ! right */
3417 else if (! right && mask2 == ~(lsb2 - 1))
3418 {
3419 int late_right = exact_log2 (lsb2);
3420 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3421 }
3422 /* Try to use zero extend. */
3423 if (mask2 == ~(lsb2 - 1))
3424 {
3425 int width, first;
3426
3427 for (width = 8; width <= 16; width += 8)
3428 {
3429 /* Can we zero-extend right away? */
3430 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3431 {
3432 cost
3433 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3434 if (cost < best_cost)
3435 {
3436 best = 1;
3437 best_cost = cost;
3438 best_right = right;
3439 best_len = cost;
3440 if (attrp)
3441 attrp[2] = -1;
3442 }
3443 continue;
3444 }
3445 /* ??? Could try to put zero extend into initial right shift,
3446 or even shift a bit left before the right shift. */
3447 /* Determine value of first part of left shift, to get to the
3448 zero extend cut-off point. */
3449 first = width - exact_log2 (lsb2) + right;
3450 if (first >= 0 && right + left - first >= 0)
3451 {
3452 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3453 + ext_shift_insns[right + left - first];
3454 if (cost < best_cost)
3455 {
3456 best = 1;
3457 best_cost = cost;
3458 best_right = right;
3459 best_len = cost;
3460 if (attrp)
3461 attrp[2] = first;
3462 }
3463 }
3464 }
3465 }
3466 /* Try to use r0 AND pattern */
3467 for (i = 0; i <= 2; i++)
3468 {
3469 if (i > right)
3470 break;
3471 if (! CONST_OK_FOR_K08 (mask >> i))
3472 continue;
3473 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3474 if (cost < best_cost)
3475 {
3476 best = 2;
3477 best_cost = cost;
3478 best_right = i;
3479 best_len = cost - 1;
3480 }
3481 }
3482 /* Try to use a scratch register to hold the AND operand. */
3483 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3484 for (i = 0; i <= 2; i++)
3485 {
3486 if (i > right)
3487 break;
3488 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3489 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3490 if (cost < best_cost)
3491 {
3492 best = 4 - can_ext;
3493 best_cost = cost;
3494 best_right = i;
3495 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3496 }
3497 }
3498
3499 if (attrp)
3500 {
3501 attrp[0] = best_right;
3502 attrp[1] = best_len;
3503 }
3504 return best;
3505 }
3506
3507 /* This is used in length attributes of the unnamed instructions
3508 corresponding to shl_and_kind return values of 1 and 2. */
3509 int
3510 shl_and_length (rtx insn)
3511 {
3512 rtx set_src, left_rtx, mask_rtx;
3513 int attributes[3];
3514
3515 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3516 left_rtx = XEXP (XEXP (set_src, 0), 1);
3517 mask_rtx = XEXP (set_src, 1);
3518 shl_and_kind (left_rtx, mask_rtx, attributes);
3519 return attributes[1];
3520 }
3521
3522 /* This is used in length attribute of the and_shl_scratch instruction. */
3523
3524 int
3525 shl_and_scr_length (rtx insn)
3526 {
3527 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3528 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3529 rtx op = XEXP (set_src, 0);
3530 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3531 op = XEXP (XEXP (op, 0), 0);
3532 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3533 }
3534
3535 /* Generate rtl for instructions for which shl_and_kind advised a particular
3536 method of generating them, i.e. returned zero. */
3537
3538 int
3539 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3540 {
3541 int attributes[3];
3542 unsigned HOST_WIDE_INT mask;
3543 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3544 int right, total_shift;
3545 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3546
3547 right = attributes[0];
3548 total_shift = INTVAL (left_rtx) + right;
3549 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3550 switch (kind)
3551 {
3552 default:
3553 return -1;
3554 case 1:
3555 {
3556 int first = attributes[2];
3557 rtx operands[3];
3558
3559 if (first < 0)
3560 {
3561 emit_insn ((mask << right) <= 0xff
3562 ? gen_zero_extendqisi2 (dest,
3563 gen_lowpart (QImode, source))
3564 : gen_zero_extendhisi2 (dest,
3565 gen_lowpart (HImode, source)));
3566 source = dest;
3567 }
3568 if (source != dest)
3569 emit_insn (gen_movsi (dest, source));
3570 operands[0] = dest;
3571 if (right)
3572 {
3573 operands[2] = GEN_INT (right);
3574 gen_shifty_hi_op (LSHIFTRT, operands);
3575 }
3576 if (first > 0)
3577 {
3578 operands[2] = GEN_INT (first);
3579 gen_shifty_hi_op (ASHIFT, operands);
3580 total_shift -= first;
3581 mask <<= first;
3582 }
3583 if (first >= 0)
3584 emit_insn (mask <= 0xff
3585 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3586 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3587 if (total_shift > 0)
3588 {
3589 operands[2] = GEN_INT (total_shift);
3590 gen_shifty_hi_op (ASHIFT, operands);
3591 }
3592 break;
3593 }
3594 case 4:
3595 shift_gen_fun = gen_shifty_op;
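/* Fall through.  */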
3596 case 3:
3597 /* If the topmost bit that matters is set, set the topmost bits
3598 that don't matter. This way, we might be able to get a shorter
3599 signed constant. */
3600 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3601 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
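/* Fall through.  */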
3602 case 2:
3603 /* Don't expand fine-grained when combining, because that will
3604 make the pattern fail. */
3605 if (currently_expanding_to_rtl
3606 || reload_in_progress || reload_completed)
3607 {
3608 rtx operands[3];
3609
3610 /* Cases 3 and 4 should be handled by this split
3611 only while combining.  */
3612 gcc_assert (kind <= 2);
3613 if (right)
3614 {
3615 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3616 source = dest;
3617 }
3618 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3619 if (total_shift)
3620 {
3621 operands[0] = dest;
3622 operands[1] = dest;
3623 operands[2] = GEN_INT (total_shift);
3624 shift_gen_fun (ASHIFT, operands);
3625 }
3626 break;
3627 }
3628 else
3629 {
3630 int neg = 0;
3631 if (kind != 4 && total_shift < 16)
3632 {
3633 neg = -ext_shift_amounts[total_shift][1];
3634 if (neg > 0)
3635 neg -= ext_shift_amounts[total_shift][2];
3636 else
3637 neg = 0;
3638 }
3639 emit_insn (gen_and_shl_scratch (dest, source,
3640 GEN_INT (right),
3641 GEN_INT (mask),
3642 GEN_INT (total_shift + neg),
3643 GEN_INT (neg)));
3644 emit_insn (gen_movsi (dest, dest));
3645 break;
3646 }
3647 }
3648 return 0;
3649 }
3650
3651 /* Try to find a good way to implement the combiner pattern
3652 [(set (match_operand:SI 0 "register_operand" "=r")
3653 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3654 (match_operand:SI 2 "const_int_operand" "n")
3655 (match_operand:SI 3 "const_int_operand" "n")
3656 (const_int 0)))
3657 (clobber (reg:SI T_REG))]
3658 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3659 return 0 for simple left / right shift combination.
3660 return 1 for left shift / 8 bit sign extend / left shift.
3661 return 2 for left shift / 16 bit sign extend / left shift.
3662 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3663 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3664 return 5 for left shift / 16 bit sign extend / right shift.
3665 return 6 for < 8 bit sign extend / left shift.
3666 return 7 for < 8 bit sign extend / left shift / single right shift.
3667 If COSTP is nonzero, assign the calculated cost to *COSTP. */
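/* For example (see gen_shl_sext below), kind 1 expands to a left shift, an
exts.b and another left shift, while kind 5 shifts the field up so that its
sign bit lands in bit 15, does an exts.w, and then moves the result down to
its final position with single-bit arithmetic right shifts.  */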
3668
3669 int
3670 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3671 {
3672 int left, size, insize, ext;
3673 int cost = 0, best_cost;
3674 int kind;
3675
3676 left = INTVAL (left_rtx);
3677 size = INTVAL (size_rtx);
3678 insize = size - left;
3679 gcc_assert (insize > 0);
3680 /* Default to left / right shift. */
3681 kind = 0;
3682 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3683 if (size <= 16)
3684 {
3685 /* 16 bit shift / sign extend / 16 bit shift */
3686 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3687 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3688 below, by alternative 3 or something even better. */
3689 if (cost < best_cost)
3690 {
3691 kind = 5;
3692 best_cost = cost;
3693 }
3694 }
3695 /* Try a plain sign extend between two shifts. */
3696 for (ext = 16; ext >= insize; ext -= 8)
3697 {
3698 if (ext <= size)
3699 {
3700 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3701 if (cost < best_cost)
3702 {
3703 kind = ext / (unsigned) 8;
3704 best_cost = cost;
3705 }
3706 }
3707 /* Check if we can do a sloppy shift with a final signed shift
3708 restoring the sign. */
3709 if (EXT_SHIFT_SIGNED (size - ext))
3710 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3711 /* If not, maybe it's still cheaper to do the second shift sloppy,
3712 and do a final sign extend? */
3713 else if (size <= 16)
3714 cost = ext_shift_insns[ext - insize] + 1
3715 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3716 else
3717 continue;
3718 if (cost < best_cost)
3719 {
3720 kind = ext / (unsigned) 8 + 2;
3721 best_cost = cost;
3722 }
3723 }
3724 /* Check if we can sign extend in r0.  */
3725 if (insize < 8)
3726 {
3727 cost = 3 + shift_insns[left];
3728 if (cost < best_cost)
3729 {
3730 kind = 6;
3731 best_cost = cost;
3732 }
3733 /* Try the same with a final signed shift. */
3734 if (left < 31)
3735 {
3736 cost = 3 + ext_shift_insns[left + 1] + 1;
3737 if (cost < best_cost)
3738 {
3739 kind = 7;
3740 best_cost = cost;
3741 }
3742 }
3743 }
3744 if (TARGET_SH3)
3745 {
3746 /* Try to use a dynamic shift. */
3747 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3748 if (cost < best_cost)
3749 {
3750 kind = 0;
3751 best_cost = cost;
3752 }
3753 }
3754 if (costp)
3755 *costp = cost;
3756 return kind;
3757 }
3758
3759 /* Function to be used in the length attribute of the instructions
3760 implementing this pattern. */
3761
3762 int
3763 shl_sext_length (rtx insn)
3764 {
3765 rtx set_src, left_rtx, size_rtx;
3766 int cost;
3767
3768 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3769 left_rtx = XEXP (XEXP (set_src, 0), 1);
3770 size_rtx = XEXP (set_src, 1);
3771 shl_sext_kind (left_rtx, size_rtx, &cost);
3772 return cost;
3773 }
3774
3775 /* Generate rtl for this pattern.  */
3776
3777 int
3778 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3779 {
3780 int kind;
3781 int left, size, insize, cost;
3782 rtx operands[3];
3783
3784 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3785 left = INTVAL (left_rtx);
3786 size = INTVAL (size_rtx);
3787 insize = size - left;
3788 switch (kind)
3789 {
3790 case 1:
3791 case 2:
3792 case 3:
3793 case 4:
3794 {
3795 int ext = kind & 1 ? 8 : 16;
3796 int shift2 = size - ext;
3797
3798 /* Don't expand fine-grained when combining, because that will
3799 make the pattern fail. */
3800 if (! currently_expanding_to_rtl
3801 && ! reload_in_progress && ! reload_completed)
3802 {
3803 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3804 emit_insn (gen_movsi (dest, source));
3805 break;
3806 }
3807 if (dest != source)
3808 emit_insn (gen_movsi (dest, source));
3809 operands[0] = dest;
3810 if (ext - insize)
3811 {
3812 operands[2] = GEN_INT (ext - insize);
3813 gen_shifty_hi_op (ASHIFT, operands);
3814 }
3815 emit_insn (kind & 1
3816 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3817 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3818 if (kind <= 2)
3819 {
3820 if (shift2)
3821 {
3822 operands[2] = GEN_INT (shift2);
3823 gen_shifty_op (ASHIFT, operands);
3824 }
3825 }
3826 else
3827 {
3828 if (shift2 > 0)
3829 {
3830 if (EXT_SHIFT_SIGNED (shift2))
3831 {
3832 operands[2] = GEN_INT (shift2 + 1);
3833 gen_shifty_op (ASHIFT, operands);
3834 operands[2] = const1_rtx;
3835 gen_shifty_op (ASHIFTRT, operands);
3836 break;
3837 }
3838 operands[2] = GEN_INT (shift2);
3839 gen_shifty_hi_op (ASHIFT, operands);
3840 }
3841 else if (shift2)
3842 {
3843 operands[2] = GEN_INT (-shift2);
3844 gen_shifty_hi_op (LSHIFTRT, operands);
3845 }
3846 emit_insn (size <= 8
3847 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3848 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3849 }
3850 break;
3851 }
3852 case 5:
3853 {
3854 int i = 16 - size;
3855 if (! currently_expanding_to_rtl
3856 && ! reload_in_progress && ! reload_completed)
3857 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3858 else
3859 {
3860 operands[0] = dest;
3861 operands[2] = GEN_INT (16 - insize);
3862 gen_shifty_hi_op (ASHIFT, operands);
3863 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3864 }
3865 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3866 while (--i >= 0)
3867 gen_ashift (ASHIFTRT, 1, dest);
3868 break;
3869 }
3870 case 6:
3871 case 7:
3872 /* Don't expand fine-grained when combining, because that will
3873 make the pattern fail. */
3874 if (! currently_expanding_to_rtl
3875 && ! reload_in_progress && ! reload_completed)
3876 {
3877 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3878 emit_insn (gen_movsi (dest, source));
3879 break;
3880 }
3881 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3882 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3883 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3884 operands[0] = dest;
3885 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3886 gen_shifty_op (ASHIFT, operands);
3887 if (kind == 7)
3888 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3889 break;
3890 default:
3891 return -1;
3892 }
3893 return 0;
3894 }
3895
3896 /* Prefix a symbol_ref name with "datalabel". */
3897
3898 rtx
3899 gen_datalabel_ref (rtx sym)
3900 {
3901 const char *str;
3902
3903 if (GET_CODE (sym) == LABEL_REF)
3904 return gen_rtx_CONST (GET_MODE (sym),
3905 gen_rtx_UNSPEC (GET_MODE (sym),
3906 gen_rtvec (1, sym),
3907 UNSPEC_DATALABEL));
3908
3909 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3910
3911 str = XSTR (sym, 0);
3912 /* Share all SYMBOL_REF strings with the same value - that is important
3913 for cse. */
3914 str = IDENTIFIER_POINTER (get_identifier (str));
3915 XSTR (sym, 0) = str;
3916
3917 return sym;
3918 }
3919
3920 \f
3921 static alloc_pool label_ref_list_pool;
3922
3923 typedef struct label_ref_list_d
3924 {
3925 rtx label;
3926 struct label_ref_list_d *next;
3927 } *label_ref_list_t;
3928
3929 /* The SH cannot load a large constant into a register; constants have to
3930 come from a pc-relative load. The reference of a pc-relative load
3931 instruction must be less than 1k in front of the instruction. This
3932 means that we often have to dump a constant inside a function, and
3933 generate code to branch around it.
3934
3935 It is important to minimize this, since the branches will slow things
3936 down and make things bigger.
3937
3938 Worst case code looks like:
3939
3940 mov.l L1,rn
3941 bra L2
3942 nop
3943 align
3944 L1: .long value
3945 L2:
3946 ..
3947
3948 mov.l L3,rn
3949 bra L4
3950 nop
3951 align
3952 L3: .long value
3953 L4:
3954 ..
3955
3956 We fix this by performing a scan before scheduling, which notices which
3957 instructions need to have their operands fetched from the constant table
3958 and builds the table.
3959
3960 The algorithm is:
3961
3962 Scan to find an instruction which needs a pcrel move. Look forward to find
3963 the last barrier which is within MAX_COUNT bytes of the requirement.
3964 If there isn't one, make one. Process all the instructions between
3965 the found instruction and the barrier.
3966
3967 In the above example, we can tell that L3 is within 1k of L1, so
3968 the first move can be shrunk from the 3 insn+constant sequence into
3969 just 1 insn, and the constant moved to L3 to make:
3970
3971 mov.l L1,rn
3972 ..
3973 mov.l L3,rn
3974 bra L4
3975 nop
3976 align
3977 L3:.long value
3978 L4:.long value
3979
3980 Then the second move becomes the target for the shortening process. */
3981
3982 typedef struct
3983 {
3984 rtx value; /* Value in table. */
3985 rtx label; /* Label of value. */
3986 label_ref_list_t wend; /* End of window. */
3987 enum machine_mode mode; /* Mode of value. */
3988
3989 /* True if this constant is accessed as part of a post-increment
3990 sequence. Note that HImode constants are never accessed in this way. */
3991 bool part_of_sequence_p;
3992 } pool_node;
3993
3994 /* The maximum number of constants that can fit into one pool, since
3995 constants in the range 0..510 are at least 2 bytes long, and in the
3996 range from there to 1018 at least 4 bytes. */
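/* (Roughly 510/2 = 255 two-byte slots plus (1018 - 510)/4 = 127 four-byte
slots would allow 382 entries; the smaller figure below presumably leaves
some slack for alignment padding and the branch around the table.)  */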
3997
3998 #define MAX_POOL_SIZE 372
3999 static pool_node pool_vector[MAX_POOL_SIZE];
4000 static int pool_size;
4001 static rtx pool_window_label;
4002 static int pool_window_last;
4003
4004 static int max_labelno_before_reorg;
4005
4006 /* ??? If we need a constant in HImode which is the truncated value of a
4007 constant we need in SImode, we could combine the two entries thus saving
4008 two bytes. Is this common enough to be worth the effort of implementing
4009 it? */
4010
4011 /* ??? This stuff should be done at the same time that we shorten branches.
4012 As it is now, we must assume that all branches are the maximum size, and
4013 this causes us to almost always output constant pools sooner than
4014 necessary. */
4015
4016 /* Add a constant to the pool and return its label. */
4017
4018 static rtx
4019 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4020 {
4021 int i;
4022 rtx lab, new_rtx;
4023 label_ref_list_t ref, newref;
4024
4025 /* First see if we've already got it. */
4026 for (i = 0; i < pool_size; i++)
4027 {
4028 if (x->code == pool_vector[i].value->code
4029 && mode == pool_vector[i].mode)
4030 {
4031 if (x->code == CODE_LABEL)
4032 {
4033 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4034 continue;
4035 }
4036 if (rtx_equal_p (x, pool_vector[i].value))
4037 {
4038 lab = new_rtx = 0;
4039 if (! last_value
4040 || ! i
4041 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4042 {
4043 new_rtx = gen_label_rtx ();
4044 LABEL_REFS (new_rtx) = pool_vector[i].label;
4045 pool_vector[i].label = lab = new_rtx;
4046 }
4047 if (lab && pool_window_label)
4048 {
4049 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4050 newref->label = pool_window_label;
4051 ref = pool_vector[pool_window_last].wend;
4052 newref->next = ref;
4053 pool_vector[pool_window_last].wend = newref;
4054 }
4055 if (new_rtx)
4056 pool_window_label = new_rtx;
4057 pool_window_last = i;
4058 return lab;
4059 }
4060 }
4061 }
4062
4063 /* Need a new one. */
4064 pool_vector[pool_size].value = x;
4065 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4066 {
4067 lab = 0;
4068 pool_vector[pool_size - 1].part_of_sequence_p = true;
4069 }
4070 else
4071 lab = gen_label_rtx ();
4072 pool_vector[pool_size].mode = mode;
4073 pool_vector[pool_size].label = lab;
4074 pool_vector[pool_size].wend = NULL;
4075 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4076 if (lab && pool_window_label)
4077 {
4078 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4079 newref->label = pool_window_label;
4080 ref = pool_vector[pool_window_last].wend;
4081 newref->next = ref;
4082 pool_vector[pool_window_last].wend = newref;
4083 }
4084 if (lab)
4085 pool_window_label = lab;
4086 pool_window_last = pool_size;
4087 pool_size++;
4088 return lab;
4089 }
4090
4091 /* Output the literal table. START, if nonzero, is the first instruction
4092 this table is needed for, and also indicates that there is at least one
4093 casesi_worker_2 instruction; we have to emit the operand3 labels from
4094 these insns at a 4-byte aligned position. BARRIER is the barrier
4095 after which we are to place the table. */
4096
4097 static void
4098 dump_table (rtx start, rtx barrier)
4099 {
4100 rtx scan = barrier;
4101 int i;
4102 int need_align = 1;
4103 rtx lab;
4104 label_ref_list_t ref;
4105 int have_df = 0;
4106
4107 /* Do two passes, first time dump out the HI sized constants. */
4108
4109 for (i = 0; i < pool_size; i++)
4110 {
4111 pool_node *p = &pool_vector[i];
4112
4113 if (p->mode == HImode)
4114 {
4115 if (need_align)
4116 {
4117 scan = emit_insn_after (gen_align_2 (), scan);
4118 need_align = 0;
4119 }
4120 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4121 scan = emit_label_after (lab, scan);
4122 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4123 scan);
4124 for (ref = p->wend; ref; ref = ref->next)
4125 {
4126 lab = ref->label;
4127 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4128 }
4129 }
4130 else if (p->mode == DFmode)
4131 have_df = 1;
4132 }
4133
4134 need_align = 1;
4135
4136 if (start)
4137 {
4138 scan = emit_insn_after (gen_align_4 (), scan);
4139 need_align = 0;
4140 for (; start != barrier; start = NEXT_INSN (start))
4141 if (NONJUMP_INSN_P (start)
4142 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4143 {
4144 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4145 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4146
4147 scan = emit_label_after (lab, scan);
4148 }
4149 }
4150 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4151 {
4152 rtx align_insn = NULL_RTX;
4153
4154 scan = emit_label_after (gen_label_rtx (), scan);
4155 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4156 need_align = 0;
4157
4158 for (i = 0; i < pool_size; i++)
4159 {
4160 pool_node *p = &pool_vector[i];
4161
4162 switch (p->mode)
4163 {
4164 case HImode:
4165 break;
4166 case SImode:
4167 case SFmode:
4168 if (align_insn && !p->part_of_sequence_p)
4169 {
4170 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4171 emit_label_before (lab, align_insn);
4172 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4173 align_insn);
4174 for (ref = p->wend; ref; ref = ref->next)
4175 {
4176 lab = ref->label;
4177 emit_insn_before (gen_consttable_window_end (lab),
4178 align_insn);
4179 }
4180 delete_insn (align_insn);
4181 align_insn = NULL_RTX;
4182 continue;
4183 }
4184 else
4185 {
4186 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4187 scan = emit_label_after (lab, scan);
4188 scan = emit_insn_after (gen_consttable_4 (p->value,
4189 const0_rtx), scan);
4190 need_align = ! need_align;
4191 }
4192 break;
4193 case DFmode:
4194 if (need_align)
4195 {
4196 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4197 align_insn = scan;
4198 need_align = 0;
4199 }
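/* Fall through.  */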
4200 case DImode:
4201 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4202 scan = emit_label_after (lab, scan);
4203 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4204 scan);
4205 break;
4206 default:
4207 gcc_unreachable ();
4208 }
4209
4210 if (p->mode != HImode)
4211 {
4212 for (ref = p->wend; ref; ref = ref->next)
4213 {
4214 lab = ref->label;
4215 scan = emit_insn_after (gen_consttable_window_end (lab),
4216 scan);
4217 }
4218 }
4219 }
4220
4221 pool_size = 0;
4222 }
4223
4224 for (i = 0; i < pool_size; i++)
4225 {
4226 pool_node *p = &pool_vector[i];
4227
4228 switch (p->mode)
4229 {
4230 case HImode:
4231 break;
4232 case SImode:
4233 case SFmode:
4234 if (need_align)
4235 {
4236 need_align = 0;
4237 scan = emit_label_after (gen_label_rtx (), scan);
4238 scan = emit_insn_after (gen_align_4 (), scan);
4239 }
4240 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4241 scan = emit_label_after (lab, scan);
4242 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4243 scan);
4244 break;
4245 case DFmode:
4246 case DImode:
4247 if (need_align)
4248 {
4249 need_align = 0;
4250 scan = emit_label_after (gen_label_rtx (), scan);
4251 scan = emit_insn_after (gen_align_4 (), scan);
4252 }
4253 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4254 scan = emit_label_after (lab, scan);
4255 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4256 scan);
4257 break;
4258 default:
4259 gcc_unreachable ();
4260 }
4261
4262 if (p->mode != HImode)
4263 {
4264 for (ref = p->wend; ref; ref = ref->next)
4265 {
4266 lab = ref->label;
4267 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4268 }
4269 }
4270 }
4271
4272 scan = emit_insn_after (gen_consttable_end (), scan);
4273 scan = emit_barrier_after (scan);
4274 pool_size = 0;
4275 pool_window_label = NULL_RTX;
4276 pool_window_last = 0;
4277 }
4278
4279 /* Return nonzero if constant would be an ok source for a
4280 mov.w instead of a mov.l. */
4281
4282 static int
4283 hi_const (rtx src)
4284 {
4285 return (CONST_INT_P (src)
4286 && INTVAL (src) >= -32768
4287 && INTVAL (src) <= 32767);
4288 }
4289
4290 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4291
4292 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4293
4294 /* ??? For DImode/DFmode moves, we don't need to fix them if each half of the
4295 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4296 need to fix it if the input value is CONST_OK_FOR_I08. */
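/* E.g. an SImode load of the constant 100 fits in the signed 8-bit immediate
of a mov #imm,Rn and needs no fixing, whereas a constant like 0x12345678
does not and ends up being loaded from the constant table instead.  */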
4297
4298 static int
4299 broken_move (rtx insn)
4300 {
4301 if (NONJUMP_INSN_P (insn))
4302 {
4303 rtx pat = PATTERN (insn);
4304 if (GET_CODE (pat) == PARALLEL)
4305 pat = XVECEXP (pat, 0, 0);
4306 if (GET_CODE (pat) == SET
4307 /* We can load any 8-bit value if we don't care what the high
4308 order bits end up as. */
4309 && GET_MODE (SET_DEST (pat)) != QImode
4310 && (CONSTANT_P (SET_SRC (pat))
4311 /* Match mova_const. */
4312 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4313 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4314 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4315 && ! (TARGET_SH2E
4316 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4317 && (fp_zero_operand (SET_SRC (pat))
4318 || fp_one_operand (SET_SRC (pat)))
4319 /* In general we don't know the current setting of fpscr, so disable fldi.
4320 There is an exception if this was a register-register move
4321 before reload - and hence it was ascertained that we have
4322 single precision setting - and in a post-reload optimization
4323 we changed this to do a constant load. In that case
4324 we don't have an r0 clobber, hence we must use fldi. */
4325 && (TARGET_FMOVD
4326 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4327 == SCRATCH))
4328 && REG_P (SET_DEST (pat))
4329 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4330 && ! (TARGET_SH2A
4331 && GET_MODE (SET_DEST (pat)) == SImode
4332 && (satisfies_constraint_I20 (SET_SRC (pat))
4333 || satisfies_constraint_I28 (SET_SRC (pat))))
4334 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4335 return 1;
4336 }
4337
4338 return 0;
4339 }
4340
4341 static int
4342 mova_p (rtx insn)
4343 {
4344 return (NONJUMP_INSN_P (insn)
4345 && GET_CODE (PATTERN (insn)) == SET
4346 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4347 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4348 /* Don't match mova_const. */
4349 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4350 }
4351
4352 /* Fix up a mova from a switch that went out of range. */
4353 static void
4354 fixup_mova (rtx mova)
4355 {
4356 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4357 if (! flag_pic)
4358 {
4359 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4360 INSN_CODE (mova) = -1;
4361 }
4362 else
4363 {
4364 rtx worker = mova;
4365 rtx lab = gen_label_rtx ();
4366 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4367
4368 do
4369 {
4370 worker = NEXT_INSN (worker);
4371 gcc_assert (worker
4372 && !LABEL_P (worker)
4373 && !JUMP_P (worker));
4374 } while (NOTE_P (worker)
4375 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4376 wpat = PATTERN (worker);
4377 wpat0 = XVECEXP (wpat, 0, 0);
4378 wpat1 = XVECEXP (wpat, 0, 1);
4379 wsrc = SET_SRC (wpat0);
4380 PATTERN (worker) = (gen_casesi_worker_2
4381 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4382 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4383 XEXP (wpat1, 0)));
4384 INSN_CODE (worker) = -1;
4385 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4386 base = gen_rtx_LABEL_REF (Pmode, lab);
4387 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4388 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4389 INSN_CODE (mova) = -1;
4390 }
4391 }
4392
4393 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4394 *num_mova, and check if the new mova is not nested within the first one.
4395 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4396 2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
4397 static int
4398 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4399 {
4400 int n_addr = 0; /* Initialization to shut up spurious warning. */
4401 int f_target, n_target = 0; /* Likewise. */
4402
4403 if (optimize)
4404 {
4405 /* If NEW_MOVA has no address yet, it will be handled later. */
4406 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4407 return -1;
4408
4409 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4410 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4411 if (n_addr > n_target || n_addr + 1022 < n_target)
4412 {
4413 /* Change the mova into a load.
4414 broken_move will then return true for it. */
4415 fixup_mova (new_mova);
4416 return 1;
4417 }
4418 }
4419 if (!(*num_mova)++)
4420 {
4421 *first_mova = new_mova;
4422 return 2;
4423 }
4424 if (!optimize
4425 || ((f_target
4426 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4427 >= n_target))
4428 return -1;
4429
4430 (*num_mova)--;
4431 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4432 > n_target - n_addr)
4433 {
4434 fixup_mova (*first_mova);
4435 return 0;
4436 }
4437 else
4438 {
4439 fixup_mova (new_mova);
4440 return 1;
4441 }
4442 }
4443
4444 /* Find the last barrier from insn FROM which is close enough to hold the
4445 constant pool. If we can't find one, then create one near the end of
4446 the range. */
4447
4448 static rtx
4449 find_barrier (int num_mova, rtx mova, rtx from)
4450 {
4451 int count_si = 0;
4452 int count_hi = 0;
4453 int found_hi = 0;
4454 int found_si = 0;
4455 int found_di = 0;
4456 int hi_align = 2;
4457 int si_align = 2;
4458 int leading_mova = num_mova;
4459 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4460 int si_limit;
4461 int hi_limit;
4462 rtx orig = from;
4463 rtx last_got = NULL_RTX;
4464 rtx last_symoff = NULL_RTX;
4465
4466 /* For HImode: range is 510, add 4 because pc counts from address of
4467 second instruction after this one, subtract 2 for the jump instruction
4468 that we may need to emit before the table, subtract 2 for the instruction
4469 that fills the jump delay slot (in very rare cases, reorg will take an
4470 instruction from after the constant pool or will leave the delay slot
4471 empty). This gives 510.
4472 For SImode: range is 1020, add 4 because pc counts from address of
4473 second instruction after this one, subtract 2 in case pc is 2 byte
4474 aligned, subtract 2 for the jump instruction that we may need to emit
4475 before the table, subtract 2 for the instruction that fills the jump
4476 delay slot. This gives 1018. */
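/* I.e. 510 + 4 - 2 - 2 = 510 and 1020 + 4 - 2 - 2 - 2 = 1018.  */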
4477
4478 /* The branch will always be shortened now that the reference address for
4479 forward branches is the successor address, so we no longer need to make
4480 adjustments to the [sh]i_limit for -O0. */
4481
4482 si_limit = 1018;
4483 hi_limit = 510;
4484
4485 while (from && count_si < si_limit && count_hi < hi_limit)
4486 {
4487 int inc = get_attr_length (from);
4488 int new_align = 1;
4489
4490 /* If this is a label that existed at the time of the compute_alignments
4491 call, determine the alignment. N.B. When find_barrier recurses for
4492 an out-of-reach mova, we might see labels at the start of previously
4493 inserted constant tables. */
4494 if (LABEL_P (from)
4495 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4496 {
4497 if (optimize)
4498 new_align = 1 << label_to_alignment (from);
4499 else if (BARRIER_P (prev_nonnote_insn (from)))
4500 new_align = 1 << barrier_align (from);
4501 else
4502 new_align = 1;
4503 inc = 0;
4504 }
4505 /* In case we are scanning a constant table because of recursion, check
4506 for explicit alignments. If the table is long, we might be forced
4507 to emit the new table in front of it; the length of the alignment
4508 might be the last straw. */
4509 else if (NONJUMP_INSN_P (from)
4510 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4511 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4512 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4513 /* When we find the end of a constant table, paste the new constant
4514 at the end. That is better than putting it in front because
4515 this way, we don't need extra alignment for adding a 4-byte-aligned
4516 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4517 else if (NONJUMP_INSN_P (from)
4518 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4519 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4520 return from;
4521
4522 if (BARRIER_P (from))
4523 {
4524 rtx next;
4525
4526 found_barrier = from;
4527
4528 /* If we are at the end of the function, or in front of an alignment
4529 instruction, we need not insert an extra alignment. We prefer
4530 this kind of barrier. */
4531 if (barrier_align (from) > 2)
4532 good_barrier = from;
4533
4534 /* If we are at the end of a hot/cold block, dump the constants
4535 here. */
4536 next = NEXT_INSN (from);
4537 if (next
4538 && NOTE_P (next)
4539 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4540 break;
4541 }
4542
4543 if (broken_move (from))
4544 {
4545 rtx pat, src, dst;
4546 enum machine_mode mode;
4547
4548 pat = PATTERN (from);
4549 if (GET_CODE (pat) == PARALLEL)
4550 pat = XVECEXP (pat, 0, 0);
4551 src = SET_SRC (pat);
4552 dst = SET_DEST (pat);
4553 mode = GET_MODE (dst);
4554
4555 /* A GOT pc-relative setting comes in a pair of
4556 mova .L8,r0
4557 mov.l .L8,r12
4558 instructions (plus an add r0,r12).
4559 Remember if we see one without the other. */
4560 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4561 last_got = last_got ? NULL_RTX : from;
4562 else if (PIC_ADDR_P (src))
4563 last_got = last_got ? NULL_RTX : from;
4564
4565 /* We must explicitly check the mode, because sometimes the
4566 front end will generate code to load unsigned constants into
4567 HImode targets without properly sign extending them. */
4568 if (mode == HImode
4569 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4570 {
4571 found_hi += 2;
4572 /* We put the short constants before the long constants, so
4573 we must count the length of short constants in the range
4574 for the long constants. */
4575 /* ??? This isn't optimal, but is easy to do. */
4576 si_limit -= 2;
4577 }
4578 else
4579 {
4580 /* We dump DF/DI constants before SF/SI ones, because
4581 the limit is the same, but the alignment requirements
4582 are higher. We may waste up to 4 additional bytes
4583 for alignment, and the DF/DI constant may have
4584 another SF/SI constant placed before it. */
4585 if (TARGET_SHCOMPACT
4586 && ! found_di
4587 && (mode == DFmode || mode == DImode))
4588 {
4589 found_di = 1;
4590 si_limit -= 8;
4591 }
4592 while (si_align > 2 && found_si + si_align - 2 > count_si)
4593 si_align >>= 1;
4594 if (found_si > count_si)
4595 count_si = found_si;
4596 found_si += GET_MODE_SIZE (mode);
4597 if (num_mova)
4598 si_limit -= GET_MODE_SIZE (mode);
4599 }
4600 }
4601
4602 if (mova_p (from))
4603 {
4604 switch (untangle_mova (&num_mova, &mova, from))
4605 {
4606 case 1:
4607 if (flag_pic)
4608 {
4609 rtx src = SET_SRC (PATTERN (from));
4610 if (GET_CODE (src) == CONST
4611 && GET_CODE (XEXP (src, 0)) == UNSPEC
4612 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4613 last_symoff = from;
4614 }
4615 break;
4616 case 0: return find_barrier (0, 0, mova);
4617 case 2:
4618 {
4619 leading_mova = 0;
4620 barrier_before_mova
4621 = good_barrier ? good_barrier : found_barrier;
4622 }
4623 default: break;
4624 }
4625 if (found_si > count_si)
4626 count_si = found_si;
4627 }
4628 else if (JUMP_TABLE_DATA_P (from))
4629 {
4630 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4631 || (num_mova
4632 && (prev_nonnote_insn (from)
4633 == XEXP (MOVA_LABELREF (mova), 0))))
4634 num_mova--;
4635 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4636 {
4637 /* We have just passed the barrier in front of the
4638 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4639 the ADDR_DIFF_VEC is accessed as data, just like our pool
4640 constants, this is a good opportunity to accommodate what
4641 we have gathered so far.
4642 If we waited any longer, we could end up at a barrier in
4643 front of code, which gives worse cache usage for separated
4644 instruction / data caches. */
4645 good_barrier = found_barrier;
4646 break;
4647 }
4648 else
4649 {
4650 rtx body = PATTERN (from);
4651 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4652 }
4653 }
4654 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4655 else if (JUMP_P (from)
4656 && ! TARGET_SH2
4657 && ! TARGET_SMALLCODE)
4658 new_align = 4;
4659
4660 /* There is a possibility that a bf is transformed into a bf/s by the
4661 delay slot scheduler. */
4662 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4663 && get_attr_type (from) == TYPE_CBRANCH
4664 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4665 inc += 2;
4666
4667 if (found_si)
4668 {
4669 count_si += inc;
4670 if (new_align > si_align)
4671 {
4672 si_limit -= (count_si - 1) & (new_align - si_align);
4673 si_align = new_align;
4674 }
4675 count_si = (count_si + new_align - 1) & -new_align;
4676 }
4677 if (found_hi)
4678 {
4679 count_hi += inc;
4680 if (new_align > hi_align)
4681 {
4682 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4683 hi_align = new_align;
4684 }
4685 count_hi = (count_hi + new_align - 1) & -new_align;
4686 }
4687 from = NEXT_INSN (from);
4688 }
4689
4690 if (num_mova)
4691 {
4692 if (leading_mova)
4693 {
4694 /* Try as we might, the leading mova is out of range. Change
4695 it into a load (which will become a pcload) and retry. */
4696 fixup_mova (mova);
4697 return find_barrier (0, 0, mova);
4698 }
4699 else
4700 {
4701 /* Insert the constant pool table before the mova instruction,
4702 to prevent the mova label reference from going out of range. */
4703 from = mova;
4704 good_barrier = found_barrier = barrier_before_mova;
4705 }
4706 }
4707
4708 if (found_barrier)
4709 {
4710 if (good_barrier && next_real_insn (found_barrier))
4711 found_barrier = good_barrier;
4712 }
4713 else
4714 {
4715 /* We didn't find a barrier in time to dump our stuff,
4716 so we'll make one. */
4717 rtx label = gen_label_rtx ();
4718
4719 /* Don't emit a constant table in the middle of insns for
4720 casesi_worker_2. This is a bit overkill but is enough
4721 because casesi_worker_2 wouldn't appear so frequently. */
4722 if (last_symoff)
4723 from = last_symoff;
4724
4725 /* If we exceeded the range, then we must back up over the last
4726 instruction we looked at. Otherwise, we just need to undo the
4727 NEXT_INSN at the end of the loop. */
4728 if (PREV_INSN (from) != orig
4729 && (count_hi > hi_limit || count_si > si_limit))
4730 from = PREV_INSN (PREV_INSN (from));
4731 else
4732 from = PREV_INSN (from);
4733
4734 /* Don't emit a constant table in the middle of global pointer setting,
4735 since that would move the addressing base GOT into another table.
4736 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4737 in the pool anyway, so just move up the whole constant pool. */
4738 if (last_got)
4739 from = PREV_INSN (last_got);
4740
4741 /* Don't insert the constant pool table at the position which
4742 may be the landing pad. */
4743 if (flag_exceptions
4744 && CALL_P (from)
4745 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4746 from = PREV_INSN (from);
4747
4748 /* Walk back to be just before any jump or label.
4749 Putting it before a label reduces the number of times the branch
4750 around the constant pool table will be hit. Putting it before
4751 a jump makes it more likely that the bra delay slot will be
4752 filled. */
4753 while (NOTE_P (from) || JUMP_P (from)
4754 || LABEL_P (from))
4755 from = PREV_INSN (from);
4756
4757 from = emit_jump_insn_after (gen_jump (label), from);
4758 JUMP_LABEL (from) = label;
4759 LABEL_NUSES (label) = 1;
4760 found_barrier = emit_barrier_after (from);
4761 emit_label_after (label, found_barrier);
4762 }
4763
4764 return found_barrier;
4765 }
4766
4767 /* If the instruction INSN is implemented by a special function, and we can
4768 positively find the register that is used to call the sfunc, and this
4769 register is not used anywhere else in this instruction - except as the
4770 destination of a set, return this register; else, return 0. */
4771 rtx
4772 sfunc_uses_reg (rtx insn)
4773 {
4774 int i;
4775 rtx pattern, part, reg_part, reg;
4776
4777 if (!NONJUMP_INSN_P (insn))
4778 return 0;
4779 pattern = PATTERN (insn);
4780 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4781 return 0;
4782
4783 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4784 {
4785 part = XVECEXP (pattern, 0, i);
4786 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4787 reg_part = part;
4788 }
4789 if (! reg_part)
4790 return 0;
4791 reg = XEXP (reg_part, 0);
4792 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4793 {
4794 part = XVECEXP (pattern, 0, i);
4795 if (part == reg_part || GET_CODE (part) == CLOBBER)
4796 continue;
4797 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4798 && REG_P (SET_DEST (part)))
4799 ? SET_SRC (part) : part)))
4800 return 0;
4801 }
4802 return reg;
4803 }
4804
4805 /* See if the only way in which INSN uses REG is by calling it, or by
4806 setting it while calling it. Set *SET to a SET rtx if the register
4807 is set by INSN. */
4808
4809 static int
4810 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4811 {
4812 rtx pattern, reg2;
4813
4814 *set = NULL_RTX;
4815
4816 reg2 = sfunc_uses_reg (insn);
4817 if (reg2 && REGNO (reg2) == REGNO (reg))
4818 {
4819 pattern = single_set (insn);
4820 if (pattern
4821 && REG_P (SET_DEST (pattern))
4822 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4823 *set = pattern;
4824 return 0;
4825 }
4826 if (!CALL_P (insn))
4827 {
4828 /* We don't use rtx_equal_p because we don't care if the mode is
4829 different. */
4830 pattern = single_set (insn);
4831 if (pattern
4832 && REG_P (SET_DEST (pattern))
4833 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4834 {
4835 rtx par, part;
4836 int i;
4837
4838 *set = pattern;
4839 par = PATTERN (insn);
4840 if (GET_CODE (par) == PARALLEL)
4841 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4842 {
4843 part = XVECEXP (par, 0, i);
4844 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4845 return 1;
4846 }
4847 return reg_mentioned_p (reg, SET_SRC (pattern));
4848 }
4849
4850 return 1;
4851 }
4852
4853 pattern = PATTERN (insn);
4854
4855 if (GET_CODE (pattern) == PARALLEL)
4856 {
4857 int i;
4858
4859 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4860 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4861 return 1;
4862 pattern = XVECEXP (pattern, 0, 0);
4863 }
4864
4865 if (GET_CODE (pattern) == SET)
4866 {
4867 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4868 {
4869 /* We don't use rtx_equal_p, because we don't care if the
4870 mode is different. */
4871 if (!REG_P (SET_DEST (pattern))
4872 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4873 return 1;
4874
4875 *set = pattern;
4876 }
4877
4878 pattern = SET_SRC (pattern);
4879 }
4880
4881 if (GET_CODE (pattern) != CALL
4882 || !MEM_P (XEXP (pattern, 0))
4883 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4884 return 1;
4885
4886 return 0;
4887 }
4888
4889 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4890 general registers. Bits 0..15 mean that the respective registers
4891 are used as inputs in the instruction. Bits 16..31 mean that the
4892 registers 0..15, respectively, are used as outputs, or are clobbered.
4893 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
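/* For example, assuming a single-register SImode value, for
(set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
this would return (1 << 17) | (1 << 3) | (1 << 2): r2 and r3 are used as
inputs, r1 is written.  */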
4894 int
4895 regs_used (rtx x, int is_dest)
4896 {
4897 enum rtx_code code;
4898 const char *fmt;
4899 int i, used = 0;
4900
4901 if (! x)
4902 return used;
4903 code = GET_CODE (x);
4904 switch (code)
4905 {
4906 case REG:
4907 if (REGNO (x) < 16)
4908 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4909 << (REGNO (x) + is_dest));
4910 return 0;
4911 case SUBREG:
4912 {
4913 rtx y = SUBREG_REG (x);
4914
4915 if (!REG_P (y))
4916 break;
4917 if (REGNO (y) < 16)
4918 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4919 << (REGNO (y) +
4920 subreg_regno_offset (REGNO (y),
4921 GET_MODE (y),
4922 SUBREG_BYTE (x),
4923 GET_MODE (x)) + is_dest));
4924 return 0;
4925 }
4926 case SET:
4927 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4928 case RETURN:
4929 /* If there was a return value, it must have been indicated with USE. */
4930 return 0x00ffff00;
4931 case CLOBBER:
4932 is_dest = 1;
4933 break;
4934 case MEM:
4935 is_dest = 0;
4936 break;
4937 case CALL:
4938 used |= 0x00ff00f0;
4939 break;
4940 default:
4941 break;
4942 }
4943
4944 fmt = GET_RTX_FORMAT (code);
4945
4946 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4947 {
4948 if (fmt[i] == 'E')
4949 {
4950 register int j;
4951 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4952 used |= regs_used (XVECEXP (x, i, j), is_dest);
4953 }
4954 else if (fmt[i] == 'e')
4955 used |= regs_used (XEXP (x, i), is_dest);
4956 }
4957 return used;
4958 }
4959
4960 /* Create an instruction that prevents redirection of a conditional branch
4961 to the destination of the JUMP with address ADDR.
4962 If the branch needs to be implemented as an indirect jump, try to find
4963 a scratch register for it.
4964 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4965 If any preceding insn that doesn't fit into a delay slot is good enough,
4966 pass 1. Pass 2 if a definite blocking insn is needed.
4967 -1 is used internally to avoid deep recursion.
4968 If a blocking instruction is made or recognized, return it. */
4969
4970 static rtx
4971 gen_block_redirect (rtx jump, int addr, int need_block)
4972 {
4973 int dead = 0;
4974 rtx prev = prev_nonnote_insn (jump);
4975 rtx dest;
4976
4977 /* First, check if we already have an instruction that satisfies our need. */
4978 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4979 {
4980 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4981 return prev;
4982 if (GET_CODE (PATTERN (prev)) == USE
4983 || GET_CODE (PATTERN (prev)) == CLOBBER
4984 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4985 prev = jump;
4986 else if ((need_block &= ~1) < 0)
4987 return prev;
4988 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4989 need_block = 0;
4990 }
4991 if (GET_CODE (PATTERN (jump)) == RETURN)
4992 {
4993 if (! need_block)
4994 return prev;
4995 /* Reorg even does nasty things with return insns that cause branches
4996 to go out of range - see find_end_label and callers. */
4997 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4998 }
4999 /* We can't use JUMP_LABEL here because it might be undefined
5000 when not optimizing. */
5001 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5002 /* If the branch is out of range, try to find a scratch register for it. */
5003 if (optimize
5004 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5005 > 4092 + 4098))
5006 {
5007 rtx scan;
5008 /* Don't look for the stack pointer as a scratch register,
5009 it would cause trouble if an interrupt occurred. */
5010 unsigned attempt = 0x7fff, used;
5011 int jump_left = flag_expensive_optimizations + 1;
5012
5013 /* It is likely that the most recent eligible instruction is wanted for
5014 the delay slot. Therefore, find out which registers it uses, and
5015 try to avoid using them. */
5016
5017 for (scan = jump; (scan = PREV_INSN (scan)); )
5018 {
5019 enum rtx_code code;
5020
5021 if (INSN_DELETED_P (scan))
5022 continue;
5023 code = GET_CODE (scan);
5024 if (code == CODE_LABEL || code == JUMP_INSN)
5025 break;
5026 if (code == INSN
5027 && GET_CODE (PATTERN (scan)) != USE
5028 && GET_CODE (PATTERN (scan)) != CLOBBER
5029 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5030 {
5031 attempt &= ~regs_used (PATTERN (scan), 0);
5032 break;
5033 }
5034 }
5035 for (used = dead = 0, scan = JUMP_LABEL (jump);
5036 (scan = NEXT_INSN (scan)); )
5037 {
5038 enum rtx_code code;
5039
5040 if (INSN_DELETED_P (scan))
5041 continue;
5042 code = GET_CODE (scan);
5043 if (INSN_P (scan))
5044 {
5045 used |= regs_used (PATTERN (scan), 0);
5046 if (code == CALL_INSN)
5047 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5048 dead |= (used >> 16) & ~used;
5049 if (dead & attempt)
5050 {
5051 dead &= attempt;
5052 break;
5053 }
5054 if (code == JUMP_INSN)
5055 {
5056 if (jump_left-- && simplejump_p (scan))
5057 scan = JUMP_LABEL (scan);
5058 else
5059 break;
5060 }
5061 }
5062 }
5063 /* Mask out the stack pointer again, in case it was
5064 the only 'free' register we have found. */
5065 dead &= 0x7fff;
5066 }
5067 /* If the immediate destination is still in range, check for possible
5068 threading with a jump beyond the delay slot insn.
5069 Don't check if we are called recursively; the jump has been or will be
5070 checked in a different invocation then. */
5071
5072 else if (optimize && need_block >= 0)
5073 {
5074 rtx next = next_active_insn (next_active_insn (dest));
5075 if (next && JUMP_P (next)
5076 && GET_CODE (PATTERN (next)) == SET
5077 && recog_memoized (next) == CODE_FOR_jump_compact)
5078 {
5079 dest = JUMP_LABEL (next);
5080 if (dest
5081 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5082 > 4092 + 4098))
5083 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5084 }
5085 }
5086
5087 if (dead)
5088 {
5089 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5090
5091 /* It would be nice if we could convert the jump into an indirect
5092 jump / far branch right now, thus exposing all constituent
5093 instructions to further optimization. However, reorg uses
5094 simplejump_p to determine if there is an unconditional jump where
5095 it should try to schedule instructions from the target of the
5096 branch; simplejump_p fails for indirect jumps even if they have
5097 a JUMP_LABEL. */
5098 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5099 (reg, GEN_INT (unspec_bbr_uid++)),
5100 jump);
5101 /* ??? We would like this to have the scope of the jump, but that
5102 scope will change when a delay slot insn of an inner scope is added.
5103 Hence, after delay slot scheduling, we'll have to expect
5104 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5105 the jump. */
5106
5107 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5108 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5109 return insn;
5110 }
5111 else if (need_block)
5112 /* We can't use JUMP_LABEL here because it might be undefined
5113 when not optimizing. */
5114 return emit_insn_before (gen_block_branch_redirect
5115 (GEN_INT (unspec_bbr_uid++)),
5116 jump);
5117 return prev;
5118 }
5119
5120 #define CONDJUMP_MIN -252
5121 #define CONDJUMP_MAX 262
5122 struct far_branch
5123 {
5124 /* A label (to be placed) in front of the jump
5125 that jumps to our ultimate destination. */
5126 rtx near_label;
5127 /* Where we are going to insert it if we cannot move the jump any farther,
5128 or the jump itself if we have picked up an existing jump. */
5129 rtx insert_place;
5130 /* The ultimate destination. */
5131 rtx far_label;
5132 struct far_branch *prev;
5133 /* If the branch has already been created, its address;
5134 else the address of its first prospective user. */
5135 int address;
5136 };
5137
5138 static void gen_far_branch (struct far_branch *);
5139 enum mdep_reorg_phase_e mdep_reorg_phase;
5140 static void
5141 gen_far_branch (struct far_branch *bp)
5142 {
5143 rtx insn = bp->insert_place;
5144 rtx jump;
5145 rtx label = gen_label_rtx ();
5146 int ok;
5147
5148 emit_label_after (label, insn);
5149 if (bp->far_label)
5150 {
5151 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5152 LABEL_NUSES (bp->far_label)++;
5153 }
5154 else
5155 jump = emit_jump_insn_after (gen_return (), insn);
5156 /* Emit a barrier so that reorg knows that any following instructions
5157 are not reachable via a fall-through path.
5158 But don't do this when not optimizing, since we wouldn't suppress the
5159 alignment for the barrier then, and could end up with out-of-range
5160 pc-relative loads. */
5161 if (optimize)
5162 emit_barrier_after (jump);
5163 emit_label_after (bp->near_label, insn);
5164 JUMP_LABEL (jump) = bp->far_label;
5165 ok = invert_jump (insn, label, 1);
5166 gcc_assert (ok);
5167
5168 /* If we are branching around a jump (rather than a return), prevent
5169 reorg from using an insn from the jump target as the delay slot insn -
5170 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5171 and it could cause branches to go out of range. */
5172 if (bp->far_label)
5173 (emit_insn_after
5174 (gen_stuff_delay_slot
5175 (GEN_INT (unspec_bbr_uid++),
5176 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5177 insn));
5178 /* Prevent reorg from undoing our splits. */
5179 gen_block_redirect (jump, bp->address += 2, 2);
5180 }
5181
5182 /* Fix up ADDR_DIFF_VECs. */
5183 void
5184 fixup_addr_diff_vecs (rtx first)
5185 {
5186 rtx insn;
5187
5188 for (insn = first; insn; insn = NEXT_INSN (insn))
5189 {
5190 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5191
5192 if (!JUMP_P (insn)
5193 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5194 continue;
5195 pat = PATTERN (insn);
5196 vec_lab = XEXP (XEXP (pat, 0), 0);
5197
5198 /* Search for the matching casesi_jump_2.  */
5199 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5200 {
5201 if (!JUMP_P (prev))
5202 continue;
5203 prevpat = PATTERN (prev);
5204 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5205 continue;
5206 x = XVECEXP (prevpat, 0, 1);
5207 if (GET_CODE (x) != USE)
5208 continue;
5209 x = XEXP (x, 0);
5210 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5211 break;
5212 }
5213 /* FIXME: This is a bug in the optimizer, but it seems harmless
5214 to just avoid panicking.  */
5215 if (!prev)
5216 continue;
5217
5218 /* Emit the reference label of the braf where it belongs, right after
5219 the casesi_jump_2 (i.e. braf). */
5220 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5221 emit_label_after (braf_label, prev);
5222
5223 /* Fix up the ADDR_DIF_VEC to be relative
5224 to the reference address of the braf. */
5225 XEXP (XEXP (pat, 0), 0) = braf_label;
5226 }
5227 }
5228
5229 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5230 a barrier. Return the base 2 logarithm of the desired alignment. */
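/* E.g. a return value of 2 requests 1 << 2 = 4-byte alignment, while 0 means
no extra alignment is wanted.  */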
5231 int
5232 barrier_align (rtx barrier_or_label)
5233 {
5234 rtx next = next_real_insn (barrier_or_label), pat, prev;
5235 int slot, credit, jump_to_next = 0;
5236
5237 if (! next)
5238 return 0;
5239
5240 pat = PATTERN (next);
5241
5242 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5243 return 2;
5244
5245 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5246 /* This is a barrier in front of a constant table. */
5247 return 0;
5248
5249 prev = prev_real_insn (barrier_or_label);
5250 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5251 {
5252 pat = PATTERN (prev);
5253 /* If this is a very small table, we want to keep the alignment after
5254 the table to the minimum for proper code alignment. */
5255 return ((TARGET_SMALLCODE
5256 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5257 <= (unsigned) 1 << (CACHE_LOG - 2)))
5258 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5259 }
5260
5261 if (TARGET_SMALLCODE)
5262 return 0;
5263
5264 if (! TARGET_SH2 || ! optimize)
5265 return align_jumps_log;
5266
5267 /* When fixing up pcloads, a constant table might be inserted just before
5268 the basic block that ends with the barrier. Thus, we can't trust the
5269 instruction lengths before that. */
5270 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5271 {
5272 /* Check if there is an immediately preceding branch to the insn beyond
5273 the barrier. We must weigh the cost of discarding useful information
5274 from the current cache line when executing this branch and there is
5275 an alignment, against that of fetching unneeded insns in front of the
5276 branch target when there is no alignment. */
5277
5278 /* There are two delay_slot cases to consider. One is the simple case
5279 where the preceding branch is to the insn beyond the barrier (simple
5280 delay slot filling), and the other is where the preceding branch has
5281 a delay slot that is a duplicate of the insn after the barrier
5282 (fill_eager_delay_slots) and the branch is to the insn after the insn
5283 after the barrier. */
5284
5285 /* PREV is presumed to be the JUMP_INSN for the barrier under
5286 investigation. Skip to the insn before it. */
5287 prev = prev_real_insn (prev);
5288
5289 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5290 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5291 prev = prev_real_insn (prev))
5292 {
5293 jump_to_next = 0;
5294 if (GET_CODE (PATTERN (prev)) == USE
5295 || GET_CODE (PATTERN (prev)) == CLOBBER)
5296 continue;
5297 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5298 {
5299 prev = XVECEXP (PATTERN (prev), 0, 1);
5300 if (INSN_UID (prev) == INSN_UID (next))
5301 {
5302 /* Delay slot was filled with insn at jump target. */
5303 jump_to_next = 1;
5304 continue;
5305 }
5306 }
5307
5308 if (slot &&
5309 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5310 slot = 0;
5311 credit -= get_attr_length (prev);
5312 }
5313 if (prev
5314 && JUMP_P (prev)
5315 && JUMP_LABEL (prev))
5316 {
5317 rtx x;
5318 if (jump_to_next
5319 || next_real_insn (JUMP_LABEL (prev)) == next
5320 /* If relax_delay_slots() decides NEXT was redundant
5321 with some previous instruction, it will have
5322 redirected PREV's jump to the following insn. */
5323 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5324 /* There is no upper bound on redundant instructions
5325 that might have been skipped, but we must not put an
5326 alignment where none had been before. */
5327 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5328 (INSN_P (x)
5329 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5330 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5331 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5332 {
5333 rtx pat = PATTERN (prev);
5334 if (GET_CODE (pat) == PARALLEL)
5335 pat = XVECEXP (pat, 0, 0);
5336 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5337 return 0;
5338 }
5339 }
5340 }
5341
5342 return align_jumps_log;
5343 }
5344
5345 /* If we are inside a phony loop, almost any kind of label can turn up as the
5346 first one in the loop. Aligning a braf label causes incorrect switch
5347 destination addresses; we can detect braf labels because they are
5348 followed by a BARRIER.
5349 Applying loop alignment to small constant or switch tables is a waste
5350 of space, so we suppress this too. */
5351 int
5352 sh_loop_align (rtx label)
5353 {
5354 rtx next = label;
5355
5356 do
5357 next = next_nonnote_insn (next);
5358 while (next && LABEL_P (next));
5359
5360 if (! next
5361 || ! INSN_P (next)
5362 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5363 || recog_memoized (next) == CODE_FOR_consttable_2)
5364 return 0;
5365
5366 return align_loops_log;
5367 }
5368
5369 /* Do a final pass over the function, just before delayed branch
5370 scheduling. */
5371
5372 static void
5373 sh_reorg (void)
5374 {
5375 rtx first, insn, mova = NULL_RTX;
5376 int num_mova;
5377 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5378 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5379
5380 first = get_insns ();
5381 max_labelno_before_reorg = max_label_num ();
5382
5383 /* We must split call insns before introducing `mova's. If we're
5384 optimizing, they'll have already been split. Otherwise, make
5385 sure we don't split them too late. */
5386 if (! optimize)
5387 split_all_insns_noflow ();
5388
5389 if (TARGET_SHMEDIA)
5390 return;
5391
5392 /* If relaxing, generate pseudo-ops to associate function calls with
5393 the symbols they call. It does no harm to not generate these
5394 pseudo-ops. However, when we can generate them, it enables the
5395 linker to potentially relax the jsr to a bsr, and eliminate the
5396 register load and, possibly, the constant pool entry. */
5397
5398 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5399 if (TARGET_RELAX)
5400 {
5401 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5402 own purposes. This works because none of the remaining passes
5403 need to look at them.
5404
5405 ??? But it may break in the future. We should use a machine
5406 dependent REG_NOTE, or some other approach entirely. */
5407 for (insn = first; insn; insn = NEXT_INSN (insn))
5408 {
5409 if (INSN_P (insn))
5410 {
5411 rtx note;
5412
5413 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5414 NULL_RTX)) != 0)
5415 remove_note (insn, note);
5416 }
5417 }
5418
5419 for (insn = first; insn; insn = NEXT_INSN (insn))
5420 {
5421 rtx pattern, reg, link, set, scan, dies, label;
5422 int rescan = 0, foundinsn = 0;
5423
5424 if (CALL_P (insn))
5425 {
5426 pattern = PATTERN (insn);
5427
5428 if (GET_CODE (pattern) == PARALLEL)
5429 pattern = XVECEXP (pattern, 0, 0);
5430 if (GET_CODE (pattern) == SET)
5431 pattern = SET_SRC (pattern);
5432
5433 if (GET_CODE (pattern) != CALL
5434 || !MEM_P (XEXP (pattern, 0)))
5435 continue;
5436
5437 reg = XEXP (XEXP (pattern, 0), 0);
5438 }
5439 else
5440 {
5441 reg = sfunc_uses_reg (insn);
5442 if (! reg)
5443 continue;
5444 }
5445
5446 if (!REG_P (reg))
5447 continue;
5448
5449 /* Try scanning backward to find where the register is set. */
5450 link = NULL;
5451 for (scan = PREV_INSN (insn);
5452 scan && !LABEL_P (scan);
5453 scan = PREV_INSN (scan))
5454 {
5455 if (! INSN_P (scan))
5456 continue;
5457
5458 if (! reg_mentioned_p (reg, scan))
5459 continue;
5460
5461 if (noncall_uses_reg (reg, scan, &set))
5462 break;
5463
5464 if (set)
5465 {
5466 link = scan;
5467 break;
5468 }
5469 }
5470
5471 if (! link)
5472 continue;
5473
5474 /* The register is set at LINK. */
5475
5476 /* We can only optimize the function call if the register is
5477 being set to a symbol. In theory, we could sometimes
5478 optimize calls to a constant location, but the assembler
5479 and linker do not support that at present. */
5480 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5481 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5482 continue;
5483
5484 /* Scan forward from LINK to the place where REG dies, and
5485 make sure that the only insns which use REG are
5486 themselves function calls. */
5487
5488 /* ??? This doesn't work for call targets that were allocated
5489 by reload, since there may not be a REG_DEAD note for the
5490 register. */
5491
5492 dies = NULL_RTX;
5493 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5494 {
5495 rtx scanset;
5496
5497 /* Don't try to trace forward past a CODE_LABEL if we haven't
5498 seen INSN yet. Ordinarily, we will only find the setting insn
5499 if it is in the same basic block. However,
5500 cross-jumping can insert code labels in between the load and
5501 the call, and can result in situations where a single call
5502 insn may have two targets depending on where we came from. */
5503
5504 if (LABEL_P (scan) && ! foundinsn)
5505 break;
5506
5507 if (! INSN_P (scan))
5508 continue;
5509
5510 /* Don't try to trace forward past a JUMP. To optimize
5511 safely, we would have to check that all the
5512 instructions at the jump destination did not use REG. */
5513
5514 if (JUMP_P (scan))
5515 break;
5516
5517 if (! reg_mentioned_p (reg, scan))
5518 continue;
5519
5520 if (noncall_uses_reg (reg, scan, &scanset))
5521 break;
5522
5523 if (scan == insn)
5524 foundinsn = 1;
5525
5526 if (scan != insn
5527 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5528 {
5529 /* There is a function call to this register other
5530 than the one we are checking. If we optimize
5531 this call, we need to rescan again below. */
5532 rescan = 1;
5533 }
5534
5535 /* ??? We shouldn't have to worry about SCANSET here.
5536 We should just be able to check for a REG_DEAD note
5537 on a function call. However, the REG_DEAD notes are
5538 apparently not dependable around libcalls; c-torture
5539 execute/920501-2 is a test case. If SCANSET is set,
5540 then this insn sets the register, so it must have
5541 died earlier. Unfortunately, this will only handle
5542 the cases in which the register is, in fact, set in a
5543 later insn. */
5544
5545 /* ??? We shouldn't have to use FOUNDINSN here.
5546 This dates back to when we used LOG_LINKS to find
5547 the most recent insn which sets the register. */
5548
5549 if (foundinsn
5550 && (scanset
5551 || find_reg_note (scan, REG_DEAD, reg)))
5552 {
5553 dies = scan;
5554 break;
5555 }
5556 }
5557
5558 if (! dies)
5559 {
5560 /* Either there was a branch, or some insn used REG
5561 other than as a function call address. */
5562 continue;
5563 }
5564
5565 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5566 on the insn which sets the register, and on each call insn
5567 which uses the register. In final_prescan_insn we look for
5568 the REG_LABEL_OPERAND notes, and output the appropriate label
5569 or pseudo-op. */
5570
5571 label = gen_label_rtx ();
5572 add_reg_note (link, REG_LABEL_OPERAND, label);
5573 add_reg_note (insn, REG_LABEL_OPERAND, label);
5574 if (rescan)
5575 {
5576 scan = link;
5577 do
5578 {
5579 rtx reg2;
5580
5581 scan = NEXT_INSN (scan);
5582 if (scan != insn
5583 && ((CALL_P (scan)
5584 && reg_mentioned_p (reg, scan))
5585 || ((reg2 = sfunc_uses_reg (scan))
5586 && REGNO (reg2) == REGNO (reg))))
5587 add_reg_note (scan, REG_LABEL_OPERAND, label);
5588 }
5589 while (scan != dies);
5590 }
5591 }
5592 }
5593
5594 if (TARGET_SH2)
5595 fixup_addr_diff_vecs (first);
5596
5597 if (optimize)
5598 {
5599 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5600 shorten_branches (first);
5601 }
5602
5603 /* Scan the function looking for move instructions which have to be
5604 changed to pc-relative loads and insert the literal tables. */
5605 label_ref_list_pool = create_alloc_pool ("label references list",
5606 sizeof (struct label_ref_list_d),
5607 30);
5608 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5609 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5610 {
5611 if (mova_p (insn))
5612 {
5613 /* ??? basic block reordering can move a switch table dispatch
5614 below the switch table. Check if that has happened.
5615 We only have the addresses available when optimizing; but then,
5616 this check shouldn't be needed when not optimizing. */
5617 if (!untangle_mova (&num_mova, &mova, insn))
5618 {
5619 insn = mova;
5620 num_mova = 0;
5621 }
5622 }
5623 else if (JUMP_P (insn)
5624 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5625 && num_mova
5626 /* ??? loop invariant motion can also move a mova out of a
5627 loop. Since loop does this code motion anyway, maybe we
5628 should wrap UNSPEC_MOVA into a CONST, so that reload can
5629 move it back. */
5630 && ((num_mova > 1
5631 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5632 || (prev_nonnote_insn (insn)
5633 == XEXP (MOVA_LABELREF (mova), 0))))
5634 {
5635 rtx scan;
5636 int total;
5637
5638 num_mova--;
5639
5640 /* Some code might have been inserted between the mova and
5641 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5642 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5643 total += get_attr_length (scan);
5644
5645 	  /* The range of mova is 1020; add 4 because the pc counts from the
5646 	     address of the second instruction after this one, and subtract 2 in
5647 	     case the pc is 2-byte aligned.  Possible alignment needed for the
5648 	     ADDR_DIFF_VEC cancels out with alignment effects of the mova itself. */
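	  /* Worked example of the limit used below (illustrative): the mova
	     displacement is an unsigned 8-bit value scaled by 4, i.e. at most
	     1020 bytes; the pc base is the mova's address + 4, and up to 2
	     bytes may be lost to pc alignment, so anything farther than
	     1020 + 4 - 2 = 1022 bytes away is out of reach.  */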
5649 if (total > 1022)
5650 {
5651 /* Change the mova into a load, and restart scanning
5652 there. broken_move will then return true for mova. */
5653 fixup_mova (mova);
5654 insn = mova;
5655 }
5656 }
5657 if (broken_move (insn)
5658 || (NONJUMP_INSN_P (insn)
5659 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5660 {
5661 rtx scan;
5662 /* Scan ahead looking for a barrier to stick the constant table
5663 behind. */
5664 rtx barrier = find_barrier (num_mova, mova, insn);
5665 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5666 int need_aligned_label = 0;
5667
5668 if (num_mova && ! mova_p (mova))
5669 {
5670 /* find_barrier had to change the first mova into a
5671 pcload; thus, we have to start with this new pcload. */
5672 insn = mova;
5673 num_mova = 0;
5674 }
5675 /* Now find all the moves between the points and modify them. */
5676 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5677 {
5678 if (LABEL_P (scan))
5679 last_float = 0;
5680 if (NONJUMP_INSN_P (scan)
5681 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5682 need_aligned_label = 1;
5683 if (broken_move (scan))
5684 {
5685 rtx *patp = &PATTERN (scan), pat = *patp;
5686 rtx src, dst;
5687 rtx lab;
5688 rtx newsrc;
5689 enum machine_mode mode;
5690
5691 if (GET_CODE (pat) == PARALLEL)
5692 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5693 src = SET_SRC (pat);
5694 dst = SET_DEST (pat);
5695 mode = GET_MODE (dst);
5696
5697 if (mode == SImode && hi_const (src)
5698 && REGNO (dst) != FPUL_REG)
5699 {
5700 int offset = 0;
5701
5702 mode = HImode;
5703 while (GET_CODE (dst) == SUBREG)
5704 {
5705 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5706 GET_MODE (SUBREG_REG (dst)),
5707 SUBREG_BYTE (dst),
5708 GET_MODE (dst));
5709 dst = SUBREG_REG (dst);
5710 }
5711 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5712 }
5713 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5714 {
5715 /* This must be an insn that clobbers r0. */
5716 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5717 XVECLEN (PATTERN (scan), 0)
5718 - 1);
5719 rtx clobber = *clobberp;
5720
5721 gcc_assert (GET_CODE (clobber) == CLOBBER
5722 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5723
5724 if (last_float
5725 && reg_set_between_p (r0_rtx, last_float_move, scan))
5726 last_float = 0;
5727 if (last_float
5728 && TARGET_SHCOMPACT
5729 && GET_MODE_SIZE (mode) != 4
5730 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5731 last_float = 0;
5732 lab = add_constant (src, mode, last_float);
5733 if (lab)
5734 emit_insn_before (gen_mova (lab), scan);
5735 else
5736 {
5737 /* There will be a REG_UNUSED note for r0 on
5738 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5739 		     otherwise reorg:mark_target_live_regs will not
5740 		     consider r0 to be used, and we would end up with a
5741 		     delay slot insn in front of SCAN that clobbers r0.  */
5742 rtx note
5743 = find_regno_note (last_float_move, REG_UNUSED, 0);
5744
5745 /* If we are not optimizing, then there may not be
5746 a note. */
5747 if (note)
5748 PUT_REG_NOTE_KIND (note, REG_INC);
5749
5750 *last_float_addr = r0_inc_rtx;
5751 }
5752 last_float_move = scan;
5753 last_float = src;
5754 newsrc = gen_const_mem (mode,
5755 (((TARGET_SH4 && ! TARGET_FMOVD)
5756 || REGNO (dst) == FPUL_REG)
5757 ? r0_inc_rtx
5758 : r0_rtx));
5759 last_float_addr = &XEXP (newsrc, 0);
5760
5761 /* Remove the clobber of r0. */
5762 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5763 gen_rtx_SCRATCH (Pmode));
5764 }
5765 /* This is a mova needing a label. Create it. */
5766 else if (GET_CODE (src) == UNSPEC
5767 && XINT (src, 1) == UNSPEC_MOVA
5768 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5769 {
5770 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5771 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5772 newsrc = gen_rtx_UNSPEC (SImode,
5773 gen_rtvec (1, newsrc),
5774 UNSPEC_MOVA);
5775 }
5776 else
5777 {
5778 lab = add_constant (src, mode, 0);
5779 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5780 newsrc = gen_const_mem (mode, newsrc);
5781 }
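	      /* Sketch of the rewrite performed just below (illustrative rtl):
	         a broken move such as
	             (set (reg:SI rN) (const_int 0x12345678))
	         becomes
	             (set (reg:SI rN) (mem:SI (label_ref Lk)))
	         and the constant itself is emitted at Lk when the table is
	         dumped after the barrier.  */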
5782 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5783 INSN_CODE (scan) = -1;
5784 }
5785 }
5786 dump_table (need_aligned_label ? insn : 0, barrier);
5787 insn = barrier;
5788 }
5789 }
5790 free_alloc_pool (label_ref_list_pool);
5791 for (insn = first; insn; insn = NEXT_INSN (insn))
5792 PUT_MODE (insn, VOIDmode);
5793
5794 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5795 INSN_ADDRESSES_FREE ();
5796 split_branches (first);
5797
5798 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5799 also has an effect on the register that holds the address of the sfunc.
5800 Insert an extra dummy insn in front of each sfunc that pretends to
5801 use this register. */
5802 if (flag_delayed_branch)
5803 {
5804 for (insn = first; insn; insn = NEXT_INSN (insn))
5805 {
5806 rtx reg = sfunc_uses_reg (insn);
5807
5808 if (! reg)
5809 continue;
5810 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5811 }
5812 }
5813 #if 0
5814 /* fpscr is not actually a user variable, but we pretend it is for the
5815 sake of the previous optimization passes, since we want it handled like
5816 one. However, we don't have any debugging information for it, so turn
5817 it into a non-user variable now. */
5818 if (TARGET_SH4)
5819 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5820 #endif
5821 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5822 }
5823
5824 int
5825 get_dest_uid (rtx label, int max_uid)
5826 {
5827 rtx dest = next_real_insn (label);
5828 int dest_uid;
5829 if (! dest)
5830 /* This can happen for an undefined label. */
5831 return 0;
5832 dest_uid = INSN_UID (dest);
5833 /* If this is a newly created branch redirection blocking instruction,
5834 we cannot index the branch_uid or insn_addresses arrays with its
5835 uid. But then, we won't need to, because the actual destination is
5836 the following branch. */
5837 while (dest_uid >= max_uid)
5838 {
5839 dest = NEXT_INSN (dest);
5840 dest_uid = INSN_UID (dest);
5841 }
5842 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5843 return 0;
5844 return dest_uid;
5845 }
5846
5847 /* Split condbranches that are out of range. Also add clobbers for
5848 scratch registers that are needed in far jumps.
5849 We do this before delay slot scheduling, so that it can take our
5850 newly created instructions into account. It also allows us to
5851 find branches with common targets more easily. */
5852
5853 static void
5854 split_branches (rtx first)
5855 {
5856 rtx insn;
5857 struct far_branch **uid_branch, *far_branch_list = 0;
5858 int max_uid = get_max_uid ();
5859 int ok;
5860
5861 /* Find out which branches are out of range. */
5862 shorten_branches (first);
5863
5864 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5865 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5866
5867 for (insn = first; insn; insn = NEXT_INSN (insn))
5868 if (! INSN_P (insn))
5869 continue;
5870 else if (INSN_DELETED_P (insn))
5871 {
5872 /* Shorten_branches would split this instruction again,
5873 so transform it into a note. */
5874 SET_INSN_DELETED (insn);
5875 }
5876 else if (JUMP_P (insn)
5877 	     /* Don't mess with ADDR_DIFF_VEC.  */
5878 && (GET_CODE (PATTERN (insn)) == SET
5879 || GET_CODE (PATTERN (insn)) == RETURN))
5880 {
5881 enum attr_type type = get_attr_type (insn);
5882 if (type == TYPE_CBRANCH)
5883 {
5884 rtx next, beyond;
5885
5886 if (get_attr_length (insn) > 4)
5887 {
5888 rtx src = SET_SRC (PATTERN (insn));
5889 rtx olabel = XEXP (XEXP (src, 1), 0);
5890 int addr = INSN_ADDRESSES (INSN_UID (insn));
5891 rtx label = 0;
5892 int dest_uid = get_dest_uid (olabel, max_uid);
5893 struct far_branch *bp = uid_branch[dest_uid];
5894
5895 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5896 the label if the LABEL_NUSES count drops to zero. There is
5897 always a jump_optimize pass that sets these values, but it
5898 proceeds to delete unreferenced code, and then if not
5899 optimizing, to un-delete the deleted instructions, thus
5900 leaving labels with too low uses counts. */
5901 if (! optimize)
5902 {
5903 JUMP_LABEL (insn) = olabel;
5904 LABEL_NUSES (olabel)++;
5905 }
5906 if (! bp)
5907 {
5908 bp = (struct far_branch *) alloca (sizeof *bp);
5909 uid_branch[dest_uid] = bp;
5910 bp->prev = far_branch_list;
5911 far_branch_list = bp;
5912 bp->far_label
5913 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5914 LABEL_NUSES (bp->far_label)++;
5915 }
5916 else
5917 {
5918 label = bp->near_label;
5919 if (! label && bp->address - addr >= CONDJUMP_MIN)
5920 {
5921 rtx block = bp->insert_place;
5922
5923 if (GET_CODE (PATTERN (block)) == RETURN)
5924 block = PREV_INSN (block);
5925 else
5926 block = gen_block_redirect (block,
5927 bp->address, 2);
5928 label = emit_label_after (gen_label_rtx (),
5929 PREV_INSN (block));
5930 bp->near_label = label;
5931 }
5932 else if (label && ! NEXT_INSN (label))
5933 {
5934 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5935 bp->insert_place = insn;
5936 else
5937 gen_far_branch (bp);
5938 }
5939 }
5940 if (! label
5941 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5942 {
5943 bp->near_label = label = gen_label_rtx ();
5944 bp->insert_place = insn;
5945 bp->address = addr;
5946 }
5947 ok = redirect_jump (insn, label, 0);
5948 gcc_assert (ok);
5949 }
5950 else
5951 {
5952 /* get_attr_length (insn) == 2 */
5953 /* Check if we have a pattern where reorg wants to redirect
5954 the branch to a label from an unconditional branch that
5955 is too far away. */
5956 /* We can't use JUMP_LABEL here because it might be undefined
5957 when not optimizing. */
5958 /* A syntax error might cause beyond to be NULL_RTX. */
5959 beyond
5960 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5961 0));
5962
5963 if (beyond
5964 && (JUMP_P (beyond)
5965 || ((beyond = next_active_insn (beyond))
5966 && JUMP_P (beyond)))
5967 && GET_CODE (PATTERN (beyond)) == SET
5968 && recog_memoized (beyond) == CODE_FOR_jump_compact
5969 && ((INSN_ADDRESSES
5970 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5971 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5972 > 252 + 258 + 2))
5973 gen_block_redirect (beyond,
5974 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5975 }
5976
5977 next = next_active_insn (insn);
5978
5979 if (next
5980 && (JUMP_P (next)
5981 || ((next = next_active_insn (next))
5982 && JUMP_P (next)))
5983 && GET_CODE (PATTERN (next)) == SET
5984 && recog_memoized (next) == CODE_FOR_jump_compact
5985 && ((INSN_ADDRESSES
5986 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5987 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5988 > 252 + 258 + 2))
5989 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5990 }
5991 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5992 {
5993 int addr = INSN_ADDRESSES (INSN_UID (insn));
5994 rtx far_label = 0;
5995 int dest_uid = 0;
5996 struct far_branch *bp;
5997
5998 if (type == TYPE_JUMP)
5999 {
6000 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6001 dest_uid = get_dest_uid (far_label, max_uid);
6002 if (! dest_uid)
6003 {
6004 /* Parse errors can lead to labels outside
6005 the insn stream. */
6006 if (! NEXT_INSN (far_label))
6007 continue;
6008
6009 if (! optimize)
6010 {
6011 JUMP_LABEL (insn) = far_label;
6012 LABEL_NUSES (far_label)++;
6013 }
6014 redirect_jump (insn, NULL_RTX, 1);
6015 far_label = 0;
6016 }
6017 }
6018 bp = uid_branch[dest_uid];
6019 if (! bp)
6020 {
6021 bp = (struct far_branch *) alloca (sizeof *bp);
6022 uid_branch[dest_uid] = bp;
6023 bp->prev = far_branch_list;
6024 far_branch_list = bp;
6025 bp->near_label = 0;
6026 bp->far_label = far_label;
6027 if (far_label)
6028 LABEL_NUSES (far_label)++;
6029 }
6030 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6031 if (addr - bp->address <= CONDJUMP_MAX)
6032 emit_label_after (bp->near_label, PREV_INSN (insn));
6033 else
6034 {
6035 gen_far_branch (bp);
6036 bp->near_label = 0;
6037 }
6038 else
6039 bp->near_label = 0;
6040 bp->address = addr;
6041 bp->insert_place = insn;
6042 if (! far_label)
6043 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6044 else
6045 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6046 }
6047 }
6048 /* Generate all pending far branches,
6049 and free our references to the far labels. */
6050 while (far_branch_list)
6051 {
6052 if (far_branch_list->near_label
6053 && ! NEXT_INSN (far_branch_list->near_label))
6054 gen_far_branch (far_branch_list);
6055 if (optimize
6056 && far_branch_list->far_label
6057 && ! --LABEL_NUSES (far_branch_list->far_label))
6058 delete_insn (far_branch_list->far_label);
6059 far_branch_list = far_branch_list->prev;
6060 }
6061
6062 /* Instruction length information is no longer valid due to the new
6063 instructions that have been generated. */
6064 init_insn_lengths ();
6065 }
6066
6067 /* Dump out instruction addresses, which is useful for debugging the
6068 constant pool table stuff.
6069
6070 If relaxing, output the label and pseudo-ops used to link together
6071 calls and the instruction which set the registers. */
6072
6073 /* ??? The addresses printed by this routine for insns are nonsense for
6074 insns which are inside of a sequence where none of the inner insns have
6075 variable length. This is because the second pass of shorten_branches
6076 does not bother to update them. */
6077
6078 void
6079 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6080 int noperands ATTRIBUTE_UNUSED)
6081 {
6082 if (TARGET_DUMPISIZE)
6083 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6084
6085 if (TARGET_RELAX)
6086 {
6087 rtx note;
6088
6089 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6090 if (note)
6091 {
6092 rtx pattern;
6093
6094 pattern = PATTERN (insn);
6095 if (GET_CODE (pattern) == PARALLEL)
6096 pattern = XVECEXP (pattern, 0, 0);
6097 switch (GET_CODE (pattern))
6098 {
6099 case SET:
6100 if (GET_CODE (SET_SRC (pattern)) != CALL
6101 && get_attr_type (insn) != TYPE_SFUNC)
6102 {
6103 targetm.asm_out.internal_label
6104 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6105 break;
6106 }
6107 /* else FALLTHROUGH */
6108 case CALL:
6109 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6110 CODE_LABEL_NUMBER (XEXP (note, 0)));
6111 break;
6112
6113 default:
6114 gcc_unreachable ();
6115 }
6116 }
6117 }
6118 }
6119
6120 /* Dump out any constants accumulated in the final pass. These will
6121 only be labels. */
6122
6123 const char *
6124 output_jump_label_table (void)
6125 {
6126 int i;
6127
6128 if (pool_size)
6129 {
6130 fprintf (asm_out_file, "\t.align 2\n");
6131 for (i = 0; i < pool_size; i++)
6132 {
6133 pool_node *p = &pool_vector[i];
6134
6135 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6136 CODE_LABEL_NUMBER (p->label));
6137 output_asm_insn (".long %O0", &p->value);
6138 }
6139 pool_size = 0;
6140 }
6141
6142 return "";
6143 }
6144 \f
6145 /* A full frame looks like:
6146
6147 arg-5
6148 arg-4
6149 [ if current_function_anonymous_args
6150 arg-3
6151 arg-2
6152 arg-1
6153 arg-0 ]
6154 saved-fp
6155 saved-r10
6156 saved-r11
6157 saved-r12
6158 saved-pr
6159 local-n
6160 ..
6161 local-1
6162 local-0 <- fp points here. */
6163
6164 /* Number of bytes pushed for anonymous args, used to pass information
6165 between expand_prologue and expand_epilogue. */
6166
6167 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6168 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6169 for an epilogue and a negative value means that it's for a sibcall
6170 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6171 all the registers that are about to be restored, and hence dead. */
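/* For orientation (derived from the callers in this file):
   sh_expand_prologue passes epilogue_p == 0, while sh_expand_epilogue
   passes 1 for a normal epilogue and -1 for a sibcall epilogue (its
   local "e").  */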
6172
6173 static void
6174 output_stack_adjust (int size, rtx reg, int epilogue_p,
6175 HARD_REG_SET *live_regs_mask, bool frame_p)
6176 {
6177 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6178 if (size)
6179 {
6180 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6181
6182 /* This test is bogus, as output_stack_adjust is used to re-align the
6183 stack. */
6184 #if 0
6185 gcc_assert (!(size % align));
6186 #endif
6187
6188 if (CONST_OK_FOR_ADD (size))
6189 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6190 /* Try to do it with two partial adjustments; however, we must make
6191 sure that the stack is properly aligned at all times, in case
6192 an interrupt occurs between the two partial adjustments. */
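      /* For example (illustrative, assuming a 4-byte stack boundary and an
	 8-bit add immediate): size == 192 is too large for a single add,
	 but 192 / 2 & -4 == 96 and 192 - 96 == 96 both fit, and each
	 partial adjustment is a multiple of the alignment, so the stack
	 stays aligned even if an interrupt hits between them.  */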
6193 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6194 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6195 {
6196 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6197 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6198 }
6199 else
6200 {
6201 rtx const_reg;
6202 rtx insn;
6203 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6204 int i;
6205
6206 /* If TEMP is invalid, we could temporarily save a general
6207 register to MACL. However, there is currently no need
6208 to handle this case, so just die when we see it. */
6209 if (epilogue_p < 0
6210 || current_function_interrupt
6211 || ! call_really_used_regs[temp] || fixed_regs[temp])
6212 temp = -1;
6213 if (temp < 0 && ! current_function_interrupt
6214 && (TARGET_SHMEDIA || epilogue_p >= 0))
6215 {
6216 HARD_REG_SET temps;
6217 COPY_HARD_REG_SET (temps, call_used_reg_set);
6218 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6219 if (epilogue_p > 0)
6220 {
6221 int nreg = 0;
6222 if (crtl->return_rtx)
6223 {
6224 enum machine_mode mode;
6225 mode = GET_MODE (crtl->return_rtx);
6226 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6227 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6228 }
6229 for (i = 0; i < nreg; i++)
6230 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6231 if (crtl->calls_eh_return)
6232 {
6233 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6234 for (i = 0; i <= 3; i++)
6235 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6236 }
6237 }
6238 if (TARGET_SHMEDIA && epilogue_p < 0)
6239 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6240 CLEAR_HARD_REG_BIT (temps, i);
6241 if (epilogue_p <= 0)
6242 {
6243 for (i = FIRST_PARM_REG;
6244 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6245 CLEAR_HARD_REG_BIT (temps, i);
6246 if (cfun->static_chain_decl != NULL)
6247 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6248 }
6249 temp = scavenge_reg (&temps);
6250 }
6251 if (temp < 0 && live_regs_mask)
6252 {
6253 HARD_REG_SET temps;
6254
6255 COPY_HARD_REG_SET (temps, *live_regs_mask);
6256 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6257 temp = scavenge_reg (&temps);
6258 }
6259 if (temp < 0)
6260 {
6261 rtx adj_reg, tmp_reg, mem;
6262
6263 	      /* If we reached here, the most likely case is the (sibcall)
6264 		 epilogue for non-SHmedia.  Put a special push/pop sequence
6265 		 in place for such a case as a last resort.  This looks lengthy,
6266 		 but it should not be a problem because it seems to be very
6267 		 rare.  */
6268
6269 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6270
6271
6272 /* ??? There is still the slight possibility that r4 or
6273 r5 have been reserved as fixed registers or assigned
6274 as global registers, and they change during an
6275 interrupt. There are possible ways to handle this:
6276
6277 - If we are adjusting the frame pointer (r14), we can do
6278 with a single temp register and an ordinary push / pop
6279 on the stack.
6280 - Grab any call-used or call-saved registers (i.e. not
6281 fixed or globals) for the temps we need. We might
6282 also grab r14 if we are adjusting the stack pointer.
6283 If we can't find enough available registers, issue
6284 a diagnostic and die - the user must have reserved
6285 way too many registers.
6286 But since all this is rather unlikely to happen and
6287 would require extra testing, we just die if r4 / r5
6288 are not available. */
6289 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6290 && !global_regs[4] && !global_regs[5]);
6291
6292 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6293 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6294 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6295 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6296 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6297 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6298 emit_move_insn (mem, tmp_reg);
6299 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6300 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6301 emit_move_insn (mem, tmp_reg);
6302 emit_move_insn (reg, adj_reg);
6303 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6304 emit_move_insn (adj_reg, mem);
6305 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6306 emit_move_insn (tmp_reg, mem);
6307 /* Tell flow the insns that pop r4/r5 aren't dead. */
6308 emit_use (tmp_reg);
6309 emit_use (adj_reg);
6310 return;
6311 }
6312 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6313
6314 /* If SIZE is negative, subtract the positive value.
6315 This sometimes allows a constant pool entry to be shared
6316 between prologue and epilogue code. */
6317 if (size < 0)
6318 {
6319 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6320 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6321 }
6322 else
6323 {
6324 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6325 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6326 }
6327 if (! epilogue_p)
6328 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6329 gen_rtx_SET (VOIDmode, reg,
6330 gen_rtx_PLUS (SImode, reg,
6331 GEN_INT (size))));
6332 }
6333 }
6334 }
6335
6336 static rtx
6337 frame_insn (rtx x)
6338 {
6339 x = emit_insn (x);
6340 RTX_FRAME_RELATED_P (x) = 1;
6341 return x;
6342 }
6343
6344 /* Output RTL to push register RN onto the stack. */
6345
6346 static rtx
6347 push (int rn)
6348 {
6349 rtx x;
6350 if (rn == FPUL_REG)
6351 x = gen_push_fpul ();
6352 else if (rn == FPSCR_REG)
6353 x = gen_push_fpscr ();
6354 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6355 && FP_OR_XD_REGISTER_P (rn))
6356 {
6357 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6358 return NULL_RTX;
6359 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6360 }
6361 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6362 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6363 else
6364 x = gen_push (gen_rtx_REG (SImode, rn));
6365
6366 x = frame_insn (x);
6367 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6368 return x;
6369 }
6370
6371 /* Output RTL to pop register RN from the stack. */
6372
6373 static void
6374 pop (int rn)
6375 {
6376 rtx x;
6377 if (rn == FPUL_REG)
6378 x = gen_pop_fpul ();
6379 else if (rn == FPSCR_REG)
6380 x = gen_pop_fpscr ();
6381 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6382 && FP_OR_XD_REGISTER_P (rn))
6383 {
6384 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6385 return;
6386 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6387 }
6388 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6389 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6390 else
6391 x = gen_pop (gen_rtx_REG (SImode, rn));
6392
6393 x = emit_insn (x);
6394 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6395 }
6396
6397 /* Generate code to push the regs specified in the mask. */
6398
6399 static void
6400 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6401 {
6402 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6403 int skip_fpscr = 0;
6404
6405 /* Push PR last; this gives better latencies after the prologue, and
6406 provides candidates for the return delay slot when there are no
6407 general registers pushed.  */
6408 for (; i < FIRST_PSEUDO_REGISTER; i++)
6409 {
6410 /* If this is an interrupt handler, and the SZ bit varies,
6411 and we have to push any floating point register, we need
6412 to switch to the correct precision first. */
6413 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6414 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6415 {
6416 HARD_REG_SET unsaved;
6417
6418 push (FPSCR_REG);
6419 COMPL_HARD_REG_SET (unsaved, *mask);
6420 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6421 skip_fpscr = 1;
6422 }
6423 if (i != PR_REG
6424 && (i != FPSCR_REG || ! skip_fpscr)
6425 && TEST_HARD_REG_BIT (*mask, i))
6426 {
6427 /* If the ISR has RESBANK attribute assigned, don't push any of
6428 the following registers - R0-R14, MACH, MACL and GBR. */
6429 if (! (sh_cfun_resbank_handler_p ()
6430 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6431 || i == MACH_REG
6432 || i == MACL_REG
6433 || i == GBR_REG)))
6434 push (i);
6435 }
6436 }
6437
6438 /* Push banked registers last to improve delay slot opportunities. */
6439 if (interrupt_handler)
6440 {
6441 bool use_movml = false;
6442
6443 if (TARGET_SH2A)
6444 {
6445 unsigned int count = 0;
6446
6447 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6448 if (TEST_HARD_REG_BIT (*mask, i))
6449 count++;
6450 else
6451 break;
6452
6453 /* Use movml when all banked registers are pushed. */
6454 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6455 use_movml = true;
6456 }
6457
6458 if (use_movml)
6459 {
6460 rtx x, mem, reg, set;
6461 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6462
6463 	  /* We must avoid scheduling the multiple-store insn together with
6464 	     other insns.  */
6465 emit_insn (gen_blockage ());
6466 x = gen_movml_push_banked (sp_reg);
6467 x = frame_insn (x);
6468 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6469 {
6470 mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
6471 reg = gen_rtx_REG (SImode, i);
6472 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6473 }
6474
6475 set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
6476 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6477 emit_insn (gen_blockage ());
6478 }
6479 else
6480 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6481 if (TEST_HARD_REG_BIT (*mask, i))
6482 push (i);
6483 }
6484
6485 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6486 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6487 push (PR_REG);
6488 }
6489
6490 /* Calculate how much extra space is needed to save all callee-saved
6491 target registers.
6492 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6493
6494 static int
6495 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6496 {
6497 int reg;
6498 int stack_space = 0;
6499 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6500
6501 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6502 if ((! call_really_used_regs[reg] || interrupt_handler)
6503 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6504 /* Leave space to save this target register on the stack,
6505 in case target register allocation wants to use it. */
6506 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6507 return stack_space;
6508 }
6509
6510 /* Decide whether we should reserve space for callee-save target registers,
6511 in case target register allocation wants to use them. REGS_SAVED is
6512 the space, in bytes, that is already required for register saves.
6513 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6514
6515 static int
6516 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6517 HARD_REG_SET *live_regs_mask)
6518 {
6519 if (optimize_size)
6520 return 0;
6521 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6522 }
6523
6524 /* Decide how much space to reserve for callee-save target registers
6525 in case target register allocation wants to use them.
6526 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6527
6528 static int
6529 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6530 {
6531 if (shmedia_space_reserved_for_target_registers)
6532 return shmedia_target_regs_stack_space (live_regs_mask);
6533 else
6534 return 0;
6535 }
6536
6537 /* Work out the registers which need to be saved, both as a mask and a
6538 count of saved words. Return the count.
6539
6540 If doing a pragma interrupt function, then push all regs used by the
6541 function, and if we call another function (we can tell by looking at PR),
6542 make sure that all the regs it clobbers are safe too. */
6543
6544 static int
6545 calc_live_regs (HARD_REG_SET *live_regs_mask)
6546 {
6547 unsigned int reg;
6548 int count;
6549 tree attrs;
6550 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6551 bool nosave_low_regs;
6552 int pr_live, has_call;
6553
6554 attrs = DECL_ATTRIBUTES (current_function_decl);
6555 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6556 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6557 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6558 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6559
6560 CLEAR_HARD_REG_SET (*live_regs_mask);
6561 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6562 && df_regs_ever_live_p (FPSCR_REG))
6563 target_flags &= ~MASK_FPU_SINGLE;
6564 /* If we can save a lot of saves by switching to double mode, do that. */
6565 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6566 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6567 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6568 && (! call_really_used_regs[reg]
6569 || interrupt_handler)
6570 && ++count > 2)
6571 {
6572 target_flags &= ~MASK_FPU_SINGLE;
6573 break;
6574 }
6575 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6576 knows how to use it. That means the pseudo originally allocated for
6577 the initial value can become the PR_MEDIA_REG hard register, as seen for
6578 execute/20010122-1.c:test9. */
6579 if (TARGET_SHMEDIA)
6580 /* ??? this function is called from initial_elimination_offset, hence we
6581 can't use the result of sh_media_register_for_return here. */
6582 pr_live = sh_pr_n_sets ();
6583 else
6584 {
6585 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6586 pr_live = (pr_initial
6587 ? (!REG_P (pr_initial)
6588 || REGNO (pr_initial) != (PR_REG))
6589 : df_regs_ever_live_p (PR_REG));
6590 /* For SHcompact, if not optimizing, we end up with a memory reference
6591 using the return address pointer for __builtin_return_address even
6592 though there is no actual need to put the PR register on the stack. */
6593 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6594 }
6595 /* Force PR to be live if the prologue has to call the SHmedia
6596 argument decoder or register saver. */
6597 if (TARGET_SHCOMPACT
6598 && ((crtl->args.info.call_cookie
6599 & ~ CALL_COOKIE_RET_TRAMP (1))
6600 || crtl->saves_all_registers))
6601 pr_live = 1;
6602 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6603 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6604 {
6605 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6606 ? pr_live
6607 : interrupt_handler
6608 ? (/* Need to save all the regs ever live. */
6609 (df_regs_ever_live_p (reg)
6610 || (call_really_used_regs[reg]
6611 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6612 || reg == PIC_OFFSET_TABLE_REGNUM)
6613 && has_call)
6614 || (TARGET_SHMEDIA && has_call
6615 && REGISTER_NATURAL_MODE (reg) == SImode
6616 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6617 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6618 && reg != RETURN_ADDRESS_POINTER_REGNUM
6619 && reg != T_REG && reg != GBR_REG
6620 /* Push fpscr only on targets which have an FPU.  */
6621 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6622 : (/* Only push those regs which are used and need to be saved. */
6623 (TARGET_SHCOMPACT
6624 && flag_pic
6625 && crtl->args.info.call_cookie
6626 && reg == PIC_OFFSET_TABLE_REGNUM)
6627 || (df_regs_ever_live_p (reg)
6628 && ((!call_really_used_regs[reg]
6629 && !(reg != PIC_OFFSET_TABLE_REGNUM
6630 && fixed_regs[reg] && call_used_regs[reg]))
6631 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6632 || (crtl->calls_eh_return
6633 && (reg == EH_RETURN_DATA_REGNO (0)
6634 || reg == EH_RETURN_DATA_REGNO (1)
6635 || reg == EH_RETURN_DATA_REGNO (2)
6636 || reg == EH_RETURN_DATA_REGNO (3)))
6637 || ((reg == MACL_REG || reg == MACH_REG)
6638 && df_regs_ever_live_p (reg)
6639 && sh_cfun_attr_renesas_p ())
6640 ))
6641 {
6642 SET_HARD_REG_BIT (*live_regs_mask, reg);
6643 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6644
6645 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6646 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6647 {
6648 if (FP_REGISTER_P (reg))
6649 {
6650 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6651 {
6652 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6653 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6654 }
6655 }
6656 else if (XD_REGISTER_P (reg))
6657 {
6658 /* Must switch to double mode to access these registers. */
6659 target_flags &= ~MASK_FPU_SINGLE;
6660 }
6661 }
6662 }
6663 if (nosave_low_regs && reg == R8_REG)
6664 break;
6665 }
6666 /* If we have a target register optimization pass after prologue / epilogue
6667 threading, we need to assume all target registers will be live even if
6668 they aren't now. */
6669 if (flag_branch_target_load_optimize2
6670 && TARGET_SAVE_ALL_TARGET_REGS
6671 && shmedia_space_reserved_for_target_registers)
6672 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6673 if ((! call_really_used_regs[reg] || interrupt_handler)
6674 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6675 {
6676 SET_HARD_REG_BIT (*live_regs_mask, reg);
6677 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6678 }
6679 /* If this is an interrupt handler, we don't have any call-clobbered
6680 registers we can conveniently use for target register save/restore.
6681 Make sure we save at least one general purpose register when we need
6682 to save target registers. */
6683 if (interrupt_handler
6684 && hard_reg_set_intersect_p (*live_regs_mask,
6685 reg_class_contents[TARGET_REGS])
6686 && ! hard_reg_set_intersect_p (*live_regs_mask,
6687 reg_class_contents[GENERAL_REGS]))
6688 {
6689 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6690 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6691 }
6692
6693 return count;
6694 }
6695
6696 /* Code to generate prologue and epilogue sequences */
6697
6698 /* PUSHED is the number of bytes that are being pushed on the
6699 stack for register saves. Return the frame size, padded
6700 appropriately so that the stack stays properly aligned. */
6701 static HOST_WIDE_INT
6702 rounded_frame_size (int pushed)
6703 {
6704 HOST_WIDE_INT size = get_frame_size ();
6705 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6706
6707 if (ACCUMULATE_OUTGOING_ARGS)
6708 size += crtl->outgoing_args_size;
6709
6710 return ((size + pushed + align - 1) & -align) - pushed;
6711 }
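/* Worked example (illustrative): with get_frame_size () == 10,
   pushed == 8 and a 4-byte stack boundary, ((10 + 8 + 3) & -4) - 8
   == 20 - 8 == 12, so the 8 pushed bytes plus the 12-byte frame keep
   the stack pointer 4-byte aligned.  */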
6712
6713 /* Choose a call-clobbered target-branch register that remains
6714 unchanged along the whole function. We set it up as the return
6715 value in the prologue. */
6716 int
6717 sh_media_register_for_return (void)
6718 {
6719 int regno;
6720 int tr0_used;
6721
6722 if (! current_function_is_leaf)
6723 return -1;
6724 if (lookup_attribute ("interrupt_handler",
6725 DECL_ATTRIBUTES (current_function_decl)))
6726 return -1;
6727 if (sh_cfun_interrupt_handler_p ())
6728 return -1;
6729
6730 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6731
6732 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6733 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6734 return regno;
6735
6736 return -1;
6737 }
6738
6739 /* The maximum registers we need to save are:
6740 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6741 - 32 floating point registers (for each pair, we save none,
6742 one single precision value, or a double precision value).
6743 - 8 target registers
6744 - add 1 entry for a delimiter. */
6745 #define MAX_SAVED_REGS (62+32+8)
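/* Note: the delimiter entries mentioned above are provided for by the
   "+ 2" in the entries array of save_schedule below rather than by
   MAX_SAVED_REGS itself.  */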
6746
6747 typedef struct save_entry_s
6748 {
6749 unsigned char reg;
6750 unsigned char mode;
6751 short offset;
6752 } save_entry;
6753
6754 #define MAX_TEMPS 4
6755
6756 /* There will be a delimiter entry with VOIDmode both at the start and the
6757 end of a filled in schedule. The end delimiter has the offset of the
6758 save with the smallest (i.e. most negative) offset. */
6759 typedef struct save_schedule_s
6760 {
6761 save_entry entries[MAX_SAVED_REGS + 2];
6762 int temps[MAX_TEMPS+1];
6763 } save_schedule;
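/* Illustrative layout of a filled-in schedule (offsets made up):

     entries[0]    reg == -1, mode == VOIDmode, offset == offset_base
     entries[1]    first saved register, offset closest to offset_base
     ...
     entries[n]    last saved register, smallest (most negative) offset
     entries[n+1]  reg == -1, mode == VOIDmode, same offset as entries[n]

   sh5_schedule_saves below fills this in.  */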
6764
6765 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6766 use reverse order. Returns the last entry written to (not counting
6767 the delimiter). OFFSET_BASE is a number to be added to all offset
6768 entries. */
6769
6770 static save_entry *
6771 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6772 int offset_base)
6773 {
6774 int align, i;
6775 save_entry *entry = schedule->entries;
6776 int tmpx = 0;
6777 int offset;
6778
6779 if (! current_function_interrupt)
6780 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6781 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6782 && ! FUNCTION_ARG_REGNO_P (i)
6783 && i != FIRST_RET_REG
6784 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6785 && ! (crtl->calls_eh_return
6786 && (i == EH_RETURN_STACKADJ_REGNO
6787 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6788 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6789 schedule->temps[tmpx++] = i;
6790 entry->reg = -1;
6791 entry->mode = VOIDmode;
6792 entry->offset = offset_base;
6793 entry++;
6794 /* We loop twice: first, we save 8-byte aligned registers at the
6795 higher addresses, which are known to be aligned.  Then, we
6796 proceed to saving 32-bit registers that don't need 8-byte
6797 alignment.
6798 If this is an interrupt function, all registers that need saving
6799 need to be saved in full.  Moreover, we need to postpone saving
6800 target registers until we have saved some general purpose registers
6801 that we can then use as scratch registers.  */
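/* Illustration of the two passes (assuming the usual 8-byte SH5 stack
   boundary): the align == 1 pass picks up saves whose mode size is a
   multiple of 8 (DImode/DFmode saves), and the align == 0 pass then
   picks up the remaining 4-byte (SImode/SFmode) saves at lower,
   possibly unaligned, offsets.  */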
6802 offset = offset_base;
6803 for (align = 1; align >= 0; align--)
6804 {
6805 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6806 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6807 {
6808 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6809 int reg = i;
6810
6811 if (current_function_interrupt)
6812 {
6813 if (TARGET_REGISTER_P (i))
6814 continue;
6815 if (GENERAL_REGISTER_P (i))
6816 mode = DImode;
6817 }
6818 if (mode == SFmode && (i % 2) == 1
6819 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6820 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6821 {
6822 mode = DFmode;
6823 i--;
6824 reg--;
6825 }
6826
6827 /* If we're doing the aligned pass and this is not aligned,
6828 or we're doing the unaligned pass and this is aligned,
6829 skip it. */
6830 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6831 != align)
6832 continue;
6833
6834 if (current_function_interrupt
6835 && GENERAL_REGISTER_P (i)
6836 && tmpx < MAX_TEMPS)
6837 schedule->temps[tmpx++] = i;
6838
6839 offset -= GET_MODE_SIZE (mode);
6840 entry->reg = i;
6841 entry->mode = mode;
6842 entry->offset = offset;
6843 entry++;
6844 }
6845 if (align && current_function_interrupt)
6846 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6847 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6848 {
6849 offset -= GET_MODE_SIZE (DImode);
6850 entry->reg = i;
6851 entry->mode = DImode;
6852 entry->offset = offset;
6853 entry++;
6854 }
6855 }
6856 entry->reg = -1;
6857 entry->mode = VOIDmode;
6858 entry->offset = offset;
6859 schedule->temps[tmpx] = -1;
6860 return entry - 1;
6861 }
6862
6863 void
6864 sh_expand_prologue (void)
6865 {
6866 HARD_REG_SET live_regs_mask;
6867 int d, i;
6868 int d_rounding = 0;
6869 int save_flags = target_flags;
6870 int pretend_args;
6871 int stack_usage;
6872 tree sp_switch_attr
6873 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6874
6875 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6876
6877 /* We have pretend args if we had an object sent partially in registers
6878 and partially on the stack, e.g. a large structure. */
6879 pretend_args = crtl->args.pretend_args_size;
6880 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6881 && (NPARM_REGS(SImode)
6882 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6883 pretend_args = 0;
6884 /* The dwarf2 module doesn't expect frame-related insns here.  */
6885 output_stack_adjust (-pretend_args
6886 - crtl->args.info.stack_regs * 8,
6887 stack_pointer_rtx, 0, NULL, false);
6888 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
6889
6890 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6891 /* We're going to use the PIC register to load the address of the
6892 incoming-argument decoder and/or of the return trampoline from
6893 the GOT, so make sure the PIC register is preserved and
6894 initialized. */
6895 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6896
6897 if (TARGET_SHCOMPACT
6898 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6899 {
6900 int reg;
6901
6902 /* First, make all registers with incoming arguments that will
6903 be pushed onto the stack live, so that register renaming
6904 doesn't overwrite them. */
6905 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6906 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6907 >= NPARM_REGS (SImode) - reg)
6908 for (; reg < NPARM_REGS (SImode); reg++)
6909 emit_insn (gen_shcompact_preserve_incoming_args
6910 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6911 else if (CALL_COOKIE_INT_REG_GET
6912 (crtl->args.info.call_cookie, reg) == 1)
6913 emit_insn (gen_shcompact_preserve_incoming_args
6914 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6915
6916 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6917 stack_pointer_rtx);
6918 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6919 GEN_INT (crtl->args.info.call_cookie));
6920 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6921 gen_rtx_REG (SImode, R0_REG));
6922 }
6923 else if (TARGET_SHMEDIA)
6924 {
6925 int tr = sh_media_register_for_return ();
6926
6927 if (tr >= 0)
6928 emit_move_insn (gen_rtx_REG (DImode, tr),
6929 gen_rtx_REG (DImode, PR_MEDIA_REG));
6930 }
6931
6932 /* Emit the code for SETUP_VARARGS. */
6933 if (cfun->stdarg)
6934 {
6935 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6936 {
6937 /* Push arg regs as if they'd been provided by the caller on the stack. */
6938 for (i = 0; i < NPARM_REGS(SImode); i++)
6939 {
6940 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6941
6942 if (i >= (NPARM_REGS(SImode)
6943 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6944 ))
6945 break;
6946 push (rn);
6947 stack_usage += GET_MODE_SIZE (SImode);
6948 }
6949 }
6950 }
6951
6952 /* If we're supposed to switch stacks at function entry, do so now. */
6953 if (sp_switch_attr)
6954 {
6955 rtx lab, newsrc;
6956 /* The argument specifies a variable holding the address of the
6957 stack the interrupt function should switch to/from at entry/exit. */
6958 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6959 const char *s
6960 = ggc_strdup (TREE_STRING_POINTER (arg));
6961 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6962
6963 lab = add_constant (sp_switch, SImode, 0);
6964 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6965 newsrc = gen_const_mem (SImode, newsrc);
6966
6967 emit_insn (gen_sp_switch_1 (newsrc));
6968 }
6969
6970 d = calc_live_regs (&live_regs_mask);
6971 /* ??? Maybe we could save some switching if we can move a mode switch
6972 that already happens to be at the function start into the prologue. */
6973 if (target_flags != save_flags && ! current_function_interrupt)
6974 emit_insn (gen_toggle_sz ());
6975
6976 if (TARGET_SH5)
6977 {
6978 int offset_base, offset;
6979 rtx r0 = NULL_RTX;
6980 int offset_in_r0 = -1;
6981 int sp_in_r0 = 0;
6982 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6983 int total_size, save_size;
6984 save_schedule schedule;
6985 save_entry *entry;
6986 int *tmp_pnt;
6987
6988 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6989 && ! current_function_interrupt)
6990 r0 = gen_rtx_REG (Pmode, R0_REG);
6991
6992 /* D is the actual number of bytes that we need for saving registers;
6993 however, in initial_elimination_offset we have committed to using
6994 an additional TREGS_SPACE bytes.  In order to keep the addresses both
6995 of arguments supplied by the caller and of local variables valid,
6996 we must keep this gap.  Place it between the incoming
6997 arguments and the actually saved registers in a bid to optimize
6998 locality of reference.  */
6999 total_size = d + tregs_space;
7000 total_size += rounded_frame_size (total_size);
7001 save_size = total_size - rounded_frame_size (d);
7002 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7003 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7004 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
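      /* For example (illustrative, assuming an 8-byte stack boundary):
	 save_size == 20 gives d_rounding == 8 - 20 % 8 == 4, so that
	 save_size + d_rounding == 24 is again a multiple of 8.  */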
7005
7006 /* If adjusting the stack in a single step costs nothing extra, do so.
7007 I.e. either if a single addi is enough, or we need a movi anyway,
7008 and we don't exceed the maximum offset range (the test for the
7009 latter is conservative for simplicity). */
7010 if (TARGET_SHMEDIA
7011 && (CONST_OK_FOR_I10 (-total_size)
7012 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7013 && total_size <= 2044)))
7014 d_rounding = total_size - save_size;
7015
7016 offset_base = d + d_rounding;
7017
7018 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7019 0, NULL, true);
7020 stack_usage += save_size + d_rounding;
7021
7022 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7023 tmp_pnt = schedule.temps;
7024 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7025 {
7026 enum machine_mode mode = (enum machine_mode) entry->mode;
7027 unsigned int reg = entry->reg;
7028 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7029 rtx orig_reg_rtx;
7030
7031 offset = entry->offset;
7032
7033 reg_rtx = gen_rtx_REG (mode, reg);
7034
7035 mem_rtx = gen_frame_mem (mode,
7036 gen_rtx_PLUS (Pmode,
7037 stack_pointer_rtx,
7038 GEN_INT (offset)));
7039
7040 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7041 {
7042 gcc_assert (r0);
7043 mem_rtx = NULL_RTX;
7044 }
7045
7046 if (HAVE_PRE_DECREMENT
7047 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7048 || mem_rtx == NULL_RTX
7049 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7050 {
7051 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7052
7053 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7054 pre_dec = NULL_RTX;
7055 else
7056 {
7057 mem_rtx = NULL_RTX;
7058 offset += GET_MODE_SIZE (mode);
7059 }
7060 }
7061
7062 if (mem_rtx != NULL_RTX)
7063 goto addr_ok;
7064
7065 if (offset_in_r0 == -1)
7066 {
7067 emit_move_insn (r0, GEN_INT (offset));
7068 offset_in_r0 = offset;
7069 }
7070 else if (offset != offset_in_r0)
7071 {
7072 emit_move_insn (r0,
7073 gen_rtx_PLUS
7074 (Pmode, r0,
7075 GEN_INT (offset - offset_in_r0)));
7076 offset_in_r0 += offset - offset_in_r0;
7077 }
7078
7079 if (pre_dec != NULL_RTX)
7080 {
7081 if (! sp_in_r0)
7082 {
7083 emit_move_insn (r0,
7084 gen_rtx_PLUS
7085 (Pmode, r0, stack_pointer_rtx));
7086 sp_in_r0 = 1;
7087 }
7088
7089 offset -= GET_MODE_SIZE (mode);
7090 offset_in_r0 -= GET_MODE_SIZE (mode);
7091
7092 mem_rtx = pre_dec;
7093 }
7094 else if (sp_in_r0)
7095 mem_rtx = gen_frame_mem (mode, r0);
7096 else
7097 mem_rtx = gen_frame_mem (mode,
7098 gen_rtx_PLUS (Pmode,
7099 stack_pointer_rtx,
7100 r0));
7101
7102 /* We must not use an r0-based address for target-branch
7103 registers or for special registers without pre-dec
7104 memory addresses, since we store their values in r0
7105 first. */
7106 gcc_assert (!TARGET_REGISTER_P (reg)
7107 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7108 || mem_rtx == pre_dec));
7109
7110 addr_ok:
7111 orig_reg_rtx = reg_rtx;
7112 if (TARGET_REGISTER_P (reg)
7113 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7114 && mem_rtx != pre_dec))
7115 {
7116 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7117
7118 emit_move_insn (tmp_reg, reg_rtx);
7119
7120 if (REGNO (tmp_reg) == R0_REG)
7121 {
7122 offset_in_r0 = -1;
7123 sp_in_r0 = 0;
7124 gcc_assert (!refers_to_regno_p
7125 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7126 }
7127
7128 if (*++tmp_pnt <= 0)
7129 tmp_pnt = schedule.temps;
7130
7131 reg_rtx = tmp_reg;
7132 }
7133 {
7134 rtx insn;
7135
7136 /* Mark as interesting for dwarf cfi generator */
7137 insn = emit_move_insn (mem_rtx, reg_rtx);
7138 RTX_FRAME_RELATED_P (insn) = 1;
7139 /* If we use an intermediate register for the save, we can't
7140 describe this exactly in cfi as a copy of the to-be-saved
7141 register into the temporary register and then the temporary
7142 register on the stack, because the temporary register can
7143 have a different natural size than the to-be-saved register.
7144 Thus, we gloss over the intermediate copy and pretend we do
7145 a direct save from the to-be-saved register. */
7146 if (REGNO (reg_rtx) != reg)
7147 {
7148 rtx set;
7149
7150 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7151 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7152 }
7153
7154 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7155 {
7156 rtx reg_rtx = gen_rtx_REG (mode, reg);
7157 rtx set;
7158 rtx mem_rtx = gen_frame_mem (mode,
7159 gen_rtx_PLUS (Pmode,
7160 stack_pointer_rtx,
7161 GEN_INT (offset)));
7162
7163 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7164 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7165 }
7166 }
7167 }
7168
7169 gcc_assert (entry->offset == d_rounding);
7170 }
7171 else
7172 {
7173 push_regs (&live_regs_mask, current_function_interrupt);
7174 stack_usage += d;
7175 }
7176
7177 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7178 emit_insn (gen_GOTaddr2picreg ());
7179
7180 if (SHMEDIA_REGS_STACK_ADJUST ())
7181 {
7182 /* This must NOT go through the PLT, otherwise mach and macl
7183 may be clobbered. */
7184 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7185 (TARGET_FPU_ANY
7186 ? "__GCC_push_shmedia_regs"
7187 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7188 emit_insn (gen_shmedia_save_restore_regs_compact
7189 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7190 }
7191
7192 if (target_flags != save_flags && ! current_function_interrupt)
7193 emit_insn (gen_toggle_sz ());
7194
7195 target_flags = save_flags;
7196
7197 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7198 stack_pointer_rtx, 0, NULL, true);
7199 stack_usage += rounded_frame_size (d) - d_rounding;
7200
7201 if (frame_pointer_needed)
7202 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7203
7204 if (TARGET_SHCOMPACT
7205 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7206 {
7207 /* This must NOT go through the PLT, otherwise mach and macl
7208 may be clobbered. */
7209 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7210 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7211 emit_insn (gen_shcompact_incoming_args ());
7212 }
7213
7214 if (flag_stack_usage)
7215 current_function_static_stack_size = stack_usage;
7216 }
7217
7218 void
7219 sh_expand_epilogue (bool sibcall_p)
7220 {
7221 HARD_REG_SET live_regs_mask;
7222 int d, i;
7223 int d_rounding = 0;
7224
7225 int save_flags = target_flags;
7226 int frame_size, save_size;
7227 int fpscr_deferred = 0;
7228 int e = sibcall_p ? -1 : 1;
7229
7230 d = calc_live_regs (&live_regs_mask);
7231
7232 save_size = d;
7233 frame_size = rounded_frame_size (d);
7234
7235 if (TARGET_SH5)
7236 {
7237 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7238 int total_size;
7239 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7240 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7241 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7242
7243 total_size = d + tregs_space;
7244 total_size += rounded_frame_size (total_size);
7245 save_size = total_size - frame_size;
7246
7247 /* If adjusting the stack in a single step costs nothing extra, do so.
7248 I.e. either if a single addi is enough, or we need a movi anyway,
7249 and we don't exceed the maximum offset range (the test for the
7250 latter is conservative for simplicity). */
7251 if (TARGET_SHMEDIA
7252 && ! frame_pointer_needed
7253 && (CONST_OK_FOR_I10 (total_size)
7254 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7255 && total_size <= 2044)))
7256 d_rounding = frame_size;
7257
7258 frame_size -= d_rounding;
7259 }
7260
7261 if (frame_pointer_needed)
7262 {
7263 /* We must avoid scheduling the epilogue with previous basic blocks.
7264 See PR/18032 and PR/40313. */
7265 emit_insn (gen_blockage ());
7266 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7267 &live_regs_mask, false);
7268
7269 /* We must avoid moving the stack pointer adjustment past code
7270 which reads from the local frame, else an interrupt could
7271 occur after the SP adjustment and clobber data in the local
7272 frame. */
7273 emit_insn (gen_blockage ());
7274 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7275 }
7276 else if (frame_size)
7277 {
7278 /* We must avoid moving the stack pointer adjustment past code
7279 which reads from the local frame, else an interrupt could
7280 occur after the SP adjustment and clobber data in the local
7281 frame. */
7282 emit_insn (gen_blockage ());
7283 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7284 &live_regs_mask, false);
7285 }
7286
7287 if (SHMEDIA_REGS_STACK_ADJUST ())
7288 {
7289 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7290 (TARGET_FPU_ANY
7291 ? "__GCC_pop_shmedia_regs"
7292 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7293 /* This must NOT go through the PLT, otherwise mach and macl
7294 may be clobbered. */
7295 emit_insn (gen_shmedia_save_restore_regs_compact
7296 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7297 }
7298
7299 /* Pop all the registers. */
7300
7301 if (target_flags != save_flags && ! current_function_interrupt)
7302 emit_insn (gen_toggle_sz ());
7303 if (TARGET_SH5)
7304 {
7305 int offset_base, offset;
7306 int offset_in_r0 = -1;
7307 int sp_in_r0 = 0;
7308 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7309 save_schedule schedule;
7310 save_entry *entry;
7311 int *tmp_pnt;
7312
7313 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7314 offset_base = -entry[1].offset + d_rounding;
7315 tmp_pnt = schedule.temps;
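/* Walk the save schedule backwards and reload each saved register,
   using r0 as a base / post-increment pointer in the same way the
   prologue's save loop uses it for the stores.  */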
7316 for (; entry->mode != VOIDmode; entry--)
7317 {
7318 enum machine_mode mode = (enum machine_mode) entry->mode;
7319 int reg = entry->reg;
7320 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7321
7322 offset = offset_base + entry->offset;
7323 reg_rtx = gen_rtx_REG (mode, reg);
7324
7325 mem_rtx = gen_frame_mem (mode,
7326 gen_rtx_PLUS (Pmode,
7327 stack_pointer_rtx,
7328 GEN_INT (offset)));
7329
7330 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7331 mem_rtx = NULL_RTX;
7332
7333 if (HAVE_POST_INCREMENT
7334 && (offset == offset_in_r0
7335 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7336 && mem_rtx == NULL_RTX)
7337 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7338 {
7339 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7340
7341 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7342 post_inc = NULL_RTX;
7343 else
7344 mem_rtx = NULL_RTX;
7345 }
7346
7347 if (mem_rtx != NULL_RTX)
7348 goto addr_ok;
7349
7350 if (offset_in_r0 == -1)
7351 {
7352 emit_move_insn (r0, GEN_INT (offset));
7353 offset_in_r0 = offset;
7354 }
7355 else if (offset != offset_in_r0)
7356 {
7357 emit_move_insn (r0,
7358 gen_rtx_PLUS
7359 (Pmode, r0,
7360 GEN_INT (offset - offset_in_r0)));
7361 offset_in_r0 += offset - offset_in_r0;
7362 }
7363
7364 if (post_inc != NULL_RTX)
7365 {
7366 if (! sp_in_r0)
7367 {
7368 emit_move_insn (r0,
7369 gen_rtx_PLUS
7370 (Pmode, r0, stack_pointer_rtx));
7371 sp_in_r0 = 1;
7372 }
7373
7374 mem_rtx = post_inc;
7375
7376 offset_in_r0 += GET_MODE_SIZE (mode);
7377 }
7378 else if (sp_in_r0)
7379 mem_rtx = gen_frame_mem (mode, r0);
7380 else
7381 mem_rtx = gen_frame_mem (mode,
7382 gen_rtx_PLUS (Pmode,
7383 stack_pointer_rtx,
7384 r0));
7385
7386 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7387 || mem_rtx == post_inc);
7388
7389 addr_ok:
7390 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7391 && mem_rtx != post_inc)
7392 {
7393 emit_move_insn (r0, mem_rtx);
7394 mem_rtx = r0;
7395 }
7396 else if (TARGET_REGISTER_P (reg))
7397 {
7398 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7399
7400 /* Give the scheduler a bit of freedom by using up to
7401 MAX_TEMPS registers in a round-robin fashion. */
7402 emit_move_insn (tmp_reg, mem_rtx);
7403 mem_rtx = tmp_reg;
7404 if (*++tmp_pnt < 0)
7405 tmp_pnt = schedule.temps;
7406 }
7407
7408 emit_move_insn (reg_rtx, mem_rtx);
7409 }
7410
7411 gcc_assert (entry->offset + offset_base == d + d_rounding);
7412 }
7413 else /* ! TARGET_SH5 */
7414 {
7415 int last_reg;
7416
7417 save_size = 0;
7418 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
7419 register. */
7420 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7421 && !sh_cfun_resbank_handler_p ())
7422 {
7423 if (!frame_pointer_needed)
7424 emit_insn (gen_blockage ());
7425 pop (PR_REG);
7426 }
7427
7428 /* Banked registers are popped first to avoid being scheduled in the
7429 delay slot. RTE switches banks before its delay slot instruction. */
7430 if (current_function_interrupt)
7431 {
7432 bool use_movml = false;
7433
7434 if (TARGET_SH2A)
7435 {
7436 unsigned int count = 0;
7437
7438 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7439 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7440 count++;
7441 else
7442 break;
7443
7444 /* Use movml when all banked registers are popped. */
7445 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7446 use_movml = true;
7447 }
7448
7449 if (use_movml)
7450 {
7451 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7452
7453 /* We must avoid scheduling the multiple load insns together
7454 with other insns. */
7455 emit_insn (gen_blockage ());
7456 emit_insn (gen_movml_pop_banked (sp_reg));
7457 emit_insn (gen_blockage ());
7458 }
7459 else
7460 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7461 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7462 pop (i);
7463
7464 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7465 }
7466 else
7467 last_reg = FIRST_PSEUDO_REGISTER;
7468
7469 for (i = 0; i < last_reg; i++)
7470 {
7471 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7472
7473 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7474 && hard_reg_set_intersect_p (live_regs_mask,
7475 reg_class_contents[DF_REGS]))
7476 fpscr_deferred = 1;
7477 /* For an ISR with the RESBANK attribute assigned, don't pop the
7478 following registers: R0-R14, MACH, MACL and GBR. */
7479 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7480 && ! (sh_cfun_resbank_handler_p ()
7481 && ((j >= FIRST_GENERAL_REG
7482 && j < LAST_GENERAL_REG)
7483 || j == MACH_REG
7484 || j == MACL_REG
7485 || j == GBR_REG)))
7486 pop (j);
7487
7488 if (j == FIRST_FP_REG && fpscr_deferred)
7489 pop (FPSCR_REG);
7490 }
7491 }
7492 if (target_flags != save_flags && ! current_function_interrupt)
7493 emit_insn (gen_toggle_sz ());
7494 target_flags = save_flags;
7495
7496 output_stack_adjust (crtl->args.pretend_args_size
7497 + save_size + d_rounding
7498 + crtl->args.info.stack_regs * 8,
7499 stack_pointer_rtx, e, NULL, false);
7500
7501 if (crtl->calls_eh_return)
7502 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7503 EH_RETURN_STACKADJ_RTX));
7504
7505 /* Switch back to the normal stack if necessary. */
7506 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7507 emit_insn (gen_sp_switch_2 ());
7508
7509 /* Tell flow the insn that pops PR isn't dead. */
7510 /* PR_REG will never be live in SHmedia mode, and we don't need to
7511 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7512 by the return pattern. */
7513 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7514 emit_use (gen_rtx_REG (SImode, PR_REG));
7515 }
7516
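/* Cache for sh_need_epilogue: zero means not yet computed, 1 means an
   epilogue is needed, -1 means the epilogue would be empty.  Reset for
   each new function by sh_output_function_epilogue.  */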
7517 static int sh_need_epilogue_known = 0;
7518
7519 int
7520 sh_need_epilogue (void)
7521 {
7522 if (! sh_need_epilogue_known)
7523 {
7524 rtx epilogue;
7525
7526 start_sequence ();
7527 sh_expand_epilogue (0);
7528 epilogue = get_insns ();
7529 end_sequence ();
7530 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7531 }
7532 return sh_need_epilogue_known > 0;
7533 }
7534
7535 /* Emit code to change the current function's return address to RA.
7536 TEMP is available as a scratch register, if needed. */
7537
7538 void
7539 sh_set_return_address (rtx ra, rtx tmp)
7540 {
7541 HARD_REG_SET live_regs_mask;
7542 int d;
7543 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7544 int pr_offset;
7545
7546 d = calc_live_regs (&live_regs_mask);
7547
7548 /* If pr_reg isn't live, we can set it (or the register given in
7549 sh_media_register_for_return) directly. */
7550 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7551 {
7552 rtx rr;
7553
7554 if (TARGET_SHMEDIA)
7555 {
7556 int rr_regno = sh_media_register_for_return ();
7557
7558 if (rr_regno < 0)
7559 rr_regno = pr_reg;
7560
7561 rr = gen_rtx_REG (DImode, rr_regno);
7562 }
7563 else
7564 rr = gen_rtx_REG (SImode, pr_reg);
7565
7566 emit_insn (GEN_MOV (rr, ra));
7567 /* Tell flow the register for return isn't dead. */
7568 emit_use (rr);
7569 return;
7570 }
7571
7572 if (TARGET_SH5)
7573 {
7574 int offset;
7575 save_schedule schedule;
7576 save_entry *entry;
7577
7578 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7579 offset = entry[1].offset;
7580 for (; entry->mode != VOIDmode; entry--)
7581 if (entry->reg == pr_reg)
7582 goto found;
7583
7584 /* We can't find the PR register. */
7585 gcc_unreachable ();
7586
7587 found:
7588 offset = entry->offset - offset;
7589 pr_offset = (rounded_frame_size (d) + offset
7590 + SHMEDIA_REGS_STACK_ADJUST ());
7591 }
7592 else
7593 pr_offset = rounded_frame_size (d);
7594
7595 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7596
7597 if (frame_pointer_needed)
7598 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7599 else
7600 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7601
7602 tmp = gen_frame_mem (Pmode, tmp);
7603 emit_insn (GEN_MOV (tmp, ra));
7604 /* Tell flow this store isn't dead. */
7605 emit_use (tmp);
7606 }
7607
7608 /* Clear variables at function end. */
7609
7610 static void
7611 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7612 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7613 {
7614 sh_need_epilogue_known = 0;
7615 }
7616
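/* Expand __builtin_saveregs: make the unnamed argument registers available
   in memory and return the address of the save area; used by sh_va_start
   below.  (On SHcompact the registers are instead forced onto the stack via
   the call cookie, and const0_rtx is returned.)  */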
7617 static rtx
7618 sh_builtin_saveregs (void)
7619 {
7620 /* First unnamed integer register. */
7621 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7622 /* Number of integer registers we need to save. */
7623 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7624 /* First unnamed SFmode float reg. */
7625 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7626 /* Number of SFmode float regs to save. */
7627 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7628 rtx regbuf, fpregs;
7629 int bufsize, regno;
7630 alias_set_type alias_set;
7631
7632 if (TARGET_SH5)
7633 {
7634 if (n_intregs)
7635 {
7636 int pushregs = n_intregs;
7637
7638 while (pushregs < NPARM_REGS (SImode) - 1
7639 && (CALL_COOKIE_INT_REG_GET
7640 (crtl->args.info.call_cookie,
7641 NPARM_REGS (SImode) - pushregs)
7642 == 1))
7643 {
7644 crtl->args.info.call_cookie
7645 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7646 - pushregs, 1);
7647 pushregs++;
7648 }
7649
7650 if (pushregs == NPARM_REGS (SImode))
7651 crtl->args.info.call_cookie
7652 |= (CALL_COOKIE_INT_REG (0, 1)
7653 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7654 else
7655 crtl->args.info.call_cookie
7656 |= CALL_COOKIE_STACKSEQ (pushregs);
7657
7658 crtl->args.pretend_args_size += 8 * n_intregs;
7659 }
7660 if (TARGET_SHCOMPACT)
7661 return const0_rtx;
7662 }
7663
7664 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7665 {
7666 error ("__builtin_saveregs not supported by this subtarget");
7667 return const0_rtx;
7668 }
7669
7670 if (TARGET_SHMEDIA)
7671 n_floatregs = 0;
7672
7673 /* Allocate block of memory for the regs. */
7674 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7675 Or can assign_stack_local accept a 0 SIZE argument? */
7676 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7677
7678 if (TARGET_SHMEDIA)
7679 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7680 else if (n_floatregs & 1)
7681 {
7682 rtx addr;
7683
7684 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7685 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7686 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7687 regbuf = change_address (regbuf, BLKmode, addr);
7688 }
7689 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7690 {
7691 rtx addr, mask;
7692
7693 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7694 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7695 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7696 emit_insn (gen_andsi3 (addr, addr, mask));
7697 regbuf = change_address (regbuf, BLKmode, addr);
7698 }
7699 else
7700 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7701 alias_set = get_varargs_alias_set ();
7702 set_mem_alias_set (regbuf, alias_set);
7703
7704 /* Save int args.
7705 This is optimized to only save the regs that are necessary. Explicitly
7706 named args need not be saved. */
7707 if (n_intregs > 0)
7708 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7709 adjust_address (regbuf, BLKmode,
7710 n_floatregs * UNITS_PER_WORD),
7711 n_intregs);
7712
7713 if (TARGET_SHMEDIA)
7714 /* Return the address of the regbuf. */
7715 return XEXP (regbuf, 0);
7716
7717 /* Save float args.
7718 This is optimized to only save the regs that are necessary. Explicitly
7719 named args need not be saved.
7720 We explicitly build a pointer to the buffer because it halves the insn
7721 count when not optimizing (otherwise the pointer is built for each reg
7722 saved).
7723 We emit the moves in reverse order so that we can use predecrement. */
7724
7725 fpregs = copy_to_mode_reg (Pmode,
7726 plus_constant (XEXP (regbuf, 0),
7727 n_floatregs * UNITS_PER_WORD));
7728 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7729 {
7730 rtx mem;
7731 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7732 {
7733 emit_insn (gen_addsi3 (fpregs, fpregs,
7734 GEN_INT (-2 * UNITS_PER_WORD)));
7735 mem = change_address (regbuf, DFmode, fpregs);
7736 emit_move_insn (mem,
7737 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7738 }
7739 regno = first_floatreg;
7740 if (regno & 1)
7741 {
7742 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7743 mem = change_address (regbuf, SFmode, fpregs);
7744 emit_move_insn (mem,
7745 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7746 - (TARGET_LITTLE_ENDIAN != 0)));
7747 }
7748 }
7749 else
7750 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7751 {
7752 rtx mem;
7753
7754 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7755 mem = change_address (regbuf, SFmode, fpregs);
7756 emit_move_insn (mem,
7757 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7758 }
7759
7760 /* Return the address of the regbuf. */
7761 return XEXP (regbuf, 0);
7762 }
7763
7764 /* Define the `__builtin_va_list' type for the ABI. */
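/* For SH2E / SH4 with the GCC (non-Renesas) ABI, the record built below
   roughly corresponds to:

       struct __va_list_tag
       {
         void *__va_next_o;         -- next unnamed integer arg in the save area
         void *__va_next_o_limit;   -- end of the integer part of the save area
         void *__va_next_fp;        -- next unnamed FP arg in the save area
         void *__va_next_fp_limit;  -- end of the FP part of the save area
         void *__va_next_stack;     -- next unnamed arg passed on the stack
       };

   All other configurations just use a plain pointer.  */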
7765
7766 static tree
7767 sh_build_builtin_va_list (void)
7768 {
7769 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7770 tree record, type_decl;
7771
7772 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7773 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7774 return ptr_type_node;
7775
7776 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7777 type_decl = build_decl (BUILTINS_LOCATION,
7778 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7779
7780 f_next_o = build_decl (BUILTINS_LOCATION,
7781 FIELD_DECL, get_identifier ("__va_next_o"),
7782 ptr_type_node);
7783 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7784 FIELD_DECL,
7785 get_identifier ("__va_next_o_limit"),
7786 ptr_type_node);
7787 f_next_fp = build_decl (BUILTINS_LOCATION,
7788 FIELD_DECL, get_identifier ("__va_next_fp"),
7789 ptr_type_node);
7790 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7791 FIELD_DECL,
7792 get_identifier ("__va_next_fp_limit"),
7793 ptr_type_node);
7794 f_next_stack = build_decl (BUILTINS_LOCATION,
7795 FIELD_DECL, get_identifier ("__va_next_stack"),
7796 ptr_type_node);
7797
7798 DECL_FIELD_CONTEXT (f_next_o) = record;
7799 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7800 DECL_FIELD_CONTEXT (f_next_fp) = record;
7801 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7802 DECL_FIELD_CONTEXT (f_next_stack) = record;
7803
7804 TREE_CHAIN (record) = type_decl;
7805 TYPE_NAME (record) = type_decl;
7806 TYPE_FIELDS (record) = f_next_o;
7807 DECL_CHAIN (f_next_o) = f_next_o_limit;
7808 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7809 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7810 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7811
7812 layout_type (record);
7813
7814 return record;
7815 }
7816
7817 /* Implement `va_start' for varargs and stdarg. */
7818
7819 static void
7820 sh_va_start (tree valist, rtx nextarg)
7821 {
7822 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7823 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7824 tree t, u;
7825 int nfp, nint;
7826
7827 if (TARGET_SH5)
7828 {
7829 expand_builtin_saveregs ();
7830 std_expand_builtin_va_start (valist, nextarg);
7831 return;
7832 }
7833
7834 if ((! TARGET_SH2E && ! TARGET_SH4)
7835 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7836 {
7837 std_expand_builtin_va_start (valist, nextarg);
7838 return;
7839 }
7840
7841 f_next_o = TYPE_FIELDS (va_list_type_node);
7842 f_next_o_limit = DECL_CHAIN (f_next_o);
7843 f_next_fp = DECL_CHAIN (f_next_o_limit);
7844 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7845 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7846
7847 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7848 NULL_TREE);
7849 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7850 valist, f_next_o_limit, NULL_TREE);
7851 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7852 NULL_TREE);
7853 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7854 valist, f_next_fp_limit, NULL_TREE);
7855 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7856 valist, f_next_stack, NULL_TREE);
7857
7858 /* Call __builtin_saveregs. */
7859 u = make_tree (sizetype, expand_builtin_saveregs ());
7860 u = fold_convert (ptr_type_node, u);
7861 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7862 TREE_SIDE_EFFECTS (t) = 1;
7863 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7864
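/* The save area written by __builtin_saveregs holds the unnamed SFmode
   registers first and the unnamed SImode registers after them; at most
   8 floating point and 4 integer argument registers exist, hence the
   constants below.  */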
7865 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7866 if (nfp < 8)
7867 nfp = 8 - nfp;
7868 else
7869 nfp = 0;
7870 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7871 size_int (UNITS_PER_WORD * nfp));
7872 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7873 TREE_SIDE_EFFECTS (t) = 1;
7874 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7875
7876 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7877 TREE_SIDE_EFFECTS (t) = 1;
7878 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7879
7880 nint = crtl->args.info.arg_count[SH_ARG_INT];
7881 if (nint < 4)
7882 nint = 4 - nint;
7883 else
7884 nint = 0;
7885 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7886 size_int (UNITS_PER_WORD * nint));
7887 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7888 TREE_SIDE_EFFECTS (t) = 1;
7889 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7890
7891 u = make_tree (ptr_type_node, nextarg);
7892 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7893 TREE_SIDE_EFFECTS (t) = 1;
7894 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7895 }
7896
7897 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7898 member, return it. */
7899 static tree
7900 find_sole_member (tree type)
7901 {
7902 tree field, member = NULL_TREE;
7903
7904 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7905 {
7906 if (TREE_CODE (field) != FIELD_DECL)
7907 continue;
7908 if (!DECL_SIZE (field))
7909 return NULL_TREE;
7910 if (integer_zerop (DECL_SIZE (field)))
7911 continue;
7912 if (member)
7913 return NULL_TREE;
7914 member = field;
7915 }
7916 return member;
7917 }
7918 /* Implement `va_arg'. */
7919
7920 static tree
7921 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7922 gimple_seq *post_p ATTRIBUTE_UNUSED)
7923 {
7924 HOST_WIDE_INT size, rsize;
7925 tree tmp, pptr_type_node;
7926 tree addr, lab_over = NULL, result = NULL;
7927 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7928 tree eff_type;
7929
7930 if (pass_by_ref)
7931 type = build_pointer_type (type);
7932
7933 size = int_size_in_bytes (type);
7934 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7935 pptr_type_node = build_pointer_type (ptr_type_node);
7936
7937 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7938 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7939 {
7940 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7941 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7942 int pass_as_float;
7943 tree lab_false;
7944 tree member;
7945
7946 f_next_o = TYPE_FIELDS (va_list_type_node);
7947 f_next_o_limit = DECL_CHAIN (f_next_o);
7948 f_next_fp = DECL_CHAIN (f_next_o_limit);
7949 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7950 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7951
7952 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7953 NULL_TREE);
7954 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7955 valist, f_next_o_limit, NULL_TREE);
7956 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7957 valist, f_next_fp, NULL_TREE);
7958 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7959 valist, f_next_fp_limit, NULL_TREE);
7960 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7961 valist, f_next_stack, NULL_TREE);
7962
7963 /* Structures with a single member with a distinct mode are passed
7964 like their member. This is relevant if the latter has a REAL_TYPE
7965 or COMPLEX_TYPE type. */
7966 eff_type = type;
7967 while (TREE_CODE (eff_type) == RECORD_TYPE
7968 && (member = find_sole_member (eff_type))
7969 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7970 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7971 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7972 {
7973 tree field_type = TREE_TYPE (member);
7974
7975 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7976 eff_type = field_type;
7977 else
7978 {
7979 gcc_assert ((TYPE_ALIGN (eff_type)
7980 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7981 || (TYPE_ALIGN (eff_type)
7982 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7983 break;
7984 }
7985 }
7986
7987 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7988 {
7989 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7990 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7991 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7992 && size <= 16));
7993 }
7994 else
7995 {
7996 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7997 }
7998
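/* ADDR is a pointer to the va_list field through which this argument will
   be fetched; below it is pointed at next_fp, next_o or next_stack as
   appropriate, and LAB_OVER is the join point of the register and stack
   paths.  */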
7999 addr = create_tmp_var (pptr_type_node, NULL);
8000 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8001 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8002
8003 valist = build_simple_mem_ref (addr);
8004
8005 if (pass_as_float)
8006 {
8007 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8008 tree cmp;
8009 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8010
8011 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8012 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8013
8014 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8015 tmp = next_fp_limit;
8016 if (size > 4 && !is_double)
8017 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
8018 unshare_expr (tmp), size_int (4 - size));
8019 tmp = build2 (GE_EXPR, boolean_type_node,
8020 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8021 cmp = build3 (COND_EXPR, void_type_node, tmp,
8022 build1 (GOTO_EXPR, void_type_node,
8023 unshare_expr (lab_false)), NULL_TREE);
8024 if (!is_double)
8025 gimplify_and_add (cmp, pre_p);
8026
8027 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8028 || (is_double || size == 16))
8029 {
8030 tmp = fold_convert (sizetype, next_fp_tmp);
8031 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8032 size_int (UNITS_PER_WORD));
8033 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8034 unshare_expr (next_fp_tmp), tmp);
8035 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8036 }
8037 if (is_double)
8038 gimplify_and_add (cmp, pre_p);
8039
8040 #ifdef FUNCTION_ARG_SCmode_WART
8041 if (TYPE_MODE (eff_type) == SCmode
8042 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8043 {
8044 tree subtype = TREE_TYPE (eff_type);
8045 tree real, imag;
8046
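/* On little-endian SH4 the two SFmode halves of an SCmode argument are
   laid out in swapped order, so read the imaginary part first and then
   reassemble the complex value.  */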
8047 imag
8048 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8049 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8050
8051 real
8052 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8053 real = get_initialized_tmp_var (real, pre_p, NULL);
8054
8055 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8056 if (type != eff_type)
8057 result = build1 (VIEW_CONVERT_EXPR, type, result);
8058 result = get_initialized_tmp_var (result, pre_p, NULL);
8059 }
8060 #endif /* FUNCTION_ARG_SCmode_WART */
8061
8062 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8063 gimplify_and_add (tmp, pre_p);
8064
8065 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8066 gimplify_and_add (tmp, pre_p);
8067
8068 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8069 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8070 gimplify_assign (unshare_expr (next_fp_tmp),
8071 unshare_expr (valist), pre_p);
8072
8073 gimplify_assign (unshare_expr (valist),
8074 unshare_expr (next_fp_tmp), post_p);
8075 valist = next_fp_tmp;
8076 }
8077 else
8078 {
8079 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8080 unshare_expr (next_o), size_int (rsize));
8081 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8082 unshare_expr (next_o_limit));
8083 tmp = build3 (COND_EXPR, void_type_node, tmp,
8084 build1 (GOTO_EXPR, void_type_node,
8085 unshare_expr (lab_false)),
8086 NULL_TREE);
8087 gimplify_and_add (tmp, pre_p);
8088
8089 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8090 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8091
8092 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8093 gimplify_and_add (tmp, pre_p);
8094
8095 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8096 gimplify_and_add (tmp, pre_p);
8097
8098 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8099 gimplify_assign (unshare_expr (next_o),
8100 unshare_expr (next_o_limit), pre_p);
8101
8102 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8103 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8104 }
8105
8106 if (!result)
8107 {
8108 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8109 gimplify_and_add (tmp, pre_p);
8110 }
8111 }
8112
8113 /* ??? In va-sh.h, there had been code to make values larger than
8114 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8115
8116 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8117 if (result)
8118 {
8119 gimplify_assign (result, tmp, pre_p);
8120 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8121 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8122 gimplify_and_add (tmp, pre_p);
8123 }
8124 else
8125 result = tmp;
8126
8127 if (pass_by_ref)
8128 result = build_va_arg_indirect_ref (result);
8129
8130 return result;
8131 }
8132
8133 /* 64-bit floating point memory transfers are paired single precision loads
8134 or stores. So DWARF information needs fixing in little endian (unless
8135 PR=SZ=1 in FPSCR). */
8136 rtx
8137 sh_dwarf_register_span (rtx reg)
8138 {
8139 unsigned regno = REGNO (reg);
8140
8141 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8142 return NULL_RTX;
8143
8144 return
8145 gen_rtx_PARALLEL (VOIDmode,
8146 gen_rtvec (2,
8147 gen_rtx_REG (SFmode,
8148 DBX_REGISTER_NUMBER (regno+1)),
8149 gen_rtx_REG (SFmode,
8150 DBX_REGISTER_NUMBER (regno))));
8151 }
8152
8153 static enum machine_mode
8154 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8155 int *punsignedp, const_tree funtype,
8156 int for_return ATTRIBUTE_UNUSED)
8157 {
8158 if (sh_promote_prototypes (funtype))
8159 return promote_mode (type, mode, punsignedp);
8160 else
8161 return mode;
8162 }
8163
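/* Return whether arguments of small integer type should be promoted to
   full register width; the Renesas / Hitachi conventions do not promote,
   while a libcall (no type information available) always does.  */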
8164 static bool
8165 sh_promote_prototypes (const_tree type)
8166 {
8167 if (TARGET_HITACHI)
8168 return 0;
8169 if (! type)
8170 return 1;
8171 return ! sh_attr_renesas_p (type);
8172 }
8173
8174 /* Whether an argument must be passed by reference. On SHcompact, we
8175 pretend arguments wider than 32 bits that would have been passed in
8176 registers are passed by reference, so that an SHmedia trampoline
8177 loads them into the full 64-bit registers. */
8178
8179 static int
8180 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8181 const_tree type, bool named)
8182 {
8183 unsigned HOST_WIDE_INT size;
8184
8185 if (type)
8186 size = int_size_in_bytes (type);
8187 else
8188 size = GET_MODE_SIZE (mode);
8189
8190 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8191 && (!named
8192 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8193 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8194 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8195 && size > 4
8196 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8197 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8198 return size;
8199 else
8200 return 0;
8201 }
8202
8203 static bool
8204 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8205 const_tree type, bool named)
8206 {
8207 if (targetm.calls.must_pass_in_stack (mode, type))
8208 return true;
8209
8210 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8211 wants to know about pass-by-reference semantics for incoming
8212 arguments. */
8213 if (! cum)
8214 return false;
8215
8216 if (TARGET_SHCOMPACT)
8217 {
8218 cum->byref = shcompact_byref (cum, mode, type, named);
8219 return cum->byref != 0;
8220 }
8221
8222 return false;
8223 }
8224
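/* Return true if it is the callee, rather than the caller, that is expected
   to make the copy of an argument passed by reference (see the ??? comment
   below).  */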
8225 static bool
8226 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8227 const_tree type, bool named ATTRIBUTE_UNUSED)
8228 {
8229 /* ??? How can it possibly be correct to return true only on the
8230 caller side of the equation? Is there someplace else in the
8231 sh backend that's magically producing the copies? */
8232 return (cum->outgoing
8233 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8234 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8235 }
8236
8237 static int
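/* Return how many bytes of an argument are passed in registers when the
   whole argument does not fit there; the remainder goes on the stack.
   A return value of zero means the argument lives entirely in one place.  */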
8238 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8239 tree type, bool named ATTRIBUTE_UNUSED)
8240 {
8241 int words = 0;
8242
8243 if (!TARGET_SH5
8244 && PASS_IN_REG_P (*cum, mode, type)
8245 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8246 && (ROUND_REG (*cum, mode)
8247 + (mode != BLKmode
8248 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8249 : ROUND_ADVANCE (int_size_in_bytes (type)))
8250 > NPARM_REGS (mode)))
8251 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8252
8253 else if (!TARGET_SHCOMPACT
8254 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8255 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8256
8257 return words * UNITS_PER_WORD;
8258 }
8259
8260
8261 /* Define where to put the arguments to a function.
8262 Value is zero to push the argument on the stack,
8263 or a hard register in which to store the argument.
8264
8265 MODE is the argument's machine mode.
8266 TYPE is the data type of the argument (as a tree).
8267 This is null for libcalls where that information may
8268 not be available.
8269 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8270 the preceding args and about the function being called.
8271 NAMED is nonzero if this argument is a named parameter
8272 (otherwise it is an extra parameter matching an ellipsis).
8273
8274 On SH the first args are normally in registers
8275 and the rest are pushed. Any arg that starts within the first
8276 NPARM_REGS words is at least partially passed in a register unless
8277 its data type forbids. */
8278
8279 static rtx
8280 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8281 const_tree type, bool named)
8282 {
8283 if (! TARGET_SH5 && mode == VOIDmode)
8284 return GEN_INT (ca->renesas_abi ? 1 : 0);
8285
8286 if (! TARGET_SH5
8287 && PASS_IN_REG_P (*ca, mode, type)
8288 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8289 {
8290 int regno;
8291
8292 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8293 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8294 {
8295 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8296 gen_rtx_REG (SFmode,
8297 BASE_ARG_REG (mode)
8298 + (ROUND_REG (*ca, mode) ^ 1)),
8299 const0_rtx);
8300 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8301 gen_rtx_REG (SFmode,
8302 BASE_ARG_REG (mode)
8303 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8304 GEN_INT (4));
8305 return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
8306 }
8307
8308 /* If the alignment of a DF value causes an SF register to be
8309 skipped, we will use that skipped register for the next SF
8310 value. */
8311 if ((TARGET_HITACHI || ca->renesas_abi)
8312 && ca->free_single_fp_reg
8313 && mode == SFmode)
8314 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8315
8316 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8317 ^ (mode == SFmode && TARGET_SH4
8318 && TARGET_LITTLE_ENDIAN != 0
8319 && ! TARGET_HITACHI && ! ca->renesas_abi);
8320 return gen_rtx_REG (mode, regno);
8321
8322 }
8323
8324 if (TARGET_SH5)
8325 {
8326 if (mode == VOIDmode && TARGET_SHCOMPACT)
8327 return GEN_INT (ca->call_cookie);
8328
8329 /* The following test assumes unnamed arguments are promoted to
8330 DFmode. */
8331 if (mode == SFmode && ca->free_single_fp_reg)
8332 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8333
8334 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8335 && (named || ! ca->prototype_p)
8336 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8337 {
8338 if (! ca->prototype_p && TARGET_SHMEDIA)
8339 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8340
8341 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8342 FIRST_FP_PARM_REG
8343 + ca->arg_count[(int) SH_ARG_FLOAT]);
8344 }
8345
8346 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8347 && (! TARGET_SHCOMPACT
8348 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8349 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8350 type, named))))
8351 {
8352 return gen_rtx_REG (mode, (FIRST_PARM_REG
8353 + ca->arg_count[(int) SH_ARG_INT]));
8354 }
8355
8356 return 0;
8357 }
8358
8359 return 0;
8360 }
8361
8362 /* Update the data in CUM to advance over an argument
8363 of mode MODE and data type TYPE.
8364 (TYPE is null for libcalls where that information may not be
8365 available.) */
8366
8367 static void
8368 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8369 const_tree type, bool named)
8370 {
8371 if (ca->force_mem)
8372 ca->force_mem = 0;
8373 else if (TARGET_SH5)
8374 {
8375 const_tree type2 = (ca->byref && type
8376 ? TREE_TYPE (type)
8377 : type);
8378 enum machine_mode mode2 = (ca->byref && type
8379 ? TYPE_MODE (type2)
8380 : mode);
8381 int dwords = ((ca->byref
8382 ? ca->byref
8383 : mode2 == BLKmode
8384 ? int_size_in_bytes (type2)
8385 : GET_MODE_SIZE (mode2)) + 7) / 8;
8386 int numregs = MIN (dwords, NPARM_REGS (SImode)
8387 - ca->arg_count[(int) SH_ARG_INT]);
8388
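/* NUMREGS is the number of the argument's 8-byte words that still fit
   into the free integer argument registers.  */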
8389 if (numregs)
8390 {
8391 ca->arg_count[(int) SH_ARG_INT] += numregs;
8392 if (TARGET_SHCOMPACT
8393 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8394 {
8395 ca->call_cookie
8396 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8397 - numregs, 1);
8398 /* N.B. We want this also for outgoing. */
8399 ca->stack_regs += numregs;
8400 }
8401 else if (ca->byref)
8402 {
8403 if (! ca->outgoing)
8404 ca->stack_regs += numregs;
8405 ca->byref_regs += numregs;
8406 ca->byref = 0;
8407 do
8408 ca->call_cookie
8409 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8410 - numregs, 2);
8411 while (--numregs);
8412 ca->call_cookie
8413 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8414 - 1, 1);
8415 }
8416 else if (dwords > numregs)
8417 {
8418 int pushregs = numregs;
8419
8420 if (TARGET_SHCOMPACT)
8421 ca->stack_regs += numregs;
8422 while (pushregs < NPARM_REGS (SImode) - 1
8423 && (CALL_COOKIE_INT_REG_GET
8424 (ca->call_cookie,
8425 NPARM_REGS (SImode) - pushregs)
8426 == 1))
8427 {
8428 ca->call_cookie
8429 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8430 - pushregs, 1);
8431 pushregs++;
8432 }
8433 if (numregs == NPARM_REGS (SImode))
8434 ca->call_cookie
8435 |= CALL_COOKIE_INT_REG (0, 1)
8436 | CALL_COOKIE_STACKSEQ (numregs - 1);
8437 else
8438 ca->call_cookie
8439 |= CALL_COOKIE_STACKSEQ (numregs);
8440 }
8441 }
8442 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8443 && (named || ! ca->prototype_p))
8444 {
8445 if (mode2 == SFmode && ca->free_single_fp_reg)
8446 ca->free_single_fp_reg = 0;
8447 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8448 < NPARM_REGS (SFmode))
8449 {
8450 int numfpregs
8451 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8452 NPARM_REGS (SFmode)
8453 - ca->arg_count[(int) SH_ARG_FLOAT]);
8454
8455 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8456
8457 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8458 {
8459 if (ca->outgoing && numregs > 0)
8460 do
8461 {
8462 ca->call_cookie
8463 |= (CALL_COOKIE_INT_REG
8464 (ca->arg_count[(int) SH_ARG_INT]
8465 - numregs + ((numfpregs - 2) / 2),
8466 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8467 - numfpregs) / 2));
8468 }
8469 while (numfpregs -= 2);
8470 }
8471 else if (mode2 == SFmode && (named)
8472 && (ca->arg_count[(int) SH_ARG_FLOAT]
8473 < NPARM_REGS (SFmode)))
8474 ca->free_single_fp_reg
8475 = FIRST_FP_PARM_REG - numfpregs
8476 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8477 }
8478 }
8479 return;
8480 }
8481
8482 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8483 {
8484 /* Note that we've used the skipped register. */
8485 if (mode == SFmode && ca->free_single_fp_reg)
8486 {
8487 ca->free_single_fp_reg = 0;
8488 return;
8489 }
8490 /* When we have a DF after an SF, there's an SF register that gets
8491 skipped in order to align the DF value. We note this skipped
8492 register, because the next SF value will use it, and not the
8493 SF that follows the DF. */
8494 if (mode == DFmode
8495 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8496 {
8497 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8498 + BASE_ARG_REG (mode));
8499 }
8500 }
8501
8502 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8503 || PASS_IN_REG_P (*ca, mode, type))
8504 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8505 = (ROUND_REG (*ca, mode)
8506 + (mode == BLKmode
8507 ? ROUND_ADVANCE (int_size_in_bytes (type))
8508 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8509 }
8510
8511 /* The Renesas calling convention doesn't quite fit into this scheme since
8512 the address is passed like an invisible argument, but one that is always
8513 passed in memory. */
8514 static rtx
8515 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8516 {
8517 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8518 return 0;
8519 return gen_rtx_REG (Pmode, 2);
8520 }
8521
8522 /* Worker function for TARGET_FUNCTION_VALUE.
8523
8524 For the SH, this is like LIBCALL_VALUE, except that we must change the
8525 mode like PROMOTE_MODE does.
8526 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8527 tested here has to be kept in sync with the one in explow.c:promote_mode.
8528 */
8529
8530 static rtx
8531 sh_function_value (const_tree valtype,
8532 const_tree fn_decl_or_type,
8533 bool outgoing ATTRIBUTE_UNUSED)
8534 {
8535 if (fn_decl_or_type
8536 && !DECL_P (fn_decl_or_type))
8537 fn_decl_or_type = NULL;
8538
8539 return gen_rtx_REG (
8540 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8541 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8542 && (TREE_CODE (valtype) == INTEGER_TYPE
8543 || TREE_CODE (valtype) == ENUMERAL_TYPE
8544 || TREE_CODE (valtype) == BOOLEAN_TYPE
8545 || TREE_CODE (valtype) == REAL_TYPE
8546 || TREE_CODE (valtype) == OFFSET_TYPE))
8547 && sh_promote_prototypes (fn_decl_or_type)
8548 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8549 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8550 }
8551
8552 /* Worker function for TARGET_LIBCALL_VALUE. */
8553
8554 static rtx
8555 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8556 {
8557 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8558 }
8559
8560 /* Return true if N is a possible register number of function value. */
8561
8562 static bool
8563 sh_function_value_regno_p (const unsigned int regno)
8564 {
8565 return ((regno) == FIRST_RET_REG
8566 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8567 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8568 }
8569
8570 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8571
8572 static bool
8573 sh_return_in_memory (const_tree type, const_tree fndecl)
8574 {
8575 if (TARGET_SH5)
8576 {
8577 if (TYPE_MODE (type) == BLKmode)
8578 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8579 else
8580 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8581 }
8582 else
8583 {
8584 return (TYPE_MODE (type) == BLKmode
8585 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8586 && TREE_CODE (type) == RECORD_TYPE));
8587 }
8588 }
8589
8590 /* We actually emit the code in sh_expand_prologue. We used to use
8591 a static variable to flag that we need to emit this code, but that
8592 doesn't work with inlining, where functions are deferred and then emitted
8593 later. Fortunately, we already have two flags that are part of struct
8594 function that tell if a function uses varargs or stdarg. */
8595 static void
8596 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8597 enum machine_mode mode,
8598 tree type,
8599 int *pretend_arg_size,
8600 int second_time ATTRIBUTE_UNUSED)
8601 {
8602 gcc_assert (cfun->stdarg);
8603 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8604 {
8605 int named_parm_regs, anon_parm_regs;
8606
8607 named_parm_regs = (ROUND_REG (*ca, mode)
8608 + (mode == BLKmode
8609 ? ROUND_ADVANCE (int_size_in_bytes (type))
8610 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8611 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
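/* Whatever argument registers remain after the named parameters are used
   for anonymous arguments; reserve 4 bytes of pretend argument space for
   each of them.  */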
8612 if (anon_parm_regs > 0)
8613 *pretend_arg_size = anon_parm_regs * 4;
8614 }
8615 }
8616
8617 static bool
8618 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8619 {
8620 return TARGET_SH5;
8621 }
8622
8623 static bool
8624 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8625 {
8626 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8627 }
8628
8629
8630 /* Define the offset between two registers, one to be eliminated, and
8631 the other its replacement, at the start of a routine. */
8632
8633 int
8634 initial_elimination_offset (int from, int to)
8635 {
8636 int regs_saved;
8637 int regs_saved_rounding = 0;
8638 int total_saved_regs_space;
8639 int total_auto_space;
8640 int save_flags = target_flags;
8641 int copy_flags;
8642 HARD_REG_SET live_regs_mask;
8643
8644 shmedia_space_reserved_for_target_registers = false;
8645 regs_saved = calc_live_regs (&live_regs_mask);
8646 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8647
8648 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8649 {
8650 shmedia_space_reserved_for_target_registers = true;
8651 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8652 }
8653
8654 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8655 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8656 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8657
8658 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8659 copy_flags = target_flags;
8660 target_flags = save_flags;
8661
8662 total_saved_regs_space = regs_saved + regs_saved_rounding;
8663
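/* Between the incoming argument pointer and the hard frame pointer / stack
   pointer lie the register save area and the rounded local frame; SHcompact
   additionally reserves 8 bytes for each by-reference argument register.  */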
8664 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8665 return total_saved_regs_space + total_auto_space
8666 + crtl->args.info.byref_regs * 8;
8667
8668 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8669 return total_saved_regs_space + total_auto_space
8670 + crtl->args.info.byref_regs * 8;
8671
8672 /* Initial gap between fp and sp is 0. */
8673 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8674 return 0;
8675
8676 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8677 return rounded_frame_size (0);
8678
8679 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8680 return rounded_frame_size (0);
8681
8682 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8683 && (to == HARD_FRAME_POINTER_REGNUM
8684 || to == STACK_POINTER_REGNUM));
8685 if (TARGET_SH5)
8686 {
8687 int n = total_saved_regs_space;
8688 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8689 save_schedule schedule;
8690 save_entry *entry;
8691
8692 n += total_auto_space;
8693
8694 /* If it wasn't saved, there's not much we can do. */
8695 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8696 return n;
8697
8698 target_flags = copy_flags;
8699
8700 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8701 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8702 if (entry->reg == pr_reg)
8703 {
8704 target_flags = save_flags;
8705 return entry->offset;
8706 }
8707 gcc_unreachable ();
8708 }
8709 else
8710 return total_auto_space;
8711 }
8712
8713 /* Parse the -mfixed-range= option string. */
8714 void
8715 sh_fix_range (const char *const_str)
8716 {
8717 int i, first, last;
8718 char *str, *dash, *comma;
8719
8720 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8721 REG2 are either register names or register numbers. The effect
8722 of this option is to mark the registers in the range from REG1 to
8723 REG2 as ``fixed'' so they won't be used by the compiler. */
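/* For example, "-mfixed-range=r8-r10,r13-r13" would mark r8, r9, r10 and
   r13 as fixed and call-used.  */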
8724
8725 i = strlen (const_str);
8726 str = (char *) alloca (i + 1);
8727 memcpy (str, const_str, i + 1);
8728
8729 while (1)
8730 {
8731 dash = strchr (str, '-');
8732 if (!dash)
8733 {
8734 warning (0, "value of -mfixed-range must have form REG1-REG2");
8735 return;
8736 }
8737 *dash = '\0';
8738 comma = strchr (dash + 1, ',');
8739 if (comma)
8740 *comma = '\0';
8741
8742 first = decode_reg_name (str);
8743 if (first < 0)
8744 {
8745 warning (0, "unknown register name: %s", str);
8746 return;
8747 }
8748
8749 last = decode_reg_name (dash + 1);
8750 if (last < 0)
8751 {
8752 warning (0, "unknown register name: %s", dash + 1);
8753 return;
8754 }
8755
8756 *dash = '-';
8757
8758 if (first > last)
8759 {
8760 warning (0, "%s-%s is an empty range", str, dash + 1);
8761 return;
8762 }
8763
8764 for (i = first; i <= last; ++i)
8765 fixed_regs[i] = call_used_regs[i] = 1;
8766
8767 if (!comma)
8768 break;
8769
8770 *comma = ',';
8771 str = comma + 1;
8772 }
8773 }
8774 \f
8775 /* Insert any deferred function attributes from earlier pragmas. */
8776 static void
8777 sh_insert_attributes (tree node, tree *attributes)
8778 {
8779 tree attrs;
8780
8781 if (TREE_CODE (node) != FUNCTION_DECL)
8782 return;
8783
8784 /* We are only interested in declarations. */
8785 if (!DECL_P (node))
8786 return;
8787
8788 /* Append the attributes to the deferred attributes. */
8789 *sh_deferred_function_attributes_tail = *attributes;
8790 attrs = sh_deferred_function_attributes;
8791 if (!attrs)
8792 return;
8793
8794 /* Some attributes imply or require the interrupt attribute. */
8795 if (!lookup_attribute ("interrupt_handler", attrs)
8796 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8797 {
8798 /* If we have a trapa_handler, but no interrupt_handler attribute,
8799 insert an interrupt_handler attribute. */
8800 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8801 /* We can't use sh_pr_interrupt here because that's not in the
8802 java frontend. */
8803 attrs
8804 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8805 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8806 if the interrupt attribute is missing, we ignore the attribute
8807 and warn. */
8808 else if (lookup_attribute ("sp_switch", attrs)
8809 || lookup_attribute ("trap_exit", attrs)
8810 || lookup_attribute ("nosave_low_regs", attrs)
8811 || lookup_attribute ("resbank", attrs))
8812 {
8813 tree *tail;
8814
8815 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8816 {
8817 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8818 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8819 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8820 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8821 warning (OPT_Wattributes,
8822 "%qE attribute only applies to interrupt functions",
8823 TREE_PURPOSE (attrs));
8824 else
8825 {
8826 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8827 NULL_TREE);
8828 tail = &TREE_CHAIN (*tail);
8829 }
8830 }
8831 attrs = *attributes;
8832 }
8833 }
8834
8835 /* Install the processed list. */
8836 *attributes = attrs;
8837
8838 /* Clear deferred attributes. */
8839 sh_deferred_function_attributes = NULL_TREE;
8840 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8841
8842 return;
8843 }
8844
8845 /* Supported attributes:
8846
8847 interrupt_handler -- specifies this function is an interrupt handler.
8848
8849 trapa_handler - like above, but don't save all registers.
8850
8851 sp_switch -- specifies an alternate stack for an interrupt handler
8852 to run on.
8853
8854 trap_exit -- use a trapa to exit an interrupt function instead of
8855 an rte instruction.
8856
8857 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8858 This is useful on the SH3 and upwards,
8859 which have a separate set of low regs for User and Supervisor modes.
8860 This should only be used for the lowest level of interrupts. Higher levels
8861 of interrupts must save the registers in case they themselves are
8862 interrupted.
8863
8864 renesas -- use Renesas calling/layout conventions (functions and
8865 structures).
8866
8867 resbank -- In case of an ISR, use a register bank to save registers
8868 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8869 */
8870
8871 /* Handle a 'resbank' attribute. */
8872 static tree
8873 sh_handle_resbank_handler_attribute (tree * node, tree name,
8874 tree args ATTRIBUTE_UNUSED,
8875 int flags ATTRIBUTE_UNUSED,
8876 bool * no_add_attrs)
8877 {
8878 if (!TARGET_SH2A)
8879 {
8880 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8881 name);
8882 *no_add_attrs = true;
8883 }
8884 if (TREE_CODE (*node) != FUNCTION_DECL)
8885 {
8886 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8887 name);
8888 *no_add_attrs = true;
8889 }
8890
8891 return NULL_TREE;
8892 }
8893
8894 /* Handle an "interrupt_handler" attribute; arguments as in
8895 struct attribute_spec.handler. */
8896 static tree
8897 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8898 tree args ATTRIBUTE_UNUSED,
8899 int flags ATTRIBUTE_UNUSED,
8900 bool *no_add_attrs)
8901 {
8902 if (TREE_CODE (*node) != FUNCTION_DECL)
8903 {
8904 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8905 name);
8906 *no_add_attrs = true;
8907 }
8908 else if (TARGET_SHCOMPACT)
8909 {
8910 error ("attribute interrupt_handler is not compatible with -m5-compact");
8911 *no_add_attrs = true;
8912 }
8913
8914 return NULL_TREE;
8915 }
8916
8917 /* Handle a 'function_vector' attribute; arguments as in
8918 struct attribute_spec.handler. */
8919 static tree
8920 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8921 tree args ATTRIBUTE_UNUSED,
8922 int flags ATTRIBUTE_UNUSED,
8923 bool * no_add_attrs)
8924 {
8925 if (!TARGET_SH2A)
8926 {
8927 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8928 name);
8929 *no_add_attrs = true;
8930 }
8931 else if (TREE_CODE (*node) != FUNCTION_DECL)
8932 {
8933 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8934 name);
8935 *no_add_attrs = true;
8936 }
8937 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8938 {
8939 /* The argument must be a constant integer. */
8940 warning (OPT_Wattributes,
8941 "%qE attribute argument not an integer constant",
8942 name);
8943 *no_add_attrs = true;
8944 }
8945 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8946 {
8947 /* The argument value must be between 0 and 255. */
8948 warning (OPT_Wattributes,
8949 "%qE attribute argument should be between 0 to 255",
8950 name);
8951 *no_add_attrs = true;
8952 }
8953 return NULL_TREE;
8954 }
8955
8956 /* Returns 1 if the function referenced by the rtx X has been assigned
8957 the attribute 'function_vector'. */
8958 int
8959 sh2a_is_function_vector_call (rtx x)
8960 {
8961 if (GET_CODE (x) == SYMBOL_REF
8962 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8963 {
8964 tree tr = SYMBOL_REF_DECL (x);
8965
8966 if (sh2a_function_vector_p (tr))
8967 return 1;
8968 }
8969
8970 return 0;
8971 }
8972
8973 /* Returns the function vector number, if the attribute
8974 'function_vector' is assigned, otherwise returns zero. */
8975 int
8976 sh2a_get_function_vector_number (rtx x)
8977 {
8978 int num;
8979 tree list, t;
8980
8981 if ((GET_CODE (x) == SYMBOL_REF)
8982 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8983 {
8984 t = SYMBOL_REF_DECL (x);
8985
8986 if (TREE_CODE (t) != FUNCTION_DECL)
8987 return 0;
8988
8989 list = SH_ATTRIBUTES (t);
8990 while (list)
8991 {
8992 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8993 {
8994 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8995 return num;
8996 }
8997
8998 list = TREE_CHAIN (list);
8999 }
9000
9001 return 0;
9002 }
9003 else
9004 return 0;
9005 }
9006
9007 /* Handle an "sp_switch" attribute; arguments as in
9008 struct attribute_spec.handler. */
9009 static tree
9010 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9011 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9012 {
9013 if (TREE_CODE (*node) != FUNCTION_DECL)
9014 {
9015 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9016 name);
9017 *no_add_attrs = true;
9018 }
9019 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9020 {
9021 /* The argument must be a constant string. */
9022 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9023 name);
9024 *no_add_attrs = true;
9025 }
9026
9027 return NULL_TREE;
9028 }
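/* Illustrative declaration for the handler above (identifier names are
   examples only):

     void isr (void) __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));

   The string names the symbol whose value becomes the stack pointer while
   the handler runs; the handler above only verifies that a string constant
   was supplied.  */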
9029
9030 /* Handle a "trap_exit" attribute; arguments as in
9031 struct attribute_spec.handler. */
9032 static tree
9033 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9034 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9035 {
9036 if (TREE_CODE (*node) != FUNCTION_DECL)
9037 {
9038 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9039 name);
9040 *no_add_attrs = true;
9041 }
9042 /* The argument specifies a trap number to be used in a trapa instruction
9043 at function exit (instead of an rte instruction). */
9044 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9045 {
9046 /* The argument must be a constant integer. */
9047 warning (OPT_Wattributes, "%qE attribute argument not an "
9048 "integer constant", name);
9049 *no_add_attrs = true;
9050 }
9051
9052 return NULL_TREE;
9053 }
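/* Example declaration (illustrative only) for the attribute checked above:

     void isr (void) __attribute__ ((interrupt_handler, trap_exit (4)));

   The integer constant is the trap number used in the trapa instruction
   emitted at function exit instead of rte, as noted in the comment above.  */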
9054
9055 static tree
9056 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9057 tree name ATTRIBUTE_UNUSED,
9058 tree args ATTRIBUTE_UNUSED,
9059 int flags ATTRIBUTE_UNUSED,
9060 bool *no_add_attrs ATTRIBUTE_UNUSED)
9061 {
9062 return NULL_TREE;
9063 }
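/* The attribute itself needs no validation here; a hedged example of a
   declaration it would apply to (the function name is arbitrary):

     int callee (int a, int b) __attribute__ ((renesas));

   which marks that function type as following the Renesas ABI, the same
   effect -mrenesas has globally.  */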
9064
9065 /* True if __attribute__((renesas)) or -mrenesas. */
9066 int
9067 sh_attr_renesas_p (const_tree td)
9068 {
9069 if (TARGET_HITACHI)
9070 return 1;
9071 if (td == 0)
9072 return 0;
9073 if (DECL_P (td))
9074 td = TREE_TYPE (td);
9075 if (td == error_mark_node)
9076 return 0;
9077 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9078 != NULL_TREE);
9079 }
9080
9081 /* True if __attribute__((renesas)) or -mrenesas, for the current
9082 function. */
9083 int
9084 sh_cfun_attr_renesas_p (void)
9085 {
9086 return sh_attr_renesas_p (current_function_decl);
9087 }
9088
9089 int
9090 sh_cfun_interrupt_handler_p (void)
9091 {
9092 return (lookup_attribute ("interrupt_handler",
9093 DECL_ATTRIBUTES (current_function_decl))
9094 != NULL_TREE);
9095 }
9096
9097 /* Returns 1 if FUNC has been assigned the attribute
9098 "function_vector". */
9099 int
9100 sh2a_function_vector_p (tree func)
9101 {
9102 tree list;
9103 if (TREE_CODE (func) != FUNCTION_DECL)
9104 return 0;
9105
9106 list = SH_ATTRIBUTES (func);
9107 while (list)
9108 {
9109 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9110 return 1;
9111
9112 list = TREE_CHAIN (list);
9113 }
9114 return 0;
9115 }
9116
9117 /* Returns TRUE if the current function has the "resbank" attribute. */
9118
9119 int
9120 sh_cfun_resbank_handler_p (void)
9121 {
9122 return ((lookup_attribute ("resbank",
9123 DECL_ATTRIBUTES (current_function_decl))
9124 != NULL_TREE)
9125 && (lookup_attribute ("interrupt_handler",
9126 DECL_ATTRIBUTES (current_function_decl))
9127 != NULL_TREE) && TARGET_SH2A);
9128 }
9129
9130 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9131
9132 static const char *
9133 sh_check_pch_target_flags (int old_flags)
9134 {
9135 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9136 | MASK_SH_E | MASK_HARD_SH4
9137 | MASK_FPU_SINGLE | MASK_SH4))
9138 return _("created and used with different architectures / ABIs");
9139 if ((old_flags ^ target_flags) & MASK_HITACHI)
9140 return _("created and used with different ABIs");
9141 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9142 return _("created and used with different endianness");
9143 return NULL;
9144 }
9145 \f
9146 /* Predicates used by the templates. */
9147
9148 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9149 Used only in general_movsrc_operand. */
9150
9151 int
9152 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9153 {
9154 switch (REGNO (op))
9155 {
9156 case PR_REG:
9157 case MACL_REG:
9158 case MACH_REG:
9159 return 1;
9160 }
9161 return 0;
9162 }
9163
9164 /* Nonzero if OP is a floating point value with value 0.0. */
9165
9166 int
9167 fp_zero_operand (rtx op)
9168 {
9169 REAL_VALUE_TYPE r;
9170
9171 if (GET_MODE (op) != SFmode)
9172 return 0;
9173
9174 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9175 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9176 }
9177
9178 /* Nonzero if OP is a floating point value with value 1.0. */
9179
9180 int
9181 fp_one_operand (rtx op)
9182 {
9183 REAL_VALUE_TYPE r;
9184
9185 if (GET_MODE (op) != SFmode)
9186 return 0;
9187
9188 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9189 return REAL_VALUES_EQUAL (r, dconst1);
9190 }
9191
9192 /* In general, mode switching is used. If we are
9193 compiling without -mfmovd, movsf_ie isn't taken into account for
9194 mode switching. We could check in machine_dependent_reorg for
9195 cases where we know we are in single precision mode, but there is
9196 no interface to find that out during reload, so we must avoid
9197 choosing an fldi alternative during reload and thus failing to
9198 allocate a scratch register for the constant loading. */
9199 int
9200 fldi_ok (void)
9201 {
9202 return 1;
9203 }
9204
9205 int
9206 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9207 {
9208 enum rtx_code code = GET_CODE (op);
9209 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9210 }
9211
9212 /* Return the TLS type for TLS symbols, 0 for otherwise. */
9213 enum tls_model
9214 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9215 {
9216 if (GET_CODE (op) != SYMBOL_REF)
9217 return TLS_MODEL_NONE;
9218 return SYMBOL_REF_TLS_MODEL (op);
9219 }
9220 \f
9221 /* Return the destination address of a branch. */
9222
9223 static int
9224 branch_dest (rtx branch)
9225 {
9226 rtx dest = SET_SRC (PATTERN (branch));
9227 int dest_uid;
9228
9229 if (GET_CODE (dest) == IF_THEN_ELSE)
9230 dest = XEXP (dest, 1);
9231 dest = XEXP (dest, 0);
9232 dest_uid = INSN_UID (dest);
9233 return INSN_ADDRESSES (dest_uid);
9234 }
9235 \f
9236 /* Return nonzero if REG is not used after INSN.
9237 We assume REG is a reload reg, and therefore does
9238 not live past labels. It may live past calls or jumps though. */
9239 int
9240 reg_unused_after (rtx reg, rtx insn)
9241 {
9242 enum rtx_code code;
9243 rtx set;
9244
9245 /* If the reg is set by this instruction, then it is safe for our
9246 case. Disregard the case where this is a store to memory, since
9247 we are checking a register used in the store address. */
9248 set = single_set (insn);
9249 if (set && !MEM_P (SET_DEST (set))
9250 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9251 return 1;
9252
9253 while ((insn = NEXT_INSN (insn)))
9254 {
9255 rtx set;
9256 if (!INSN_P (insn))
9257 continue;
9258
9259 code = GET_CODE (insn);
9260
9261 #if 0
9262 /* If this is a label that existed before reload, then the register
9263 is dead here. However, if this is a label added by reorg, then
9264 the register may still be live here. We can't tell the difference,
9265 so we just ignore labels completely. */
9266 if (code == CODE_LABEL)
9267 return 1;
9268 /* else */
9269 #endif
9270
9271 if (code == JUMP_INSN)
9272 return 0;
9273
9274 /* If this is a sequence, we must handle them all at once.
9275 We could have for instance a call that sets the target register,
9276 and an insn in a delay slot that uses the register. In this case,
9277 we must return 0. */
9278 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9279 {
9280 int i;
9281 int retval = 0;
9282
9283 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9284 {
9285 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9286 rtx set = single_set (this_insn);
9287
9288 if (CALL_P (this_insn))
9289 code = CALL_INSN;
9290 else if (JUMP_P (this_insn))
9291 {
9292 if (INSN_ANNULLED_BRANCH_P (this_insn))
9293 return 0;
9294 code = JUMP_INSN;
9295 }
9296
9297 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9298 return 0;
9299 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9300 {
9301 if (!MEM_P (SET_DEST (set)))
9302 retval = 1;
9303 else
9304 return 0;
9305 }
9306 if (set == 0
9307 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9308 return 0;
9309 }
9310 if (retval == 1)
9311 return 1;
9312 else if (code == JUMP_INSN)
9313 return 0;
9314 }
9315
9316 set = single_set (insn);
9317 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9318 return 0;
9319 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9320 return !MEM_P (SET_DEST (set));
9321 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9322 return 0;
9323
9324 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9325 return 1;
9326 }
9327 return 1;
9328 }
9329 \f
9330 #include "ggc.h"
9331
9332 static GTY(()) rtx fpscr_rtx;
9333 rtx
9334 get_fpscr_rtx (void)
9335 {
9336 if (! fpscr_rtx)
9337 {
9338 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9339 REG_USERVAR_P (fpscr_rtx) = 1;
9340 mark_user_reg (fpscr_rtx);
9341 }
9342 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9343 mark_user_reg (fpscr_rtx);
9344 return fpscr_rtx;
9345 }
9346
9347 static GTY(()) tree fpscr_values;
9348
9349 static void
9350 emit_fpu_switch (rtx scratch, int index)
9351 {
9352 rtx dst, src;
9353
9354 if (fpscr_values == NULL)
9355 {
9356 tree t;
9357
9358 t = build_index_type (integer_one_node);
9359 t = build_array_type (integer_type_node, t);
9360 t = build_decl (BUILTINS_LOCATION,
9361 VAR_DECL, get_identifier ("__fpscr_values"), t);
9362 DECL_ARTIFICIAL (t) = 1;
9363 DECL_IGNORED_P (t) = 1;
9364 DECL_EXTERNAL (t) = 1;
9365 TREE_STATIC (t) = 1;
9366 TREE_PUBLIC (t) = 1;
9367 TREE_USED (t) = 1;
9368
9369 fpscr_values = t;
9370 }
9371
9372 src = DECL_RTL (fpscr_values);
9373 if (!can_create_pseudo_p ())
9374 {
9375 emit_move_insn (scratch, XEXP (src, 0));
9376 if (index != 0)
9377 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9378 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9379 }
9380 else
9381 src = adjust_address (src, PSImode, index * 4);
9382
9383 dst = get_fpscr_rtx ();
9384 emit_move_insn (dst, src);
9385 }
9386
9387 void
9388 emit_sf_insn (rtx pat)
9389 {
9390 emit_insn (pat);
9391 }
9392
9393 void
9394 emit_df_insn (rtx pat)
9395 {
9396 emit_insn (pat);
9397 }
9398
9399 void
9400 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9401 {
9402 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9403 }
9404
9405 void
9406 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9407 {
9408 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9409 get_fpscr_rtx ()));
9410 }
9411
9412 void
9413 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9414 {
9415 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9416 }
9417
9418 void
9419 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9420 {
9421 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9422 get_fpscr_rtx ()));
9423 }
9424 \f
9425 static rtx get_free_reg (HARD_REG_SET);
9426
9427 /* This function returns a register to use for loading the address from
9428 which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
9429 able to use pseudo registers after combine, or have a better mechanism
9430 for choosing a register, it should be done here. */
9431 /* REGS_LIVE is the liveness information for the point for which we
9432 need this allocation. In some bare-bones exit blocks, r1 is live at the
9433 start. We can even have all of r0..r3 being live:
9434 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9435 The INSN before which new insns are placed will clobber the register
9436 we return. If a basic block consists only of setting the return value
9437 register to a pseudo and using that register, the return value is not
9438 live before or after this block, yet we'll insert our insns right in
9439 the middle. */
9440
9441 static rtx
9442 get_free_reg (HARD_REG_SET regs_live)
9443 {
9444 if (! TEST_HARD_REG_BIT (regs_live, 1))
9445 return gen_rtx_REG (Pmode, 1);
9446
9447 /* Hard reg 1 is live; since this is a small register classes target,
9448 there shouldn't be anything but a jump before the function end. */
9449 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9450 return gen_rtx_REG (Pmode, 7);
9451 }
9452
9453 /* This function will set the fpscr from memory.
9454 MODE is the mode we are setting it to. */
9455 void
9456 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9457 {
9458 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9459 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9460 rtx addr_reg;
9461
9462 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9463 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9464 }
9465
9466 /* Is the given character a logical line separator for the assembler? */
9467 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9468 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9469 #endif
9470
9471 int
9472 sh_insn_length_adjustment (rtx insn)
9473 {
9474 /* Instructions with unfilled delay slots take up an extra two bytes for
9475 the nop in the delay slot. */
9476 if (((NONJUMP_INSN_P (insn)
9477 && GET_CODE (PATTERN (insn)) != USE
9478 && GET_CODE (PATTERN (insn)) != CLOBBER)
9479 || CALL_P (insn)
9480 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9481 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9482 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9483 return 2;
9484
9485 /* SH2e has a bug that prevents the use of annulled branches, so if
9486 the delay slot is not filled, we'll have to put a NOP in it. */
9487 if (sh_cpu_attr == CPU_SH2E
9488 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9489 && get_attr_type (insn) == TYPE_CBRANCH
9490 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9491 return 2;
9492
9493 /* sh-dsp parallel processing insns take four bytes instead of two. */
9494
9495 if (NONJUMP_INSN_P (insn))
9496 {
9497 int sum = 0;
9498 rtx body = PATTERN (insn);
9499 const char *templ;
9500 char c;
9501 int maybe_label = 1;
9502
9503 if (GET_CODE (body) == ASM_INPUT)
9504 templ = XSTR (body, 0);
9505 else if (asm_noperands (body) >= 0)
9506 templ
9507 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9508 else
9509 return 0;
9510 do
9511 {
9512 int ppi_adjust = 0;
9513
9514 do
9515 c = *templ++;
9516 while (c == ' ' || c == '\t');
9517 /* All sh-dsp parallel-processing insns start with p.
9518 The only non-ppi sh insn starting with p is pref.
9519 The only ppi starting with pr is prnd. */
9520 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9521 ppi_adjust = 2;
9522 /* The repeat pseudo-insn expands to three insns, a total of
9523 six bytes in size. */
9524 else if ((c == 'r' || c == 'R')
9525 && ! strncasecmp ("epeat", templ, 5))
9526 ppi_adjust = 4;
9527 while (c && c != '\n'
9528 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9529 {
9530 /* If this is a label, it is obviously not a ppi insn. */
9531 if (c == ':' && maybe_label)
9532 {
9533 ppi_adjust = 0;
9534 break;
9535 }
9536 else if (c == '\'' || c == '"')
9537 maybe_label = 0;
9538 c = *templ++;
9539 }
9540 sum += ppi_adjust;
9541 maybe_label = c != ':';
9542 }
9543 while (c);
9544 return sum;
9545 }
9546 return 0;
9547 }
9548 \f
9549 /* Return TRUE for a valid displacement for the REG+disp addressing
9550 with MODE. */
9551
9552 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9553 into the FRx registers. We implement this by setting the maximum offset
9554 to zero when the value is SFmode. This also restricts loading of SFmode
9555 values into the integer registers, but that can't be helped. */
9556
9557 /* The SH allows a displacement in a QI or HI mode address, but only when the
9558 other operand is R0. GCC doesn't handle this very well, so we forgo
9559 all of that.
9560
9561 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9562 DI can be any number 0..60. */
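/* A worked illustration of the rule above (register choices are arbitrary):
   for SImode, a displacement of 60 as in mov.l @(60,r4),r1 is accepted,
   while a QImode displacement such as @(1,r4) is rejected here even though
   the hardware has an R0-only form for it, per the note above.  */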
9563
9564 bool
9565 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9566 {
9567 if (CONST_INT_P (op))
9568 {
9569 if (TARGET_SHMEDIA)
9570 {
9571 int size;
9572
9573 /* Check if this is the address of an unaligned load / store. */
9574 if (mode == VOIDmode)
9575 return CONST_OK_FOR_I06 (INTVAL (op));
9576
9577 size = GET_MODE_SIZE (mode);
9578 return (!(INTVAL (op) & (size - 1))
9579 && INTVAL (op) >= -512 * size
9580 && INTVAL (op) < 512 * size);
9581 }
9582
9583 if (TARGET_SH2A)
9584 {
9585 if (GET_MODE_SIZE (mode) == 1
9586 && (unsigned) INTVAL (op) < 4096)
9587 return true;
9588 }
9589
9590 if ((GET_MODE_SIZE (mode) == 4
9591 && (unsigned) INTVAL (op) < 64
9592 && !(INTVAL (op) & 3)
9593 && !(TARGET_SH2E && mode == SFmode))
9594 || (GET_MODE_SIZE (mode) == 4
9595 && (unsigned) INTVAL (op) < 16383
9596 && !(INTVAL (op) & 3) && TARGET_SH2A))
9597 return true;
9598
9599 if ((GET_MODE_SIZE (mode) == 8
9600 && (unsigned) INTVAL (op) < 60
9601 && !(INTVAL (op) & 3)
9602 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9603 || ((GET_MODE_SIZE (mode)==8)
9604 && (unsigned) INTVAL (op) < 8192
9605 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9606 && (TARGET_SH2A && mode == DFmode)))
9607 return true;
9608 }
9609
9610 return false;
9611 }
9612
9613 /* Recognize an RTL expression that is a valid memory address for
9614 an instruction.
9615 The MODE argument is the machine mode for the MEM expression
9616 that wants to use this address.
9617 Allow REG
9618 REG+disp
9619 REG+r0
9620 REG++
9621 --REG */
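/* Assembly equivalents of the accepted forms, for illustration (registers
   chosen arbitrarily): @r4, @(8,r4), @(r0,r4), @r4+ and @-r4 respectively.  */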
9622
9623 static bool
9624 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9625 {
9626 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9627 return true;
9628 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9629 && ! TARGET_SHMEDIA
9630 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9631 return true;
9632 else if (GET_CODE (x) == PLUS
9633 && (mode != PSImode || reload_completed))
9634 {
9635 rtx xop0 = XEXP (x, 0);
9636 rtx xop1 = XEXP (x, 1);
9637
9638 if (GET_MODE_SIZE (mode) <= 8
9639 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9640 && sh_legitimate_index_p (mode, xop1))
9641 return true;
9642
9643 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9644 || ((xop0 == stack_pointer_rtx
9645 || xop0 == hard_frame_pointer_rtx)
9646 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9647 || ((xop1 == stack_pointer_rtx
9648 || xop1 == hard_frame_pointer_rtx)
9649 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9650 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9651 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9652 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9653 && TARGET_FMOVD && mode == DFmode)))
9654 {
9655 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9656 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9657 return true;
9658 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9659 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9660 return true;
9661 }
9662 }
9663
9664 return false;
9665 }
9666 \f
9667 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9668 isn't protected by a PIC unspec. */
9669 int
9670 nonpic_symbol_mentioned_p (rtx x)
9671 {
9672 register const char *fmt;
9673 register int i;
9674
9675 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9676 || GET_CODE (x) == PC)
9677 return 1;
9678
9679 /* We don't want to look into the possible MEM location of a
9680 CONST_DOUBLE, since we're not going to use it, in general. */
9681 if (GET_CODE (x) == CONST_DOUBLE)
9682 return 0;
9683
9684 if (GET_CODE (x) == UNSPEC
9685 && (XINT (x, 1) == UNSPEC_PIC
9686 || XINT (x, 1) == UNSPEC_GOT
9687 || XINT (x, 1) == UNSPEC_GOTOFF
9688 || XINT (x, 1) == UNSPEC_GOTPLT
9689 || XINT (x, 1) == UNSPEC_GOTTPOFF
9690 || XINT (x, 1) == UNSPEC_DTPOFF
9691 || XINT (x, 1) == UNSPEC_TPOFF
9692 || XINT (x, 1) == UNSPEC_PLT
9693 || XINT (x, 1) == UNSPEC_SYMOFF
9694 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9695 return 0;
9696
9697 fmt = GET_RTX_FORMAT (GET_CODE (x));
9698 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9699 {
9700 if (fmt[i] == 'E')
9701 {
9702 register int j;
9703
9704 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9705 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9706 return 1;
9707 }
9708 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9709 return 1;
9710 }
9711
9712 return 0;
9713 }
9714
9715 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9716 @GOTOFF in `reg'. */
9717 rtx
9718 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9719 rtx reg)
9720 {
9721 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9722 return orig;
9723
9724 if (GET_CODE (orig) == LABEL_REF
9725 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9726 {
9727 if (reg == 0)
9728 reg = gen_reg_rtx (Pmode);
9729
9730 emit_insn (gen_symGOTOFF2reg (reg, orig));
9731 return reg;
9732 }
9733 else if (GET_CODE (orig) == SYMBOL_REF)
9734 {
9735 if (reg == 0)
9736 reg = gen_reg_rtx (Pmode);
9737
9738 emit_insn (gen_symGOT2reg (reg, orig));
9739 return reg;
9740 }
9741 return orig;
9742 }
9743
9744 /* Try machine-dependent ways of modifying an illegitimate address
9745 to be legitimate. If we find one, return the new, valid address.
9746 Otherwise, return X.
9747
9748 For the SH, if X is almost suitable for indexing, but the offset is
9749 out of range, convert it into a normal form so that CSE has a chance
9750 of reducing the number of address registers used. */
9751
9752 static rtx
9753 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9754 {
9755 if (flag_pic)
9756 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9757
9758 if (GET_CODE (x) == PLUS
9759 && (GET_MODE_SIZE (mode) == 4
9760 || GET_MODE_SIZE (mode) == 8)
9761 && CONST_INT_P (XEXP (x, 1))
9762 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9763 && ! TARGET_SHMEDIA
9764 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9765 && ! (TARGET_SH2E && mode == SFmode))
9766 {
9767 rtx index_rtx = XEXP (x, 1);
9768 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9769 rtx sum;
9770
9771 /* On rare occasions, we might get an unaligned pointer
9772 that is indexed in a way to give an aligned address.
9773 Therefore, keep the lower two bits in offset_base. */
9774 /* Instead of offset_base 128..131 use 124..127, so that
9775 simple add suffices. */
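/* A hedged numeric sketch of the transformation below: for offset == 131,
   offset_base becomes ((131 + 4) & ~60) - 4 == 127, which fits a simple
   add, and the residual displacement 131 - 127 == 4 stays within the
   reg+disp range while the low two bits of the original offset are
   preserved.  */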
9776 if (offset > 127)
9777 offset_base = ((offset + 4) & ~60) - 4;
9778 else
9779 offset_base = offset & ~60;
9780
9781 /* Sometimes the normal form does not suit DImode. We
9782 could avoid that by using smaller ranges, but that
9783 would give less optimized code when SImode is
9784 prevalent. */
9785 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9786 {
9787 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9788 GEN_INT (offset_base), NULL_RTX, 0,
9789 OPTAB_LIB_WIDEN);
9790
9791 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9792 }
9793 }
9794
9795 return x;
9796 }
9797
9798 /* Attempt to replace *P, which is an address that needs reloading, with
9799 a valid memory address for an operand of mode MODE.
9800 Like for sh_legitimize_address, for the SH we try to get a normal form
9801 of the address. That will allow inheritance of the address reloads. */
9802
9803 bool
9804 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9805 int itype)
9806 {
9807 enum reload_type type = (enum reload_type) itype;
9808
9809 if (GET_CODE (*p) == PLUS
9810 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9811 && CONST_INT_P (XEXP (*p, 1))
9812 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9813 && ! TARGET_SHMEDIA
9814 && ! (TARGET_SH4 && mode == DFmode)
9815 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9816 && (ALLOW_INDEXED_ADDRESS
9817 || XEXP (*p, 0) == stack_pointer_rtx
9818 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9819 {
9820 rtx index_rtx = XEXP (*p, 1);
9821 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9822 rtx sum;
9823
9824 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9825 {
9826 push_reload (*p, NULL_RTX, p, NULL,
9827 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9828 goto win;
9829 }
9830 if (TARGET_SH2E && mode == SFmode)
9831 {
9832 *p = copy_rtx (*p);
9833 push_reload (*p, NULL_RTX, p, NULL,
9834 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9835 goto win;
9836 }
9837 /* Instead of offset_base 128..131 use 124..127, so that
9838 simple add suffices. */
9839 if (offset > 127)
9840 offset_base = ((offset + 4) & ~60) - 4;
9841 else
9842 offset_base = offset & ~60;
9843 /* Sometimes the normal form does not suit DImode. We could avoid
9844 that by using smaller ranges, but that would give less optimized
9845 code when SImode is prevalent. */
9846 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9847 {
9848 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9849 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9850 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9851 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9852 goto win;
9853 }
9854 }
9855 /* We must re-recognize what we created before. */
9856 else if (GET_CODE (*p) == PLUS
9857 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9858 && GET_CODE (XEXP (*p, 0)) == PLUS
9859 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9860 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9861 && CONST_INT_P (XEXP (*p, 1))
9862 && ! TARGET_SHMEDIA
9863 && ! (TARGET_SH2E && mode == SFmode))
9864 {
9865 /* Because this address is so complex, we know it must have
9866 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9867 it is already unshared, and needs no further unsharing. */
9868 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9869 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9870 goto win;
9871 }
9872
9873 return false;
9874
9875 win:
9876 return true;
9877 }
9878
9879 /* Mark the use of a constant in the literal table. If the constant
9880 has multiple labels, make it unique. */
9881 static rtx
9882 mark_constant_pool_use (rtx x)
9883 {
9884 rtx insn, lab, pattern;
9885
9886 if (x == NULL)
9887 return x;
9888
9889 switch (GET_CODE (x))
9890 {
9891 case LABEL_REF:
9892 x = XEXP (x, 0);
9893 case CODE_LABEL:
9894 break;
9895 default:
9896 return x;
9897 }
9898
9899 /* Get the first label in the list of labels for the same constant
9900 and delete the other labels in the list. */
9901 lab = x;
9902 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9903 {
9904 if (!LABEL_P (insn)
9905 || LABEL_REFS (insn) != NEXT_INSN (insn))
9906 break;
9907 lab = insn;
9908 }
9909
9910 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9911 INSN_DELETED_P (insn) = 1;
9912
9913 /* Mark constants in a window. */
9914 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9915 {
9916 if (!NONJUMP_INSN_P (insn))
9917 continue;
9918
9919 pattern = PATTERN (insn);
9920 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9921 continue;
9922
9923 switch (XINT (pattern, 1))
9924 {
9925 case UNSPECV_CONST2:
9926 case UNSPECV_CONST4:
9927 case UNSPECV_CONST8:
9928 XVECEXP (pattern, 0, 1) = const1_rtx;
9929 break;
9930 case UNSPECV_WINDOW_END:
9931 if (XVECEXP (pattern, 0, 0) == x)
9932 return lab;
9933 break;
9934 case UNSPECV_CONST_END:
9935 return lab;
9936 default:
9937 break;
9938 }
9939 }
9940
9941 return lab;
9942 }
9943 \f
9944 /* Return true if it's possible to redirect BRANCH1 to the destination
9945 of an unconditional jump BRANCH2. We only want to do this if the
9946 resulting branch will have a short displacement. */
9947 int
9948 sh_can_redirect_branch (rtx branch1, rtx branch2)
9949 {
9950 if (flag_expensive_optimizations && simplejump_p (branch2))
9951 {
9952 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9953 rtx insn;
9954 int distance;
9955
9956 for (distance = 0, insn = NEXT_INSN (branch1);
9957 insn && distance < 256;
9958 insn = PREV_INSN (insn))
9959 {
9960 if (insn == dest)
9961 return 1;
9962 else
9963 distance += get_attr_length (insn);
9964 }
9965 for (distance = 0, insn = NEXT_INSN (branch1);
9966 insn && distance < 256;
9967 insn = NEXT_INSN (insn))
9968 {
9969 if (insn == dest)
9970 return 1;
9971 else
9972 distance += get_attr_length (insn);
9973 }
9974 }
9975 return 0;
9976 }
9977
9978 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9979 int
9980 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9981 unsigned int new_reg)
9982 {
9983 /* Interrupt functions can only use registers that have already been
9984 saved by the prologue, even if they would normally be
9985 call-clobbered. */
9986
9987 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9988 return 0;
9989
9990 return 1;
9991 }
9992
9993 /* Function to update the integer COST
9994 based on the relationship between INSN that is dependent on
9995 DEP_INSN through the dependence LINK. The default is to make no
9996 adjustment to COST. This can be used for example to specify to
9997 the scheduler that an output- or anti-dependence does not incur
9998 the same cost as a data-dependence. The return value should be
9999 the new value for COST. */
10000 static int
10001 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10002 {
10003 rtx reg, use_pat;
10004
10005 if (TARGET_SHMEDIA)
10006 {
10007 /* On SHmedia, if the dependence is an anti-dependence or
10008 output-dependence, there is no cost. */
10009 if (REG_NOTE_KIND (link) != 0)
10010 {
10011 /* However, dependencies between target register loads and
10012 uses of the register in a subsequent block that are separated
10013 by a conditional branch are not modelled - we have to make do with
10014 the anti-dependency between the target register load and the
10015 conditional branch that ends the current block. */
10016 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10017 && GET_CODE (PATTERN (dep_insn)) == SET
10018 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10019 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10020 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10021 {
10022 int orig_cost = cost;
10023 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10024 rtx target = ((! note
10025 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10026 ? insn : JUMP_LABEL (insn));
10027 /* On the likely path, the branch costs 1, on the unlikely path,
10028 it costs 3. */
10029 cost--;
10030 do
10031 target = next_active_insn (target);
10032 while (target && ! flow_dependent_p (target, dep_insn)
10033 && --cost > 0);
10034 /* If two branches are executed in immediate succession, with the
10035 first branch properly predicted, this causes a stall at the
10036 second branch, hence we won't need the target for the
10037 second branch for two cycles after the launch of the first
10038 branch. */
10039 if (cost > orig_cost - 2)
10040 cost = orig_cost - 2;
10041 }
10042 else
10043 cost = 0;
10044 }
10045
10046 else if (get_attr_is_mac_media (insn)
10047 && get_attr_is_mac_media (dep_insn))
10048 cost = 1;
10049
10050 else if (! reload_completed
10051 && GET_CODE (PATTERN (insn)) == SET
10052 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10053 && GET_CODE (PATTERN (dep_insn)) == SET
10054 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10055 && cost < 4)
10056 cost = 4;
10057 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10058 that is needed at the target. */
10059 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10060 && ! flow_dependent_p (insn, dep_insn))
10061 cost--;
10062 }
10063 else if (REG_NOTE_KIND (link) == 0)
10064 {
10065 enum attr_type type;
10066 rtx dep_set;
10067
10068 if (recog_memoized (insn) < 0
10069 || recog_memoized (dep_insn) < 0)
10070 return cost;
10071
10072 dep_set = single_set (dep_insn);
10073
10074 /* The latency that we specify in the scheduling description refers
10075 to the actual output, not to an auto-increment register; for that,
10076 the latency is one. */
10077 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10078 {
10079 rtx set = single_set (insn);
10080
10081 if (set
10082 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10083 && (!MEM_P (SET_DEST (set))
10084 || !reg_mentioned_p (SET_DEST (dep_set),
10085 XEXP (SET_DEST (set), 0))))
10086 cost = 1;
10087 }
10088 /* The only input for a call that is timing-critical is the
10089 function's address. */
10090 if (CALL_P (insn))
10091 {
10092 rtx call = PATTERN (insn);
10093
10094 if (GET_CODE (call) == PARALLEL)
10095 call = XVECEXP (call, 0 ,0);
10096 if (GET_CODE (call) == SET)
10097 call = SET_SRC (call);
10098 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10099 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10100 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10101 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10102 cost -= TARGET_SH4_300 ? 3 : 6;
10103 }
10104 /* Likewise, the most timing critical input for an sfuncs call
10105 is the function address. However, sfuncs typically start
10106 using their arguments pretty quickly.
10107 Assume a four cycle delay for SH4 before they are needed.
10108 Cached ST40-300 calls are quicker, so assume only a one
10109 cycle delay there.
10110 ??? Maybe we should encode the delays till input registers
10111 are needed by sfuncs into the sfunc call insn. */
10112 /* All sfunc calls are parallels with at least four components.
10113 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10114 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10115 && XVECLEN (PATTERN (insn), 0) >= 4
10116 && (reg = sfunc_uses_reg (insn)))
10117 {
10118 if (! reg_set_p (reg, dep_insn))
10119 cost -= TARGET_SH4_300 ? 1 : 4;
10120 }
10121 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10122 {
10123 enum attr_type dep_type = get_attr_type (dep_insn);
10124
10125 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10126 cost--;
10127 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10128 && (type = get_attr_type (insn)) != TYPE_CALL
10129 && type != TYPE_SFUNC)
10130 cost--;
10131 /* When the preceding instruction loads the shift amount of
10132 the following SHAD/SHLD, the latency of the load is increased
10133 by 1 cycle. */
10134 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10135 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10136 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10137 XEXP (SET_SRC (single_set (insn)),
10138 1)))
10139 cost++;
10140 /* When an LS group instruction with a latency of less than
10141 3 cycles is followed by a double-precision floating-point
10142 instruction, FIPR, or FTRV, the latency of the first
10143 instruction is increased to 3 cycles. */
10144 else if (cost < 3
10145 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10146 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10147 cost = 3;
10148 /* The lsw register of a double-precision computation is ready one
10149 cycle earlier. */
10150 else if (reload_completed
10151 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10152 && (use_pat = single_set (insn))
10153 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10154 SET_SRC (use_pat)))
10155 cost -= 1;
10156
10157 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10158 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10159 cost -= 1;
10160 }
10161 else if (TARGET_SH4_300)
10162 {
10163 /* Stores need their input register two cycles later. */
10164 if (dep_set && cost >= 1
10165 && ((type = get_attr_type (insn)) == TYPE_STORE
10166 || type == TYPE_PSTORE
10167 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10168 {
10169 rtx set = single_set (insn);
10170
10171 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10172 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10173 {
10174 cost -= 2;
10175 /* But don't reduce the cost below 1 if the address depends
10176 on a side effect of dep_insn. */
10177 if (cost < 1
10178 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10179 cost = 1;
10180 }
10181 }
10182 }
10183 }
10184 /* An anti-dependence penalty of two applies if the first insn is a double
10185 precision fadd / fsub / fmul. */
10186 else if (!TARGET_SH4_300
10187 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10188 && recog_memoized (dep_insn) >= 0
10189 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10190 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10191 /* A lot of alleged anti-flow dependences are fake,
10192 so check this one is real. */
10193 && flow_dependent_p (dep_insn, insn))
10194 cost = 2;
10195
10196 return cost;
10197 }
10198
10199 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10200 if DEP_INSN is anti-flow dependent on INSN. */
10201 static int
10202 flow_dependent_p (rtx insn, rtx dep_insn)
10203 {
10204 rtx tmp = PATTERN (insn);
10205
10206 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10207 return tmp == NULL_RTX;
10208 }
10209
10210 /* A helper function for flow_dependent_p called through note_stores. */
10211 static void
10212 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10213 {
10214 rtx * pinsn = (rtx *) data;
10215
10216 if (*pinsn && reg_referenced_p (x, *pinsn))
10217 *pinsn = NULL_RTX;
10218 }
10219
10220 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10221 'special function' patterns (type sfunc) that clobber pr, but that
10222 do not look like function calls to leaf_function_p. Hence we must
10223 do this extra check. */
10224 static int
10225 sh_pr_n_sets (void)
10226 {
10227 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10228 }
10229
10230 /* Return where to allocate pseudo for a given hard register initial
10231 value. */
10232 static rtx
10233 sh_allocate_initial_value (rtx hard_reg)
10234 {
10235 rtx x;
10236
10237 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10238 {
10239 if (current_function_is_leaf
10240 && ! sh_pr_n_sets ()
10241 && ! (TARGET_SHCOMPACT
10242 && ((crtl->args.info.call_cookie
10243 & ~ CALL_COOKIE_RET_TRAMP (1))
10244 || crtl->saves_all_registers)))
10245 x = hard_reg;
10246 else
10247 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10248 }
10249 else
10250 x = NULL_RTX;
10251
10252 return x;
10253 }
10254
10255 /* This function returns "2" to indicate dual issue for the SH4
10256 processor. To be used by the DFA pipeline description. */
10257 static int
10258 sh_issue_rate (void)
10259 {
10260 if (TARGET_SUPERSCALAR)
10261 return 2;
10262 else
10263 return 1;
10264 }
10265
10266 /* Functions for ready queue reordering for sched1. */
10267
10268 /* Get the weight for MODE of a single SET or CLOBBER expression X. */
10269 static short
10270 find_set_regmode_weight (rtx x, enum machine_mode mode)
10271 {
10272 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10273 return 1;
10274 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10275 {
10276 if (REG_P (SET_DEST (x)))
10277 {
10278 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10279 return 1;
10280 else
10281 return 0;
10282 }
10283 return 1;
10284 }
10285 return 0;
10286 }
10287
10288 /* Get regmode weight for insn. */
10289 static short
10290 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10291 {
10292 short reg_weight = 0;
10293 rtx x;
10294
10295 /* Increment weight for each register born here. */
10296 x = PATTERN (insn);
10297 reg_weight += find_set_regmode_weight (x, mode);
10298 if (GET_CODE (x) == PARALLEL)
10299 {
10300 int j;
10301 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10302 {
10303 x = XVECEXP (PATTERN (insn), 0, j);
10304 reg_weight += find_set_regmode_weight (x, mode);
10305 }
10306 }
10307 /* Decrement weight for each register that dies here. */
10308 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10309 {
10310 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10311 {
10312 rtx note = XEXP (x, 0);
10313 if (REG_P (note) && GET_MODE (note) == mode)
10314 reg_weight--;
10315 }
10316 }
10317 return reg_weight;
10318 }
10319
10320 /* Calculate regmode weights for all insns of a basic block. */
10321 static void
10322 find_regmode_weight (basic_block b, enum machine_mode mode)
10323 {
10324 rtx insn, next_tail, head, tail;
10325
10326 get_ebb_head_tail (b, b, &head, &tail);
10327 next_tail = NEXT_INSN (tail);
10328
10329 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10330 {
10331 /* Handle register life information. */
10332 if (!INSN_P (insn))
10333 continue;
10334
10335 if (mode == SFmode)
10336 INSN_REGMODE_WEIGHT (insn, mode) =
10337 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10338 else if (mode == SImode)
10339 INSN_REGMODE_WEIGHT (insn, mode) =
10340 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10341 }
10342 }
10343
10344 /* Comparison function for ready queue sorting. */
10345 static int
10346 rank_for_reorder (const void *x, const void *y)
10347 {
10348 rtx tmp = *(const rtx *) y;
10349 rtx tmp2 = *(const rtx *) x;
10350
10351 /* The insn in a schedule group should be issued first. */
10352 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10353 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10354
10355 /* If insns are equally good, sort by INSN_LUID (original insn order). This
10356 minimizes instruction movement, thus minimizing sched's effect on
10357 register pressure. */
10358 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10359 }
10360
10361 /* Resort the array A in which only the element at index N may be out of order. */
10362 static void
10363 swap_reorder (rtx *a, int n)
10364 {
10365 rtx insn = a[n - 1];
10366 int i = n - 2;
10367
10368 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10369 {
10370 a[i + 1] = a[i];
10371 i -= 1;
10372 }
10373 a[i + 1] = insn;
10374 }
10375
10376 #define SCHED_REORDER(READY, N_READY) \
10377 do \
10378 { \
10379 if ((N_READY) == 2) \
10380 swap_reorder (READY, N_READY); \
10381 else if ((N_READY) > 2) \
10382 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10383 } \
10384 while (0)
10385
10386 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10387 macro. */
10388 static void
10389 ready_reorder (rtx *ready, int nready)
10390 {
10391 SCHED_REORDER (ready, nready);
10392 }
10393
10394 /* Count life regions of r0 for a block. */
10395 static int
10396 find_r0_life_regions (basic_block b)
10397 {
10398 rtx end, insn;
10399 rtx pset;
10400 rtx r0_reg;
10401 int live;
10402 int set;
10403 int death = 0;
10404
10405 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10406 {
10407 set = 1;
10408 live = 1;
10409 }
10410 else
10411 {
10412 set = 0;
10413 live = 0;
10414 }
10415
10416 insn = BB_HEAD (b);
10417 end = BB_END (b);
10418 r0_reg = gen_rtx_REG (SImode, R0_REG);
10419 while (1)
10420 {
10421 if (INSN_P (insn))
10422 {
10423 if (find_regno_note (insn, REG_DEAD, R0_REG))
10424 {
10425 death++;
10426 live = 0;
10427 }
10428 if (!live
10429 && (pset = single_set (insn))
10430 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10431 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10432 {
10433 set++;
10434 live = 1;
10435 }
10436 }
10437 if (insn == end)
10438 break;
10439 insn = NEXT_INSN (insn);
10440 }
10441 return set - death;
10442 }
10443
10444 /* Calculate regmode weights for all insns of all basic blocks. */
10445 static void
10446 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10447 int verbose ATTRIBUTE_UNUSED,
10448 int old_max_uid)
10449 {
10450 basic_block b;
10451
10452 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10453 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10454 r0_life_regions = 0;
10455
10456 FOR_EACH_BB_REVERSE (b)
10457 {
10458 find_regmode_weight (b, SImode);
10459 find_regmode_weight (b, SFmode);
10460 if (!reload_completed)
10461 r0_life_regions += find_r0_life_regions (b);
10462 }
10463
10464 CURR_REGMODE_PRESSURE (SImode) = 0;
10465 CURR_REGMODE_PRESSURE (SFmode) = 0;
10466
10467 }
10468
10469 /* Cleanup. */
10470 static void
10471 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10472 int verbose ATTRIBUTE_UNUSED)
10473 {
10474 if (regmode_weight[0])
10475 {
10476 free (regmode_weight[0]);
10477 regmode_weight[0] = NULL;
10478 }
10479 if (regmode_weight[1])
10480 {
10481 free (regmode_weight[1]);
10482 regmode_weight[1] = NULL;
10483 }
10484 }
10485
10486 /* The set of scalar modes supported differs from the default version in
10487 TImode for 32-bit SHMEDIA. */
10488 static bool
10489 sh_scalar_mode_supported_p (enum machine_mode mode)
10490 {
10491 if (TARGET_SHMEDIA32 && mode == TImode)
10492 return false;
10493
10494 return default_scalar_mode_supported_p (mode);
10495 }
10496
10497 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10498 keep count of register pressures on SImode and SFmode. */
10499 static int
10500 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10501 int sched_verbose ATTRIBUTE_UNUSED,
10502 rtx insn,
10503 int can_issue_more)
10504 {
10505 if (GET_CODE (PATTERN (insn)) != USE
10506 && GET_CODE (PATTERN (insn)) != CLOBBER)
10507 cached_can_issue_more = can_issue_more - 1;
10508 else
10509 cached_can_issue_more = can_issue_more;
10510
10511 if (reload_completed)
10512 return cached_can_issue_more;
10513
10514 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10515 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10516
10517 return cached_can_issue_more;
10518 }
10519
10520 static void
10521 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10522 int verbose ATTRIBUTE_UNUSED,
10523 int veclen ATTRIBUTE_UNUSED)
10524 {
10525 CURR_REGMODE_PRESSURE (SImode) = 0;
10526 CURR_REGMODE_PRESSURE (SFmode) = 0;
10527 }
10528
10529 /* Some magic numbers. */
10530 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10531 functions that already have high pressure on r0. */
10532 #define R0_MAX_LIFE_REGIONS 2
10533 /* Register Pressure thresholds for SImode and SFmode registers. */
10534 #define SIMODE_MAX_WEIGHT 5
10535 #define SFMODE_MAX_WEIGHT 10
10536
10537 /* Return true if the pressure is high for MODE. */
10538 static short
10539 high_pressure (enum machine_mode mode)
10540 {
10541 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10542 functions that already have high pressure on r0. */
10543 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10544 return 1;
10545
10546 if (mode == SFmode)
10547 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10548 else
10549 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10550 }
10551
10552 /* Reorder ready queue if register pressure is high. */
10553 static int
10554 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10555 int sched_verbose ATTRIBUTE_UNUSED,
10556 rtx *ready,
10557 int *n_readyp,
10558 int clock_var ATTRIBUTE_UNUSED)
10559 {
10560 if (reload_completed)
10561 return sh_issue_rate ();
10562
10563 if (high_pressure (SFmode) || high_pressure (SImode))
10564 {
10565 ready_reorder (ready, *n_readyp);
10566 }
10567
10568 return sh_issue_rate ();
10569 }
10570
10571 /* Skip cycles if the current register pressure is high. */
10572 static int
10573 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10574 int sched_verbose ATTRIBUTE_UNUSED,
10575 rtx *ready ATTRIBUTE_UNUSED,
10576 int *n_readyp ATTRIBUTE_UNUSED,
10577 int clock_var ATTRIBUTE_UNUSED)
10578 {
10579 if (reload_completed)
10580 return cached_can_issue_more;
10581
10582 if (high_pressure(SFmode) || high_pressure (SImode))
10583 skip_cycles = 1;
10584
10585 return cached_can_issue_more;
10586 }
10587
10588 /* Skip cycles without sorting the ready queue. This will move insns from
10589 Q->R. If this is the last cycle we are skipping, allow sorting of the
10590 ready queue by sh_reorder. */
10591
10592 /* Generally, skipping this many cycles is sufficient for all insns to move
10593 from Q -> R. */
10594 #define MAX_SKIPS 8
10595
10596 static int
10597 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10598 int sched_verbose ATTRIBUTE_UNUSED,
10599 rtx insn ATTRIBUTE_UNUSED,
10600 int last_clock_var,
10601 int clock_var,
10602 int *sort_p)
10603 {
10604 if (reload_completed)
10605 return 0;
10606
10607 if (skip_cycles)
10608 {
10609 if ((clock_var - last_clock_var) < MAX_SKIPS)
10610 {
10611 *sort_p = 0;
10612 return 1;
10613 }
10614 /* If this is the last cycle we are skipping, allow reordering of R. */
10615 if ((clock_var - last_clock_var) == MAX_SKIPS)
10616 {
10617 *sort_p = 1;
10618 return 1;
10619 }
10620 }
10621
10622 skip_cycles = 0;
10623
10624 return 0;
10625 }
10626
10627 /* SHmedia requires registers for branches, so we can't generate new
10628 branches past reload. */
10629 static bool
10630 sh_cannot_modify_jumps_p (void)
10631 {
10632 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10633 }
10634
10635 static reg_class_t
10636 sh_target_reg_class (void)
10637 {
10638 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10639 }
10640
10641 static bool
10642 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10643 {
10644 HARD_REG_SET dummy;
10645 #if 0
10646 rtx insn;
10647 #endif
10648
10649 if (! shmedia_space_reserved_for_target_registers)
10650 return 0;
10651 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10652 return 0;
10653 if (calc_live_regs (&dummy) >= 6 * 8)
10654 return 1;
10655 return 0;
10656 }
10657
10658 static bool
10659 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10660 {
10661 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10662 }
10663 \f
10664 /*
10665 On the SH1..SH4, the trampoline looks like
10666 2 0002 D202 mov.l l2,r2
10667 1 0000 D301 mov.l l1,r3
10668 3 0004 422B jmp @r2
10669 4 0006 0009 nop
10670 5 0008 00000000 l1: .long area
10671 6 000c 00000000 l2: .long function
10672
10673 SH5 (compact) uses r1 instead of r3 for the static chain. */
10674
10675
10676 /* Emit RTL insns to initialize the variable parts of a trampoline.
10677 FNADDR is an RTX for the address of the function's pure code.
10678 CXT is an RTX for the static chain value for the function. */
10679
10680 static void
10681 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10682 {
10683 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10684 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10685
10686 if (TARGET_SHMEDIA64)
10687 {
10688 rtx tramp_templ;
10689 int fixed_len;
10690
10691 rtx movi1 = GEN_INT (0xcc000010);
10692 rtx shori1 = GEN_INT (0xc8000010);
10693 rtx src, dst;
10694
10695 /* The following trampoline works within a +- 128 KB range for cxt:
10696 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10697 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10698 gettr tr1,r1; blink tr0,r63 */
10699 /* Address rounding makes it hard to compute the exact bounds of the
10700 offset for this trampoline, but we have a rather generous offset
10701 range, so frame_offset should do fine as an upper bound. */
10702 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10703 {
10704 /* ??? could optimize this trampoline initialization
10705 by writing DImode words with two insns each. */
10706 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10707 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10708 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10709 insn = gen_rtx_AND (DImode, insn, mask);
10710 /* Or in ptb/u .,tr1 pattern */
10711 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10712 insn = force_operand (insn, NULL_RTX);
10713 insn = gen_lowpart (SImode, insn);
10714 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10715 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10716 insn = gen_rtx_AND (DImode, insn, mask);
10717 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10718 insn = gen_lowpart (SImode, insn);
10719 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10720 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10721 insn = gen_rtx_AND (DImode, insn, mask);
10722 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10723 insn = gen_lowpart (SImode, insn);
10724 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10725 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10726 insn = gen_rtx_AND (DImode, insn, mask);
10727 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10728 insn = gen_lowpart (SImode, insn);
10729 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10730 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10731 insn = gen_rtx_AND (DImode, insn, mask);
10732 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10733 insn = gen_lowpart (SImode, insn);
10734 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10735 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10736 GEN_INT (0x6bf10600));
10737 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10738 GEN_INT (0x4415fc10));
10739 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10740 GEN_INT (0x4401fff0));
10741 emit_insn (gen_ic_invalidate_line (tramp));
10742 return;
10743 }
10744 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10745 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10746
10747 tramp_templ = gen_datalabel_ref (tramp_templ);
10748 dst = tramp_mem;
10749 src = gen_const_mem (BLKmode, tramp_templ);
10750 set_mem_align (dst, 256);
10751 set_mem_align (src, 64);
10752 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10753
10754 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10755 emit_move_insn (adjust_address (tramp_mem, Pmode,
10756 fixed_len + GET_MODE_SIZE (Pmode)),
10757 cxt);
10758 emit_insn (gen_ic_invalidate_line (tramp));
10759 return;
10760 }
10761 else if (TARGET_SHMEDIA)
10762 {
10763 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10764 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10765 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10766 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10767 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10768 rotated 10 right, and the higher 16 bits of every 32 selected. */
10769 rtx movishori
10770 = force_reg (V2HImode, (simplify_gen_subreg
10771 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10772 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10773 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10774
10775 fnaddr = force_reg (SImode, fnaddr);
10776 cxt = force_reg (SImode, cxt);
10777 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10778 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10779 movishori));
10780 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10781 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10782 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10783 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10784 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10785 gen_rtx_SUBREG (V2HImode, cxt, 0),
10786 movishori));
10787 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10788 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10789 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10790 if (TARGET_LITTLE_ENDIAN)
10791 {
10792 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10793 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10794 }
10795 else
10796 {
10797 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10798 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10799 }
10800 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10801 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10802 emit_insn (gen_ic_invalidate_line (tramp));
10803 return;
10804 }
10805 else if (TARGET_SHCOMPACT)
10806 {
10807 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10808 return;
10809 }
10810 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10811 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10812 SImode));
10813 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10814 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10815 SImode));
10816 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10817 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10818 if (TARGET_HARVARD)
10819 {
10820 if (!TARGET_INLINE_IC_INVALIDATE
10821 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10822 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10823 FUNCTION_ORDINARY),
10824 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10825 else
10826 emit_insn (gen_ic_invalidate_line (tramp));
10827 }
10828 }
10829
10830 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10831
10832 static rtx
10833 sh_trampoline_adjust_address (rtx tramp)
10834 {
10835 if (TARGET_SHMEDIA)
10836 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10837 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10838 return tramp;
10839 }
10840
10841 /* FIXME: This is overly conservative. A SHcompact function that
10842 receives arguments ``by reference'' will have them stored in its
10843 own stack frame, so it must not pass pointers or references to
10844 these arguments to other functions by means of sibling calls. */
10845 /* If PIC, we cannot make sibling calls to global functions
10846 because the PLT requires r12 to be live. */
10847 static bool
10848 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10849 {
10850 return (1
10851 && (! TARGET_SHCOMPACT
10852 || crtl->args.info.stack_regs == 0)
10853 && ! sh_cfun_interrupt_handler_p ()
10854 && (! flag_pic
10855 || (decl && ! TREE_PUBLIC (decl))
10856 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10857 }
10858 \f
10859 /* Machine specific built-in functions. */
10860
10861 struct builtin_description
10862 {
10863 const enum insn_code icode;
10864 const char *const name;
10865 int signature;
10866 tree fndecl;
10867 };
10868
10869 /* Describe the number and signedness of arguments; arg[0] == result
10870 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10871 /* 9: 64-bit pointer, 10: 32-bit pointer */
10872 static const char signature_args[][4] =
10873 {
10874 #define SH_BLTIN_V2SI2 0
10875 { 4, 4 },
10876 #define SH_BLTIN_V4HI2 1
10877 { 4, 4 },
10878 #define SH_BLTIN_V2SI3 2
10879 { 4, 4, 4 },
10880 #define SH_BLTIN_V4HI3 3
10881 { 4, 4, 4 },
10882 #define SH_BLTIN_V8QI3 4
10883 { 4, 4, 4 },
10884 #define SH_BLTIN_MAC_HISI 5
10885 { 1, 4, 4, 1 },
10886 #define SH_BLTIN_SH_HI 6
10887 { 4, 4, 1 },
10888 #define SH_BLTIN_SH_SI 7
10889 { 4, 4, 1 },
10890 #define SH_BLTIN_V4HI2V2SI 8
10891 { 4, 4, 4 },
10892 #define SH_BLTIN_V4HI2V8QI 9
10893 { 4, 4, 4 },
10894 #define SH_BLTIN_SISF 10
10895 { 4, 2 },
10896 #define SH_BLTIN_LDUA_L 11
10897 { 2, 10 },
10898 #define SH_BLTIN_LDUA_Q 12
10899 { 1, 10 },
10900 #define SH_BLTIN_STUA_L 13
10901 { 0, 10, 2 },
10902 #define SH_BLTIN_STUA_Q 14
10903 { 0, 10, 1 },
10904 #define SH_BLTIN_LDUA_L64 15
10905 { 2, 9 },
10906 #define SH_BLTIN_LDUA_Q64 16
10907 { 1, 9 },
10908 #define SH_BLTIN_STUA_L64 17
10909 { 0, 9, 2 },
10910 #define SH_BLTIN_STUA_Q64 18
10911 { 0, 9, 1 },
10912 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10913 #define SH_BLTIN_2 19
10914 #define SH_BLTIN_SU 19
10915 { 1, 2 },
10916 #define SH_BLTIN_3 20
10917 #define SH_BLTIN_SUS 20
10918 { 2, 2, 1 },
10919 #define SH_BLTIN_PSSV 21
10920 { 0, 8, 2, 2 },
10921 #define SH_BLTIN_XXUU 22
10922 #define SH_BLTIN_UUUU 22
10923 { 1, 1, 1, 1 },
10924 #define SH_BLTIN_PV 23
10925 { 0, 8 },
10926 };
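
/* A worked example of the encoding above, added for illustration only and
   not part of the original builtin tables: SH_BLTIN_MAC_HISI is
   { 1, 4, 4, 1 }, i.e. an unsigned result, two "don't care" operands and a
   final unsigned operand, as used by __builtin_sh_media_MMACFX_WL below.
   The helper sketched here is hypothetical and merely decodes one slot of
   such an entry.  */
#if 0 /* Illustrative sketch, not built.  */
static const char *
sh_decode_signature_slot (int slot)
{
  if (slot == 0)
    return "no argument / void";
  if (slot & 8)		/* 8: pointer, 9: 64-bit pointer, 10: 32-bit pointer.  */
    return (slot & 1) ? "64-bit pointer"
	   : (slot & 2) ? "32-bit pointer" : "pointer";
  if (slot & 4)
    return "don't care";
  return (slot & 1) ? "unsigned" : "signed";
}
#endif
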
10927 /* mcmv: operands considered unsigned. */
10928 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10929 /* mperm: control value considered unsigned int. */
10930 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10931 /* mshards_q: returns signed short. */
10932 /* nsb: takes long long arg, returns unsigned char. */
10933 static struct builtin_description bdesc[] =
10934 {
10935 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10936 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10937 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10938 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10939 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10940 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10941 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10942 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10943 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10944 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10945 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10946 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10947 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10948 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10949 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10950 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10951 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10952 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10953 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10954 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10955 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10956 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10957 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10958 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10959 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10960 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10961 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10962 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10963 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10964 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10965 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10966 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10967 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10968 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10969 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10970 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10971 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10972 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10973 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10974 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10975 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10976 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10977 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10978 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10979 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10980 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10981 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10982 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10983 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10984 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10985 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10986 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10987 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10988 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10989 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10990 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10991 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10992 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10993 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10994 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10995 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10996 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10997 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10998 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10999 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11000 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11001 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11002 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11003 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11004 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11005 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11006 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11007 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11008 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11009 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11010 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11011 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11012 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11013 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11014 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11015 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11016 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11017 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11018 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11019 };
11020
11021 static void
11022 sh_media_init_builtins (void)
11023 {
11024 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11025 struct builtin_description *d;
11026
11027 memset (shared, 0, sizeof shared);
11028 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
11029 {
11030 tree type, arg_type = 0;
11031 int signature = d->signature;
11032 int i;
11033
11034 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11035 type = shared[signature];
11036 else
11037 {
11038 int has_result = signature_args[signature][0] != 0;
11039
11040 if ((signature_args[signature][1] & 8)
11041 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11042 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11043 continue;
11044 if (! TARGET_FPU_ANY
11045 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11046 continue;
11047 type = void_list_node;
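	  /* Walk the argument slots from last to first, consing each
	     argument type onto TYPE so that the list ends in
	     void_list_node; the slot for i == 0 supplies the result type
	     passed to build_function_type below.  */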
11048 for (i = 3; ; i--)
11049 {
11050 int arg = signature_args[signature][i];
11051 int opno = i - 1 + has_result;
11052
11053 if (arg & 8)
11054 arg_type = ptr_type_node;
11055 else if (arg)
11056 arg_type = (*lang_hooks.types.type_for_mode)
11057 (insn_data[d->icode].operand[opno].mode,
11058 (arg & 1));
11059 else if (i)
11060 continue;
11061 else
11062 arg_type = void_type_node;
11063 if (i == 0)
11064 break;
11065 type = tree_cons (NULL_TREE, arg_type, type);
11066 }
11067 type = build_function_type (arg_type, type);
11068 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11069 shared[signature] = type;
11070 }
11071 d->fndecl =
11072 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11073 NULL, NULL_TREE);
11074 }
11075 }
11076
11077 /* Returns the shmedia builtin decl for CODE. */
11078
11079 static tree
11080 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11081 {
11082 if (code >= ARRAY_SIZE (bdesc))
11083 return error_mark_node;
11084
11085 return bdesc[code].fndecl;
11086 }
11087
11088 /* Implements target hook vector_mode_supported_p. */
11089 bool
11090 sh_vector_mode_supported_p (enum machine_mode mode)
11091 {
11092 if (TARGET_FPU_ANY
11093 && ((mode == V2SFmode)
11094 || (mode == V4SFmode)
11095 || (mode == V16SFmode)))
11096 return true;
11097
11098 else if (TARGET_SHMEDIA
11099 && ((mode == V8QImode)
11100 || (mode == V2HImode)
11101 || (mode == V4HImode)
11102 || (mode == V2SImode)))
11103 return true;
11104
11105 return false;
11106 }
11107
11108 bool
11109 sh_frame_pointer_required (void)
11110 {
11111 /* If needed, override this in other tm.h files to cope with various OS
11112 lossage requiring a frame pointer. */
11113 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11114 return true;
11115
11116 if (crtl->profile)
11117 return true;
11118
11119 return false;
11120 }
11121
11122 /* Implements target hook dwarf_calling_convention. Return an enum
11123 of dwarf_calling_convention. */
11124 int
11125 sh_dwarf_calling_convention (const_tree func)
11126 {
11127 if (sh_attr_renesas_p (func))
11128 return DW_CC_GNU_renesas_sh;
11129
11130 return DW_CC_normal;
11131 }
11132
11133 static void
11134 sh_init_builtins (void)
11135 {
11136 if (TARGET_SHMEDIA)
11137 sh_media_init_builtins ();
11138 }
11139
11140 /* Returns the sh builtin decl for CODE. */
11141
11142 static tree
11143 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11144 {
11145 if (TARGET_SHMEDIA)
11146 return sh_media_builtin_decl (code, initialize_p);
11147
11148 return error_mark_node;
11149 }
11150
11151 /* Expand an expression EXP that calls a built-in function,
11152 with result going to TARGET if that's convenient
11153 (and in mode MODE if that's convenient).
11154 SUBTARGET may be used as the target for computing one of EXP's operands.
11155 IGNORE is nonzero if the value is to be ignored. */
11156
11157 static rtx
11158 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11159 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11160 {
11161 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11162 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11163 const struct builtin_description *d = &bdesc[fcode];
11164 enum insn_code icode = d->icode;
11165 int signature = d->signature;
11166 enum machine_mode tmode = VOIDmode;
11167 int nop = 0, i;
11168 rtx op[4];
11169 rtx pat = 0;
11170
11171 if (signature_args[signature][0])
11172 {
11173 if (ignore)
11174 return 0;
11175
11176 tmode = insn_data[icode].operand[0].mode;
11177 if (! target
11178 || GET_MODE (target) != tmode
11179 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11180 target = gen_reg_rtx (tmode);
11181 op[nop++] = target;
11182 }
11183 else
11184 target = 0;
11185
11186 for (i = 1; i <= 3; i++, nop++)
11187 {
11188 tree arg;
11189 enum machine_mode opmode, argmode;
11190 tree optype;
11191
11192 if (! signature_args[signature][i])
11193 break;
11194 arg = CALL_EXPR_ARG (exp, i - 1);
11195 if (arg == error_mark_node)
11196 return const0_rtx;
11197 if (signature_args[signature][i] & 8)
11198 {
11199 opmode = ptr_mode;
11200 optype = ptr_type_node;
11201 }
11202 else
11203 {
11204 opmode = insn_data[icode].operand[nop].mode;
11205 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11206 }
11207 argmode = TYPE_MODE (TREE_TYPE (arg));
11208 if (argmode != opmode)
11209 arg = build1 (NOP_EXPR, optype, arg);
11210 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11211 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11212 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11213 }
11214
11215 switch (nop)
11216 {
11217 case 1:
11218 pat = (*insn_data[d->icode].genfun) (op[0]);
11219 break;
11220 case 2:
11221 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11222 break;
11223 case 3:
11224 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11225 break;
11226 case 4:
11227 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11228 break;
11229 default:
11230 gcc_unreachable ();
11231 }
11232 if (! pat)
11233 return 0;
11234 emit_insn (pat);
11235 return target;
11236 }
11237
11238 void
11239 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11240 {
11241 rtx sel0 = const0_rtx;
11242 rtx sel1 = const1_rtx;
11243 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11244 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11245
11246 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11247 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11248 }
11249
11250 void
11251 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11252 {
11253 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11254
11255 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11256 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11257 }
11258
11259 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11260 We can allow any mode in any general register. The special registers
11261 only allow SImode. Don't allow any mode in the PR.
11262
11263 We cannot hold DCmode values in the XD registers because alter_reg
11264 handles subregs of them incorrectly. We could work around this by
11265 spacing the XD registers like the DR registers, but this would require
11266 additional memory in every compilation to hold larger register vectors.
11267 We could hold SFmode / SCmode values in XD registers, but that
11268 would require a tertiary reload when reloading from / to memory,
11269 and a secondary reload to reload from / to general regs; that
11270 seems to be a losing proposition.
11271
11272 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11273 it won't be ferried through GP registers first. */
11274
11275 bool
11276 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11277 {
11278 if (SPECIAL_REGISTER_P (regno))
11279 return mode == SImode;
11280
11281 if (regno == FPUL_REG)
11282 return (mode == SImode || mode == SFmode);
11283
11284 if (FP_REGISTER_P (regno) && mode == SFmode)
11285 return true;
11286
11287 if (mode == V2SFmode)
11288 {
11289 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11290 || GENERAL_REGISTER_P (regno)))
11291 return true;
11292 else
11293 return false;
11294 }
11295
11296 if (mode == V4SFmode)
11297 {
11298 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11299 || GENERAL_REGISTER_P (regno))
11300 return true;
11301 else
11302 return false;
11303 }
11304
11305 if (mode == V16SFmode)
11306 {
11307 if (TARGET_SHMEDIA)
11308 {
11309 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11310 return true;
11311 else
11312 return false;
11313 }
11314 else
11315 return regno == FIRST_XD_REG;
11316 }
11317
11318 if (FP_REGISTER_P (regno))
11319 {
11320 if (mode == SFmode
11321 || mode == SImode
11322 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11323 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11324 || mode == DCmode
11325 || (TARGET_SHMEDIA
11326 && (mode == DFmode || mode == DImode
11327 || mode == V2SFmode || mode == TImode)))
11328 && ((regno - FIRST_FP_REG) & 1) == 0)
11329 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11330 && ((regno - FIRST_FP_REG) & 3) == 0))
11331 return true;
11332 else
11333 return false;
11334 }
11335
11336 if (XD_REGISTER_P (regno))
11337 return mode == DFmode;
11338
11339 if (TARGET_REGISTER_P (regno))
11340 return (mode == DImode || mode == SImode || mode == PDImode);
11341
11342 if (regno == PR_REG)
11343 return mode == SImode;
11344
11345 if (regno == FPSCR_REG)
11346 return mode == PSImode;
11347
11348 /* FIXME. This works around PR target/37633 for -O0. */
11349 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11350 {
11351 unsigned int n = GET_MODE_SIZE (mode) / 8;
11352
11353 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11354 && regno <= FIRST_GENERAL_REG + 14)
11355 return false;
11356 }
11357
11358 return true;
11359 }
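
/* Illustrative restatement of the alignment rule applied to FP registers
   above; this helper does not exist in the backend and only makes the
   pattern explicit: an N-word FP value must start at an FP register whose
   offset from FIRST_FP_REG is a multiple of N (2 for DFmode / V2SFmode,
   4 for V4SFmode / TImode, 16 for V16SFmode).  */
#if 0 /* Sketch only, not built.  */
static bool
sh_fp_regno_aligned_p (unsigned int regno, unsigned int words)
{
  return FP_REGISTER_P (regno) && ((regno - FIRST_FP_REG) % words) == 0;
}
#endif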
11360
11361 /* Return true if a mode change from FROM to TO is invalid for registers
11362 in class RCLASS. */
11363 bool
11364 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11365 enum reg_class rclass)
11366 {
11367 /* We want to enable the use of SUBREGs as a means to
11368 VEC_SELECT a single element of a vector. */
11369 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11370 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11371
11372 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11373 {
11374 if (TARGET_LITTLE_ENDIAN)
11375 {
11376 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11377 return reg_classes_intersect_p (DF_REGS, rclass);
11378 }
11379 else
11380 {
11381 if (GET_MODE_SIZE (from) < 8)
11382 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11383 }
11384 }
11385 return false;
11386 }
11387
11388 /* Return true if registers in machine mode MODE will likely be
11389 allocated to registers in small register classes. */
11390
11391 bool
11392 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11393 {
11394 return (! TARGET_SHMEDIA);
11395 }
11396
11397 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11398 that label is used. */
11399
11400 void
11401 sh_mark_label (rtx address, int nuses)
11402 {
11403 if (GOTOFF_P (address))
11404 {
11405 /* Extract the label or symbol. */
11406 address = XEXP (address, 0);
11407 if (GET_CODE (address) == PLUS)
11408 address = XEXP (address, 0);
11409 address = XVECEXP (address, 0, 0);
11410 }
11411 if (GET_CODE (address) == LABEL_REF
11412 && LABEL_P (XEXP (address, 0)))
11413 LABEL_NUSES (XEXP (address, 0)) += nuses;
11414 }
11415
11416 /* Compute extra cost of moving data between one register class
11417 and another. */
11418
11419 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11420 uses this information. Hence, the general register <-> floating point
11421 register information here is not used for SFmode. */
11422
11423 static int
11424 sh_register_move_cost (enum machine_mode mode,
11425 reg_class_t srcclass, reg_class_t dstclass)
11426 {
11427 if (dstclass == T_REGS || dstclass == PR_REGS)
11428 return 10;
11429
11430 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11431 return 4;
11432
11433 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11434 && REGCLASS_HAS_FP_REG (srcclass)
11435 && REGCLASS_HAS_FP_REG (dstclass))
11436 return 4;
11437
11438 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11439 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11440
11441 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11442 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11443 return 9;
11444
11445 if ((REGCLASS_HAS_FP_REG (dstclass)
11446 && REGCLASS_HAS_GENERAL_REG (srcclass))
11447 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11448 && REGCLASS_HAS_FP_REG (srcclass)))
11449 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11450 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11451
11452 if ((dstclass == FPUL_REGS
11453 && REGCLASS_HAS_GENERAL_REG (srcclass))
11454 || (srcclass == FPUL_REGS
11455 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11456 return 5;
11457
11458 if ((dstclass == FPUL_REGS
11459 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11460 || (srcclass == FPUL_REGS
11461 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11462 return 7;
11463
11464 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11465 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11466 return 20;
11467
11468 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11469 if (TARGET_SHMEDIA
11470 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11471 {
11472 if (sh_gettrcost >= 0)
11473 return sh_gettrcost;
11474 else if (!TARGET_PT_FIXED)
11475 return 100;
11476 }
11477
11478 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11479 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11480 return 4;
11481
11482 if (TARGET_SHMEDIA
11483 || (TARGET_FMOVD
11484 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11485 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11486 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11487
11488 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11489 }
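
/* Worked example of the size scaling used above, for illustration only:
   in the default final case an SImode move costs 2 * ((4 + 3) / 4) = 2,
   while a general <-> floating point move of a DFmode value without
   TARGET_FMOVD costs 12 * ((8 + 7) / 8) = 12.  The helper is hypothetical
   and simply restates the rounding-up arithmetic.  */
#if 0 /* Sketch only, not built.  */
static int
sh_move_cost_units (int cost_per_chunk, int mode_size, int chunk_size)
{
  /* One COST_PER_CHUNK per CHUNK_SIZE bytes, rounded up.  */
  return cost_per_chunk * ((mode_size + chunk_size - 1) / chunk_size);
}
#endif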
11490
11491 static rtx emit_load_ptr (rtx, rtx);
11492
11493 static rtx
11494 emit_load_ptr (rtx reg, rtx addr)
11495 {
11496 rtx mem = gen_const_mem (ptr_mode, addr);
11497
11498 if (Pmode != ptr_mode)
11499 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11500 return emit_move_insn (reg, mem);
11501 }
11502
11503 static void
11504 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11505 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11506 tree function)
11507 {
11508 CUMULATIVE_ARGS cum;
11509 int structure_value_byref = 0;
11510 rtx this_rtx, this_value, sibcall, insns, funexp;
11511 tree funtype = TREE_TYPE (function);
11512 int simple_add = CONST_OK_FOR_ADD (delta);
11513 int did_load = 0;
11514 rtx scratch0, scratch1, scratch2;
11515 unsigned i;
11516
11517 reload_completed = 1;
11518 epilogue_completed = 1;
11519 current_function_uses_only_leaf_regs = 1;
11520
11521 emit_note (NOTE_INSN_PROLOGUE_END);
11522
11523 /* Find the "this" pointer. We have such a wide range of ABIs for the
11524 SH that it's best to do this completely machine independently.
11525 "this" is passed as first argument, unless a structure return pointer
11526 comes first, in which case "this" comes second. */
11527 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11528 #ifndef PCC_STATIC_STRUCT_RETURN
11529 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11530 structure_value_byref = 1;
11531 #endif /* not PCC_STATIC_STRUCT_RETURN */
11532 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11533 {
11534 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11535
11536 sh_function_arg_advance (&cum, Pmode, ptype, true);
11537 }
11538 this_rtx = sh_function_arg (&cum, Pmode, ptr_type_node, true);
11539
11540 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11541 static chain pointer (even if you can't have nested virtual functions
11542 right now, someone might implement them sometime), and the rest of the
11543 registers are used for argument passing, are callee-saved, or reserved. */
11544 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11545 -ffixed-reg has been used. */
11546 if (! call_used_regs[0] || fixed_regs[0])
11547 error ("r0 needs to be available as a call-clobbered register");
11548 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11549 if (! TARGET_SH5)
11550 {
11551 if (call_used_regs[1] && ! fixed_regs[1])
11552 scratch1 = gen_rtx_REG (ptr_mode, 1);
11553 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11554 to the location where struct values are to be returned. */
11555 if (call_used_regs[3] && ! fixed_regs[3])
11556 scratch2 = gen_rtx_REG (Pmode, 3);
11557 }
11558 else if (TARGET_SHMEDIA)
11559 {
11560 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11561 if (i != REGNO (scratch0) &&
11562 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11563 {
11564 scratch1 = gen_rtx_REG (ptr_mode, i);
11565 break;
11566 }
11567 if (scratch1 == scratch0)
11568 error ("Need a second call-clobbered general purpose register");
11569 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11570 if (call_used_regs[i] && ! fixed_regs[i])
11571 {
11572 scratch2 = gen_rtx_REG (Pmode, i);
11573 break;
11574 }
11575 if (scratch2 == scratch0)
11576 error ("Need a call-clobbered target register");
11577 }
11578
11579 this_value = plus_constant (this_rtx, delta);
11580 if (vcall_offset
11581 && (simple_add || scratch0 != scratch1)
11582 && strict_memory_address_p (ptr_mode, this_value))
11583 {
11584 emit_load_ptr (scratch0, this_value);
11585 did_load = 1;
11586 }
11587
11588 if (!delta)
11589 ; /* Do nothing. */
11590 else if (simple_add)
11591 emit_move_insn (this_rtx, this_value);
11592 else
11593 {
11594 emit_move_insn (scratch1, GEN_INT (delta));
11595 emit_insn (gen_add2_insn (this_rtx, scratch1));
11596 }
11597
11598 if (vcall_offset)
11599 {
11600 rtx offset_addr;
11601
11602 if (!did_load)
11603 emit_load_ptr (scratch0, this_rtx);
11604
11605 offset_addr = plus_constant (scratch0, vcall_offset);
11606 if (strict_memory_address_p (ptr_mode, offset_addr))
11607 ; /* Do nothing. */
11608 else if (! TARGET_SH5 && scratch0 != scratch1)
11609 {
11610 /* scratch0 != scratch1, and we have indexed loads. Get a better
11611 schedule by loading the offset into r1 and using an indexed
11612 load - then the load of r1 can issue before the load from
11613 (this_rtx + delta) finishes. */
11614 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11615 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11616 }
11617 else if (CONST_OK_FOR_ADD (vcall_offset))
11618 {
11619 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11620 offset_addr = scratch0;
11621 }
11622 else if (scratch0 != scratch1)
11623 {
11624 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11625 emit_insn (gen_add2_insn (scratch0, scratch1));
11626 offset_addr = scratch0;
11627 }
11628 else
11629 gcc_unreachable (); /* FIXME */
11630 emit_load_ptr (scratch0, offset_addr);
11631
11632 if (Pmode != ptr_mode)
11633 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11634 emit_insn (gen_add2_insn (this_rtx, scratch0));
11635 }
11636
11637 /* Generate a tail call to the target function. */
11638 if (! TREE_USED (function))
11639 {
11640 assemble_external (function);
11641 TREE_USED (function) = 1;
11642 }
11643 funexp = XEXP (DECL_RTL (function), 0);
11644 /* If the function is overridden, so is the thunk, hence we don't
11645 need GOT addressing even if this is a public symbol. */
11646 #if 0
11647 if (TARGET_SH1 && ! flag_weak)
11648 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11649 else
11650 #endif
11651 if (TARGET_SH2 && flag_pic)
11652 {
11653 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11654 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11655 }
11656 else
11657 {
11658 if (TARGET_SHMEDIA && flag_pic)
11659 {
11660 funexp = gen_sym2PIC (funexp);
11661 PUT_MODE (funexp, Pmode);
11662 }
11663 emit_move_insn (scratch2, funexp);
11664 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11665 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11666 }
11667 sibcall = emit_call_insn (sibcall);
11668 SIBLING_CALL_P (sibcall) = 1;
11669 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11670 emit_barrier ();
11671
11672 /* Run just enough of rest_of_compilation to do scheduling and get
11673 the insns emitted. Note that use_thunk calls
11674 assemble_start_function and assemble_end_function. */
11675
11676 insn_locators_alloc ();
11677 insns = get_insns ();
11678
11679 if (optimize > 0)
11680 {
11681 if (! cfun->cfg)
11682 init_flow (cfun);
11683 split_all_insns_noflow ();
11684 }
11685
11686 sh_reorg ();
11687
11688 if (optimize > 0 && flag_delayed_branch)
11689 dbr_schedule (insns);
11690
11691 shorten_branches (insns);
11692 final_start_function (insns, file, 1);
11693 final (insns, file, 1);
11694 final_end_function ();
11695
11696 reload_completed = 0;
11697 epilogue_completed = 0;
11698 }
11699
11700 rtx
11701 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11702 {
11703 rtx sym;
11704
11705 /* If this is not an ordinary function, the name usually comes from a
11706 string literal or an sprintf buffer. Make sure we use the same
11707 string consistently, so that cse will be able to unify address loads. */
11708 if (kind != FUNCTION_ORDINARY)
11709 name = IDENTIFIER_POINTER (get_identifier (name));
11710 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11711 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11712 if (flag_pic)
11713 switch (kind)
11714 {
11715 case FUNCTION_ORDINARY:
11716 break;
11717 case SFUNC_GOT:
11718 {
11719 rtx reg = target ? target : gen_reg_rtx (Pmode);
11720
11721 emit_insn (gen_symGOT2reg (reg, sym));
11722 sym = reg;
11723 break;
11724 }
11725 case SFUNC_STATIC:
11726 {
11727 /* ??? To allow cse to work, we use GOTOFF relocations.
11728 We could add combiner patterns to transform this into
11729 straight pc-relative calls with sym2PIC / bsrf when
11730 label load and function call are still 1:1 and in the
11731 same basic block during combine. */
11732 rtx reg = target ? target : gen_reg_rtx (Pmode);
11733
11734 emit_insn (gen_symGOTOFF2reg (reg, sym));
11735 sym = reg;
11736 break;
11737 }
11738 }
11739 if (target && sym != target)
11740 {
11741 emit_move_insn (target, sym);
11742 return target;
11743 }
11744 return sym;
11745 }
11746
11747 /* Find the number of a general purpose register in S. */
11748 static int
11749 scavenge_reg (HARD_REG_SET *s)
11750 {
11751 int r;
11752 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11753 if (TEST_HARD_REG_BIT (*s, r))
11754 return r;
11755 return -1;
11756 }
11757
11758 rtx
11759 sh_get_pr_initial_val (void)
11760 {
11761 rtx val;
11762
11763 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11764 PR register on SHcompact, because it might be clobbered by the prologue.
11765 We check first if that is known to be the case. */
11766 if (TARGET_SHCOMPACT
11767 && ((crtl->args.info.call_cookie
11768 & ~ CALL_COOKIE_RET_TRAMP (1))
11769 || crtl->saves_all_registers))
11770 return gen_frame_mem (SImode, return_address_pointer_rtx);
11771
11772 /* If we haven't finished rtl generation, there might be a nonlocal label
11773 that we haven't seen yet.
11774 ??? get_hard_reg_initial_val fails if it is called after register
11775 allocation has started, unless it has been called before for the
11776 same register. And even then, we end up in trouble if we didn't use
11777 the register in the same basic block before. So call
11778 get_hard_reg_initial_val now and wrap it in an unspec if we might
11779 need to replace it. */
11780 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11781 combine can put the pseudo returned by get_hard_reg_initial_val into
11782 instructions that need a general purpose register, which will fail to
11783 be recognized when the pseudo becomes allocated to PR. */
11784 val
11785 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11786 if (TARGET_SH1)
11787 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11788 return val;
11789 }
11790
11791 int
11792 sh_expand_t_scc (rtx operands[])
11793 {
11794 enum rtx_code code = GET_CODE (operands[1]);
11795 rtx target = operands[0];
11796 rtx op0 = operands[2];
11797 rtx op1 = operands[3];
11798 rtx result = target;
11799 HOST_WIDE_INT val;
11800
11801 if (!REG_P (op0) || REGNO (op0) != T_REG
11802 || !CONST_INT_P (op1))
11803 return 0;
11804 if (!REG_P (result))
11805 result = gen_reg_rtx (SImode);
11806 val = INTVAL (op1);
11807 if ((code == EQ && val == 1) || (code == NE && val == 0))
11808 emit_insn (gen_movt (result));
11809 else if (TARGET_SH2A && ((code == EQ && val == 0)
11810 || (code == NE && val == 1)))
11811 emit_insn (gen_xorsi3_movrt (result));
11812 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11813 {
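	  /* T is known to be 0 or 1 here.  The clobber tells dataflow that
	     the previous contents of RESULT are irrelevant; subc with
	     identical operands then computes result - result - T = -T, and
	     adding 1 yields 1 - T, i.e. the logical negation of the T bit.  */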
11814 emit_clobber (result);
11815 emit_insn (gen_subc (result, result, result));
11816 emit_insn (gen_addsi3 (result, result, const1_rtx));
11817 }
11818 else if (code == EQ || code == NE)
11819 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11820 else
11821 return 0;
11822 if (result != target)
11823 emit_move_insn (target, result);
11824 return 1;
11825 }
11826
11827 /* INSN is an sfunc; return the rtx that describes the address used. */
11828 static rtx
11829 extract_sfunc_addr (rtx insn)
11830 {
11831 rtx pattern, part = NULL_RTX;
11832 int len, i;
11833
11834 pattern = PATTERN (insn);
11835 len = XVECLEN (pattern, 0);
11836 for (i = 0; i < len; i++)
11837 {
11838 part = XVECEXP (pattern, 0, i);
11839 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11840 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11841 return XEXP (part, 0);
11842 }
11843 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11844 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11845 }
11846
11847 /* Verify that the register in use_sfunc_addr still agrees with the address
11848 used in the sfunc. This prevents fill_slots_from_thread from changing
11849 use_sfunc_addr.
11850 INSN is the use_sfunc_addr instruction, and REG is the register it
11851 guards. */
11852 int
11853 check_use_sfunc_addr (rtx insn, rtx reg)
11854 {
11855 /* Search for the sfunc. It should really come right after INSN. */
11856 while ((insn = NEXT_INSN (insn)))
11857 {
11858 if (LABEL_P (insn) || JUMP_P (insn))
11859 break;
11860 if (! INSN_P (insn))
11861 continue;
11862
11863 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11864 insn = XVECEXP (PATTERN (insn), 0, 0);
11865 if (GET_CODE (PATTERN (insn)) != PARALLEL
11866 || get_attr_type (insn) != TYPE_SFUNC)
11867 continue;
11868 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11869 }
11870 gcc_unreachable ();
11871 }
11872
11873 /* This function returns a constant rtx that represents 2**15 / pi in
11874 SFmode. It's used to scale SFmode angles, in radians, to a
11875 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11876 maps to 0x10000). */
11877
11878 static GTY(()) rtx sh_fsca_sf2int_rtx;
11879
11880 rtx
11881 sh_fsca_sf2int (void)
11882 {
11883 if (! sh_fsca_sf2int_rtx)
11884 {
11885 REAL_VALUE_TYPE rv;
11886
11887 real_from_string (&rv, "10430.378350470453");
11888 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11889 }
11890
11891 return sh_fsca_sf2int_rtx;
11892 }
11893
11894 /* This function returns a constant rtx that represents 2**15 / pi in
11895 DFmode. It's used to scale DFmode angles, in radians, to a
11896 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11897 maps to 0x10000). */
11898
11899 static GTY(()) rtx sh_fsca_df2int_rtx;
11900
11901 rtx
11902 sh_fsca_df2int (void)
11903 {
11904 if (! sh_fsca_df2int_rtx)
11905 {
11906 REAL_VALUE_TYPE rv;
11907
11908 real_from_string (&rv, "10430.378350470453");
11909 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11910 }
11911
11912 return sh_fsca_df2int_rtx;
11913 }
11914
11915 /* This function returns a constant rtx that represents pi / 2**15 in
11916 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11917 of a full circle back to an SFmode value (i.e., 0x10000 maps to
11918 2*pi). */
11919
11920 static GTY(()) rtx sh_fsca_int2sf_rtx;
11921
11922 rtx
11923 sh_fsca_int2sf (void)
11924 {
11925 if (! sh_fsca_int2sf_rtx)
11926 {
11927 REAL_VALUE_TYPE rv;
11928
11929 real_from_string (&rv, "9.587379924285257e-5");
11930 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11931 }
11932
11933 return sh_fsca_int2sf_rtx;
11934 }
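
/* Illustrative arithmetic only, not part of the original file: the two
   scale factors above are reciprocals of each other.  2**15 / pi is
   10430.378..., so an angle of pi/2 radians maps to
   (pi/2) * (2**15 / pi) = 0x4000, one quarter of the 0x10000 full circle,
   and multiplying back by pi / 2**15 = 9.5874e-5 recovers the radians.
   The helper names are hypothetical.  */
#if 0 /* Sketch only, not built.  */
static double
sh_fsca_radians_to_units (double radians)
{
  return radians * 10430.378350470453;	/* 2**15 / pi */
}

static double
sh_fsca_units_to_radians (double units)
{
  return units * 9.587379924285257e-5;	/* pi / 2**15 */
}
#endif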
11935
11936 /* Initialize the CUMULATIVE_ARGS structure. */
11937
11938 void
11939 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11940 tree fntype,
11941 rtx libname ATTRIBUTE_UNUSED,
11942 tree fndecl,
11943 signed int n_named_args,
11944 enum machine_mode mode)
11945 {
11946 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11947 pcum->free_single_fp_reg = 0;
11948 pcum->stack_regs = 0;
11949 pcum->byref_regs = 0;
11950 pcum->byref = 0;
11951 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11952
11953 /* XXX - Should we check TARGET_HITACHI here ??? */
11954 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11955
11956 if (fntype)
11957 {
11958 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11959 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11960 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11961 pcum->arg_count [(int) SH_ARG_INT]
11962 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11963
11964 pcum->call_cookie
11965 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11966 && pcum->arg_count [(int) SH_ARG_INT] == 0
11967 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11968 ? int_size_in_bytes (TREE_TYPE (fntype))
11969 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11970 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11971 == FIRST_RET_REG));
11972 }
11973 else
11974 {
11975 pcum->arg_count [(int) SH_ARG_INT] = 0;
11976 pcum->prototype_p = FALSE;
11977 if (mode != VOIDmode)
11978 {
11979 pcum->call_cookie =
11980 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11981 && GET_MODE_SIZE (mode) > 4
11982 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11983
11984 /* If the default ABI is the Renesas ABI then all library
11985 calls must assume that the library will be using the
11986 Renesas ABI. So if the function would return its result
11987 in memory then we must force the address of this memory
11988 block onto the stack. Ideally we would like to call
11989 targetm.calls.return_in_memory() here but we do not have
11990 the TYPE or the FNDECL available so we synthesize the
11991 contents of that function as best we can. */
11992 pcum->force_mem =
11993 (TARGET_DEFAULT & MASK_HITACHI)
11994 && (mode == BLKmode
11995 || (GET_MODE_SIZE (mode) > 4
11996 && !(mode == DFmode
11997 && TARGET_FPU_DOUBLE)));
11998 }
11999 else
12000 {
12001 pcum->call_cookie = 0;
12002 pcum->force_mem = FALSE;
12003 }
12004 }
12005 }
12006
12007 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12008 not descend into CONST_DOUBLEs when replacing.
12009
12010 Note that copying is not done so X must not be shared unless all copies
12011 are to be modified.
12012
12013 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12014 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12015 replacements[n*2+1] - and that we take mode changes into account.
12016
12017 If a replacement is ambiguous, return NULL_RTX.
12018
12019 If MODIFY is zero, don't modify any rtl in place,
12020 just return zero or nonzero for failure / success. */
12021
12022 rtx
12023 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12024 {
12025 int i, j;
12026 const char *fmt;
12027
12028 /* The following prevents loops from occurring when we change a MEM in a
12029 CONST_DOUBLE into the same CONST_DOUBLE. */
12030 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
12031 return x;
12032
12033 for (i = n_replacements - 1; i >= 0 ; i--)
12034 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12035 return replacements[i*2+1];
12036
12037 /* Allow this function to make replacements in EXPR_LISTs. */
12038 if (x == 0)
12039 return 0;
12040
12041 if (GET_CODE (x) == SUBREG)
12042 {
12043 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12044 n_replacements, modify);
12045
12046 if (CONST_INT_P (new_rtx))
12047 {
12048 x = simplify_subreg (GET_MODE (x), new_rtx,
12049 GET_MODE (SUBREG_REG (x)),
12050 SUBREG_BYTE (x));
12051 if (! x)
12052 abort ();
12053 }
12054 else if (modify)
12055 SUBREG_REG (x) = new_rtx;
12056
12057 return x;
12058 }
12059 else if (REG_P (x))
12060 {
12061 unsigned regno = REGNO (x);
12062 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12063 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12064 rtx result = NULL_RTX;
12065
12066 for (i = n_replacements - 1; i >= 0; i--)
12067 {
12068 rtx from = replacements[i*2];
12069 rtx to = replacements[i*2+1];
12070 unsigned from_regno, from_nregs, to_regno, new_regno;
12071
12072 if (!REG_P (from))
12073 continue;
12074 from_regno = REGNO (from);
12075 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12076 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12077 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12078 {
12079 if (regno < from_regno
12080 || regno + nregs > from_regno + nregs
12081 || !REG_P (to)
12082 || result)
12083 return NULL_RTX;
12084 to_regno = REGNO (to);
12085 if (to_regno < FIRST_PSEUDO_REGISTER)
12086 {
12087 new_regno = regno + to_regno - from_regno;
12088 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12089 != nregs)
12090 return NULL_RTX;
12091 result = gen_rtx_REG (GET_MODE (x), new_regno);
12092 }
12093 else if (GET_MODE (x) <= GET_MODE (to))
12094 result = gen_lowpart_common (GET_MODE (x), to);
12095 else
12096 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12097 }
12098 }
12099 return result ? result : x;
12100 }
12101 else if (GET_CODE (x) == ZERO_EXTEND)
12102 {
12103 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12104 n_replacements, modify);
12105
12106 if (CONST_INT_P (new_rtx))
12107 {
12108 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12109 new_rtx, GET_MODE (XEXP (x, 0)));
12110 if (! x)
12111 abort ();
12112 }
12113 else if (modify)
12114 XEXP (x, 0) = new_rtx;
12115
12116 return x;
12117 }
12118
12119 fmt = GET_RTX_FORMAT (GET_CODE (x));
12120 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12121 {
12122 rtx new_rtx;
12123
12124 if (fmt[i] == 'e')
12125 {
12126 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12127 n_replacements, modify);
12128 if (!new_rtx)
12129 return NULL_RTX;
12130 if (modify)
12131 XEXP (x, i) = new_rtx;
12132 }
12133 else if (fmt[i] == 'E')
12134 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12135 {
12136 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12137 n_replacements, modify);
12138 if (!new_rtx)
12139 return NULL_RTX;
12140 if (modify)
12141 XVECEXP (x, i, j) = new_rtx;
12142 }
12143 }
12144
12145 return x;
12146 }
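
/* Illustrative caller sketch for replace_n_hard_rtx above, not part of the
   original file; the register numbers, the INSN argument and the function
   name are made up.  Each FROM(n)/TO(n) pair occupies two consecutive slots
   of the REPLACEMENTS array, and a MODIFY argument of zero only tests
   whether the substitution would succeed.  */
#if 0 /* Sketch only, not built.  */
static void
sh_replace_two_hard_regs_example (rtx insn)
{
  rtx repl[4];

  repl[0] = gen_rtx_REG (SImode, 4);	/* FROM(0) */
  repl[1] = gen_rtx_REG (SImode, 5);	/* TO(0)   */
  repl[2] = gen_rtx_REG (SImode, 6);	/* FROM(1) */
  repl[3] = gen_rtx_REG (SImode, 7);	/* TO(1)   */
  if (replace_n_hard_rtx (PATTERN (insn), repl, 2, 0) != NULL_RTX)
    replace_n_hard_rtx (PATTERN (insn), repl, 2, 1);
}
#endif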
12147
12148 rtx
12149 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12150 {
12151 enum rtx_code code = TRUNCATE;
12152
12153 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12154 {
12155 rtx inner = XEXP (x, 0);
12156 enum machine_mode inner_mode = GET_MODE (inner);
12157
12158 if (inner_mode == mode)
12159 return inner;
12160 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12161 x = inner;
12162 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12163 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12164 {
12165 code = GET_CODE (x);
12166 x = inner;
12167 }
12168 }
12169 return gen_rtx_fmt_e (code, mode, x);
12170 }
12171
12172 /* Called via for_each_rtx after reload, to clean up truncates of
12173 registers that span multiple actual hard registers. */
12174 int
12175 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12176 {
12177 rtx x = *p, reg;
12178
12179 if (GET_CODE (x) != TRUNCATE)
12180 return 0;
12181 reg = XEXP (x, 0);
12182 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12183 {
12184 enum machine_mode reg_mode = GET_MODE (reg);
12185 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12186 subreg_lowpart_offset (DImode, reg_mode));
12187 *(int*) n_changes += 1;
12188 return -1;
12189 }
12190 return 0;
12191 }
12192
12193 /* Load and store depend on the highpart of the address. However,
12194 set_attr_alternative does not give well-defined results before reload,
12195 so we must look at the rtl ourselves to see if any of the feeding
12196 registers is used in a memref. */
12197
12198 /* Called by sh_contains_memref_p via for_each_rtx. */
12199 static int
12200 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12201 {
12202 return (MEM_P (*loc));
12203 }
12204
12205 /* Return nonzero iff INSN contains a MEM. */
12206 int
12207 sh_contains_memref_p (rtx insn)
12208 {
12209 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12210 }
12211
12212 /* Return nonzero iff INSN loads a banked register. */
12213 int
12214 sh_loads_bankedreg_p (rtx insn)
12215 {
12216 if (GET_CODE (PATTERN (insn)) == SET)
12217 {
12218 rtx op = SET_DEST (PATTERN(insn));
12219 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12220 return 1;
12221 }
12222
12223 return 0;
12224 }
12225
12226 /* FNADDR is the MEM expression from a call expander. Return an address
12227 to use in an SHmedia insn pattern. */
12228 rtx
12229 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12230 {
12231 int is_sym;
12232
12233 fnaddr = XEXP (fnaddr, 0);
12234 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12235 if (flag_pic && is_sym)
12236 {
12237 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12238 {
12239 rtx reg = gen_reg_rtx (Pmode);
12240
12241 /* We must not use GOTPLT for sibcalls, because PIC_REG
12242 must be restored before the PLT code gets to run. */
12243 if (is_sibcall)
12244 emit_insn (gen_symGOT2reg (reg, fnaddr));
12245 else
12246 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12247 fnaddr = reg;
12248 }
12249 else
12250 {
12251 fnaddr = gen_sym2PIC (fnaddr);
12252 PUT_MODE (fnaddr, Pmode);
12253 }
12254 }
12255 /* If ptabs might trap, make this visible to the rest of the compiler.
12256 We generally assume that symbols pertain to valid locations, but
12257 it is possible to generate invalid symbols with asm or linker tricks.
12258 In a list of functions where each returns its successor, an invalid
12259 symbol might denote an empty list. */
12260 if (!TARGET_PT_FIXED
12261 && (!is_sym || TARGET_INVALID_SYMBOLS)
12262 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12263 {
12264 rtx tr = gen_reg_rtx (PDImode);
12265
12266 emit_insn (gen_ptabs (tr, fnaddr));
12267 fnaddr = tr;
12268 }
12269 else if (! target_reg_operand (fnaddr, Pmode))
12270 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12271 return fnaddr;
12272 }
12273
12274 reg_class_t
12275 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12276 enum machine_mode mode, secondary_reload_info *sri)
12277 {
12278 enum reg_class rclass = (enum reg_class) rclass_i;
12279
12280 if (in_p)
12281 {
12282 if (REGCLASS_HAS_FP_REG (rclass)
12283 && ! TARGET_SHMEDIA
12284 && immediate_operand ((x), mode)
12285 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12286 && mode == SFmode && fldi_ok ()))
12287 switch (mode)
12288 {
12289 case SFmode:
12290 sri->icode = CODE_FOR_reload_insf__frn;
12291 return NO_REGS;
12292 case DFmode:
12293 sri->icode = CODE_FOR_reload_indf__frn;
12294 return NO_REGS;
12295 case SImode:
12296 /* ??? If we knew that we are in the appropriate mode -
12297 single precision - we could use a reload pattern directly. */
12298 return FPUL_REGS;
12299 default:
12300 abort ();
12301 }
12302 if (rclass == FPUL_REGS
12303 && ((REG_P (x)
12304 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12305 || REGNO (x) == T_REG))
12306 || GET_CODE (x) == PLUS))
12307 return GENERAL_REGS;
12308 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12309 {
12310 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12311 return GENERAL_REGS;
12312 else if (mode == SFmode)
12313 return FP_REGS;
12314 sri->icode = CODE_FOR_reload_insi__i_fpul;
12315 return NO_REGS;
12316 }
12317 if (rclass == FPSCR_REGS
12318 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12319 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12320 return GENERAL_REGS;
12321 if (REGCLASS_HAS_FP_REG (rclass)
12322 && TARGET_SHMEDIA
12323 && immediate_operand (x, mode)
12324 && x != CONST0_RTX (GET_MODE (x))
12325 && GET_MODE (x) != V4SFmode)
12326 return GENERAL_REGS;
12327 if ((mode == QImode || mode == HImode)
12328 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12329 {
12330 sri->icode = ((mode == QImode)
12331 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12332 return NO_REGS;
12333 }
12334 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12335 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12336 return TARGET_REGS;
12337 } /* end of input-only processing. */
12338
12339 if (((REGCLASS_HAS_FP_REG (rclass)
12340 && (REG_P (x)
12341 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12342 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12343 && TARGET_FMOVD))))
12344 || (REGCLASS_HAS_GENERAL_REG (rclass)
12345 && REG_P (x)
12346 && FP_REGISTER_P (REGNO (x))))
12347 && ! TARGET_SHMEDIA
12348 && (mode == SFmode || mode == SImode))
12349 return FPUL_REGS;
12350 if ((rclass == FPUL_REGS
12351 || (REGCLASS_HAS_FP_REG (rclass)
12352 && ! TARGET_SHMEDIA && mode == SImode))
12353 && (MEM_P (x)
12354 || (REG_P (x)
12355 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12356 || REGNO (x) == T_REG
12357 || system_reg_operand (x, VOIDmode)))))
12358 {
12359 if (rclass == FPUL_REGS)
12360 return GENERAL_REGS;
12361 return FPUL_REGS;
12362 }
12363 if ((rclass == TARGET_REGS
12364 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12365 && !satisfies_constraint_Csy (x)
12366 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12367 return GENERAL_REGS;
12368 if ((rclass == MAC_REGS || rclass == PR_REGS)
12369 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12370 && rclass != REGNO_REG_CLASS (REGNO (x)))
12371 return GENERAL_REGS;
12372 if (rclass != GENERAL_REGS && REG_P (x)
12373 && TARGET_REGISTER_P (REGNO (x)))
12374 return GENERAL_REGS;
12375 return NO_REGS;
12376 }
12377
12378 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12379
12380 #include "gt-sh.h"