1c201bbe6a396136542536b896623e0aae264870
[gcc.git] / gcc / config / sh / sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "diagnostic-core.h"
41 #include "toplev.h"
42 #include "recog.h"
43 #include "integrate.h"
44 #include "dwarf2.h"
45 #include "tm_p.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "df.h"
51 #include "cfglayout.h"
52 #include "intl.h"
53 #include "sched-int.h"
54 #include "params.h"
55 #include "ggc.h"
56 #include "gimple.h"
57 #include "cfgloop.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
60
61
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
63
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
66
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
73
74 /* Used to simplify the logic below. Find the attributes wherever
75 they may be. */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
81
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
84
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
87
88 /* Global variables for machine-dependent things. */
89
90 /* Which cpu are we scheduling for. */
91 enum processor_type sh_cpu;
92
93 /* Definitions used in ready queue reordering for first scheduling pass. */
94
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
97
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
100
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
103
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
106
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
110
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
113
114 /* Provides the class number of the smallest class containing
115 reg number. */
116
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
118 {
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
158 };
159
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
162
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
166
167 int assembler_dialect;
168
169 static bool shmedia_space_reserved_for_target_registers;
170
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void sh_option_override (void);
186 static void sh_option_optimization (int, int);
187 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
188 static rtx frame_insn (rtx);
189 static rtx push (int);
190 static void pop (int);
191 static void push_regs (HARD_REG_SET *, int);
192 static int calc_live_regs (HARD_REG_SET *);
193 static HOST_WIDE_INT rounded_frame_size (int);
194 static bool sh_frame_pointer_required (void);
195 static rtx mark_constant_pool_use (rtx);
196 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
197 static tree sh_handle_resbank_handler_attribute (tree *, tree,
198 tree, int, bool *);
199 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
200 tree, int, bool *);
201 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
203 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
204 static void sh_print_operand (FILE *, rtx, int);
205 static void sh_print_operand_address (FILE *, rtx);
206 static bool sh_print_operand_punct_valid_p (unsigned char code);
207 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
208 static void sh_insert_attributes (tree, tree *);
209 static const char *sh_check_pch_target_flags (int);
210 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
211 static int sh_adjust_cost (rtx, rtx, rtx, int);
212 static int sh_issue_rate (void);
213 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
214 static short find_set_regmode_weight (rtx, enum machine_mode);
215 static short find_insn_regmode_weight (rtx, enum machine_mode);
216 static void find_regmode_weight (basic_block, enum machine_mode);
217 static int find_r0_life_regions (basic_block);
218 static void sh_md_init_global (FILE *, int, int);
219 static void sh_md_finish_global (FILE *, int);
220 static int rank_for_reorder (const void *, const void *);
221 static void swap_reorder (rtx *, int);
222 static void ready_reorder (rtx *, int);
223 static short high_pressure (enum machine_mode);
224 static int sh_reorder (FILE *, int, rtx *, int *, int);
225 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
226 static void sh_md_init (FILE *, int, int);
227 static int sh_variable_issue (FILE *, int, rtx, int);
228
229 static bool sh_function_ok_for_sibcall (tree, tree);
230
231 static bool sh_cannot_modify_jumps_p (void);
232 static reg_class_t sh_target_reg_class (void);
233 static bool sh_optimize_target_register_callee_saved (bool);
234 static bool sh_ms_bitfield_layout_p (const_tree);
235
236 static void sh_init_builtins (void);
237 static tree sh_builtin_decl (unsigned, bool);
238 static void sh_media_init_builtins (void);
239 static tree sh_media_builtin_decl (unsigned, bool);
240 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
241 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
242 static void sh_file_start (void);
243 static int flow_dependent_p (rtx, rtx);
244 static void flow_dependent_p_1 (rtx, const_rtx, void *);
245 static int shiftcosts (rtx);
246 static int andcosts (rtx);
247 static int addsubcosts (rtx);
248 static int multcosts (rtx);
249 static bool unspec_caller_rtx_p (rtx);
250 static bool sh_cannot_copy_insn_p (rtx);
251 static bool sh_rtx_costs (rtx, int, int, int *, bool);
252 static int sh_address_cost (rtx, bool);
253 static int sh_pr_n_sets (void);
254 static rtx sh_allocate_initial_value (rtx);
255 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
256 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
257 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
258 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
259 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
260 static int scavenge_reg (HARD_REG_SET *s);
261 struct save_schedule_s;
262 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
263 struct save_schedule_s *, int);
264
265 static rtx sh_struct_value_rtx (tree, int);
266 static rtx sh_function_value (const_tree, const_tree, bool);
267 static bool sh_function_value_regno_p (const unsigned int);
268 static rtx sh_libcall_value (enum machine_mode, const_rtx);
269 static bool sh_return_in_memory (const_tree, const_tree);
270 static rtx sh_builtin_saveregs (void);
271 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
272 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
273 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
274 static tree sh_build_builtin_va_list (void);
275 static void sh_va_start (tree, rtx);
276 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
277 static bool sh_promote_prototypes (const_tree);
278 static enum machine_mode sh_promote_function_mode (const_tree type,
279 enum machine_mode,
280 int *punsignedp,
281 const_tree funtype,
282 int for_return);
283 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
284 const_tree, bool);
285 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
286 const_tree, bool);
287 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
288 tree, bool);
289 static void sh_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
290 const_tree, bool);
291 static rtx sh_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
292 const_tree, bool);
293 static bool sh_scalar_mode_supported_p (enum machine_mode);
294 static int sh_dwarf_calling_convention (const_tree);
295 static void sh_encode_section_info (tree, rtx, int);
296 static int sh2a_function_vector_p (tree);
297 static void sh_trampoline_init (rtx, tree, rtx);
298 static rtx sh_trampoline_adjust_address (rtx);
299 \f
300 static const struct attribute_spec sh_attribute_table[] =
301 {
302 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
303 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
304 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
305 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
306 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
307 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
308 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
309 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
310 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
311 #ifdef SYMBIAN
312 /* Symbian support adds three new attributes:
313 dllexport - for exporting a function/variable that will live in a dll
314 dllimport - for importing a function/variable from a dll
315
316 Microsoft allows multiple declspecs in one __declspec, separating
317 them with spaces. We do NOT support this. Instead, use __declspec
318 multiple times. */
319 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
320 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
321 #endif
322 { NULL, 0, 0, false, false, false, NULL }
323 };
324 \f
325 /* Initialize the GCC target structure. */
326 #undef TARGET_ATTRIBUTE_TABLE
327 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
328
329 /* The next two are used for debug info when compiling with -gdwarf. */
330 #undef TARGET_ASM_UNALIGNED_HI_OP
331 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
332 #undef TARGET_ASM_UNALIGNED_SI_OP
333 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
334
335 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
336 #undef TARGET_ASM_UNALIGNED_DI_OP
337 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
338 #undef TARGET_ASM_ALIGNED_DI_OP
339 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
340
341 #undef TARGET_OPTION_OVERRIDE
342 #define TARGET_OPTION_OVERRIDE sh_option_override
343 #undef TARGET_OPTION_OPTIMIZATION
344 #define TARGET_OPTION_OPTIMIZATION sh_option_optimization
345
346 #undef TARGET_PRINT_OPERAND
347 #define TARGET_PRINT_OPERAND sh_print_operand
348 #undef TARGET_PRINT_OPERAND_ADDRESS
349 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
350 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
351 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
352
353 #undef TARGET_ASM_FUNCTION_EPILOGUE
354 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
355
356 #undef TARGET_ASM_OUTPUT_MI_THUNK
357 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
358
359 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
360 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
361
362 #undef TARGET_ASM_FILE_START
363 #define TARGET_ASM_FILE_START sh_file_start
364 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
365 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
366
367 #undef TARGET_DEFAULT_TARGET_FLAGS
368 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
369 #undef TARGET_HANDLE_OPTION
370 #define TARGET_HANDLE_OPTION sh_handle_option
371
372 #undef TARGET_REGISTER_MOVE_COST
373 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
374
375 #undef TARGET_INSERT_ATTRIBUTES
376 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
377
378 #undef TARGET_SCHED_ADJUST_COST
379 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
380
381 #undef TARGET_SCHED_ISSUE_RATE
382 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
383
384 /* The next 5 hooks have been implemented for reenabling sched1. With the
385 help of these macros we are limiting the movement of insns in sched1 to
386 reduce the register pressure. The overall idea is to keep count of SImode
387 and SFmode regs required by already scheduled insns. When these counts
388 cross some threshold values; give priority to insns that free registers.
389 The insn that frees registers is most likely to be the insn with lowest
390 LUID (original insn order); but such an insn might be there in the stalled
391 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
392 upto a max of 8 cycles so that such insns may move from Q -> R.
393
394 The description of the hooks are as below:
395
396 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
397 scheduler; it is called inside the sched_init function just after
398 find_insn_reg_weights function call. It is used to calculate the SImode
399 and SFmode weights of insns of basic blocks; much similar to what
400 find_insn_reg_weights does.
401 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
402
403 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
404 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
405 (Q)->(R).
406
407 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
408 high; reorder the ready queue so that the insn with lowest LUID will be
409 issued next.
410
411 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
412 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
413
414 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
415 can be returned from TARGET_SCHED_REORDER2.
416
417 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
418
419 #undef TARGET_SCHED_DFA_NEW_CYCLE
420 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
421
422 #undef TARGET_SCHED_INIT_GLOBAL
423 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
424
425 #undef TARGET_SCHED_FINISH_GLOBAL
426 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
427
428 #undef TARGET_SCHED_VARIABLE_ISSUE
429 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
430
431 #undef TARGET_SCHED_REORDER
432 #define TARGET_SCHED_REORDER sh_reorder
433
434 #undef TARGET_SCHED_REORDER2
435 #define TARGET_SCHED_REORDER2 sh_reorder2
436
437 #undef TARGET_SCHED_INIT
438 #define TARGET_SCHED_INIT sh_md_init
439
440 #undef TARGET_LEGITIMIZE_ADDRESS
441 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
442
443 #undef TARGET_CANNOT_MODIFY_JUMPS_P
444 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
445 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
446 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
447 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
448 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
449 sh_optimize_target_register_callee_saved
450
451 #undef TARGET_MS_BITFIELD_LAYOUT_P
452 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
453
454 #undef TARGET_INIT_BUILTINS
455 #define TARGET_INIT_BUILTINS sh_init_builtins
456 #undef TARGET_BUILTIN_DECL
457 #define TARGET_BUILTIN_DECL sh_builtin_decl
458 #undef TARGET_EXPAND_BUILTIN
459 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
460
461 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
462 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
463
464 #undef TARGET_CANNOT_COPY_INSN_P
465 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
466 #undef TARGET_RTX_COSTS
467 #define TARGET_RTX_COSTS sh_rtx_costs
468 #undef TARGET_ADDRESS_COST
469 #define TARGET_ADDRESS_COST sh_address_cost
470 #undef TARGET_ALLOCATE_INITIAL_VALUE
471 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
472
473 #undef TARGET_MACHINE_DEPENDENT_REORG
474 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
475
476 #undef TARGET_DWARF_REGISTER_SPAN
477 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
478
479 #ifdef HAVE_AS_TLS
480 #undef TARGET_HAVE_TLS
481 #define TARGET_HAVE_TLS true
482 #endif
483
484 #undef TARGET_PROMOTE_PROTOTYPES
485 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
486 #undef TARGET_PROMOTE_FUNCTION_MODE
487 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
488
489 #undef TARGET_FUNCTION_VALUE
490 #define TARGET_FUNCTION_VALUE sh_function_value
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
493 #undef TARGET_LIBCALL_VALUE
494 #define TARGET_LIBCALL_VALUE sh_libcall_value
495 #undef TARGET_STRUCT_VALUE_RTX
496 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
497 #undef TARGET_RETURN_IN_MEMORY
498 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
499
500 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
501 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
502 #undef TARGET_SETUP_INCOMING_VARARGS
503 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
504 #undef TARGET_STRICT_ARGUMENT_NAMING
505 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
506 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
507 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
508 #undef TARGET_MUST_PASS_IN_STACK
509 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
510 #undef TARGET_PASS_BY_REFERENCE
511 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
512 #undef TARGET_CALLEE_COPIES
513 #define TARGET_CALLEE_COPIES sh_callee_copies
514 #undef TARGET_ARG_PARTIAL_BYTES
515 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
516 #undef TARGET_FUNCTION_ARG
517 #define TARGET_FUNCTION_ARG sh_function_arg
518 #undef TARGET_FUNCTION_ARG_ADVANCE
519 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
520
521 #undef TARGET_BUILD_BUILTIN_VA_LIST
522 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
523 #undef TARGET_EXPAND_BUILTIN_VA_START
524 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
525 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
526 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
527
528 #undef TARGET_SCALAR_MODE_SUPPORTED_P
529 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
530 #undef TARGET_VECTOR_MODE_SUPPORTED_P
531 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
532
533 #undef TARGET_CHECK_PCH_TARGET_FLAGS
534 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
535
536 #undef TARGET_DWARF_CALLING_CONVENTION
537 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
538
539 #undef TARGET_FRAME_POINTER_REQUIRED
540 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
541
542 /* Return regmode weight for insn. */
543 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
544
545 /* Return current register pressure for regmode. */
546 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
547
548 #undef TARGET_ENCODE_SECTION_INFO
549 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
550
551 #ifdef SYMBIAN
552
553 #undef TARGET_ENCODE_SECTION_INFO
554 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
555 #undef TARGET_STRIP_NAME_ENCODING
556 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
557 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
558 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
559
560 #endif /* SYMBIAN */
561
562 #undef TARGET_SECONDARY_RELOAD
563 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
564
565 #undef TARGET_LEGITIMATE_ADDRESS_P
566 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
567
568 #undef TARGET_TRAMPOLINE_INIT
569 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
570 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
571 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
572
573 /* Machine-specific symbol_ref flags. */
574 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
575
576 struct gcc_target targetm = TARGET_INITIALIZER;
577 \f
578 /* Implement TARGET_HANDLE_OPTION. */
579
580 static bool
581 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
582 int value ATTRIBUTE_UNUSED)
583 {
584 switch (code)
585 {
586 case OPT_m1:
587 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
588 return true;
589
590 case OPT_m2:
591 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
592 return true;
593
594 case OPT_m2a:
595 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
596 return true;
597
598 case OPT_m2a_nofpu:
599 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
600 return true;
601
602 case OPT_m2a_single:
603 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
604 return true;
605
606 case OPT_m2a_single_only:
607 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
608 return true;
609
610 case OPT_m2e:
611 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
612 return true;
613
614 case OPT_m3:
615 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
616 return true;
617
618 case OPT_m3e:
619 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
620 return true;
621
622 case OPT_m4:
623 case OPT_m4_100:
624 case OPT_m4_200:
625 case OPT_m4_300:
626 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
627 return true;
628
629 case OPT_m4_nofpu:
630 case OPT_m4_100_nofpu:
631 case OPT_m4_200_nofpu:
632 case OPT_m4_300_nofpu:
633 case OPT_m4_340:
634 case OPT_m4_400:
635 case OPT_m4_500:
636 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
637 return true;
638
639 case OPT_m4_single:
640 case OPT_m4_100_single:
641 case OPT_m4_200_single:
642 case OPT_m4_300_single:
643 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
644 return true;
645
646 case OPT_m4_single_only:
647 case OPT_m4_100_single_only:
648 case OPT_m4_200_single_only:
649 case OPT_m4_300_single_only:
650 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
651 return true;
652
653 case OPT_m4a:
654 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
655 return true;
656
657 case OPT_m4a_nofpu:
658 case OPT_m4al:
659 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
660 return true;
661
662 case OPT_m4a_single:
663 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
664 return true;
665
666 case OPT_m4a_single_only:
667 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
668 return true;
669
670 case OPT_m5_32media:
671 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
672 return true;
673
674 case OPT_m5_32media_nofpu:
675 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
676 return true;
677
678 case OPT_m5_64media:
679 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
680 return true;
681
682 case OPT_m5_64media_nofpu:
683 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
684 return true;
685
686 case OPT_m5_compact:
687 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
688 return true;
689
690 case OPT_m5_compact_nofpu:
691 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
692 return true;
693
694 default:
695 return true;
696 }
697 }
698 \f
699 /* Set default optimization options. */
700 static void
701 sh_option_optimization (int level, int size)
702 {
703 if (level)
704 {
705 if (!size)
706 sh_div_str = "inv:minlat";
707 }
708 if (size)
709 {
710 target_flags |= MASK_SMALLCODE;
711 sh_div_str = SH_DIV_STR_FOR_SIZE ;
712 }
713 else
714 TARGET_CBRANCHDI4 = 1;
715 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
716 haven't been parsed yet, hence we'd read only the default.
717 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
718 it's OK to always set flag_branch_target_load_optimize. */
719 if (level > 1)
720 {
721 flag_branch_target_load_optimize = 1;
722 if (!size)
723 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
724 }
725 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
726 here, so leave it to TARGET_OPTION_OVERRIDE to set
727 flag_finite_math_only. We set it to 2 here so we know if the user
728 explicitly requested this to be on or off. */
729 flag_finite_math_only = 2;
730 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
731 the user explicitly requested this to be on or off. */
732 if (flag_schedule_insns > 0)
733 flag_schedule_insns = 2;
734
735 set_param_value ("simultaneous-prefetches", 2);
736 }
737
738 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
739 various options, and do some machine dependent initialization. */
740 static void
741 sh_option_override (void)
742 {
743 int regno;
744
745 SUBTARGET_OVERRIDE_OPTIONS;
746 if (flag_finite_math_only == 2)
747 flag_finite_math_only
748 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
749 if (TARGET_SH2E && !flag_finite_math_only)
750 target_flags |= MASK_IEEE;
751 sh_cpu = PROCESSOR_SH1;
752 assembler_dialect = 0;
753 if (TARGET_SH2)
754 sh_cpu = PROCESSOR_SH2;
755 if (TARGET_SH2E)
756 sh_cpu = PROCESSOR_SH2E;
757 if (TARGET_SH2A)
758 sh_cpu = PROCESSOR_SH2A;
759 if (TARGET_SH3)
760 sh_cpu = PROCESSOR_SH3;
761 if (TARGET_SH3E)
762 sh_cpu = PROCESSOR_SH3E;
763 if (TARGET_SH4)
764 {
765 assembler_dialect = 1;
766 sh_cpu = PROCESSOR_SH4;
767 }
768 if (TARGET_SH4A_ARCH)
769 {
770 assembler_dialect = 1;
771 sh_cpu = PROCESSOR_SH4A;
772 }
773 if (TARGET_SH5)
774 {
775 sh_cpu = PROCESSOR_SH5;
776 target_flags |= MASK_ALIGN_DOUBLE;
777 if (TARGET_SHMEDIA_FPU)
778 target_flags |= MASK_FMOVD;
779 if (TARGET_SHMEDIA)
780 {
781 /* There are no delay slots on SHmedia. */
782 flag_delayed_branch = 0;
783 /* Relaxation isn't yet supported for SHmedia */
784 target_flags &= ~MASK_RELAX;
785 /* After reload, if conversion does little good but can cause
786 ICEs:
787 - find_if_block doesn't do anything for SH because we don't
788 have conditional execution patterns. (We use conditional
789 move patterns, which are handled differently, and only
790 before reload).
791 - find_cond_trap doesn't do anything for the SH because we
792 don't have conditional traps.
793 - find_if_case_1 uses redirect_edge_and_branch_force in
794 the only path that does an optimization, and this causes
795 an ICE when branch targets are in registers.
796 - find_if_case_2 doesn't do anything for the SHmedia after
797 reload except when it can redirect a tablejump - and
798 that's rather rare. */
799 flag_if_conversion2 = 0;
800 if (! strcmp (sh_div_str, "call"))
801 sh_div_strategy = SH_DIV_CALL;
802 else if (! strcmp (sh_div_str, "call2"))
803 sh_div_strategy = SH_DIV_CALL2;
804 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
805 sh_div_strategy = SH_DIV_FP;
806 else if (! strcmp (sh_div_str, "inv"))
807 sh_div_strategy = SH_DIV_INV;
808 else if (! strcmp (sh_div_str, "inv:minlat"))
809 sh_div_strategy = SH_DIV_INV_MINLAT;
810 else if (! strcmp (sh_div_str, "inv20u"))
811 sh_div_strategy = SH_DIV_INV20U;
812 else if (! strcmp (sh_div_str, "inv20l"))
813 sh_div_strategy = SH_DIV_INV20L;
814 else if (! strcmp (sh_div_str, "inv:call2"))
815 sh_div_strategy = SH_DIV_INV_CALL2;
816 else if (! strcmp (sh_div_str, "inv:call"))
817 sh_div_strategy = SH_DIV_INV_CALL;
818 else if (! strcmp (sh_div_str, "inv:fp"))
819 {
820 if (TARGET_FPU_ANY)
821 sh_div_strategy = SH_DIV_INV_FP;
822 else
823 sh_div_strategy = SH_DIV_INV;
824 }
825 TARGET_CBRANCHDI4 = 0;
826 /* Assembler CFI isn't yet fully supported for SHmedia. */
827 flag_dwarf2_cfi_asm = 0;
828 }
829 }
830 else
831 {
832 /* Only the sh64-elf assembler fully supports .quad properly. */
833 targetm.asm_out.aligned_op.di = NULL;
834 targetm.asm_out.unaligned_op.di = NULL;
835 }
836 if (TARGET_SH1)
837 {
838 if (! strcmp (sh_div_str, "call-div1"))
839 sh_div_strategy = SH_DIV_CALL_DIV1;
840 else if (! strcmp (sh_div_str, "call-fp")
841 && (TARGET_FPU_DOUBLE
842 || (TARGET_HARD_SH4 && TARGET_SH2E)
843 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
844 sh_div_strategy = SH_DIV_CALL_FP;
845 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
846 sh_div_strategy = SH_DIV_CALL_TABLE;
847 else
848 /* Pick one that makes most sense for the target in general.
849 It is not much good to use different functions depending
850 on -Os, since then we'll end up with two different functions
851 when some of the code is compiled for size, and some for
852 speed. */
853
854 /* SH4 tends to emphasize speed. */
855 if (TARGET_HARD_SH4)
856 sh_div_strategy = SH_DIV_CALL_TABLE;
857 /* These have their own way of doing things. */
858 else if (TARGET_SH2A)
859 sh_div_strategy = SH_DIV_INTRINSIC;
860 /* ??? Should we use the integer SHmedia function instead? */
861 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
862 sh_div_strategy = SH_DIV_CALL_FP;
863 /* SH1 .. SH3 cores often go into small-footprint systems, so
864 default to the smallest implementation available. */
865 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
866 sh_div_strategy = SH_DIV_CALL_TABLE;
867 else
868 sh_div_strategy = SH_DIV_CALL_DIV1;
869 }
870 if (!TARGET_SH1)
871 TARGET_PRETEND_CMOVE = 0;
872 if (sh_divsi3_libfunc[0])
873 ; /* User supplied - leave it alone. */
874 else if (TARGET_DIVIDE_CALL_FP)
875 sh_divsi3_libfunc = "__sdivsi3_i4";
876 else if (TARGET_DIVIDE_CALL_TABLE)
877 sh_divsi3_libfunc = "__sdivsi3_i4i";
878 else if (TARGET_SH5)
879 sh_divsi3_libfunc = "__sdivsi3_1";
880 else
881 sh_divsi3_libfunc = "__sdivsi3";
882 if (sh_branch_cost == -1)
883 sh_branch_cost
884 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
885
886 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
887 if (! VALID_REGISTER_P (regno))
888 sh_register_names[regno][0] = '\0';
889
890 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
891 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
892 sh_additional_register_names[regno][0] = '\0';
893
894 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
895
896 if ((flag_pic && ! TARGET_PREFERGOT)
897 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
898 flag_no_function_cse = 1;
899
900 if (targetm.small_register_classes_for_mode_p (VOIDmode)) \
901 {
902 /* Never run scheduling before reload, since that can
903 break global alloc, and generates slower code anyway due
904 to the pressure on R0. */
905 /* Enable sched1 for SH4 if the user explicitly requests.
906 When sched1 is enabled, the ready queue will be reordered by
907 the target hooks if pressure is high. We can not do this for
908 PIC, SH3 and lower as they give spill failures for R0. */
909 if (!TARGET_HARD_SH4 || flag_pic)
910 flag_schedule_insns = 0;
911 /* ??? Current exception handling places basic block boundaries
912 after call_insns. It causes the high pressure on R0 and gives
913 spill failures for R0 in reload. See PR 22553 and the thread
914 on gcc-patches
915 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
916 else if (flag_exceptions)
917 {
918 if (flag_schedule_insns == 1)
919 warning (0, "ignoring -fschedule-insns because of exception handling bug");
920 flag_schedule_insns = 0;
921 }
922 else if (flag_schedule_insns == 2)
923 flag_schedule_insns = 0;
924 }
925
926 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
927 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
928
929 /* Unwind info is not correct around the CFG unless either a frame
930 pointer is present or M_A_O_A is set. Fixing this requires rewriting
931 unwind info generation to be aware of the CFG and propagating states
932 around edges. */
933 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
934 || flag_exceptions || flag_non_call_exceptions)
935 && flag_omit_frame_pointer
936 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
937 {
938 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
939 warning (0, "unwind tables currently require either a frame pointer "
940 "or -maccumulate-outgoing-args for correctness");
941 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
942 }
943
944 /* Unwinding with -freorder-blocks-and-partition does not work on this
945 architecture, because it requires far jumps to label crossing between
946 hot/cold sections which are rejected on this architecture. */
947 if (flag_reorder_blocks_and_partition)
948 {
949 if (flag_exceptions)
950 {
951 inform (input_location,
952 "-freorder-blocks-and-partition does not work with "
953 "exceptions on this architecture");
954 flag_reorder_blocks_and_partition = 0;
955 flag_reorder_blocks = 1;
956 }
957 else if (flag_unwind_tables)
958 {
959 inform (input_location,
960 "-freorder-blocks-and-partition does not support unwind "
961 "info on this architecture");
962 flag_reorder_blocks_and_partition = 0;
963 flag_reorder_blocks = 1;
964 }
965 }
966
967 if (align_loops == 0)
968 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
969 if (align_jumps == 0)
970 align_jumps = 1 << CACHE_LOG;
971 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
972 align_jumps = TARGET_SHMEDIA ? 4 : 2;
973
974 /* Allocation boundary (in *bytes*) for the code of a function.
975 SH1: 32 bit alignment is faster, because instructions are always
976 fetched as a pair from a longword boundary.
977 SH2 .. SH5 : align to cache line start. */
978 if (align_functions == 0)
979 align_functions
980 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
981 /* The linker relaxation code breaks when a function contains
982 alignments that are larger than that at the start of a
983 compilation unit. */
984 if (TARGET_RELAX)
985 {
986 int min_align
987 = align_loops > align_jumps ? align_loops : align_jumps;
988
989 /* Also take possible .long constants / mova tables int account. */
990 if (min_align < 4)
991 min_align = 4;
992 if (align_functions < min_align)
993 align_functions = min_align;
994 }
995
996 if (sh_fixed_range_str)
997 sh_fix_range (sh_fixed_range_str);
998
999 /* This target defaults to strict volatile bitfields. */
1000 if (flag_strict_volatile_bitfields < 0)
1001 flag_strict_volatile_bitfields = 1;
1002 }
1003 \f
1004 /* Print the operand address in x to the stream. */
1005
1006 static void
1007 sh_print_operand_address (FILE *stream, rtx x)
1008 {
1009 switch (GET_CODE (x))
1010 {
1011 case REG:
1012 case SUBREG:
1013 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1014 break;
1015
1016 case PLUS:
1017 {
1018 rtx base = XEXP (x, 0);
1019 rtx index = XEXP (x, 1);
1020
1021 switch (GET_CODE (index))
1022 {
1023 case CONST_INT:
1024 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1025 reg_names[true_regnum (base)]);
1026 break;
1027
1028 case REG:
1029 case SUBREG:
1030 {
1031 int base_num = true_regnum (base);
1032 int index_num = true_regnum (index);
1033
1034 fprintf (stream, "@(r0,%s)",
1035 reg_names[MAX (base_num, index_num)]);
1036 break;
1037 }
1038
1039 default:
1040 gcc_unreachable ();
1041 }
1042 }
1043 break;
1044
1045 case PRE_DEC:
1046 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1047 break;
1048
1049 case POST_INC:
1050 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1051 break;
1052
1053 default:
1054 x = mark_constant_pool_use (x);
1055 output_addr_const (stream, x);
1056 break;
1057 }
1058 }
1059
1060 /* Print operand x (an rtx) in assembler syntax to file stream
1061 according to modifier code.
1062
1063 '.' print a .s if insn needs delay slot
1064 ',' print LOCAL_LABEL_PREFIX
1065 '@' print trap, rte or rts depending upon pragma interruptness
1066 '#' output a nop if there is nothing to put in the delay slot
1067 ''' print likelihood suffix (/u for unlikely).
1068 '>' print branch target if -fverbose-asm
1069 'O' print a constant without the #
1070 'R' print the LSW of a dp value - changes if in little endian
1071 'S' print the MSW of a dp value - changes if in little endian
1072 'T' print the next word of a dp value - same as 'R' in big endian mode.
1073 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1074 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1075 'N' print 'r63' if the operand is (const_int 0).
1076 'd' print a V2SF reg as dN instead of fpN.
1077 'm' print a pair `base,offset' or `base,index', for LD and ST.
1078 'U' Likewise for {LD,ST}{HI,LO}.
1079 'V' print the position of a single bit set.
1080 'W' print the position of a single bit cleared.
1081 't' print a memory address which is a register.
1082 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1083 'o' output an operator. */
1084
1085 static void
1086 sh_print_operand (FILE *stream, rtx x, int code)
1087 {
1088 int regno;
1089 enum machine_mode mode;
1090
1091 switch (code)
1092 {
1093 tree trapa_attr;
1094
1095 case '.':
1096 if (final_sequence
1097 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1098 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1099 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1100 break;
1101 case ',':
1102 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1103 break;
1104 case '@':
1105 trapa_attr = lookup_attribute ("trap_exit",
1106 DECL_ATTRIBUTES (current_function_decl));
1107 if (trapa_attr)
1108 fprintf (stream, "trapa #%ld",
1109 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1110 else if (sh_cfun_interrupt_handler_p ())
1111 {
1112 if (sh_cfun_resbank_handler_p ())
1113 fprintf (stream, "resbank\n");
1114 fprintf (stream, "rte");
1115 }
1116 else
1117 fprintf (stream, "rts");
1118 break;
1119 case '#':
1120 /* Output a nop if there's nothing in the delay slot. */
1121 if (dbr_sequence_length () == 0)
1122 fprintf (stream, "\n\tnop");
1123 break;
1124 case '\'':
1125 {
1126 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1127
1128 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1129 fputs ("/u", stream);
1130 break;
1131 }
1132 case '>':
1133 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1134 {
1135 fputs ("\t! target: ", stream);
1136 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1137 }
1138 break;
1139 case 'O':
1140 x = mark_constant_pool_use (x);
1141 output_addr_const (stream, x);
1142 break;
1143 /* N.B.: %R / %S / %T adjust memory addresses by four.
1144 For SHMEDIA, that means they can be used to access the first and
1145 second 32 bit part of a 64 bit (or larger) value that
1146 might be held in floating point registers or memory.
1147 While they can be used to access 64 bit parts of a larger value
1148 held in general purpose registers, that won't work with memory -
1149 neither for fp registers, since the frxx names are used. */
1150 case 'R':
1151 if (REG_P (x) || GET_CODE (x) == SUBREG)
1152 {
1153 regno = true_regnum (x);
1154 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1155 fputs (reg_names[regno], (stream));
1156 }
1157 else if (MEM_P (x))
1158 {
1159 x = adjust_address (x, SImode, 4 * LSW);
1160 sh_print_operand_address (stream, XEXP (x, 0));
1161 }
1162 else
1163 {
1164 rtx sub = NULL_RTX;
1165
1166 mode = GET_MODE (x);
1167 if (mode == VOIDmode)
1168 mode = DImode;
1169 if (GET_MODE_SIZE (mode) >= 8)
1170 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1171 if (sub)
1172 sh_print_operand (stream, sub, 0);
1173 else
1174 output_operand_lossage ("invalid operand to %%R");
1175 }
1176 break;
1177 case 'S':
1178 if (REG_P (x) || GET_CODE (x) == SUBREG)
1179 {
1180 regno = true_regnum (x);
1181 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1182 fputs (reg_names[regno], (stream));
1183 }
1184 else if (MEM_P (x))
1185 {
1186 x = adjust_address (x, SImode, 4 * MSW);
1187 sh_print_operand_address (stream, XEXP (x, 0));
1188 }
1189 else
1190 {
1191 rtx sub = NULL_RTX;
1192
1193 mode = GET_MODE (x);
1194 if (mode == VOIDmode)
1195 mode = DImode;
1196 if (GET_MODE_SIZE (mode) >= 8)
1197 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1198 if (sub)
1199 sh_print_operand (stream, sub, 0);
1200 else
1201 output_operand_lossage ("invalid operand to %%S");
1202 }
1203 break;
1204 case 'T':
1205 /* Next word of a double. */
1206 switch (GET_CODE (x))
1207 {
1208 case REG:
1209 fputs (reg_names[REGNO (x) + 1], (stream));
1210 break;
1211 case MEM:
1212 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1213 && GET_CODE (XEXP (x, 0)) != POST_INC)
1214 x = adjust_address (x, SImode, 4);
1215 sh_print_operand_address (stream, XEXP (x, 0));
1216 break;
1217 default:
1218 break;
1219 }
1220 break;
1221
1222 case 't':
1223 gcc_assert (MEM_P (x));
1224 x = XEXP (x, 0);
1225 switch (GET_CODE (x))
1226 {
1227 case REG:
1228 case SUBREG:
1229 sh_print_operand (stream, x, 0);
1230 break;
1231 default:
1232 break;
1233 }
1234 break;
1235
1236 case 'o':
1237 switch (GET_CODE (x))
1238 {
1239 case PLUS: fputs ("add", stream); break;
1240 case MINUS: fputs ("sub", stream); break;
1241 case MULT: fputs ("mul", stream); break;
1242 case DIV: fputs ("div", stream); break;
1243 case EQ: fputs ("eq", stream); break;
1244 case NE: fputs ("ne", stream); break;
1245 case GT: case LT: fputs ("gt", stream); break;
1246 case GE: case LE: fputs ("ge", stream); break;
1247 case GTU: case LTU: fputs ("gtu", stream); break;
1248 case GEU: case LEU: fputs ("geu", stream); break;
1249 default:
1250 break;
1251 }
1252 break;
1253 case 'M':
1254 if (TARGET_SHMEDIA)
1255 {
1256 if (MEM_P (x)
1257 && GET_CODE (XEXP (x, 0)) == PLUS
1258 && (REG_P (XEXP (XEXP (x, 0), 1))
1259 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1260 fputc ('x', stream);
1261 }
1262 else
1263 {
1264 if (MEM_P (x))
1265 {
1266 switch (GET_MODE (x))
1267 {
1268 case QImode: fputs (".b", stream); break;
1269 case HImode: fputs (".w", stream); break;
1270 case SImode: fputs (".l", stream); break;
1271 case SFmode: fputs (".s", stream); break;
1272 case DFmode: fputs (".d", stream); break;
1273 default: gcc_unreachable ();
1274 }
1275 }
1276 }
1277 break;
1278
1279 case 'm':
1280 gcc_assert (MEM_P (x));
1281 x = XEXP (x, 0);
1282 /* Fall through. */
1283 case 'U':
1284 switch (GET_CODE (x))
1285 {
1286 case REG:
1287 case SUBREG:
1288 sh_print_operand (stream, x, 0);
1289 fputs (", 0", stream);
1290 break;
1291
1292 case PLUS:
1293 sh_print_operand (stream, XEXP (x, 0), 0);
1294 fputs (", ", stream);
1295 sh_print_operand (stream, XEXP (x, 1), 0);
1296 break;
1297
1298 default:
1299 gcc_unreachable ();
1300 }
1301 break;
1302
1303 case 'V':
1304 {
1305 int num = exact_log2 (INTVAL (x));
1306 gcc_assert (num >= 0);
1307 fprintf (stream, "#%d", num);
1308 }
1309 break;
1310
1311 case 'W':
1312 {
1313 int num = exact_log2 (~INTVAL (x));
1314 gcc_assert (num >= 0);
1315 fprintf (stream, "#%d", num);
1316 }
1317 break;
1318
1319 case 'd':
1320 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1321
1322 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1323 break;
1324
1325 case 'N':
1326 if (x == CONST0_RTX (GET_MODE (x)))
1327 {
1328 fprintf ((stream), "r63");
1329 break;
1330 }
1331 goto default_output;
1332 case 'u':
1333 if (CONST_INT_P (x))
1334 {
1335 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1336 break;
1337 }
1338 /* Fall through. */
1339
1340 default_output:
1341 default:
1342 regno = 0;
1343 mode = GET_MODE (x);
1344
1345 switch (GET_CODE (x))
1346 {
1347 case TRUNCATE:
1348 {
1349 rtx inner = XEXP (x, 0);
1350 int offset = 0;
1351 enum machine_mode inner_mode;
1352
1353 /* We might see SUBREGs with vector mode registers inside. */
1354 if (GET_CODE (inner) == SUBREG
1355 && (GET_MODE_SIZE (GET_MODE (inner))
1356 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1357 && subreg_lowpart_p (inner))
1358 inner = SUBREG_REG (inner);
1359 if (CONST_INT_P (inner))
1360 {
1361 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1362 goto default_output;
1363 }
1364 inner_mode = GET_MODE (inner);
1365 if (GET_CODE (inner) == SUBREG
1366 && (GET_MODE_SIZE (GET_MODE (inner))
1367 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1368 && REG_P (SUBREG_REG (inner)))
1369 {
1370 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1371 GET_MODE (SUBREG_REG (inner)),
1372 SUBREG_BYTE (inner),
1373 GET_MODE (inner));
1374 inner = SUBREG_REG (inner);
1375 }
1376 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1377 abort ();
1378 /* Floating point register pairs are always big endian;
1379 general purpose registers are 64 bit wide. */
1380 regno = REGNO (inner);
1381 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1382 - HARD_REGNO_NREGS (regno, mode))
1383 + offset;
1384 x = inner;
1385 goto reg;
1386 }
1387 case SIGN_EXTEND:
1388 x = XEXP (x, 0);
1389 goto reg;
1390 /* FIXME: We need this on SHmedia32 because reload generates
1391 some sign-extended HI or QI loads into DImode registers
1392 but, because Pmode is SImode, the address ends up with a
1393 subreg:SI of the DImode register. Maybe reload should be
1394 fixed so as to apply alter_subreg to such loads? */
1395 case IF_THEN_ELSE:
1396 gcc_assert (trapping_target_operand (x, VOIDmode));
1397 x = XEXP (XEXP (x, 2), 0);
1398 goto default_output;
1399 case SUBREG:
1400 gcc_assert (SUBREG_BYTE (x) == 0
1401 && REG_P (SUBREG_REG (x)));
1402
1403 x = SUBREG_REG (x);
1404 /* Fall through. */
1405
1406 reg:
1407 case REG:
1408 regno += REGNO (x);
1409 if (FP_REGISTER_P (regno)
1410 && mode == V16SFmode)
1411 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1412 else if (FP_REGISTER_P (REGNO (x))
1413 && mode == V4SFmode)
1414 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1415 else if (REG_P (x)
1416 && mode == V2SFmode)
1417 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1418 else if (FP_REGISTER_P (REGNO (x))
1419 && GET_MODE_SIZE (mode) > 4)
1420 fprintf ((stream), "d%s", reg_names[regno] + 1);
1421 else
1422 fputs (reg_names[regno], (stream));
1423 break;
1424
1425 case MEM:
1426 output_address (XEXP (x, 0));
1427 break;
1428
1429 default:
1430 if (TARGET_SH1)
1431 fputc ('#', stream);
1432 output_addr_const (stream, x);
1433 break;
1434 }
1435 break;
1436 }
1437 }
1438
1439 static bool
1440 sh_print_operand_punct_valid_p (unsigned char code)
1441 {
1442 return (code == '.' || code == '#' || code == '@' || code == ','
1443 || code == '$' || code == '\'' || code == '>');
1444 }
1445 \f
1446
1447 /* Encode symbol attributes of a SYMBOL_REF into its
1448 SYMBOL_REF_FLAGS. */
1449 static void
1450 sh_encode_section_info (tree decl, rtx rtl, int first)
1451 {
1452 default_encode_section_info (decl, rtl, first);
1453
1454 if (TREE_CODE (decl) == FUNCTION_DECL
1455 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1456 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1457 }
1458
1459 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1460 static void
1461 force_into (rtx value, rtx target)
1462 {
1463 value = force_operand (value, target);
1464 if (! rtx_equal_p (value, target))
1465 emit_insn (gen_move_insn (target, value));
1466 }
1467
1468 /* Emit code to perform a block move. Choose the best method.
1469
1470 OPERANDS[0] is the destination.
1471 OPERANDS[1] is the source.
1472 OPERANDS[2] is the size.
1473 OPERANDS[3] is the alignment safe to use. */
1474
1475 int
1476 expand_block_move (rtx *operands)
1477 {
1478 int align = INTVAL (operands[3]);
1479 int constp = (CONST_INT_P (operands[2]));
1480 int bytes = (constp ? INTVAL (operands[2]) : 0);
1481
1482 if (! constp)
1483 return 0;
1484
1485 /* If we could use mov.l to move words and dest is word-aligned, we
1486 can use movua.l for loads and still generate a relatively short
1487 and efficient sequence. */
1488 if (TARGET_SH4A_ARCH && align < 4
1489 && MEM_ALIGN (operands[0]) >= 32
1490 && can_move_by_pieces (bytes, 32))
1491 {
1492 rtx dest = copy_rtx (operands[0]);
1493 rtx src = copy_rtx (operands[1]);
1494 /* We could use different pseudos for each copied word, but
1495 since movua can only load into r0, it's kind of
1496 pointless. */
1497 rtx temp = gen_reg_rtx (SImode);
1498 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1499 int copied = 0;
1500
1501 while (copied + 4 <= bytes)
1502 {
1503 rtx to = adjust_address (dest, SImode, copied);
1504 rtx from = adjust_automodify_address (src, BLKmode,
1505 src_addr, copied);
1506
1507 set_mem_size (from, GEN_INT (4));
1508 emit_insn (gen_movua (temp, from));
1509 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1510 emit_move_insn (to, temp);
1511 copied += 4;
1512 }
1513
1514 if (copied < bytes)
1515 move_by_pieces (adjust_address (dest, BLKmode, copied),
1516 adjust_automodify_address (src, BLKmode,
1517 src_addr, copied),
1518 bytes - copied, align, 0);
1519
1520 return 1;
1521 }
1522
1523 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1524 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1525 if (align < 4 || (bytes % 4 != 0))
1526 return 0;
1527
1528 if (TARGET_HARD_SH4)
1529 {
1530 if (bytes < 12)
1531 return 0;
1532 else if (bytes == 12)
1533 {
1534 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1535 rtx r4 = gen_rtx_REG (SImode, 4);
1536 rtx r5 = gen_rtx_REG (SImode, 5);
1537
1538 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1539 force_into (XEXP (operands[0], 0), r4);
1540 force_into (XEXP (operands[1], 0), r5);
1541 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1542 return 1;
1543 }
1544 else if (! TARGET_SMALLCODE)
1545 {
1546 const char *entry_name;
1547 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1548 int dwords;
1549 rtx r4 = gen_rtx_REG (SImode, 4);
1550 rtx r5 = gen_rtx_REG (SImode, 5);
1551 rtx r6 = gen_rtx_REG (SImode, 6);
1552
1553 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1554 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1555 force_into (XEXP (operands[0], 0), r4);
1556 force_into (XEXP (operands[1], 0), r5);
1557
1558 dwords = bytes >> 3;
1559 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1560 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1561 return 1;
1562 }
1563 else
1564 return 0;
1565 }
1566 if (bytes < 64)
1567 {
1568 char entry[30];
1569 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1570 rtx r4 = gen_rtx_REG (SImode, 4);
1571 rtx r5 = gen_rtx_REG (SImode, 5);
1572
1573 sprintf (entry, "__movmemSI%d", bytes);
1574 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1575 force_into (XEXP (operands[0], 0), r4);
1576 force_into (XEXP (operands[1], 0), r5);
1577 emit_insn (gen_block_move_real (func_addr_rtx));
1578 return 1;
1579 }
1580
1581 /* This is the same number of bytes as a memcpy call, but to a different
1582 less common function name, so this will occasionally use more space. */
1583 if (! TARGET_SMALLCODE)
1584 {
1585 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1586 int final_switch, while_loop;
1587 rtx r4 = gen_rtx_REG (SImode, 4);
1588 rtx r5 = gen_rtx_REG (SImode, 5);
1589 rtx r6 = gen_rtx_REG (SImode, 6);
1590
1591 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1592 force_into (XEXP (operands[0], 0), r4);
1593 force_into (XEXP (operands[1], 0), r5);
1594
1595 /* r6 controls the size of the move. 16 is decremented from it
1596 for each 64 bytes moved. Then the negative bit left over is used
1597 as an index into a list of move instructions. e.g., a 72 byte move
1598 would be set up with size(r6) = 14, for one iteration through the
1599 big while loop, and a switch of -2 for the last part. */
1600
1601 final_switch = 16 - ((bytes / 4) % 16);
1602 while_loop = ((bytes / 4) / 16 - 1) * 16;
1603 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1604 emit_insn (gen_block_lump_real (func_addr_rtx));
1605 return 1;
1606 }
1607
1608 return 0;
1609 }
1610
1611 /* Prepare operands for a move define_expand; specifically, one of the
1612 operands must be in a register. */
1613
1614 int
1615 prepare_move_operands (rtx operands[], enum machine_mode mode)
1616 {
1617 if ((mode == SImode || mode == DImode)
1618 && flag_pic
1619 && ! ((mode == Pmode || mode == ptr_mode)
1620 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1621 {
1622 rtx temp;
1623 if (SYMBOLIC_CONST_P (operands[1]))
1624 {
1625 if (MEM_P (operands[0]))
1626 operands[1] = force_reg (Pmode, operands[1]);
1627 else if (TARGET_SHMEDIA
1628 && GET_CODE (operands[1]) == LABEL_REF
1629 && target_reg_operand (operands[0], mode))
1630 /* It's ok. */;
1631 else
1632 {
1633 temp = (!can_create_pseudo_p ()
1634 ? operands[0]
1635 : gen_reg_rtx (Pmode));
1636 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1637 }
1638 }
1639 else if (GET_CODE (operands[1]) == CONST
1640 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1641 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1642 {
1643 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1644 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1645 mode, temp);
1646 operands[1] = expand_binop (mode, add_optab, temp,
1647 XEXP (XEXP (operands[1], 0), 1),
1648 (!can_create_pseudo_p ()
1649 ? temp
1650 : gen_reg_rtx (Pmode)),
1651 0, OPTAB_LIB_WIDEN);
1652 }
1653 }
1654
1655 if (! reload_in_progress && ! reload_completed)
1656 {
1657 /* Copy the source to a register if both operands aren't registers. */
1658 if (! register_operand (operands[0], mode)
1659 && ! sh_register_operand (operands[1], mode))
1660 operands[1] = copy_to_mode_reg (mode, operands[1]);
1661
1662 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1663 {
1664 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1665 except that we can't use that function because it is static. */
1666 rtx new_rtx = change_address (operands[0], mode, 0);
1667 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1668 operands[0] = new_rtx;
1669 }
1670
1671 /* This case can happen while generating code to move the result
1672 of a library call to the target. Reject `st r0,@(rX,rY)' because
1673 reload will fail to find a spill register for rX, since r0 is already
1674 being used for the source. */
1675 else if (TARGET_SH1
1676 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1677 && MEM_P (operands[0])
1678 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1679 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1680 operands[1] = copy_to_mode_reg (mode, operands[1]);
1681 }
1682
1683 if (mode == Pmode || mode == ptr_mode)
1684 {
1685 rtx op0, op1, opc;
1686 enum tls_model tls_kind;
1687
1688 op0 = operands[0];
1689 op1 = operands[1];
1690 if (GET_CODE (op1) == CONST
1691 && GET_CODE (XEXP (op1, 0)) == PLUS
1692 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1693 != TLS_MODEL_NONE))
1694 {
1695 opc = XEXP (XEXP (op1, 0), 1);
1696 op1 = XEXP (XEXP (op1, 0), 0);
1697 }
1698 else
1699 opc = NULL_RTX;
1700
1701 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1702 {
1703 rtx tga_op1, tga_ret, tmp, tmp2;
1704
1705 switch (tls_kind)
1706 {
1707 case TLS_MODEL_GLOBAL_DYNAMIC:
1708 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1709 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1710 op1 = tga_ret;
1711 break;
1712
1713 case TLS_MODEL_LOCAL_DYNAMIC:
1714 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1715 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1716
1717 tmp = gen_reg_rtx (Pmode);
1718 emit_move_insn (tmp, tga_ret);
1719
1720 if (register_operand (op0, Pmode))
1721 tmp2 = op0;
1722 else
1723 tmp2 = gen_reg_rtx (Pmode);
1724
1725 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1726 op1 = tmp2;
1727 break;
1728
1729 case TLS_MODEL_INITIAL_EXEC:
1730 if (! flag_pic)
1731 {
1732 /* Don't schedule insns for getting GOT address when
1733 the first scheduling is enabled, to avoid spill
1734 failures for R0. */
1735 if (flag_schedule_insns)
1736 emit_insn (gen_blockage ());
1737 emit_insn (gen_GOTaddr2picreg ());
1738 emit_use (gen_rtx_REG (SImode, PIC_REG));
1739 if (flag_schedule_insns)
1740 emit_insn (gen_blockage ());
1741 }
1742 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1743 tmp = gen_sym2GOTTPOFF (op1);
1744 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1745 op1 = tga_op1;
1746 break;
1747
1748 case TLS_MODEL_LOCAL_EXEC:
1749 tmp2 = gen_reg_rtx (Pmode);
1750 emit_insn (gen_load_gbr (tmp2));
1751 tmp = gen_reg_rtx (Pmode);
1752 emit_insn (gen_symTPOFF2reg (tmp, op1));
1753
1754 if (register_operand (op0, Pmode))
1755 op1 = op0;
1756 else
1757 op1 = gen_reg_rtx (Pmode);
1758
1759 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1760 break;
1761
1762 default:
1763 gcc_unreachable ();
1764 }
1765 if (opc)
1766 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1767 operands[1] = op1;
1768 }
1769 }
1770
1771 return 0;
1772 }
1773
1774 enum rtx_code
1775 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1776 enum rtx_code comparison)
1777 {
1778 rtx op1;
1779 rtx scratch = NULL_RTX;
1780
1781 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1782 comparison = GET_CODE (operands[0]);
1783 else
1784 scratch = operands[4];
1785 if (CONST_INT_P (operands[1])
1786 && !CONST_INT_P (operands[2]))
1787 {
1788 rtx tmp = operands[1];
1789
1790 operands[1] = operands[2];
1791 operands[2] = tmp;
1792 comparison = swap_condition (comparison);
1793 }
1794 if (CONST_INT_P (operands[2]))
1795 {
1796 HOST_WIDE_INT val = INTVAL (operands[2]);
1797 if ((val == -1 || val == -0x81)
1798 && (comparison == GT || comparison == LE))
1799 {
1800 comparison = (comparison == GT) ? GE : LT;
1801 operands[2] = gen_int_mode (val + 1, mode);
1802 }
1803 else if ((val == 1 || val == 0x80)
1804 && (comparison == GE || comparison == LT))
1805 {
1806 comparison = (comparison == GE) ? GT : LE;
1807 operands[2] = gen_int_mode (val - 1, mode);
1808 }
1809 else if (val == 1 && (comparison == GEU || comparison == LTU))
1810 {
1811 comparison = (comparison == GEU) ? NE : EQ;
1812 operands[2] = CONST0_RTX (mode);
1813 }
1814 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1815 {
1816 comparison = (comparison == GEU) ? GTU : LEU;
1817 operands[2] = gen_int_mode (val - 1, mode);
1818 }
1819 else if (val == 0 && (comparison == GTU || comparison == LEU))
1820 comparison = (comparison == GTU) ? NE : EQ;
1821 else if (mode == SImode
1822 && ((val == 0x7fffffff
1823 && (comparison == GTU || comparison == LEU))
1824 || ((unsigned HOST_WIDE_INT) val
1825 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1826 && (comparison == GEU || comparison == LTU))))
1827 {
1828 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1829 operands[2] = CONST0_RTX (mode);
1830 }
1831 }
1832 op1 = operands[1];
1833 if (can_create_pseudo_p ())
1834 operands[1] = force_reg (mode, op1);
1835 /* When we are handling DImode comparisons, we want to keep constants so
1836 that we can optimize the component comparisons; however, memory loads
1837 are better issued as a whole so that they can be scheduled well.
1838 SImode equality comparisons allow I08 constants, but only when they
1839 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1840 into a register, that register might as well be r0, and we allow the
1841 constant. If it is already in a register, this is likely to be
1842 allocated to a different hard register, thus we load the constant into
1843 a register unless it is zero. */
1844 if (!REG_P (operands[2])
1845 && (!CONST_INT_P (operands[2])
1846 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1847 && ((comparison != EQ && comparison != NE)
1848 || (REG_P (op1) && REGNO (op1) != R0_REG)
1849 || !satisfies_constraint_I08 (operands[2])))))
1850 {
1851 if (scratch && GET_MODE (scratch) == mode)
1852 {
1853 emit_move_insn (scratch, operands[2]);
1854 operands[2] = scratch;
1855 }
1856 else if (can_create_pseudo_p ())
1857 operands[2] = force_reg (mode, operands[2]);
1858 }
1859 return comparison;
1860 }
1861
1862 void
1863 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1864 {
1865 rtx (*branch_expander) (rtx) = gen_branch_true;
1866 rtx jump;
1867
1868 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1869 switch (comparison)
1870 {
1871 case NE: case LT: case LE: case LTU: case LEU:
1872 comparison = reverse_condition (comparison);
1873 branch_expander = gen_branch_false;
1874 default: ;
1875 }
1876 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1877 gen_rtx_fmt_ee (comparison, SImode,
1878 operands[1], operands[2])));
1879 jump = emit_jump_insn (branch_expander (operands[3]));
1880 if (probability >= 0)
1881 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1882
1883 }
1884
1885 /* ??? How should we distribute probabilities when more than one branch
1886 is generated. So far we only have soem ad-hoc observations:
1887 - If the operands are random, they are likely to differ in both parts.
1888 - If comparing items in a hash chain, the operands are random or equal;
1889 operation should be EQ or NE.
1890 - If items are searched in an ordered tree from the root, we can expect
1891 the highpart to be unequal about half of the time; operation should be
1892 an inequality comparison, operands non-constant, and overall probability
1893 about 50%. Likewise for quicksort.
1894 - Range checks will be often made against constants. Even if we assume for
1895 simplicity an even distribution of the non-constant operand over a
1896 sub-range here, the same probability could be generated with differently
1897 wide sub-ranges - as long as the ratio of the part of the subrange that
1898 is before the threshold to the part that comes after the threshold stays
1899 the same. Thus, we can't really tell anything here;
1900 assuming random distribution is at least simple.
1901 */
1902
1903 bool
1904 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1905 {
1906 enum rtx_code msw_taken, msw_skip, lsw_taken;
1907 rtx skip_label = NULL_RTX;
1908 rtx op1h, op1l, op2h, op2l;
1909 int num_branches;
1910 int prob, rev_prob;
1911 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1912 rtx scratch = operands[4];
1913
1914 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1915 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1916 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1917 op1l = gen_lowpart (SImode, operands[1]);
1918 op2l = gen_lowpart (SImode, operands[2]);
1919 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1920 prob = split_branch_probability;
1921 rev_prob = REG_BR_PROB_BASE - prob;
1922 switch (comparison)
1923 {
1924 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1925 That costs 1 cycle more when the first branch can be predicted taken,
1926 but saves us mispredicts because only one branch needs prediction.
1927 It also enables generating the cmpeqdi_t-1 pattern. */
1928 case EQ:
1929 if (TARGET_CMPEQDI_T)
1930 {
1931 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1932 emit_jump_insn (gen_branch_true (operands[3]));
1933 return true;
1934 }
1935 msw_skip = NE;
1936 lsw_taken = EQ;
1937 if (prob >= 0)
1938 {
1939 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1940 */
1941 msw_skip_prob = rev_prob;
1942 if (REG_BR_PROB_BASE <= 65535)
1943 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1944 else
1945 {
1946 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1947 lsw_taken_prob
1948 = (prob
1949 ? (REG_BR_PROB_BASE
1950 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1951 / ((HOST_WIDEST_INT) prob << 32)))
1952 : 0);
1953 }
1954 }
1955 break;
1956 case NE:
1957 if (TARGET_CMPEQDI_T)
1958 {
1959 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1960 emit_jump_insn (gen_branch_false (operands[3]));
1961 return true;
1962 }
1963 msw_taken = NE;
1964 msw_taken_prob = prob;
1965 lsw_taken = NE;
1966 lsw_taken_prob = 0;
1967 break;
1968 case GTU: case GT:
1969 msw_taken = comparison;
1970 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1971 break;
1972 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1973 msw_skip = swap_condition (msw_taken);
1974 lsw_taken = GTU;
1975 break;
1976 case GEU: case GE:
1977 if (op2l == CONST0_RTX (SImode))
1978 msw_taken = comparison;
1979 else
1980 {
1981 msw_taken = comparison == GE ? GT : GTU;
1982 msw_skip = swap_condition (msw_taken);
1983 lsw_taken = GEU;
1984 }
1985 break;
1986 case LTU: case LT:
1987 msw_taken = comparison;
1988 if (op2l == CONST0_RTX (SImode))
1989 break;
1990 msw_skip = swap_condition (msw_taken);
1991 lsw_taken = LTU;
1992 break;
1993 case LEU: case LE:
1994 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1995 msw_taken = comparison;
1996 else
1997 {
1998 lsw_taken = LEU;
1999 if (comparison == LE)
2000 msw_taken = LT;
2001 else if (op2h != CONST0_RTX (SImode))
2002 msw_taken = LTU;
2003 else
2004 break;
2005 msw_skip = swap_condition (msw_taken);
2006 }
2007 break;
2008 default: return false;
2009 }
2010 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2011 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2012 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2013 if (comparison != EQ && comparison != NE && num_branches > 1)
2014 {
2015 if (!CONSTANT_P (operands[2])
2016 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2017 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2018 {
2019 msw_taken_prob = prob / 2U;
2020 msw_skip_prob
2021 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2022 lsw_taken_prob = prob;
2023 }
2024 else
2025 {
2026 msw_taken_prob = prob;
2027 msw_skip_prob = REG_BR_PROB_BASE;
2028 /* ??? If we have a constant op2h, should we use that when
2029 calculating lsw_taken_prob? */
2030 lsw_taken_prob = prob;
2031 }
2032 }
2033 operands[1] = op1h;
2034 operands[2] = op2h;
2035 operands[4] = NULL_RTX;
2036 if (reload_completed
2037 && ! arith_reg_or_0_operand (op2h, SImode)
2038 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2039 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2040 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2041 {
2042 emit_move_insn (scratch, operands[2]);
2043 operands[2] = scratch;
2044 }
2045 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2046 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2047 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2048 {
2049 rtx taken_label = operands[3];
2050
2051 /* Operands were possibly modified, but msw_skip doesn't expect this.
2052 Always use the original ones. */
2053 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2054 {
2055 operands[1] = op1h;
2056 operands[2] = op2h;
2057 }
2058
2059 operands[3] = skip_label = gen_label_rtx ();
2060 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2061 operands[3] = taken_label;
2062 }
2063 operands[1] = op1l;
2064 operands[2] = op2l;
2065 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2066 {
2067 if (reload_completed
2068 && ! arith_reg_or_0_operand (op2l, SImode)
2069 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2070 {
2071 emit_move_insn (scratch, operands[2]);
2072 operands[2] = scratch;
2073 }
2074 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2075 }
2076 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2077 emit_label (skip_label);
2078 return true;
2079 }
2080
2081 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2082
2083 static void
2084 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2085 {
2086 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2087 {
2088 insn = gen_rtx_PARALLEL (VOIDmode,
2089 gen_rtvec (2, insn,
2090 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2091 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2092 }
2093 else
2094 emit_insn (insn);
2095 }
2096
2097 /* Prepare the operands for an scc instruction; make sure that the
2098 compare has been done and the result is in T_REG. */
2099 void
2100 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2101 {
2102 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2103 enum rtx_code oldcode = code;
2104 enum machine_mode mode;
2105
2106 /* First need a compare insn. */
2107 switch (code)
2108 {
2109 case NE:
2110 /* It isn't possible to handle this case. */
2111 gcc_unreachable ();
2112 case LT:
2113 code = GT;
2114 break;
2115 case LE:
2116 code = GE;
2117 break;
2118 case LTU:
2119 code = GTU;
2120 break;
2121 case LEU:
2122 code = GEU;
2123 break;
2124 default:
2125 break;
2126 }
2127 if (code != oldcode)
2128 {
2129 rtx tmp = op0;
2130 op0 = op1;
2131 op1 = tmp;
2132 }
2133
2134 mode = GET_MODE (op0);
2135 if (mode == VOIDmode)
2136 mode = GET_MODE (op1);
2137
2138 op0 = force_reg (mode, op0);
2139 if ((code != EQ && code != NE
2140 && (op1 != const0_rtx
2141 || code == GTU || code == GEU || code == LTU || code == LEU))
2142 || (mode == DImode && op1 != const0_rtx)
2143 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2144 op1 = force_reg (mode, op1);
2145
2146 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2147 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2148 mode);
2149 }
2150
2151 rtx
2152 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2153 rtx op0, rtx op1)
2154 {
2155 rtx target = gen_reg_rtx (SImode);
2156 rtx tmp;
2157
2158 gcc_assert (TARGET_SHMEDIA);
2159 switch (code)
2160 {
2161 case EQ:
2162 case GT:
2163 case LT:
2164 case UNORDERED:
2165 case GTU:
2166 case LTU:
2167 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2168 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2169 code = NE;
2170 break;
2171
2172 case NE:
2173 case GE:
2174 case LE:
2175 case ORDERED:
2176 case GEU:
2177 case LEU:
2178 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2179 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2180 code = EQ;
2181 break;
2182
2183 case UNEQ:
2184 case UNGE:
2185 case UNGT:
2186 case UNLE:
2187 case UNLT:
2188 case LTGT:
2189 return NULL_RTX;
2190
2191 default:
2192 gcc_unreachable ();
2193 }
2194
2195 if (mode == DImode)
2196 {
2197 rtx t2 = gen_reg_rtx (DImode);
2198 emit_insn (gen_extendsidi2 (t2, target));
2199 target = t2;
2200 }
2201
2202 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2203 }
2204
2205 /* Called from the md file, set up the operands of a compare instruction. */
2206
2207 void
2208 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2209 {
2210 enum rtx_code code = GET_CODE (operands[0]);
2211 enum rtx_code branch_code;
2212 rtx op0 = operands[1];
2213 rtx op1 = operands[2];
2214 rtx insn, tem;
2215 bool need_ccmpeq = false;
2216
2217 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2218 {
2219 op0 = force_reg (mode, op0);
2220 op1 = force_reg (mode, op1);
2221 }
2222 else
2223 {
2224 if (code != EQ || mode == DImode)
2225 {
2226 /* Force args into regs, since we can't use constants here. */
2227 op0 = force_reg (mode, op0);
2228 if (op1 != const0_rtx || code == GTU || code == GEU)
2229 op1 = force_reg (mode, op1);
2230 }
2231 }
2232
2233 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2234 {
2235 if (code == LT
2236 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2237 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2238 {
2239 tem = op0, op0 = op1, op1 = tem;
2240 code = swap_condition (code);
2241 }
2242
2243 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2244 if (code == GE)
2245 {
2246 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2247 need_ccmpeq = true;
2248 code = GT;
2249 }
2250
2251 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2252 to EQ/GT respectively. */
2253 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2254 }
2255
2256 switch (code)
2257 {
2258 case EQ:
2259 case GT:
2260 case GE:
2261 case GTU:
2262 case GEU:
2263 branch_code = code;
2264 break;
2265 case NE:
2266 case LT:
2267 case LE:
2268 case LTU:
2269 case LEU:
2270 branch_code = reverse_condition (code);
2271 break;
2272 default:
2273 gcc_unreachable ();
2274 }
2275
2276 insn = gen_rtx_SET (VOIDmode,
2277 gen_rtx_REG (SImode, T_REG),
2278 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2279
2280 sh_emit_set_t_insn (insn, mode);
2281 if (need_ccmpeq)
2282 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2283
2284 if (branch_code == code)
2285 emit_jump_insn (gen_branch_true (operands[3]));
2286 else
2287 emit_jump_insn (gen_branch_false (operands[3]));
2288 }
2289
2290 void
2291 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2292 {
2293 enum rtx_code code = GET_CODE (operands[1]);
2294 rtx op0 = operands[2];
2295 rtx op1 = operands[3];
2296 rtx lab = NULL_RTX;
2297 bool invert = false;
2298 rtx tem;
2299
2300 op0 = force_reg (mode, op0);
2301 if ((code != EQ && code != NE
2302 && (op1 != const0_rtx
2303 || code == GTU || code == GEU || code == LTU || code == LEU))
2304 || (mode == DImode && op1 != const0_rtx)
2305 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2306 op1 = force_reg (mode, op1);
2307
2308 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2309 {
2310 if (code == LT || code == LE)
2311 {
2312 code = swap_condition (code);
2313 tem = op0, op0 = op1, op1 = tem;
2314 }
2315 if (code == GE)
2316 {
2317 if (TARGET_IEEE)
2318 {
2319 lab = gen_label_rtx ();
2320 sh_emit_scc_to_t (EQ, op0, op1);
2321 emit_jump_insn (gen_branch_true (lab));
2322 code = GT;
2323 }
2324 else
2325 {
2326 code = LT;
2327 invert = true;
2328 }
2329 }
2330 }
2331
2332 if (code == NE)
2333 {
2334 code = EQ;
2335 invert = true;
2336 }
2337
2338 sh_emit_scc_to_t (code, op0, op1);
2339 if (lab)
2340 emit_label (lab);
2341 if (invert)
2342 emit_insn (gen_movnegt (operands[0]));
2343 else
2344 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2345 }
2346 \f
2347 /* Functions to output assembly code. */
2348
2349 /* Return a sequence of instructions to perform DI or DF move.
2350
2351 Since the SH cannot move a DI or DF in one instruction, we have
2352 to take care when we see overlapping source and dest registers. */
2353
2354 const char *
2355 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2356 enum machine_mode mode)
2357 {
2358 rtx dst = operands[0];
2359 rtx src = operands[1];
2360
2361 if (MEM_P (dst)
2362 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2363 return "mov.l %T1,%0\n\tmov.l %1,%0";
2364
2365 if (register_operand (dst, mode)
2366 && register_operand (src, mode))
2367 {
2368 if (REGNO (src) == MACH_REG)
2369 return "sts mach,%S0\n\tsts macl,%R0";
2370
2371 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2372 when mov.d r1,r0 do r1->r0 then r2->r1. */
2373
2374 if (REGNO (src) + 1 == REGNO (dst))
2375 return "mov %T1,%T0\n\tmov %1,%0";
2376 else
2377 return "mov %1,%0\n\tmov %T1,%T0";
2378 }
2379 else if (CONST_INT_P (src))
2380 {
2381 if (INTVAL (src) < 0)
2382 output_asm_insn ("mov #-1,%S0", operands);
2383 else
2384 output_asm_insn ("mov #0,%S0", operands);
2385
2386 return "mov %1,%R0";
2387 }
2388 else if (MEM_P (src))
2389 {
2390 int ptrreg = -1;
2391 int dreg = REGNO (dst);
2392 rtx inside = XEXP (src, 0);
2393
2394 switch (GET_CODE (inside))
2395 {
2396 case REG:
2397 ptrreg = REGNO (inside);
2398 break;
2399
2400 case SUBREG:
2401 ptrreg = subreg_regno (inside);
2402 break;
2403
2404 case PLUS:
2405 ptrreg = REGNO (XEXP (inside, 0));
2406 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2407 an offsettable address. Unfortunately, offsettable addresses use
2408 QImode to check the offset, and a QImode offsettable address
2409 requires r0 for the other operand, which is not currently
2410 supported, so we can't use the 'o' constraint.
2411 Thus we must check for and handle r0+REG addresses here.
2412 We punt for now, since this is likely very rare. */
2413 gcc_assert (!REG_P (XEXP (inside, 1)));
2414 break;
2415
2416 case LABEL_REF:
2417 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2418 case POST_INC:
2419 return "mov.l %1,%0\n\tmov.l %1,%T0";
2420 default:
2421 gcc_unreachable ();
2422 }
2423
2424 /* Work out the safe way to copy. Copy into the second half first. */
2425 if (dreg == ptrreg)
2426 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2427 }
2428
2429 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2430 }
2431
2432 /* Print an instruction which would have gone into a delay slot after
2433 another instruction, but couldn't because the other instruction expanded
2434 into a sequence where putting the slot insn at the end wouldn't work. */
2435
2436 static void
2437 print_slot (rtx insn)
2438 {
2439 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2440
2441 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2442 }
2443
2444 const char *
2445 output_far_jump (rtx insn, rtx op)
2446 {
2447 struct { rtx lab, reg, op; } this_jmp;
2448 rtx braf_base_lab = NULL_RTX;
2449 const char *jump;
2450 int far;
2451 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2452 rtx prev;
2453
2454 this_jmp.lab = gen_label_rtx ();
2455
2456 if (TARGET_SH2
2457 && offset >= -32764
2458 && offset - get_attr_length (insn) <= 32766)
2459 {
2460 far = 0;
2461 jump = "mov.w %O0,%1; braf %1";
2462 }
2463 else
2464 {
2465 far = 1;
2466 if (flag_pic)
2467 {
2468 if (TARGET_SH2)
2469 jump = "mov.l %O0,%1; braf %1";
2470 else
2471 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2472 }
2473 else
2474 jump = "mov.l %O0,%1; jmp @%1";
2475 }
2476 /* If we have a scratch register available, use it. */
2477 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2478 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2479 {
2480 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2481 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2482 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2483 output_asm_insn (jump, &this_jmp.lab);
2484 if (dbr_sequence_length ())
2485 print_slot (final_sequence);
2486 else
2487 output_asm_insn ("nop", 0);
2488 }
2489 else
2490 {
2491 /* Output the delay slot insn first if any. */
2492 if (dbr_sequence_length ())
2493 print_slot (final_sequence);
2494
2495 this_jmp.reg = gen_rtx_REG (SImode, 13);
2496 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2497 Fortunately, MACL is fixed and call-clobbered, and we never
2498 need its value across jumps, so save r13 in it instead of in
2499 the stack. */
2500 if (TARGET_SH5)
2501 output_asm_insn ("lds r13, macl", 0);
2502 else
2503 output_asm_insn ("mov.l r13,@-r15", 0);
2504 output_asm_insn (jump, &this_jmp.lab);
2505 if (TARGET_SH5)
2506 output_asm_insn ("sts macl, r13", 0);
2507 else
2508 output_asm_insn ("mov.l @r15+,r13", 0);
2509 }
2510 if (far && flag_pic && TARGET_SH2)
2511 {
2512 braf_base_lab = gen_label_rtx ();
2513 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2514 CODE_LABEL_NUMBER (braf_base_lab));
2515 }
2516 if (far)
2517 output_asm_insn (".align 2", 0);
2518 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2519 this_jmp.op = op;
2520 if (far && flag_pic)
2521 {
2522 if (TARGET_SH2)
2523 this_jmp.lab = braf_base_lab;
2524 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2525 }
2526 else
2527 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2528 return "";
2529 }
2530
2531 /* Local label counter, used for constants in the pool and inside
2532 pattern branches. */
2533
2534 static int lf = 100;
2535
2536 /* Output code for ordinary branches. */
2537
2538 const char *
2539 output_branch (int logic, rtx insn, rtx *operands)
2540 {
2541 switch (get_attr_length (insn))
2542 {
2543 case 6:
2544 /* This can happen if filling the delay slot has caused a forward
2545 branch to exceed its range (we could reverse it, but only
2546 when we know we won't overextend other branches; this should
2547 best be handled by relaxation).
2548 It can also happen when other condbranches hoist delay slot insn
2549 from their destination, thus leading to code size increase.
2550 But the branch will still be in the range -4092..+4098 bytes. */
2551
2552 if (! TARGET_RELAX)
2553 {
2554 int label = lf++;
2555 /* The call to print_slot will clobber the operands. */
2556 rtx op0 = operands[0];
2557
2558 /* If the instruction in the delay slot is annulled (true), then
2559 there is no delay slot where we can put it now. The only safe
2560 place for it is after the label. final will do that by default. */
2561
2562 if (final_sequence
2563 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2564 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2565 {
2566 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2567 ASSEMBLER_DIALECT ? "/" : ".", label);
2568 print_slot (final_sequence);
2569 }
2570 else
2571 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2572
2573 output_asm_insn ("bra\t%l0", &op0);
2574 fprintf (asm_out_file, "\tnop\n");
2575 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2576
2577 return "";
2578 }
2579 /* When relaxing, handle this like a short branch. The linker
2580 will fix it up if it still doesn't fit after relaxation. */
2581 case 2:
2582 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2583
2584 /* These are for SH2e, in which we have to account for the
2585 extra nop because of the hardware bug in annulled branches. */
2586 case 8:
2587 if (! TARGET_RELAX)
2588 {
2589 int label = lf++;
2590
2591 gcc_assert (!final_sequence
2592 || !(INSN_ANNULLED_BRANCH_P
2593 (XVECEXP (final_sequence, 0, 0))));
2594 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2595 logic ? "f" : "t",
2596 ASSEMBLER_DIALECT ? "/" : ".", label);
2597 fprintf (asm_out_file, "\tnop\n");
2598 output_asm_insn ("bra\t%l0", operands);
2599 fprintf (asm_out_file, "\tnop\n");
2600 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2601
2602 return "";
2603 }
2604 /* When relaxing, fall through. */
2605 case 4:
2606 {
2607 char buffer[10];
2608
2609 sprintf (buffer, "b%s%ss\t%%l0",
2610 logic ? "t" : "f",
2611 ASSEMBLER_DIALECT ? "/" : ".");
2612 output_asm_insn (buffer, &operands[0]);
2613 return "nop";
2614 }
2615
2616 default:
2617 /* There should be no longer branches now - that would
2618 indicate that something has destroyed the branches set
2619 up in machine_dependent_reorg. */
2620 gcc_unreachable ();
2621 }
2622 }
2623
2624 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2625 fill in operands 9 as a label to the successor insn.
2626 We try to use jump threading where possible.
2627 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2628 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2629 follow jmp and bt, if the address is in range. */
2630 const char *
2631 output_branchy_insn (enum rtx_code code, const char *templ,
2632 rtx insn, rtx *operands)
2633 {
2634 rtx next_insn = NEXT_INSN (insn);
2635
2636 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2637 {
2638 rtx src = SET_SRC (PATTERN (next_insn));
2639 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2640 {
2641 /* Following branch not taken */
2642 operands[9] = gen_label_rtx ();
2643 emit_label_after (operands[9], next_insn);
2644 INSN_ADDRESSES_NEW (operands[9],
2645 INSN_ADDRESSES (INSN_UID (next_insn))
2646 + get_attr_length (next_insn));
2647 return templ;
2648 }
2649 else
2650 {
2651 int offset = (branch_dest (next_insn)
2652 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2653 if (offset >= -252 && offset <= 258)
2654 {
2655 if (GET_CODE (src) == IF_THEN_ELSE)
2656 /* branch_true */
2657 src = XEXP (src, 1);
2658 operands[9] = src;
2659 return templ;
2660 }
2661 }
2662 }
2663 operands[9] = gen_label_rtx ();
2664 emit_label_after (operands[9], insn);
2665 INSN_ADDRESSES_NEW (operands[9],
2666 INSN_ADDRESSES (INSN_UID (insn))
2667 + get_attr_length (insn));
2668 return templ;
2669 }
2670
2671 const char *
2672 output_ieee_ccmpeq (rtx insn, rtx *operands)
2673 {
2674 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2675 insn, operands);
2676 }
2677 \f
2678 /* Output the start of the assembler file. */
2679
2680 static void
2681 sh_file_start (void)
2682 {
2683 default_file_start ();
2684
2685 #ifdef SYMBIAN
2686 /* Declare the .directive section before it is used. */
2687 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2688 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2689 #endif
2690
2691 if (TARGET_ELF)
2692 /* We need to show the text section with the proper
2693 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2694 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2695 will complain. We can teach GAS specifically about the
2696 default attributes for our choice of text section, but
2697 then we would have to change GAS again if/when we change
2698 the text section name. */
2699 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2700 else
2701 /* Switch to the data section so that the coffsem symbol
2702 isn't in the text section. */
2703 switch_to_section (data_section);
2704
2705 if (TARGET_LITTLE_ENDIAN)
2706 fputs ("\t.little\n", asm_out_file);
2707
2708 if (!TARGET_ELF)
2709 {
2710 if (TARGET_SHCOMPACT)
2711 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2712 else if (TARGET_SHMEDIA)
2713 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2714 TARGET_SHMEDIA64 ? 64 : 32);
2715 }
2716 }
2717 \f
2718 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2719
2720 static bool
2721 unspec_caller_rtx_p (rtx pat)
2722 {
2723 rtx base, offset;
2724 int i;
2725
2726 split_const (pat, &base, &offset);
2727 if (GET_CODE (base) == UNSPEC)
2728 {
2729 if (XINT (base, 1) == UNSPEC_CALLER)
2730 return true;
2731 for (i = 0; i < XVECLEN (base, 0); i++)
2732 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2733 return true;
2734 }
2735 return false;
2736 }
2737
2738 /* Indicate that INSN cannot be duplicated. This is true for insn
2739 that generates a unique label. */
2740
2741 static bool
2742 sh_cannot_copy_insn_p (rtx insn)
2743 {
2744 rtx pat;
2745
2746 if (!reload_completed || !flag_pic)
2747 return false;
2748
2749 if (!NONJUMP_INSN_P (insn))
2750 return false;
2751 if (asm_noperands (insn) >= 0)
2752 return false;
2753
2754 pat = PATTERN (insn);
2755 if (GET_CODE (pat) != SET)
2756 return false;
2757 pat = SET_SRC (pat);
2758
2759 if (unspec_caller_rtx_p (pat))
2760 return true;
2761
2762 return false;
2763 }
2764 \f
2765 /* Actual number of instructions used to make a shift by N. */
2766 static const char ashiftrt_insns[] =
2767 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2768
2769 /* Left shift and logical right shift are the same. */
2770 static const char shift_insns[] =
2771 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2772
2773 /* Individual shift amounts needed to get the above length sequences.
2774 One bit right shifts clobber the T bit, so when possible, put one bit
2775 shifts in the middle of the sequence, so the ends are eligible for
2776 branch delay slots. */
2777 static const short shift_amounts[32][5] = {
2778 {0}, {1}, {2}, {2, 1},
2779 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2780 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2781 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2782 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2783 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2784 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2785 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2786
2787 /* Likewise, but for shift amounts < 16, up to three highmost bits
2788 might be clobbered. This is typically used when combined with some
2789 kind of sign or zero extension. */
2790
2791 static const char ext_shift_insns[] =
2792 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2793
2794 static const short ext_shift_amounts[32][4] = {
2795 {0}, {1}, {2}, {2, 1},
2796 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2797 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2798 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2799 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2800 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2801 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2802 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2803
2804 /* Assuming we have a value that has been sign-extended by at least one bit,
2805 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2806 to shift it by N without data loss, and quicker than by other means? */
2807 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
2808
2809 /* This is used in length attributes in sh.md to help compute the length
2810 of arbitrary constant shift instructions. */
2811
2812 int
2813 shift_insns_rtx (rtx insn)
2814 {
2815 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2816 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2817 enum rtx_code shift_code = GET_CODE (set_src);
2818
2819 switch (shift_code)
2820 {
2821 case ASHIFTRT:
2822 return ashiftrt_insns[shift_count];
2823 case LSHIFTRT:
2824 case ASHIFT:
2825 return shift_insns[shift_count];
2826 default:
2827 gcc_unreachable ();
2828 }
2829 }
2830
2831 /* Return the cost of a shift. */
2832
2833 static inline int
2834 shiftcosts (rtx x)
2835 {
2836 int value;
2837
2838 if (TARGET_SHMEDIA)
2839 return 1;
2840
2841 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2842 {
2843 if (GET_MODE (x) == DImode
2844 && CONST_INT_P (XEXP (x, 1))
2845 && INTVAL (XEXP (x, 1)) == 1)
2846 return 2;
2847
2848 /* Everything else is invalid, because there is no pattern for it. */
2849 return MAX_COST;
2850 }
2851 /* If shift by a non constant, then this will be expensive. */
2852 if (!CONST_INT_P (XEXP (x, 1)))
2853 return SH_DYNAMIC_SHIFT_COST;
2854
2855 /* Otherwise, return the true cost in instructions. Cope with out of range
2856 shift counts more or less arbitrarily. */
2857 value = INTVAL (XEXP (x, 1)) & 31;
2858
2859 if (GET_CODE (x) == ASHIFTRT)
2860 {
2861 int cost = ashiftrt_insns[value];
2862 /* If SH3, then we put the constant in a reg and use shad. */
2863 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2864 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2865 return cost;
2866 }
2867 else
2868 return shift_insns[value];
2869 }
2870
2871 /* Return the cost of an AND operation. */
2872
2873 static inline int
2874 andcosts (rtx x)
2875 {
2876 int i;
2877
2878 /* Anding with a register is a single cycle and instruction. */
2879 if (!CONST_INT_P (XEXP (x, 1)))
2880 return 1;
2881
2882 i = INTVAL (XEXP (x, 1));
2883
2884 if (TARGET_SHMEDIA)
2885 {
2886 if (satisfies_constraint_I10 (XEXP (x, 1))
2887 || satisfies_constraint_J16 (XEXP (x, 1)))
2888 return 1;
2889 else
2890 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2891 }
2892
2893 /* These constants are single cycle extu.[bw] instructions. */
2894 if (i == 0xff || i == 0xffff)
2895 return 1;
2896 /* Constants that can be used in an and immediate instruction in a single
2897 cycle, but this requires r0, so make it a little more expensive. */
2898 if (CONST_OK_FOR_K08 (i))
2899 return 2;
2900 /* Constants that can be loaded with a mov immediate and an and.
2901 This case is probably unnecessary. */
2902 if (CONST_OK_FOR_I08 (i))
2903 return 2;
2904 /* Any other constants requires a 2 cycle pc-relative load plus an and.
2905 This case is probably unnecessary. */
2906 return 3;
2907 }
2908
2909 /* Return the cost of an addition or a subtraction. */
2910
2911 static inline int
2912 addsubcosts (rtx x)
2913 {
2914 /* Adding a register is a single cycle insn. */
2915 if (REG_P (XEXP (x, 1))
2916 || GET_CODE (XEXP (x, 1)) == SUBREG)
2917 return 1;
2918
2919 /* Likewise for small constants. */
2920 if (CONST_INT_P (XEXP (x, 1))
2921 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2922 return 1;
2923
2924 if (TARGET_SHMEDIA)
2925 switch (GET_CODE (XEXP (x, 1)))
2926 {
2927 case CONST:
2928 case LABEL_REF:
2929 case SYMBOL_REF:
2930 return TARGET_SHMEDIA64 ? 5 : 3;
2931
2932 case CONST_INT:
2933 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2934 return 2;
2935 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2936 return 3;
2937 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2938 return 4;
2939
2940 /* Fall through. */
2941 default:
2942 return 5;
2943 }
2944
2945 /* Any other constant requires a 2 cycle pc-relative load plus an
2946 addition. */
2947 return 3;
2948 }
2949
2950 /* Return the cost of a multiply. */
2951 static inline int
2952 multcosts (rtx x ATTRIBUTE_UNUSED)
2953 {
2954 if (sh_multcost >= 0)
2955 return sh_multcost;
2956 if (TARGET_SHMEDIA)
2957 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2958 accept constants. Ideally, we would use a cost of one or two and
2959 add the cost of the operand, but disregard the latter when inside loops
2960 and loop invariant code motion is still to follow.
2961 Using a multiply first and splitting it later if it's a loss
2962 doesn't work because of different sign / zero extension semantics
2963 of multiplies vs. shifts. */
2964 return TARGET_SMALLCODE ? 2 : 3;
2965
2966 if (TARGET_SH2)
2967 {
2968 /* We have a mul insn, so we can never take more than the mul and the
2969 read of the mac reg, but count more because of the latency and extra
2970 reg usage. */
2971 if (TARGET_SMALLCODE)
2972 return 2;
2973 return 3;
2974 }
2975
2976 /* If we're aiming at small code, then just count the number of
2977 insns in a multiply call sequence. */
2978 if (TARGET_SMALLCODE)
2979 return 5;
2980
2981 /* Otherwise count all the insns in the routine we'd be calling too. */
2982 return 20;
2983 }
2984
2985 /* Compute a (partial) cost for rtx X. Return true if the complete
2986 cost has been computed, and false if subexpressions should be
2987 scanned. In either case, *TOTAL contains the cost result. */
2988
2989 static bool
2990 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2991 bool speed ATTRIBUTE_UNUSED)
2992 {
2993 switch (code)
2994 {
2995 case CONST_INT:
2996 if (TARGET_SHMEDIA)
2997 {
2998 if (INTVAL (x) == 0)
2999 *total = 0;
3000 else if (outer_code == AND && and_operand ((x), DImode))
3001 *total = 0;
3002 else if ((outer_code == IOR || outer_code == XOR
3003 || outer_code == PLUS)
3004 && CONST_OK_FOR_I10 (INTVAL (x)))
3005 *total = 0;
3006 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3007 *total = COSTS_N_INSNS (outer_code != SET);
3008 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3009 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3010 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3011 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3012 else
3013 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3014 return true;
3015 }
3016 if (CONST_OK_FOR_I08 (INTVAL (x)))
3017 *total = 0;
3018 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3019 && CONST_OK_FOR_K08 (INTVAL (x)))
3020 *total = 1;
3021 /* prepare_cmp_insn will force costly constants int registers before
3022 the cbranch[sd]i4 patterns can see them, so preserve potentially
3023 interesting ones not covered by I08 above. */
3024 else if (outer_code == COMPARE
3025 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3026 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3027 || INTVAL (x) == 0x7fffffff
3028 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3029 *total = 1;
3030 else
3031 *total = 8;
3032 return true;
3033
3034 case CONST:
3035 case LABEL_REF:
3036 case SYMBOL_REF:
3037 if (TARGET_SHMEDIA64)
3038 *total = COSTS_N_INSNS (4);
3039 else if (TARGET_SHMEDIA32)
3040 *total = COSTS_N_INSNS (2);
3041 else
3042 *total = 5;
3043 return true;
3044
3045 case CONST_DOUBLE:
3046 if (TARGET_SHMEDIA)
3047 *total = COSTS_N_INSNS (4);
3048 /* prepare_cmp_insn will force costly constants int registers before
3049 the cbranchdi4 pattern can see them, so preserve potentially
3050 interesting ones. */
3051 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3052 *total = 1;
3053 else
3054 *total = 10;
3055 return true;
3056 case CONST_VECTOR:
3057 if (x == CONST0_RTX (GET_MODE (x)))
3058 *total = 0;
3059 else if (sh_1el_vec (x, VOIDmode))
3060 *total = outer_code != SET;
3061 if (sh_rep_vec (x, VOIDmode))
3062 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3063 + (outer_code != SET));
3064 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3065 return true;
3066
3067 case PLUS:
3068 case MINUS:
3069 *total = COSTS_N_INSNS (addsubcosts (x));
3070 return true;
3071
3072 case AND:
3073 *total = COSTS_N_INSNS (andcosts (x));
3074 return true;
3075
3076 case MULT:
3077 *total = COSTS_N_INSNS (multcosts (x));
3078 return true;
3079
3080 case ASHIFT:
3081 case ASHIFTRT:
3082 case LSHIFTRT:
3083 *total = COSTS_N_INSNS (shiftcosts (x));
3084 return true;
3085
3086 case DIV:
3087 case UDIV:
3088 case MOD:
3089 case UMOD:
3090 *total = COSTS_N_INSNS (20);
3091 return true;
3092
3093 case PARALLEL:
3094 if (sh_1el_vec (x, VOIDmode))
3095 *total = outer_code != SET;
3096 if (sh_rep_vec (x, VOIDmode))
3097 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3098 + (outer_code != SET));
3099 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3100 return true;
3101
3102 case FLOAT:
3103 case FIX:
3104 *total = 100;
3105 return true;
3106
3107 default:
3108 return false;
3109 }
3110 }
3111
3112 /* Compute the cost of an address. For the SH, all valid addresses are
3113 the same cost. Use a slightly higher cost for reg + reg addressing,
3114 since it increases pressure on r0. */
3115
3116 static int
3117 sh_address_cost (rtx X,
3118 bool speed ATTRIBUTE_UNUSED)
3119 {
3120 return (GET_CODE (X) == PLUS
3121 && ! CONSTANT_P (XEXP (X, 1))
3122 && ! TARGET_SHMEDIA ? 1 : 0);
3123 }
3124
3125 /* Code to expand a shift. */
3126
3127 void
3128 gen_ashift (int type, int n, rtx reg)
3129 {
3130 /* Negative values here come from the shift_amounts array. */
3131 if (n < 0)
3132 {
3133 if (type == ASHIFT)
3134 type = LSHIFTRT;
3135 else
3136 type = ASHIFT;
3137 n = -n;
3138 }
3139
3140 switch (type)
3141 {
3142 case ASHIFTRT:
3143 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3144 break;
3145 case LSHIFTRT:
3146 if (n == 1)
3147 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3148 else
3149 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3150 break;
3151 case ASHIFT:
3152 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3153 break;
3154 }
3155 }
3156
3157 /* Same for HImode */
3158
3159 void
3160 gen_ashift_hi (int type, int n, rtx reg)
3161 {
3162 /* Negative values here come from the shift_amounts array. */
3163 if (n < 0)
3164 {
3165 if (type == ASHIFT)
3166 type = LSHIFTRT;
3167 else
3168 type = ASHIFT;
3169 n = -n;
3170 }
3171
3172 switch (type)
3173 {
3174 case ASHIFTRT:
3175 case LSHIFTRT:
3176 /* We don't have HImode right shift operations because using the
3177 ordinary 32 bit shift instructions for that doesn't generate proper
3178 zero/sign extension.
3179 gen_ashift_hi is only called in contexts where we know that the
3180 sign extension works out correctly. */
3181 {
3182 int offset = 0;
3183 if (GET_CODE (reg) == SUBREG)
3184 {
3185 offset = SUBREG_BYTE (reg);
3186 reg = SUBREG_REG (reg);
3187 }
3188 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3189 break;
3190 }
3191 case ASHIFT:
3192 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3193 break;
3194 }
3195 }
3196
3197 /* Output RTL to split a constant shift into its component SH constant
3198 shift instructions. */
3199
3200 void
3201 gen_shifty_op (int code, rtx *operands)
3202 {
3203 int value = INTVAL (operands[2]);
3204 int max, i;
3205
3206 /* Truncate the shift count in case it is out of bounds. */
3207 value = value & 31;
3208
3209 if (value == 31)
3210 {
3211 if (code == LSHIFTRT)
3212 {
3213 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3214 emit_insn (gen_movt (operands[0]));
3215 return;
3216 }
3217 else if (code == ASHIFT)
3218 {
3219 /* There is a two instruction sequence for 31 bit left shifts,
3220 but it requires r0. */
3221 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3222 {
3223 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3224 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3225 return;
3226 }
3227 }
3228 }
3229 else if (value == 0)
3230 {
3231 /* This can happen even when optimizing, if there were subregs before
3232 reload. Don't output a nop here, as this is never optimized away;
3233 use a no-op move instead. */
3234 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3235 return;
3236 }
3237
3238 max = shift_insns[value];
3239 for (i = 0; i < max; i++)
3240 gen_ashift (code, shift_amounts[value][i], operands[0]);
3241 }
3242
3243 /* Same as above, but optimized for values where the topmost bits don't
3244 matter. */
3245
3246 void
3247 gen_shifty_hi_op (int code, rtx *operands)
3248 {
3249 int value = INTVAL (operands[2]);
3250 int max, i;
3251 void (*gen_fun) (int, int, rtx);
3252
3253 /* This operation is used by and_shl for SImode values with a few
3254 high bits known to be cleared. */
3255 value &= 31;
3256 if (value == 0)
3257 {
3258 emit_insn (gen_nop ());
3259 return;
3260 }
3261
3262 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3263 if (code == ASHIFT)
3264 {
3265 max = ext_shift_insns[value];
3266 for (i = 0; i < max; i++)
3267 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3268 }
3269 else
3270 /* When shifting right, emit the shifts in reverse order, so that
3271 solitary negative values come first. */
3272 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3273 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3274 }
3275
3276 /* Output RTL for an arithmetic right shift. */
3277
3278 /* ??? Rewrite to use super-optimizer sequences. */
3279
3280 int
3281 expand_ashiftrt (rtx *operands)
3282 {
3283 rtx wrk;
3284 char func[18];
3285 int value;
3286
3287 if (TARGET_SH3)
3288 {
3289 if (!CONST_INT_P (operands[2]))
3290 {
3291 rtx count = copy_to_mode_reg (SImode, operands[2]);
3292 emit_insn (gen_negsi2 (count, count));
3293 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3294 return 1;
3295 }
3296 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3297 > 1 + SH_DYNAMIC_SHIFT_COST)
3298 {
3299 rtx count
3300 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3301 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3302 return 1;
3303 }
3304 }
3305 if (!CONST_INT_P (operands[2]))
3306 return 0;
3307
3308 value = INTVAL (operands[2]) & 31;
3309
3310 if (value == 31)
3311 {
3312 /* If we are called from abs expansion, arrange things so that we
3313 we can use a single MT instruction that doesn't clobber the source,
3314 if LICM can hoist out the load of the constant zero. */
3315 if (currently_expanding_to_rtl)
3316 {
3317 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3318 operands[1]));
3319 emit_insn (gen_mov_neg_si_t (operands[0]));
3320 return 1;
3321 }
3322 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3323 return 1;
3324 }
3325 else if (value >= 16 && value <= 19)
3326 {
3327 wrk = gen_reg_rtx (SImode);
3328 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3329 value -= 16;
3330 while (value--)
3331 gen_ashift (ASHIFTRT, 1, wrk);
3332 emit_move_insn (operands[0], wrk);
3333 return 1;
3334 }
3335 /* Expand a short sequence inline, longer call a magic routine. */
3336 else if (value <= 5)
3337 {
3338 wrk = gen_reg_rtx (SImode);
3339 emit_move_insn (wrk, operands[1]);
3340 while (value--)
3341 gen_ashift (ASHIFTRT, 1, wrk);
3342 emit_move_insn (operands[0], wrk);
3343 return 1;
3344 }
3345
3346 wrk = gen_reg_rtx (Pmode);
3347
3348 /* Load the value into an arg reg and call a helper. */
3349 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3350 sprintf (func, "__ashiftrt_r4_%d", value);
3351 function_symbol (wrk, func, SFUNC_STATIC);
3352 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3353 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3354 return 1;
3355 }
3356
3357 int
3358 sh_dynamicalize_shift_p (rtx count)
3359 {
3360 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3361 }
3362
3363 /* Try to find a good way to implement the combiner pattern
3364 [(set (match_operand:SI 0 "register_operand" "r")
3365 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3366 (match_operand:SI 2 "const_int_operand" "n"))
3367 (match_operand:SI 3 "const_int_operand" "n"))) .
3368 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3369 return 0 for simple right / left or left/right shift combination.
3370 return 1 for a combination of shifts with zero_extend.
3371 return 2 for a combination of shifts with an AND that needs r0.
3372 return 3 for a combination of shifts with an AND that needs an extra
3373 scratch register, when the three highmost bits of the AND mask are clear.
3374 return 4 for a combination of shifts with an AND that needs an extra
3375 scratch register, when any of the three highmost bits of the AND mask
3376 is set.
3377 If ATTRP is set, store an initial right shift width in ATTRP[0],
3378 and the instruction length in ATTRP[1] . These values are not valid
3379 when returning 0.
3380 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3381 shift_amounts for the last shift value that is to be used before the
3382 sign extend. */
3383 int
3384 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3385 {
3386 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3387 int left = INTVAL (left_rtx), right;
3388 int best = 0;
3389 int cost, best_cost = 10000;
3390 int best_right = 0, best_len = 0;
3391 int i;
3392 int can_ext;
3393
3394 if (left < 0 || left > 31)
3395 return 0;
3396 if (CONST_INT_P (mask_rtx))
3397 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3398 else
3399 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3400 /* Can this be expressed as a right shift / left shift pair? */
3401 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3402 right = exact_log2 (lsb);
3403 mask2 = ~(mask + lsb - 1);
3404 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3405 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3406 if (! mask2)
3407 best_cost = shift_insns[right] + shift_insns[right + left];
3408 /* mask has no trailing zeroes <==> ! right */
3409 else if (! right && mask2 == ~(lsb2 - 1))
3410 {
3411 int late_right = exact_log2 (lsb2);
3412 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3413 }
3414 /* Try to use zero extend. */
3415 if (mask2 == ~(lsb2 - 1))
3416 {
3417 int width, first;
3418
3419 for (width = 8; width <= 16; width += 8)
3420 {
3421 /* Can we zero-extend right away? */
3422 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3423 {
3424 cost
3425 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3426 if (cost < best_cost)
3427 {
3428 best = 1;
3429 best_cost = cost;
3430 best_right = right;
3431 best_len = cost;
3432 if (attrp)
3433 attrp[2] = -1;
3434 }
3435 continue;
3436 }
3437 /* ??? Could try to put zero extend into initial right shift,
3438 or even shift a bit left before the right shift. */
3439 /* Determine value of first part of left shift, to get to the
3440 zero extend cut-off point. */
3441 first = width - exact_log2 (lsb2) + right;
3442 if (first >= 0 && right + left - first >= 0)
3443 {
3444 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3445 + ext_shift_insns[right + left - first];
3446 if (cost < best_cost)
3447 {
3448 best = 1;
3449 best_cost = cost;
3450 best_right = right;
3451 best_len = cost;
3452 if (attrp)
3453 attrp[2] = first;
3454 }
3455 }
3456 }
3457 }
3458 /* Try to use r0 AND pattern */
3459 for (i = 0; i <= 2; i++)
3460 {
3461 if (i > right)
3462 break;
3463 if (! CONST_OK_FOR_K08 (mask >> i))
3464 continue;
3465 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3466 if (cost < best_cost)
3467 {
3468 best = 2;
3469 best_cost = cost;
3470 best_right = i;
3471 best_len = cost - 1;
3472 }
3473 }
3474 /* Try to use a scratch register to hold the AND operand. */
3475 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3476 for (i = 0; i <= 2; i++)
3477 {
3478 if (i > right)
3479 break;
3480 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3481 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3482 if (cost < best_cost)
3483 {
3484 best = 4 - can_ext;
3485 best_cost = cost;
3486 best_right = i;
3487 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3488 }
3489 }
3490
3491 if (attrp)
3492 {
3493 attrp[0] = best_right;
3494 attrp[1] = best_len;
3495 }
3496 return best;
3497 }
3498
3499 /* This is used in length attributes of the unnamed instructions
3500 corresponding to shl_and_kind return values of 1 and 2. */
3501 int
3502 shl_and_length (rtx insn)
3503 {
3504 rtx set_src, left_rtx, mask_rtx;
3505 int attributes[3];
3506
3507 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3508 left_rtx = XEXP (XEXP (set_src, 0), 1);
3509 mask_rtx = XEXP (set_src, 1);
3510 shl_and_kind (left_rtx, mask_rtx, attributes);
3511 return attributes[1];
3512 }
3513
3514 /* This is used in length attribute of the and_shl_scratch instruction. */
3515
3516 int
3517 shl_and_scr_length (rtx insn)
3518 {
3519 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3520 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3521 rtx op = XEXP (set_src, 0);
3522 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3523 op = XEXP (XEXP (op, 0), 0);
3524 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3525 }
3526
3527 /* Generate rtl for instructions for which shl_and_kind advised a particular
3528 method of generating them, i.e. returned zero. */
3529
3530 int
3531 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3532 {
3533 int attributes[3];
3534 unsigned HOST_WIDE_INT mask;
3535 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3536 int right, total_shift;
3537 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3538
3539 right = attributes[0];
3540 total_shift = INTVAL (left_rtx) + right;
3541 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3542 switch (kind)
3543 {
3544 default:
3545 return -1;
3546 case 1:
3547 {
3548 int first = attributes[2];
3549 rtx operands[3];
3550
3551 if (first < 0)
3552 {
3553 emit_insn ((mask << right) <= 0xff
3554 ? gen_zero_extendqisi2 (dest,
3555 gen_lowpart (QImode, source))
3556 : gen_zero_extendhisi2 (dest,
3557 gen_lowpart (HImode, source)));
3558 source = dest;
3559 }
3560 if (source != dest)
3561 emit_insn (gen_movsi (dest, source));
3562 operands[0] = dest;
3563 if (right)
3564 {
3565 operands[2] = GEN_INT (right);
3566 gen_shifty_hi_op (LSHIFTRT, operands);
3567 }
3568 if (first > 0)
3569 {
3570 operands[2] = GEN_INT (first);
3571 gen_shifty_hi_op (ASHIFT, operands);
3572 total_shift -= first;
3573 mask <<= first;
3574 }
3575 if (first >= 0)
3576 emit_insn (mask <= 0xff
3577 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3578 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3579 if (total_shift > 0)
3580 {
3581 operands[2] = GEN_INT (total_shift);
3582 gen_shifty_hi_op (ASHIFT, operands);
3583 }
3584 break;
3585 }
3586 case 4:
3587 shift_gen_fun = gen_shifty_op;
3588 case 3:
3589 /* If the topmost bit that matters is set, set the topmost bits
3590 that don't matter. This way, we might be able to get a shorter
3591 signed constant. */
3592 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3593 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3594 case 2:
3595 /* Don't expand fine-grained when combining, because that will
3596 make the pattern fail. */
3597 if (currently_expanding_to_rtl
3598 || reload_in_progress || reload_completed)
3599 {
3600 rtx operands[3];
3601
3602 /* Cases 3 and 4 should be handled by this split
3603 only while combining */
3604 gcc_assert (kind <= 2);
3605 if (right)
3606 {
3607 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3608 source = dest;
3609 }
3610 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3611 if (total_shift)
3612 {
3613 operands[0] = dest;
3614 operands[1] = dest;
3615 operands[2] = GEN_INT (total_shift);
3616 shift_gen_fun (ASHIFT, operands);
3617 }
3618 break;
3619 }
3620 else
3621 {
3622 int neg = 0;
3623 if (kind != 4 && total_shift < 16)
3624 {
3625 neg = -ext_shift_amounts[total_shift][1];
3626 if (neg > 0)
3627 neg -= ext_shift_amounts[total_shift][2];
3628 else
3629 neg = 0;
3630 }
3631 emit_insn (gen_and_shl_scratch (dest, source,
3632 GEN_INT (right),
3633 GEN_INT (mask),
3634 GEN_INT (total_shift + neg),
3635 GEN_INT (neg)));
3636 emit_insn (gen_movsi (dest, dest));
3637 break;
3638 }
3639 }
3640 return 0;
3641 }
3642
3643 /* Try to find a good way to implement the combiner pattern
3644 [(set (match_operand:SI 0 "register_operand" "=r")
3645 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3646 (match_operand:SI 2 "const_int_operand" "n")
3647 (match_operand:SI 3 "const_int_operand" "n")
3648 (const_int 0)))
3649 (clobber (reg:SI T_REG))]
3650 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3651 return 0 for simple left / right shift combination.
3652 return 1 for left shift / 8 bit sign extend / left shift.
3653 return 2 for left shift / 16 bit sign extend / left shift.
3654 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3655 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3656 return 5 for left shift / 16 bit sign extend / right shift
3657 return 6 for < 8 bit sign extend / left shift.
3658 return 7 for < 8 bit sign extend / left shift / single right shift.
3659 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3660
3661 int
3662 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3663 {
3664 int left, size, insize, ext;
3665 int cost = 0, best_cost;
3666 int kind;
3667
3668 left = INTVAL (left_rtx);
3669 size = INTVAL (size_rtx);
3670 insize = size - left;
3671 gcc_assert (insize > 0);
3672 /* Default to left / right shift. */
3673 kind = 0;
3674 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3675 if (size <= 16)
3676 {
3677 /* 16 bit shift / sign extend / 16 bit shift */
3678 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3679 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3680 below, by alternative 3 or something even better. */
3681 if (cost < best_cost)
3682 {
3683 kind = 5;
3684 best_cost = cost;
3685 }
3686 }
3687 /* Try a plain sign extend between two shifts. */
3688 for (ext = 16; ext >= insize; ext -= 8)
3689 {
3690 if (ext <= size)
3691 {
3692 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3693 if (cost < best_cost)
3694 {
3695 kind = ext / (unsigned) 8;
3696 best_cost = cost;
3697 }
3698 }
3699 /* Check if we can do a sloppy shift with a final signed shift
3700 restoring the sign. */
3701 if (EXT_SHIFT_SIGNED (size - ext))
3702 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3703 /* If not, maybe it's still cheaper to do the second shift sloppy,
3704 and do a final sign extend? */
3705 else if (size <= 16)
3706 cost = ext_shift_insns[ext - insize] + 1
3707 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3708 else
3709 continue;
3710 if (cost < best_cost)
3711 {
3712 kind = ext / (unsigned) 8 + 2;
3713 best_cost = cost;
3714 }
3715 }
3716 /* Check if we can sign extend in r0 */
3717 if (insize < 8)
3718 {
3719 cost = 3 + shift_insns[left];
3720 if (cost < best_cost)
3721 {
3722 kind = 6;
3723 best_cost = cost;
3724 }
3725 /* Try the same with a final signed shift. */
3726 if (left < 31)
3727 {
3728 cost = 3 + ext_shift_insns[left + 1] + 1;
3729 if (cost < best_cost)
3730 {
3731 kind = 7;
3732 best_cost = cost;
3733 }
3734 }
3735 }
3736 if (TARGET_SH3)
3737 {
3738 /* Try to use a dynamic shift. */
3739 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3740 if (cost < best_cost)
3741 {
3742 kind = 0;
3743 best_cost = cost;
3744 }
3745 }
3746 if (costp)
3747 *costp = cost;
3748 return kind;
3749 }
3750
3751 /* Function to be used in the length attribute of the instructions
3752 implementing this pattern. */
3753
3754 int
3755 shl_sext_length (rtx insn)
3756 {
3757 rtx set_src, left_rtx, size_rtx;
3758 int cost;
3759
3760 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3761 left_rtx = XEXP (XEXP (set_src, 0), 1);
3762 size_rtx = XEXP (set_src, 1);
3763 shl_sext_kind (left_rtx, size_rtx, &cost);
3764 return cost;
3765 }
3766
3767 /* Generate rtl for this pattern */
3768
3769 int
3770 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3771 {
3772 int kind;
3773 int left, size, insize, cost;
3774 rtx operands[3];
3775
3776 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3777 left = INTVAL (left_rtx);
3778 size = INTVAL (size_rtx);
3779 insize = size - left;
3780 switch (kind)
3781 {
3782 case 1:
3783 case 2:
3784 case 3:
3785 case 4:
3786 {
3787 int ext = kind & 1 ? 8 : 16;
3788 int shift2 = size - ext;
3789
3790 /* Don't expand fine-grained when combining, because that will
3791 make the pattern fail. */
3792 if (! currently_expanding_to_rtl
3793 && ! reload_in_progress && ! reload_completed)
3794 {
3795 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3796 emit_insn (gen_movsi (dest, source));
3797 break;
3798 }
3799 if (dest != source)
3800 emit_insn (gen_movsi (dest, source));
3801 operands[0] = dest;
3802 if (ext - insize)
3803 {
3804 operands[2] = GEN_INT (ext - insize);
3805 gen_shifty_hi_op (ASHIFT, operands);
3806 }
3807 emit_insn (kind & 1
3808 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3809 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3810 if (kind <= 2)
3811 {
3812 if (shift2)
3813 {
3814 operands[2] = GEN_INT (shift2);
3815 gen_shifty_op (ASHIFT, operands);
3816 }
3817 }
3818 else
3819 {
3820 if (shift2 > 0)
3821 {
3822 if (EXT_SHIFT_SIGNED (shift2))
3823 {
3824 operands[2] = GEN_INT (shift2 + 1);
3825 gen_shifty_op (ASHIFT, operands);
3826 operands[2] = const1_rtx;
3827 gen_shifty_op (ASHIFTRT, operands);
3828 break;
3829 }
3830 operands[2] = GEN_INT (shift2);
3831 gen_shifty_hi_op (ASHIFT, operands);
3832 }
3833 else if (shift2)
3834 {
3835 operands[2] = GEN_INT (-shift2);
3836 gen_shifty_hi_op (LSHIFTRT, operands);
3837 }
3838 emit_insn (size <= 8
3839 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3840 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3841 }
3842 break;
3843 }
3844 case 5:
3845 {
3846 int i = 16 - size;
3847 if (! currently_expanding_to_rtl
3848 && ! reload_in_progress && ! reload_completed)
3849 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3850 else
3851 {
3852 operands[0] = dest;
3853 operands[2] = GEN_INT (16 - insize);
3854 gen_shifty_hi_op (ASHIFT, operands);
3855 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3856 }
3857 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3858 while (--i >= 0)
3859 gen_ashift (ASHIFTRT, 1, dest);
3860 break;
3861 }
3862 case 6:
3863 case 7:
3864 /* Don't expand fine-grained when combining, because that will
3865 make the pattern fail. */
3866 if (! currently_expanding_to_rtl
3867 && ! reload_in_progress && ! reload_completed)
3868 {
3869 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3870 emit_insn (gen_movsi (dest, source));
3871 break;
3872 }
3873 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3874 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3875 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3876 operands[0] = dest;
3877 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3878 gen_shifty_op (ASHIFT, operands);
3879 if (kind == 7)
3880 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3881 break;
3882 default:
3883 return -1;
3884 }
3885 return 0;
3886 }
3887
3888 /* Prefix a symbol_ref name with "datalabel". */
3889
3890 rtx
3891 gen_datalabel_ref (rtx sym)
3892 {
3893 const char *str;
3894
3895 if (GET_CODE (sym) == LABEL_REF)
3896 return gen_rtx_CONST (GET_MODE (sym),
3897 gen_rtx_UNSPEC (GET_MODE (sym),
3898 gen_rtvec (1, sym),
3899 UNSPEC_DATALABEL));
3900
3901 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3902
3903 str = XSTR (sym, 0);
3904 /* Share all SYMBOL_REF strings with the same value - that is important
3905 for cse. */
3906 str = IDENTIFIER_POINTER (get_identifier (str));
3907 XSTR (sym, 0) = str;
3908
3909 return sym;
3910 }
3911
3912 \f
3913 static alloc_pool label_ref_list_pool;
3914
3915 typedef struct label_ref_list_d
3916 {
3917 rtx label;
3918 struct label_ref_list_d *next;
3919 } *label_ref_list_t;
3920
3921 /* The SH cannot load a large constant into a register, constants have to
3922 come from a pc relative load. The reference of a pc relative load
3923 instruction must be less than 1k in front of the instruction. This
3924 means that we often have to dump a constant inside a function, and
3925 generate code to branch around it.
3926
3927 It is important to minimize this, since the branches will slow things
3928 down and make things bigger.
3929
3930 Worst case code looks like:
3931
3932 mov.l L1,rn
3933 bra L2
3934 nop
3935 align
3936 L1: .long value
3937 L2:
3938 ..
3939
3940 mov.l L3,rn
3941 bra L4
3942 nop
3943 align
3944 L3: .long value
3945 L4:
3946 ..
3947
3948 We fix this by performing a scan before scheduling, which notices which
3949 instructions need to have their operands fetched from the constant table
3950 and builds the table.
3951
3952 The algorithm is:
3953
3954 scan, find an instruction which needs a pcrel move. Look forward, find the
3955 last barrier which is within MAX_COUNT bytes of the requirement.
3956 If there isn't one, make one. Process all the instructions between
3957 the find and the barrier.
3958
3959 In the above example, we can tell that L3 is within 1k of L1, so
3960 the first move can be shrunk from the 3 insn+constant sequence into
3961 just 1 insn, and the constant moved to L3 to make:
3962
3963 mov.l L1,rn
3964 ..
3965 mov.l L3,rn
3966 bra L4
3967 nop
3968 align
3969 L3:.long value
3970 L4:.long value
3971
3972 Then the second move becomes the target for the shortening process. */
3973
3974 typedef struct
3975 {
3976 rtx value; /* Value in table. */
3977 rtx label; /* Label of value. */
3978 label_ref_list_t wend; /* End of window. */
3979 enum machine_mode mode; /* Mode of value. */
3980
3981 /* True if this constant is accessed as part of a post-increment
3982 sequence. Note that HImode constants are never accessed in this way. */
3983 bool part_of_sequence_p;
3984 } pool_node;
3985
3986 /* The maximum number of constants that can fit into one pool, since
3987 constants in the range 0..510 are at least 2 bytes long, and in the
3988 range from there to 1018 at least 4 bytes. */
3989
3990 #define MAX_POOL_SIZE 372
3991 static pool_node pool_vector[MAX_POOL_SIZE];
3992 static int pool_size;
3993 static rtx pool_window_label;
3994 static int pool_window_last;
3995
3996 static int max_labelno_before_reorg;
3997
3998 /* ??? If we need a constant in HImode which is the truncated value of a
3999 constant we need in SImode, we could combine the two entries thus saving
4000 two bytes. Is this common enough to be worth the effort of implementing
4001 it? */
4002
4003 /* ??? This stuff should be done at the same time that we shorten branches.
4004 As it is now, we must assume that all branches are the maximum size, and
4005 this causes us to almost always output constant pools sooner than
4006 necessary. */
4007
4008 /* Add a constant to the pool and return its label. */
4009
4010 static rtx
4011 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4012 {
4013 int i;
4014 rtx lab, new_rtx;
4015 label_ref_list_t ref, newref;
4016
4017 /* First see if we've already got it. */
4018 for (i = 0; i < pool_size; i++)
4019 {
4020 if (x->code == pool_vector[i].value->code
4021 && mode == pool_vector[i].mode)
4022 {
4023 if (x->code == CODE_LABEL)
4024 {
4025 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4026 continue;
4027 }
4028 if (rtx_equal_p (x, pool_vector[i].value))
4029 {
4030 lab = new_rtx = 0;
4031 if (! last_value
4032 || ! i
4033 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4034 {
4035 new_rtx = gen_label_rtx ();
4036 LABEL_REFS (new_rtx) = pool_vector[i].label;
4037 pool_vector[i].label = lab = new_rtx;
4038 }
4039 if (lab && pool_window_label)
4040 {
4041 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4042 newref->label = pool_window_label;
4043 ref = pool_vector[pool_window_last].wend;
4044 newref->next = ref;
4045 pool_vector[pool_window_last].wend = newref;
4046 }
4047 if (new_rtx)
4048 pool_window_label = new_rtx;
4049 pool_window_last = i;
4050 return lab;
4051 }
4052 }
4053 }
4054
4055 /* Need a new one. */
4056 pool_vector[pool_size].value = x;
4057 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4058 {
4059 lab = 0;
4060 pool_vector[pool_size - 1].part_of_sequence_p = true;
4061 }
4062 else
4063 lab = gen_label_rtx ();
4064 pool_vector[pool_size].mode = mode;
4065 pool_vector[pool_size].label = lab;
4066 pool_vector[pool_size].wend = NULL;
4067 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4068 if (lab && pool_window_label)
4069 {
4070 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4071 newref->label = pool_window_label;
4072 ref = pool_vector[pool_window_last].wend;
4073 newref->next = ref;
4074 pool_vector[pool_window_last].wend = newref;
4075 }
4076 if (lab)
4077 pool_window_label = lab;
4078 pool_window_last = pool_size;
4079 pool_size++;
4080 return lab;
4081 }
4082
4083 /* Output the literal table. START, if nonzero, is the first instruction
4084 this table is needed for, and also indicates that there is at least one
4085 casesi_worker_2 instruction; We have to emit the operand3 labels from
4086 these insns at a 4-byte aligned position. BARRIER is the barrier
4087 after which we are to place the table. */
4088
4089 static void
4090 dump_table (rtx start, rtx barrier)
4091 {
4092 rtx scan = barrier;
4093 int i;
4094 int need_align = 1;
4095 rtx lab;
4096 label_ref_list_t ref;
4097 int have_df = 0;
4098
4099 /* Do two passes, first time dump out the HI sized constants. */
4100
4101 for (i = 0; i < pool_size; i++)
4102 {
4103 pool_node *p = &pool_vector[i];
4104
4105 if (p->mode == HImode)
4106 {
4107 if (need_align)
4108 {
4109 scan = emit_insn_after (gen_align_2 (), scan);
4110 need_align = 0;
4111 }
4112 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4113 scan = emit_label_after (lab, scan);
4114 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4115 scan);
4116 for (ref = p->wend; ref; ref = ref->next)
4117 {
4118 lab = ref->label;
4119 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4120 }
4121 }
4122 else if (p->mode == DFmode)
4123 have_df = 1;
4124 }
4125
4126 need_align = 1;
4127
4128 if (start)
4129 {
4130 scan = emit_insn_after (gen_align_4 (), scan);
4131 need_align = 0;
4132 for (; start != barrier; start = NEXT_INSN (start))
4133 if (NONJUMP_INSN_P (start)
4134 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4135 {
4136 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4137 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4138
4139 scan = emit_label_after (lab, scan);
4140 }
4141 }
4142 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4143 {
4144 rtx align_insn = NULL_RTX;
4145
4146 scan = emit_label_after (gen_label_rtx (), scan);
4147 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4148 need_align = 0;
4149
4150 for (i = 0; i < pool_size; i++)
4151 {
4152 pool_node *p = &pool_vector[i];
4153
4154 switch (p->mode)
4155 {
4156 case HImode:
4157 break;
4158 case SImode:
4159 case SFmode:
4160 if (align_insn && !p->part_of_sequence_p)
4161 {
4162 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4163 emit_label_before (lab, align_insn);
4164 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4165 align_insn);
4166 for (ref = p->wend; ref; ref = ref->next)
4167 {
4168 lab = ref->label;
4169 emit_insn_before (gen_consttable_window_end (lab),
4170 align_insn);
4171 }
4172 delete_insn (align_insn);
4173 align_insn = NULL_RTX;
4174 continue;
4175 }
4176 else
4177 {
4178 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4179 scan = emit_label_after (lab, scan);
4180 scan = emit_insn_after (gen_consttable_4 (p->value,
4181 const0_rtx), scan);
4182 need_align = ! need_align;
4183 }
4184 break;
4185 case DFmode:
4186 if (need_align)
4187 {
4188 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4189 align_insn = scan;
4190 need_align = 0;
4191 }
4192 case DImode:
4193 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4194 scan = emit_label_after (lab, scan);
4195 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4196 scan);
4197 break;
4198 default:
4199 gcc_unreachable ();
4200 }
4201
4202 if (p->mode != HImode)
4203 {
4204 for (ref = p->wend; ref; ref = ref->next)
4205 {
4206 lab = ref->label;
4207 scan = emit_insn_after (gen_consttable_window_end (lab),
4208 scan);
4209 }
4210 }
4211 }
4212
4213 pool_size = 0;
4214 }
4215
4216 for (i = 0; i < pool_size; i++)
4217 {
4218 pool_node *p = &pool_vector[i];
4219
4220 switch (p->mode)
4221 {
4222 case HImode:
4223 break;
4224 case SImode:
4225 case SFmode:
4226 if (need_align)
4227 {
4228 need_align = 0;
4229 scan = emit_label_after (gen_label_rtx (), scan);
4230 scan = emit_insn_after (gen_align_4 (), scan);
4231 }
4232 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4233 scan = emit_label_after (lab, scan);
4234 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4235 scan);
4236 break;
4237 case DFmode:
4238 case DImode:
4239 if (need_align)
4240 {
4241 need_align = 0;
4242 scan = emit_label_after (gen_label_rtx (), scan);
4243 scan = emit_insn_after (gen_align_4 (), scan);
4244 }
4245 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4246 scan = emit_label_after (lab, scan);
4247 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4248 scan);
4249 break;
4250 default:
4251 gcc_unreachable ();
4252 }
4253
4254 if (p->mode != HImode)
4255 {
4256 for (ref = p->wend; ref; ref = ref->next)
4257 {
4258 lab = ref->label;
4259 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4260 }
4261 }
4262 }
4263
4264 scan = emit_insn_after (gen_consttable_end (), scan);
4265 scan = emit_barrier_after (scan);
4266 pool_size = 0;
4267 pool_window_label = NULL_RTX;
4268 pool_window_last = 0;
4269 }
4270
4271 /* Return nonzero if constant would be an ok source for a
4272 mov.w instead of a mov.l. */
4273
4274 static int
4275 hi_const (rtx src)
4276 {
4277 return (CONST_INT_P (src)
4278 && INTVAL (src) >= -32768
4279 && INTVAL (src) <= 32767);
4280 }
4281
4282 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4283
4284 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4285
4286 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
4287 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4288 need to fix it if the input value is CONST_OK_FOR_I08. */
4289
4290 static int
4291 broken_move (rtx insn)
4292 {
4293 if (NONJUMP_INSN_P (insn))
4294 {
4295 rtx pat = PATTERN (insn);
4296 if (GET_CODE (pat) == PARALLEL)
4297 pat = XVECEXP (pat, 0, 0);
4298 if (GET_CODE (pat) == SET
4299 /* We can load any 8-bit value if we don't care what the high
4300 order bits end up as. */
4301 && GET_MODE (SET_DEST (pat)) != QImode
4302 && (CONSTANT_P (SET_SRC (pat))
4303 /* Match mova_const. */
4304 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4305 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4306 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4307 && ! (TARGET_SH2E
4308 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4309 && (fp_zero_operand (SET_SRC (pat))
4310 || fp_one_operand (SET_SRC (pat)))
4311 /* In general we don't know the current setting of fpscr, so disable fldi.
4312 There is an exception if this was a register-register move
4313 before reload - and hence it was ascertained that we have
4314 single precision setting - and in a post-reload optimization
4315 we changed this to do a constant load. In that case
4316 we don't have an r0 clobber, hence we must use fldi. */
4317 && (TARGET_FMOVD
4318 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4319 == SCRATCH))
4320 && REG_P (SET_DEST (pat))
4321 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4322 && ! (TARGET_SH2A
4323 && GET_MODE (SET_DEST (pat)) == SImode
4324 && (satisfies_constraint_I20 (SET_SRC (pat))
4325 || satisfies_constraint_I28 (SET_SRC (pat))))
4326 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4327 return 1;
4328 }
4329
4330 return 0;
4331 }
4332
4333 static int
4334 mova_p (rtx insn)
4335 {
4336 return (NONJUMP_INSN_P (insn)
4337 && GET_CODE (PATTERN (insn)) == SET
4338 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4339 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4340 /* Don't match mova_const. */
4341 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4342 }
4343
4344 /* Fix up a mova from a switch that went out of range. */
4345 static void
4346 fixup_mova (rtx mova)
4347 {
4348 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4349 if (! flag_pic)
4350 {
4351 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4352 INSN_CODE (mova) = -1;
4353 }
4354 else
4355 {
4356 rtx worker = mova;
4357 rtx lab = gen_label_rtx ();
4358 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4359
4360 do
4361 {
4362 worker = NEXT_INSN (worker);
4363 gcc_assert (worker
4364 && !LABEL_P (worker)
4365 && !JUMP_P (worker));
4366 } while (NOTE_P (worker)
4367 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4368 wpat = PATTERN (worker);
4369 wpat0 = XVECEXP (wpat, 0, 0);
4370 wpat1 = XVECEXP (wpat, 0, 1);
4371 wsrc = SET_SRC (wpat0);
4372 PATTERN (worker) = (gen_casesi_worker_2
4373 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4374 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4375 XEXP (wpat1, 0)));
4376 INSN_CODE (worker) = -1;
4377 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4378 base = gen_rtx_LABEL_REF (Pmode, lab);
4379 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4380 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4381 INSN_CODE (mova) = -1;
4382 }
4383 }
4384
4385 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4386 *num_mova, and check if the new mova is not nested within the first one.
4387 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4388 2 if new_mova has been assigned to *first_mova, -1 otherwise.. */
4389 static int
4390 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4391 {
4392 int n_addr = 0; /* Initialization to shut up spurious warning. */
4393 int f_target, n_target = 0; /* Likewise. */
4394
4395 if (optimize)
4396 {
4397 /* If NEW_MOVA has no address yet, it will be handled later. */
4398 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4399 return -1;
4400
4401 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4402 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4403 if (n_addr > n_target || n_addr + 1022 < n_target)
4404 {
4405 /* Change the mova into a load.
4406 broken_move will then return true for it. */
4407 fixup_mova (new_mova);
4408 return 1;
4409 }
4410 }
4411 if (!(*num_mova)++)
4412 {
4413 *first_mova = new_mova;
4414 return 2;
4415 }
4416 if (!optimize
4417 || ((f_target
4418 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4419 >= n_target))
4420 return -1;
4421
4422 (*num_mova)--;
4423 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4424 > n_target - n_addr)
4425 {
4426 fixup_mova (*first_mova);
4427 return 0;
4428 }
4429 else
4430 {
4431 fixup_mova (new_mova);
4432 return 1;
4433 }
4434 }
4435
4436 /* Find the last barrier from insn FROM which is close enough to hold the
4437 constant pool. If we can't find one, then create one near the end of
4438 the range. */
4439
4440 static rtx
4441 find_barrier (int num_mova, rtx mova, rtx from)
4442 {
4443 int count_si = 0;
4444 int count_hi = 0;
4445 int found_hi = 0;
4446 int found_si = 0;
4447 int found_di = 0;
4448 int hi_align = 2;
4449 int si_align = 2;
4450 int leading_mova = num_mova;
4451 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4452 int si_limit;
4453 int hi_limit;
4454 rtx orig = from;
4455 rtx last_got = NULL_RTX;
4456 rtx last_symoff = NULL_RTX;
4457
4458 /* For HImode: range is 510, add 4 because pc counts from address of
4459 second instruction after this one, subtract 2 for the jump instruction
4460 that we may need to emit before the table, subtract 2 for the instruction
4461 that fills the jump delay slot (in very rare cases, reorg will take an
4462 instruction from after the constant pool or will leave the delay slot
4463 empty). This gives 510.
4464 For SImode: range is 1020, add 4 because pc counts from address of
4465 second instruction after this one, subtract 2 in case pc is 2 byte
4466 aligned, subtract 2 for the jump instruction that we may need to emit
4467 before the table, subtract 2 for the instruction that fills the jump
4468 delay slot. This gives 1018. */
4469
4470 /* The branch will always be shortened now that the reference address for
4471 forward branches is the successor address, thus we need no longer make
4472 adjustments to the [sh]i_limit for -O0. */
4473
4474 si_limit = 1018;
4475 hi_limit = 510;
4476
4477 while (from && count_si < si_limit && count_hi < hi_limit)
4478 {
4479 int inc = get_attr_length (from);
4480 int new_align = 1;
4481
4482 /* If this is a label that existed at the time of the compute_alignments
4483 call, determine the alignment. N.B. When find_barrier recurses for
4484 an out-of-reach mova, we might see labels at the start of previously
4485 inserted constant tables. */
4486 if (LABEL_P (from)
4487 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4488 {
4489 if (optimize)
4490 new_align = 1 << label_to_alignment (from);
4491 else if (BARRIER_P (prev_nonnote_insn (from)))
4492 new_align = 1 << barrier_align (from);
4493 else
4494 new_align = 1;
4495 inc = 0;
4496 }
4497 /* In case we are scanning a constant table because of recursion, check
4498 for explicit alignments. If the table is long, we might be forced
4499 to emit the new table in front of it; the length of the alignment
4500 might be the last straw. */
4501 else if (NONJUMP_INSN_P (from)
4502 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4503 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4504 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4505 /* When we find the end of a constant table, paste the new constant
4506 at the end. That is better than putting it in front because
4507 this way, we don't need extra alignment for adding a 4-byte-aligned
4508 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4509 else if (NONJUMP_INSN_P (from)
4510 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4511 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4512 return from;
4513
4514 if (BARRIER_P (from))
4515 {
4516 rtx next;
4517
4518 found_barrier = from;
4519
4520 /* If we are at the end of the function, or in front of an alignment
4521 instruction, we need not insert an extra alignment. We prefer
4522 this kind of barrier. */
4523 if (barrier_align (from) > 2)
4524 good_barrier = from;
4525
4526 /* If we are at the end of a hot/cold block, dump the constants
4527 here. */
4528 next = NEXT_INSN (from);
4529 if (next
4530 && NOTE_P (next)
4531 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4532 break;
4533 }
4534
4535 if (broken_move (from))
4536 {
4537 rtx pat, src, dst;
4538 enum machine_mode mode;
4539
4540 pat = PATTERN (from);
4541 if (GET_CODE (pat) == PARALLEL)
4542 pat = XVECEXP (pat, 0, 0);
4543 src = SET_SRC (pat);
4544 dst = SET_DEST (pat);
4545 mode = GET_MODE (dst);
4546
4547 /* GOT pcrelat setting comes in pair of
4548 mova .L8,r0
4549 mov.l .L8,r12
4550 instructions. (plus add r0,r12).
4551 Remember if we see one without the other. */
4552 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4553 last_got = last_got ? NULL_RTX : from;
4554 else if (PIC_ADDR_P (src))
4555 last_got = last_got ? NULL_RTX : from;
4556
4557 /* We must explicitly check the mode, because sometimes the
4558 front end will generate code to load unsigned constants into
4559 HImode targets without properly sign extending them. */
4560 if (mode == HImode
4561 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4562 {
4563 found_hi += 2;
4564 /* We put the short constants before the long constants, so
4565 we must count the length of short constants in the range
4566 for the long constants. */
4567 /* ??? This isn't optimal, but is easy to do. */
4568 si_limit -= 2;
4569 }
4570 else
4571 {
4572 /* We dump DF/DI constants before SF/SI ones, because
4573 the limit is the same, but the alignment requirements
4574 are higher. We may waste up to 4 additional bytes
4575 for alignment, and the DF/DI constant may have
4576 another SF/SI constant placed before it. */
4577 if (TARGET_SHCOMPACT
4578 && ! found_di
4579 && (mode == DFmode || mode == DImode))
4580 {
4581 found_di = 1;
4582 si_limit -= 8;
4583 }
4584 while (si_align > 2 && found_si + si_align - 2 > count_si)
4585 si_align >>= 1;
4586 if (found_si > count_si)
4587 count_si = found_si;
4588 found_si += GET_MODE_SIZE (mode);
4589 if (num_mova)
4590 si_limit -= GET_MODE_SIZE (mode);
4591 }
4592 }
4593
4594 if (mova_p (from))
4595 {
4596 switch (untangle_mova (&num_mova, &mova, from))
4597 {
4598 case 1:
4599 if (flag_pic)
4600 {
4601 rtx src = SET_SRC (PATTERN (from));
4602 if (GET_CODE (src) == CONST
4603 && GET_CODE (XEXP (src, 0)) == UNSPEC
4604 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4605 last_symoff = from;
4606 }
4607 break;
4608 case 0: return find_barrier (0, 0, mova);
4609 case 2:
4610 {
4611 leading_mova = 0;
4612 barrier_before_mova
4613 = good_barrier ? good_barrier : found_barrier;
4614 }
4615 default: break;
4616 }
4617 if (found_si > count_si)
4618 count_si = found_si;
4619 }
4620 else if (JUMP_TABLE_DATA_P (from))
4621 {
4622 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4623 || (num_mova
4624 && (prev_nonnote_insn (from)
4625 == XEXP (MOVA_LABELREF (mova), 0))))
4626 num_mova--;
4627 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4628 {
4629 /* We have just passed the barrier in front of the
4630 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4631 the ADDR_DIFF_VEC is accessed as data, just like our pool
4632 constants, this is a good opportunity to accommodate what
4633 we have gathered so far.
4634 If we waited any longer, we could end up at a barrier in
4635 front of code, which gives worse cache usage for separated
4636 instruction / data caches. */
4637 good_barrier = found_barrier;
4638 break;
4639 }
4640 else
4641 {
4642 rtx body = PATTERN (from);
4643 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4644 }
4645 }
4646 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4647 else if (JUMP_P (from)
4648 && ! TARGET_SH2
4649 && ! TARGET_SMALLCODE)
4650 new_align = 4;
4651
4652 /* There is a possibility that a bf is transformed into a bf/s by the
4653 delay slot scheduler. */
4654 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4655 && get_attr_type (from) == TYPE_CBRANCH
4656 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4657 inc += 2;
4658
4659 if (found_si)
4660 {
4661 count_si += inc;
4662 if (new_align > si_align)
4663 {
4664 si_limit -= (count_si - 1) & (new_align - si_align);
4665 si_align = new_align;
4666 }
4667 count_si = (count_si + new_align - 1) & -new_align;
4668 }
4669 if (found_hi)
4670 {
4671 count_hi += inc;
4672 if (new_align > hi_align)
4673 {
4674 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4675 hi_align = new_align;
4676 }
4677 count_hi = (count_hi + new_align - 1) & -new_align;
4678 }
4679 from = NEXT_INSN (from);
4680 }
4681
4682 if (num_mova)
4683 {
4684 if (leading_mova)
4685 {
4686 /* Try as we might, the leading mova is out of range. Change
4687 it into a load (which will become a pcload) and retry. */
4688 fixup_mova (mova);
4689 return find_barrier (0, 0, mova);
4690 }
4691 else
4692 {
4693 /* Insert the constant pool table before the mova instruction,
4694 to prevent the mova label reference from going out of range. */
4695 from = mova;
4696 good_barrier = found_barrier = barrier_before_mova;
4697 }
4698 }
4699
4700 if (found_barrier)
4701 {
4702 if (good_barrier && next_real_insn (found_barrier))
4703 found_barrier = good_barrier;
4704 }
4705 else
4706 {
4707 /* We didn't find a barrier in time to dump our stuff,
4708 so we'll make one. */
4709 rtx label = gen_label_rtx ();
4710
4711 /* Don't emit a constant table in the middle of insns for
4712 casesi_worker_2. This is a bit overkill but is enough
4713 because casesi_worker_2 wouldn't appear so frequently. */
4714 if (last_symoff)
4715 from = last_symoff;
4716
4717 /* If we exceeded the range, then we must back up over the last
4718 instruction we looked at. Otherwise, we just need to undo the
4719 NEXT_INSN at the end of the loop. */
4720 if (PREV_INSN (from) != orig
4721 && (count_hi > hi_limit || count_si > si_limit))
4722 from = PREV_INSN (PREV_INSN (from));
4723 else
4724 from = PREV_INSN (from);
4725
4726 /* Don't emit a constant table int the middle of global pointer setting,
4727 since that that would move the addressing base GOT into another table.
4728 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4729 in the pool anyway, so just move up the whole constant pool. */
4730 if (last_got)
4731 from = PREV_INSN (last_got);
4732
4733 /* Don't insert the constant pool table at the position which
4734 may be the landing pad. */
4735 if (flag_exceptions
4736 && CALL_P (from)
4737 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4738 from = PREV_INSN (from);
4739
4740 /* Walk back to be just before any jump or label.
4741 Putting it before a label reduces the number of times the branch
4742 around the constant pool table will be hit. Putting it before
4743 a jump makes it more likely that the bra delay slot will be
4744 filled. */
4745 while (NOTE_P (from) || JUMP_P (from)
4746 || LABEL_P (from))
4747 from = PREV_INSN (from);
4748
4749 from = emit_jump_insn_after (gen_jump (label), from);
4750 JUMP_LABEL (from) = label;
4751 LABEL_NUSES (label) = 1;
4752 found_barrier = emit_barrier_after (from);
4753 emit_label_after (label, found_barrier);
4754 }
4755
4756 return found_barrier;
4757 }
4758
4759 /* If the instruction INSN is implemented by a special function, and we can
4760 positively find the register that is used to call the sfunc, and this
4761 register is not used anywhere else in this instruction - except as the
4762 destination of a set, return this register; else, return 0. */
4763 rtx
4764 sfunc_uses_reg (rtx insn)
4765 {
4766 int i;
4767 rtx pattern, part, reg_part, reg;
4768
4769 if (!NONJUMP_INSN_P (insn))
4770 return 0;
4771 pattern = PATTERN (insn);
4772 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4773 return 0;
4774
4775 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4776 {
4777 part = XVECEXP (pattern, 0, i);
4778 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4779 reg_part = part;
4780 }
4781 if (! reg_part)
4782 return 0;
4783 reg = XEXP (reg_part, 0);
4784 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4785 {
4786 part = XVECEXP (pattern, 0, i);
4787 if (part == reg_part || GET_CODE (part) == CLOBBER)
4788 continue;
4789 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4790 && REG_P (SET_DEST (part)))
4791 ? SET_SRC (part) : part)))
4792 return 0;
4793 }
4794 return reg;
4795 }
4796
4797 /* See if the only way in which INSN uses REG is by calling it, or by
4798 setting it while calling it. Set *SET to a SET rtx if the register
4799 is set by INSN. */
4800
4801 static int
4802 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4803 {
4804 rtx pattern, reg2;
4805
4806 *set = NULL_RTX;
4807
4808 reg2 = sfunc_uses_reg (insn);
4809 if (reg2 && REGNO (reg2) == REGNO (reg))
4810 {
4811 pattern = single_set (insn);
4812 if (pattern
4813 && REG_P (SET_DEST (pattern))
4814 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4815 *set = pattern;
4816 return 0;
4817 }
4818 if (!CALL_P (insn))
4819 {
4820 /* We don't use rtx_equal_p because we don't care if the mode is
4821 different. */
4822 pattern = single_set (insn);
4823 if (pattern
4824 && REG_P (SET_DEST (pattern))
4825 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4826 {
4827 rtx par, part;
4828 int i;
4829
4830 *set = pattern;
4831 par = PATTERN (insn);
4832 if (GET_CODE (par) == PARALLEL)
4833 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4834 {
4835 part = XVECEXP (par, 0, i);
4836 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4837 return 1;
4838 }
4839 return reg_mentioned_p (reg, SET_SRC (pattern));
4840 }
4841
4842 return 1;
4843 }
4844
4845 pattern = PATTERN (insn);
4846
4847 if (GET_CODE (pattern) == PARALLEL)
4848 {
4849 int i;
4850
4851 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4852 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4853 return 1;
4854 pattern = XVECEXP (pattern, 0, 0);
4855 }
4856
4857 if (GET_CODE (pattern) == SET)
4858 {
4859 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4860 {
4861 /* We don't use rtx_equal_p, because we don't care if the
4862 mode is different. */
4863 if (!REG_P (SET_DEST (pattern))
4864 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4865 return 1;
4866
4867 *set = pattern;
4868 }
4869
4870 pattern = SET_SRC (pattern);
4871 }
4872
4873 if (GET_CODE (pattern) != CALL
4874 || !MEM_P (XEXP (pattern, 0))
4875 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4876 return 1;
4877
4878 return 0;
4879 }
4880
4881 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4882 general registers. Bits 0..15 mean that the respective registers
4883 are used as inputs in the instruction. Bits 16..31 mean that the
4884 registers 0..15, respectively, are used as outputs, or are clobbered.
4885 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4886 int
4887 regs_used (rtx x, int is_dest)
4888 {
4889 enum rtx_code code;
4890 const char *fmt;
4891 int i, used = 0;
4892
4893 if (! x)
4894 return used;
4895 code = GET_CODE (x);
4896 switch (code)
4897 {
4898 case REG:
4899 if (REGNO (x) < 16)
4900 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4901 << (REGNO (x) + is_dest));
4902 return 0;
4903 case SUBREG:
4904 {
4905 rtx y = SUBREG_REG (x);
4906
4907 if (!REG_P (y))
4908 break;
4909 if (REGNO (y) < 16)
4910 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4911 << (REGNO (y) +
4912 subreg_regno_offset (REGNO (y),
4913 GET_MODE (y),
4914 SUBREG_BYTE (x),
4915 GET_MODE (x)) + is_dest));
4916 return 0;
4917 }
4918 case SET:
4919 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4920 case RETURN:
4921 /* If there was a return value, it must have been indicated with USE. */
4922 return 0x00ffff00;
4923 case CLOBBER:
4924 is_dest = 1;
4925 break;
4926 case MEM:
4927 is_dest = 0;
4928 break;
4929 case CALL:
4930 used |= 0x00ff00f0;
4931 break;
4932 default:
4933 break;
4934 }
4935
4936 fmt = GET_RTX_FORMAT (code);
4937
4938 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4939 {
4940 if (fmt[i] == 'E')
4941 {
4942 register int j;
4943 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4944 used |= regs_used (XVECEXP (x, i, j), is_dest);
4945 }
4946 else if (fmt[i] == 'e')
4947 used |= regs_used (XEXP (x, i), is_dest);
4948 }
4949 return used;
4950 }
4951
4952 /* Create an instruction that prevents redirection of a conditional branch
4953 to the destination of the JUMP with address ADDR.
4954 If the branch needs to be implemented as an indirect jump, try to find
4955 a scratch register for it.
4956 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4957 If any preceding insn that doesn't fit into a delay slot is good enough,
4958 pass 1. Pass 2 if a definite blocking insn is needed.
4959 -1 is used internally to avoid deep recursion.
4960 If a blocking instruction is made or recognized, return it. */
4961
4962 static rtx
4963 gen_block_redirect (rtx jump, int addr, int need_block)
4964 {
4965 int dead = 0;
4966 rtx prev = prev_nonnote_insn (jump);
4967 rtx dest;
4968
4969 /* First, check if we already have an instruction that satisfies our need. */
4970 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4971 {
4972 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4973 return prev;
4974 if (GET_CODE (PATTERN (prev)) == USE
4975 || GET_CODE (PATTERN (prev)) == CLOBBER
4976 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4977 prev = jump;
4978 else if ((need_block &= ~1) < 0)
4979 return prev;
4980 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4981 need_block = 0;
4982 }
4983 if (GET_CODE (PATTERN (jump)) == RETURN)
4984 {
4985 if (! need_block)
4986 return prev;
4987 /* Reorg even does nasty things with return insns that cause branches
4988 to go out of range - see find_end_label and callers. */
4989 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4990 }
4991 /* We can't use JUMP_LABEL here because it might be undefined
4992 when not optimizing. */
4993 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4994 /* If the branch is out of range, try to find a scratch register for it. */
4995 if (optimize
4996 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4997 > 4092 + 4098))
4998 {
4999 rtx scan;
5000 /* Don't look for the stack pointer as a scratch register,
5001 it would cause trouble if an interrupt occurred. */
5002 unsigned attempt = 0x7fff, used;
5003 int jump_left = flag_expensive_optimizations + 1;
5004
5005 /* It is likely that the most recent eligible instruction is wanted for
5006 the delay slot. Therefore, find out which registers it uses, and
5007 try to avoid using them. */
5008
5009 for (scan = jump; (scan = PREV_INSN (scan)); )
5010 {
5011 enum rtx_code code;
5012
5013 if (INSN_DELETED_P (scan))
5014 continue;
5015 code = GET_CODE (scan);
5016 if (code == CODE_LABEL || code == JUMP_INSN)
5017 break;
5018 if (code == INSN
5019 && GET_CODE (PATTERN (scan)) != USE
5020 && GET_CODE (PATTERN (scan)) != CLOBBER
5021 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5022 {
5023 attempt &= ~regs_used (PATTERN (scan), 0);
5024 break;
5025 }
5026 }
5027 for (used = dead = 0, scan = JUMP_LABEL (jump);
5028 (scan = NEXT_INSN (scan)); )
5029 {
5030 enum rtx_code code;
5031
5032 if (INSN_DELETED_P (scan))
5033 continue;
5034 code = GET_CODE (scan);
5035 if (INSN_P (scan))
5036 {
5037 used |= regs_used (PATTERN (scan), 0);
5038 if (code == CALL_INSN)
5039 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5040 dead |= (used >> 16) & ~used;
5041 if (dead & attempt)
5042 {
5043 dead &= attempt;
5044 break;
5045 }
5046 if (code == JUMP_INSN)
5047 {
5048 if (jump_left-- && simplejump_p (scan))
5049 scan = JUMP_LABEL (scan);
5050 else
5051 break;
5052 }
5053 }
5054 }
5055 /* Mask out the stack pointer again, in case it was
5056 the only 'free' register we have found. */
5057 dead &= 0x7fff;
5058 }
5059 /* If the immediate destination is still in range, check for possible
5060 threading with a jump beyond the delay slot insn.
5061 Don't check if we are called recursively; the jump has been or will be
5062 checked in a different invocation then. */
5063
5064 else if (optimize && need_block >= 0)
5065 {
5066 rtx next = next_active_insn (next_active_insn (dest));
5067 if (next && JUMP_P (next)
5068 && GET_CODE (PATTERN (next)) == SET
5069 && recog_memoized (next) == CODE_FOR_jump_compact)
5070 {
5071 dest = JUMP_LABEL (next);
5072 if (dest
5073 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5074 > 4092 + 4098))
5075 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5076 }
5077 }
5078
5079 if (dead)
5080 {
5081 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5082
5083 /* It would be nice if we could convert the jump into an indirect
5084 jump / far branch right now, and thus exposing all constituent
5085 instructions to further optimization. However, reorg uses
5086 simplejump_p to determine if there is an unconditional jump where
5087 it should try to schedule instructions from the target of the
5088 branch; simplejump_p fails for indirect jumps even if they have
5089 a JUMP_LABEL. */
5090 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5091 (reg, GEN_INT (unspec_bbr_uid++)),
5092 jump);
5093 /* ??? We would like this to have the scope of the jump, but that
5094 scope will change when a delay slot insn of an inner scope is added.
5095 Hence, after delay slot scheduling, we'll have to expect
5096 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5097 the jump. */
5098
5099 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5100 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5101 return insn;
5102 }
5103 else if (need_block)
5104 /* We can't use JUMP_LABEL here because it might be undefined
5105 when not optimizing. */
5106 return emit_insn_before (gen_block_branch_redirect
5107 (GEN_INT (unspec_bbr_uid++)),
5108 jump);
5109 return prev;
5110 }
5111
5112 #define CONDJUMP_MIN -252
5113 #define CONDJUMP_MAX 262
5114 struct far_branch
5115 {
5116 /* A label (to be placed) in front of the jump
5117 that jumps to our ultimate destination. */
5118 rtx near_label;
5119 /* Where we are going to insert it if we cannot move the jump any farther,
5120 or the jump itself if we have picked up an existing jump. */
5121 rtx insert_place;
5122 /* The ultimate destination. */
5123 rtx far_label;
5124 struct far_branch *prev;
5125 /* If the branch has already been created, its address;
5126 else the address of its first prospective user. */
5127 int address;
5128 };
5129
5130 static void gen_far_branch (struct far_branch *);
5131 enum mdep_reorg_phase_e mdep_reorg_phase;
5132 static void
5133 gen_far_branch (struct far_branch *bp)
5134 {
5135 rtx insn = bp->insert_place;
5136 rtx jump;
5137 rtx label = gen_label_rtx ();
5138 int ok;
5139
5140 emit_label_after (label, insn);
5141 if (bp->far_label)
5142 {
5143 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5144 LABEL_NUSES (bp->far_label)++;
5145 }
5146 else
5147 jump = emit_jump_insn_after (gen_return (), insn);
5148 /* Emit a barrier so that reorg knows that any following instructions
5149 are not reachable via a fall-through path.
5150 But don't do this when not optimizing, since we wouldn't suppress the
5151 alignment for the barrier then, and could end up with out-of-range
5152 pc-relative loads. */
5153 if (optimize)
5154 emit_barrier_after (jump);
5155 emit_label_after (bp->near_label, insn);
5156 JUMP_LABEL (jump) = bp->far_label;
5157 ok = invert_jump (insn, label, 1);
5158 gcc_assert (ok);
5159
5160 /* If we are branching around a jump (rather than a return), prevent
5161 reorg from using an insn from the jump target as the delay slot insn -
5162 when reorg did this, it pessimized code (we rather hide the delay slot)
5163 and it could cause branches to go out of range. */
5164 if (bp->far_label)
5165 (emit_insn_after
5166 (gen_stuff_delay_slot
5167 (GEN_INT (unspec_bbr_uid++),
5168 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5169 insn));
5170 /* Prevent reorg from undoing our splits. */
5171 gen_block_redirect (jump, bp->address += 2, 2);
5172 }
5173
5174 /* Fix up ADDR_DIFF_VECs. */
5175 void
5176 fixup_addr_diff_vecs (rtx first)
5177 {
5178 rtx insn;
5179
5180 for (insn = first; insn; insn = NEXT_INSN (insn))
5181 {
5182 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5183
5184 if (!JUMP_P (insn)
5185 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5186 continue;
5187 pat = PATTERN (insn);
5188 vec_lab = XEXP (XEXP (pat, 0), 0);
5189
5190 /* Search the matching casesi_jump_2. */
5191 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5192 {
5193 if (!JUMP_P (prev))
5194 continue;
5195 prevpat = PATTERN (prev);
5196 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5197 continue;
5198 x = XVECEXP (prevpat, 0, 1);
5199 if (GET_CODE (x) != USE)
5200 continue;
5201 x = XEXP (x, 0);
5202 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5203 break;
5204 }
5205 /* FIXME: This is a bug in the optimizer, but it seems harmless
5206 to just avoid panicing. */
5207 if (!prev)
5208 continue;
5209
5210 /* Emit the reference label of the braf where it belongs, right after
5211 the casesi_jump_2 (i.e. braf). */
5212 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5213 emit_label_after (braf_label, prev);
5214
5215 /* Fix up the ADDR_DIF_VEC to be relative
5216 to the reference address of the braf. */
5217 XEXP (XEXP (pat, 0), 0) = braf_label;
5218 }
5219 }
5220
5221 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5222 a barrier. Return the base 2 logarithm of the desired alignment. */
5223 int
5224 barrier_align (rtx barrier_or_label)
5225 {
5226 rtx next = next_real_insn (barrier_or_label), pat, prev;
5227 int slot, credit, jump_to_next = 0;
5228
5229 if (! next)
5230 return 0;
5231
5232 pat = PATTERN (next);
5233
5234 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5235 return 2;
5236
5237 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5238 /* This is a barrier in front of a constant table. */
5239 return 0;
5240
5241 prev = prev_real_insn (barrier_or_label);
5242 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5243 {
5244 pat = PATTERN (prev);
5245 /* If this is a very small table, we want to keep the alignment after
5246 the table to the minimum for proper code alignment. */
5247 return ((TARGET_SMALLCODE
5248 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5249 <= (unsigned) 1 << (CACHE_LOG - 2)))
5250 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5251 }
5252
5253 if (TARGET_SMALLCODE)
5254 return 0;
5255
5256 if (! TARGET_SH2 || ! optimize)
5257 return align_jumps_log;
5258
5259 /* When fixing up pcloads, a constant table might be inserted just before
5260 the basic block that ends with the barrier. Thus, we can't trust the
5261 instruction lengths before that. */
5262 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5263 {
5264 /* Check if there is an immediately preceding branch to the insn beyond
5265 the barrier. We must weight the cost of discarding useful information
5266 from the current cache line when executing this branch and there is
5267 an alignment, against that of fetching unneeded insn in front of the
5268 branch target when there is no alignment. */
5269
5270 /* There are two delay_slot cases to consider. One is the simple case
5271 where the preceding branch is to the insn beyond the barrier (simple
5272 delay slot filling), and the other is where the preceding branch has
5273 a delay slot that is a duplicate of the insn after the barrier
5274 (fill_eager_delay_slots) and the branch is to the insn after the insn
5275 after the barrier. */
5276
5277 /* PREV is presumed to be the JUMP_INSN for the barrier under
5278 investigation. Skip to the insn before it. */
5279 prev = prev_real_insn (prev);
5280
5281 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5282 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5283 prev = prev_real_insn (prev))
5284 {
5285 jump_to_next = 0;
5286 if (GET_CODE (PATTERN (prev)) == USE
5287 || GET_CODE (PATTERN (prev)) == CLOBBER)
5288 continue;
5289 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5290 {
5291 prev = XVECEXP (PATTERN (prev), 0, 1);
5292 if (INSN_UID (prev) == INSN_UID (next))
5293 {
5294 /* Delay slot was filled with insn at jump target. */
5295 jump_to_next = 1;
5296 continue;
5297 }
5298 }
5299
5300 if (slot &&
5301 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5302 slot = 0;
5303 credit -= get_attr_length (prev);
5304 }
5305 if (prev
5306 && JUMP_P (prev)
5307 && JUMP_LABEL (prev))
5308 {
5309 rtx x;
5310 if (jump_to_next
5311 || next_real_insn (JUMP_LABEL (prev)) == next
5312 /* If relax_delay_slots() decides NEXT was redundant
5313 with some previous instruction, it will have
5314 redirected PREV's jump to the following insn. */
5315 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5316 /* There is no upper bound on redundant instructions
5317 that might have been skipped, but we must not put an
5318 alignment where none had been before. */
5319 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5320 (INSN_P (x)
5321 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5322 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5323 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5324 {
5325 rtx pat = PATTERN (prev);
5326 if (GET_CODE (pat) == PARALLEL)
5327 pat = XVECEXP (pat, 0, 0);
5328 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5329 return 0;
5330 }
5331 }
5332 }
5333
5334 return align_jumps_log;
5335 }
5336
5337 /* If we are inside a phony loop, almost any kind of label can turn up as the
5338 first one in the loop. Aligning a braf label causes incorrect switch
5339 destination addresses; we can detect braf labels because they are
5340 followed by a BARRIER.
5341 Applying loop alignment to small constant or switch tables is a waste
5342 of space, so we suppress this too. */
5343 int
5344 sh_loop_align (rtx label)
5345 {
5346 rtx next = label;
5347
5348 do
5349 next = next_nonnote_insn (next);
5350 while (next && LABEL_P (next));
5351
5352 if (! next
5353 || ! INSN_P (next)
5354 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5355 || recog_memoized (next) == CODE_FOR_consttable_2)
5356 return 0;
5357
5358 return align_loops_log;
5359 }
5360
5361 /* Do a final pass over the function, just before delayed branch
5362 scheduling. */
5363
5364 static void
5365 sh_reorg (void)
5366 {
5367 rtx first, insn, mova = NULL_RTX;
5368 int num_mova;
5369 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5370 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5371
5372 first = get_insns ();
5373 max_labelno_before_reorg = max_label_num ();
5374
5375 /* We must split call insns before introducing `mova's. If we're
5376 optimizing, they'll have already been split. Otherwise, make
5377 sure we don't split them too late. */
5378 if (! optimize)
5379 split_all_insns_noflow ();
5380
5381 if (TARGET_SHMEDIA)
5382 return;
5383
5384 /* If relaxing, generate pseudo-ops to associate function calls with
5385 the symbols they call. It does no harm to not generate these
5386 pseudo-ops. However, when we can generate them, it enables to
5387 linker to potentially relax the jsr to a bsr, and eliminate the
5388 register load and, possibly, the constant pool entry. */
5389
5390 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5391 if (TARGET_RELAX)
5392 {
5393 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5394 own purposes. This works because none of the remaining passes
5395 need to look at them.
5396
5397 ??? But it may break in the future. We should use a machine
5398 dependent REG_NOTE, or some other approach entirely. */
5399 for (insn = first; insn; insn = NEXT_INSN (insn))
5400 {
5401 if (INSN_P (insn))
5402 {
5403 rtx note;
5404
5405 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5406 NULL_RTX)) != 0)
5407 remove_note (insn, note);
5408 }
5409 }
5410
5411 for (insn = first; insn; insn = NEXT_INSN (insn))
5412 {
5413 rtx pattern, reg, link, set, scan, dies, label;
5414 int rescan = 0, foundinsn = 0;
5415
5416 if (CALL_P (insn))
5417 {
5418 pattern = PATTERN (insn);
5419
5420 if (GET_CODE (pattern) == PARALLEL)
5421 pattern = XVECEXP (pattern, 0, 0);
5422 if (GET_CODE (pattern) == SET)
5423 pattern = SET_SRC (pattern);
5424
5425 if (GET_CODE (pattern) != CALL
5426 || !MEM_P (XEXP (pattern, 0)))
5427 continue;
5428
5429 reg = XEXP (XEXP (pattern, 0), 0);
5430 }
5431 else
5432 {
5433 reg = sfunc_uses_reg (insn);
5434 if (! reg)
5435 continue;
5436 }
5437
5438 if (!REG_P (reg))
5439 continue;
5440
5441 /* Try scanning backward to find where the register is set. */
5442 link = NULL;
5443 for (scan = PREV_INSN (insn);
5444 scan && !LABEL_P (scan);
5445 scan = PREV_INSN (scan))
5446 {
5447 if (! INSN_P (scan))
5448 continue;
5449
5450 if (! reg_mentioned_p (reg, scan))
5451 continue;
5452
5453 if (noncall_uses_reg (reg, scan, &set))
5454 break;
5455
5456 if (set)
5457 {
5458 link = scan;
5459 break;
5460 }
5461 }
5462
5463 if (! link)
5464 continue;
5465
5466 /* The register is set at LINK. */
5467
5468 /* We can only optimize the function call if the register is
5469 being set to a symbol. In theory, we could sometimes
5470 optimize calls to a constant location, but the assembler
5471 and linker do not support that at present. */
5472 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5473 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5474 continue;
5475
5476 /* Scan forward from LINK to the place where REG dies, and
5477 make sure that the only insns which use REG are
5478 themselves function calls. */
5479
5480 /* ??? This doesn't work for call targets that were allocated
5481 by reload, since there may not be a REG_DEAD note for the
5482 register. */
5483
5484 dies = NULL_RTX;
5485 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5486 {
5487 rtx scanset;
5488
5489 /* Don't try to trace forward past a CODE_LABEL if we haven't
5490 seen INSN yet. Ordinarily, we will only find the setting insn
5491 if it is in the same basic block. However,
5492 cross-jumping can insert code labels in between the load and
5493 the call, and can result in situations where a single call
5494 insn may have two targets depending on where we came from. */
5495
5496 if (LABEL_P (scan) && ! foundinsn)
5497 break;
5498
5499 if (! INSN_P (scan))
5500 continue;
5501
5502 /* Don't try to trace forward past a JUMP. To optimize
5503 safely, we would have to check that all the
5504 instructions at the jump destination did not use REG. */
5505
5506 if (JUMP_P (scan))
5507 break;
5508
5509 if (! reg_mentioned_p (reg, scan))
5510 continue;
5511
5512 if (noncall_uses_reg (reg, scan, &scanset))
5513 break;
5514
5515 if (scan == insn)
5516 foundinsn = 1;
5517
5518 if (scan != insn
5519 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5520 {
5521 /* There is a function call to this register other
5522 than the one we are checking. If we optimize
5523 this call, we need to rescan again below. */
5524 rescan = 1;
5525 }
5526
5527 /* ??? We shouldn't have to worry about SCANSET here.
5528 We should just be able to check for a REG_DEAD note
5529 on a function call. However, the REG_DEAD notes are
5530 apparently not dependable around libcalls; c-torture
5531 execute/920501-2 is a test case. If SCANSET is set,
5532 then this insn sets the register, so it must have
5533 died earlier. Unfortunately, this will only handle
5534 the cases in which the register is, in fact, set in a
5535 later insn. */
5536
5537 /* ??? We shouldn't have to use FOUNDINSN here.
5538 This dates back to when we used LOG_LINKS to find
5539 the most recent insn which sets the register. */
5540
5541 if (foundinsn
5542 && (scanset
5543 || find_reg_note (scan, REG_DEAD, reg)))
5544 {
5545 dies = scan;
5546 break;
5547 }
5548 }
5549
5550 if (! dies)
5551 {
5552 /* Either there was a branch, or some insn used REG
5553 other than as a function call address. */
5554 continue;
5555 }
5556
5557 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5558 on the insn which sets the register, and on each call insn
5559 which uses the register. In final_prescan_insn we look for
5560 the REG_LABEL_OPERAND notes, and output the appropriate label
5561 or pseudo-op. */
5562
5563 label = gen_label_rtx ();
5564 add_reg_note (link, REG_LABEL_OPERAND, label);
5565 add_reg_note (insn, REG_LABEL_OPERAND, label);
5566 if (rescan)
5567 {
5568 scan = link;
5569 do
5570 {
5571 rtx reg2;
5572
5573 scan = NEXT_INSN (scan);
5574 if (scan != insn
5575 && ((CALL_P (scan)
5576 && reg_mentioned_p (reg, scan))
5577 || ((reg2 = sfunc_uses_reg (scan))
5578 && REGNO (reg2) == REGNO (reg))))
5579 add_reg_note (scan, REG_LABEL_OPERAND, label);
5580 }
5581 while (scan != dies);
5582 }
5583 }
5584 }
5585
5586 if (TARGET_SH2)
5587 fixup_addr_diff_vecs (first);
5588
5589 if (optimize)
5590 {
5591 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5592 shorten_branches (first);
5593 }
5594
5595 /* Scan the function looking for move instructions which have to be
5596 changed to pc-relative loads and insert the literal tables. */
5597 label_ref_list_pool = create_alloc_pool ("label references list",
5598 sizeof (struct label_ref_list_d),
5599 30);
5600 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5601 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5602 {
5603 if (mova_p (insn))
5604 {
5605 /* ??? basic block reordering can move a switch table dispatch
5606 below the switch table. Check if that has happened.
5607 We only have the addresses available when optimizing; but then,
5608 this check shouldn't be needed when not optimizing. */
5609 if (!untangle_mova (&num_mova, &mova, insn))
5610 {
5611 insn = mova;
5612 num_mova = 0;
5613 }
5614 }
5615 else if (JUMP_P (insn)
5616 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5617 && num_mova
5618 /* ??? loop invariant motion can also move a mova out of a
5619 loop. Since loop does this code motion anyway, maybe we
5620 should wrap UNSPEC_MOVA into a CONST, so that reload can
5621 move it back. */
5622 && ((num_mova > 1
5623 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5624 || (prev_nonnote_insn (insn)
5625 == XEXP (MOVA_LABELREF (mova), 0))))
5626 {
5627 rtx scan;
5628 int total;
5629
5630 num_mova--;
5631
5632 /* Some code might have been inserted between the mova and
5633 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5634 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5635 total += get_attr_length (scan);
5636
5637 /* range of mova is 1020, add 4 because pc counts from address of
5638 second instruction after this one, subtract 2 in case pc is 2
5639 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5640 cancels out with alignment effects of the mova itself. */
5641 if (total > 1022)
5642 {
5643 /* Change the mova into a load, and restart scanning
5644 there. broken_move will then return true for mova. */
5645 fixup_mova (mova);
5646 insn = mova;
5647 }
5648 }
5649 if (broken_move (insn)
5650 || (NONJUMP_INSN_P (insn)
5651 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5652 {
5653 rtx scan;
5654 /* Scan ahead looking for a barrier to stick the constant table
5655 behind. */
5656 rtx barrier = find_barrier (num_mova, mova, insn);
5657 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5658 int need_aligned_label = 0;
5659
5660 if (num_mova && ! mova_p (mova))
5661 {
5662 /* find_barrier had to change the first mova into a
5663 pcload; thus, we have to start with this new pcload. */
5664 insn = mova;
5665 num_mova = 0;
5666 }
5667 /* Now find all the moves between the points and modify them. */
5668 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5669 {
5670 if (LABEL_P (scan))
5671 last_float = 0;
5672 if (NONJUMP_INSN_P (scan)
5673 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5674 need_aligned_label = 1;
5675 if (broken_move (scan))
5676 {
5677 rtx *patp = &PATTERN (scan), pat = *patp;
5678 rtx src, dst;
5679 rtx lab;
5680 rtx newsrc;
5681 enum machine_mode mode;
5682
5683 if (GET_CODE (pat) == PARALLEL)
5684 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5685 src = SET_SRC (pat);
5686 dst = SET_DEST (pat);
5687 mode = GET_MODE (dst);
5688
5689 if (mode == SImode && hi_const (src)
5690 && REGNO (dst) != FPUL_REG)
5691 {
5692 int offset = 0;
5693
5694 mode = HImode;
5695 while (GET_CODE (dst) == SUBREG)
5696 {
5697 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5698 GET_MODE (SUBREG_REG (dst)),
5699 SUBREG_BYTE (dst),
5700 GET_MODE (dst));
5701 dst = SUBREG_REG (dst);
5702 }
5703 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5704 }
5705 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5706 {
5707 /* This must be an insn that clobbers r0. */
5708 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5709 XVECLEN (PATTERN (scan), 0)
5710 - 1);
5711 rtx clobber = *clobberp;
5712
5713 gcc_assert (GET_CODE (clobber) == CLOBBER
5714 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5715
5716 if (last_float
5717 && reg_set_between_p (r0_rtx, last_float_move, scan))
5718 last_float = 0;
5719 if (last_float
5720 && TARGET_SHCOMPACT
5721 && GET_MODE_SIZE (mode) != 4
5722 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5723 last_float = 0;
5724 lab = add_constant (src, mode, last_float);
5725 if (lab)
5726 emit_insn_before (gen_mova (lab), scan);
5727 else
5728 {
5729 /* There will be a REG_UNUSED note for r0 on
5730 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5731 lest reorg:mark_target_live_regs will not
5732 consider r0 to be used, and we end up with delay
5733 slot insn in front of SCAN that clobbers r0. */
5734 rtx note
5735 = find_regno_note (last_float_move, REG_UNUSED, 0);
5736
5737 /* If we are not optimizing, then there may not be
5738 a note. */
5739 if (note)
5740 PUT_REG_NOTE_KIND (note, REG_INC);
5741
5742 *last_float_addr = r0_inc_rtx;
5743 }
5744 last_float_move = scan;
5745 last_float = src;
5746 newsrc = gen_const_mem (mode,
5747 (((TARGET_SH4 && ! TARGET_FMOVD)
5748 || REGNO (dst) == FPUL_REG)
5749 ? r0_inc_rtx
5750 : r0_rtx));
5751 last_float_addr = &XEXP (newsrc, 0);
5752
5753 /* Remove the clobber of r0. */
5754 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5755 gen_rtx_SCRATCH (Pmode));
5756 }
5757 /* This is a mova needing a label. Create it. */
5758 else if (GET_CODE (src) == UNSPEC
5759 && XINT (src, 1) == UNSPEC_MOVA
5760 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5761 {
5762 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5763 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5764 newsrc = gen_rtx_UNSPEC (SImode,
5765 gen_rtvec (1, newsrc),
5766 UNSPEC_MOVA);
5767 }
5768 else
5769 {
5770 lab = add_constant (src, mode, 0);
5771 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5772 newsrc = gen_const_mem (mode, newsrc);
5773 }
5774 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5775 INSN_CODE (scan) = -1;
5776 }
5777 }
5778 dump_table (need_aligned_label ? insn : 0, barrier);
5779 insn = barrier;
5780 }
5781 }
5782 free_alloc_pool (label_ref_list_pool);
5783 for (insn = first; insn; insn = NEXT_INSN (insn))
5784 PUT_MODE (insn, VOIDmode);
5785
5786 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5787 INSN_ADDRESSES_FREE ();
5788 split_branches (first);
5789
5790 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5791 also has an effect on the register that holds the address of the sfunc.
5792 Insert an extra dummy insn in front of each sfunc that pretends to
5793 use this register. */
5794 if (flag_delayed_branch)
5795 {
5796 for (insn = first; insn; insn = NEXT_INSN (insn))
5797 {
5798 rtx reg = sfunc_uses_reg (insn);
5799
5800 if (! reg)
5801 continue;
5802 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5803 }
5804 }
5805 #if 0
5806 /* fpscr is not actually a user variable, but we pretend it is for the
5807 sake of the previous optimization passes, since we want it handled like
5808 one. However, we don't have any debugging information for it, so turn
5809 it into a non-user variable now. */
5810 if (TARGET_SH4)
5811 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5812 #endif
5813 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5814 }
5815
5816 int
5817 get_dest_uid (rtx label, int max_uid)
5818 {
5819 rtx dest = next_real_insn (label);
5820 int dest_uid;
5821 if (! dest)
5822 /* This can happen for an undefined label. */
5823 return 0;
5824 dest_uid = INSN_UID (dest);
5825 /* If this is a newly created branch redirection blocking instruction,
5826 we cannot index the branch_uid or insn_addresses arrays with its
5827 uid. But then, we won't need to, because the actual destination is
5828 the following branch. */
5829 while (dest_uid >= max_uid)
5830 {
5831 dest = NEXT_INSN (dest);
5832 dest_uid = INSN_UID (dest);
5833 }
5834 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5835 return 0;
5836 return dest_uid;
5837 }
5838
5839 /* Split condbranches that are out of range. Also add clobbers for
5840 scratch registers that are needed in far jumps.
5841 We do this before delay slot scheduling, so that it can take our
5842 newly created instructions into account. It also allows us to
5843 find branches with common targets more easily. */
5844
5845 static void
5846 split_branches (rtx first)
5847 {
5848 rtx insn;
5849 struct far_branch **uid_branch, *far_branch_list = 0;
5850 int max_uid = get_max_uid ();
5851 int ok;
5852
5853 /* Find out which branches are out of range. */
5854 shorten_branches (first);
5855
5856 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5857 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5858
5859 for (insn = first; insn; insn = NEXT_INSN (insn))
5860 if (! INSN_P (insn))
5861 continue;
5862 else if (INSN_DELETED_P (insn))
5863 {
5864 /* Shorten_branches would split this instruction again,
5865 so transform it into a note. */
5866 SET_INSN_DELETED (insn);
5867 }
5868 else if (JUMP_P (insn)
5869 /* Don't mess with ADDR_DIFF_VEC */
5870 && (GET_CODE (PATTERN (insn)) == SET
5871 || GET_CODE (PATTERN (insn)) == RETURN))
5872 {
5873 enum attr_type type = get_attr_type (insn);
5874 if (type == TYPE_CBRANCH)
5875 {
5876 rtx next, beyond;
5877
5878 if (get_attr_length (insn) > 4)
5879 {
5880 rtx src = SET_SRC (PATTERN (insn));
5881 rtx olabel = XEXP (XEXP (src, 1), 0);
5882 int addr = INSN_ADDRESSES (INSN_UID (insn));
5883 rtx label = 0;
5884 int dest_uid = get_dest_uid (olabel, max_uid);
5885 struct far_branch *bp = uid_branch[dest_uid];
5886
5887 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5888 the label if the LABEL_NUSES count drops to zero. There is
5889 always a jump_optimize pass that sets these values, but it
5890 proceeds to delete unreferenced code, and then if not
5891 optimizing, to un-delete the deleted instructions, thus
5892 leaving labels with too low uses counts. */
5893 if (! optimize)
5894 {
5895 JUMP_LABEL (insn) = olabel;
5896 LABEL_NUSES (olabel)++;
5897 }
5898 if (! bp)
5899 {
5900 bp = (struct far_branch *) alloca (sizeof *bp);
5901 uid_branch[dest_uid] = bp;
5902 bp->prev = far_branch_list;
5903 far_branch_list = bp;
5904 bp->far_label
5905 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5906 LABEL_NUSES (bp->far_label)++;
5907 }
5908 else
5909 {
5910 label = bp->near_label;
5911 if (! label && bp->address - addr >= CONDJUMP_MIN)
5912 {
5913 rtx block = bp->insert_place;
5914
5915 if (GET_CODE (PATTERN (block)) == RETURN)
5916 block = PREV_INSN (block);
5917 else
5918 block = gen_block_redirect (block,
5919 bp->address, 2);
5920 label = emit_label_after (gen_label_rtx (),
5921 PREV_INSN (block));
5922 bp->near_label = label;
5923 }
5924 else if (label && ! NEXT_INSN (label))
5925 {
5926 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5927 bp->insert_place = insn;
5928 else
5929 gen_far_branch (bp);
5930 }
5931 }
5932 if (! label
5933 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5934 {
5935 bp->near_label = label = gen_label_rtx ();
5936 bp->insert_place = insn;
5937 bp->address = addr;
5938 }
5939 ok = redirect_jump (insn, label, 0);
5940 gcc_assert (ok);
5941 }
5942 else
5943 {
5944 /* get_attr_length (insn) == 2 */
5945 /* Check if we have a pattern where reorg wants to redirect
5946 the branch to a label from an unconditional branch that
5947 is too far away. */
5948 /* We can't use JUMP_LABEL here because it might be undefined
5949 when not optimizing. */
5950 /* A syntax error might cause beyond to be NULL_RTX. */
5951 beyond
5952 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5953 0));
5954
5955 if (beyond
5956 && (JUMP_P (beyond)
5957 || ((beyond = next_active_insn (beyond))
5958 && JUMP_P (beyond)))
5959 && GET_CODE (PATTERN (beyond)) == SET
5960 && recog_memoized (beyond) == CODE_FOR_jump_compact
5961 && ((INSN_ADDRESSES
5962 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5963 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5964 > 252 + 258 + 2))
5965 gen_block_redirect (beyond,
5966 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5967 }
5968
5969 next = next_active_insn (insn);
5970
5971 if (next
5972 && (JUMP_P (next)
5973 || ((next = next_active_insn (next))
5974 && JUMP_P (next)))
5975 && GET_CODE (PATTERN (next)) == SET
5976 && recog_memoized (next) == CODE_FOR_jump_compact
5977 && ((INSN_ADDRESSES
5978 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5979 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5980 > 252 + 258 + 2))
5981 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5982 }
5983 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5984 {
5985 int addr = INSN_ADDRESSES (INSN_UID (insn));
5986 rtx far_label = 0;
5987 int dest_uid = 0;
5988 struct far_branch *bp;
5989
5990 if (type == TYPE_JUMP)
5991 {
5992 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5993 dest_uid = get_dest_uid (far_label, max_uid);
5994 if (! dest_uid)
5995 {
5996 /* Parse errors can lead to labels outside
5997 the insn stream. */
5998 if (! NEXT_INSN (far_label))
5999 continue;
6000
6001 if (! optimize)
6002 {
6003 JUMP_LABEL (insn) = far_label;
6004 LABEL_NUSES (far_label)++;
6005 }
6006 redirect_jump (insn, NULL_RTX, 1);
6007 far_label = 0;
6008 }
6009 }
6010 bp = uid_branch[dest_uid];
6011 if (! bp)
6012 {
6013 bp = (struct far_branch *) alloca (sizeof *bp);
6014 uid_branch[dest_uid] = bp;
6015 bp->prev = far_branch_list;
6016 far_branch_list = bp;
6017 bp->near_label = 0;
6018 bp->far_label = far_label;
6019 if (far_label)
6020 LABEL_NUSES (far_label)++;
6021 }
6022 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6023 if (addr - bp->address <= CONDJUMP_MAX)
6024 emit_label_after (bp->near_label, PREV_INSN (insn));
6025 else
6026 {
6027 gen_far_branch (bp);
6028 bp->near_label = 0;
6029 }
6030 else
6031 bp->near_label = 0;
6032 bp->address = addr;
6033 bp->insert_place = insn;
6034 if (! far_label)
6035 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6036 else
6037 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6038 }
6039 }
6040 /* Generate all pending far branches,
6041 and free our references to the far labels. */
6042 while (far_branch_list)
6043 {
6044 if (far_branch_list->near_label
6045 && ! NEXT_INSN (far_branch_list->near_label))
6046 gen_far_branch (far_branch_list);
6047 if (optimize
6048 && far_branch_list->far_label
6049 && ! --LABEL_NUSES (far_branch_list->far_label))
6050 delete_insn (far_branch_list->far_label);
6051 far_branch_list = far_branch_list->prev;
6052 }
6053
6054 /* Instruction length information is no longer valid due to the new
6055 instructions that have been generated. */
6056 init_insn_lengths ();
6057 }
6058
6059 /* Dump out instruction addresses, which is useful for debugging the
6060 constant pool table stuff.
6061
6062 If relaxing, output the label and pseudo-ops used to link together
6063 calls and the instruction which set the registers. */
6064
6065 /* ??? The addresses printed by this routine for insns are nonsense for
6066 insns which are inside of a sequence where none of the inner insns have
6067 variable length. This is because the second pass of shorten_branches
6068 does not bother to update them. */
6069
6070 void
6071 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6072 int noperands ATTRIBUTE_UNUSED)
6073 {
6074 if (TARGET_DUMPISIZE)
6075 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6076
6077 if (TARGET_RELAX)
6078 {
6079 rtx note;
6080
6081 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6082 if (note)
6083 {
6084 rtx pattern;
6085
6086 pattern = PATTERN (insn);
6087 if (GET_CODE (pattern) == PARALLEL)
6088 pattern = XVECEXP (pattern, 0, 0);
6089 switch (GET_CODE (pattern))
6090 {
6091 case SET:
6092 if (GET_CODE (SET_SRC (pattern)) != CALL
6093 && get_attr_type (insn) != TYPE_SFUNC)
6094 {
6095 targetm.asm_out.internal_label
6096 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6097 break;
6098 }
6099 /* else FALLTHROUGH */
6100 case CALL:
6101 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6102 CODE_LABEL_NUMBER (XEXP (note, 0)));
6103 break;
6104
6105 default:
6106 gcc_unreachable ();
6107 }
6108 }
6109 }
6110 }
6111
6112 /* Dump out any constants accumulated in the final pass. These will
6113 only be labels. */
6114
6115 const char *
6116 output_jump_label_table (void)
6117 {
6118 int i;
6119
6120 if (pool_size)
6121 {
6122 fprintf (asm_out_file, "\t.align 2\n");
6123 for (i = 0; i < pool_size; i++)
6124 {
6125 pool_node *p = &pool_vector[i];
6126
6127 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6128 CODE_LABEL_NUMBER (p->label));
6129 output_asm_insn (".long %O0", &p->value);
6130 }
6131 pool_size = 0;
6132 }
6133
6134 return "";
6135 }
6136 \f
6137 /* A full frame looks like:
6138
6139 arg-5
6140 arg-4
6141 [ if current_function_anonymous_args
6142 arg-3
6143 arg-2
6144 arg-1
6145 arg-0 ]
6146 saved-fp
6147 saved-r10
6148 saved-r11
6149 saved-r12
6150 saved-pr
6151 local-n
6152 ..
6153 local-1
6154 local-0 <- fp points here. */
6155
6156 /* Number of bytes pushed for anonymous args, used to pass information
6157 between expand_prologue and expand_epilogue. */
6158
6159 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6160 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6161 for an epilogue and a negative value means that it's for a sibcall
6162 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6163 all the registers that are about to be restored, and hence dead. */
6164
6165 static void
6166 output_stack_adjust (int size, rtx reg, int epilogue_p,
6167 HARD_REG_SET *live_regs_mask, bool frame_p)
6168 {
6169 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6170 if (size)
6171 {
6172 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6173
6174 /* This test is bogus, as output_stack_adjust is used to re-align the
6175 stack. */
6176 #if 0
6177 gcc_assert (!(size % align));
6178 #endif
6179
6180 if (CONST_OK_FOR_ADD (size))
6181 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6182 /* Try to do it with two partial adjustments; however, we must make
6183 sure that the stack is properly aligned at all times, in case
6184 an interrupt occurs between the two partial adjustments. */
6185 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6186 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6187 {
6188 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6189 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6190 }
6191 else
6192 {
6193 rtx const_reg;
6194 rtx insn;
6195 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6196 int i;
6197
6198 /* If TEMP is invalid, we could temporarily save a general
6199 register to MACL. However, there is currently no need
6200 to handle this case, so just die when we see it. */
6201 if (epilogue_p < 0
6202 || current_function_interrupt
6203 || ! call_really_used_regs[temp] || fixed_regs[temp])
6204 temp = -1;
6205 if (temp < 0 && ! current_function_interrupt
6206 && (TARGET_SHMEDIA || epilogue_p >= 0))
6207 {
6208 HARD_REG_SET temps;
6209 COPY_HARD_REG_SET (temps, call_used_reg_set);
6210 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6211 if (epilogue_p > 0)
6212 {
6213 int nreg = 0;
6214 if (crtl->return_rtx)
6215 {
6216 enum machine_mode mode;
6217 mode = GET_MODE (crtl->return_rtx);
6218 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6219 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6220 }
6221 for (i = 0; i < nreg; i++)
6222 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6223 if (crtl->calls_eh_return)
6224 {
6225 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6226 for (i = 0; i <= 3; i++)
6227 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6228 }
6229 }
6230 if (TARGET_SHMEDIA && epilogue_p < 0)
6231 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6232 CLEAR_HARD_REG_BIT (temps, i);
6233 if (epilogue_p <= 0)
6234 {
6235 for (i = FIRST_PARM_REG;
6236 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6237 CLEAR_HARD_REG_BIT (temps, i);
6238 if (cfun->static_chain_decl != NULL)
6239 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6240 }
6241 temp = scavenge_reg (&temps);
6242 }
6243 if (temp < 0 && live_regs_mask)
6244 {
6245 HARD_REG_SET temps;
6246
6247 COPY_HARD_REG_SET (temps, *live_regs_mask);
6248 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6249 temp = scavenge_reg (&temps);
6250 }
6251 if (temp < 0)
6252 {
6253 rtx adj_reg, tmp_reg, mem;
6254
6255 /* If we reached here, the most likely case is the (sibcall)
6256 epilogue for non SHmedia. Put a special push/pop sequence
6257 for such case as the last resort. This looks lengthy but
6258 would not be problem because it seems to be very
6259 rare. */
6260
6261 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6262
6263
6264 /* ??? There is still the slight possibility that r4 or
6265 r5 have been reserved as fixed registers or assigned
6266 as global registers, and they change during an
6267 interrupt. There are possible ways to handle this:
6268
6269 - If we are adjusting the frame pointer (r14), we can do
6270 with a single temp register and an ordinary push / pop
6271 on the stack.
6272 - Grab any call-used or call-saved registers (i.e. not
6273 fixed or globals) for the temps we need. We might
6274 also grab r14 if we are adjusting the stack pointer.
6275 If we can't find enough available registers, issue
6276 a diagnostic and die - the user must have reserved
6277 way too many registers.
6278 But since all this is rather unlikely to happen and
6279 would require extra testing, we just die if r4 / r5
6280 are not available. */
6281 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6282 && !global_regs[4] && !global_regs[5]);
6283
6284 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6285 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6286 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6287 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6288 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6289 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6290 emit_move_insn (mem, tmp_reg);
6291 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6292 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6293 emit_move_insn (mem, tmp_reg);
6294 emit_move_insn (reg, adj_reg);
6295 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6296 emit_move_insn (adj_reg, mem);
6297 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6298 emit_move_insn (tmp_reg, mem);
6299 /* Tell flow the insns that pop r4/r5 aren't dead. */
6300 emit_use (tmp_reg);
6301 emit_use (adj_reg);
6302 return;
6303 }
6304 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6305
6306 /* If SIZE is negative, subtract the positive value.
6307 This sometimes allows a constant pool entry to be shared
6308 between prologue and epilogue code. */
6309 if (size < 0)
6310 {
6311 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6312 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6313 }
6314 else
6315 {
6316 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6317 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6318 }
6319 if (! epilogue_p)
6320 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6321 gen_rtx_SET (VOIDmode, reg,
6322 gen_rtx_PLUS (SImode, reg,
6323 GEN_INT (size))));
6324 }
6325 }
6326 }
6327
6328 static rtx
6329 frame_insn (rtx x)
6330 {
6331 x = emit_insn (x);
6332 RTX_FRAME_RELATED_P (x) = 1;
6333 return x;
6334 }
6335
6336 /* Output RTL to push register RN onto the stack. */
6337
6338 static rtx
6339 push (int rn)
6340 {
6341 rtx x;
6342 if (rn == FPUL_REG)
6343 x = gen_push_fpul ();
6344 else if (rn == FPSCR_REG)
6345 x = gen_push_fpscr ();
6346 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6347 && FP_OR_XD_REGISTER_P (rn))
6348 {
6349 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6350 return NULL_RTX;
6351 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6352 }
6353 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6354 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6355 else
6356 x = gen_push (gen_rtx_REG (SImode, rn));
6357
6358 x = frame_insn (x);
6359 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6360 return x;
6361 }
6362
6363 /* Output RTL to pop register RN from the stack. */
6364
6365 static void
6366 pop (int rn)
6367 {
6368 rtx x;
6369 if (rn == FPUL_REG)
6370 x = gen_pop_fpul ();
6371 else if (rn == FPSCR_REG)
6372 x = gen_pop_fpscr ();
6373 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6374 && FP_OR_XD_REGISTER_P (rn))
6375 {
6376 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6377 return;
6378 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6379 }
6380 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6381 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6382 else
6383 x = gen_pop (gen_rtx_REG (SImode, rn));
6384
6385 x = emit_insn (x);
6386 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6387 }
6388
6389 /* Generate code to push the regs specified in the mask. */
6390
6391 static void
6392 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6393 {
6394 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6395 int skip_fpscr = 0;
6396
6397 /* Push PR last; this gives better latencies after the prologue, and
6398 candidates for the return delay slot when there are no general
6399 registers pushed. */
6400 for (; i < FIRST_PSEUDO_REGISTER; i++)
6401 {
6402 /* If this is an interrupt handler, and the SZ bit varies,
6403 and we have to push any floating point register, we need
6404 to switch to the correct precision first. */
6405 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6406 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6407 {
6408 HARD_REG_SET unsaved;
6409
6410 push (FPSCR_REG);
6411 COMPL_HARD_REG_SET (unsaved, *mask);
6412 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6413 skip_fpscr = 1;
6414 }
6415 if (i != PR_REG
6416 && (i != FPSCR_REG || ! skip_fpscr)
6417 && TEST_HARD_REG_BIT (*mask, i))
6418 {
6419 /* If the ISR has RESBANK attribute assigned, don't push any of
6420 the following registers - R0-R14, MACH, MACL and GBR. */
6421 if (! (sh_cfun_resbank_handler_p ()
6422 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6423 || i == MACH_REG
6424 || i == MACL_REG
6425 || i == GBR_REG)))
6426 push (i);
6427 }
6428 }
6429
6430 /* Push banked registers last to improve delay slot opportunities. */
6431 if (interrupt_handler)
6432 {
6433 bool use_movml = false;
6434
6435 if (TARGET_SH2A)
6436 {
6437 unsigned int count = 0;
6438
6439 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6440 if (TEST_HARD_REG_BIT (*mask, i))
6441 count++;
6442 else
6443 break;
6444
6445 /* Use movml when all banked registers are pushed. */
6446 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6447 use_movml = true;
6448 }
6449
6450 if (use_movml)
6451 {
6452 rtx x, mem, reg, set;
6453 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6454
6455 /* We must avoid scheduling multiple store insn with another
6456 insns. */
6457 emit_insn (gen_blockage ());
6458 x = gen_movml_push_banked (sp_reg);
6459 x = frame_insn (x);
6460 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6461 {
6462 mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
6463 reg = gen_rtx_REG (SImode, i);
6464 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6465 }
6466
6467 set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
6468 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6469 emit_insn (gen_blockage ());
6470 }
6471 else
6472 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6473 if (TEST_HARD_REG_BIT (*mask, i))
6474 push (i);
6475 }
6476
6477 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6478 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6479 push (PR_REG);
6480 }
6481
6482 /* Calculate how much extra space is needed to save all callee-saved
6483 target registers.
6484 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6485
6486 static int
6487 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6488 {
6489 int reg;
6490 int stack_space = 0;
6491 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6492
6493 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6494 if ((! call_really_used_regs[reg] || interrupt_handler)
6495 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6496 /* Leave space to save this target register on the stack,
6497 in case target register allocation wants to use it. */
6498 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6499 return stack_space;
6500 }
6501
6502 /* Decide whether we should reserve space for callee-save target registers,
6503 in case target register allocation wants to use them. REGS_SAVED is
6504 the space, in bytes, that is already required for register saves.
6505 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6506
6507 static int
6508 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6509 HARD_REG_SET *live_regs_mask)
6510 {
6511 if (optimize_size)
6512 return 0;
6513 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6514 }
6515
6516 /* Decide how much space to reserve for callee-save target registers
6517 in case target register allocation wants to use them.
6518 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6519
6520 static int
6521 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6522 {
6523 if (shmedia_space_reserved_for_target_registers)
6524 return shmedia_target_regs_stack_space (live_regs_mask);
6525 else
6526 return 0;
6527 }
6528
6529 /* Work out the registers which need to be saved, both as a mask and a
6530 count of saved words. Return the count.
6531
6532 If doing a pragma interrupt function, then push all regs used by the
6533 function, and if we call another function (we can tell by looking at PR),
6534 make sure that all the regs it clobbers are safe too. */
6535
6536 static int
6537 calc_live_regs (HARD_REG_SET *live_regs_mask)
6538 {
6539 unsigned int reg;
6540 int count;
6541 tree attrs;
6542 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6543 bool nosave_low_regs;
6544 int pr_live, has_call;
6545
6546 attrs = DECL_ATTRIBUTES (current_function_decl);
6547 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6548 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6549 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6550 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6551
6552 CLEAR_HARD_REG_SET (*live_regs_mask);
6553 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6554 && df_regs_ever_live_p (FPSCR_REG))
6555 target_flags &= ~MASK_FPU_SINGLE;
6556 /* If we can save a lot of saves by switching to double mode, do that. */
6557 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6558 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6559 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6560 && (! call_really_used_regs[reg]
6561 || interrupt_handler)
6562 && ++count > 2)
6563 {
6564 target_flags &= ~MASK_FPU_SINGLE;
6565 break;
6566 }
6567 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6568 knows how to use it. That means the pseudo originally allocated for
6569 the initial value can become the PR_MEDIA_REG hard register, as seen for
6570 execute/20010122-1.c:test9. */
6571 if (TARGET_SHMEDIA)
6572 /* ??? this function is called from initial_elimination_offset, hence we
6573 can't use the result of sh_media_register_for_return here. */
6574 pr_live = sh_pr_n_sets ();
6575 else
6576 {
6577 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6578 pr_live = (pr_initial
6579 ? (!REG_P (pr_initial)
6580 || REGNO (pr_initial) != (PR_REG))
6581 : df_regs_ever_live_p (PR_REG));
6582 /* For Shcompact, if not optimizing, we end up with a memory reference
6583 using the return address pointer for __builtin_return_address even
6584 though there is no actual need to put the PR register on the stack. */
6585 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6586 }
6587 /* Force PR to be live if the prologue has to call the SHmedia
6588 argument decoder or register saver. */
6589 if (TARGET_SHCOMPACT
6590 && ((crtl->args.info.call_cookie
6591 & ~ CALL_COOKIE_RET_TRAMP (1))
6592 || crtl->saves_all_registers))
6593 pr_live = 1;
6594 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6595 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6596 {
6597 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6598 ? pr_live
6599 : interrupt_handler
6600 ? (/* Need to save all the regs ever live. */
6601 (df_regs_ever_live_p (reg)
6602 || (call_really_used_regs[reg]
6603 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6604 || reg == PIC_OFFSET_TABLE_REGNUM)
6605 && has_call)
6606 || (TARGET_SHMEDIA && has_call
6607 && REGISTER_NATURAL_MODE (reg) == SImode
6608 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6609 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6610 && reg != RETURN_ADDRESS_POINTER_REGNUM
6611 && reg != T_REG && reg != GBR_REG
6612 /* Push fpscr only on targets which have FPU */
6613 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6614 : (/* Only push those regs which are used and need to be saved. */
6615 (TARGET_SHCOMPACT
6616 && flag_pic
6617 && crtl->args.info.call_cookie
6618 && reg == PIC_OFFSET_TABLE_REGNUM)
6619 || (df_regs_ever_live_p (reg)
6620 && ((!call_really_used_regs[reg]
6621 && !(reg != PIC_OFFSET_TABLE_REGNUM
6622 && fixed_regs[reg] && call_used_regs[reg]))
6623 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6624 || (crtl->calls_eh_return
6625 && (reg == EH_RETURN_DATA_REGNO (0)
6626 || reg == EH_RETURN_DATA_REGNO (1)
6627 || reg == EH_RETURN_DATA_REGNO (2)
6628 || reg == EH_RETURN_DATA_REGNO (3)))
6629 || ((reg == MACL_REG || reg == MACH_REG)
6630 && df_regs_ever_live_p (reg)
6631 && sh_cfun_attr_renesas_p ())
6632 ))
6633 {
6634 SET_HARD_REG_BIT (*live_regs_mask, reg);
6635 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6636
6637 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6638 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6639 {
6640 if (FP_REGISTER_P (reg))
6641 {
6642 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6643 {
6644 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6645 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6646 }
6647 }
6648 else if (XD_REGISTER_P (reg))
6649 {
6650 /* Must switch to double mode to access these registers. */
6651 target_flags &= ~MASK_FPU_SINGLE;
6652 }
6653 }
6654 }
6655 if (nosave_low_regs && reg == R8_REG)
6656 break;
6657 }
6658 /* If we have a target register optimization pass after prologue / epilogue
6659 threading, we need to assume all target registers will be live even if
6660 they aren't now. */
6661 if (flag_branch_target_load_optimize2
6662 && TARGET_SAVE_ALL_TARGET_REGS
6663 && shmedia_space_reserved_for_target_registers)
6664 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6665 if ((! call_really_used_regs[reg] || interrupt_handler)
6666 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6667 {
6668 SET_HARD_REG_BIT (*live_regs_mask, reg);
6669 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6670 }
6671 /* If this is an interrupt handler, we don't have any call-clobbered
6672 registers we can conveniently use for target register save/restore.
6673 Make sure we save at least one general purpose register when we need
6674 to save target registers. */
6675 if (interrupt_handler
6676 && hard_reg_set_intersect_p (*live_regs_mask,
6677 reg_class_contents[TARGET_REGS])
6678 && ! hard_reg_set_intersect_p (*live_regs_mask,
6679 reg_class_contents[GENERAL_REGS]))
6680 {
6681 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6682 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6683 }
6684
6685 return count;
6686 }
6687
6688 /* Code to generate prologue and epilogue sequences */
6689
6690 /* PUSHED is the number of bytes that are being pushed on the
6691 stack for register saves. Return the frame size, padded
6692 appropriately so that the stack stays properly aligned. */
6693 static HOST_WIDE_INT
6694 rounded_frame_size (int pushed)
6695 {
6696 HOST_WIDE_INT size = get_frame_size ();
6697 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6698
6699 if (ACCUMULATE_OUTGOING_ARGS)
6700 size += crtl->outgoing_args_size;
6701
6702 return ((size + pushed + align - 1) & -align) - pushed;
6703 }
6704
6705 /* Choose a call-clobbered target-branch register that remains
6706 unchanged along the whole function. We set it up as the return
6707 value in the prologue. */
6708 int
6709 sh_media_register_for_return (void)
6710 {
6711 int regno;
6712 int tr0_used;
6713
6714 if (! current_function_is_leaf)
6715 return -1;
6716 if (lookup_attribute ("interrupt_handler",
6717 DECL_ATTRIBUTES (current_function_decl)))
6718 return -1;
6719 if (sh_cfun_interrupt_handler_p ())
6720 return -1;
6721
6722 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6723
6724 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6725 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6726 return regno;
6727
6728 return -1;
6729 }
6730
6731 /* The maximum registers we need to save are:
6732 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6733 - 32 floating point registers (for each pair, we save none,
6734 one single precision value, or a double precision value).
6735 - 8 target registers
6736 - add 1 entry for a delimiter. */
6737 #define MAX_SAVED_REGS (62+32+8)
6738
6739 typedef struct save_entry_s
6740 {
6741 unsigned char reg;
6742 unsigned char mode;
6743 short offset;
6744 } save_entry;
6745
6746 #define MAX_TEMPS 4
6747
6748 /* There will be a delimiter entry with VOIDmode both at the start and the
6749 end of a filled in schedule. The end delimiter has the offset of the
6750 save with the smallest (i.e. most negative) offset. */
6751 typedef struct save_schedule_s
6752 {
6753 save_entry entries[MAX_SAVED_REGS + 2];
6754 int temps[MAX_TEMPS+1];
6755 } save_schedule;
6756
6757 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6758 use reverse order. Returns the last entry written to (not counting
6759 the delimiter). OFFSET_BASE is a number to be added to all offset
6760 entries. */
6761
6762 static save_entry *
6763 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6764 int offset_base)
6765 {
6766 int align, i;
6767 save_entry *entry = schedule->entries;
6768 int tmpx = 0;
6769 int offset;
6770
6771 if (! current_function_interrupt)
6772 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6773 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6774 && ! FUNCTION_ARG_REGNO_P (i)
6775 && i != FIRST_RET_REG
6776 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6777 && ! (crtl->calls_eh_return
6778 && (i == EH_RETURN_STACKADJ_REGNO
6779 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6780 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6781 schedule->temps[tmpx++] = i;
6782 entry->reg = -1;
6783 entry->mode = VOIDmode;
6784 entry->offset = offset_base;
6785 entry++;
6786 /* We loop twice: first, we save 8-byte aligned registers in the
6787 higher addresses, that are known to be aligned. Then, we
6788 proceed to saving 32-bit registers that don't need 8-byte
6789 alignment.
6790 If this is an interrupt function, all registers that need saving
6791 need to be saved in full. moreover, we need to postpone saving
6792 target registers till we have saved some general purpose registers
6793 we can then use as scratch registers. */
6794 offset = offset_base;
6795 for (align = 1; align >= 0; align--)
6796 {
6797 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6798 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6799 {
6800 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6801 int reg = i;
6802
6803 if (current_function_interrupt)
6804 {
6805 if (TARGET_REGISTER_P (i))
6806 continue;
6807 if (GENERAL_REGISTER_P (i))
6808 mode = DImode;
6809 }
6810 if (mode == SFmode && (i % 2) == 1
6811 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6812 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6813 {
6814 mode = DFmode;
6815 i--;
6816 reg--;
6817 }
6818
6819 /* If we're doing the aligned pass and this is not aligned,
6820 or we're doing the unaligned pass and this is aligned,
6821 skip it. */
6822 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6823 != align)
6824 continue;
6825
6826 if (current_function_interrupt
6827 && GENERAL_REGISTER_P (i)
6828 && tmpx < MAX_TEMPS)
6829 schedule->temps[tmpx++] = i;
6830
6831 offset -= GET_MODE_SIZE (mode);
6832 entry->reg = i;
6833 entry->mode = mode;
6834 entry->offset = offset;
6835 entry++;
6836 }
6837 if (align && current_function_interrupt)
6838 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6839 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6840 {
6841 offset -= GET_MODE_SIZE (DImode);
6842 entry->reg = i;
6843 entry->mode = DImode;
6844 entry->offset = offset;
6845 entry++;
6846 }
6847 }
6848 entry->reg = -1;
6849 entry->mode = VOIDmode;
6850 entry->offset = offset;
6851 schedule->temps[tmpx] = -1;
6852 return entry - 1;
6853 }
6854
6855 void
6856 sh_expand_prologue (void)
6857 {
6858 HARD_REG_SET live_regs_mask;
6859 int d, i;
6860 int d_rounding = 0;
6861 int save_flags = target_flags;
6862 int pretend_args;
6863 int stack_usage;
6864 tree sp_switch_attr
6865 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6866
6867 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6868
6869 /* We have pretend args if we had an object sent partially in registers
6870 and partially on the stack, e.g. a large structure. */
6871 pretend_args = crtl->args.pretend_args_size;
6872 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6873 && (NPARM_REGS(SImode)
6874 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6875 pretend_args = 0;
6876 /* Dwarf2 module doesn't expect frame related insns here. */
6877 output_stack_adjust (-pretend_args
6878 - crtl->args.info.stack_regs * 8,
6879 stack_pointer_rtx, 0, NULL, false);
6880 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
6881
6882 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6883 /* We're going to use the PIC register to load the address of the
6884 incoming-argument decoder and/or of the return trampoline from
6885 the GOT, so make sure the PIC register is preserved and
6886 initialized. */
6887 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6888
6889 if (TARGET_SHCOMPACT
6890 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6891 {
6892 int reg;
6893
6894 /* First, make all registers with incoming arguments that will
6895 be pushed onto the stack live, so that register renaming
6896 doesn't overwrite them. */
6897 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6898 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6899 >= NPARM_REGS (SImode) - reg)
6900 for (; reg < NPARM_REGS (SImode); reg++)
6901 emit_insn (gen_shcompact_preserve_incoming_args
6902 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6903 else if (CALL_COOKIE_INT_REG_GET
6904 (crtl->args.info.call_cookie, reg) == 1)
6905 emit_insn (gen_shcompact_preserve_incoming_args
6906 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6907
6908 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6909 stack_pointer_rtx);
6910 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6911 GEN_INT (crtl->args.info.call_cookie));
6912 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6913 gen_rtx_REG (SImode, R0_REG));
6914 }
6915 else if (TARGET_SHMEDIA)
6916 {
6917 int tr = sh_media_register_for_return ();
6918
6919 if (tr >= 0)
6920 emit_move_insn (gen_rtx_REG (DImode, tr),
6921 gen_rtx_REG (DImode, PR_MEDIA_REG));
6922 }
6923
6924 /* Emit the code for SETUP_VARARGS. */
6925 if (cfun->stdarg)
6926 {
6927 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6928 {
6929 /* Push arg regs as if they'd been provided by caller in stack. */
6930 for (i = 0; i < NPARM_REGS(SImode); i++)
6931 {
6932 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6933
6934 if (i >= (NPARM_REGS(SImode)
6935 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6936 ))
6937 break;
6938 push (rn);
6939 stack_usage += GET_MODE_SIZE (SImode);
6940 }
6941 }
6942 }
6943
6944 /* If we're supposed to switch stacks at function entry, do so now. */
6945 if (sp_switch_attr)
6946 {
6947 rtx lab, newsrc;
6948 /* The argument specifies a variable holding the address of the
6949 stack the interrupt function should switch to/from at entry/exit. */
6950 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6951 const char *s
6952 = ggc_strdup (TREE_STRING_POINTER (arg));
6953 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6954
6955 lab = add_constant (sp_switch, SImode, 0);
6956 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6957 newsrc = gen_const_mem (SImode, newsrc);
6958
6959 emit_insn (gen_sp_switch_1 (newsrc));
6960 }
6961
6962 d = calc_live_regs (&live_regs_mask);
6963 /* ??? Maybe we could save some switching if we can move a mode switch
6964 that already happens to be at the function start into the prologue. */
6965 if (target_flags != save_flags && ! current_function_interrupt)
6966 emit_insn (gen_toggle_sz ());
6967
6968 if (TARGET_SH5)
6969 {
6970 int offset_base, offset;
6971 rtx r0 = NULL_RTX;
6972 int offset_in_r0 = -1;
6973 int sp_in_r0 = 0;
6974 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6975 int total_size, save_size;
6976 save_schedule schedule;
6977 save_entry *entry;
6978 int *tmp_pnt;
6979
6980 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6981 && ! current_function_interrupt)
6982 r0 = gen_rtx_REG (Pmode, R0_REG);
6983
6984 /* D is the actual number of bytes that we need for saving registers,
6985 however, in initial_elimination_offset we have committed to using
6986 an additional TREGS_SPACE amount of bytes - in order to keep both
6987 addresses to arguments supplied by the caller and local variables
6988 valid, we must keep this gap. Place it between the incoming
6989 arguments and the actually saved registers in a bid to optimize
6990 locality of reference. */
6991 total_size = d + tregs_space;
6992 total_size += rounded_frame_size (total_size);
6993 save_size = total_size - rounded_frame_size (d);
6994 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6995 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6996 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6997
6998 /* If adjusting the stack in a single step costs nothing extra, do so.
6999 I.e. either if a single addi is enough, or we need a movi anyway,
7000 and we don't exceed the maximum offset range (the test for the
7001 latter is conservative for simplicity). */
7002 if (TARGET_SHMEDIA
7003 && (CONST_OK_FOR_I10 (-total_size)
7004 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7005 && total_size <= 2044)))
7006 d_rounding = total_size - save_size;
7007
7008 offset_base = d + d_rounding;
7009
7010 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7011 0, NULL, true);
7012 stack_usage += save_size + d_rounding;
7013
7014 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7015 tmp_pnt = schedule.temps;
7016 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7017 {
7018 enum machine_mode mode = (enum machine_mode) entry->mode;
7019 unsigned int reg = entry->reg;
7020 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7021 rtx orig_reg_rtx;
7022
7023 offset = entry->offset;
7024
7025 reg_rtx = gen_rtx_REG (mode, reg);
7026
7027 mem_rtx = gen_frame_mem (mode,
7028 gen_rtx_PLUS (Pmode,
7029 stack_pointer_rtx,
7030 GEN_INT (offset)));
7031
7032 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7033 {
7034 gcc_assert (r0);
7035 mem_rtx = NULL_RTX;
7036 }
7037
7038 if (HAVE_PRE_DECREMENT
7039 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7040 || mem_rtx == NULL_RTX
7041 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7042 {
7043 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7044
7045 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7046 pre_dec = NULL_RTX;
7047 else
7048 {
7049 mem_rtx = NULL_RTX;
7050 offset += GET_MODE_SIZE (mode);
7051 }
7052 }
7053
7054 if (mem_rtx != NULL_RTX)
7055 goto addr_ok;
7056
7057 if (offset_in_r0 == -1)
7058 {
7059 emit_move_insn (r0, GEN_INT (offset));
7060 offset_in_r0 = offset;
7061 }
7062 else if (offset != offset_in_r0)
7063 {
7064 emit_move_insn (r0,
7065 gen_rtx_PLUS
7066 (Pmode, r0,
7067 GEN_INT (offset - offset_in_r0)));
7068 offset_in_r0 += offset - offset_in_r0;
7069 }
7070
7071 if (pre_dec != NULL_RTX)
7072 {
7073 if (! sp_in_r0)
7074 {
7075 emit_move_insn (r0,
7076 gen_rtx_PLUS
7077 (Pmode, r0, stack_pointer_rtx));
7078 sp_in_r0 = 1;
7079 }
7080
7081 offset -= GET_MODE_SIZE (mode);
7082 offset_in_r0 -= GET_MODE_SIZE (mode);
7083
7084 mem_rtx = pre_dec;
7085 }
7086 else if (sp_in_r0)
7087 mem_rtx = gen_frame_mem (mode, r0);
7088 else
7089 mem_rtx = gen_frame_mem (mode,
7090 gen_rtx_PLUS (Pmode,
7091 stack_pointer_rtx,
7092 r0));
7093
7094 /* We must not use an r0-based address for target-branch
7095 registers or for special registers without pre-dec
7096 memory addresses, since we store their values in r0
7097 first. */
7098 gcc_assert (!TARGET_REGISTER_P (reg)
7099 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7100 || mem_rtx == pre_dec));
7101
7102 addr_ok:
7103 orig_reg_rtx = reg_rtx;
7104 if (TARGET_REGISTER_P (reg)
7105 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7106 && mem_rtx != pre_dec))
7107 {
7108 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7109
7110 emit_move_insn (tmp_reg, reg_rtx);
7111
7112 if (REGNO (tmp_reg) == R0_REG)
7113 {
7114 offset_in_r0 = -1;
7115 sp_in_r0 = 0;
7116 gcc_assert (!refers_to_regno_p
7117 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7118 }
7119
7120 if (*++tmp_pnt <= 0)
7121 tmp_pnt = schedule.temps;
7122
7123 reg_rtx = tmp_reg;
7124 }
7125 {
7126 rtx insn;
7127
7128 /* Mark as interesting for dwarf cfi generator */
7129 insn = emit_move_insn (mem_rtx, reg_rtx);
7130 RTX_FRAME_RELATED_P (insn) = 1;
7131 /* If we use an intermediate register for the save, we can't
7132 describe this exactly in cfi as a copy of the to-be-saved
7133 register into the temporary register and then the temporary
7134 register on the stack, because the temporary register can
7135 have a different natural size than the to-be-saved register.
7136 Thus, we gloss over the intermediate copy and pretend we do
7137 a direct save from the to-be-saved register. */
7138 if (REGNO (reg_rtx) != reg)
7139 {
7140 rtx set;
7141
7142 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7143 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7144 }
7145
7146 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7147 {
7148 rtx reg_rtx = gen_rtx_REG (mode, reg);
7149 rtx set;
7150 rtx mem_rtx = gen_frame_mem (mode,
7151 gen_rtx_PLUS (Pmode,
7152 stack_pointer_rtx,
7153 GEN_INT (offset)));
7154
7155 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7156 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7157 }
7158 }
7159 }
7160
7161 gcc_assert (entry->offset == d_rounding);
7162 }
7163 else
7164 {
7165 push_regs (&live_regs_mask, current_function_interrupt);
7166 stack_usage += d;
7167 }
7168
7169 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7170 emit_insn (gen_GOTaddr2picreg ());
7171
7172 if (SHMEDIA_REGS_STACK_ADJUST ())
7173 {
7174 /* This must NOT go through the PLT, otherwise mach and macl
7175 may be clobbered. */
7176 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7177 (TARGET_FPU_ANY
7178 ? "__GCC_push_shmedia_regs"
7179 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7180 emit_insn (gen_shmedia_save_restore_regs_compact
7181 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7182 }
7183
7184 if (target_flags != save_flags && ! current_function_interrupt)
7185 emit_insn (gen_toggle_sz ());
7186
7187 target_flags = save_flags;
7188
7189 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7190 stack_pointer_rtx, 0, NULL, true);
7191 stack_usage += rounded_frame_size (d) - d_rounding;
7192
7193 if (frame_pointer_needed)
7194 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7195
7196 if (TARGET_SHCOMPACT
7197 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7198 {
7199 /* This must NOT go through the PLT, otherwise mach and macl
7200 may be clobbered. */
7201 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7202 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7203 emit_insn (gen_shcompact_incoming_args ());
7204 }
7205
7206 if (flag_stack_usage)
7207 current_function_static_stack_size = stack_usage;
7208 }
7209
7210 void
7211 sh_expand_epilogue (bool sibcall_p)
7212 {
7213 HARD_REG_SET live_regs_mask;
7214 int d, i;
7215 int d_rounding = 0;
7216
7217 int save_flags = target_flags;
7218 int frame_size, save_size;
7219 int fpscr_deferred = 0;
7220 int e = sibcall_p ? -1 : 1;
7221
7222 d = calc_live_regs (&live_regs_mask);
7223
7224 save_size = d;
7225 frame_size = rounded_frame_size (d);
7226
7227 if (TARGET_SH5)
7228 {
7229 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7230 int total_size;
7231 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7232 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7233 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7234
7235 total_size = d + tregs_space;
7236 total_size += rounded_frame_size (total_size);
7237 save_size = total_size - frame_size;
7238
7239 /* If adjusting the stack in a single step costs nothing extra, do so.
7240 I.e. either if a single addi is enough, or we need a movi anyway,
7241 and we don't exceed the maximum offset range (the test for the
7242 latter is conservative for simplicity). */
7243 if (TARGET_SHMEDIA
7244 && ! frame_pointer_needed
7245 && (CONST_OK_FOR_I10 (total_size)
7246 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7247 && total_size <= 2044)))
7248 d_rounding = frame_size;
7249
7250 frame_size -= d_rounding;
7251 }
7252
7253 if (frame_pointer_needed)
7254 {
7255 /* We must avoid scheduling the epilogue with previous basic blocks.
7256 See PR/18032 and PR/40313. */
7257 emit_insn (gen_blockage ());
7258 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7259 &live_regs_mask, false);
7260
7261 /* We must avoid moving the stack pointer adjustment past code
7262 which reads from the local frame, else an interrupt could
7263 occur after the SP adjustment and clobber data in the local
7264 frame. */
7265 emit_insn (gen_blockage ());
7266 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7267 }
7268 else if (frame_size)
7269 {
7270 /* We must avoid moving the stack pointer adjustment past code
7271 which reads from the local frame, else an interrupt could
7272 occur after the SP adjustment and clobber data in the local
7273 frame. */
7274 emit_insn (gen_blockage ());
7275 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7276 &live_regs_mask, false);
7277 }
7278
7279 if (SHMEDIA_REGS_STACK_ADJUST ())
7280 {
7281 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7282 (TARGET_FPU_ANY
7283 ? "__GCC_pop_shmedia_regs"
7284 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7285 /* This must NOT go through the PLT, otherwise mach and macl
7286 may be clobbered. */
7287 emit_insn (gen_shmedia_save_restore_regs_compact
7288 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7289 }
7290
7291 /* Pop all the registers. */
7292
7293 if (target_flags != save_flags && ! current_function_interrupt)
7294 emit_insn (gen_toggle_sz ());
7295 if (TARGET_SH5)
7296 {
7297 int offset_base, offset;
7298 int offset_in_r0 = -1;
7299 int sp_in_r0 = 0;
7300 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7301 save_schedule schedule;
7302 save_entry *entry;
7303 int *tmp_pnt;
7304
7305 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7306 offset_base = -entry[1].offset + d_rounding;
7307 tmp_pnt = schedule.temps;
7308 for (; entry->mode != VOIDmode; entry--)
7309 {
7310 enum machine_mode mode = (enum machine_mode) entry->mode;
7311 int reg = entry->reg;
7312 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7313
7314 offset = offset_base + entry->offset;
7315 reg_rtx = gen_rtx_REG (mode, reg);
7316
7317 mem_rtx = gen_frame_mem (mode,
7318 gen_rtx_PLUS (Pmode,
7319 stack_pointer_rtx,
7320 GEN_INT (offset)));
7321
7322 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7323 mem_rtx = NULL_RTX;
7324
7325 if (HAVE_POST_INCREMENT
7326 && (offset == offset_in_r0
7327 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7328 && mem_rtx == NULL_RTX)
7329 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7330 {
7331 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7332
7333 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7334 post_inc = NULL_RTX;
7335 else
7336 mem_rtx = NULL_RTX;
7337 }
7338
7339 if (mem_rtx != NULL_RTX)
7340 goto addr_ok;
7341
7342 if (offset_in_r0 == -1)
7343 {
7344 emit_move_insn (r0, GEN_INT (offset));
7345 offset_in_r0 = offset;
7346 }
7347 else if (offset != offset_in_r0)
7348 {
7349 emit_move_insn (r0,
7350 gen_rtx_PLUS
7351 (Pmode, r0,
7352 GEN_INT (offset - offset_in_r0)));
7353 offset_in_r0 += offset - offset_in_r0;
7354 }
7355
7356 if (post_inc != NULL_RTX)
7357 {
7358 if (! sp_in_r0)
7359 {
7360 emit_move_insn (r0,
7361 gen_rtx_PLUS
7362 (Pmode, r0, stack_pointer_rtx));
7363 sp_in_r0 = 1;
7364 }
7365
7366 mem_rtx = post_inc;
7367
7368 offset_in_r0 += GET_MODE_SIZE (mode);
7369 }
7370 else if (sp_in_r0)
7371 mem_rtx = gen_frame_mem (mode, r0);
7372 else
7373 mem_rtx = gen_frame_mem (mode,
7374 gen_rtx_PLUS (Pmode,
7375 stack_pointer_rtx,
7376 r0));
7377
7378 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7379 || mem_rtx == post_inc);
7380
7381 addr_ok:
7382 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7383 && mem_rtx != post_inc)
7384 {
7385 emit_move_insn (r0, mem_rtx);
7386 mem_rtx = r0;
7387 }
7388 else if (TARGET_REGISTER_P (reg))
7389 {
7390 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7391
7392 /* Give the scheduler a bit of freedom by using up to
7393 MAX_TEMPS registers in a round-robin fashion. */
7394 emit_move_insn (tmp_reg, mem_rtx);
7395 mem_rtx = tmp_reg;
7396 if (*++tmp_pnt < 0)
7397 tmp_pnt = schedule.temps;
7398 }
7399
7400 emit_move_insn (reg_rtx, mem_rtx);
7401 }
7402
7403 gcc_assert (entry->offset + offset_base == d + d_rounding);
7404 }
7405 else /* ! TARGET_SH5 */
7406 {
7407 int last_reg;
7408
7409 save_size = 0;
7410 /* For an ISR with RESBANK attribute assigned, don't pop PR
7411 register. */
7412 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7413 && !sh_cfun_resbank_handler_p ())
7414 {
7415 if (!frame_pointer_needed)
7416 emit_insn (gen_blockage ());
7417 pop (PR_REG);
7418 }
7419
7420 /* Banked registers are popped first to avoid being scheduled in the
7421 delay slot. RTE switches banks before the ds instruction. */
7422 if (current_function_interrupt)
7423 {
7424 bool use_movml = false;
7425
7426 if (TARGET_SH2A)
7427 {
7428 unsigned int count = 0;
7429
7430 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7431 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7432 count++;
7433 else
7434 break;
7435
7436 /* Use movml when all banked register are poped. */
7437 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7438 use_movml = true;
7439 }
7440
7441 if (use_movml)
7442 {
7443 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7444
7445 /* We must avoid scheduling multiple load insn with another
7446 insns. */
7447 emit_insn (gen_blockage ());
7448 emit_insn (gen_movml_pop_banked (sp_reg));
7449 emit_insn (gen_blockage ());
7450 }
7451 else
7452 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7453 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7454 pop (i);
7455
7456 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7457 }
7458 else
7459 last_reg = FIRST_PSEUDO_REGISTER;
7460
7461 for (i = 0; i < last_reg; i++)
7462 {
7463 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7464
7465 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7466 && hard_reg_set_intersect_p (live_regs_mask,
7467 reg_class_contents[DF_REGS]))
7468 fpscr_deferred = 1;
7469 /* For an ISR with RESBANK attribute assigned, don't pop
7470 following registers, R0-R14, MACH, MACL and GBR. */
7471 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7472 && ! (sh_cfun_resbank_handler_p ()
7473 && ((j >= FIRST_GENERAL_REG
7474 && j < LAST_GENERAL_REG)
7475 || j == MACH_REG
7476 || j == MACL_REG
7477 || j == GBR_REG)))
7478 pop (j);
7479
7480 if (j == FIRST_FP_REG && fpscr_deferred)
7481 pop (FPSCR_REG);
7482 }
7483 }
7484 if (target_flags != save_flags && ! current_function_interrupt)
7485 emit_insn (gen_toggle_sz ());
7486 target_flags = save_flags;
7487
7488 output_stack_adjust (crtl->args.pretend_args_size
7489 + save_size + d_rounding
7490 + crtl->args.info.stack_regs * 8,
7491 stack_pointer_rtx, e, NULL, false);
7492
7493 if (crtl->calls_eh_return)
7494 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7495 EH_RETURN_STACKADJ_RTX));
7496
7497 /* Switch back to the normal stack if necessary. */
7498 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7499 emit_insn (gen_sp_switch_2 ());
7500
7501 /* Tell flow the insn that pops PR isn't dead. */
7502 /* PR_REG will never be live in SHmedia mode, and we don't need to
7503 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7504 by the return pattern. */
7505 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7506 emit_use (gen_rtx_REG (SImode, PR_REG));
7507 }
7508
7509 static int sh_need_epilogue_known = 0;
7510
7511 int
7512 sh_need_epilogue (void)
7513 {
7514 if (! sh_need_epilogue_known)
7515 {
7516 rtx epilogue;
7517
7518 start_sequence ();
7519 sh_expand_epilogue (0);
7520 epilogue = get_insns ();
7521 end_sequence ();
7522 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7523 }
7524 return sh_need_epilogue_known > 0;
7525 }
7526
7527 /* Emit code to change the current function's return address to RA.
7528 TEMP is available as a scratch register, if needed. */
7529
7530 void
7531 sh_set_return_address (rtx ra, rtx tmp)
7532 {
7533 HARD_REG_SET live_regs_mask;
7534 int d;
7535 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7536 int pr_offset;
7537
7538 d = calc_live_regs (&live_regs_mask);
7539
7540 /* If pr_reg isn't life, we can set it (or the register given in
7541 sh_media_register_for_return) directly. */
7542 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7543 {
7544 rtx rr;
7545
7546 if (TARGET_SHMEDIA)
7547 {
7548 int rr_regno = sh_media_register_for_return ();
7549
7550 if (rr_regno < 0)
7551 rr_regno = pr_reg;
7552
7553 rr = gen_rtx_REG (DImode, rr_regno);
7554 }
7555 else
7556 rr = gen_rtx_REG (SImode, pr_reg);
7557
7558 emit_insn (GEN_MOV (rr, ra));
7559 /* Tell flow the register for return isn't dead. */
7560 emit_use (rr);
7561 return;
7562 }
7563
7564 if (TARGET_SH5)
7565 {
7566 int offset;
7567 save_schedule schedule;
7568 save_entry *entry;
7569
7570 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7571 offset = entry[1].offset;
7572 for (; entry->mode != VOIDmode; entry--)
7573 if (entry->reg == pr_reg)
7574 goto found;
7575
7576 /* We can't find pr register. */
7577 gcc_unreachable ();
7578
7579 found:
7580 offset = entry->offset - offset;
7581 pr_offset = (rounded_frame_size (d) + offset
7582 + SHMEDIA_REGS_STACK_ADJUST ());
7583 }
7584 else
7585 pr_offset = rounded_frame_size (d);
7586
7587 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7588
7589 if (frame_pointer_needed)
7590 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7591 else
7592 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7593
7594 tmp = gen_frame_mem (Pmode, tmp);
7595 emit_insn (GEN_MOV (tmp, ra));
7596 /* Tell this store isn't dead. */
7597 emit_use (tmp);
7598 }
7599
7600 /* Clear variables at function end. */
7601
7602 static void
7603 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7604 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7605 {
7606 sh_need_epilogue_known = 0;
7607 }
7608
7609 static rtx
7610 sh_builtin_saveregs (void)
7611 {
7612 /* First unnamed integer register. */
7613 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7614 /* Number of integer registers we need to save. */
7615 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7616 /* First unnamed SFmode float reg */
7617 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7618 /* Number of SFmode float regs to save. */
7619 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7620 rtx regbuf, fpregs;
7621 int bufsize, regno;
7622 alias_set_type alias_set;
7623
7624 if (TARGET_SH5)
7625 {
7626 if (n_intregs)
7627 {
7628 int pushregs = n_intregs;
7629
7630 while (pushregs < NPARM_REGS (SImode) - 1
7631 && (CALL_COOKIE_INT_REG_GET
7632 (crtl->args.info.call_cookie,
7633 NPARM_REGS (SImode) - pushregs)
7634 == 1))
7635 {
7636 crtl->args.info.call_cookie
7637 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7638 - pushregs, 1);
7639 pushregs++;
7640 }
7641
7642 if (pushregs == NPARM_REGS (SImode))
7643 crtl->args.info.call_cookie
7644 |= (CALL_COOKIE_INT_REG (0, 1)
7645 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7646 else
7647 crtl->args.info.call_cookie
7648 |= CALL_COOKIE_STACKSEQ (pushregs);
7649
7650 crtl->args.pretend_args_size += 8 * n_intregs;
7651 }
7652 if (TARGET_SHCOMPACT)
7653 return const0_rtx;
7654 }
7655
7656 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7657 {
7658 error ("__builtin_saveregs not supported by this subtarget");
7659 return const0_rtx;
7660 }
7661
7662 if (TARGET_SHMEDIA)
7663 n_floatregs = 0;
7664
7665 /* Allocate block of memory for the regs. */
7666 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7667 Or can assign_stack_local accept a 0 SIZE argument? */
7668 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7669
7670 if (TARGET_SHMEDIA)
7671 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7672 else if (n_floatregs & 1)
7673 {
7674 rtx addr;
7675
7676 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7677 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7678 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7679 regbuf = change_address (regbuf, BLKmode, addr);
7680 }
7681 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7682 {
7683 rtx addr, mask;
7684
7685 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7686 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7687 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7688 emit_insn (gen_andsi3 (addr, addr, mask));
7689 regbuf = change_address (regbuf, BLKmode, addr);
7690 }
7691 else
7692 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7693 alias_set = get_varargs_alias_set ();
7694 set_mem_alias_set (regbuf, alias_set);
7695
7696 /* Save int args.
7697 This is optimized to only save the regs that are necessary. Explicitly
7698 named args need not be saved. */
7699 if (n_intregs > 0)
7700 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7701 adjust_address (regbuf, BLKmode,
7702 n_floatregs * UNITS_PER_WORD),
7703 n_intregs);
7704
7705 if (TARGET_SHMEDIA)
7706 /* Return the address of the regbuf. */
7707 return XEXP (regbuf, 0);
7708
7709 /* Save float args.
7710 This is optimized to only save the regs that are necessary. Explicitly
7711 named args need not be saved.
7712 We explicitly build a pointer to the buffer because it halves the insn
7713 count when not optimizing (otherwise the pointer is built for each reg
7714 saved).
7715 We emit the moves in reverse order so that we can use predecrement. */
7716
7717 fpregs = copy_to_mode_reg (Pmode,
7718 plus_constant (XEXP (regbuf, 0),
7719 n_floatregs * UNITS_PER_WORD));
7720 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7721 {
7722 rtx mem;
7723 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7724 {
7725 emit_insn (gen_addsi3 (fpregs, fpregs,
7726 GEN_INT (-2 * UNITS_PER_WORD)));
7727 mem = change_address (regbuf, DFmode, fpregs);
7728 emit_move_insn (mem,
7729 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7730 }
7731 regno = first_floatreg;
7732 if (regno & 1)
7733 {
7734 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7735 mem = change_address (regbuf, SFmode, fpregs);
7736 emit_move_insn (mem,
7737 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7738 - (TARGET_LITTLE_ENDIAN != 0)));
7739 }
7740 }
7741 else
7742 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7743 {
7744 rtx mem;
7745
7746 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7747 mem = change_address (regbuf, SFmode, fpregs);
7748 emit_move_insn (mem,
7749 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7750 }
7751
7752 /* Return the address of the regbuf. */
7753 return XEXP (regbuf, 0);
7754 }
7755
7756 /* Define the `__builtin_va_list' type for the ABI. */
7757
7758 static tree
7759 sh_build_builtin_va_list (void)
7760 {
7761 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7762 tree record, type_decl;
7763
7764 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7765 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7766 return ptr_type_node;
7767
7768 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7769 type_decl = build_decl (BUILTINS_LOCATION,
7770 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7771
7772 f_next_o = build_decl (BUILTINS_LOCATION,
7773 FIELD_DECL, get_identifier ("__va_next_o"),
7774 ptr_type_node);
7775 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7776 FIELD_DECL,
7777 get_identifier ("__va_next_o_limit"),
7778 ptr_type_node);
7779 f_next_fp = build_decl (BUILTINS_LOCATION,
7780 FIELD_DECL, get_identifier ("__va_next_fp"),
7781 ptr_type_node);
7782 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7783 FIELD_DECL,
7784 get_identifier ("__va_next_fp_limit"),
7785 ptr_type_node);
7786 f_next_stack = build_decl (BUILTINS_LOCATION,
7787 FIELD_DECL, get_identifier ("__va_next_stack"),
7788 ptr_type_node);
7789
7790 DECL_FIELD_CONTEXT (f_next_o) = record;
7791 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7792 DECL_FIELD_CONTEXT (f_next_fp) = record;
7793 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7794 DECL_FIELD_CONTEXT (f_next_stack) = record;
7795
7796 TREE_CHAIN (record) = type_decl;
7797 TYPE_NAME (record) = type_decl;
7798 TYPE_FIELDS (record) = f_next_o;
7799 DECL_CHAIN (f_next_o) = f_next_o_limit;
7800 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7801 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7802 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7803
7804 layout_type (record);
7805
7806 return record;
7807 }
7808
7809 /* Implement `va_start' for varargs and stdarg. */
7810
7811 static void
7812 sh_va_start (tree valist, rtx nextarg)
7813 {
7814 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7815 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7816 tree t, u;
7817 int nfp, nint;
7818
7819 if (TARGET_SH5)
7820 {
7821 expand_builtin_saveregs ();
7822 std_expand_builtin_va_start (valist, nextarg);
7823 return;
7824 }
7825
7826 if ((! TARGET_SH2E && ! TARGET_SH4)
7827 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7828 {
7829 std_expand_builtin_va_start (valist, nextarg);
7830 return;
7831 }
7832
7833 f_next_o = TYPE_FIELDS (va_list_type_node);
7834 f_next_o_limit = DECL_CHAIN (f_next_o);
7835 f_next_fp = DECL_CHAIN (f_next_o_limit);
7836 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7837 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7838
7839 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7840 NULL_TREE);
7841 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7842 valist, f_next_o_limit, NULL_TREE);
7843 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7844 NULL_TREE);
7845 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7846 valist, f_next_fp_limit, NULL_TREE);
7847 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7848 valist, f_next_stack, NULL_TREE);
7849
7850 /* Call __builtin_saveregs. */
7851 u = make_tree (sizetype, expand_builtin_saveregs ());
7852 u = fold_convert (ptr_type_node, u);
7853 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7854 TREE_SIDE_EFFECTS (t) = 1;
7855 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7856
7857 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7858 if (nfp < 8)
7859 nfp = 8 - nfp;
7860 else
7861 nfp = 0;
7862 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7863 size_int (UNITS_PER_WORD * nfp));
7864 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7865 TREE_SIDE_EFFECTS (t) = 1;
7866 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7867
7868 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7869 TREE_SIDE_EFFECTS (t) = 1;
7870 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7871
7872 nint = crtl->args.info.arg_count[SH_ARG_INT];
7873 if (nint < 4)
7874 nint = 4 - nint;
7875 else
7876 nint = 0;
7877 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7878 size_int (UNITS_PER_WORD * nint));
7879 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7880 TREE_SIDE_EFFECTS (t) = 1;
7881 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7882
7883 u = make_tree (ptr_type_node, nextarg);
7884 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7885 TREE_SIDE_EFFECTS (t) = 1;
7886 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7887 }
7888
7889 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7890 member, return it. */
7891 static tree
7892 find_sole_member (tree type)
7893 {
7894 tree field, member = NULL_TREE;
7895
7896 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7897 {
7898 if (TREE_CODE (field) != FIELD_DECL)
7899 continue;
7900 if (!DECL_SIZE (field))
7901 return NULL_TREE;
7902 if (integer_zerop (DECL_SIZE (field)))
7903 continue;
7904 if (member)
7905 return NULL_TREE;
7906 member = field;
7907 }
7908 return member;
7909 }
7910 /* Implement `va_arg'. */
7911
7912 static tree
7913 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7914 gimple_seq *post_p ATTRIBUTE_UNUSED)
7915 {
7916 HOST_WIDE_INT size, rsize;
7917 tree tmp, pptr_type_node;
7918 tree addr, lab_over = NULL, result = NULL;
7919 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7920 tree eff_type;
7921
7922 if (pass_by_ref)
7923 type = build_pointer_type (type);
7924
7925 size = int_size_in_bytes (type);
7926 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7927 pptr_type_node = build_pointer_type (ptr_type_node);
7928
7929 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7930 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7931 {
7932 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7933 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7934 int pass_as_float;
7935 tree lab_false;
7936 tree member;
7937
7938 f_next_o = TYPE_FIELDS (va_list_type_node);
7939 f_next_o_limit = DECL_CHAIN (f_next_o);
7940 f_next_fp = DECL_CHAIN (f_next_o_limit);
7941 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7942 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7943
7944 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7945 NULL_TREE);
7946 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7947 valist, f_next_o_limit, NULL_TREE);
7948 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7949 valist, f_next_fp, NULL_TREE);
7950 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7951 valist, f_next_fp_limit, NULL_TREE);
7952 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7953 valist, f_next_stack, NULL_TREE);
7954
7955 /* Structures with a single member with a distinct mode are passed
7956 like their member. This is relevant if the latter has a REAL_TYPE
7957 or COMPLEX_TYPE type. */
7958 eff_type = type;
7959 while (TREE_CODE (eff_type) == RECORD_TYPE
7960 && (member = find_sole_member (eff_type))
7961 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7962 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7963 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7964 {
7965 tree field_type = TREE_TYPE (member);
7966
7967 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7968 eff_type = field_type;
7969 else
7970 {
7971 gcc_assert ((TYPE_ALIGN (eff_type)
7972 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7973 || (TYPE_ALIGN (eff_type)
7974 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7975 break;
7976 }
7977 }
7978
7979 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7980 {
7981 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7982 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7983 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7984 && size <= 16));
7985 }
7986 else
7987 {
7988 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7989 }
7990
7991 addr = create_tmp_var (pptr_type_node, NULL);
7992 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7993 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7994
7995 valist = build_simple_mem_ref (addr);
7996
7997 if (pass_as_float)
7998 {
7999 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8000 tree cmp;
8001 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8002
8003 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8004 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8005
8006 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8007 tmp = next_fp_limit;
8008 if (size > 4 && !is_double)
8009 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
8010 unshare_expr (tmp), size_int (4 - size));
8011 tmp = build2 (GE_EXPR, boolean_type_node,
8012 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8013 cmp = build3 (COND_EXPR, void_type_node, tmp,
8014 build1 (GOTO_EXPR, void_type_node,
8015 unshare_expr (lab_false)), NULL_TREE);
8016 if (!is_double)
8017 gimplify_and_add (cmp, pre_p);
8018
8019 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8020 || (is_double || size == 16))
8021 {
8022 tmp = fold_convert (sizetype, next_fp_tmp);
8023 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8024 size_int (UNITS_PER_WORD));
8025 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8026 unshare_expr (next_fp_tmp), tmp);
8027 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8028 }
8029 if (is_double)
8030 gimplify_and_add (cmp, pre_p);
8031
8032 #ifdef FUNCTION_ARG_SCmode_WART
8033 if (TYPE_MODE (eff_type) == SCmode
8034 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8035 {
8036 tree subtype = TREE_TYPE (eff_type);
8037 tree real, imag;
8038
8039 imag
8040 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8041 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8042
8043 real
8044 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8045 real = get_initialized_tmp_var (real, pre_p, NULL);
8046
8047 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8048 if (type != eff_type)
8049 result = build1 (VIEW_CONVERT_EXPR, type, result);
8050 result = get_initialized_tmp_var (result, pre_p, NULL);
8051 }
8052 #endif /* FUNCTION_ARG_SCmode_WART */
8053
8054 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8055 gimplify_and_add (tmp, pre_p);
8056
8057 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8058 gimplify_and_add (tmp, pre_p);
8059
8060 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8061 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8062 gimplify_assign (unshare_expr (next_fp_tmp),
8063 unshare_expr (valist), pre_p);
8064
8065 gimplify_assign (unshare_expr (valist),
8066 unshare_expr (next_fp_tmp), post_p);
8067 valist = next_fp_tmp;
8068 }
8069 else
8070 {
8071 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8072 unshare_expr (next_o), size_int (rsize));
8073 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8074 unshare_expr (next_o_limit));
8075 tmp = build3 (COND_EXPR, void_type_node, tmp,
8076 build1 (GOTO_EXPR, void_type_node,
8077 unshare_expr (lab_false)),
8078 NULL_TREE);
8079 gimplify_and_add (tmp, pre_p);
8080
8081 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8082 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8083
8084 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8085 gimplify_and_add (tmp, pre_p);
8086
8087 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8088 gimplify_and_add (tmp, pre_p);
8089
8090 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8091 gimplify_assign (unshare_expr (next_o),
8092 unshare_expr (next_o_limit), pre_p);
8093
8094 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8095 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8096 }
8097
8098 if (!result)
8099 {
8100 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8101 gimplify_and_add (tmp, pre_p);
8102 }
8103 }
8104
8105 /* ??? In va-sh.h, there had been code to make values larger than
8106 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8107
8108 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8109 if (result)
8110 {
8111 gimplify_assign (result, tmp, pre_p);
8112 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8113 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8114 gimplify_and_add (tmp, pre_p);
8115 }
8116 else
8117 result = tmp;
8118
8119 if (pass_by_ref)
8120 result = build_va_arg_indirect_ref (result);
8121
8122 return result;
8123 }
8124
8125 /* 64 bit floating points memory transfers are paired single precision loads
8126 or store. So DWARF information needs fixing in little endian (unless
8127 PR=SZ=1 in FPSCR). */
8128 rtx
8129 sh_dwarf_register_span (rtx reg)
8130 {
8131 unsigned regno = REGNO (reg);
8132
8133 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8134 return NULL_RTX;
8135
8136 return
8137 gen_rtx_PARALLEL (VOIDmode,
8138 gen_rtvec (2,
8139 gen_rtx_REG (SFmode,
8140 DBX_REGISTER_NUMBER (regno+1)),
8141 gen_rtx_REG (SFmode,
8142 DBX_REGISTER_NUMBER (regno))));
8143 }
8144
8145 static enum machine_mode
8146 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8147 int *punsignedp, const_tree funtype,
8148 int for_return ATTRIBUTE_UNUSED)
8149 {
8150 if (sh_promote_prototypes (funtype))
8151 return promote_mode (type, mode, punsignedp);
8152 else
8153 return mode;
8154 }
8155
8156 static bool
8157 sh_promote_prototypes (const_tree type)
8158 {
8159 if (TARGET_HITACHI)
8160 return 0;
8161 if (! type)
8162 return 1;
8163 return ! sh_attr_renesas_p (type);
8164 }
8165
8166 /* Whether an argument must be passed by reference. On SHcompact, we
8167 pretend arguments wider than 32-bits that would have been passed in
8168 registers are passed by reference, so that an SHmedia trampoline
8169 loads them into the full 64-bits registers. */
8170
8171 static int
8172 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8173 const_tree type, bool named)
8174 {
8175 unsigned HOST_WIDE_INT size;
8176
8177 if (type)
8178 size = int_size_in_bytes (type);
8179 else
8180 size = GET_MODE_SIZE (mode);
8181
8182 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8183 && (!named
8184 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8185 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8186 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8187 && size > 4
8188 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8189 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8190 return size;
8191 else
8192 return 0;
8193 }
8194
8195 static bool
8196 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8197 const_tree type, bool named)
8198 {
8199 if (targetm.calls.must_pass_in_stack (mode, type))
8200 return true;
8201
8202 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8203 wants to know about pass-by-reference semantics for incoming
8204 arguments. */
8205 if (! cum)
8206 return false;
8207
8208 if (TARGET_SHCOMPACT)
8209 {
8210 cum->byref = shcompact_byref (cum, mode, type, named);
8211 return cum->byref != 0;
8212 }
8213
8214 return false;
8215 }
8216
8217 static bool
8218 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8219 const_tree type, bool named ATTRIBUTE_UNUSED)
8220 {
8221 /* ??? How can it possibly be correct to return true only on the
8222 caller side of the equation? Is there someplace else in the
8223 sh backend that's magically producing the copies? */
8224 return (cum->outgoing
8225 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8226 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8227 }
8228
8229 static int
8230 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8231 tree type, bool named ATTRIBUTE_UNUSED)
8232 {
8233 int words = 0;
8234
8235 if (!TARGET_SH5
8236 && PASS_IN_REG_P (*cum, mode, type)
8237 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8238 && (ROUND_REG (*cum, mode)
8239 + (mode != BLKmode
8240 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8241 : ROUND_ADVANCE (int_size_in_bytes (type)))
8242 > NPARM_REGS (mode)))
8243 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8244
8245 else if (!TARGET_SHCOMPACT
8246 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8247 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8248
8249 return words * UNITS_PER_WORD;
8250 }
8251
8252
8253 /* Define where to put the arguments to a function.
8254 Value is zero to push the argument on the stack,
8255 or a hard register in which to store the argument.
8256
8257 MODE is the argument's machine mode.
8258 TYPE is the data type of the argument (as a tree).
8259 This is null for libcalls where that information may
8260 not be available.
8261 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8262 the preceding args and about the function being called.
8263 NAMED is nonzero if this argument is a named parameter
8264 (otherwise it is an extra parameter matching an ellipsis).
8265
8266 On SH the first args are normally in registers
8267 and the rest are pushed. Any arg that starts within the first
8268 NPARM_REGS words is at least partially passed in a register unless
8269 its data type forbids. */
8270
8271 static rtx
8272 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8273 const_tree type, bool named)
8274 {
8275 if (! TARGET_SH5 && mode == VOIDmode)
8276 return GEN_INT (ca->renesas_abi ? 1 : 0);
8277
8278 if (! TARGET_SH5
8279 && PASS_IN_REG_P (*ca, mode, type)
8280 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8281 {
8282 int regno;
8283
8284 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8285 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8286 {
8287 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8288 gen_rtx_REG (SFmode,
8289 BASE_ARG_REG (mode)
8290 + (ROUND_REG (*ca, mode) ^ 1)),
8291 const0_rtx);
8292 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8293 gen_rtx_REG (SFmode,
8294 BASE_ARG_REG (mode)
8295 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8296 GEN_INT (4));
8297 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8298 }
8299
8300 /* If the alignment of a DF value causes an SF register to be
8301 skipped, we will use that skipped register for the next SF
8302 value. */
8303 if ((TARGET_HITACHI || ca->renesas_abi)
8304 && ca->free_single_fp_reg
8305 && mode == SFmode)
8306 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8307
8308 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8309 ^ (mode == SFmode && TARGET_SH4
8310 && TARGET_LITTLE_ENDIAN != 0
8311 && ! TARGET_HITACHI && ! ca->renesas_abi);
8312 return gen_rtx_REG (mode, regno);
8313
8314 }
8315
8316 if (TARGET_SH5)
8317 {
8318 if (mode == VOIDmode && TARGET_SHCOMPACT)
8319 return GEN_INT (ca->call_cookie);
8320
8321 /* The following test assumes unnamed arguments are promoted to
8322 DFmode. */
8323 if (mode == SFmode && ca->free_single_fp_reg)
8324 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8325
8326 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8327 && (named || ! ca->prototype_p)
8328 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8329 {
8330 if (! ca->prototype_p && TARGET_SHMEDIA)
8331 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8332
8333 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8334 FIRST_FP_PARM_REG
8335 + ca->arg_count[(int) SH_ARG_FLOAT]);
8336 }
8337
8338 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8339 && (! TARGET_SHCOMPACT
8340 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8341 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8342 type, named))))
8343 {
8344 return gen_rtx_REG (mode, (FIRST_PARM_REG
8345 + ca->arg_count[(int) SH_ARG_INT]));
8346 }
8347
8348 return 0;
8349 }
8350
8351 return 0;
8352 }
8353
8354 /* Update the data in CUM to advance over an argument
8355 of mode MODE and data type TYPE.
8356 (TYPE is null for libcalls where that information may not be
8357 available.) */
8358
8359 static void
8360 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8361 const_tree type, bool named)
8362 {
8363 if (ca->force_mem)
8364 ca->force_mem = 0;
8365 else if (TARGET_SH5)
8366 {
8367 const_tree type2 = (ca->byref && type
8368 ? TREE_TYPE (type)
8369 : type);
8370 enum machine_mode mode2 = (ca->byref && type
8371 ? TYPE_MODE (type2)
8372 : mode);
8373 int dwords = ((ca->byref
8374 ? ca->byref
8375 : mode2 == BLKmode
8376 ? int_size_in_bytes (type2)
8377 : GET_MODE_SIZE (mode2)) + 7) / 8;
8378 int numregs = MIN (dwords, NPARM_REGS (SImode)
8379 - ca->arg_count[(int) SH_ARG_INT]);
8380
8381 if (numregs)
8382 {
8383 ca->arg_count[(int) SH_ARG_INT] += numregs;
8384 if (TARGET_SHCOMPACT
8385 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8386 {
8387 ca->call_cookie
8388 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8389 - numregs, 1);
8390 /* N.B. We want this also for outgoing. */
8391 ca->stack_regs += numregs;
8392 }
8393 else if (ca->byref)
8394 {
8395 if (! ca->outgoing)
8396 ca->stack_regs += numregs;
8397 ca->byref_regs += numregs;
8398 ca->byref = 0;
8399 do
8400 ca->call_cookie
8401 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8402 - numregs, 2);
8403 while (--numregs);
8404 ca->call_cookie
8405 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8406 - 1, 1);
8407 }
8408 else if (dwords > numregs)
8409 {
8410 int pushregs = numregs;
8411
8412 if (TARGET_SHCOMPACT)
8413 ca->stack_regs += numregs;
8414 while (pushregs < NPARM_REGS (SImode) - 1
8415 && (CALL_COOKIE_INT_REG_GET
8416 (ca->call_cookie,
8417 NPARM_REGS (SImode) - pushregs)
8418 == 1))
8419 {
8420 ca->call_cookie
8421 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8422 - pushregs, 1);
8423 pushregs++;
8424 }
8425 if (numregs == NPARM_REGS (SImode))
8426 ca->call_cookie
8427 |= CALL_COOKIE_INT_REG (0, 1)
8428 | CALL_COOKIE_STACKSEQ (numregs - 1);
8429 else
8430 ca->call_cookie
8431 |= CALL_COOKIE_STACKSEQ (numregs);
8432 }
8433 }
8434 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8435 && (named || ! ca->prototype_p))
8436 {
8437 if (mode2 == SFmode && ca->free_single_fp_reg)
8438 ca->free_single_fp_reg = 0;
8439 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8440 < NPARM_REGS (SFmode))
8441 {
8442 int numfpregs
8443 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8444 NPARM_REGS (SFmode)
8445 - ca->arg_count[(int) SH_ARG_FLOAT]);
8446
8447 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8448
8449 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8450 {
8451 if (ca->outgoing && numregs > 0)
8452 do
8453 {
8454 ca->call_cookie
8455 |= (CALL_COOKIE_INT_REG
8456 (ca->arg_count[(int) SH_ARG_INT]
8457 - numregs + ((numfpregs - 2) / 2),
8458 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8459 - numfpregs) / 2));
8460 }
8461 while (numfpregs -= 2);
8462 }
8463 else if (mode2 == SFmode && (named)
8464 && (ca->arg_count[(int) SH_ARG_FLOAT]
8465 < NPARM_REGS (SFmode)))
8466 ca->free_single_fp_reg
8467 = FIRST_FP_PARM_REG - numfpregs
8468 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8469 }
8470 }
8471 return;
8472 }
8473
8474 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8475 {
8476 /* Note that we've used the skipped register. */
8477 if (mode == SFmode && ca->free_single_fp_reg)
8478 {
8479 ca->free_single_fp_reg = 0;
8480 return;
8481 }
8482 /* When we have a DF after an SF, there's an SF register that get
8483 skipped in order to align the DF value. We note this skipped
8484 register, because the next SF value will use it, and not the
8485 SF that follows the DF. */
8486 if (mode == DFmode
8487 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8488 {
8489 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8490 + BASE_ARG_REG (mode));
8491 }
8492 }
8493
8494 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8495 || PASS_IN_REG_P (*ca, mode, type))
8496 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8497 = (ROUND_REG (*ca, mode)
8498 + (mode == BLKmode
8499 ? ROUND_ADVANCE (int_size_in_bytes (type))
8500 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8501 }
8502
8503 /* The Renesas calling convention doesn't quite fit into this scheme since
8504 the address is passed like an invisible argument, but one that is always
8505 passed in memory. */
8506 static rtx
8507 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8508 {
8509 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8510 return 0;
8511 return gen_rtx_REG (Pmode, 2);
8512 }
8513
8514 /* Worker function for TARGET_FUNCTION_VALUE.
8515
8516 For the SH, this is like LIBCALL_VALUE, except that we must change the
8517 mode like PROMOTE_MODE does.
8518 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8519 tested here has to be kept in sync with the one in explow.c:promote_mode.
8520 */
8521
8522 static rtx
8523 sh_function_value (const_tree valtype,
8524 const_tree fn_decl_or_type,
8525 bool outgoing ATTRIBUTE_UNUSED)
8526 {
8527 if (fn_decl_or_type
8528 && !DECL_P (fn_decl_or_type))
8529 fn_decl_or_type = NULL;
8530
8531 return gen_rtx_REG (
8532 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8533 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8534 && (TREE_CODE (valtype) == INTEGER_TYPE
8535 || TREE_CODE (valtype) == ENUMERAL_TYPE
8536 || TREE_CODE (valtype) == BOOLEAN_TYPE
8537 || TREE_CODE (valtype) == REAL_TYPE
8538 || TREE_CODE (valtype) == OFFSET_TYPE))
8539 && sh_promote_prototypes (fn_decl_or_type)
8540 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8541 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8542 }
8543
8544 /* Worker function for TARGET_LIBCALL_VALUE. */
8545
8546 static rtx
8547 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8548 {
8549 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8550 }
8551
8552 /* Return true if N is a possible register number of function value. */
8553
8554 static bool
8555 sh_function_value_regno_p (const unsigned int regno)
8556 {
8557 return ((regno) == FIRST_RET_REG
8558 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8559 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8560 }
8561
8562 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8563
8564 static bool
8565 sh_return_in_memory (const_tree type, const_tree fndecl)
8566 {
8567 if (TARGET_SH5)
8568 {
8569 if (TYPE_MODE (type) == BLKmode)
8570 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8571 else
8572 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8573 }
8574 else
8575 {
8576 return (TYPE_MODE (type) == BLKmode
8577 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8578 && TREE_CODE (type) == RECORD_TYPE));
8579 }
8580 }
8581
8582 /* We actually emit the code in sh_expand_prologue. We used to use
8583 a static variable to flag that we need to emit this code, but that
8584 doesn't when inlining, when functions are deferred and then emitted
8585 later. Fortunately, we already have two flags that are part of struct
8586 function that tell if a function uses varargs or stdarg. */
8587 static void
8588 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8589 enum machine_mode mode,
8590 tree type,
8591 int *pretend_arg_size,
8592 int second_time ATTRIBUTE_UNUSED)
8593 {
8594 gcc_assert (cfun->stdarg);
8595 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8596 {
8597 int named_parm_regs, anon_parm_regs;
8598
8599 named_parm_regs = (ROUND_REG (*ca, mode)
8600 + (mode == BLKmode
8601 ? ROUND_ADVANCE (int_size_in_bytes (type))
8602 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8603 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8604 if (anon_parm_regs > 0)
8605 *pretend_arg_size = anon_parm_regs * 4;
8606 }
8607 }
8608
8609 static bool
8610 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8611 {
8612 return TARGET_SH5;
8613 }
8614
8615 static bool
8616 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8617 {
8618 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8619 }
8620
8621
8622 /* Define the offset between two registers, one to be eliminated, and
8623 the other its replacement, at the start of a routine. */
8624
8625 int
8626 initial_elimination_offset (int from, int to)
8627 {
8628 int regs_saved;
8629 int regs_saved_rounding = 0;
8630 int total_saved_regs_space;
8631 int total_auto_space;
8632 int save_flags = target_flags;
8633 int copy_flags;
8634 HARD_REG_SET live_regs_mask;
8635
8636 shmedia_space_reserved_for_target_registers = false;
8637 regs_saved = calc_live_regs (&live_regs_mask);
8638 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8639
8640 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8641 {
8642 shmedia_space_reserved_for_target_registers = true;
8643 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8644 }
8645
8646 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8647 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8648 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8649
8650 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8651 copy_flags = target_flags;
8652 target_flags = save_flags;
8653
8654 total_saved_regs_space = regs_saved + regs_saved_rounding;
8655
8656 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8657 return total_saved_regs_space + total_auto_space
8658 + crtl->args.info.byref_regs * 8;
8659
8660 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8661 return total_saved_regs_space + total_auto_space
8662 + crtl->args.info.byref_regs * 8;
8663
8664 /* Initial gap between fp and sp is 0. */
8665 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8666 return 0;
8667
8668 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8669 return rounded_frame_size (0);
8670
8671 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8672 return rounded_frame_size (0);
8673
8674 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8675 && (to == HARD_FRAME_POINTER_REGNUM
8676 || to == STACK_POINTER_REGNUM));
8677 if (TARGET_SH5)
8678 {
8679 int n = total_saved_regs_space;
8680 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8681 save_schedule schedule;
8682 save_entry *entry;
8683
8684 n += total_auto_space;
8685
8686 /* If it wasn't saved, there's not much we can do. */
8687 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8688 return n;
8689
8690 target_flags = copy_flags;
8691
8692 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8693 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8694 if (entry->reg == pr_reg)
8695 {
8696 target_flags = save_flags;
8697 return entry->offset;
8698 }
8699 gcc_unreachable ();
8700 }
8701 else
8702 return total_auto_space;
8703 }
8704
8705 /* Parse the -mfixed-range= option string. */
8706 void
8707 sh_fix_range (const char *const_str)
8708 {
8709 int i, first, last;
8710 char *str, *dash, *comma;
8711
8712 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
8713 REG2 are either register names or register numbers. The effect
8714 of this option is to mark the registers in the range from REG1 to
8715 REG2 as ``fixed'' so they won't be used by the compiler. */
8716
8717 i = strlen (const_str);
8718 str = (char *) alloca (i + 1);
8719 memcpy (str, const_str, i + 1);
8720
8721 while (1)
8722 {
8723 dash = strchr (str, '-');
8724 if (!dash)
8725 {
8726 warning (0, "value of -mfixed-range must have form REG1-REG2");
8727 return;
8728 }
8729 *dash = '\0';
8730 comma = strchr (dash + 1, ',');
8731 if (comma)
8732 *comma = '\0';
8733
8734 first = decode_reg_name (str);
8735 if (first < 0)
8736 {
8737 warning (0, "unknown register name: %s", str);
8738 return;
8739 }
8740
8741 last = decode_reg_name (dash + 1);
8742 if (last < 0)
8743 {
8744 warning (0, "unknown register name: %s", dash + 1);
8745 return;
8746 }
8747
8748 *dash = '-';
8749
8750 if (first > last)
8751 {
8752 warning (0, "%s-%s is an empty range", str, dash + 1);
8753 return;
8754 }
8755
8756 for (i = first; i <= last; ++i)
8757 fixed_regs[i] = call_used_regs[i] = 1;
8758
8759 if (!comma)
8760 break;
8761
8762 *comma = ',';
8763 str = comma + 1;
8764 }
8765 }
8766 \f
8767 /* Insert any deferred function attributes from earlier pragmas. */
8768 static void
8769 sh_insert_attributes (tree node, tree *attributes)
8770 {
8771 tree attrs;
8772
8773 if (TREE_CODE (node) != FUNCTION_DECL)
8774 return;
8775
8776 /* We are only interested in fields. */
8777 if (!DECL_P (node))
8778 return;
8779
8780 /* Append the attributes to the deferred attributes. */
8781 *sh_deferred_function_attributes_tail = *attributes;
8782 attrs = sh_deferred_function_attributes;
8783 if (!attrs)
8784 return;
8785
8786 /* Some attributes imply or require the interrupt attribute. */
8787 if (!lookup_attribute ("interrupt_handler", attrs)
8788 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8789 {
8790 /* If we have a trapa_handler, but no interrupt_handler attribute,
8791 insert an interrupt_handler attribute. */
8792 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8793 /* We can't use sh_pr_interrupt here because that's not in the
8794 java frontend. */
8795 attrs
8796 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8797 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8798 if the interrupt attribute is missing, we ignore the attribute
8799 and warn. */
8800 else if (lookup_attribute ("sp_switch", attrs)
8801 || lookup_attribute ("trap_exit", attrs)
8802 || lookup_attribute ("nosave_low_regs", attrs)
8803 || lookup_attribute ("resbank", attrs))
8804 {
8805 tree *tail;
8806
8807 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8808 {
8809 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8810 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8811 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8812 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8813 warning (OPT_Wattributes,
8814 "%qE attribute only applies to interrupt functions",
8815 TREE_PURPOSE (attrs));
8816 else
8817 {
8818 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8819 NULL_TREE);
8820 tail = &TREE_CHAIN (*tail);
8821 }
8822 }
8823 attrs = *attributes;
8824 }
8825 }
8826
8827 /* Install the processed list. */
8828 *attributes = attrs;
8829
8830 /* Clear deferred attributes. */
8831 sh_deferred_function_attributes = NULL_TREE;
8832 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8833
8834 return;
8835 }
8836
8837 /* Supported attributes:
8838
8839 interrupt_handler -- specifies this function is an interrupt handler.
8840
8841 trapa_handler - like above, but don't save all registers.
8842
8843 sp_switch -- specifies an alternate stack for an interrupt handler
8844 to run on.
8845
8846 trap_exit -- use a trapa to exit an interrupt function instead of
8847 an rte instruction.
8848
8849 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8850 This is useful on the SH3 and upwards,
8851 which has a separate set of low regs for User and Supervisor modes.
8852 This should only be used for the lowest level of interrupts. Higher levels
8853 of interrupts must save the registers in case they themselves are
8854 interrupted.
8855
8856 renesas -- use Renesas calling/layout conventions (functions and
8857 structures).
8858
8859 resbank -- In case of an ISR, use a register bank to save registers
8860 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8861 */
8862
8863 /* Handle a 'resbank' attribute. */
8864 static tree
8865 sh_handle_resbank_handler_attribute (tree * node, tree name,
8866 tree args ATTRIBUTE_UNUSED,
8867 int flags ATTRIBUTE_UNUSED,
8868 bool * no_add_attrs)
8869 {
8870 if (!TARGET_SH2A)
8871 {
8872 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8873 name);
8874 *no_add_attrs = true;
8875 }
8876 if (TREE_CODE (*node) != FUNCTION_DECL)
8877 {
8878 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8879 name);
8880 *no_add_attrs = true;
8881 }
8882
8883 return NULL_TREE;
8884 }
8885
8886 /* Handle an "interrupt_handler" attribute; arguments as in
8887 struct attribute_spec.handler. */
8888 static tree
8889 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8890 tree args ATTRIBUTE_UNUSED,
8891 int flags ATTRIBUTE_UNUSED,
8892 bool *no_add_attrs)
8893 {
8894 if (TREE_CODE (*node) != FUNCTION_DECL)
8895 {
8896 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8897 name);
8898 *no_add_attrs = true;
8899 }
8900 else if (TARGET_SHCOMPACT)
8901 {
8902 error ("attribute interrupt_handler is not compatible with -m5-compact");
8903 *no_add_attrs = true;
8904 }
8905
8906 return NULL_TREE;
8907 }
8908
8909 /* Handle an 'function_vector' attribute; arguments as in
8910 struct attribute_spec.handler. */
8911 static tree
8912 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8913 tree args ATTRIBUTE_UNUSED,
8914 int flags ATTRIBUTE_UNUSED,
8915 bool * no_add_attrs)
8916 {
8917 if (!TARGET_SH2A)
8918 {
8919 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8920 name);
8921 *no_add_attrs = true;
8922 }
8923 else if (TREE_CODE (*node) != FUNCTION_DECL)
8924 {
8925 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8926 name);
8927 *no_add_attrs = true;
8928 }
8929 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8930 {
8931 /* The argument must be a constant integer. */
8932 warning (OPT_Wattributes,
8933 "%qE attribute argument not an integer constant",
8934 name);
8935 *no_add_attrs = true;
8936 }
8937 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8938 {
8939 /* The argument value must be between 0 to 255. */
8940 warning (OPT_Wattributes,
8941 "%qE attribute argument should be between 0 to 255",
8942 name);
8943 *no_add_attrs = true;
8944 }
8945 return NULL_TREE;
8946 }
8947
8948 /* Returns 1 if current function has been assigned the attribute
8949 'function_vector'. */
8950 int
8951 sh2a_is_function_vector_call (rtx x)
8952 {
8953 if (GET_CODE (x) == SYMBOL_REF
8954 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8955 {
8956 tree tr = SYMBOL_REF_DECL (x);
8957
8958 if (sh2a_function_vector_p (tr))
8959 return 1;
8960 }
8961
8962 return 0;
8963 }
8964
8965 /* Returns the function vector number, if the the attribute
8966 'function_vector' is assigned, otherwise returns zero. */
8967 int
8968 sh2a_get_function_vector_number (rtx x)
8969 {
8970 int num;
8971 tree list, t;
8972
8973 if ((GET_CODE (x) == SYMBOL_REF)
8974 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8975 {
8976 t = SYMBOL_REF_DECL (x);
8977
8978 if (TREE_CODE (t) != FUNCTION_DECL)
8979 return 0;
8980
8981 list = SH_ATTRIBUTES (t);
8982 while (list)
8983 {
8984 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8985 {
8986 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8987 return num;
8988 }
8989
8990 list = TREE_CHAIN (list);
8991 }
8992
8993 return 0;
8994 }
8995 else
8996 return 0;
8997 }
8998
8999 /* Handle an "sp_switch" attribute; arguments as in
9000 struct attribute_spec.handler. */
9001 static tree
9002 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9003 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9004 {
9005 if (TREE_CODE (*node) != FUNCTION_DECL)
9006 {
9007 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9008 name);
9009 *no_add_attrs = true;
9010 }
9011 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9012 {
9013 /* The argument must be a constant string. */
9014 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9015 name);
9016 *no_add_attrs = true;
9017 }
9018
9019 return NULL_TREE;
9020 }
9021
9022 /* Handle an "trap_exit" attribute; arguments as in
9023 struct attribute_spec.handler. */
9024 static tree
9025 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9026 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9027 {
9028 if (TREE_CODE (*node) != FUNCTION_DECL)
9029 {
9030 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9031 name);
9032 *no_add_attrs = true;
9033 }
9034 /* The argument specifies a trap number to be used in a trapa instruction
9035 at function exit (instead of an rte instruction). */
9036 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9037 {
9038 /* The argument must be a constant integer. */
9039 warning (OPT_Wattributes, "%qE attribute argument not an "
9040 "integer constant", name);
9041 *no_add_attrs = true;
9042 }
9043
9044 return NULL_TREE;
9045 }
9046
9047 static tree
9048 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9049 tree name ATTRIBUTE_UNUSED,
9050 tree args ATTRIBUTE_UNUSED,
9051 int flags ATTRIBUTE_UNUSED,
9052 bool *no_add_attrs ATTRIBUTE_UNUSED)
9053 {
9054 return NULL_TREE;
9055 }
9056
9057 /* True if __attribute__((renesas)) or -mrenesas. */
9058 int
9059 sh_attr_renesas_p (const_tree td)
9060 {
9061 if (TARGET_HITACHI)
9062 return 1;
9063 if (td == 0)
9064 return 0;
9065 if (DECL_P (td))
9066 td = TREE_TYPE (td);
9067 if (td == error_mark_node)
9068 return 0;
9069 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9070 != NULL_TREE);
9071 }
9072
9073 /* True if __attribute__((renesas)) or -mrenesas, for the current
9074 function. */
9075 int
9076 sh_cfun_attr_renesas_p (void)
9077 {
9078 return sh_attr_renesas_p (current_function_decl);
9079 }
9080
9081 int
9082 sh_cfun_interrupt_handler_p (void)
9083 {
9084 return (lookup_attribute ("interrupt_handler",
9085 DECL_ATTRIBUTES (current_function_decl))
9086 != NULL_TREE);
9087 }
9088
9089 /* Returns 1 if FUNC has been assigned the attribute
9090 "function_vector". */
9091 int
9092 sh2a_function_vector_p (tree func)
9093 {
9094 tree list;
9095 if (TREE_CODE (func) != FUNCTION_DECL)
9096 return 0;
9097
9098 list = SH_ATTRIBUTES (func);
9099 while (list)
9100 {
9101 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9102 return 1;
9103
9104 list = TREE_CHAIN (list);
9105 }
9106 return 0;
9107 }
9108
9109 /* Returns TRUE if given tree has the "resbank" attribute. */
9110
9111 int
9112 sh_cfun_resbank_handler_p (void)
9113 {
9114 return ((lookup_attribute ("resbank",
9115 DECL_ATTRIBUTES (current_function_decl))
9116 != NULL_TREE)
9117 && (lookup_attribute ("interrupt_handler",
9118 DECL_ATTRIBUTES (current_function_decl))
9119 != NULL_TREE) && TARGET_SH2A);
9120 }
9121
9122 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9123
9124 static const char *
9125 sh_check_pch_target_flags (int old_flags)
9126 {
9127 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9128 | MASK_SH_E | MASK_HARD_SH4
9129 | MASK_FPU_SINGLE | MASK_SH4))
9130 return _("created and used with different architectures / ABIs");
9131 if ((old_flags ^ target_flags) & MASK_HITACHI)
9132 return _("created and used with different ABIs");
9133 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9134 return _("created and used with different endianness");
9135 return NULL;
9136 }
9137 \f
9138 /* Predicates used by the templates. */
9139
9140 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9141 Used only in general_movsrc_operand. */
9142
9143 int
9144 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9145 {
9146 switch (REGNO (op))
9147 {
9148 case PR_REG:
9149 case MACL_REG:
9150 case MACH_REG:
9151 return 1;
9152 }
9153 return 0;
9154 }
9155
9156 /* Nonzero if OP is a floating point value with value 0.0. */
9157
9158 int
9159 fp_zero_operand (rtx op)
9160 {
9161 REAL_VALUE_TYPE r;
9162
9163 if (GET_MODE (op) != SFmode)
9164 return 0;
9165
9166 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9167 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9168 }
9169
9170 /* Nonzero if OP is a floating point value with value 1.0. */
9171
9172 int
9173 fp_one_operand (rtx op)
9174 {
9175 REAL_VALUE_TYPE r;
9176
9177 if (GET_MODE (op) != SFmode)
9178 return 0;
9179
9180 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9181 return REAL_VALUES_EQUAL (r, dconst1);
9182 }
9183
9184 /* In general mode switching is used. If we are
9185 compiling without -mfmovd, movsf_ie isn't taken into account for
9186 mode switching. We could check in machine_dependent_reorg for
9187 cases where we know we are in single precision mode, but there is
9188 interface to find that out during reload, so we must avoid
9189 choosing an fldi alternative during reload and thus failing to
9190 allocate a scratch register for the constant loading. */
9191 int
9192 fldi_ok (void)
9193 {
9194 return 1;
9195 }
9196
9197 int
9198 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9199 {
9200 enum rtx_code code = GET_CODE (op);
9201 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9202 }
9203
9204 /* Return the TLS type for TLS symbols, 0 for otherwise. */
9205 enum tls_model
9206 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9207 {
9208 if (GET_CODE (op) != SYMBOL_REF)
9209 return TLS_MODEL_NONE;
9210 return SYMBOL_REF_TLS_MODEL (op);
9211 }
9212 \f
9213 /* Return the destination address of a branch. */
9214
9215 static int
9216 branch_dest (rtx branch)
9217 {
9218 rtx dest = SET_SRC (PATTERN (branch));
9219 int dest_uid;
9220
9221 if (GET_CODE (dest) == IF_THEN_ELSE)
9222 dest = XEXP (dest, 1);
9223 dest = XEXP (dest, 0);
9224 dest_uid = INSN_UID (dest);
9225 return INSN_ADDRESSES (dest_uid);
9226 }
9227 \f
9228 /* Return nonzero if REG is not used after INSN.
9229 We assume REG is a reload reg, and therefore does
9230 not live past labels. It may live past calls or jumps though. */
9231 int
9232 reg_unused_after (rtx reg, rtx insn)
9233 {
9234 enum rtx_code code;
9235 rtx set;
9236
9237 /* If the reg is set by this instruction, then it is safe for our
9238 case. Disregard the case where this is a store to memory, since
9239 we are checking a register used in the store address. */
9240 set = single_set (insn);
9241 if (set && !MEM_P (SET_DEST (set))
9242 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9243 return 1;
9244
9245 while ((insn = NEXT_INSN (insn)))
9246 {
9247 rtx set;
9248 if (!INSN_P (insn))
9249 continue;
9250
9251 code = GET_CODE (insn);
9252
9253 #if 0
9254 /* If this is a label that existed before reload, then the register
9255 if dead here. However, if this is a label added by reorg, then
9256 the register may still be live here. We can't tell the difference,
9257 so we just ignore labels completely. */
9258 if (code == CODE_LABEL)
9259 return 1;
9260 /* else */
9261 #endif
9262
9263 if (code == JUMP_INSN)
9264 return 0;
9265
9266 /* If this is a sequence, we must handle them all at once.
9267 We could have for instance a call that sets the target register,
9268 and an insn in a delay slot that uses the register. In this case,
9269 we must return 0. */
9270 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9271 {
9272 int i;
9273 int retval = 0;
9274
9275 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9276 {
9277 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9278 rtx set = single_set (this_insn);
9279
9280 if (CALL_P (this_insn))
9281 code = CALL_INSN;
9282 else if (JUMP_P (this_insn))
9283 {
9284 if (INSN_ANNULLED_BRANCH_P (this_insn))
9285 return 0;
9286 code = JUMP_INSN;
9287 }
9288
9289 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9290 return 0;
9291 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9292 {
9293 if (!MEM_P (SET_DEST (set)))
9294 retval = 1;
9295 else
9296 return 0;
9297 }
9298 if (set == 0
9299 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9300 return 0;
9301 }
9302 if (retval == 1)
9303 return 1;
9304 else if (code == JUMP_INSN)
9305 return 0;
9306 }
9307
9308 set = single_set (insn);
9309 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9310 return 0;
9311 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9312 return !MEM_P (SET_DEST (set));
9313 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9314 return 0;
9315
9316 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9317 return 1;
9318 }
9319 return 1;
9320 }
9321 \f
9322 #include "ggc.h"
9323
9324 static GTY(()) rtx fpscr_rtx;
9325 rtx
9326 get_fpscr_rtx (void)
9327 {
9328 if (! fpscr_rtx)
9329 {
9330 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9331 REG_USERVAR_P (fpscr_rtx) = 1;
9332 mark_user_reg (fpscr_rtx);
9333 }
9334 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9335 mark_user_reg (fpscr_rtx);
9336 return fpscr_rtx;
9337 }
9338
9339 static GTY(()) tree fpscr_values;
9340
9341 static void
9342 emit_fpu_switch (rtx scratch, int index)
9343 {
9344 rtx dst, src;
9345
9346 if (fpscr_values == NULL)
9347 {
9348 tree t;
9349
9350 t = build_index_type (integer_one_node);
9351 t = build_array_type (integer_type_node, t);
9352 t = build_decl (BUILTINS_LOCATION,
9353 VAR_DECL, get_identifier ("__fpscr_values"), t);
9354 DECL_ARTIFICIAL (t) = 1;
9355 DECL_IGNORED_P (t) = 1;
9356 DECL_EXTERNAL (t) = 1;
9357 TREE_STATIC (t) = 1;
9358 TREE_PUBLIC (t) = 1;
9359 TREE_USED (t) = 1;
9360
9361 fpscr_values = t;
9362 }
9363
9364 src = DECL_RTL (fpscr_values);
9365 if (!can_create_pseudo_p ())
9366 {
9367 emit_move_insn (scratch, XEXP (src, 0));
9368 if (index != 0)
9369 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9370 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9371 }
9372 else
9373 src = adjust_address (src, PSImode, index * 4);
9374
9375 dst = get_fpscr_rtx ();
9376 emit_move_insn (dst, src);
9377 }
9378
9379 void
9380 emit_sf_insn (rtx pat)
9381 {
9382 emit_insn (pat);
9383 }
9384
9385 void
9386 emit_df_insn (rtx pat)
9387 {
9388 emit_insn (pat);
9389 }
9390
9391 void
9392 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9393 {
9394 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9395 }
9396
9397 void
9398 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9399 {
9400 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9401 get_fpscr_rtx ()));
9402 }
9403
9404 void
9405 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9406 {
9407 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9408 }
9409
9410 void
9411 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9412 {
9413 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9414 get_fpscr_rtx ()));
9415 }
9416 \f
9417 static rtx get_free_reg (HARD_REG_SET);
9418
9419 /* This function returns a register to use to load the address to load
9420 the fpscr from. Currently it always returns r1 or r7, but when we are
9421 able to use pseudo registers after combine, or have a better mechanism
9422 for choosing a register, it should be done here. */
9423 /* REGS_LIVE is the liveness information for the point for which we
9424 need this allocation. In some bare-bones exit blocks, r1 is live at the
9425 start. We can even have all of r0..r3 being live:
9426 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9427 INSN before which new insns are placed with will clobber the register
9428 we return. If a basic block consists only of setting the return value
9429 register to a pseudo and using that register, the return value is not
9430 live before or after this block, yet we we'll insert our insns right in
9431 the middle. */
9432
9433 static rtx
9434 get_free_reg (HARD_REG_SET regs_live)
9435 {
9436 if (! TEST_HARD_REG_BIT (regs_live, 1))
9437 return gen_rtx_REG (Pmode, 1);
9438
9439 /* Hard reg 1 is live; since this is a small register classes target,
9440 there shouldn't be anything but a jump before the function end. */
9441 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9442 return gen_rtx_REG (Pmode, 7);
9443 }
9444
9445 /* This function will set the fpscr from memory.
9446 MODE is the mode we are setting it to. */
9447 void
9448 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9449 {
9450 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9451 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9452 rtx addr_reg;
9453
9454 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9455 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9456 }
9457
9458 /* Is the given character a logical line separator for the assembler? */
9459 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9460 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9461 #endif
9462
9463 int
9464 sh_insn_length_adjustment (rtx insn)
9465 {
9466 /* Instructions with unfilled delay slots take up an extra two bytes for
9467 the nop in the delay slot. */
9468 if (((NONJUMP_INSN_P (insn)
9469 && GET_CODE (PATTERN (insn)) != USE
9470 && GET_CODE (PATTERN (insn)) != CLOBBER)
9471 || CALL_P (insn)
9472 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9473 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9474 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9475 return 2;
9476
9477 /* SH2e has a bug that prevents the use of annulled branches, so if
9478 the delay slot is not filled, we'll have to put a NOP in it. */
9479 if (sh_cpu_attr == CPU_SH2E
9480 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9481 && get_attr_type (insn) == TYPE_CBRANCH
9482 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9483 return 2;
9484
9485 /* sh-dsp parallel processing insn take four bytes instead of two. */
9486
9487 if (NONJUMP_INSN_P (insn))
9488 {
9489 int sum = 0;
9490 rtx body = PATTERN (insn);
9491 const char *templ;
9492 char c;
9493 int maybe_label = 1;
9494
9495 if (GET_CODE (body) == ASM_INPUT)
9496 templ = XSTR (body, 0);
9497 else if (asm_noperands (body) >= 0)
9498 templ
9499 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9500 else
9501 return 0;
9502 do
9503 {
9504 int ppi_adjust = 0;
9505
9506 do
9507 c = *templ++;
9508 while (c == ' ' || c == '\t');
9509 /* all sh-dsp parallel-processing insns start with p.
9510 The only non-ppi sh insn starting with p is pref.
9511 The only ppi starting with pr is prnd. */
9512 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9513 ppi_adjust = 2;
9514 /* The repeat pseudo-insn expands two three insns, a total of
9515 six bytes in size. */
9516 else if ((c == 'r' || c == 'R')
9517 && ! strncasecmp ("epeat", templ, 5))
9518 ppi_adjust = 4;
9519 while (c && c != '\n'
9520 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9521 {
9522 /* If this is a label, it is obviously not a ppi insn. */
9523 if (c == ':' && maybe_label)
9524 {
9525 ppi_adjust = 0;
9526 break;
9527 }
9528 else if (c == '\'' || c == '"')
9529 maybe_label = 0;
9530 c = *templ++;
9531 }
9532 sum += ppi_adjust;
9533 maybe_label = c != ':';
9534 }
9535 while (c);
9536 return sum;
9537 }
9538 return 0;
9539 }
9540 \f
9541 /* Return TRUE for a valid displacement for the REG+disp addressing
9542 with MODE. */
9543
9544 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9545 into the FRx registers. We implement this by setting the maximum offset
9546 to zero when the value is SFmode. This also restricts loading of SFmode
9547 values into the integer registers, but that can't be helped. */
9548
9549 /* The SH allows a displacement in a QI or HI amode, but only when the
9550 other operand is R0. GCC doesn't handle this very well, so we forgot
9551 all of that.
9552
9553 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9554 DI can be any number 0..60. */
9555
9556 bool
9557 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9558 {
9559 if (CONST_INT_P (op))
9560 {
9561 if (TARGET_SHMEDIA)
9562 {
9563 int size;
9564
9565 /* Check if this the address of an unaligned load / store. */
9566 if (mode == VOIDmode)
9567 return CONST_OK_FOR_I06 (INTVAL (op));
9568
9569 size = GET_MODE_SIZE (mode);
9570 return (!(INTVAL (op) & (size - 1))
9571 && INTVAL (op) >= -512 * size
9572 && INTVAL (op) < 512 * size);
9573 }
9574
9575 if (TARGET_SH2A)
9576 {
9577 if (GET_MODE_SIZE (mode) == 1
9578 && (unsigned) INTVAL (op) < 4096)
9579 return true;
9580 }
9581
9582 if ((GET_MODE_SIZE (mode) == 4
9583 && (unsigned) INTVAL (op) < 64
9584 && !(INTVAL (op) & 3)
9585 && !(TARGET_SH2E && mode == SFmode))
9586 || (GET_MODE_SIZE (mode) == 4
9587 && (unsigned) INTVAL (op) < 16383
9588 && !(INTVAL (op) & 3) && TARGET_SH2A))
9589 return true;
9590
9591 if ((GET_MODE_SIZE (mode) == 8
9592 && (unsigned) INTVAL (op) < 60
9593 && !(INTVAL (op) & 3)
9594 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9595 || ((GET_MODE_SIZE (mode)==8)
9596 && (unsigned) INTVAL (op) < 8192
9597 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9598 && (TARGET_SH2A && mode == DFmode)))
9599 return true;
9600 }
9601
9602 return false;
9603 }
9604
9605 /* Recognize an RTL expression that is a valid memory address for
9606 an instruction.
9607 The MODE argument is the machine mode for the MEM expression
9608 that wants to use this address.
9609 Allow REG
9610 REG+disp
9611 REG+r0
9612 REG++
9613 --REG */
9614
9615 static bool
9616 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9617 {
9618 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9619 return true;
9620 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9621 && ! TARGET_SHMEDIA
9622 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9623 return true;
9624 else if (GET_CODE (x) == PLUS
9625 && (mode != PSImode || reload_completed))
9626 {
9627 rtx xop0 = XEXP (x, 0);
9628 rtx xop1 = XEXP (x, 1);
9629
9630 if (GET_MODE_SIZE (mode) <= 8
9631 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9632 && sh_legitimate_index_p (mode, xop1))
9633 return true;
9634
9635 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9636 || ((xop0 == stack_pointer_rtx
9637 || xop0 == hard_frame_pointer_rtx)
9638 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9639 || ((xop1 == stack_pointer_rtx
9640 || xop1 == hard_frame_pointer_rtx)
9641 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9642 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9643 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9644 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9645 && TARGET_FMOVD && mode == DFmode)))
9646 {
9647 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9648 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9649 return true;
9650 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9651 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9652 return true;
9653 }
9654 }
9655
9656 return false;
9657 }
9658 \f
9659 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9660 isn't protected by a PIC unspec. */
9661 int
9662 nonpic_symbol_mentioned_p (rtx x)
9663 {
9664 register const char *fmt;
9665 register int i;
9666
9667 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9668 || GET_CODE (x) == PC)
9669 return 1;
9670
9671 /* We don't want to look into the possible MEM location of a
9672 CONST_DOUBLE, since we're not going to use it, in general. */
9673 if (GET_CODE (x) == CONST_DOUBLE)
9674 return 0;
9675
9676 if (GET_CODE (x) == UNSPEC
9677 && (XINT (x, 1) == UNSPEC_PIC
9678 || XINT (x, 1) == UNSPEC_GOT
9679 || XINT (x, 1) == UNSPEC_GOTOFF
9680 || XINT (x, 1) == UNSPEC_GOTPLT
9681 || XINT (x, 1) == UNSPEC_GOTTPOFF
9682 || XINT (x, 1) == UNSPEC_DTPOFF
9683 || XINT (x, 1) == UNSPEC_TPOFF
9684 || XINT (x, 1) == UNSPEC_PLT
9685 || XINT (x, 1) == UNSPEC_SYMOFF
9686 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9687 return 0;
9688
9689 fmt = GET_RTX_FORMAT (GET_CODE (x));
9690 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9691 {
9692 if (fmt[i] == 'E')
9693 {
9694 register int j;
9695
9696 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9697 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9698 return 1;
9699 }
9700 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9701 return 1;
9702 }
9703
9704 return 0;
9705 }
9706
9707 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9708 @GOTOFF in `reg'. */
9709 rtx
9710 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9711 rtx reg)
9712 {
9713 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9714 return orig;
9715
9716 if (GET_CODE (orig) == LABEL_REF
9717 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9718 {
9719 if (reg == 0)
9720 reg = gen_reg_rtx (Pmode);
9721
9722 emit_insn (gen_symGOTOFF2reg (reg, orig));
9723 return reg;
9724 }
9725 else if (GET_CODE (orig) == SYMBOL_REF)
9726 {
9727 if (reg == 0)
9728 reg = gen_reg_rtx (Pmode);
9729
9730 emit_insn (gen_symGOT2reg (reg, orig));
9731 return reg;
9732 }
9733 return orig;
9734 }
9735
9736 /* Try machine-dependent ways of modifying an illegitimate address
9737 to be legitimate. If we find one, return the new, valid address.
9738 Otherwise, return X.
9739
9740 For the SH, if X is almost suitable for indexing, but the offset is
9741 out of range, convert it into a normal form so that CSE has a chance
9742 of reducing the number of address registers used. */
9743
9744 static rtx
9745 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9746 {
9747 if (flag_pic)
9748 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9749
9750 if (GET_CODE (x) == PLUS
9751 && (GET_MODE_SIZE (mode) == 4
9752 || GET_MODE_SIZE (mode) == 8)
9753 && CONST_INT_P (XEXP (x, 1))
9754 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9755 && ! TARGET_SHMEDIA
9756 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9757 && ! (TARGET_SH2E && mode == SFmode))
9758 {
9759 rtx index_rtx = XEXP (x, 1);
9760 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9761 rtx sum;
9762
9763 /* On rare occasions, we might get an unaligned pointer
9764 that is indexed in a way to give an aligned address.
9765 Therefore, keep the lower two bits in offset_base. */
9766 /* Instead of offset_base 128..131 use 124..127, so that
9767 simple add suffices. */
9768 if (offset > 127)
9769 offset_base = ((offset + 4) & ~60) - 4;
9770 else
9771 offset_base = offset & ~60;
9772
9773 /* Sometimes the normal form does not suit DImode. We
9774 could avoid that by using smaller ranges, but that
9775 would give less optimized code when SImode is
9776 prevalent. */
9777 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9778 {
9779 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9780 GEN_INT (offset_base), NULL_RTX, 0,
9781 OPTAB_LIB_WIDEN);
9782
9783 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9784 }
9785 }
9786
9787 return x;
9788 }
9789
9790 /* Attempt to replace *P, which is an address that needs reloading, with
9791 a valid memory address for an operand of mode MODE.
9792 Like for sh_legitimize_address, for the SH we try to get a normal form
9793 of the address. That will allow inheritance of the address reloads. */
9794
9795 bool
9796 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9797 int itype)
9798 {
9799 enum reload_type type = (enum reload_type) itype;
9800
9801 if (GET_CODE (*p) == PLUS
9802 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9803 && CONST_INT_P (XEXP (*p, 1))
9804 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9805 && ! TARGET_SHMEDIA
9806 && ! (TARGET_SH4 && mode == DFmode)
9807 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9808 && (ALLOW_INDEXED_ADDRESS
9809 || XEXP (*p, 0) == stack_pointer_rtx
9810 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9811 {
9812 rtx index_rtx = XEXP (*p, 1);
9813 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9814 rtx sum;
9815
9816 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9817 {
9818 push_reload (*p, NULL_RTX, p, NULL,
9819 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9820 goto win;
9821 }
9822 if (TARGET_SH2E && mode == SFmode)
9823 {
9824 *p = copy_rtx (*p);
9825 push_reload (*p, NULL_RTX, p, NULL,
9826 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9827 goto win;
9828 }
9829 /* Instead of offset_base 128..131 use 124..127, so that
9830 simple add suffices. */
9831 if (offset > 127)
9832 offset_base = ((offset + 4) & ~60) - 4;
9833 else
9834 offset_base = offset & ~60;
9835 /* Sometimes the normal form does not suit DImode. We could avoid
9836 that by using smaller ranges, but that would give less optimized
9837 code when SImode is prevalent. */
9838 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9839 {
9840 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9841 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9842 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9843 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9844 goto win;
9845 }
9846 }
9847 /* We must re-recognize what we created before. */
9848 else if (GET_CODE (*p) == PLUS
9849 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9850 && GET_CODE (XEXP (*p, 0)) == PLUS
9851 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9852 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9853 && CONST_INT_P (XEXP (*p, 1))
9854 && ! TARGET_SHMEDIA
9855 && ! (TARGET_SH2E && mode == SFmode))
9856 {
9857 /* Because this address is so complex, we know it must have
9858 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9859 it is already unshared, and needs no further unsharing. */
9860 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9861 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9862 goto win;
9863 }
9864
9865 return false;
9866
9867 win:
9868 return true;
9869 }
9870
9871 /* Mark the use of a constant in the literal table. If the constant
9872 has multiple labels, make it unique. */
9873 static rtx
9874 mark_constant_pool_use (rtx x)
9875 {
9876 rtx insn, lab, pattern;
9877
9878 if (x == NULL)
9879 return x;
9880
9881 switch (GET_CODE (x))
9882 {
9883 case LABEL_REF:
9884 x = XEXP (x, 0);
9885 case CODE_LABEL:
9886 break;
9887 default:
9888 return x;
9889 }
9890
9891 /* Get the first label in the list of labels for the same constant
9892 and delete another labels in the list. */
9893 lab = x;
9894 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9895 {
9896 if (!LABEL_P (insn)
9897 || LABEL_REFS (insn) != NEXT_INSN (insn))
9898 break;
9899 lab = insn;
9900 }
9901
9902 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9903 INSN_DELETED_P (insn) = 1;
9904
9905 /* Mark constants in a window. */
9906 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9907 {
9908 if (!NONJUMP_INSN_P (insn))
9909 continue;
9910
9911 pattern = PATTERN (insn);
9912 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9913 continue;
9914
9915 switch (XINT (pattern, 1))
9916 {
9917 case UNSPECV_CONST2:
9918 case UNSPECV_CONST4:
9919 case UNSPECV_CONST8:
9920 XVECEXP (pattern, 0, 1) = const1_rtx;
9921 break;
9922 case UNSPECV_WINDOW_END:
9923 if (XVECEXP (pattern, 0, 0) == x)
9924 return lab;
9925 break;
9926 case UNSPECV_CONST_END:
9927 return lab;
9928 default:
9929 break;
9930 }
9931 }
9932
9933 return lab;
9934 }
9935 \f
9936 /* Return true if it's possible to redirect BRANCH1 to the destination
9937 of an unconditional jump BRANCH2. We only want to do this if the
9938 resulting branch will have a short displacement. */
9939 int
9940 sh_can_redirect_branch (rtx branch1, rtx branch2)
9941 {
9942 if (flag_expensive_optimizations && simplejump_p (branch2))
9943 {
9944 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9945 rtx insn;
9946 int distance;
9947
9948 for (distance = 0, insn = NEXT_INSN (branch1);
9949 insn && distance < 256;
9950 insn = PREV_INSN (insn))
9951 {
9952 if (insn == dest)
9953 return 1;
9954 else
9955 distance += get_attr_length (insn);
9956 }
9957 for (distance = 0, insn = NEXT_INSN (branch1);
9958 insn && distance < 256;
9959 insn = NEXT_INSN (insn))
9960 {
9961 if (insn == dest)
9962 return 1;
9963 else
9964 distance += get_attr_length (insn);
9965 }
9966 }
9967 return 0;
9968 }
9969
9970 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9971 int
9972 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9973 unsigned int new_reg)
9974 {
9975 /* Interrupt functions can only use registers that have already been
9976 saved by the prologue, even if they would normally be
9977 call-clobbered. */
9978
9979 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9980 return 0;
9981
9982 return 1;
9983 }
9984
9985 /* Function to update the integer COST
9986 based on the relationship between INSN that is dependent on
9987 DEP_INSN through the dependence LINK. The default is to make no
9988 adjustment to COST. This can be used for example to specify to
9989 the scheduler that an output- or anti-dependence does not incur
9990 the same cost as a data-dependence. The return value should be
9991 the new value for COST. */
9992 static int
9993 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9994 {
9995 rtx reg, use_pat;
9996
9997 if (TARGET_SHMEDIA)
9998 {
9999 /* On SHmedia, if the dependence is an anti-dependence or
10000 output-dependence, there is no cost. */
10001 if (REG_NOTE_KIND (link) != 0)
10002 {
10003 /* However, dependencies between target register loads and
10004 uses of the register in a subsequent block that are separated
10005 by a conditional branch are not modelled - we have to do with
10006 the anti-dependency between the target register load and the
10007 conditional branch that ends the current block. */
10008 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10009 && GET_CODE (PATTERN (dep_insn)) == SET
10010 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10011 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10012 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10013 {
10014 int orig_cost = cost;
10015 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10016 rtx target = ((! note
10017 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10018 ? insn : JUMP_LABEL (insn));
10019 /* On the likely path, the branch costs 1, on the unlikely path,
10020 it costs 3. */
10021 cost--;
10022 do
10023 target = next_active_insn (target);
10024 while (target && ! flow_dependent_p (target, dep_insn)
10025 && --cost > 0);
10026 /* If two branches are executed in immediate succession, with the
10027 first branch properly predicted, this causes a stall at the
10028 second branch, hence we won't need the target for the
10029 second branch for two cycles after the launch of the first
10030 branch. */
10031 if (cost > orig_cost - 2)
10032 cost = orig_cost - 2;
10033 }
10034 else
10035 cost = 0;
10036 }
10037
10038 else if (get_attr_is_mac_media (insn)
10039 && get_attr_is_mac_media (dep_insn))
10040 cost = 1;
10041
10042 else if (! reload_completed
10043 && GET_CODE (PATTERN (insn)) == SET
10044 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10045 && GET_CODE (PATTERN (dep_insn)) == SET
10046 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10047 && cost < 4)
10048 cost = 4;
10049 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10050 that is needed at the target. */
10051 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10052 && ! flow_dependent_p (insn, dep_insn))
10053 cost--;
10054 }
10055 else if (REG_NOTE_KIND (link) == 0)
10056 {
10057 enum attr_type type;
10058 rtx dep_set;
10059
10060 if (recog_memoized (insn) < 0
10061 || recog_memoized (dep_insn) < 0)
10062 return cost;
10063
10064 dep_set = single_set (dep_insn);
10065
10066 /* The latency that we specify in the scheduling description refers
10067 to the actual output, not to an auto-increment register; for that,
10068 the latency is one. */
10069 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10070 {
10071 rtx set = single_set (insn);
10072
10073 if (set
10074 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10075 && (!MEM_P (SET_DEST (set))
10076 || !reg_mentioned_p (SET_DEST (dep_set),
10077 XEXP (SET_DEST (set), 0))))
10078 cost = 1;
10079 }
10080 /* The only input for a call that is timing-critical is the
10081 function's address. */
10082 if (CALL_P (insn))
10083 {
10084 rtx call = PATTERN (insn);
10085
10086 if (GET_CODE (call) == PARALLEL)
10087 call = XVECEXP (call, 0 ,0);
10088 if (GET_CODE (call) == SET)
10089 call = SET_SRC (call);
10090 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10091 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10092 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10093 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10094 cost -= TARGET_SH4_300 ? 3 : 6;
10095 }
10096 /* Likewise, the most timing critical input for an sfuncs call
10097 is the function address. However, sfuncs typically start
10098 using their arguments pretty quickly.
10099 Assume a four cycle delay for SH4 before they are needed.
10100 Cached ST40-300 calls are quicker, so assume only a one
10101 cycle delay there.
10102 ??? Maybe we should encode the delays till input registers
10103 are needed by sfuncs into the sfunc call insn. */
10104 /* All sfunc calls are parallels with at least four components.
10105 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10106 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10107 && XVECLEN (PATTERN (insn), 0) >= 4
10108 && (reg = sfunc_uses_reg (insn)))
10109 {
10110 if (! reg_set_p (reg, dep_insn))
10111 cost -= TARGET_SH4_300 ? 1 : 4;
10112 }
10113 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10114 {
10115 enum attr_type dep_type = get_attr_type (dep_insn);
10116
10117 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10118 cost--;
10119 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10120 && (type = get_attr_type (insn)) != TYPE_CALL
10121 && type != TYPE_SFUNC)
10122 cost--;
10123 /* When the preceding instruction loads the shift amount of
10124 the following SHAD/SHLD, the latency of the load is increased
10125 by 1 cycle. */
10126 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10127 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10128 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10129 XEXP (SET_SRC (single_set (insn)),
10130 1)))
10131 cost++;
10132 /* When an LS group instruction with a latency of less than
10133 3 cycles is followed by a double-precision floating-point
10134 instruction, FIPR, or FTRV, the latency of the first
10135 instruction is increased to 3 cycles. */
10136 else if (cost < 3
10137 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10138 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10139 cost = 3;
10140 /* The lsw register of a double-precision computation is ready one
10141 cycle earlier. */
10142 else if (reload_completed
10143 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10144 && (use_pat = single_set (insn))
10145 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10146 SET_SRC (use_pat)))
10147 cost -= 1;
10148
10149 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10150 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10151 cost -= 1;
10152 }
10153 else if (TARGET_SH4_300)
10154 {
10155 /* Stores need their input register two cycles later. */
10156 if (dep_set && cost >= 1
10157 && ((type = get_attr_type (insn)) == TYPE_STORE
10158 || type == TYPE_PSTORE
10159 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10160 {
10161 rtx set = single_set (insn);
10162
10163 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10164 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10165 {
10166 cost -= 2;
10167 /* But don't reduce the cost below 1 if the address depends
10168 on a side effect of dep_insn. */
10169 if (cost < 1
10170 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10171 cost = 1;
10172 }
10173 }
10174 }
10175 }
10176 /* An anti-dependence penalty of two applies if the first insn is a double
10177 precision fadd / fsub / fmul. */
10178 else if (!TARGET_SH4_300
10179 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10180 && recog_memoized (dep_insn) >= 0
10181 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10182 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10183 /* A lot of alleged anti-flow dependences are fake,
10184 so check this one is real. */
10185 && flow_dependent_p (dep_insn, insn))
10186 cost = 2;
10187
10188 return cost;
10189 }
10190
10191 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10192 if DEP_INSN is anti-flow dependent on INSN. */
10193 static int
10194 flow_dependent_p (rtx insn, rtx dep_insn)
10195 {
10196 rtx tmp = PATTERN (insn);
10197
10198 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10199 return tmp == NULL_RTX;
10200 }
10201
10202 /* A helper function for flow_dependent_p called through note_stores. */
10203 static void
10204 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10205 {
10206 rtx * pinsn = (rtx *) data;
10207
10208 if (*pinsn && reg_referenced_p (x, *pinsn))
10209 *pinsn = NULL_RTX;
10210 }
10211
10212 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10213 'special function' patterns (type sfunc) that clobber pr, but that
10214 do not look like function calls to leaf_function_p. Hence we must
10215 do this extra check. */
10216 static int
10217 sh_pr_n_sets (void)
10218 {
10219 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10220 }
10221
10222 /* Return where to allocate pseudo for a given hard register initial
10223 value. */
10224 static rtx
10225 sh_allocate_initial_value (rtx hard_reg)
10226 {
10227 rtx x;
10228
10229 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10230 {
10231 if (current_function_is_leaf
10232 && ! sh_pr_n_sets ()
10233 && ! (TARGET_SHCOMPACT
10234 && ((crtl->args.info.call_cookie
10235 & ~ CALL_COOKIE_RET_TRAMP (1))
10236 || crtl->saves_all_registers)))
10237 x = hard_reg;
10238 else
10239 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10240 }
10241 else
10242 x = NULL_RTX;
10243
10244 return x;
10245 }
10246
10247 /* This function returns "2" to indicate dual issue for the SH4
10248 processor. To be used by the DFA pipeline description. */
10249 static int
10250 sh_issue_rate (void)
10251 {
10252 if (TARGET_SUPERSCALAR)
10253 return 2;
10254 else
10255 return 1;
10256 }
10257
10258 /* Functions for ready queue reordering for sched1. */
10259
10260 /* Get weight for mode for a set x. */
10261 static short
10262 find_set_regmode_weight (rtx x, enum machine_mode mode)
10263 {
10264 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10265 return 1;
10266 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10267 {
10268 if (REG_P (SET_DEST (x)))
10269 {
10270 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10271 return 1;
10272 else
10273 return 0;
10274 }
10275 return 1;
10276 }
10277 return 0;
10278 }
10279
10280 /* Get regmode weight for insn. */
10281 static short
10282 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10283 {
10284 short reg_weight = 0;
10285 rtx x;
10286
10287 /* Increment weight for each register born here. */
10288 x = PATTERN (insn);
10289 reg_weight += find_set_regmode_weight (x, mode);
10290 if (GET_CODE (x) == PARALLEL)
10291 {
10292 int j;
10293 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10294 {
10295 x = XVECEXP (PATTERN (insn), 0, j);
10296 reg_weight += find_set_regmode_weight (x, mode);
10297 }
10298 }
10299 /* Decrement weight for each register that dies here. */
10300 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10301 {
10302 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10303 {
10304 rtx note = XEXP (x, 0);
10305 if (REG_P (note) && GET_MODE (note) == mode)
10306 reg_weight--;
10307 }
10308 }
10309 return reg_weight;
10310 }
10311
10312 /* Calculate regmode weights for all insns of a basic block. */
10313 static void
10314 find_regmode_weight (basic_block b, enum machine_mode mode)
10315 {
10316 rtx insn, next_tail, head, tail;
10317
10318 get_ebb_head_tail (b, b, &head, &tail);
10319 next_tail = NEXT_INSN (tail);
10320
10321 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10322 {
10323 /* Handle register life information. */
10324 if (!INSN_P (insn))
10325 continue;
10326
10327 if (mode == SFmode)
10328 INSN_REGMODE_WEIGHT (insn, mode) =
10329 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10330 else if (mode == SImode)
10331 INSN_REGMODE_WEIGHT (insn, mode) =
10332 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10333 }
10334 }
10335
10336 /* Comparison function for ready queue sorting. */
10337 static int
10338 rank_for_reorder (const void *x, const void *y)
10339 {
10340 rtx tmp = *(const rtx *) y;
10341 rtx tmp2 = *(const rtx *) x;
10342
10343 /* The insn in a schedule group should be issued the first. */
10344 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10345 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10346
10347 /* If insns are equally good, sort by INSN_LUID (original insn order), This
10348 minimizes instruction movement, thus minimizing sched's effect on
10349 register pressure. */
10350 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10351 }
10352
10353 /* Resort the array A in which only element at index N may be out of order. */
10354 static void
10355 swap_reorder (rtx *a, int n)
10356 {
10357 rtx insn = a[n - 1];
10358 int i = n - 2;
10359
10360 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10361 {
10362 a[i + 1] = a[i];
10363 i -= 1;
10364 }
10365 a[i + 1] = insn;
10366 }
10367
10368 #define SCHED_REORDER(READY, N_READY) \
10369 do \
10370 { \
10371 if ((N_READY) == 2) \
10372 swap_reorder (READY, N_READY); \
10373 else if ((N_READY) > 2) \
10374 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10375 } \
10376 while (0)
10377
10378 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10379 macro. */
10380 static void
10381 ready_reorder (rtx *ready, int nready)
10382 {
10383 SCHED_REORDER (ready, nready);
10384 }
10385
10386 /* Count life regions of r0 for a block. */
10387 static int
10388 find_r0_life_regions (basic_block b)
10389 {
10390 rtx end, insn;
10391 rtx pset;
10392 rtx r0_reg;
10393 int live;
10394 int set;
10395 int death = 0;
10396
10397 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10398 {
10399 set = 1;
10400 live = 1;
10401 }
10402 else
10403 {
10404 set = 0;
10405 live = 0;
10406 }
10407
10408 insn = BB_HEAD (b);
10409 end = BB_END (b);
10410 r0_reg = gen_rtx_REG (SImode, R0_REG);
10411 while (1)
10412 {
10413 if (INSN_P (insn))
10414 {
10415 if (find_regno_note (insn, REG_DEAD, R0_REG))
10416 {
10417 death++;
10418 live = 0;
10419 }
10420 if (!live
10421 && (pset = single_set (insn))
10422 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10423 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10424 {
10425 set++;
10426 live = 1;
10427 }
10428 }
10429 if (insn == end)
10430 break;
10431 insn = NEXT_INSN (insn);
10432 }
10433 return set - death;
10434 }
10435
10436 /* Calculate regmode weights for all insns of all basic block. */
10437 static void
10438 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10439 int verbose ATTRIBUTE_UNUSED,
10440 int old_max_uid)
10441 {
10442 basic_block b;
10443
10444 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10445 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10446 r0_life_regions = 0;
10447
10448 FOR_EACH_BB_REVERSE (b)
10449 {
10450 find_regmode_weight (b, SImode);
10451 find_regmode_weight (b, SFmode);
10452 if (!reload_completed)
10453 r0_life_regions += find_r0_life_regions (b);
10454 }
10455
10456 CURR_REGMODE_PRESSURE (SImode) = 0;
10457 CURR_REGMODE_PRESSURE (SFmode) = 0;
10458
10459 }
10460
10461 /* Cleanup. */
10462 static void
10463 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10464 int verbose ATTRIBUTE_UNUSED)
10465 {
10466 if (regmode_weight[0])
10467 {
10468 free (regmode_weight[0]);
10469 regmode_weight[0] = NULL;
10470 }
10471 if (regmode_weight[1])
10472 {
10473 free (regmode_weight[1]);
10474 regmode_weight[1] = NULL;
10475 }
10476 }
10477
10478 /* The scalar modes supported differs from the default version in TImode
10479 for 32-bit SHMEDIA. */
10480 static bool
10481 sh_scalar_mode_supported_p (enum machine_mode mode)
10482 {
10483 if (TARGET_SHMEDIA32 && mode == TImode)
10484 return false;
10485
10486 return default_scalar_mode_supported_p (mode);
10487 }
10488
10489 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10490 keep count of register pressures on SImode and SFmode. */
10491 static int
10492 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10493 int sched_verbose ATTRIBUTE_UNUSED,
10494 rtx insn,
10495 int can_issue_more)
10496 {
10497 if (GET_CODE (PATTERN (insn)) != USE
10498 && GET_CODE (PATTERN (insn)) != CLOBBER)
10499 cached_can_issue_more = can_issue_more - 1;
10500 else
10501 cached_can_issue_more = can_issue_more;
10502
10503 if (reload_completed)
10504 return cached_can_issue_more;
10505
10506 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10507 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10508
10509 return cached_can_issue_more;
10510 }
10511
10512 static void
10513 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10514 int verbose ATTRIBUTE_UNUSED,
10515 int veclen ATTRIBUTE_UNUSED)
10516 {
10517 CURR_REGMODE_PRESSURE (SImode) = 0;
10518 CURR_REGMODE_PRESSURE (SFmode) = 0;
10519 }
10520
10521 /* Some magic numbers. */
10522 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
10523 functions that already have high pressure on r0. */
10524 #define R0_MAX_LIFE_REGIONS 2
10525 /* Register Pressure thresholds for SImode and SFmode registers. */
10526 #define SIMODE_MAX_WEIGHT 5
10527 #define SFMODE_MAX_WEIGHT 10
10528
10529 /* Return true if the pressure is high for MODE. */
10530 static short
10531 high_pressure (enum machine_mode mode)
10532 {
10533 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
10534 functions that already have high pressure on r0. */
10535 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10536 return 1;
10537
10538 if (mode == SFmode)
10539 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10540 else
10541 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10542 }
10543
10544 /* Reorder ready queue if register pressure is high. */
10545 static int
10546 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10547 int sched_verbose ATTRIBUTE_UNUSED,
10548 rtx *ready,
10549 int *n_readyp,
10550 int clock_var ATTRIBUTE_UNUSED)
10551 {
10552 if (reload_completed)
10553 return sh_issue_rate ();
10554
10555 if (high_pressure (SFmode) || high_pressure (SImode))
10556 {
10557 ready_reorder (ready, *n_readyp);
10558 }
10559
10560 return sh_issue_rate ();
10561 }
10562
10563 /* Skip cycles if the current register pressure is high. */
10564 static int
10565 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10566 int sched_verbose ATTRIBUTE_UNUSED,
10567 rtx *ready ATTRIBUTE_UNUSED,
10568 int *n_readyp ATTRIBUTE_UNUSED,
10569 int clock_var ATTRIBUTE_UNUSED)
10570 {
10571 if (reload_completed)
10572 return cached_can_issue_more;
10573
10574 if (high_pressure(SFmode) || high_pressure (SImode))
10575 skip_cycles = 1;
10576
10577 return cached_can_issue_more;
10578 }
10579
10580 /* Skip cycles without sorting the ready queue. This will move insn from
10581 Q->R. If this is the last cycle we are skipping; allow sorting of ready
10582 queue by sh_reorder. */
10583
10584 /* Generally, skipping these many cycles are sufficient for all insns to move
10585 from Q -> R. */
10586 #define MAX_SKIPS 8
10587
10588 static int
10589 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10590 int sched_verbose ATTRIBUTE_UNUSED,
10591 rtx insn ATTRIBUTE_UNUSED,
10592 int last_clock_var,
10593 int clock_var,
10594 int *sort_p)
10595 {
10596 if (reload_completed)
10597 return 0;
10598
10599 if (skip_cycles)
10600 {
10601 if ((clock_var - last_clock_var) < MAX_SKIPS)
10602 {
10603 *sort_p = 0;
10604 return 1;
10605 }
10606 /* If this is the last cycle we are skipping, allow reordering of R. */
10607 if ((clock_var - last_clock_var) == MAX_SKIPS)
10608 {
10609 *sort_p = 1;
10610 return 1;
10611 }
10612 }
10613
10614 skip_cycles = 0;
10615
10616 return 0;
10617 }
10618
10619 /* SHmedia requires registers for branches, so we can't generate new
10620 branches past reload. */
10621 static bool
10622 sh_cannot_modify_jumps_p (void)
10623 {
10624 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10625 }
10626
10627 static reg_class_t
10628 sh_target_reg_class (void)
10629 {
10630 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10631 }
10632
10633 static bool
10634 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10635 {
10636 HARD_REG_SET dummy;
10637 #if 0
10638 rtx insn;
10639 #endif
10640
10641 if (! shmedia_space_reserved_for_target_registers)
10642 return 0;
10643 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10644 return 0;
10645 if (calc_live_regs (&dummy) >= 6 * 8)
10646 return 1;
10647 return 0;
10648 }
10649
10650 static bool
10651 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10652 {
10653 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10654 }
10655 \f
10656 /*
10657 On the SH1..SH4, the trampoline looks like
10658 2 0002 D202 mov.l l2,r2
10659 1 0000 D301 mov.l l1,r3
10660 3 0004 422B jmp @r2
10661 4 0006 0009 nop
10662 5 0008 00000000 l1: .long area
10663 6 000c 00000000 l2: .long function
10664
10665 SH5 (compact) uses r1 instead of r3 for the static chain. */
10666
10667
10668 /* Emit RTL insns to initialize the variable parts of a trampoline.
10669 FNADDR is an RTX for the address of the function's pure code.
10670 CXT is an RTX for the static chain value for the function. */
10671
10672 static void
10673 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10674 {
10675 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10676 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10677
10678 if (TARGET_SHMEDIA64)
10679 {
10680 rtx tramp_templ;
10681 int fixed_len;
10682
10683 rtx movi1 = GEN_INT (0xcc000010);
10684 rtx shori1 = GEN_INT (0xc8000010);
10685 rtx src, dst;
10686
10687 /* The following trampoline works within a +- 128 KB range for cxt:
10688 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10689 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10690 gettr tr1,r1; blink tr0,r63 */
10691 /* Address rounding makes it hard to compute the exact bounds of the
10692 offset for this trampoline, but we have a rather generous offset
10693 range, so frame_offset should do fine as an upper bound. */
10694 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10695 {
10696 /* ??? could optimize this trampoline initialization
10697 by writing DImode words with two insns each. */
10698 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10699 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10700 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10701 insn = gen_rtx_AND (DImode, insn, mask);
10702 /* Or in ptb/u .,tr1 pattern */
10703 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10704 insn = force_operand (insn, NULL_RTX);
10705 insn = gen_lowpart (SImode, insn);
10706 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10707 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10708 insn = gen_rtx_AND (DImode, insn, mask);
10709 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10710 insn = gen_lowpart (SImode, insn);
10711 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10712 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10713 insn = gen_rtx_AND (DImode, insn, mask);
10714 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10715 insn = gen_lowpart (SImode, insn);
10716 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10717 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10718 insn = gen_rtx_AND (DImode, insn, mask);
10719 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10720 insn = gen_lowpart (SImode, insn);
10721 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10722 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10723 insn = gen_rtx_AND (DImode, insn, mask);
10724 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10725 insn = gen_lowpart (SImode, insn);
10726 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10727 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10728 GEN_INT (0x6bf10600));
10729 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10730 GEN_INT (0x4415fc10));
10731 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10732 GEN_INT (0x4401fff0));
10733 emit_insn (gen_ic_invalidate_line (tramp));
10734 return;
10735 }
10736 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10737 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10738
10739 tramp_templ = gen_datalabel_ref (tramp_templ);
10740 dst = tramp_mem;
10741 src = gen_const_mem (BLKmode, tramp_templ);
10742 set_mem_align (dst, 256);
10743 set_mem_align (src, 64);
10744 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10745
10746 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10747 emit_move_insn (adjust_address (tramp_mem, Pmode,
10748 fixed_len + GET_MODE_SIZE (Pmode)),
10749 cxt);
10750 emit_insn (gen_ic_invalidate_line (tramp));
10751 return;
10752 }
10753 else if (TARGET_SHMEDIA)
10754 {
10755 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10756 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10757 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10758 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10759 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10760 rotated 10 right, and higher 16 bit of every 32 selected. */
10761 rtx movishori
10762 = force_reg (V2HImode, (simplify_gen_subreg
10763 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10764 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10765 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10766
10767 fnaddr = force_reg (SImode, fnaddr);
10768 cxt = force_reg (SImode, cxt);
10769 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10770 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10771 movishori));
10772 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10773 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10774 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10775 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10776 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10777 gen_rtx_SUBREG (V2HImode, cxt, 0),
10778 movishori));
10779 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10780 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10781 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10782 if (TARGET_LITTLE_ENDIAN)
10783 {
10784 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10785 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10786 }
10787 else
10788 {
10789 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10790 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10791 }
10792 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10793 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10794 emit_insn (gen_ic_invalidate_line (tramp));
10795 return;
10796 }
10797 else if (TARGET_SHCOMPACT)
10798 {
10799 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10800 return;
10801 }
10802 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10803 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10804 SImode));
10805 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10806 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10807 SImode));
10808 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10809 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10810 if (TARGET_HARVARD)
10811 {
10812 if (!TARGET_INLINE_IC_INVALIDATE
10813 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10814 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10815 FUNCTION_ORDINARY),
10816 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10817 else
10818 emit_insn (gen_ic_invalidate_line (tramp));
10819 }
10820 }
10821
10822 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10823
10824 static rtx
10825 sh_trampoline_adjust_address (rtx tramp)
10826 {
10827 if (TARGET_SHMEDIA)
10828 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10829 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10830 return tramp;
10831 }
10832
10833 /* FIXME: This is overly conservative. A SHcompact function that
10834 receives arguments ``by reference'' will have them stored in its
10835 own stack frame, so it must not pass pointers or references to
10836 these arguments to other functions by means of sibling calls. */
10837 /* If PIC, we cannot make sibling calls to global functions
10838 because the PLT requires r12 to be live. */
10839 static bool
10840 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10841 {
10842 return (1
10843 && (! TARGET_SHCOMPACT
10844 || crtl->args.info.stack_regs == 0)
10845 && ! sh_cfun_interrupt_handler_p ()
10846 && (! flag_pic
10847 || (decl && ! TREE_PUBLIC (decl))
10848 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10849 }
10850 \f
10851 /* Machine specific built-in functions. */
10852
10853 struct builtin_description
10854 {
10855 const enum insn_code icode;
10856 const char *const name;
10857 int signature;
10858 tree fndecl;
10859 };
10860
10861 /* describe number and signedness of arguments; arg[0] == result
10862 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
10863 /* 9: 64-bit pointer, 10: 32-bit pointer */
10864 static const char signature_args[][4] =
10865 {
10866 #define SH_BLTIN_V2SI2 0
10867 { 4, 4 },
10868 #define SH_BLTIN_V4HI2 1
10869 { 4, 4 },
10870 #define SH_BLTIN_V2SI3 2
10871 { 4, 4, 4 },
10872 #define SH_BLTIN_V4HI3 3
10873 { 4, 4, 4 },
10874 #define SH_BLTIN_V8QI3 4
10875 { 4, 4, 4 },
10876 #define SH_BLTIN_MAC_HISI 5
10877 { 1, 4, 4, 1 },
10878 #define SH_BLTIN_SH_HI 6
10879 { 4, 4, 1 },
10880 #define SH_BLTIN_SH_SI 7
10881 { 4, 4, 1 },
10882 #define SH_BLTIN_V4HI2V2SI 8
10883 { 4, 4, 4 },
10884 #define SH_BLTIN_V4HI2V8QI 9
10885 { 4, 4, 4 },
10886 #define SH_BLTIN_SISF 10
10887 { 4, 2 },
10888 #define SH_BLTIN_LDUA_L 11
10889 { 2, 10 },
10890 #define SH_BLTIN_LDUA_Q 12
10891 { 1, 10 },
10892 #define SH_BLTIN_STUA_L 13
10893 { 0, 10, 2 },
10894 #define SH_BLTIN_STUA_Q 14
10895 { 0, 10, 1 },
10896 #define SH_BLTIN_LDUA_L64 15
10897 { 2, 9 },
10898 #define SH_BLTIN_LDUA_Q64 16
10899 { 1, 9 },
10900 #define SH_BLTIN_STUA_L64 17
10901 { 0, 9, 2 },
10902 #define SH_BLTIN_STUA_Q64 18
10903 { 0, 9, 1 },
10904 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10905 #define SH_BLTIN_2 19
10906 #define SH_BLTIN_SU 19
10907 { 1, 2 },
10908 #define SH_BLTIN_3 20
10909 #define SH_BLTIN_SUS 20
10910 { 2, 2, 1 },
10911 #define SH_BLTIN_PSSV 21
10912 { 0, 8, 2, 2 },
10913 #define SH_BLTIN_XXUU 22
10914 #define SH_BLTIN_UUUU 22
10915 { 1, 1, 1, 1 },
10916 #define SH_BLTIN_PV 23
10917 { 0, 8 },
10918 };
10919 /* mcmv: operands considered unsigned. */
10920 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10921 /* mperm: control value considered unsigned int. */
10922 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10923 /* mshards_q: returns signed short. */
10924 /* nsb: takes long long arg, returns unsigned char. */
10925 static struct builtin_description bdesc[] =
10926 {
10927 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10928 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10929 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10930 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10931 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10932 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10933 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10934 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10935 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10936 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10937 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10938 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10939 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10940 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10941 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10942 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10943 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10944 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10945 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10946 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10947 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10948 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10949 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10950 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10951 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10952 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10953 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10954 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10955 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10956 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10957 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10958 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10959 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10960 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10961 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10962 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10963 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10964 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10965 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10966 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10967 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10968 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10969 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10970 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10971 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10972 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10973 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10974 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10975 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10976 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10977 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10978 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10979 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10980 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10981 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10982 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10983 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10984 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10985 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10986 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10987 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10988 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10989 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10990 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10991 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10992 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10993 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10994 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10995 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10996 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10997 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10998 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10999 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11000 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11001 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11002 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11003 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11004 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11005 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11006 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11007 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11008 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11009 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11010 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11011 };
11012
11013 static void
11014 sh_media_init_builtins (void)
11015 {
11016 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11017 struct builtin_description *d;
11018
11019 memset (shared, 0, sizeof shared);
11020 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
11021 {
11022 tree type, arg_type = 0;
11023 int signature = d->signature;
11024 int i;
11025
11026 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11027 type = shared[signature];
11028 else
11029 {
11030 int has_result = signature_args[signature][0] != 0;
11031
11032 if ((signature_args[signature][1] & 8)
11033 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11034 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11035 continue;
11036 if (! TARGET_FPU_ANY
11037 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11038 continue;
11039 type = void_list_node;
11040 for (i = 3; ; i--)
11041 {
11042 int arg = signature_args[signature][i];
11043 int opno = i - 1 + has_result;
11044
11045 if (arg & 8)
11046 arg_type = ptr_type_node;
11047 else if (arg)
11048 arg_type = (*lang_hooks.types.type_for_mode)
11049 (insn_data[d->icode].operand[opno].mode,
11050 (arg & 1));
11051 else if (i)
11052 continue;
11053 else
11054 arg_type = void_type_node;
11055 if (i == 0)
11056 break;
11057 type = tree_cons (NULL_TREE, arg_type, type);
11058 }
11059 type = build_function_type (arg_type, type);
11060 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11061 shared[signature] = type;
11062 }
11063 d->fndecl =
11064 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11065 NULL, NULL_TREE);
11066 }
11067 }
11068
11069 /* Returns the shmedia builtin decl for CODE. */
11070
11071 static tree
11072 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11073 {
11074 if (code >= ARRAY_SIZE (bdesc))
11075 return error_mark_node;
11076
11077 return bdesc[code].fndecl;
11078 }
11079
11080 /* Implements target hook vector_mode_supported_p. */
11081 bool
11082 sh_vector_mode_supported_p (enum machine_mode mode)
11083 {
11084 if (TARGET_FPU_ANY
11085 && ((mode == V2SFmode)
11086 || (mode == V4SFmode)
11087 || (mode == V16SFmode)))
11088 return true;
11089
11090 else if (TARGET_SHMEDIA
11091 && ((mode == V8QImode)
11092 || (mode == V2HImode)
11093 || (mode == V4HImode)
11094 || (mode == V2SImode)))
11095 return true;
11096
11097 return false;
11098 }
11099
11100 bool
11101 sh_frame_pointer_required (void)
11102 {
11103 /* If needed override this in other tm.h files to cope with various OS
11104 lossage requiring a frame pointer. */
11105 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11106 return true;
11107
11108 if (crtl->profile)
11109 return true;
11110
11111 return false;
11112 }
11113
11114 /* Implements target hook dwarf_calling_convention. Return an enum
11115 of dwarf_calling_convention. */
11116 int
11117 sh_dwarf_calling_convention (const_tree func)
11118 {
11119 if (sh_attr_renesas_p (func))
11120 return DW_CC_GNU_renesas_sh;
11121
11122 return DW_CC_normal;
11123 }
11124
11125 static void
11126 sh_init_builtins (void)
11127 {
11128 if (TARGET_SHMEDIA)
11129 sh_media_init_builtins ();
11130 }
11131
11132 /* Returns the sh builtin decl for CODE. */
11133
11134 static tree
11135 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11136 {
11137 if (TARGET_SHMEDIA)
11138 return sh_media_builtin_decl (code, initialize_p);
11139
11140 return error_mark_node;
11141 }
11142
11143 /* Expand an expression EXP that calls a built-in function,
11144 with result going to TARGET if that's convenient
11145 (and in mode MODE if that's convenient).
11146 SUBTARGET may be used as the target for computing one of EXP's operands.
11147 IGNORE is nonzero if the value is to be ignored. */
11148
11149 static rtx
11150 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11151 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11152 {
11153 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11154 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11155 const struct builtin_description *d = &bdesc[fcode];
11156 enum insn_code icode = d->icode;
11157 int signature = d->signature;
11158 enum machine_mode tmode = VOIDmode;
11159 int nop = 0, i;
11160 rtx op[4];
11161 rtx pat = 0;
11162
11163 if (signature_args[signature][0])
11164 {
11165 if (ignore)
11166 return 0;
11167
11168 tmode = insn_data[icode].operand[0].mode;
11169 if (! target
11170 || GET_MODE (target) != tmode
11171 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11172 target = gen_reg_rtx (tmode);
11173 op[nop++] = target;
11174 }
11175 else
11176 target = 0;
11177
11178 for (i = 1; i <= 3; i++, nop++)
11179 {
11180 tree arg;
11181 enum machine_mode opmode, argmode;
11182 tree optype;
11183
11184 if (! signature_args[signature][i])
11185 break;
11186 arg = CALL_EXPR_ARG (exp, i - 1);
11187 if (arg == error_mark_node)
11188 return const0_rtx;
11189 if (signature_args[signature][i] & 8)
11190 {
11191 opmode = ptr_mode;
11192 optype = ptr_type_node;
11193 }
11194 else
11195 {
11196 opmode = insn_data[icode].operand[nop].mode;
11197 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11198 }
11199 argmode = TYPE_MODE (TREE_TYPE (arg));
11200 if (argmode != opmode)
11201 arg = build1 (NOP_EXPR, optype, arg);
11202 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11203 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11204 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11205 }
11206
11207 switch (nop)
11208 {
11209 case 1:
11210 pat = (*insn_data[d->icode].genfun) (op[0]);
11211 break;
11212 case 2:
11213 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11214 break;
11215 case 3:
11216 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11217 break;
11218 case 4:
11219 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11220 break;
11221 default:
11222 gcc_unreachable ();
11223 }
11224 if (! pat)
11225 return 0;
11226 emit_insn (pat);
11227 return target;
11228 }
11229
11230 void
11231 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11232 {
11233 rtx sel0 = const0_rtx;
11234 rtx sel1 = const1_rtx;
11235 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11236 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11237
11238 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11239 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11240 }
11241
11242 void
11243 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11244 {
11245 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11246
11247 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11248 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11249 }
11250
11251 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11252 We can allow any mode in any general register. The special registers
11253 only allow SImode. Don't allow any mode in the PR.
11254
11255 We cannot hold DCmode values in the XD registers because alter_reg
11256 handles subregs of them incorrectly. We could work around this by
11257 spacing the XD registers like the DR registers, but this would require
11258 additional memory in every compilation to hold larger register vectors.
11259 We could hold SFmode / SCmode values in XD registers, but that
11260 would require a tertiary reload when reloading from / to memory,
11261 and a secondary reload to reload from / to general regs; that
11262 seems to be a loosing proposition.
11263
11264 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11265 it won't be ferried through GP registers first. */
11266
11267 bool
11268 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11269 {
11270 if (SPECIAL_REGISTER_P (regno))
11271 return mode == SImode;
11272
11273 if (regno == FPUL_REG)
11274 return (mode == SImode || mode == SFmode);
11275
11276 if (FP_REGISTER_P (regno) && mode == SFmode)
11277 return true;
11278
11279 if (mode == V2SFmode)
11280 {
11281 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11282 || GENERAL_REGISTER_P (regno)))
11283 return true;
11284 else
11285 return false;
11286 }
11287
11288 if (mode == V4SFmode)
11289 {
11290 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11291 || GENERAL_REGISTER_P (regno))
11292 return true;
11293 else
11294 return false;
11295 }
11296
11297 if (mode == V16SFmode)
11298 {
11299 if (TARGET_SHMEDIA)
11300 {
11301 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11302 return true;
11303 else
11304 return false;
11305 }
11306 else
11307 return regno == FIRST_XD_REG;
11308 }
11309
11310 if (FP_REGISTER_P (regno))
11311 {
11312 if (mode == SFmode
11313 || mode == SImode
11314 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11315 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11316 || mode == DCmode
11317 || (TARGET_SHMEDIA
11318 && (mode == DFmode || mode == DImode
11319 || mode == V2SFmode || mode == TImode)))
11320 && ((regno - FIRST_FP_REG) & 1) == 0)
11321 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11322 && ((regno - FIRST_FP_REG) & 3) == 0))
11323 return true;
11324 else
11325 return false;
11326 }
11327
11328 if (XD_REGISTER_P (regno))
11329 return mode == DFmode;
11330
11331 if (TARGET_REGISTER_P (regno))
11332 return (mode == DImode || mode == SImode || mode == PDImode);
11333
11334 if (regno == PR_REG)
11335 return mode == SImode;
11336
11337 if (regno == FPSCR_REG)
11338 return mode == PSImode;
11339
11340 /* FIXME. This works around PR target/37633 for -O0. */
11341 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11342 {
11343 unsigned int n = GET_MODE_SIZE (mode) / 8;
11344
11345 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11346 && regno <= FIRST_GENERAL_REG + 14)
11347 return false;
11348 }
11349
11350 return true;
11351 }
11352
11353 /* Return the class of registers for which a mode change from FROM to TO
11354 is invalid. */
11355 bool
11356 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11357 enum reg_class rclass)
11358 {
11359 /* We want to enable the use of SUBREGs as a means to
11360 VEC_SELECT a single element of a vector. */
11361 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11362 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11363
11364 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11365 {
11366 if (TARGET_LITTLE_ENDIAN)
11367 {
11368 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11369 return reg_classes_intersect_p (DF_REGS, rclass);
11370 }
11371 else
11372 {
11373 if (GET_MODE_SIZE (from) < 8)
11374 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11375 }
11376 }
11377 return 0;
11378 }
11379
11380 /* Return true if registers in machine mode MODE will likely be
11381 allocated to registers in small register classes. */
11382
11383 bool
11384 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11385 {
11386 return (! TARGET_SHMEDIA);
11387 }
11388
11389 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11390 that label is used. */
11391
11392 void
11393 sh_mark_label (rtx address, int nuses)
11394 {
11395 if (GOTOFF_P (address))
11396 {
11397 /* Extract the label or symbol. */
11398 address = XEXP (address, 0);
11399 if (GET_CODE (address) == PLUS)
11400 address = XEXP (address, 0);
11401 address = XVECEXP (address, 0, 0);
11402 }
11403 if (GET_CODE (address) == LABEL_REF
11404 && LABEL_P (XEXP (address, 0)))
11405 LABEL_NUSES (XEXP (address, 0)) += nuses;
11406 }
11407
11408 /* Compute extra cost of moving data between one register class
11409 and another. */
11410
11411 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11412 uses this information. Hence, the general register <-> floating point
11413 register information here is not used for SFmode. */
11414
11415 static int
11416 sh_register_move_cost (enum machine_mode mode,
11417 reg_class_t srcclass, reg_class_t dstclass)
11418 {
11419 if (dstclass == T_REGS || dstclass == PR_REGS)
11420 return 10;
11421
11422 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11423 return 4;
11424
11425 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11426 && REGCLASS_HAS_FP_REG (srcclass)
11427 && REGCLASS_HAS_FP_REG (dstclass))
11428 return 4;
11429
11430 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11431 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11432
11433 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11434 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11435 return 9;
11436
11437 if ((REGCLASS_HAS_FP_REG (dstclass)
11438 && REGCLASS_HAS_GENERAL_REG (srcclass))
11439 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11440 && REGCLASS_HAS_FP_REG (srcclass)))
11441 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11442 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11443
11444 if ((dstclass == FPUL_REGS
11445 && REGCLASS_HAS_GENERAL_REG (srcclass))
11446 || (srcclass == FPUL_REGS
11447 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11448 return 5;
11449
11450 if ((dstclass == FPUL_REGS
11451 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11452 || (srcclass == FPUL_REGS
11453 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11454 return 7;
11455
11456 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11457 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11458 return 20;
11459
11460 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11461 if (TARGET_SHMEDIA
11462 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11463 {
11464 if (sh_gettrcost >= 0)
11465 return sh_gettrcost;
11466 else if (!TARGET_PT_FIXED)
11467 return 100;
11468 }
11469
11470 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11471 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11472 return 4;
11473
11474 if (TARGET_SHMEDIA
11475 || (TARGET_FMOVD
11476 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11477 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11478 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11479
11480 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11481 }
11482
11483 static rtx emit_load_ptr (rtx, rtx);
11484
11485 static rtx
11486 emit_load_ptr (rtx reg, rtx addr)
11487 {
11488 rtx mem = gen_const_mem (ptr_mode, addr);
11489
11490 if (Pmode != ptr_mode)
11491 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11492 return emit_move_insn (reg, mem);
11493 }
11494
11495 static void
11496 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11497 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11498 tree function)
11499 {
11500 CUMULATIVE_ARGS cum;
11501 int structure_value_byref = 0;
11502 rtx this_rtx, this_value, sibcall, insns, funexp;
11503 tree funtype = TREE_TYPE (function);
11504 int simple_add = CONST_OK_FOR_ADD (delta);
11505 int did_load = 0;
11506 rtx scratch0, scratch1, scratch2;
11507 unsigned i;
11508
11509 reload_completed = 1;
11510 epilogue_completed = 1;
11511 current_function_uses_only_leaf_regs = 1;
11512
11513 emit_note (NOTE_INSN_PROLOGUE_END);
11514
11515 /* Find the "this" pointer. We have such a wide range of ABIs for the
11516 SH that it's best to do this completely machine independently.
11517 "this" is passed as first argument, unless a structure return pointer
11518 comes first, in which case "this" comes second. */
11519 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11520 #ifndef PCC_STATIC_STRUCT_RETURN
11521 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11522 structure_value_byref = 1;
11523 #endif /* not PCC_STATIC_STRUCT_RETURN */
11524 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11525 {
11526 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11527
11528 sh_function_arg_advance (&cum, Pmode, ptype, true);
11529 }
11530 this_rtx = sh_function_arg (&cum, Pmode, ptr_type_node, true);
11531
11532 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11533 static chain pointer (even if you can't have nested virtual functions
11534 right now, someone might implement them sometime), and the rest of the
11535 registers are used for argument passing, are callee-saved, or reserved. */
11536 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
11537 -ffixed-reg has been used. */
11538 if (! call_used_regs[0] || fixed_regs[0])
11539 error ("r0 needs to be available as a call-clobbered register");
11540 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11541 if (! TARGET_SH5)
11542 {
11543 if (call_used_regs[1] && ! fixed_regs[1])
11544 scratch1 = gen_rtx_REG (ptr_mode, 1);
11545 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11546 pointing where to return struct values. */
11547 if (call_used_regs[3] && ! fixed_regs[3])
11548 scratch2 = gen_rtx_REG (Pmode, 3);
11549 }
11550 else if (TARGET_SHMEDIA)
11551 {
11552 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11553 if (i != REGNO (scratch0) &&
11554 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11555 {
11556 scratch1 = gen_rtx_REG (ptr_mode, i);
11557 break;
11558 }
11559 if (scratch1 == scratch0)
11560 error ("Need a second call-clobbered general purpose register");
11561 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11562 if (call_used_regs[i] && ! fixed_regs[i])
11563 {
11564 scratch2 = gen_rtx_REG (Pmode, i);
11565 break;
11566 }
11567 if (scratch2 == scratch0)
11568 error ("Need a call-clobbered target register");
11569 }
11570
11571 this_value = plus_constant (this_rtx, delta);
11572 if (vcall_offset
11573 && (simple_add || scratch0 != scratch1)
11574 && strict_memory_address_p (ptr_mode, this_value))
11575 {
11576 emit_load_ptr (scratch0, this_value);
11577 did_load = 1;
11578 }
11579
11580 if (!delta)
11581 ; /* Do nothing. */
11582 else if (simple_add)
11583 emit_move_insn (this_rtx, this_value);
11584 else
11585 {
11586 emit_move_insn (scratch1, GEN_INT (delta));
11587 emit_insn (gen_add2_insn (this_rtx, scratch1));
11588 }
11589
11590 if (vcall_offset)
11591 {
11592 rtx offset_addr;
11593
11594 if (!did_load)
11595 emit_load_ptr (scratch0, this_rtx);
11596
11597 offset_addr = plus_constant (scratch0, vcall_offset);
11598 if (strict_memory_address_p (ptr_mode, offset_addr))
11599 ; /* Do nothing. */
11600 else if (! TARGET_SH5 && scratch0 != scratch1)
11601 {
11602 /* scratch0 != scratch1, and we have indexed loads. Get better
11603 schedule by loading the offset into r1 and using an indexed
11604 load - then the load of r1 can issue before the load from
11605 (this_rtx + delta) finishes. */
11606 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11607 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11608 }
11609 else if (CONST_OK_FOR_ADD (vcall_offset))
11610 {
11611 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11612 offset_addr = scratch0;
11613 }
11614 else if (scratch0 != scratch1)
11615 {
11616 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11617 emit_insn (gen_add2_insn (scratch0, scratch1));
11618 offset_addr = scratch0;
11619 }
11620 else
11621 gcc_unreachable (); /* FIXME */
11622 emit_load_ptr (scratch0, offset_addr);
11623
11624 if (Pmode != ptr_mode)
11625 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11626 emit_insn (gen_add2_insn (this_rtx, scratch0));
11627 }
11628
11629 /* Generate a tail call to the target function. */
11630 if (! TREE_USED (function))
11631 {
11632 assemble_external (function);
11633 TREE_USED (function) = 1;
11634 }
11635 funexp = XEXP (DECL_RTL (function), 0);
11636 /* If the function is overridden, so is the thunk, hence we don't
11637 need GOT addressing even if this is a public symbol. */
11638 #if 0
11639 if (TARGET_SH1 && ! flag_weak)
11640 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11641 else
11642 #endif
11643 if (TARGET_SH2 && flag_pic)
11644 {
11645 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11646 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11647 }
11648 else
11649 {
11650 if (TARGET_SHMEDIA && flag_pic)
11651 {
11652 funexp = gen_sym2PIC (funexp);
11653 PUT_MODE (funexp, Pmode);
11654 }
11655 emit_move_insn (scratch2, funexp);
11656 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11657 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11658 }
11659 sibcall = emit_call_insn (sibcall);
11660 SIBLING_CALL_P (sibcall) = 1;
11661 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11662 emit_barrier ();
11663
11664 /* Run just enough of rest_of_compilation to do scheduling and get
11665 the insns emitted. Note that use_thunk calls
11666 assemble_start_function and assemble_end_function. */
11667
11668 insn_locators_alloc ();
11669 insns = get_insns ();
11670
11671 if (optimize > 0)
11672 {
11673 if (! cfun->cfg)
11674 init_flow (cfun);
11675 split_all_insns_noflow ();
11676 }
11677
11678 sh_reorg ();
11679
11680 if (optimize > 0 && flag_delayed_branch)
11681 dbr_schedule (insns);
11682
11683 shorten_branches (insns);
11684 final_start_function (insns, file, 1);
11685 final (insns, file, 1);
11686 final_end_function ();
11687
11688 reload_completed = 0;
11689 epilogue_completed = 0;
11690 }
11691
11692 rtx
11693 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11694 {
11695 rtx sym;
11696
11697 /* If this is not an ordinary function, the name usually comes from a
11698 string literal or an sprintf buffer. Make sure we use the same
11699 string consistently, so that cse will be able to unify address loads. */
11700 if (kind != FUNCTION_ORDINARY)
11701 name = IDENTIFIER_POINTER (get_identifier (name));
11702 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11703 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11704 if (flag_pic)
11705 switch (kind)
11706 {
11707 case FUNCTION_ORDINARY:
11708 break;
11709 case SFUNC_GOT:
11710 {
11711 rtx reg = target ? target : gen_reg_rtx (Pmode);
11712
11713 emit_insn (gen_symGOT2reg (reg, sym));
11714 sym = reg;
11715 break;
11716 }
11717 case SFUNC_STATIC:
11718 {
11719 /* ??? To allow cse to work, we use GOTOFF relocations.
11720 we could add combiner patterns to transform this into
11721 straight pc-relative calls with sym2PIC / bsrf when
11722 label load and function call are still 1:1 and in the
11723 same basic block during combine. */
11724 rtx reg = target ? target : gen_reg_rtx (Pmode);
11725
11726 emit_insn (gen_symGOTOFF2reg (reg, sym));
11727 sym = reg;
11728 break;
11729 }
11730 }
11731 if (target && sym != target)
11732 {
11733 emit_move_insn (target, sym);
11734 return target;
11735 }
11736 return sym;
11737 }
11738
11739 /* Find the number of a general purpose register in S. */
11740 static int
11741 scavenge_reg (HARD_REG_SET *s)
11742 {
11743 int r;
11744 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11745 if (TEST_HARD_REG_BIT (*s, r))
11746 return r;
11747 return -1;
11748 }
11749
11750 rtx
11751 sh_get_pr_initial_val (void)
11752 {
11753 rtx val;
11754
11755 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11756 PR register on SHcompact, because it might be clobbered by the prologue.
11757 We check first if that is known to be the case. */
11758 if (TARGET_SHCOMPACT
11759 && ((crtl->args.info.call_cookie
11760 & ~ CALL_COOKIE_RET_TRAMP (1))
11761 || crtl->saves_all_registers))
11762 return gen_frame_mem (SImode, return_address_pointer_rtx);
11763
11764 /* If we haven't finished rtl generation, there might be a nonlocal label
11765 that we haven't seen yet.
11766 ??? get_hard_reg_initial_val fails if it is called after register
11767 allocation has started, unless it has been called before for the
11768 same register. And even then, we end in trouble if we didn't use
11769 the register in the same basic block before. So call
11770 get_hard_reg_initial_val now and wrap it in an unspec if we might
11771 need to replace it. */
11772 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11773 combine can put the pseudo returned by get_hard_reg_initial_val into
11774 instructions that need a general purpose registers, which will fail to
11775 be recognized when the pseudo becomes allocated to PR. */
11776 val
11777 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11778 if (TARGET_SH1)
11779 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11780 return val;
11781 }
11782
11783 int
11784 sh_expand_t_scc (rtx operands[])
11785 {
11786 enum rtx_code code = GET_CODE (operands[1]);
11787 rtx target = operands[0];
11788 rtx op0 = operands[2];
11789 rtx op1 = operands[3];
11790 rtx result = target;
11791 HOST_WIDE_INT val;
11792
11793 if (!REG_P (op0) || REGNO (op0) != T_REG
11794 || !CONST_INT_P (op1))
11795 return 0;
11796 if (!REG_P (result))
11797 result = gen_reg_rtx (SImode);
11798 val = INTVAL (op1);
11799 if ((code == EQ && val == 1) || (code == NE && val == 0))
11800 emit_insn (gen_movt (result));
11801 else if (TARGET_SH2A && ((code == EQ && val == 0)
11802 || (code == NE && val == 1)))
11803 emit_insn (gen_xorsi3_movrt (result));
11804 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11805 {
11806 emit_clobber (result);
11807 emit_insn (gen_subc (result, result, result));
11808 emit_insn (gen_addsi3 (result, result, const1_rtx));
11809 }
11810 else if (code == EQ || code == NE)
11811 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11812 else
11813 return 0;
11814 if (result != target)
11815 emit_move_insn (target, result);
11816 return 1;
11817 }
11818
11819 /* INSN is an sfunc; return the rtx that describes the address used. */
11820 static rtx
11821 extract_sfunc_addr (rtx insn)
11822 {
11823 rtx pattern, part = NULL_RTX;
11824 int len, i;
11825
11826 pattern = PATTERN (insn);
11827 len = XVECLEN (pattern, 0);
11828 for (i = 0; i < len; i++)
11829 {
11830 part = XVECEXP (pattern, 0, i);
11831 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11832 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11833 return XEXP (part, 0);
11834 }
11835 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11836 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11837 }
11838
11839 /* Verify that the register in use_sfunc_addr still agrees with the address
11840 used in the sfunc. This prevents fill_slots_from_thread from changing
11841 use_sfunc_addr.
11842 INSN is the use_sfunc_addr instruction, and REG is the register it
11843 guards. */
11844 int
11845 check_use_sfunc_addr (rtx insn, rtx reg)
11846 {
11847 /* Search for the sfunc. It should really come right after INSN. */
11848 while ((insn = NEXT_INSN (insn)))
11849 {
11850 if (LABEL_P (insn) || JUMP_P (insn))
11851 break;
11852 if (! INSN_P (insn))
11853 continue;
11854
11855 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11856 insn = XVECEXP (PATTERN (insn), 0, 0);
11857 if (GET_CODE (PATTERN (insn)) != PARALLEL
11858 || get_attr_type (insn) != TYPE_SFUNC)
11859 continue;
11860 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11861 }
11862 gcc_unreachable ();
11863 }
11864
11865 /* This function returns a constant rtx that represents pi / 2**15 in
11866 SFmode. it's used to scale SFmode angles, in radians, to a
11867 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11868 maps to 0x10000). */
11869
11870 static GTY(()) rtx sh_fsca_sf2int_rtx;
11871
11872 rtx
11873 sh_fsca_sf2int (void)
11874 {
11875 if (! sh_fsca_sf2int_rtx)
11876 {
11877 REAL_VALUE_TYPE rv;
11878
11879 real_from_string (&rv, "10430.378350470453");
11880 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11881 }
11882
11883 return sh_fsca_sf2int_rtx;
11884 }
11885
11886 /* This function returns a constant rtx that represents pi / 2**15 in
11887 DFmode. it's used to scale DFmode angles, in radians, to a
11888 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11889 maps to 0x10000). */
11890
11891 static GTY(()) rtx sh_fsca_df2int_rtx;
11892
11893 rtx
11894 sh_fsca_df2int (void)
11895 {
11896 if (! sh_fsca_df2int_rtx)
11897 {
11898 REAL_VALUE_TYPE rv;
11899
11900 real_from_string (&rv, "10430.378350470453");
11901 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11902 }
11903
11904 return sh_fsca_df2int_rtx;
11905 }
11906
11907 /* This function returns a constant rtx that represents 2**15 / pi in
11908 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
11909 of a full circle back to a SFmode value, i.e., 0x10000 maps to
11910 2*pi). */
11911
11912 static GTY(()) rtx sh_fsca_int2sf_rtx;
11913
11914 rtx
11915 sh_fsca_int2sf (void)
11916 {
11917 if (! sh_fsca_int2sf_rtx)
11918 {
11919 REAL_VALUE_TYPE rv;
11920
11921 real_from_string (&rv, "9.587379924285257e-5");
11922 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11923 }
11924
11925 return sh_fsca_int2sf_rtx;
11926 }
11927
11928 /* Initialize the CUMULATIVE_ARGS structure. */
11929
11930 void
11931 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11932 tree fntype,
11933 rtx libname ATTRIBUTE_UNUSED,
11934 tree fndecl,
11935 signed int n_named_args,
11936 enum machine_mode mode)
11937 {
11938 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11939 pcum->free_single_fp_reg = 0;
11940 pcum->stack_regs = 0;
11941 pcum->byref_regs = 0;
11942 pcum->byref = 0;
11943 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11944
11945 /* XXX - Should we check TARGET_HITACHI here ??? */
11946 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11947
11948 if (fntype)
11949 {
11950 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11951 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11952 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11953 pcum->arg_count [(int) SH_ARG_INT]
11954 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11955
11956 pcum->call_cookie
11957 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11958 && pcum->arg_count [(int) SH_ARG_INT] == 0
11959 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11960 ? int_size_in_bytes (TREE_TYPE (fntype))
11961 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11962 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11963 == FIRST_RET_REG));
11964 }
11965 else
11966 {
11967 pcum->arg_count [(int) SH_ARG_INT] = 0;
11968 pcum->prototype_p = FALSE;
11969 if (mode != VOIDmode)
11970 {
11971 pcum->call_cookie =
11972 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11973 && GET_MODE_SIZE (mode) > 4
11974 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11975
11976 /* If the default ABI is the Renesas ABI then all library
11977 calls must assume that the library will be using the
11978 Renesas ABI. So if the function would return its result
11979 in memory then we must force the address of this memory
11980 block onto the stack. Ideally we would like to call
11981 targetm.calls.return_in_memory() here but we do not have
11982 the TYPE or the FNDECL available so we synthesize the
11983 contents of that function as best we can. */
11984 pcum->force_mem =
11985 (TARGET_DEFAULT & MASK_HITACHI)
11986 && (mode == BLKmode
11987 || (GET_MODE_SIZE (mode) > 4
11988 && !(mode == DFmode
11989 && TARGET_FPU_DOUBLE)));
11990 }
11991 else
11992 {
11993 pcum->call_cookie = 0;
11994 pcum->force_mem = FALSE;
11995 }
11996 }
11997 }
11998
11999 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12000 not enter into CONST_DOUBLE for the replace.
12001
12002 Note that copying is not done so X must not be shared unless all copies
12003 are to be modified.
12004
12005 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12006 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
12007 replacements[n*2+1] - and that we take mode changes into account.
12008
12009 If a replacement is ambiguous, return NULL_RTX.
12010
12011 If MODIFY is zero, don't modify any rtl in place,
12012 just return zero or nonzero for failure / success. */
12013
12014 rtx
12015 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12016 {
12017 int i, j;
12018 const char *fmt;
12019
12020 /* The following prevents loops occurrence when we change MEM in
12021 CONST_DOUBLE onto the same CONST_DOUBLE. */
12022 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
12023 return x;
12024
12025 for (i = n_replacements - 1; i >= 0 ; i--)
12026 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12027 return replacements[i*2+1];
12028
12029 /* Allow this function to make replacements in EXPR_LISTs. */
12030 if (x == 0)
12031 return 0;
12032
12033 if (GET_CODE (x) == SUBREG)
12034 {
12035 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12036 n_replacements, modify);
12037
12038 if (CONST_INT_P (new_rtx))
12039 {
12040 x = simplify_subreg (GET_MODE (x), new_rtx,
12041 GET_MODE (SUBREG_REG (x)),
12042 SUBREG_BYTE (x));
12043 if (! x)
12044 abort ();
12045 }
12046 else if (modify)
12047 SUBREG_REG (x) = new_rtx;
12048
12049 return x;
12050 }
12051 else if (REG_P (x))
12052 {
12053 unsigned regno = REGNO (x);
12054 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12055 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12056 rtx result = NULL_RTX;
12057
12058 for (i = n_replacements - 1; i >= 0; i--)
12059 {
12060 rtx from = replacements[i*2];
12061 rtx to = replacements[i*2+1];
12062 unsigned from_regno, from_nregs, to_regno, new_regno;
12063
12064 if (!REG_P (from))
12065 continue;
12066 from_regno = REGNO (from);
12067 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12068 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12069 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12070 {
12071 if (regno < from_regno
12072 || regno + nregs > from_regno + nregs
12073 || !REG_P (to)
12074 || result)
12075 return NULL_RTX;
12076 to_regno = REGNO (to);
12077 if (to_regno < FIRST_PSEUDO_REGISTER)
12078 {
12079 new_regno = regno + to_regno - from_regno;
12080 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12081 != nregs)
12082 return NULL_RTX;
12083 result = gen_rtx_REG (GET_MODE (x), new_regno);
12084 }
12085 else if (GET_MODE (x) <= GET_MODE (to))
12086 result = gen_lowpart_common (GET_MODE (x), to);
12087 else
12088 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12089 }
12090 }
12091 return result ? result : x;
12092 }
12093 else if (GET_CODE (x) == ZERO_EXTEND)
12094 {
12095 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12096 n_replacements, modify);
12097
12098 if (CONST_INT_P (new_rtx))
12099 {
12100 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12101 new_rtx, GET_MODE (XEXP (x, 0)));
12102 if (! x)
12103 abort ();
12104 }
12105 else if (modify)
12106 XEXP (x, 0) = new_rtx;
12107
12108 return x;
12109 }
12110
12111 fmt = GET_RTX_FORMAT (GET_CODE (x));
12112 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12113 {
12114 rtx new_rtx;
12115
12116 if (fmt[i] == 'e')
12117 {
12118 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12119 n_replacements, modify);
12120 if (!new_rtx)
12121 return NULL_RTX;
12122 if (modify)
12123 XEXP (x, i) = new_rtx;
12124 }
12125 else if (fmt[i] == 'E')
12126 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12127 {
12128 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12129 n_replacements, modify);
12130 if (!new_rtx)
12131 return NULL_RTX;
12132 if (modify)
12133 XVECEXP (x, i, j) = new_rtx;
12134 }
12135 }
12136
12137 return x;
12138 }
12139
12140 rtx
12141 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12142 {
12143 enum rtx_code code = TRUNCATE;
12144
12145 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12146 {
12147 rtx inner = XEXP (x, 0);
12148 enum machine_mode inner_mode = GET_MODE (inner);
12149
12150 if (inner_mode == mode)
12151 return inner;
12152 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12153 x = inner;
12154 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12155 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12156 {
12157 code = GET_CODE (x);
12158 x = inner;
12159 }
12160 }
12161 return gen_rtx_fmt_e (code, mode, x);
12162 }
12163
12164 /* called via for_each_rtx after reload, to clean up truncates of
12165 registers that span multiple actual hard registers. */
12166 int
12167 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12168 {
12169 rtx x = *p, reg;
12170
12171 if (GET_CODE (x) != TRUNCATE)
12172 return 0;
12173 reg = XEXP (x, 0);
12174 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12175 {
12176 enum machine_mode reg_mode = GET_MODE (reg);
12177 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12178 subreg_lowpart_offset (DImode, reg_mode));
12179 *(int*) n_changes += 1;
12180 return -1;
12181 }
12182 return 0;
12183 }
12184
12185 /* Load and store depend on the highpart of the address. However,
12186 set_attr_alternative does not give well-defined results before reload,
12187 so we must look at the rtl ourselves to see if any of the feeding
12188 registers is used in a memref. */
12189
12190 /* Called by sh_contains_memref_p via for_each_rtx. */
12191 static int
12192 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12193 {
12194 return (MEM_P (*loc));
12195 }
12196
12197 /* Return nonzero iff INSN contains a MEM. */
12198 int
12199 sh_contains_memref_p (rtx insn)
12200 {
12201 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12202 }
12203
12204 /* Return nonzero iff INSN loads a banked register. */
12205 int
12206 sh_loads_bankedreg_p (rtx insn)
12207 {
12208 if (GET_CODE (PATTERN (insn)) == SET)
12209 {
12210 rtx op = SET_DEST (PATTERN(insn));
12211 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12212 return 1;
12213 }
12214
12215 return 0;
12216 }
12217
12218 /* FNADDR is the MEM expression from a call expander. Return an address
12219 to use in an SHmedia insn pattern. */
12220 rtx
12221 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12222 {
12223 int is_sym;
12224
12225 fnaddr = XEXP (fnaddr, 0);
12226 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12227 if (flag_pic && is_sym)
12228 {
12229 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12230 {
12231 rtx reg = gen_reg_rtx (Pmode);
12232
12233 /* We must not use GOTPLT for sibcalls, because PIC_REG
12234 must be restored before the PLT code gets to run. */
12235 if (is_sibcall)
12236 emit_insn (gen_symGOT2reg (reg, fnaddr));
12237 else
12238 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12239 fnaddr = reg;
12240 }
12241 else
12242 {
12243 fnaddr = gen_sym2PIC (fnaddr);
12244 PUT_MODE (fnaddr, Pmode);
12245 }
12246 }
12247 /* If ptabs might trap, make this visible to the rest of the compiler.
12248 We generally assume that symbols pertain to valid locations, but
12249 it is possible to generate invalid symbols with asm or linker tricks.
12250 In a list of functions where each returns its successor, an invalid
12251 symbol might denote an empty list. */
12252 if (!TARGET_PT_FIXED
12253 && (!is_sym || TARGET_INVALID_SYMBOLS)
12254 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12255 {
12256 rtx tr = gen_reg_rtx (PDImode);
12257
12258 emit_insn (gen_ptabs (tr, fnaddr));
12259 fnaddr = tr;
12260 }
12261 else if (! target_reg_operand (fnaddr, Pmode))
12262 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12263 return fnaddr;
12264 }
12265
12266 reg_class_t
12267 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12268 enum machine_mode mode, secondary_reload_info *sri)
12269 {
12270 enum reg_class rclass = (enum reg_class) rclass_i;
12271
12272 if (in_p)
12273 {
12274 if (REGCLASS_HAS_FP_REG (rclass)
12275 && ! TARGET_SHMEDIA
12276 && immediate_operand ((x), mode)
12277 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12278 && mode == SFmode && fldi_ok ()))
12279 switch (mode)
12280 {
12281 case SFmode:
12282 sri->icode = CODE_FOR_reload_insf__frn;
12283 return NO_REGS;
12284 case DFmode:
12285 sri->icode = CODE_FOR_reload_indf__frn;
12286 return NO_REGS;
12287 case SImode:
12288 /* ??? If we knew that we are in the appropriate mode -
12289 single precision - we could use a reload pattern directly. */
12290 return FPUL_REGS;
12291 default:
12292 abort ();
12293 }
12294 if (rclass == FPUL_REGS
12295 && ((REG_P (x)
12296 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12297 || REGNO (x) == T_REG))
12298 || GET_CODE (x) == PLUS))
12299 return GENERAL_REGS;
12300 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12301 {
12302 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12303 return GENERAL_REGS;
12304 else if (mode == SFmode)
12305 return FP_REGS;
12306 sri->icode = CODE_FOR_reload_insi__i_fpul;
12307 return NO_REGS;
12308 }
12309 if (rclass == FPSCR_REGS
12310 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12311 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12312 return GENERAL_REGS;
12313 if (REGCLASS_HAS_FP_REG (rclass)
12314 && TARGET_SHMEDIA
12315 && immediate_operand (x, mode)
12316 && x != CONST0_RTX (GET_MODE (x))
12317 && GET_MODE (x) != V4SFmode)
12318 return GENERAL_REGS;
12319 if ((mode == QImode || mode == HImode)
12320 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12321 {
12322 sri->icode = ((mode == QImode)
12323 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12324 return NO_REGS;
12325 }
12326 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12327 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12328 return TARGET_REGS;
12329 } /* end of input-only processing. */
12330
12331 if (((REGCLASS_HAS_FP_REG (rclass)
12332 && (REG_P (x)
12333 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12334 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12335 && TARGET_FMOVD))))
12336 || (REGCLASS_HAS_GENERAL_REG (rclass)
12337 && REG_P (x)
12338 && FP_REGISTER_P (REGNO (x))))
12339 && ! TARGET_SHMEDIA
12340 && (mode == SFmode || mode == SImode))
12341 return FPUL_REGS;
12342 if ((rclass == FPUL_REGS
12343 || (REGCLASS_HAS_FP_REG (rclass)
12344 && ! TARGET_SHMEDIA && mode == SImode))
12345 && (MEM_P (x)
12346 || (REG_P (x)
12347 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12348 || REGNO (x) == T_REG
12349 || system_reg_operand (x, VOIDmode)))))
12350 {
12351 if (rclass == FPUL_REGS)
12352 return GENERAL_REGS;
12353 return FPUL_REGS;
12354 }
12355 if ((rclass == TARGET_REGS
12356 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12357 && !satisfies_constraint_Csy (x)
12358 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12359 return GENERAL_REGS;
12360 if ((rclass == MAC_REGS || rclass == PR_REGS)
12361 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12362 && rclass != REGNO_REG_CLASS (REGNO (x)))
12363 return GENERAL_REGS;
12364 if (rclass != GENERAL_REGS && REG_P (x)
12365 && TARGET_REGISTER_P (REGNO (x)))
12366 return GENERAL_REGS;
12367 return NO_REGS;
12368 }
12369
12370 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12371
12372 #include "gt-sh.h"