sh.c (expand_cbranchdi4): Use a scratch register for the non-zero constant operand...
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "insn-config.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "flags.h"
31 #include "expr.h"
32 #include "optabs.h"
33 #include "function.h"
34 #include "regs.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "integrate.h"
41 #include "dwarf2.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "real.h"
46 #include "langhooks.h"
47 #include "basic-block.h"
48 #include "df.h"
49 #include "cfglayout.h"
50 #include "intl.h"
51 #include "sched-int.h"
52 #include "ggc.h"
53 #include "gimple.h"
54 #include "cfgloop.h"
55 #include "alloc-pool.h"
56 #include "tm-constrs.h"
57
58
59 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60
61 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
62 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63
64 /* These are some macros to abstract register modes. */
65 #define CONST_OK_FOR_ADD(size) \
66 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
67 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
68 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
69 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
70
71 /* Used to simplify the logic below. Find the attributes wherever
72 they may be. */
73 #define SH_ATTRIBUTES(decl) \
74 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
75 : DECL_ATTRIBUTES (decl) \
76 ? (DECL_ATTRIBUTES (decl)) \
77 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
78
79 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
80 int current_function_interrupt;
81
82 tree sh_deferred_function_attributes;
83 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
84
85 /* Global variables for machine-dependent things. */
86
87 /* Which cpu are we scheduling for. */
88 enum processor_type sh_cpu;
89
90 /* Definitions used in ready queue reordering for first scheduling pass. */
91
92 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
93 static short *regmode_weight[2];
94
95 /* Total SFmode and SImode weights of scheduled insns. */
96 static int curr_regmode_pressure[2];
97
98 /* Number of r0 life regions. */
99 static int r0_life_regions;
100
101 /* If true, skip cycles for Q -> R movement. */
102 static int skip_cycles = 0;
103
104 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
105 and returned from sh_reorder2. */
106 static short cached_can_issue_more;
107
108 /* Provides the class number of the smallest class containing
109 the given register number. */
110
111 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
112 {
113 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
146 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
147 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
148 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
149 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
150 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
151 GENERAL_REGS, GENERAL_REGS,
152 };
153
154 char sh_register_names[FIRST_PSEUDO_REGISTER] \
155 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
156
157 char sh_additional_register_names[ADDREGNAMES_SIZE] \
158 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
159 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
160
161 int assembler_dialect;
162
163 static bool shmedia_space_reserved_for_target_registers;
164
165 static bool sh_handle_option (size_t, const char *, int);
166 static void split_branches (rtx);
167 static int branch_dest (rtx);
168 static void force_into (rtx, rtx);
169 static void print_slot (rtx);
170 static rtx add_constant (rtx, enum machine_mode, rtx);
171 static void dump_table (rtx, rtx);
172 static int hi_const (rtx);
173 static int broken_move (rtx);
174 static int mova_p (rtx);
175 static rtx find_barrier (int, rtx, rtx);
176 static int noncall_uses_reg (rtx, rtx, rtx *);
177 static rtx gen_block_redirect (rtx, int, int);
178 static void sh_reorg (void);
179 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
180 static rtx frame_insn (rtx);
181 static rtx push (int);
182 static void pop (int);
183 static void push_regs (HARD_REG_SET *, int);
184 static int calc_live_regs (HARD_REG_SET *);
185 static HOST_WIDE_INT rounded_frame_size (int);
186 static rtx mark_constant_pool_use (rtx);
187 const struct attribute_spec sh_attribute_table[];
188 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
189 static tree sh_handle_resbank_handler_attribute (tree *, tree,
190 tree, int, bool *);
191 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
192 tree, int, bool *);
193 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
194 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
195 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
196 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
197 static void sh_insert_attributes (tree, tree *);
198 static const char *sh_check_pch_target_flags (int);
199 static int sh_adjust_cost (rtx, rtx, rtx, int);
200 static int sh_issue_rate (void);
201 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
202 static short find_set_regmode_weight (rtx, enum machine_mode);
203 static short find_insn_regmode_weight (rtx, enum machine_mode);
204 static void find_regmode_weight (basic_block, enum machine_mode);
205 static int find_r0_life_regions (basic_block);
206 static void sh_md_init_global (FILE *, int, int);
207 static void sh_md_finish_global (FILE *, int);
208 static int rank_for_reorder (const void *, const void *);
209 static void swap_reorder (rtx *, int);
210 static void ready_reorder (rtx *, int);
211 static short high_pressure (enum machine_mode);
212 static int sh_reorder (FILE *, int, rtx *, int *, int);
213 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
214 static void sh_md_init (FILE *, int, int);
215 static int sh_variable_issue (FILE *, int, rtx, int);
216
217 static bool sh_function_ok_for_sibcall (tree, tree);
218
219 static bool sh_cannot_modify_jumps_p (void);
220 static enum reg_class sh_target_reg_class (void);
221 static bool sh_optimize_target_register_callee_saved (bool);
222 static bool sh_ms_bitfield_layout_p (const_tree);
223
224 static void sh_init_builtins (void);
225 static void sh_media_init_builtins (void);
226 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
227 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
228 static void sh_file_start (void);
229 static int flow_dependent_p (rtx, rtx);
230 static void flow_dependent_p_1 (rtx, const_rtx, void *);
231 static int shiftcosts (rtx);
232 static int andcosts (rtx);
233 static int addsubcosts (rtx);
234 static int multcosts (rtx);
235 static bool unspec_caller_rtx_p (rtx);
236 static bool sh_cannot_copy_insn_p (rtx);
237 static bool sh_rtx_costs (rtx, int, int, int *, bool);
238 static int sh_address_cost (rtx, bool);
239 static int sh_pr_n_sets (void);
240 static rtx sh_allocate_initial_value (rtx);
241 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
242 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
243 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
244 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
245 static int scavenge_reg (HARD_REG_SET *s);
246 struct save_schedule_s;
247 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
248 struct save_schedule_s *, int);
249
250 static rtx sh_struct_value_rtx (tree, int);
251 static bool sh_return_in_memory (const_tree, const_tree);
252 static rtx sh_builtin_saveregs (void);
253 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
254 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
255 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
256 static tree sh_build_builtin_va_list (void);
257 static void sh_va_start (tree, rtx);
258 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
259 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
260 const_tree, bool);
261 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
262 const_tree, bool);
263 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
264 tree, bool);
265 static bool sh_scalar_mode_supported_p (enum machine_mode);
266 static int sh_dwarf_calling_convention (const_tree);
267 static void sh_encode_section_info (tree, rtx, int);
268 static int sh2a_function_vector_p (tree);
269
270 \f
271 /* Initialize the GCC target structure. */
272 #undef TARGET_ATTRIBUTE_TABLE
273 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
274
275 /* The next two are used for debug info when compiling with -gdwarf. */
276 #undef TARGET_ASM_UNALIGNED_HI_OP
277 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
278 #undef TARGET_ASM_UNALIGNED_SI_OP
279 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
280
281 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
282 #undef TARGET_ASM_UNALIGNED_DI_OP
283 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
284 #undef TARGET_ASM_ALIGNED_DI_OP
285 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
286
287 #undef TARGET_ASM_FUNCTION_EPILOGUE
288 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
289
290 #undef TARGET_ASM_OUTPUT_MI_THUNK
291 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
292
293 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
294 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
295
296 #undef TARGET_ASM_FILE_START
297 #define TARGET_ASM_FILE_START sh_file_start
298 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
299 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
300
301 #undef TARGET_DEFAULT_TARGET_FLAGS
302 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
303 #undef TARGET_HANDLE_OPTION
304 #define TARGET_HANDLE_OPTION sh_handle_option
305
306 #undef TARGET_INSERT_ATTRIBUTES
307 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
308
309 #undef TARGET_SCHED_ADJUST_COST
310 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
311
312 #undef TARGET_SCHED_ISSUE_RATE
313 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
314
315 /* The next 5 hooks have been implemented for re-enabling sched1. With the
316 help of these macros we limit the movement of insns in sched1 to
317 reduce the register pressure. The overall idea is to keep count of SImode
318 and SFmode regs required by already scheduled insns. When these counts
319 cross some threshold values, we give priority to insns that free registers.
320 The insn that frees registers is most likely to be the insn with the lowest
321 LUID (original insn order); but such an insn might be sitting in the stalled
322 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
323 up to a maximum of 8 cycles so that such insns may move from Q -> R.
324
325 The hooks are described below:
326
327 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
328 scheduler; it is called inside the sched_init function just after the
329 find_insn_reg_weights function call. It is used to calculate the SImode
330 and SFmode weights of the insns of basic blocks, much like what
331 find_insn_reg_weights does.
332 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
333
334 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
335 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
336 (Q)->(R).
337
338 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
339 high, reorder the ready queue so that the insn with the lowest LUID will be
340 issued next.
341
342 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
343 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
344
345 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
346 can be returned from TARGET_SCHED_REORDER2.
347
348 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
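/* Illustrative sketch only (not generated code): with these hooks in
   place, a scheduling region is roughly handled as

     sh_md_init        -> reset CURR_REGMODE_PRESSURE for SImode/SFmode
     sh_reorder        -> if high_pressure (mode), sort the ready queue
                          so the lowest-LUID insn is issued first
     sh_variable_issue -> cache can_issue_more
     sh_reorder2       -> if pressure is still high, request cycle skipping
     sh_dfa_new_cycle  -> skip up to 8 cycles so stalled insns move Q -> R

   The thresholds and bookkeeping live in the functions defined further
   down in this file.  */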
349
350 #undef TARGET_SCHED_DFA_NEW_CYCLE
351 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
352
353 #undef TARGET_SCHED_INIT_GLOBAL
354 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
355
356 #undef TARGET_SCHED_FINISH_GLOBAL
357 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
358
359 #undef TARGET_SCHED_VARIABLE_ISSUE
360 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
361
362 #undef TARGET_SCHED_REORDER
363 #define TARGET_SCHED_REORDER sh_reorder
364
365 #undef TARGET_SCHED_REORDER2
366 #define TARGET_SCHED_REORDER2 sh_reorder2
367
368 #undef TARGET_SCHED_INIT
369 #define TARGET_SCHED_INIT sh_md_init
370
371 #undef TARGET_LEGITIMIZE_ADDRESS
372 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
373
374 #undef TARGET_CANNOT_MODIFY_JUMPS_P
375 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
376 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
377 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
378 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
379 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
380 sh_optimize_target_register_callee_saved
381
382 #undef TARGET_MS_BITFIELD_LAYOUT_P
383 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
384
385 #undef TARGET_INIT_BUILTINS
386 #define TARGET_INIT_BUILTINS sh_init_builtins
387 #undef TARGET_EXPAND_BUILTIN
388 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
389
390 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
391 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
392
393 #undef TARGET_CANNOT_COPY_INSN_P
394 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
395 #undef TARGET_RTX_COSTS
396 #define TARGET_RTX_COSTS sh_rtx_costs
397 #undef TARGET_ADDRESS_COST
398 #define TARGET_ADDRESS_COST sh_address_cost
399 #undef TARGET_ALLOCATE_INITIAL_VALUE
400 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
401
402 #undef TARGET_MACHINE_DEPENDENT_REORG
403 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
404
405 #undef TARGET_DWARF_REGISTER_SPAN
406 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
407
408 #ifdef HAVE_AS_TLS
409 #undef TARGET_HAVE_TLS
410 #define TARGET_HAVE_TLS true
411 #endif
412
413 #undef TARGET_PROMOTE_PROTOTYPES
414 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
415 #undef TARGET_PROMOTE_FUNCTION_ARGS
416 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
417 #undef TARGET_PROMOTE_FUNCTION_RETURN
418 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
419
420 #undef TARGET_STRUCT_VALUE_RTX
421 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
422 #undef TARGET_RETURN_IN_MEMORY
423 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
424
425 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
426 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
427 #undef TARGET_SETUP_INCOMING_VARARGS
428 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
429 #undef TARGET_STRICT_ARGUMENT_NAMING
430 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
431 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
432 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
433 #undef TARGET_MUST_PASS_IN_STACK
434 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
435 #undef TARGET_PASS_BY_REFERENCE
436 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
437 #undef TARGET_CALLEE_COPIES
438 #define TARGET_CALLEE_COPIES sh_callee_copies
439 #undef TARGET_ARG_PARTIAL_BYTES
440 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
441
442 #undef TARGET_BUILD_BUILTIN_VA_LIST
443 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
444 #undef TARGET_EXPAND_BUILTIN_VA_START
445 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
446 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
447 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
448
449 #undef TARGET_SCALAR_MODE_SUPPORTED_P
450 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
451 #undef TARGET_VECTOR_MODE_SUPPORTED_P
452 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
453
454 #undef TARGET_CHECK_PCH_TARGET_FLAGS
455 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
456
457 #undef TARGET_DWARF_CALLING_CONVENTION
458 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
459
460 /* Return regmode weight for insn. */
461 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
462
463 /* Return current register pressure for regmode. */
464 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
465
466 #undef TARGET_ENCODE_SECTION_INFO
467 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
468
469 #ifdef SYMBIAN
470
471 #undef TARGET_ENCODE_SECTION_INFO
472 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
475 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
476 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
477
478 #endif /* SYMBIAN */
479
480 #undef TARGET_SECONDARY_RELOAD
481 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
482
483 /* Machine-specific symbol_ref flags. */
484 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
485
486 struct gcc_target targetm = TARGET_INITIALIZER;
487 \f
488 /* Implement TARGET_HANDLE_OPTION. */
489
490 static bool
491 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
492 int value ATTRIBUTE_UNUSED)
493 {
494 switch (code)
495 {
496 case OPT_m1:
497 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
498 return true;
499
500 case OPT_m2:
501 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
502 return true;
503
504 case OPT_m2a:
505 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
506 return true;
507
508 case OPT_m2a_nofpu:
509 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
510 return true;
511
512 case OPT_m2a_single:
513 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
514 return true;
515
516 case OPT_m2a_single_only:
517 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
518 return true;
519
520 case OPT_m2e:
521 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
522 return true;
523
524 case OPT_m3:
525 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
526 return true;
527
528 case OPT_m3e:
529 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
530 return true;
531
532 case OPT_m4:
533 case OPT_m4_100:
534 case OPT_m4_200:
535 case OPT_m4_300:
536 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
537 return true;
538
539 case OPT_m4_nofpu:
540 case OPT_m4_100_nofpu:
541 case OPT_m4_200_nofpu:
542 case OPT_m4_300_nofpu:
543 case OPT_m4_340:
544 case OPT_m4_400:
545 case OPT_m4_500:
546 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
547 return true;
548
549 case OPT_m4_single:
550 case OPT_m4_100_single:
551 case OPT_m4_200_single:
552 case OPT_m4_300_single:
553 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
554 return true;
555
556 case OPT_m4_single_only:
557 case OPT_m4_100_single_only:
558 case OPT_m4_200_single_only:
559 case OPT_m4_300_single_only:
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
561 return true;
562
563 case OPT_m4a:
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
565 return true;
566
567 case OPT_m4a_nofpu:
568 case OPT_m4al:
569 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
570 return true;
571
572 case OPT_m4a_single:
573 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
574 return true;
575
576 case OPT_m4a_single_only:
577 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
578 return true;
579
580 case OPT_m5_32media:
581 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
582 return true;
583
584 case OPT_m5_32media_nofpu:
585 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
586 return true;
587
588 case OPT_m5_64media:
589 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
590 return true;
591
592 case OPT_m5_64media_nofpu:
593 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
594 return true;
595
596 case OPT_m5_compact:
597 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
598 return true;
599
600 case OPT_m5_compact_nofpu:
601 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
602 return true;
603
604 default:
605 return true;
606 }
607 }
608 \f
609 /* Print the operand address in x to the stream. */
610
611 void
612 print_operand_address (FILE *stream, rtx x)
613 {
614 switch (GET_CODE (x))
615 {
616 case REG:
617 case SUBREG:
618 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
619 break;
620
621 case PLUS:
622 {
623 rtx base = XEXP (x, 0);
624 rtx index = XEXP (x, 1);
625
626 switch (GET_CODE (index))
627 {
628 case CONST_INT:
629 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
630 reg_names[true_regnum (base)]);
631 break;
632
633 case REG:
634 case SUBREG:
635 {
636 int base_num = true_regnum (base);
637 int index_num = true_regnum (index);
638
639 fprintf (stream, "@(r0,%s)",
640 reg_names[MAX (base_num, index_num)]);
641 break;
642 }
643
644 default:
645 gcc_unreachable ();
646 }
647 }
648 break;
649
650 case PRE_DEC:
651 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
652 break;
653
654 case POST_INC:
655 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
656 break;
657
658 default:
659 x = mark_constant_pool_use (x);
660 output_addr_const (stream, x);
661 break;
662 }
663 }
664
665 /* Print operand x (an rtx) in assembler syntax to file stream
666 according to the modifier code.
667
668 '.' print a .s if insn needs delay slot
669 ',' print LOCAL_LABEL_PREFIX
670 '@' print trap, rte or rts depending upon pragma interruptness
671 '#' output a nop if there is nothing to put in the delay slot
672 ''' print likelihood suffix (/u for unlikely).
673 '>' print branch target if -fverbose-asm
674 'O' print a constant without the #
675 'R' print the LSW of a dp value - changes if in little endian
676 'S' print the MSW of a dp value - changes if in little endian
677 'T' print the next word of a dp value - same as 'R' in big endian mode.
678 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
679 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
680 'N' print 'r63' if the operand is (const_int 0).
681 'd' print a V2SF reg as dN instead of fpN.
682 'm' print a pair `base,offset' or `base,index', for LD and ST.
683 'U' Likewise for {LD,ST}{HI,LO}.
684 'V' print the position of a single bit set.
685 'W' print the position of a single bit cleared.
686 't' print a memory address which is a register.
687 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
688 'o' output an operator. */
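/* A hypothetical example (not taken from sh.md): an insn output template
   such as

     "mov.l	%R1,%R0\;mov.l	%S1,%S0"

   would move the least significant and then the most significant SImode
   word of a DImode/DFmode operand pair, with %R and %S picking the right
   register name or adjusted address for the current endianness, as
   described above.  */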
689
690 void
691 print_operand (FILE *stream, rtx x, int code)
692 {
693 int regno;
694 enum machine_mode mode;
695
696 switch (code)
697 {
698 tree trapa_attr;
699
700 case '.':
701 if (final_sequence
702 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
703 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
704 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
705 break;
706 case ',':
707 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
708 break;
709 case '@':
710 trapa_attr = lookup_attribute ("trap_exit",
711 DECL_ATTRIBUTES (current_function_decl));
712 if (trapa_attr)
713 fprintf (stream, "trapa #%ld",
714 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
715 else if (sh_cfun_interrupt_handler_p ())
716 {
717 if (sh_cfun_resbank_handler_p ())
718 fprintf (stream, "resbank\n");
719 fprintf (stream, "rte");
720 }
721 else
722 fprintf (stream, "rts");
723 break;
724 case '#':
725 /* Output a nop if there's nothing in the delay slot. */
726 if (dbr_sequence_length () == 0)
727 fprintf (stream, "\n\tnop");
728 break;
729 case '\'':
730 {
731 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
732
733 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
734 fputs ("/u", stream);
735 break;
736 }
737 case '>':
738 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
739 {
740 fputs ("\t! target: ", stream);
741 output_addr_const (stream, JUMP_LABEL (current_output_insn));
742 }
743 break;
744 case 'O':
745 x = mark_constant_pool_use (x);
746 output_addr_const (stream, x);
747 break;
748 /* N.B.: %R / %S / %T adjust memory addresses by four.
749 For SHMEDIA, that means they can be used to access the first and
750 second 32 bit part of a 64 bit (or larger) value that
751 might be held in floating point registers or memory.
752 While they can be used to access 64 bit parts of a larger value
753 held in general purpose registers, that won't work with memory -
754 nor with fp registers, since the frxx names are used. */
755 case 'R':
756 if (REG_P (x) || GET_CODE (x) == SUBREG)
757 {
758 regno = true_regnum (x);
759 regno += FP_REGISTER_P (regno) ? 1 : LSW;
760 fputs (reg_names[regno], (stream));
761 }
762 else if (MEM_P (x))
763 {
764 x = adjust_address (x, SImode, 4 * LSW);
765 print_operand_address (stream, XEXP (x, 0));
766 }
767 else
768 {
769 rtx sub = NULL_RTX;
770
771 mode = GET_MODE (x);
772 if (mode == VOIDmode)
773 mode = DImode;
774 if (GET_MODE_SIZE (mode) >= 8)
775 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
776 if (sub)
777 print_operand (stream, sub, 0);
778 else
779 output_operand_lossage ("invalid operand to %%R");
780 }
781 break;
782 case 'S':
783 if (REG_P (x) || GET_CODE (x) == SUBREG)
784 {
785 regno = true_regnum (x);
786 regno += FP_REGISTER_P (regno) ? 0 : MSW;
787 fputs (reg_names[regno], (stream));
788 }
789 else if (MEM_P (x))
790 {
791 x = adjust_address (x, SImode, 4 * MSW);
792 print_operand_address (stream, XEXP (x, 0));
793 }
794 else
795 {
796 rtx sub = NULL_RTX;
797
798 mode = GET_MODE (x);
799 if (mode == VOIDmode)
800 mode = DImode;
801 if (GET_MODE_SIZE (mode) >= 8)
802 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
803 if (sub)
804 print_operand (stream, sub, 0);
805 else
806 output_operand_lossage ("invalid operand to %%S");
807 }
808 break;
809 case 'T':
810 /* Next word of a double. */
811 switch (GET_CODE (x))
812 {
813 case REG:
814 fputs (reg_names[REGNO (x) + 1], (stream));
815 break;
816 case MEM:
817 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
818 && GET_CODE (XEXP (x, 0)) != POST_INC)
819 x = adjust_address (x, SImode, 4);
820 print_operand_address (stream, XEXP (x, 0));
821 break;
822 default:
823 break;
824 }
825 break;
826
827 case 't':
828 gcc_assert (GET_CODE (x) == MEM);
829 x = XEXP (x, 0);
830 switch (GET_CODE (x))
831 {
832 case REG:
833 case SUBREG:
834 print_operand (stream, x, 0);
835 break;
836 default:
837 break;
838 }
839 break;
840
841 case 'o':
842 switch (GET_CODE (x))
843 {
844 case PLUS: fputs ("add", stream); break;
845 case MINUS: fputs ("sub", stream); break;
846 case MULT: fputs ("mul", stream); break;
847 case DIV: fputs ("div", stream); break;
848 case EQ: fputs ("eq", stream); break;
849 case NE: fputs ("ne", stream); break;
850 case GT: case LT: fputs ("gt", stream); break;
851 case GE: case LE: fputs ("ge", stream); break;
852 case GTU: case LTU: fputs ("gtu", stream); break;
853 case GEU: case LEU: fputs ("geu", stream); break;
854 default:
855 break;
856 }
857 break;
858 case 'M':
859 if (TARGET_SHMEDIA)
860 {
861 if (GET_CODE (x) == MEM
862 && GET_CODE (XEXP (x, 0)) == PLUS
863 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
864 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
865 fputc ('x', stream);
866 }
867 else
868 {
869 if (GET_CODE (x) == MEM)
870 {
871 switch (GET_MODE (x))
872 {
873 case QImode: fputs (".b", stream); break;
874 case HImode: fputs (".w", stream); break;
875 case SImode: fputs (".l", stream); break;
876 case SFmode: fputs (".s", stream); break;
877 case DFmode: fputs (".d", stream); break;
878 default: gcc_unreachable ();
879 }
880 }
881 }
882 break;
883
884 case 'm':
885 gcc_assert (GET_CODE (x) == MEM);
886 x = XEXP (x, 0);
887 /* Fall through. */
888 case 'U':
889 switch (GET_CODE (x))
890 {
891 case REG:
892 case SUBREG:
893 print_operand (stream, x, 0);
894 fputs (", 0", stream);
895 break;
896
897 case PLUS:
898 print_operand (stream, XEXP (x, 0), 0);
899 fputs (", ", stream);
900 print_operand (stream, XEXP (x, 1), 0);
901 break;
902
903 default:
904 gcc_unreachable ();
905 }
906 break;
907
908 case 'V':
909 {
910 int num = exact_log2 (INTVAL (x));
911 gcc_assert (num >= 0);
912 fprintf (stream, "#%d", num);
913 }
914 break;
915
916 case 'W':
917 {
918 int num = exact_log2 (~INTVAL (x));
919 gcc_assert (num >= 0);
920 fprintf (stream, "#%d", num);
921 }
922 break;
923
924 case 'd':
925 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
926
927 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
928 break;
929
930 case 'N':
931 if (x == CONST0_RTX (GET_MODE (x)))
932 {
933 fprintf ((stream), "r63");
934 break;
935 }
936 goto default_output;
937 case 'u':
938 if (GET_CODE (x) == CONST_INT)
939 {
940 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
941 break;
942 }
943 /* Fall through. */
944
945 default_output:
946 default:
947 regno = 0;
948 mode = GET_MODE (x);
949
950 switch (GET_CODE (x))
951 {
952 case TRUNCATE:
953 {
954 rtx inner = XEXP (x, 0);
955 int offset = 0;
956 enum machine_mode inner_mode;
957
958 /* We might see SUBREGs with vector mode registers inside. */
959 if (GET_CODE (inner) == SUBREG
960 && (GET_MODE_SIZE (GET_MODE (inner))
961 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
962 && subreg_lowpart_p (inner))
963 inner = SUBREG_REG (inner);
964 if (GET_CODE (inner) == CONST_INT)
965 {
966 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
967 goto default_output;
968 }
969 inner_mode = GET_MODE (inner);
970 if (GET_CODE (inner) == SUBREG
971 && (GET_MODE_SIZE (GET_MODE (inner))
972 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
973 && GET_CODE (SUBREG_REG (inner)) == REG)
974 {
975 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
976 GET_MODE (SUBREG_REG (inner)),
977 SUBREG_BYTE (inner),
978 GET_MODE (inner));
979 inner = SUBREG_REG (inner);
980 }
981 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
982 abort ();
983 /* Floating point register pairs are always big endian;
984 general purpose registers are 64 bit wide. */
985 regno = REGNO (inner);
986 regno = (HARD_REGNO_NREGS (regno, inner_mode)
987 - HARD_REGNO_NREGS (regno, mode))
988 + offset;
989 x = inner;
990 goto reg;
991 }
992 case SIGN_EXTEND:
993 x = XEXP (x, 0);
994 goto reg;
995 /* FIXME: We need this on SHmedia32 because reload generates
996 some sign-extended HI or QI loads into DImode registers
997 but, because Pmode is SImode, the address ends up with a
998 subreg:SI of the DImode register. Maybe reload should be
999 fixed so as to apply alter_subreg to such loads? */
1000 case IF_THEN_ELSE:
1001 gcc_assert (trapping_target_operand (x, VOIDmode));
1002 x = XEXP (XEXP (x, 2), 0);
1003 goto default_output;
1004 case SUBREG:
1005 gcc_assert (SUBREG_BYTE (x) == 0
1006 && GET_CODE (SUBREG_REG (x)) == REG);
1007
1008 x = SUBREG_REG (x);
1009 /* Fall through. */
1010
1011 reg:
1012 case REG:
1013 regno += REGNO (x);
1014 if (FP_REGISTER_P (regno)
1015 && mode == V16SFmode)
1016 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1017 else if (FP_REGISTER_P (REGNO (x))
1018 && mode == V4SFmode)
1019 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1020 else if (GET_CODE (x) == REG
1021 && mode == V2SFmode)
1022 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1023 else if (FP_REGISTER_P (REGNO (x))
1024 && GET_MODE_SIZE (mode) > 4)
1025 fprintf ((stream), "d%s", reg_names[regno] + 1);
1026 else
1027 fputs (reg_names[regno], (stream));
1028 break;
1029
1030 case MEM:
1031 output_address (XEXP (x, 0));
1032 break;
1033
1034 default:
1035 if (TARGET_SH1)
1036 fputc ('#', stream);
1037 output_addr_const (stream, x);
1038 break;
1039 }
1040 break;
1041 }
1042 }
1043 \f
1044
1045 /* Encode symbol attributes of a SYMBOL_REF into its
1046 SYMBOL_REF_FLAGS. */
1047 static void
1048 sh_encode_section_info (tree decl, rtx rtl, int first)
1049 {
1050 default_encode_section_info (decl, rtl, first);
1051
1052 if (TREE_CODE (decl) == FUNCTION_DECL
1053 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1054 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1055 }
1056
1057 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1058 static void
1059 force_into (rtx value, rtx target)
1060 {
1061 value = force_operand (value, target);
1062 if (! rtx_equal_p (value, target))
1063 emit_insn (gen_move_insn (target, value));
1064 }
1065
1066 /* Emit code to perform a block move. Choose the best method.
1067
1068 OPERANDS[0] is the destination.
1069 OPERANDS[1] is the source.
1070 OPERANDS[2] is the size.
1071 OPERANDS[3] is the alignment safe to use. */
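/* A sketch of how this is typically reached (the movmemsi expander in
   sh.md is assumed here; see that file for the real pattern):

     if (expand_block_move (operands))
       DONE;
     else
       FAIL;

   A return value of 1 means a move sequence was emitted; 0 tells the
   expander to fall back to the generic block-move code.  */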
1072
1073 int
1074 expand_block_move (rtx *operands)
1075 {
1076 int align = INTVAL (operands[3]);
1077 int constp = (GET_CODE (operands[2]) == CONST_INT);
1078 int bytes = (constp ? INTVAL (operands[2]) : 0);
1079
1080 if (! constp)
1081 return 0;
1082
1083 /* If we could use mov.l to move words and dest is word-aligned, we
1084 can use movua.l for loads and still generate a relatively short
1085 and efficient sequence. */
1086 if (TARGET_SH4A_ARCH && align < 4
1087 && MEM_ALIGN (operands[0]) >= 32
1088 && can_move_by_pieces (bytes, 32))
1089 {
1090 rtx dest = copy_rtx (operands[0]);
1091 rtx src = copy_rtx (operands[1]);
1092 /* We could use different pseudos for each copied word, but
1093 since movua can only load into r0, it's kind of
1094 pointless. */
1095 rtx temp = gen_reg_rtx (SImode);
1096 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1097 int copied = 0;
1098
1099 while (copied + 4 <= bytes)
1100 {
1101 rtx to = adjust_address (dest, SImode, copied);
1102 rtx from = adjust_automodify_address (src, BLKmode,
1103 src_addr, copied);
1104
1105 set_mem_size (from, GEN_INT (4));
1106 emit_insn (gen_movua (temp, from));
1107 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1108 emit_move_insn (to, temp);
1109 copied += 4;
1110 }
1111
1112 if (copied < bytes)
1113 move_by_pieces (adjust_address (dest, BLKmode, copied),
1114 adjust_automodify_address (src, BLKmode,
1115 src_addr, copied),
1116 bytes - copied, align, 0);
1117
1118 return 1;
1119 }
1120
1121 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1122 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1123 if (align < 4 || (bytes % 4 != 0))
1124 return 0;
1125
1126 if (TARGET_HARD_SH4)
1127 {
1128 if (bytes < 12)
1129 return 0;
1130 else if (bytes == 12)
1131 {
1132 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1133 rtx r4 = gen_rtx_REG (SImode, 4);
1134 rtx r5 = gen_rtx_REG (SImode, 5);
1135
1136 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1137 force_into (XEXP (operands[0], 0), r4);
1138 force_into (XEXP (operands[1], 0), r5);
1139 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1140 return 1;
1141 }
1142 else if (! TARGET_SMALLCODE)
1143 {
1144 const char *entry_name;
1145 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1146 int dwords;
1147 rtx r4 = gen_rtx_REG (SImode, 4);
1148 rtx r5 = gen_rtx_REG (SImode, 5);
1149 rtx r6 = gen_rtx_REG (SImode, 6);
1150
1151 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1152 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1153 force_into (XEXP (operands[0], 0), r4);
1154 force_into (XEXP (operands[1], 0), r5);
1155
1156 dwords = bytes >> 3;
1157 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1158 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1159 return 1;
1160 }
1161 else
1162 return 0;
1163 }
1164 if (bytes < 64)
1165 {
1166 char entry[30];
1167 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1168 rtx r4 = gen_rtx_REG (SImode, 4);
1169 rtx r5 = gen_rtx_REG (SImode, 5);
1170
1171 sprintf (entry, "__movmemSI%d", bytes);
1172 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1173 force_into (XEXP (operands[0], 0), r4);
1174 force_into (XEXP (operands[1], 0), r5);
1175 emit_insn (gen_block_move_real (func_addr_rtx));
1176 return 1;
1177 }
1178
1179 /* This is the same number of bytes as a memcpy call, but to a different,
1180 less common function name, so this will occasionally use more space. */
1181 if (! TARGET_SMALLCODE)
1182 {
1183 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1184 int final_switch, while_loop;
1185 rtx r4 = gen_rtx_REG (SImode, 4);
1186 rtx r5 = gen_rtx_REG (SImode, 5);
1187 rtx r6 = gen_rtx_REG (SImode, 6);
1188
1189 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1190 force_into (XEXP (operands[0], 0), r4);
1191 force_into (XEXP (operands[1], 0), r5);
1192
1193 /* r6 controls the size of the move. 16 is decremented from it
1194 for each 64 bytes moved. Then the negative bit left over is used
1195 as an index into a list of move instructions. e.g., a 72 byte move
1196 would be set up with size(r6) = 14, for one iteration through the
1197 big while loop, and a switch of -2 for the last part. */
1198
1199 final_switch = 16 - ((bytes / 4) % 16);
1200 while_loop = ((bytes / 4) / 16 - 1) * 16;
1201 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1202 emit_insn (gen_block_lump_real (func_addr_rtx));
1203 return 1;
1204 }
1205
1206 return 0;
1207 }
1208
1209 /* Prepare operands for a move define_expand; specifically, one of the
1210 operands must be in a register. */
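/* A sketch of the usual caller (the mov<mode> expanders in sh.md are
   assumed here):

     if (prepare_move_operands (operands, SImode))
       DONE;

   In this version the function always returns 0, so the expander falls
   through and emits the (now legitimized) move itself.  */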
1211
1212 int
1213 prepare_move_operands (rtx operands[], enum machine_mode mode)
1214 {
1215 if ((mode == SImode || mode == DImode)
1216 && flag_pic
1217 && ! ((mode == Pmode || mode == ptr_mode)
1218 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1219 {
1220 rtx temp;
1221 if (SYMBOLIC_CONST_P (operands[1]))
1222 {
1223 if (GET_CODE (operands[0]) == MEM)
1224 operands[1] = force_reg (Pmode, operands[1]);
1225 else if (TARGET_SHMEDIA
1226 && GET_CODE (operands[1]) == LABEL_REF
1227 && target_reg_operand (operands[0], mode))
1228 /* It's ok. */;
1229 else
1230 {
1231 temp = (!can_create_pseudo_p ()
1232 ? operands[0]
1233 : gen_reg_rtx (Pmode));
1234 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1235 }
1236 }
1237 else if (GET_CODE (operands[1]) == CONST
1238 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1239 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1240 {
1241 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1242 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1243 mode, temp);
1244 operands[1] = expand_binop (mode, add_optab, temp,
1245 XEXP (XEXP (operands[1], 0), 1),
1246 (!can_create_pseudo_p ()
1247 ? temp
1248 : gen_reg_rtx (Pmode)),
1249 0, OPTAB_LIB_WIDEN);
1250 }
1251 }
1252
1253 if (! reload_in_progress && ! reload_completed)
1254 {
1255 /* Copy the source to a register if both operands aren't registers. */
1256 if (! register_operand (operands[0], mode)
1257 && ! sh_register_operand (operands[1], mode))
1258 operands[1] = copy_to_mode_reg (mode, operands[1]);
1259
1260 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1261 {
1262 /* This is like change_address_1 (operands[0], mode, 0, 1),
1263 except that we can't use that function because it is static. */
1264 rtx new_rtx = change_address (operands[0], mode, 0);
1265 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1266 operands[0] = new_rtx;
1267 }
1268
1269 /* This case can happen while generating code to move the result
1270 of a library call to the target. Reject `st r0,@(rX,rY)' because
1271 reload will fail to find a spill register for rX, since r0 is already
1272 being used for the source. */
1273 else if (TARGET_SH1
1274 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1275 && GET_CODE (operands[0]) == MEM
1276 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1277 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1278 operands[1] = copy_to_mode_reg (mode, operands[1]);
1279 }
1280
1281 if (mode == Pmode || mode == ptr_mode)
1282 {
1283 rtx op0, op1, opc;
1284 enum tls_model tls_kind;
1285
1286 op0 = operands[0];
1287 op1 = operands[1];
1288 if (GET_CODE (op1) == CONST
1289 && GET_CODE (XEXP (op1, 0)) == PLUS
1290 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1291 != TLS_MODEL_NONE))
1292 {
1293 opc = XEXP (XEXP (op1, 0), 1);
1294 op1 = XEXP (XEXP (op1, 0), 0);
1295 }
1296 else
1297 opc = NULL_RTX;
1298
1299 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1300 {
1301 rtx tga_op1, tga_ret, tmp, tmp2;
1302
1303 switch (tls_kind)
1304 {
1305 case TLS_MODEL_GLOBAL_DYNAMIC:
1306 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1307 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1308 op1 = tga_ret;
1309 break;
1310
1311 case TLS_MODEL_LOCAL_DYNAMIC:
1312 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1313 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1314
1315 tmp = gen_reg_rtx (Pmode);
1316 emit_move_insn (tmp, tga_ret);
1317
1318 if (register_operand (op0, Pmode))
1319 tmp2 = op0;
1320 else
1321 tmp2 = gen_reg_rtx (Pmode);
1322
1323 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1324 op1 = tmp2;
1325 break;
1326
1327 case TLS_MODEL_INITIAL_EXEC:
1328 if (! flag_pic)
1329 {
1330 /* Don't schedule insns for getting GOT address when
1331 the first scheduling is enabled, to avoid spill
1332 failures for R0. */
1333 if (flag_schedule_insns)
1334 emit_insn (gen_blockage ());
1335 emit_insn (gen_GOTaddr2picreg ());
1336 emit_use (gen_rtx_REG (SImode, PIC_REG));
1337 if (flag_schedule_insns)
1338 emit_insn (gen_blockage ());
1339 }
1340 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1341 tmp = gen_sym2GOTTPOFF (op1);
1342 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1343 op1 = tga_op1;
1344 break;
1345
1346 case TLS_MODEL_LOCAL_EXEC:
1347 tmp2 = gen_reg_rtx (Pmode);
1348 emit_insn (gen_load_gbr (tmp2));
1349 tmp = gen_reg_rtx (Pmode);
1350 emit_insn (gen_symTPOFF2reg (tmp, op1));
1351
1352 if (register_operand (op0, Pmode))
1353 op1 = op0;
1354 else
1355 op1 = gen_reg_rtx (Pmode);
1356
1357 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1358 break;
1359
1360 default:
1361 gcc_unreachable ();
1362 }
1363 if (opc)
1364 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1365 operands[1] = op1;
1366 }
1367 }
1368
1369 return 0;
1370 }
1371
1372 enum rtx_code
1373 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1374 enum rtx_code comparison)
1375 {
1376 rtx op1;
1377 rtx scratch = NULL_RTX;
1378
1379 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1380 comparison = GET_CODE (operands[0]);
1381 else
1382 scratch = operands[4];
1383 if (GET_CODE (operands[1]) == CONST_INT
1384 && GET_CODE (operands[2]) != CONST_INT)
1385 {
1386 rtx tmp = operands[1];
1387
1388 operands[1] = operands[2];
1389 operands[2] = tmp;
1390 comparison = swap_condition (comparison);
1391 }
1392 if (GET_CODE (operands[2]) == CONST_INT)
1393 {
1394 HOST_WIDE_INT val = INTVAL (operands[2]);
1395 if ((val == -1 || val == -0x81)
1396 && (comparison == GT || comparison == LE))
1397 {
1398 comparison = (comparison == GT) ? GE : LT;
1399 operands[2] = gen_int_mode (val + 1, mode);
1400 }
1401 else if ((val == 1 || val == 0x80)
1402 && (comparison == GE || comparison == LT))
1403 {
1404 comparison = (comparison == GE) ? GT : LE;
1405 operands[2] = gen_int_mode (val - 1, mode);
1406 }
1407 else if (val == 1 && (comparison == GEU || comparison == LTU))
1408 {
1409 comparison = (comparison == GEU) ? NE : EQ;
1410 operands[2] = CONST0_RTX (mode);
1411 }
1412 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1413 {
1414 comparison = (comparison == GEU) ? GTU : LEU;
1415 operands[2] = gen_int_mode (val - 1, mode);
1416 }
1417 else if (val == 0 && (comparison == GTU || comparison == LEU))
1418 comparison = (comparison == GTU) ? NE : EQ;
1419 else if (mode == SImode
1420 && ((val == 0x7fffffff
1421 && (comparison == GTU || comparison == LEU))
1422 || ((unsigned HOST_WIDE_INT) val
1423 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1424 && (comparison == GEU || comparison == LTU))))
1425 {
1426 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1427 operands[2] = CONST0_RTX (mode);
1428 }
1429 }
1430 op1 = operands[1];
1431 if (can_create_pseudo_p ())
1432 operands[1] = force_reg (mode, op1);
1433 /* When we are handling DImode comparisons, we want to keep constants so
1434 that we can optimize the component comparisons; however, memory loads
1435 are better issued as a whole so that they can be scheduled well.
1436 SImode equality comparisons allow I08 constants, but only when they
1437 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1438 into a register, that register might as well be r0, and we allow the
1439 constant. If it is already in a register, this is likely to be
1440 allocated to a different hard register, thus we load the constant into
1441 a register unless it is zero. */
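/* For example (SImode, illustrative only): "r0 == 5" can keep the #5,
   since an EQ/NE test against r0 accepts an I08 constant, whereas
   "r4 == 5" or "r4 > 5" must load the 5 into a register - or into the
   scratch operand once reload has run and no new pseudo can be created. */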
1442 if (!REG_P (operands[2])
1443 && (GET_CODE (operands[2]) != CONST_INT
1444 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1445 && ((comparison != EQ && comparison != NE)
1446 || (REG_P (op1) && REGNO (op1) != R0_REG)
1447 || !satisfies_constraint_I08 (operands[2])))))
1448 {
1449 if (scratch && GET_MODE (scratch) == mode)
1450 {
1451 emit_move_insn (scratch, operands[2]);
1452 operands[2] = scratch;
1453 }
1454 else if (can_create_pseudo_p ())
1455 operands[2] = force_reg (mode, operands[2]);
1456 }
1457 return comparison;
1458 }
1459
1460 void
1461 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1462 {
1463 rtx (*branch_expander) (rtx) = gen_branch_true;
1464 rtx jump;
1465
1466 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1467 switch (comparison)
1468 {
1469 case NE: case LT: case LE: case LTU: case LEU:
1470 comparison = reverse_condition (comparison);
1471 branch_expander = gen_branch_false;
1472 default: ;
1473 }
1474 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1475 gen_rtx_fmt_ee (comparison, SImode,
1476 operands[1], operands[2])));
1477 jump = emit_jump_insn (branch_expander (operands[3]));
1478 if (probability >= 0)
1479 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1480
1481 }
1482
1483 /* ??? How should we distribute probabilities when more than one branch
1484 is generated? So far we only have some ad-hoc observations:
1485 - If the operands are random, they are likely to differ in both parts.
1486 - If comparing items in a hash chain, the operands are random or equal;
1487 operation should be EQ or NE.
1488 - If items are searched in an ordered tree from the root, we can expect
1489 the highpart to be unequal about half of the time; operation should be
1490 an inequality comparison, operands non-constant, and overall probability
1491 about 50%. Likewise for quicksort.
1492 - Range checks will often be made against constants. Even if we assume for
1493 simplicity an even distribution of the non-constant operand over a
1494 sub-range here, the same probability could be generated with differently
1495 wide sub-ranges - as long as the ratio of the part of the subrange that
1496 is before the threshold to the part that comes after the threshold stays
1497 the same. Thus, we can't really tell anything here;
1498 assuming random distribution is at least simple.
1499 */
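/* An illustrative decomposition (matching the switch below, not extra
   generated code): for a DImode "op1 > op2" (GTU) with a non-constant
   op2, expand_cbranchdi4 emits

     branch to the target if op1h > op2h    (msw_taken = GTU)
     skip the low-word test if op1h < op2h  (msw_skip  = LTU)
     branch to the target if op1l > op2l    (lsw_taken = GTU)

   so the low-word comparison only decides the result when the high
   words are equal.  */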
1500
1501 bool
1502 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1503 {
1504 enum rtx_code msw_taken, msw_skip, lsw_taken;
1505 rtx skip_label = NULL_RTX;
1506 rtx op1h, op1l, op2h, op2l;
1507 int num_branches;
1508 int prob, rev_prob;
1509 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1510 rtx scratch = operands[4];
1511
1512 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1513 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1514 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1515 op1l = gen_lowpart (SImode, operands[1]);
1516 op2l = gen_lowpart (SImode, operands[2]);
1517 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1518 prob = split_branch_probability;
1519 rev_prob = REG_BR_PROB_BASE - prob;
1520 switch (comparison)
1521 {
1522 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1523 That costs 1 cycle more when the first branch can be predicted taken,
1524 but saves us mispredicts because only one branch needs prediction.
1525 It also enables generating the cmpeqdi_t-1 pattern. */
1526 case EQ:
1527 if (TARGET_CMPEQDI_T)
1528 {
1529 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1530 emit_jump_insn (gen_branch_true (operands[3]));
1531 return true;
1532 }
1533 msw_skip = NE;
1534 lsw_taken = EQ;
1535 if (prob >= 0)
1536 {
1537 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32). */
1538
1539 msw_skip_prob = rev_prob;
1540 if (REG_BR_PROB_BASE <= 65535)
1541 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1542 else
1543 {
1544 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1545 lsw_taken_prob
1546 = (prob
1547 ? (REG_BR_PROB_BASE
1548 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1549 / ((HOST_WIDEST_INT) prob << 32)))
1550 : 0);
1551 }
1552 }
1553 break;
1554 case NE:
1555 if (TARGET_CMPEQDI_T)
1556 {
1557 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1558 emit_jump_insn (gen_branch_false (operands[3]));
1559 return true;
1560 }
1561 msw_taken = NE;
1562 msw_taken_prob = prob;
1563 lsw_taken = NE;
1564 lsw_taken_prob = 0;
1565 break;
1566 case GTU: case GT:
1567 msw_taken = comparison;
1568 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1569 break;
1570 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1571 msw_skip = swap_condition (msw_taken);
1572 lsw_taken = GTU;
1573 break;
1574 case GEU: case GE:
1575 if (op2l == CONST0_RTX (SImode))
1576 msw_taken = comparison;
1577 else
1578 {
1579 msw_taken = comparison == GE ? GT : GTU;
1580 msw_skip = swap_condition (msw_taken);
1581 lsw_taken = GEU;
1582 }
1583 break;
1584 case LTU: case LT:
1585 msw_taken = comparison;
1586 if (op2l == CONST0_RTX (SImode))
1587 break;
1588 msw_skip = swap_condition (msw_taken);
1589 lsw_taken = LTU;
1590 break;
1591 case LEU: case LE:
1592 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1593 msw_taken = comparison;
1594 else
1595 {
1596 lsw_taken = LEU;
1597 if (comparison == LE)
1598 msw_taken = LT;
1599 else if (op2h != CONST0_RTX (SImode))
1600 msw_taken = LTU;
1601 else
1602 break;
1603 msw_skip = swap_condition (msw_taken);
1604 }
1605 break;
1606 default: return false;
1607 }
1608 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1609 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1610 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1611 if (comparison != EQ && comparison != NE && num_branches > 1)
1612 {
1613 if (!CONSTANT_P (operands[2])
1614 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1615 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1616 {
1617 msw_taken_prob = prob / 2U;
1618 msw_skip_prob
1619 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1620 lsw_taken_prob = prob;
1621 }
1622 else
1623 {
1624 msw_taken_prob = prob;
1625 msw_skip_prob = REG_BR_PROB_BASE;
1626 /* ??? If we have a constant op2h, should we use that when
1627 calculating lsw_taken_prob? */
1628 lsw_taken_prob = prob;
1629 }
1630 }
1631 operands[1] = op1h;
1632 operands[2] = op2h;
1633 operands[4] = NULL_RTX;
1634 if (reload_completed
1635 && ! arith_reg_or_0_operand (op2h, SImode)
1636 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1637 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1638 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1639 {
1640 emit_move_insn (scratch, operands[2]);
1641 operands[2] = scratch;
1642 }
1643 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1644 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1645 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1646 {
1647 rtx taken_label = operands[3];
1648
1649 /* Operands were possibly modified, but msw_skip doesn't expect this.
1650 Always use the original ones. */
1651 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1652 {
1653 operands[1] = op1h;
1654 operands[2] = op2h;
1655 }
1656
1657 operands[3] = skip_label = gen_label_rtx ();
1658 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
1659 operands[3] = taken_label;
1660 }
1661 operands[1] = op1l;
1662 operands[2] = op2l;
1663 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
1664 {
1665 if (reload_completed
1666 && ! arith_reg_or_0_operand (op2l, SImode)
1667 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
1668 {
1669 emit_move_insn (scratch, operands[2]);
1670 operands[2] = scratch;
1671 }
1672 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
1673 }
1674 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1675 emit_label (skip_label);
1676 return true;
1677 }
1678
1679 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
1680
1681 static void
1682 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
1683 {
1684 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1685 {
1686 insn = gen_rtx_PARALLEL (VOIDmode,
1687 gen_rtvec (2, insn,
1688 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1689 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1690 }
1691 else
1692 emit_insn (insn);
1693 }
1694
1695 /* Prepare the operands for an scc instruction; make sure that the
1696 compare has been done and the result is in T_REG. */
1697 void
1698 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
1699 {
1700 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1701 enum rtx_code oldcode = code;
1702 enum machine_mode mode;
1703
1704 /* First need a compare insn. */
1705 switch (code)
1706 {
1707 case NE:
1708 /* It isn't possible to handle this case. */
1709 gcc_unreachable ();
1710 case LT:
1711 code = GT;
1712 break;
1713 case LE:
1714 code = GE;
1715 break;
1716 case LTU:
1717 code = GTU;
1718 break;
1719 case LEU:
1720 code = GEU;
1721 break;
1722 default:
1723 break;
1724 }
1725 if (code != oldcode)
1726 {
1727 rtx tmp = op0;
1728 op0 = op1;
1729 op1 = tmp;
1730 }
1731
1732 mode = GET_MODE (op0);
1733 if (mode == VOIDmode)
1734 mode = GET_MODE (op1);
1735
1736 op0 = force_reg (mode, op0);
1737 if ((code != EQ && code != NE
1738 && (op1 != const0_rtx
1739 || code == GTU || code == GEU || code == LTU || code == LEU))
1740 || (mode == DImode && op1 != const0_rtx)
1741 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1742 op1 = force_reg (mode, op1);
1743
1744 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
1745 gen_rtx_fmt_ee (code, SImode, op0, op1)),
1746 mode);
1747 }
1748
1749 rtx
1750 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
1751 rtx op0, rtx op1)
1752 {
1753 rtx target = gen_reg_rtx (SImode);
1754 rtx tmp;
1755
1756 gcc_assert (TARGET_SHMEDIA);
1757 switch (code)
1758 {
1759 case EQ:
1760 case GT:
1761 case LT:
1762 case UNORDERED:
1763 case GTU:
1764 case LTU:
1765 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
1766 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
1767 code = NE;
1768 break;
1769
1770 case NE:
1771 case GE:
1772 case LE:
1773 case ORDERED:
1774 case GEU:
1775 case LEU:
1776 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
1777 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
1778 code = EQ;
1779 break;
1780
1781 case UNEQ:
1782 case UNGE:
1783 case UNGT:
1784 case UNLE:
1785 case UNLT:
1786 case LTGT:
1787 return NULL_RTX;
1788
1789 default:
1790 gcc_unreachable ();
1791 }
1792
1793 if (mode == DImode)
1794 {
1795 rtx t2 = gen_reg_rtx (DImode);
1796 emit_insn (gen_extendsidi2 (t2, target));
1797 target = t2;
1798 }
1799
1800 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
1801 }
1802
1803 /* Called from the md file, set up the operands of a compare instruction. */
1804
1805 void
1806 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
1807 {
1808 enum rtx_code code = GET_CODE (operands[0]);
1809 enum rtx_code branch_code;
1810 rtx op0 = operands[1];
1811 rtx op1 = operands[2];
1812 rtx insn, tem;
1813 bool need_ccmpeq = false;
1814
1815 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
1816 {
1817 op0 = force_reg (mode, op0);
1818 op1 = force_reg (mode, op1);
1819 }
1820 else
1821 {
1822 if (code != EQ || mode == DImode)
1823 {
1824 /* Force args into regs, since we can't use constants here. */
1825 op0 = force_reg (mode, op0);
1826 if (op1 != const0_rtx || code == GTU || code == GEU)
1827 op1 = force_reg (mode, op1);
1828 }
1829 }
1830
1831 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1832 {
1833 if (code == LT
1834 || (code == LE && TARGET_IEEE && TARGET_SH2E)
1835 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
1836 {
1837 tem = op0, op0 = op1, op1 = tem;
1838 code = swap_condition (code);
1839 }
1840
1841 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
1842 if (code == GE)
1843 {
1844 gcc_assert (TARGET_IEEE && TARGET_SH2E);
1845 need_ccmpeq = true;
1846 code = GT;
1847 }
1848
1849 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
1850 to EQ/GT respectively. */
1851 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
1852 }
1853
1854 switch (code)
1855 {
1856 case EQ:
1857 case GT:
1858 case GE:
1859 case GTU:
1860 case GEU:
1861 branch_code = code;
1862 break;
1863 case NE:
1864 case LT:
1865 case LE:
1866 case LTU:
1867 case LEU:
1868 branch_code = reverse_condition (code);
1869 break;
1870 default:
1871 gcc_unreachable ();
1872 }
1873
1874 insn = gen_rtx_SET (VOIDmode,
1875 gen_rtx_REG (SImode, T_REG),
1876 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
1877
1878 sh_emit_set_t_insn (insn, mode);
1879 if (need_ccmpeq)
1880 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
1881
1882 if (branch_code == code)
1883 emit_jump_insn (gen_branch_true (operands[3]));
1884 else
1885 emit_jump_insn (gen_branch_false (operands[3]));
1886 }
1887
1888 void
1889 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
1890 {
1891 enum rtx_code code = GET_CODE (operands[1]);
1892 rtx op0 = operands[2];
1893 rtx op1 = operands[3];
1894 rtx lab = NULL_RTX;
1895 bool invert = false;
1896 rtx tem;
1897
1898 op0 = force_reg (mode, op0);
1899 if ((code != EQ && code != NE
1900 && (op1 != const0_rtx
1901 || code == GTU || code == GEU || code == LTU || code == LEU))
1902 || (mode == DImode && op1 != const0_rtx)
1903 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1904 op1 = force_reg (mode, op1);
1905
1906 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1907 {
1908 if (code == LT || code == LE)
1909 {
1910 code = swap_condition (code);
1911 tem = op0, op0 = op1, op1 = tem;
1912 }
1913 if (code == GE)
1914 {
1915 if (TARGET_IEEE)
1916 {
1917 lab = gen_label_rtx ();
1918 sh_emit_scc_to_t (EQ, op0, op1);
1919 emit_jump_insn (gen_branch_true (lab));
1920 code = GT;
1921 }
1922 else
1923 {
1924 code = LT;
1925 invert = true;
1926 }
1927 }
1928 }
1929
1930 if (code == NE)
1931 {
1932 code = EQ;
1933 invert = true;
1934 }
1935
1936 sh_emit_scc_to_t (code, op0, op1);
1937 if (lab)
1938 emit_label (lab);
1939 if (invert)
1940 emit_insn (gen_movnegt (operands[0]));
1941 else
1942 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
1943 }
1944 \f
1945 /* Functions to output assembly code. */
1946
1947 /* Return a sequence of instructions to perform DI or DF move.
1948
1949 Since the SH cannot move a DI or DF in one instruction, we have
1950 to take care when we see overlapping source and dest registers. */
1951
1952 const char *
1953 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1954 enum machine_mode mode)
1955 {
1956 rtx dst = operands[0];
1957 rtx src = operands[1];
1958
1959 if (GET_CODE (dst) == MEM
1960 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1961 return "mov.l %T1,%0\n\tmov.l %1,%0";
1962
1963 if (register_operand (dst, mode)
1964 && register_operand (src, mode))
1965 {
1966 if (REGNO (src) == MACH_REG)
1967 return "sts mach,%S0\n\tsts macl,%R0";
1968
1969 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1970 when mov.d r1,r0 do r1->r0 then r2->r1. */
1971
1972 if (REGNO (src) + 1 == REGNO (dst))
1973 return "mov %T1,%T0\n\tmov %1,%0";
1974 else
1975 return "mov %1,%0\n\tmov %T1,%T0";
1976 }
1977 else if (GET_CODE (src) == CONST_INT)
1978 {
1979 if (INTVAL (src) < 0)
1980 output_asm_insn ("mov #-1,%S0", operands);
1981 else
1982 output_asm_insn ("mov #0,%S0", operands);
1983
1984 return "mov %1,%R0";
1985 }
1986 else if (GET_CODE (src) == MEM)
1987 {
1988 int ptrreg = -1;
1989 int dreg = REGNO (dst);
1990 rtx inside = XEXP (src, 0);
1991
1992 switch (GET_CODE (inside))
1993 {
1994 case REG:
1995 ptrreg = REGNO (inside);
1996 break;
1997
1998 case SUBREG:
1999 ptrreg = subreg_regno (inside);
2000 break;
2001
2002 case PLUS:
2003 ptrreg = REGNO (XEXP (inside, 0));
2004 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2005 an offsettable address. Unfortunately, offsettable addresses use
2006 QImode to check the offset, and a QImode offsettable address
2007 requires r0 for the other operand, which is not currently
2008 supported, so we can't use the 'o' constraint.
2009 Thus we must check for and handle r0+REG addresses here.
2010 We punt for now, since this is likely very rare. */
2011 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
2012 break;
2013
2014 case LABEL_REF:
2015 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2016 case POST_INC:
2017 return "mov.l %1,%0\n\tmov.l %1,%T0";
2018 default:
2019 gcc_unreachable ();
2020 }
2021
2022 /* Work out the safe way to copy. Copy into the second half first. */
2023 if (dreg == ptrreg)
2024 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2025 }
2026
2027 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2028 }
2029
2030 /* Print an instruction which would have gone into a delay slot after
2031 another instruction, but couldn't because the other instruction expanded
2032 into a sequence where putting the slot insn at the end wouldn't work. */
2033
2034 static void
2035 print_slot (rtx insn)
2036 {
2037 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2038
2039 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2040 }
2041
2042 const char *
2043 output_far_jump (rtx insn, rtx op)
2044 {
2045 struct { rtx lab, reg, op; } this_jmp;
2046 rtx braf_base_lab = NULL_RTX;
2047 const char *jump;
2048 int far;
2049 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2050 rtx prev;
2051
2052 this_jmp.lab = gen_label_rtx ();
2053
2054 if (TARGET_SH2
2055 && offset >= -32764
2056 && offset - get_attr_length (insn) <= 32766)
2057 {
2058 far = 0;
2059 jump = "mov.w %O0,%1; braf %1";
2060 }
2061 else
2062 {
2063 far = 1;
2064 if (flag_pic)
2065 {
2066 if (TARGET_SH2)
2067 jump = "mov.l %O0,%1; braf %1";
2068 else
2069 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2070 }
2071 else
2072 jump = "mov.l %O0,%1; jmp @%1";
2073 }
2074 /* If we have a scratch register available, use it. */
2075 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
2076 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2077 {
2078 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2079 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2080 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2081 output_asm_insn (jump, &this_jmp.lab);
2082 if (dbr_sequence_length ())
2083 print_slot (final_sequence);
2084 else
2085 output_asm_insn ("nop", 0);
2086 }
2087 else
2088 {
2089 /* Output the delay slot insn first if any. */
2090 if (dbr_sequence_length ())
2091 print_slot (final_sequence);
2092
2093 this_jmp.reg = gen_rtx_REG (SImode, 13);
2094 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2095 Fortunately, MACL is fixed and call-clobbered, and we never
2096 need its value across jumps, so save r13 in it instead of in
2097 the stack. */
2098 if (TARGET_SH5)
2099 output_asm_insn ("lds r13, macl", 0);
2100 else
2101 output_asm_insn ("mov.l r13,@-r15", 0);
2102 output_asm_insn (jump, &this_jmp.lab);
2103 if (TARGET_SH5)
2104 output_asm_insn ("sts macl, r13", 0);
2105 else
2106 output_asm_insn ("mov.l @r15+,r13", 0);
2107 }
2108 if (far && flag_pic && TARGET_SH2)
2109 {
2110 braf_base_lab = gen_label_rtx ();
2111 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2112 CODE_LABEL_NUMBER (braf_base_lab));
2113 }
2114 if (far)
2115 output_asm_insn (".align 2", 0);
2116 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2117 this_jmp.op = op;
2118 if (far && flag_pic)
2119 {
2120 if (TARGET_SH2)
2121 this_jmp.lab = braf_base_lab;
2122 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2123 }
2124 else
2125 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2126 return "";
2127 }
2128
2129 /* Local label counter, used for constants in the pool and inside
2130 pattern branches. */
2131
2132 static int lf = 100;
2133
2134 /* Output code for ordinary branches. */
2135
2136 const char *
2137 output_branch (int logic, rtx insn, rtx *operands)
2138 {
2139 switch (get_attr_length (insn))
2140 {
2141 case 6:
2142 /* This can happen if filling the delay slot has caused a forward
2143 branch to exceed its range (we could reverse it, but only
2144 when we know we won't overextend other branches; this should
2145 best be handled by relaxation).
2146 It can also happen when other condbranches hoist delay slot insns
2147 from their destinations, thus increasing code size. */
2148 But the branch will still be in the range -4092..+4098 bytes. */
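/* For instance (illustrative), with logic == 1 a too-distant "bt target"
is rewritten below as
	bf.s	LFnn		(or bf/s, depending on ASSEMBLER_DIALECT)
	<delay slot insn>
	bra	target
	nop
   LFnn:
keeping the delay slot insn when it is not annulled. */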
2149
2150 if (! TARGET_RELAX)
2151 {
2152 int label = lf++;
2153 /* The call to print_slot will clobber the operands. */
2154 rtx op0 = operands[0];
2155
2156 /* If the instruction in the delay slot is annulled (true), then
2157 there is no delay slot where we can put it now. The only safe
2158 place for it is after the label. final will do that by default. */
2159
2160 if (final_sequence
2161 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2162 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2163 {
2164 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2165 ASSEMBLER_DIALECT ? "/" : ".", label);
2166 print_slot (final_sequence);
2167 }
2168 else
2169 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2170
2171 output_asm_insn ("bra\t%l0", &op0);
2172 fprintf (asm_out_file, "\tnop\n");
2173 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2174
2175 return "";
2176 }
2177 /* When relaxing, handle this like a short branch. The linker
2178 will fix it up if it still doesn't fit after relaxation. */
2179 case 2:
2180 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2181
2182 /* These are for SH2e, in which we have to account for the
2183 extra nop because of the hardware bug in annulled branches. */
2184 case 8:
2185 if (! TARGET_RELAX)
2186 {
2187 int label = lf++;
2188
2189 gcc_assert (!final_sequence
2190 || !(INSN_ANNULLED_BRANCH_P
2191 (XVECEXP (final_sequence, 0, 0))));
2192 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2193 logic ? "f" : "t",
2194 ASSEMBLER_DIALECT ? "/" : ".", label);
2195 fprintf (asm_out_file, "\tnop\n");
2196 output_asm_insn ("bra\t%l0", operands);
2197 fprintf (asm_out_file, "\tnop\n");
2198 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2199
2200 return "";
2201 }
2202 /* When relaxing, fall through. */
2203 case 4:
2204 {
2205 char buffer[10];
2206
2207 sprintf (buffer, "b%s%ss\t%%l0",
2208 logic ? "t" : "f",
2209 ASSEMBLER_DIALECT ? "/" : ".");
2210 output_asm_insn (buffer, &operands[0]);
2211 return "nop";
2212 }
2213
2214 default:
2215 /* There should be no longer branches now - that would
2216 indicate that something has destroyed the branches set
2217 up in machine_dependent_reorg. */
2218 gcc_unreachable ();
2219 }
2220 }
2221
2222 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2223 fill in operand 9 as a label to the successor insn.
2224 We try to use jump threading where possible.
2225 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2226 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2227 follow jmp and bt, if the address is in range. */
2228 const char *
2229 output_branchy_insn (enum rtx_code code, const char *templ,
2230 rtx insn, rtx *operands)
2231 {
2232 rtx next_insn = NEXT_INSN (insn);
2233
2234 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
2235 {
2236 rtx src = SET_SRC (PATTERN (next_insn));
2237 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2238 {
2239 /* Following branch not taken */
2240 operands[9] = gen_label_rtx ();
2241 emit_label_after (operands[9], next_insn);
2242 INSN_ADDRESSES_NEW (operands[9],
2243 INSN_ADDRESSES (INSN_UID (next_insn))
2244 + get_attr_length (next_insn));
2245 return templ;
2246 }
2247 else
2248 {
2249 int offset = (branch_dest (next_insn)
2250 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2251 if (offset >= -252 && offset <= 258)
2252 {
2253 if (GET_CODE (src) == IF_THEN_ELSE)
2254 /* branch_true */
2255 src = XEXP (src, 1);
2256 operands[9] = src;
2257 return templ;
2258 }
2259 }
2260 }
2261 operands[9] = gen_label_rtx ();
2262 emit_label_after (operands[9], insn);
2263 INSN_ADDRESSES_NEW (operands[9],
2264 INSN_ADDRESSES (INSN_UID (insn))
2265 + get_attr_length (insn));
2266 return templ;
2267 }
2268
2269 const char *
2270 output_ieee_ccmpeq (rtx insn, rtx *operands)
2271 {
2272 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2273 insn, operands);
2274 }
2275 \f
2276 /* Output the start of the assembler file. */
2277
2278 static void
2279 sh_file_start (void)
2280 {
2281 default_file_start ();
2282
2283 #ifdef SYMBIAN
2284 /* Declare the .directive section before it is used. */
2285 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2286 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2287 #endif
2288
2289 if (TARGET_ELF)
2290 /* We need to show the text section with the proper
2291 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2292 emits it without attributes, or else GAS
2293 will complain. We can teach GAS specifically about the
2294 default attributes for our choice of text section, but
2295 then we would have to change GAS again if/when we change
2296 the text section name. */
2297 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2298 else
2299 /* Switch to the data section so that the coffsem symbol
2300 isn't in the text section. */
2301 switch_to_section (data_section);
2302
2303 if (TARGET_LITTLE_ENDIAN)
2304 fputs ("\t.little\n", asm_out_file);
2305
2306 if (!TARGET_ELF)
2307 {
2308 if (TARGET_SHCOMPACT)
2309 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2310 else if (TARGET_SHMEDIA)
2311 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2312 TARGET_SHMEDIA64 ? 64 : 32);
2313 }
2314 }
2315 \f
2316 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2317
2318 static bool
2319 unspec_caller_rtx_p (rtx pat)
2320 {
2321 rtx base, offset;
2322 int i;
2323
2324 split_const (pat, &base, &offset);
2325 if (GET_CODE (base) == UNSPEC)
2326 {
2327 if (XINT (base, 1) == UNSPEC_CALLER)
2328 return true;
2329 for (i = 0; i < XVECLEN (base, 0); i++)
2330 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2331 return true;
2332 }
2333 return false;
2334 }
2335
2336 /* Indicate that INSN cannot be duplicated. This is true for an insn
2337 that generates a unique label. */
2338
2339 static bool
2340 sh_cannot_copy_insn_p (rtx insn)
2341 {
2342 rtx pat;
2343
2344 if (!reload_completed || !flag_pic)
2345 return false;
2346
2347 if (GET_CODE (insn) != INSN)
2348 return false;
2349 if (asm_noperands (insn) >= 0)
2350 return false;
2351
2352 pat = PATTERN (insn);
2353 if (GET_CODE (pat) != SET)
2354 return false;
2355 pat = SET_SRC (pat);
2356
2357 if (unspec_caller_rtx_p (pat))
2358 return true;
2359
2360 return false;
2361 }
2362 \f
2363 /* Actual number of instructions used to make a shift by N. */
2364 static const char ashiftrt_insns[] =
2365 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2366
2367 /* Left shift and logical right shift are the same. */
2368 static const char shift_insns[] =
2369 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2370
2371 /* Individual shift amounts needed to get the above length sequences.
2372 One bit right shifts clobber the T bit, so when possible, put one bit
2373 shifts in the middle of the sequence, so the ends are eligible for
2374 branch delay slots. */
2375 static const short shift_amounts[32][5] = {
2376 {0}, {1}, {2}, {2, 1},
2377 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2378 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2379 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2380 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2381 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2382 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2383 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
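/* For example (illustrative): a constant shift by 13 uses the amounts
   {8, 2, 1, 2} above, i.e. four shift instructions, matching
   shift_insns[13] == 4, with the one-bit shift kept away from the ends
   of the sequence as explained in the comment above.  A negative entry,
   such as the -2 in {8, -2, 8} used for a shift by 14, denotes a two-bit
   shift in the opposite direction (see gen_ashift below).  */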
2384
2385 /* Likewise, but for shift amounts < 16, up to three highmost bits
2386 might be clobbered. This is typically used when combined with some
2387 kind of sign or zero extension. */
2388
2389 static const char ext_shift_insns[] =
2390 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2391
2392 static const short ext_shift_amounts[32][4] = {
2393 {0}, {1}, {2}, {2, 1},
2394 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2395 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2396 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2397 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2398 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2399 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2400 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2401
2402 /* Assuming we have a value that has been sign-extended by at least one bit,
2403 can we use the ext_shift_amounts, with the last shift turned into an arithmetic
2404 shift, to shift it by N without data loss, and more quickly than by other means? */
2405 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
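/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15, the two entries
   above whose sequences ({8, -1} and {16, -1}) end in a single one-bit
   right shift that can be made arithmetic at no extra cost.  */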
2406
2407 /* This is used in length attributes in sh.md to help compute the length
2408 of arbitrary constant shift instructions. */
2409
2410 int
2411 shift_insns_rtx (rtx insn)
2412 {
2413 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2414 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2415 enum rtx_code shift_code = GET_CODE (set_src);
2416
2417 switch (shift_code)
2418 {
2419 case ASHIFTRT:
2420 return ashiftrt_insns[shift_count];
2421 case LSHIFTRT:
2422 case ASHIFT:
2423 return shift_insns[shift_count];
2424 default:
2425 gcc_unreachable ();
2426 }
2427 }
2428
2429 /* Return the cost of a shift. */
2430
2431 static inline int
2432 shiftcosts (rtx x)
2433 {
2434 int value;
2435
2436 if (TARGET_SHMEDIA)
2437 return 1;
2438
2439 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2440 {
2441 if (GET_MODE (x) == DImode
2442 && GET_CODE (XEXP (x, 1)) == CONST_INT
2443 && INTVAL (XEXP (x, 1)) == 1)
2444 return 2;
2445
2446 /* Everything else is invalid, because there is no pattern for it. */
2447 return MAX_COST;
2448 }
2449 /* If the shift is by a non-constant amount, then it will be expensive. */
2450 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2451 return SH_DYNAMIC_SHIFT_COST;
2452
2453 /* Otherwise, return the true cost in instructions. Cope with out of range
2454 shift counts more or less arbitrarily. */
2455 value = INTVAL (XEXP (x, 1)) & 31;
2456
2457 if (GET_CODE (x) == ASHIFTRT)
2458 {
2459 int cost = ashiftrt_insns[value];
2460 /* If SH3, then we put the constant in a reg and use shad. */
2461 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2462 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2463 return cost;
2464 }
2465 else
2466 return shift_insns[value];
2467 }
2468
2469 /* Return the cost of an AND operation. */
2470
2471 static inline int
2472 andcosts (rtx x)
2473 {
2474 int i;
2475
2476 /* ANDing with a register is a single-cycle AND instruction. */
2477 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2478 return 1;
2479
2480 i = INTVAL (XEXP (x, 1));
2481
2482 if (TARGET_SHMEDIA)
2483 {
2484 if (satisfies_constraint_I10 (XEXP (x, 1))
2485 || satisfies_constraint_J16 (XEXP (x, 1)))
2486 return 1;
2487 else
2488 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2489 }
2490
2491 /* These constants are single cycle extu.[bw] instructions. */
2492 if (i == 0xff || i == 0xffff)
2493 return 1;
2494 /* Constants that can be used in an and immediate instruction in a single
2495 cycle, but this requires r0, so make it a little more expensive. */
2496 if (CONST_OK_FOR_K08 (i))
2497 return 2;
2498 /* Constants that can be loaded with a mov immediate and an and.
2499 This case is probably unnecessary. */
2500 if (CONST_OK_FOR_I08 (i))
2501 return 2;
2502 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2503 This case is probably unnecessary. */
2504 return 3;
2505 }
2506
2507 /* Return the cost of an addition or a subtraction. */
2508
2509 static inline int
2510 addsubcosts (rtx x)
2511 {
2512 /* Adding a register is a single cycle insn. */
2513 if (GET_CODE (XEXP (x, 1)) == REG
2514 || GET_CODE (XEXP (x, 1)) == SUBREG)
2515 return 1;
2516
2517 /* Likewise for small constants. */
2518 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2519 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2520 return 1;
2521
2522 if (TARGET_SHMEDIA)
2523 switch (GET_CODE (XEXP (x, 1)))
2524 {
2525 case CONST:
2526 case LABEL_REF:
2527 case SYMBOL_REF:
2528 return TARGET_SHMEDIA64 ? 5 : 3;
2529
2530 case CONST_INT:
2531 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2532 return 2;
2533 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2534 return 3;
2535 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2536 return 4;
2537
2538 /* Fall through. */
2539 default:
2540 return 5;
2541 }
2542
2543 /* Any other constant requires a 2 cycle pc-relative load plus an
2544 addition. */
2545 return 3;
2546 }
2547
2548 /* Return the cost of a multiply. */
2549 static inline int
2550 multcosts (rtx x ATTRIBUTE_UNUSED)
2551 {
2552 if (sh_multcost >= 0)
2553 return sh_multcost;
2554 if (TARGET_SHMEDIA)
2555 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2556 accept constants. Ideally, we would use a cost of one or two and
2557 add the cost of the operand, but disregard the latter when inside loops
2558 and loop invariant code motion is still to follow.
2559 Using a multiply first and splitting it later if it's a loss
2560 doesn't work because of different sign / zero extension semantics
2561 of multiplies vs. shifts. */
2562 return TARGET_SMALLCODE ? 2 : 3;
2563
2564 if (TARGET_SH2)
2565 {
2566 /* We have a mul insn, so we can never take more than the mul and the
2567 read of the mac reg, but count more because of the latency and extra
2568 reg usage. */
2569 if (TARGET_SMALLCODE)
2570 return 2;
2571 return 3;
2572 }
2573
2574 /* If we're aiming at small code, then just count the number of
2575 insns in a multiply call sequence. */
2576 if (TARGET_SMALLCODE)
2577 return 5;
2578
2579 /* Otherwise count all the insns in the routine we'd be calling too. */
2580 return 20;
2581 }
2582
2583 /* Compute a (partial) cost for rtx X. Return true if the complete
2584 cost has been computed, and false if subexpressions should be
2585 scanned. In either case, *TOTAL contains the cost result. */
2586
2587 static bool
2588 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2589 bool speed ATTRIBUTE_UNUSED)
2590 {
2591 switch (code)
2592 {
2593 case CONST_INT:
2594 if (TARGET_SHMEDIA)
2595 {
2596 if (INTVAL (x) == 0)
2597 *total = 0;
2598 else if (outer_code == AND && and_operand ((x), DImode))
2599 *total = 0;
2600 else if ((outer_code == IOR || outer_code == XOR
2601 || outer_code == PLUS)
2602 && CONST_OK_FOR_I10 (INTVAL (x)))
2603 *total = 0;
2604 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2605 *total = COSTS_N_INSNS (outer_code != SET);
2606 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2607 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2608 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2609 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2610 else
2611 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2612 return true;
2613 }
2614 if (CONST_OK_FOR_I08 (INTVAL (x)))
2615 *total = 0;
2616 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2617 && CONST_OK_FOR_K08 (INTVAL (x)))
2618 *total = 1;
2619 /* prepare_cmp_insn will force costly constants into registers before
2620 the cbranch[sd]i4 patterns can see them, so preserve potentially
2621 interesting ones not covered by I08 above. */
2622 else if (outer_code == COMPARE
2623 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2624 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2625 || INTVAL (x) == 0x7fffffff
2626 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2627 *total = 1;
2628 else
2629 *total = 8;
2630 return true;
2631
2632 case CONST:
2633 case LABEL_REF:
2634 case SYMBOL_REF:
2635 if (TARGET_SHMEDIA64)
2636 *total = COSTS_N_INSNS (4);
2637 else if (TARGET_SHMEDIA32)
2638 *total = COSTS_N_INSNS (2);
2639 else
2640 *total = 5;
2641 return true;
2642
2643 case CONST_DOUBLE:
2644 if (TARGET_SHMEDIA)
2645 *total = COSTS_N_INSNS (4);
2646 /* prepare_cmp_insn will force costly constants into registers before
2647 the cbranchdi4 pattern can see them, so preserve potentially
2648 interesting ones. */
2649 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2650 *total = 1;
2651 else
2652 *total = 10;
2653 return true;
2654 case CONST_VECTOR:
2655 if (x == CONST0_RTX (GET_MODE (x)))
2656 *total = 0;
2657 else if (sh_1el_vec (x, VOIDmode))
2658 *total = outer_code != SET;
2659 else if (sh_rep_vec (x, VOIDmode))
2660 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2661 + (outer_code != SET));
2662 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
2663 return true;
2664
2665 case PLUS:
2666 case MINUS:
2667 *total = COSTS_N_INSNS (addsubcosts (x));
2668 return true;
2669
2670 case AND:
2671 *total = COSTS_N_INSNS (andcosts (x));
2672 return true;
2673
2674 case MULT:
2675 *total = COSTS_N_INSNS (multcosts (x));
2676 return true;
2677
2678 case ASHIFT:
2679 case ASHIFTRT:
2680 case LSHIFTRT:
2681 *total = COSTS_N_INSNS (shiftcosts (x));
2682 return true;
2683
2684 case DIV:
2685 case UDIV:
2686 case MOD:
2687 case UMOD:
2688 *total = COSTS_N_INSNS (20);
2689 return true;
2690
2691 case PARALLEL:
2692 if (sh_1el_vec (x, VOIDmode))
2693 *total = outer_code != SET;
2694 else if (sh_rep_vec (x, VOIDmode))
2695 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2696 + (outer_code != SET));
2697 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
2698 return true;
2699
2700 case FLOAT:
2701 case FIX:
2702 *total = 100;
2703 return true;
2704
2705 default:
2706 return false;
2707 }
2708 }
2709
2710 /* Compute the cost of an address. For the SH, all valid addresses are
2711 the same cost. Use a slightly higher cost for reg + reg addressing,
2712 since it increases pressure on r0. */
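/* E.g. (illustrative) a reg+reg address such as @(r0,r4) gets cost 1 here,
   while @(disp,rn), @rn and SHmedia addresses get cost 0.  */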
2713
2714 static int
2715 sh_address_cost (rtx X,
2716 bool speed ATTRIBUTE_UNUSED)
2717 {
2718 return (GET_CODE (X) == PLUS
2719 && ! CONSTANT_P (XEXP (X, 1))
2720 && ! TARGET_SHMEDIA ? 1 : 0);
2721 }
2722
2723 /* Code to expand a shift. */
2724
2725 void
2726 gen_ashift (int type, int n, rtx reg)
2727 {
2728 /* Negative values here come from the shift_amounts array. */
2729 if (n < 0)
2730 {
2731 if (type == ASHIFT)
2732 type = LSHIFTRT;
2733 else
2734 type = ASHIFT;
2735 n = -n;
2736 }
2737
2738 switch (type)
2739 {
2740 case ASHIFTRT:
2741 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2742 break;
2743 case LSHIFTRT:
2744 if (n == 1)
2745 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2746 else
2747 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2748 break;
2749 case ASHIFT:
2750 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2751 break;
2752 }
2753 }
2754
2755 /* Same for HImode */
2756
2757 void
2758 gen_ashift_hi (int type, int n, rtx reg)
2759 {
2760 /* Negative values here come from the shift_amounts array. */
2761 if (n < 0)
2762 {
2763 if (type == ASHIFT)
2764 type = LSHIFTRT;
2765 else
2766 type = ASHIFT;
2767 n = -n;
2768 }
2769
2770 switch (type)
2771 {
2772 case ASHIFTRT:
2773 case LSHIFTRT:
2774 /* We don't have HImode right shift operations because using the
2775 ordinary 32 bit shift instructions for that doesn't generate proper
2776 zero/sign extension.
2777 gen_ashift_hi is only called in contexts where we know that the
2778 sign extension works out correctly. */
2779 {
2780 int offset = 0;
2781 if (GET_CODE (reg) == SUBREG)
2782 {
2783 offset = SUBREG_BYTE (reg);
2784 reg = SUBREG_REG (reg);
2785 }
2786 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2787 break;
2788 }
2789 case ASHIFT:
2790 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2791 break;
2792 }
2793 }
2794
2795 /* Output RTL to split a constant shift into its component SH constant
2796 shift instructions. */
2797
2798 void
2799 gen_shifty_op (int code, rtx *operands)
2800 {
2801 int value = INTVAL (operands[2]);
2802 int max, i;
2803
2804 /* Truncate the shift count in case it is out of bounds. */
2805 value = value & 31;
2806
2807 if (value == 31)
2808 {
2809 if (code == LSHIFTRT)
2810 {
2811 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2812 emit_insn (gen_movt (operands[0]));
2813 return;
2814 }
2815 else if (code == ASHIFT)
2816 {
2817 /* There is a two instruction sequence for 31 bit left shifts,
2818 but it requires r0. */
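/* (Illustrative note: the sequence below masks the low bit and then
   rotates it into the top bit, i.e. it computes (x & 1) << 31.)  */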
2819 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2820 {
2821 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2822 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2823 return;
2824 }
2825 }
2826 }
2827 else if (value == 0)
2828 {
2829 /* This can happen even when optimizing, if there were subregs before
2830 reload. Don't output a nop here, as this is never optimized away;
2831 use a no-op move instead. */
2832 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2833 return;
2834 }
2835
2836 max = shift_insns[value];
2837 for (i = 0; i < max; i++)
2838 gen_ashift (code, shift_amounts[value][i], operands[0]);
2839 }
2840
2841 /* Same as above, but optimized for values where the topmost bits don't
2842 matter. */
2843
2844 void
2845 gen_shifty_hi_op (int code, rtx *operands)
2846 {
2847 int value = INTVAL (operands[2]);
2848 int max, i;
2849 void (*gen_fun) (int, int, rtx);
2850
2851 /* This operation is used by and_shl for SImode values with a few
2852 high bits known to be cleared. */
2853 value &= 31;
2854 if (value == 0)
2855 {
2856 emit_insn (gen_nop ());
2857 return;
2858 }
2859
2860 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2861 if (code == ASHIFT)
2862 {
2863 max = ext_shift_insns[value];
2864 for (i = 0; i < max; i++)
2865 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2866 }
2867 else
2868 /* When shifting right, emit the shifts in reverse order, so that
2869 solitary negative values come first. */
2870 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2871 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2872 }
2873
2874 /* Output RTL for an arithmetic right shift. */
2875
2876 /* ??? Rewrite to use super-optimizer sequences. */
2877
2878 int
2879 expand_ashiftrt (rtx *operands)
2880 {
2881 rtx wrk;
2882 char func[18];
2883 int value;
2884
2885 if (TARGET_SH3)
2886 {
2887 if (GET_CODE (operands[2]) != CONST_INT)
2888 {
2889 rtx count = copy_to_mode_reg (SImode, operands[2]);
2890 emit_insn (gen_negsi2 (count, count));
2891 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2892 return 1;
2893 }
2894 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2895 > 1 + SH_DYNAMIC_SHIFT_COST)
2896 {
2897 rtx count
2898 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2899 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2900 return 1;
2901 }
2902 }
2903 if (GET_CODE (operands[2]) != CONST_INT)
2904 return 0;
2905
2906 value = INTVAL (operands[2]) & 31;
2907
2908 if (value == 31)
2909 {
2910 /* If we are called from abs expansion, arrange things so that we
2911 can use a single MT instruction that doesn't clobber the source,
2912 if LICM can hoist out the load of the constant zero. */
2913 if (currently_expanding_to_rtl)
2914 {
2915 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2916 operands[1]));
2917 emit_insn (gen_mov_neg_si_t (operands[0]));
2918 return 1;
2919 }
2920 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2921 return 1;
2922 }
2923 else if (value >= 16 && value <= 19)
2924 {
2925 wrk = gen_reg_rtx (SImode);
2926 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2927 value -= 16;
2928 while (value--)
2929 gen_ashift (ASHIFTRT, 1, wrk);
2930 emit_move_insn (operands[0], wrk);
2931 return 1;
2932 }
2933 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2934 else if (value <= 5)
2935 {
2936 wrk = gen_reg_rtx (SImode);
2937 emit_move_insn (wrk, operands[1]);
2938 while (value--)
2939 gen_ashift (ASHIFTRT, 1, wrk);
2940 emit_move_insn (operands[0], wrk);
2941 return 1;
2942 }
2943
2944 wrk = gen_reg_rtx (Pmode);
2945
2946 /* Load the value into an arg reg and call a helper. */
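/* E.g. (illustrative) a shift by 20 becomes a call to __ashiftrt_r4_20,
   with the operand passed and the result returned in r4.  */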
2947 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2948 sprintf (func, "__ashiftrt_r4_%d", value);
2949 function_symbol (wrk, func, SFUNC_STATIC);
2950 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2951 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2952 return 1;
2953 }
2954
2955 int
2956 sh_dynamicalize_shift_p (rtx count)
2957 {
2958 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
2959 }
2960
2961 /* Try to find a good way to implement the combiner pattern
2962 [(set (match_operand:SI 0 "register_operand" "r")
2963 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2964 (match_operand:SI 2 "const_int_operand" "n"))
2965 (match_operand:SI 3 "const_int_operand" "n"))) .
2966 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2967 return 0 for simple right / left or left/right shift combination.
2968 return 1 for a combination of shifts with zero_extend.
2969 return 2 for a combination of shifts with an AND that needs r0.
2970 return 3 for a combination of shifts with an AND that needs an extra
2971 scratch register, when the three highmost bits of the AND mask are clear.
2972 return 4 for a combination of shifts with an AND that needs an extra
2973 scratch register, when any of the three highmost bits of the AND mask
2974 is set.
2975 If ATTRP is set, store an initial right shift width in ATTRP[0],
2976 and the instruction length in ATTRP[1].  These values are not valid
2977 when returning 0.
2978 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2979 shift_amounts for the last shift value that is to be used before the
2980 sign extend. */
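/* For example (illustrative): for LEFT = 1 and MASK = 0x1fe, i.e.
   (x << 1) & 0x1fe, shl_and_kind returns 1 with a best cost of two
   instructions, since the whole operation is just a byte zero-extension
   followed by a one-bit left shift.  */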
2981 int
2982 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2983 {
2984 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2985 int left = INTVAL (left_rtx), right;
2986 int best = 0;
2987 int cost, best_cost = 10000;
2988 int best_right = 0, best_len = 0;
2989 int i;
2990 int can_ext;
2991
2992 if (left < 0 || left > 31)
2993 return 0;
2994 if (GET_CODE (mask_rtx) == CONST_INT)
2995 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2996 else
2997 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2998 /* Can this be expressed as a right shift / left shift pair? */
2999 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3000 right = exact_log2 (lsb);
3001 mask2 = ~(mask + lsb - 1);
3002 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3003 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
3004 if (! mask2)
3005 best_cost = shift_insns[right] + shift_insns[right + left];
3006 /* mask has no trailing zeroes <==> ! right */
3007 else if (! right && mask2 == ~(lsb2 - 1))
3008 {
3009 int late_right = exact_log2 (lsb2);
3010 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3011 }
3012 /* Try to use zero extend. */
3013 if (mask2 == ~(lsb2 - 1))
3014 {
3015 int width, first;
3016
3017 for (width = 8; width <= 16; width += 8)
3018 {
3019 /* Can we zero-extend right away? */
3020 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3021 {
3022 cost
3023 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3024 if (cost < best_cost)
3025 {
3026 best = 1;
3027 best_cost = cost;
3028 best_right = right;
3029 best_len = cost;
3030 if (attrp)
3031 attrp[2] = -1;
3032 }
3033 continue;
3034 }
3035 /* ??? Could try to put zero extend into initial right shift,
3036 or even shift a bit left before the right shift. */
3037 /* Determine value of first part of left shift, to get to the
3038 zero extend cut-off point. */
3039 first = width - exact_log2 (lsb2) + right;
3040 if (first >= 0 && right + left - first >= 0)
3041 {
3042 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3043 + ext_shift_insns[right + left - first];
3044 if (cost < best_cost)
3045 {
3046 best = 1;
3047 best_cost = cost;
3048 best_right = right;
3049 best_len = cost;
3050 if (attrp)
3051 attrp[2] = first;
3052 }
3053 }
3054 }
3055 }
3056 /* Try to use r0 AND pattern */
3057 for (i = 0; i <= 2; i++)
3058 {
3059 if (i > right)
3060 break;
3061 if (! CONST_OK_FOR_K08 (mask >> i))
3062 continue;
3063 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3064 if (cost < best_cost)
3065 {
3066 best = 2;
3067 best_cost = cost;
3068 best_right = i;
3069 best_len = cost - 1;
3070 }
3071 }
3072 /* Try to use a scratch register to hold the AND operand. */
3073 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3074 for (i = 0; i <= 2; i++)
3075 {
3076 if (i > right)
3077 break;
3078 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3079 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3080 if (cost < best_cost)
3081 {
3082 best = 4 - can_ext;
3083 best_cost = cost;
3084 best_right = i;
3085 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3086 }
3087 }
3088
3089 if (attrp)
3090 {
3091 attrp[0] = best_right;
3092 attrp[1] = best_len;
3093 }
3094 return best;
3095 }
3096
3097 /* This is used in length attributes of the unnamed instructions
3098 corresponding to shl_and_kind return values of 1 and 2. */
3099 int
3100 shl_and_length (rtx insn)
3101 {
3102 rtx set_src, left_rtx, mask_rtx;
3103 int attributes[3];
3104
3105 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3106 left_rtx = XEXP (XEXP (set_src, 0), 1);
3107 mask_rtx = XEXP (set_src, 1);
3108 shl_and_kind (left_rtx, mask_rtx, attributes);
3109 return attributes[1];
3110 }
3111
3112 /* This is used in length attribute of the and_shl_scratch instruction. */
3113
3114 int
3115 shl_and_scr_length (rtx insn)
3116 {
3117 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3118 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3119 rtx op = XEXP (set_src, 0);
3120 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3121 op = XEXP (XEXP (op, 0), 0);
3122 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3123 }
3124
3125 /* Generate rtl for instructions for which shl_and_kind advised a particular
3126 method of generating them, i.e. returned a nonzero kind. */
3127
3128 int
3129 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3130 {
3131 int attributes[3];
3132 unsigned HOST_WIDE_INT mask;
3133 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3134 int right, total_shift;
3135 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3136
3137 right = attributes[0];
3138 total_shift = INTVAL (left_rtx) + right;
3139 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3140 switch (kind)
3141 {
3142 default:
3143 return -1;
3144 case 1:
3145 {
3146 int first = attributes[2];
3147 rtx operands[3];
3148
3149 if (first < 0)
3150 {
3151 emit_insn ((mask << right) <= 0xff
3152 ? gen_zero_extendqisi2 (dest,
3153 gen_lowpart (QImode, source))
3154 : gen_zero_extendhisi2 (dest,
3155 gen_lowpart (HImode, source)));
3156 source = dest;
3157 }
3158 if (source != dest)
3159 emit_insn (gen_movsi (dest, source));
3160 operands[0] = dest;
3161 if (right)
3162 {
3163 operands[2] = GEN_INT (right);
3164 gen_shifty_hi_op (LSHIFTRT, operands);
3165 }
3166 if (first > 0)
3167 {
3168 operands[2] = GEN_INT (first);
3169 gen_shifty_hi_op (ASHIFT, operands);
3170 total_shift -= first;
3171 mask <<= first;
3172 }
3173 if (first >= 0)
3174 emit_insn (mask <= 0xff
3175 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3176 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3177 if (total_shift > 0)
3178 {
3179 operands[2] = GEN_INT (total_shift);
3180 gen_shifty_hi_op (ASHIFT, operands);
3181 }
3182 break;
3183 }
3184 case 4:
3185 shift_gen_fun = gen_shifty_op;
3186 case 3:
3187 /* If the topmost bit that matters is set, set the topmost bits
3188 that don't matter. This way, we might be able to get a shorter
3189 signed constant. */
3190 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3191 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3192 case 2:
3193 /* Don't expand fine-grained when combining, because that will
3194 make the pattern fail. */
3195 if (currently_expanding_to_rtl
3196 || reload_in_progress || reload_completed)
3197 {
3198 rtx operands[3];
3199
3200 /* Cases 3 and 4 should be handled by this split
3201 only while combining */
3202 gcc_assert (kind <= 2);
3203 if (right)
3204 {
3205 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3206 source = dest;
3207 }
3208 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3209 if (total_shift)
3210 {
3211 operands[0] = dest;
3212 operands[1] = dest;
3213 operands[2] = GEN_INT (total_shift);
3214 shift_gen_fun (ASHIFT, operands);
3215 }
3216 break;
3217 }
3218 else
3219 {
3220 int neg = 0;
3221 if (kind != 4 && total_shift < 16)
3222 {
3223 neg = -ext_shift_amounts[total_shift][1];
3224 if (neg > 0)
3225 neg -= ext_shift_amounts[total_shift][2];
3226 else
3227 neg = 0;
3228 }
3229 emit_insn (gen_and_shl_scratch (dest, source,
3230 GEN_INT (right),
3231 GEN_INT (mask),
3232 GEN_INT (total_shift + neg),
3233 GEN_INT (neg)));
3234 emit_insn (gen_movsi (dest, dest));
3235 break;
3236 }
3237 }
3238 return 0;
3239 }
3240
3241 /* Try to find a good way to implement the combiner pattern
3242 [(set (match_operand:SI 0 "register_operand" "=r")
3243 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3244 (match_operand:SI 2 "const_int_operand" "n")
3245 (match_operand:SI 3 "const_int_operand" "n")
3246 (const_int 0)))
3247 (clobber (reg:SI T_REG))]
3248 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3249 return 0 for simple left / right shift combination.
3250 return 1 for left shift / 8 bit sign extend / left shift.
3251 return 2 for left shift / 16 bit sign extend / left shift.
3252 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3253 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3254 return 5 for left shift / 16 bit sign extend / right shift
3255 return 6 for < 8 bit sign extend / left shift.
3256 return 7 for < 8 bit sign extend / left shift / single right shift.
3257 If COSTP is nonzero, assign the calculated cost to *COSTP. */
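/* For example (illustrative): LEFT = 0 and SIZE = 8, i.e. sign-extracting
   the low byte with no shift, yields kind 1: both surrounding shifts are
   by zero, so the operation reduces to a single 8-bit sign extension.  */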
3258
3259 int
3260 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3261 {
3262 int left, size, insize, ext;
3263 int cost = 0, best_cost;
3264 int kind;
3265
3266 left = INTVAL (left_rtx);
3267 size = INTVAL (size_rtx);
3268 insize = size - left;
3269 gcc_assert (insize > 0);
3270 /* Default to left / right shift. */
3271 kind = 0;
3272 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3273 if (size <= 16)
3274 {
3275 /* 16 bit shift / sign extend / 16 bit shift */
3276 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3277 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3278 below, by alternative 3 or something even better. */
3279 if (cost < best_cost)
3280 {
3281 kind = 5;
3282 best_cost = cost;
3283 }
3284 }
3285 /* Try a plain sign extend between two shifts. */
3286 for (ext = 16; ext >= insize; ext -= 8)
3287 {
3288 if (ext <= size)
3289 {
3290 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3291 if (cost < best_cost)
3292 {
3293 kind = ext / (unsigned) 8;
3294 best_cost = cost;
3295 }
3296 }
3297 /* Check if we can do a sloppy shift with a final signed shift
3298 restoring the sign. */
3299 if (EXT_SHIFT_SIGNED (size - ext))
3300 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3301 /* If not, maybe it's still cheaper to do the second shift sloppy,
3302 and do a final sign extend? */
3303 else if (size <= 16)
3304 cost = ext_shift_insns[ext - insize] + 1
3305 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3306 else
3307 continue;
3308 if (cost < best_cost)
3309 {
3310 kind = ext / (unsigned) 8 + 2;
3311 best_cost = cost;
3312 }
3313 }
3314 /* Check if we can sign extend in r0 */
3315 if (insize < 8)
3316 {
3317 cost = 3 + shift_insns[left];
3318 if (cost < best_cost)
3319 {
3320 kind = 6;
3321 best_cost = cost;
3322 }
3323 /* Try the same with a final signed shift. */
3324 if (left < 31)
3325 {
3326 cost = 3 + ext_shift_insns[left + 1] + 1;
3327 if (cost < best_cost)
3328 {
3329 kind = 7;
3330 best_cost = cost;
3331 }
3332 }
3333 }
3334 if (TARGET_SH3)
3335 {
3336 /* Try to use a dynamic shift. */
3337 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3338 if (cost < best_cost)
3339 {
3340 kind = 0;
3341 best_cost = cost;
3342 }
3343 }
3344 if (costp)
3345 *costp = cost;
3346 return kind;
3347 }
3348
3349 /* Function to be used in the length attribute of the instructions
3350 implementing this pattern. */
3351
3352 int
3353 shl_sext_length (rtx insn)
3354 {
3355 rtx set_src, left_rtx, size_rtx;
3356 int cost;
3357
3358 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3359 left_rtx = XEXP (XEXP (set_src, 0), 1);
3360 size_rtx = XEXP (set_src, 1);
3361 shl_sext_kind (left_rtx, size_rtx, &cost);
3362 return cost;
3363 }
3364
3365 /* Generate rtl for this pattern */
3366
3367 int
3368 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3369 {
3370 int kind;
3371 int left, size, insize, cost;
3372 rtx operands[3];
3373
3374 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3375 left = INTVAL (left_rtx);
3376 size = INTVAL (size_rtx);
3377 insize = size - left;
3378 switch (kind)
3379 {
3380 case 1:
3381 case 2:
3382 case 3:
3383 case 4:
3384 {
3385 int ext = kind & 1 ? 8 : 16;
3386 int shift2 = size - ext;
3387
3388 /* Don't expand fine-grained when combining, because that will
3389 make the pattern fail. */
3390 if (! currently_expanding_to_rtl
3391 && ! reload_in_progress && ! reload_completed)
3392 {
3393 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3394 emit_insn (gen_movsi (dest, source));
3395 break;
3396 }
3397 if (dest != source)
3398 emit_insn (gen_movsi (dest, source));
3399 operands[0] = dest;
3400 if (ext - insize)
3401 {
3402 operands[2] = GEN_INT (ext - insize);
3403 gen_shifty_hi_op (ASHIFT, operands);
3404 }
3405 emit_insn (kind & 1
3406 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3407 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3408 if (kind <= 2)
3409 {
3410 if (shift2)
3411 {
3412 operands[2] = GEN_INT (shift2);
3413 gen_shifty_op (ASHIFT, operands);
3414 }
3415 }
3416 else
3417 {
3418 if (shift2 > 0)
3419 {
3420 if (EXT_SHIFT_SIGNED (shift2))
3421 {
3422 operands[2] = GEN_INT (shift2 + 1);
3423 gen_shifty_op (ASHIFT, operands);
3424 operands[2] = const1_rtx;
3425 gen_shifty_op (ASHIFTRT, operands);
3426 break;
3427 }
3428 operands[2] = GEN_INT (shift2);
3429 gen_shifty_hi_op (ASHIFT, operands);
3430 }
3431 else if (shift2)
3432 {
3433 operands[2] = GEN_INT (-shift2);
3434 gen_shifty_hi_op (LSHIFTRT, operands);
3435 }
3436 emit_insn (size <= 8
3437 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3438 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3439 }
3440 break;
3441 }
3442 case 5:
3443 {
3444 int i = 16 - size;
3445 if (! currently_expanding_to_rtl
3446 && ! reload_in_progress && ! reload_completed)
3447 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3448 else
3449 {
3450 operands[0] = dest;
3451 operands[2] = GEN_INT (16 - insize);
3452 gen_shifty_hi_op (ASHIFT, operands);
3453 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3454 }
3455 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3456 while (--i >= 0)
3457 gen_ashift (ASHIFTRT, 1, dest);
3458 break;
3459 }
3460 case 6:
3461 case 7:
3462 /* Don't expand fine-grained when combining, because that will
3463 make the pattern fail. */
3464 if (! currently_expanding_to_rtl
3465 && ! reload_in_progress && ! reload_completed)
3466 {
3467 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3468 emit_insn (gen_movsi (dest, source));
3469 break;
3470 }
3471 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3472 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3473 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3474 operands[0] = dest;
3475 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3476 gen_shifty_op (ASHIFT, operands);
3477 if (kind == 7)
3478 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3479 break;
3480 default:
3481 return -1;
3482 }
3483 return 0;
3484 }
3485
3486 /* Prefix a symbol_ref name with "datalabel". */
3487
3488 rtx
3489 gen_datalabel_ref (rtx sym)
3490 {
3491 const char *str;
3492
3493 if (GET_CODE (sym) == LABEL_REF)
3494 return gen_rtx_CONST (GET_MODE (sym),
3495 gen_rtx_UNSPEC (GET_MODE (sym),
3496 gen_rtvec (1, sym),
3497 UNSPEC_DATALABEL));
3498
3499 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3500
3501 str = XSTR (sym, 0);
3502 /* Share all SYMBOL_REF strings with the same value - that is important
3503 for cse. */
3504 str = IDENTIFIER_POINTER (get_identifier (str));
3505 XSTR (sym, 0) = str;
3506
3507 return sym;
3508 }
3509
3510 \f
3511 static alloc_pool label_ref_list_pool;
3512
3513 typedef struct label_ref_list_d
3514 {
3515 rtx label;
3516 struct label_ref_list_d *next;
3517 } *label_ref_list_t;
3518
3519 /* The SH cannot load a large constant into a register, constants have to
3520 come from a pc relative load. The reference of a pc relative load
3521 instruction must be less than 1k in front of the instruction. This
3522 means that we often have to dump a constant inside a function, and
3523 generate code to branch around it.
3524
3525 It is important to minimize this, since the branches will slow things
3526 down and make things bigger.
3527
3528 Worst case code looks like:
3529
3530 mov.l L1,rn
3531 bra L2
3532 nop
3533 align
3534 L1: .long value
3535 L2:
3536 ..
3537
3538 mov.l L3,rn
3539 bra L4
3540 nop
3541 align
3542 L3: .long value
3543 L4:
3544 ..
3545
3546 We fix this by performing a scan before scheduling, which notices which
3547 instructions need to have their operands fetched from the constant table
3548 and builds the table.
3549
3550 The algorithm is:
3551
3552 scan, find an instruction which needs a pcrel move. Look forward, find the
3553 last barrier which is within MAX_COUNT bytes of the requirement.
3554 If there isn't one, make one. Process all the instructions between
3555 the find and the barrier.
3556
3557 In the above example, we can tell that L3 is within 1k of L1, so
3558 the first move can be shrunk from the 3 insn+constant sequence into
3559 just 1 insn, and the constant moved to L3 to make:
3560
3561 mov.l L1,rn
3562 ..
3563 mov.l L3,rn
3564 bra L4
3565 nop
3566 align
3567 L3:.long value
3568 L4:.long value
3569
3570 Then the second move becomes the target for the shortening process. */
3571
3572 typedef struct
3573 {
3574 rtx value; /* Value in table. */
3575 rtx label; /* Label of value. */
3576 label_ref_list_t wend; /* End of window. */
3577 enum machine_mode mode; /* Mode of value. */
3578
3579 /* True if this constant is accessed as part of a post-increment
3580 sequence. Note that HImode constants are never accessed in this way. */
3581 bool part_of_sequence_p;
3582 } pool_node;
3583
3584 /* The maximum number of constants that can fit into one pool, since
3585 constants in the range 0..510 are at least 2 bytes long, and in the
3586 range from there to 1018 at least 4 bytes. */
3587
3588 #define MAX_POOL_SIZE 372
3589 static pool_node pool_vector[MAX_POOL_SIZE];
3590 static int pool_size;
3591 static rtx pool_window_label;
3592 static int pool_window_last;
3593
3594 static int max_labelno_before_reorg;
3595
3596 /* ??? If we need a constant in HImode which is the truncated value of a
3597 constant we need in SImode, we could combine the two entries thus saving
3598 two bytes. Is this common enough to be worth the effort of implementing
3599 it? */
3600
3601 /* ??? This stuff should be done at the same time that we shorten branches.
3602 As it is now, we must assume that all branches are the maximum size, and
3603 this causes us to almost always output constant pools sooner than
3604 necessary. */
3605
3606 /* Add a constant to the pool and return its label. */
3607
3608 static rtx
3609 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3610 {
3611 int i;
3612 rtx lab, new_rtx;
3613 label_ref_list_t ref, newref;
3614
3615 /* First see if we've already got it. */
3616 for (i = 0; i < pool_size; i++)
3617 {
3618 if (x->code == pool_vector[i].value->code
3619 && mode == pool_vector[i].mode)
3620 {
3621 if (x->code == CODE_LABEL)
3622 {
3623 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3624 continue;
3625 }
3626 if (rtx_equal_p (x, pool_vector[i].value))
3627 {
3628 lab = new_rtx = 0;
3629 if (! last_value
3630 || ! i
3631 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3632 {
3633 new_rtx = gen_label_rtx ();
3634 LABEL_REFS (new_rtx) = pool_vector[i].label;
3635 pool_vector[i].label = lab = new_rtx;
3636 }
3637 if (lab && pool_window_label)
3638 {
3639 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3640 newref->label = pool_window_label;
3641 ref = pool_vector[pool_window_last].wend;
3642 newref->next = ref;
3643 pool_vector[pool_window_last].wend = newref;
3644 }
3645 if (new_rtx)
3646 pool_window_label = new_rtx;
3647 pool_window_last = i;
3648 return lab;
3649 }
3650 }
3651 }
3652
3653 /* Need a new one. */
3654 pool_vector[pool_size].value = x;
3655 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3656 {
3657 lab = 0;
3658 pool_vector[pool_size - 1].part_of_sequence_p = true;
3659 }
3660 else
3661 lab = gen_label_rtx ();
3662 pool_vector[pool_size].mode = mode;
3663 pool_vector[pool_size].label = lab;
3664 pool_vector[pool_size].wend = NULL;
3665 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3666 if (lab && pool_window_label)
3667 {
3668 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3669 newref->label = pool_window_label;
3670 ref = pool_vector[pool_window_last].wend;
3671 newref->next = ref;
3672 pool_vector[pool_window_last].wend = newref;
3673 }
3674 if (lab)
3675 pool_window_label = lab;
3676 pool_window_last = pool_size;
3677 pool_size++;
3678 return lab;
3679 }
3680
3681 /* Output the literal table. START, if nonzero, is the first instruction
3682 this table is needed for, and also indicates that there is at least one
3683 casesi_worker_2 instruction; we have to emit the operand3 labels from
3684 these insns at a 4-byte aligned position. BARRIER is the barrier
3685 after which we are to place the table. */
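/* Descriptive note (added for clarity): the table is emitted in groups.
   HImode (2-byte) constants come first.  If TARGET_FMOVD and
   TARGET_ALIGN_DOUBLE are set and there are DFmode entries, all remaining
   constants are then emitted with the DFmode values 8-byte aligned, using
   SImode/SFmode values to plug the 4-byte alignment gaps; otherwise the
   remaining 4-byte and 8-byte constants are emitted at 4-byte alignment.  */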
3686
3687 static void
3688 dump_table (rtx start, rtx barrier)
3689 {
3690 rtx scan = barrier;
3691 int i;
3692 int need_align = 1;
3693 rtx lab;
3694 label_ref_list_t ref;
3695 int have_df = 0;
3696
3697 /* Do two passes, first time dump out the HI sized constants. */
3698
3699 for (i = 0; i < pool_size; i++)
3700 {
3701 pool_node *p = &pool_vector[i];
3702
3703 if (p->mode == HImode)
3704 {
3705 if (need_align)
3706 {
3707 scan = emit_insn_after (gen_align_2 (), scan);
3708 need_align = 0;
3709 }
3710 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3711 scan = emit_label_after (lab, scan);
3712 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3713 scan);
3714 for (ref = p->wend; ref; ref = ref->next)
3715 {
3716 lab = ref->label;
3717 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3718 }
3719 }
3720 else if (p->mode == DFmode)
3721 have_df = 1;
3722 }
3723
3724 need_align = 1;
3725
3726 if (start)
3727 {
3728 scan = emit_insn_after (gen_align_4 (), scan);
3729 need_align = 0;
3730 for (; start != barrier; start = NEXT_INSN (start))
3731 if (GET_CODE (start) == INSN
3732 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3733 {
3734 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3735 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3736
3737 scan = emit_label_after (lab, scan);
3738 }
3739 }
3740 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3741 {
3742 rtx align_insn = NULL_RTX;
3743
3744 scan = emit_label_after (gen_label_rtx (), scan);
3745 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3746 need_align = 0;
3747
3748 for (i = 0; i < pool_size; i++)
3749 {
3750 pool_node *p = &pool_vector[i];
3751
3752 switch (p->mode)
3753 {
3754 case HImode:
3755 break;
3756 case SImode:
3757 case SFmode:
3758 if (align_insn && !p->part_of_sequence_p)
3759 {
3760 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3761 emit_label_before (lab, align_insn);
3762 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3763 align_insn);
3764 for (ref = p->wend; ref; ref = ref->next)
3765 {
3766 lab = ref->label;
3767 emit_insn_before (gen_consttable_window_end (lab),
3768 align_insn);
3769 }
3770 delete_insn (align_insn);
3771 align_insn = NULL_RTX;
3772 continue;
3773 }
3774 else
3775 {
3776 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3777 scan = emit_label_after (lab, scan);
3778 scan = emit_insn_after (gen_consttable_4 (p->value,
3779 const0_rtx), scan);
3780 need_align = ! need_align;
3781 }
3782 break;
3783 case DFmode:
3784 if (need_align)
3785 {
3786 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3787 align_insn = scan;
3788 need_align = 0;
3789 }
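/* FALLTHRU: after setting up 8-byte alignment, DFmode constants share
   the 8-byte constant emission of the DImode case below.  */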
3790 case DImode:
3791 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3792 scan = emit_label_after (lab, scan);
3793 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3794 scan);
3795 break;
3796 default:
3797 gcc_unreachable ();
3798 }
3799
3800 if (p->mode != HImode)
3801 {
3802 for (ref = p->wend; ref; ref = ref->next)
3803 {
3804 lab = ref->label;
3805 scan = emit_insn_after (gen_consttable_window_end (lab),
3806 scan);
3807 }
3808 }
3809 }
3810
3811 pool_size = 0;
3812 }
3813
3814 for (i = 0; i < pool_size; i++)
3815 {
3816 pool_node *p = &pool_vector[i];
3817
3818 switch (p->mode)
3819 {
3820 case HImode:
3821 break;
3822 case SImode:
3823 case SFmode:
3824 if (need_align)
3825 {
3826 need_align = 0;
3827 scan = emit_label_after (gen_label_rtx (), scan);
3828 scan = emit_insn_after (gen_align_4 (), scan);
3829 }
3830 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3831 scan = emit_label_after (lab, scan);
3832 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3833 scan);
3834 break;
3835 case DFmode:
3836 case DImode:
3837 if (need_align)
3838 {
3839 need_align = 0;
3840 scan = emit_label_after (gen_label_rtx (), scan);
3841 scan = emit_insn_after (gen_align_4 (), scan);
3842 }
3843 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3844 scan = emit_label_after (lab, scan);
3845 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3846 scan);
3847 break;
3848 default:
3849 gcc_unreachable ();
3850 }
3851
3852 if (p->mode != HImode)
3853 {
3854 for (ref = p->wend; ref; ref = ref->next)
3855 {
3856 lab = ref->label;
3857 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3858 }
3859 }
3860 }
3861
3862 scan = emit_insn_after (gen_consttable_end (), scan);
3863 scan = emit_barrier_after (scan);
3864 pool_size = 0;
3865 pool_window_label = NULL_RTX;
3866 pool_window_last = 0;
3867 }
3868
3869 /* Return nonzero if constant would be an ok source for a
3870 mov.w instead of a mov.l. */
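/* For example (illustrative): 0x1234 fits in a sign-extended 2-byte table
   entry and can be loaded with mov.w, whereas 0x12345 needs a 4-byte entry
   and mov.l.  */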
3871
3872 static int
3873 hi_const (rtx src)
3874 {
3875 return (GET_CODE (src) == CONST_INT
3876 && INTVAL (src) >= -32768
3877 && INTVAL (src) <= 32767);
3878 }
3879
3880 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3881
3882 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3883
3884 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3885 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3886 need to fix it if the input value is CONST_OK_FOR_I08. */
3887
3888 static int
3889 broken_move (rtx insn)
3890 {
3891 if (GET_CODE (insn) == INSN)
3892 {
3893 rtx pat = PATTERN (insn);
3894 if (GET_CODE (pat) == PARALLEL)
3895 pat = XVECEXP (pat, 0, 0);
3896 if (GET_CODE (pat) == SET
3897 /* We can load any 8-bit value if we don't care what the high
3898 order bits end up as. */
3899 && GET_MODE (SET_DEST (pat)) != QImode
3900 && (CONSTANT_P (SET_SRC (pat))
3901 /* Match mova_const. */
3902 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3903 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3904 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3905 && ! (TARGET_SH2E
3906 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3907 && (fp_zero_operand (SET_SRC (pat))
3908 || fp_one_operand (SET_SRC (pat)))
3909 /* ??? If this is a -m4 or -m4-single compilation, in general
3910 we don't know the current setting of fpscr, so disable fldi.
3911 There is an exception if this was a register-register move
3912 before reload - and hence it was ascertained that we have
3913 single precision setting - and in a post-reload optimization
3914 we changed this to do a constant load. In that case
3915 we don't have an r0 clobber, hence we must use fldi. */
3916 && (! TARGET_SH4 || TARGET_FMOVD
3917 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3918 == SCRATCH))
3919 && GET_CODE (SET_DEST (pat)) == REG
3920 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3921 && ! (TARGET_SH2A
3922 && GET_MODE (SET_DEST (pat)) == SImode
3923 && (satisfies_constraint_I20 (SET_SRC (pat))
3924 || satisfies_constraint_I28 (SET_SRC (pat))))
3925 && ! satisfies_constraint_I08 (SET_SRC (pat)))
3926 return 1;
3927 }
3928
3929 return 0;
3930 }
3931
3932 static int
3933 mova_p (rtx insn)
3934 {
3935 return (GET_CODE (insn) == INSN
3936 && GET_CODE (PATTERN (insn)) == SET
3937 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3938 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3939 /* Don't match mova_const. */
3940 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3941 }
3942
3943 /* Fix up a mova from a switch that went out of range. */
3944 static void
3945 fixup_mova (rtx mova)
3946 {
3947 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3948 if (! flag_pic)
3949 {
3950 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3951 INSN_CODE (mova) = -1;
3952 }
3953 else
3954 {
3955 rtx worker = mova;
3956 rtx lab = gen_label_rtx ();
3957 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
3958
3959 do
3960 {
3961 worker = NEXT_INSN (worker);
3962 gcc_assert (worker
3963 && GET_CODE (worker) != CODE_LABEL
3964 && GET_CODE (worker) != JUMP_INSN);
3965 } while (GET_CODE (worker) == NOTE
3966 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3967 wpat = PATTERN (worker);
3968 wpat0 = XVECEXP (wpat, 0, 0);
3969 wpat1 = XVECEXP (wpat, 0, 1);
3970 wsrc = SET_SRC (wpat0);
3971 PATTERN (worker) = (gen_casesi_worker_2
3972 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3973 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3974 XEXP (wpat1, 0)));
3975 INSN_CODE (worker) = -1;
3976 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3977 base = gen_rtx_LABEL_REF (Pmode, lab);
3978 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
3979 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3980 INSN_CODE (mova) = -1;
3981 }
3982 }
3983
3984 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3985 *num_mova, and check if the new mova is not nested within the first one.
3986 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3987 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3988 static int
3989 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3990 {
3991 int n_addr = 0; /* Initialization to shut up spurious warning. */
3992 int f_target, n_target = 0; /* Likewise. */
3993
3994 if (optimize)
3995 {
3996 /* If NEW_MOVA has no address yet, it will be handled later. */
3997 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
3998 return -1;
3999
4000 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4001 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4002 if (n_addr > n_target || n_addr + 1022 < n_target)
4003 {
4004 /* Change the mova into a load.
4005 broken_move will then return true for it. */
4006 fixup_mova (new_mova);
4007 return 1;
4008 }
4009 }
4010 if (!(*num_mova)++)
4011 {
4012 *first_mova = new_mova;
4013 return 2;
4014 }
4015 if (!optimize
4016 || ((f_target
4017 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4018 >= n_target))
4019 return -1;
4020
4021 (*num_mova)--;
4022 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4023 > n_target - n_addr)
4024 {
4025 fixup_mova (*first_mova);
4026 return 0;
4027 }
4028 else
4029 {
4030 fixup_mova (new_mova);
4031 return 1;
4032 }
4033 }
4034
4035 /* Find the last barrier from insn FROM which is close enough to hold the
4036 constant pool. If we can't find one, then create one near the end of
4037 the range. */
4038
4039 static rtx
4040 find_barrier (int num_mova, rtx mova, rtx from)
4041 {
4042 int count_si = 0;
4043 int count_hi = 0;
4044 int found_hi = 0;
4045 int found_si = 0;
4046 int found_di = 0;
4047 int hi_align = 2;
4048 int si_align = 2;
4049 int leading_mova = num_mova;
4050 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4051 int si_limit;
4052 int hi_limit;
4053 rtx orig = from;
4054
4055 /* For HImode: range is 510, add 4 because pc counts from address of
4056 second instruction after this one, subtract 2 for the jump instruction
4057 that we may need to emit before the table, subtract 2 for the instruction
4058 that fills the jump delay slot (in very rare cases, reorg will take an
4059 instruction from after the constant pool or will leave the delay slot
4060 empty). This gives 510.
4061 For SImode: range is 1020, add 4 because pc counts from address of
4062 second instruction after this one, subtract 2 in case pc is 2 byte
4063 aligned, subtract 2 for the jump instruction that we may need to emit
4064 before the table, subtract 2 for the instruction that fills the jump
4065 delay slot. This gives 1018. */
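/* Worked out explicitly: hi_limit = 510 + 4 - 2 - 2 = 510 and
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018, matching the assignments below.  */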
4066
4067 /* The branch will always be shortened now that the reference address for
4068 forward branches is the successor address, thus we need no longer make
4069 adjustments to the [sh]i_limit for -O0. */
4070
4071 si_limit = 1018;
4072 hi_limit = 510;
4073
4074 while (from && count_si < si_limit && count_hi < hi_limit)
4075 {
4076 int inc = get_attr_length (from);
4077 int new_align = 1;
4078
4079 /* If this is a label that existed at the time of the compute_alignments
4080 call, determine the alignment. N.B. When find_barrier recurses for
4081 an out-of-reach mova, we might see labels at the start of previously
4082 inserted constant tables. */
4083 if (GET_CODE (from) == CODE_LABEL
4084 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4085 {
4086 if (optimize)
4087 new_align = 1 << label_to_alignment (from);
4088 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
4089 new_align = 1 << barrier_align (from);
4090 else
4091 new_align = 1;
4092 inc = 0;
4093 }
4094 /* In case we are scanning a constant table because of recursion, check
4095 for explicit alignments. If the table is long, we might be forced
4096 to emit the new table in front of it; the length of the alignment
4097 might be the last straw. */
4098 else if (GET_CODE (from) == INSN
4099 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4100 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4101 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4102 /* When we find the end of a constant table, paste the new constant
4103 at the end. That is better than putting it in front because
4104 this way, we don't need extra alignment for adding a 4-byte-aligned
4105 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4106 else if (GET_CODE (from) == INSN
4107 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4108 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4109 return from;
4110
4111 if (GET_CODE (from) == BARRIER)
4112 {
4113 rtx next;
4114
4115 found_barrier = from;
4116
4117 /* If we are at the end of the function, or in front of an alignment
4118 instruction, we need not insert an extra alignment. We prefer
4119 this kind of barrier. */
4120 if (barrier_align (from) > 2)
4121 good_barrier = from;
4122
4123 /* If we are at the end of a hot/cold block, dump the constants
4124 here. */
4125 next = NEXT_INSN (from);
4126 if (next
4127 && NOTE_P (next)
4128 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4129 break;
4130 }
4131
4132 if (broken_move (from))
4133 {
4134 rtx pat, src, dst;
4135 enum machine_mode mode;
4136
4137 pat = PATTERN (from);
4138 if (GET_CODE (pat) == PARALLEL)
4139 pat = XVECEXP (pat, 0, 0);
4140 src = SET_SRC (pat);
4141 dst = SET_DEST (pat);
4142 mode = GET_MODE (dst);
4143
4144 /* We must explicitly check the mode, because sometimes the
4145 front end will generate code to load unsigned constants into
4146 HImode targets without properly sign extending them. */
4147 if (mode == HImode
4148 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4149 {
4150 found_hi += 2;
4151 /* We put the short constants before the long constants, so
4152 we must count the length of short constants in the range
4153 for the long constants. */
4154 /* ??? This isn't optimal, but is easy to do. */
4155 si_limit -= 2;
4156 }
4157 else
4158 {
4159 /* We dump DF/DI constants before SF/SI ones, because
4160 the limit is the same, but the alignment requirements
4161 are higher. We may waste up to 4 additional bytes
4162 for alignment, and the DF/DI constant may have
4163 another SF/SI constant placed before it. */
4164 if (TARGET_SHCOMPACT
4165 && ! found_di
4166 && (mode == DFmode || mode == DImode))
4167 {
4168 found_di = 1;
4169 si_limit -= 8;
4170 }
4171 while (si_align > 2 && found_si + si_align - 2 > count_si)
4172 si_align >>= 1;
4173 if (found_si > count_si)
4174 count_si = found_si;
4175 found_si += GET_MODE_SIZE (mode);
4176 if (num_mova)
4177 si_limit -= GET_MODE_SIZE (mode);
4178 }
4179 }
4180
4181 if (mova_p (from))
4182 {
4183 switch (untangle_mova (&num_mova, &mova, from))
4184 {
4185 case 0: return find_barrier (0, 0, mova);
4186 case 2:
4187 {
4188 leading_mova = 0;
4189 barrier_before_mova
4190 = good_barrier ? good_barrier : found_barrier;
4191 }
4192 default: break;
4193 }
4194 if (found_si > count_si)
4195 count_si = found_si;
4196 }
4197 else if (GET_CODE (from) == JUMP_INSN
4198 && (GET_CODE (PATTERN (from)) == ADDR_VEC
4199 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
4200 {
4201 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4202 || (num_mova
4203 && (prev_nonnote_insn (from)
4204 == XEXP (MOVA_LABELREF (mova), 0))))
4205 num_mova--;
4206 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4207 {
4208 /* We have just passed the barrier in front of the
4209 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4210 the ADDR_DIFF_VEC is accessed as data, just like our pool
4211 constants, this is a good opportunity to accommodate what
4212 we have gathered so far.
4213 If we waited any longer, we could end up at a barrier in
4214 front of code, which gives worse cache usage for separated
4215 instruction / data caches. */
4216 good_barrier = found_barrier;
4217 break;
4218 }
4219 else
4220 {
4221 rtx body = PATTERN (from);
4222 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4223 }
4224 }
4225 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4226 else if (GET_CODE (from) == JUMP_INSN
4227 && ! TARGET_SH2
4228 && ! TARGET_SMALLCODE)
4229 new_align = 4;
4230
4231 if (found_si)
4232 {
4233 count_si += inc;
4234 if (new_align > si_align)
4235 {
4236 si_limit -= (count_si - 1) & (new_align - si_align);
4237 si_align = new_align;
4238 }
4239 count_si = (count_si + new_align - 1) & -new_align;
4240 }
4241 if (found_hi)
4242 {
4243 count_hi += inc;
4244 if (new_align > hi_align)
4245 {
4246 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4247 hi_align = new_align;
4248 }
4249 count_hi = (count_hi + new_align - 1) & -new_align;
4250 }
4251 from = NEXT_INSN (from);
4252 }
4253
4254 if (num_mova)
4255 {
4256 if (leading_mova)
4257 {
4258 /* Try as we might, the leading mova is out of range. Change
4259 it into a load (which will become a pcload) and retry. */
4260 fixup_mova (mova);
4261 return find_barrier (0, 0, mova);
4262 }
4263 else
4264 {
4265 /* Insert the constant pool table before the mova instruction,
4266 to prevent the mova label reference from going out of range. */
4267 from = mova;
4268 good_barrier = found_barrier = barrier_before_mova;
4269 }
4270 }
4271
4272 if (found_barrier)
4273 {
4274 if (good_barrier && next_real_insn (found_barrier))
4275 found_barrier = good_barrier;
4276 }
4277 else
4278 {
4279 /* We didn't find a barrier in time to dump our stuff,
4280 so we'll make one. */
4281 rtx label = gen_label_rtx ();
4282
4283 /* If we exceeded the range, then we must back up over the last
4284 instruction we looked at. Otherwise, we just need to undo the
4285 NEXT_INSN at the end of the loop. */
4286 if (PREV_INSN (from) != orig
4287 && (count_hi > hi_limit || count_si > si_limit))
4288 from = PREV_INSN (PREV_INSN (from));
4289 else
4290 from = PREV_INSN (from);
4291
4292 /* Walk back to be just before any jump or label.
4293 Putting it before a label reduces the number of times the branch
4294 around the constant pool table will be hit. Putting it before
4295 a jump makes it more likely that the bra delay slot will be
4296 filled. */
4297 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4298 || GET_CODE (from) == CODE_LABEL)
4299 from = PREV_INSN (from);
4300
4301 from = emit_jump_insn_after (gen_jump (label), from);
4302 JUMP_LABEL (from) = label;
4303 LABEL_NUSES (label) = 1;
4304 found_barrier = emit_barrier_after (from);
4305 emit_label_after (label, found_barrier);
4306 }
4307
4308 return found_barrier;
4309 }
4310
4311 /* If the instruction INSN is implemented by a special function, and we can
4312 positively find the register that is used to call the sfunc, and this
4313 register is not used anywhere else in this instruction, except as the
4314 destination of a set, return this register; else, return 0. */
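/* Roughly (an illustrative shape, not an exact pattern from sh.md): an
   sfunc call is a PARALLEL whose type attribute is "sfunc" and which
   contains a (use (reg:SI N)) naming the register that holds the address
   of the library routine; that USE is what the loop below looks for.  */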
4315 rtx
4316 sfunc_uses_reg (rtx insn)
4317 {
4318 int i;
4319 rtx pattern, part, reg_part, reg;
4320
4321 if (GET_CODE (insn) != INSN)
4322 return 0;
4323 pattern = PATTERN (insn);
4324 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4325 return 0;
4326
4327 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4328 {
4329 part = XVECEXP (pattern, 0, i);
4330 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4331 reg_part = part;
4332 }
4333 if (! reg_part)
4334 return 0;
4335 reg = XEXP (reg_part, 0);
4336 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4337 {
4338 part = XVECEXP (pattern, 0, i);
4339 if (part == reg_part || GET_CODE (part) == CLOBBER)
4340 continue;
4341 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4342 && GET_CODE (SET_DEST (part)) == REG)
4343 ? SET_SRC (part) : part)))
4344 return 0;
4345 }
4346 return reg;
4347 }
4348
4349 /* See if the only way in which INSN uses REG is by calling it, or by
4350 setting it while calling it. Set *SET to a SET rtx if the register
4351 is set by INSN. */
4352
4353 static int
4354 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4355 {
4356 rtx pattern, reg2;
4357
4358 *set = NULL_RTX;
4359
4360 reg2 = sfunc_uses_reg (insn);
4361 if (reg2 && REGNO (reg2) == REGNO (reg))
4362 {
4363 pattern = single_set (insn);
4364 if (pattern
4365 && GET_CODE (SET_DEST (pattern)) == REG
4366 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4367 *set = pattern;
4368 return 0;
4369 }
4370 if (GET_CODE (insn) != CALL_INSN)
4371 {
4372 /* We don't use rtx_equal_p because we don't care if the mode is
4373 different. */
4374 pattern = single_set (insn);
4375 if (pattern
4376 && GET_CODE (SET_DEST (pattern)) == REG
4377 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4378 {
4379 rtx par, part;
4380 int i;
4381
4382 *set = pattern;
4383 par = PATTERN (insn);
4384 if (GET_CODE (par) == PARALLEL)
4385 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4386 {
4387 part = XVECEXP (par, 0, i);
4388 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4389 return 1;
4390 }
4391 return reg_mentioned_p (reg, SET_SRC (pattern));
4392 }
4393
4394 return 1;
4395 }
4396
4397 pattern = PATTERN (insn);
4398
4399 if (GET_CODE (pattern) == PARALLEL)
4400 {
4401 int i;
4402
4403 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4404 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4405 return 1;
4406 pattern = XVECEXP (pattern, 0, 0);
4407 }
4408
4409 if (GET_CODE (pattern) == SET)
4410 {
4411 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4412 {
4413 /* We don't use rtx_equal_p, because we don't care if the
4414 mode is different. */
4415 if (GET_CODE (SET_DEST (pattern)) != REG
4416 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4417 return 1;
4418
4419 *set = pattern;
4420 }
4421
4422 pattern = SET_SRC (pattern);
4423 }
4424
4425 if (GET_CODE (pattern) != CALL
4426 || GET_CODE (XEXP (pattern, 0)) != MEM
4427 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4428 return 1;
4429
4430 return 0;
4431 }
4432
4433 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4434 general registers. Bits 0..15 mean that the respective registers
4435 are used as inputs in the instruction. Bits 16..31 mean that the
4436 registers 0..15, respectively, are used as outputs, or are clobbered.
4437 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
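/* Illustrative example (hypothetical insn, not taken from a testcase):
   for (set (reg:SI 1) (plus:SI (reg:SI 4) (reg:SI 5))) this returns
   (1 << 4) | (1 << 5) | (1 << (1 + 16)), i.e. r4 and r5 read, r1 written.  */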
4438 int
4439 regs_used (rtx x, int is_dest)
4440 {
4441 enum rtx_code code;
4442 const char *fmt;
4443 int i, used = 0;
4444
4445 if (! x)
4446 return used;
4447 code = GET_CODE (x);
4448 switch (code)
4449 {
4450 case REG:
4451 if (REGNO (x) < 16)
4452 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4453 << (REGNO (x) + is_dest));
4454 return 0;
4455 case SUBREG:
4456 {
4457 rtx y = SUBREG_REG (x);
4458
4459 if (GET_CODE (y) != REG)
4460 break;
4461 if (REGNO (y) < 16)
4462 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4463 << (REGNO (y) +
4464 subreg_regno_offset (REGNO (y),
4465 GET_MODE (y),
4466 SUBREG_BYTE (x),
4467 GET_MODE (x)) + is_dest));
4468 return 0;
4469 }
4470 case SET:
4471 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4472 case RETURN:
4473 /* If there was a return value, it must have been indicated with USE. */
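/* 0x00ffff00 decodes, per the bit layout described above, to r8..r15
   counted as read and r0..r7 counted as set or clobbered.  */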
4474 return 0x00ffff00;
4475 case CLOBBER:
4476 is_dest = 1;
4477 break;
4478 case MEM:
4479 is_dest = 0;
4480 break;
4481 case CALL:
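/* 0x00ff00f0 decodes to r4..r7 read (presumably the argument registers)
   and r0..r7 set (presumably the call-clobbered registers).  */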
4482 used |= 0x00ff00f0;
4483 break;
4484 default:
4485 break;
4486 }
4487
4488 fmt = GET_RTX_FORMAT (code);
4489
4490 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4491 {
4492 if (fmt[i] == 'E')
4493 {
4494 register int j;
4495 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4496 used |= regs_used (XVECEXP (x, i, j), is_dest);
4497 }
4498 else if (fmt[i] == 'e')
4499 used |= regs_used (XEXP (x, i), is_dest);
4500 }
4501 return used;
4502 }
4503
4504 /* Create an instruction that prevents redirection of a conditional branch
4505 to the destination of the JUMP with address ADDR.
4506 If the branch needs to be implemented as an indirect jump, try to find
4507 a scratch register for it.
4508 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4509 If any preceding insn that doesn't fit into a delay slot is good enough,
4510 pass 1. Pass 2 if a definite blocking insn is needed.
4511 -1 is used internally to avoid deep recursion.
4512 If a blocking instruction is made or recognized, return it. */
4513
4514 static rtx
4515 gen_block_redirect (rtx jump, int addr, int need_block)
4516 {
4517 int dead = 0;
4518 rtx prev = prev_nonnote_insn (jump);
4519 rtx dest;
4520
4521 /* First, check if we already have an instruction that satisfies our need. */
4522 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4523 {
4524 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4525 return prev;
4526 if (GET_CODE (PATTERN (prev)) == USE
4527 || GET_CODE (PATTERN (prev)) == CLOBBER
4528 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4529 prev = jump;
4530 else if ((need_block &= ~1) < 0)
4531 return prev;
4532 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4533 need_block = 0;
4534 }
4535 if (GET_CODE (PATTERN (jump)) == RETURN)
4536 {
4537 if (! need_block)
4538 return prev;
4539 /* Reorg even does nasty things with return insns that cause branches
4540 to go out of range - see find_end_label and callers. */
4541 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4542 }
4543 /* We can't use JUMP_LABEL here because it might be undefined
4544 when not optimizing. */
4545 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4546 /* If the branch is out of range, try to find a scratch register for it. */
4547 if (optimize
4548 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4549 > 4092 + 4098))
4550 {
4551 rtx scan;
4552 /* Don't look for the stack pointer as a scratch register,
4553 it would cause trouble if an interrupt occurred. */
4554 unsigned attempt = 0x7fff, used;
4555 int jump_left = flag_expensive_optimizations + 1;
4556
4557 /* It is likely that the most recent eligible instruction is wanted for
4558 the delay slot. Therefore, find out which registers it uses, and
4559 try to avoid using them. */
4560
4561 for (scan = jump; (scan = PREV_INSN (scan)); )
4562 {
4563 enum rtx_code code;
4564
4565 if (INSN_DELETED_P (scan))
4566 continue;
4567 code = GET_CODE (scan);
4568 if (code == CODE_LABEL || code == JUMP_INSN)
4569 break;
4570 if (code == INSN
4571 && GET_CODE (PATTERN (scan)) != USE
4572 && GET_CODE (PATTERN (scan)) != CLOBBER
4573 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4574 {
4575 attempt &= ~regs_used (PATTERN (scan), 0);
4576 break;
4577 }
4578 }
4579 for (used = dead = 0, scan = JUMP_LABEL (jump);
4580 (scan = NEXT_INSN (scan)); )
4581 {
4582 enum rtx_code code;
4583
4584 if (INSN_DELETED_P (scan))
4585 continue;
4586 code = GET_CODE (scan);
4587 if (INSN_P (scan))
4588 {
4589 used |= regs_used (PATTERN (scan), 0);
4590 if (code == CALL_INSN)
4591 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4592 dead |= (used >> 16) & ~used;
4593 if (dead & attempt)
4594 {
4595 dead &= attempt;
4596 break;
4597 }
4598 if (code == JUMP_INSN)
4599 {
4600 if (jump_left-- && simplejump_p (scan))
4601 scan = JUMP_LABEL (scan);
4602 else
4603 break;
4604 }
4605 }
4606 }
4607 /* Mask out the stack pointer again, in case it was
4608 the only 'free' register we have found. */
4609 dead &= 0x7fff;
4610 }
4611 /* If the immediate destination is still in range, check for possible
4612 threading with a jump beyond the delay slot insn.
4613 Don't check if we are called recursively; the jump has been or will be
4614 checked in a different invocation then. */
4615
4616 else if (optimize && need_block >= 0)
4617 {
4618 rtx next = next_active_insn (next_active_insn (dest));
4619 if (next && GET_CODE (next) == JUMP_INSN
4620 && GET_CODE (PATTERN (next)) == SET
4621 && recog_memoized (next) == CODE_FOR_jump_compact)
4622 {
4623 dest = JUMP_LABEL (next);
4624 if (dest
4625 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4626 > 4092 + 4098))
4627 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4628 }
4629 }
4630
4631 if (dead)
4632 {
4633 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4634
4635 /* It would be nice if we could convert the jump into an indirect
4636 jump / far branch right now, thus exposing all constituent
4637 instructions to further optimization. However, reorg uses
4638 simplejump_p to determine if there is an unconditional jump where
4639 it should try to schedule instructions from the target of the
4640 branch; simplejump_p fails for indirect jumps even if they have
4641 a JUMP_LABEL. */
4642 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4643 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4644 , jump);
4645 /* ??? We would like this to have the scope of the jump, but that
4646 scope will change when a delay slot insn of an inner scope is added.
4647 Hence, after delay slot scheduling, we'll have to expect
4648 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4649 the jump. */
4650
4651 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4652 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4653 return insn;
4654 }
4655 else if (need_block)
4656 /* We can't use JUMP_LABEL here because it might be undefined
4657 when not optimizing. */
4658 return emit_insn_before (gen_block_branch_redirect
4659 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4660 , jump);
4661 return prev;
4662 }
4663
4664 #define CONDJUMP_MIN -252
4665 #define CONDJUMP_MAX 262
4666 struct far_branch
4667 {
4668 /* A label (to be placed) in front of the jump
4669 that jumps to our ultimate destination. */
4670 rtx near_label;
4671 /* Where we are going to insert it if we cannot move the jump any farther,
4672 or the jump itself if we have picked up an existing jump. */
4673 rtx insert_place;
4674 /* The ultimate destination. */
4675 rtx far_label;
4676 struct far_branch *prev;
4677 /* If the branch has already been created, its address;
4678 else the address of its first prospective user. */
4679 int address;
4680 };
4681
4682 static void gen_far_branch (struct far_branch *);
4683 enum mdep_reorg_phase_e mdep_reorg_phase;
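/* Emit the blocking far branch described by BP.  Illustrative sketch of
   the transformation (hypothetical labels, not from a testcase): an
   out-of-range conditional branch such as

	bt	Lfar

   becomes

	bf	Lskip
   Lnear:
	bra	Lfar
	nop
   Lskip:

   where Lnear is BP->near_label, so that other nearby branches to the same
   far destination can be redirected to reuse the unconditional jump.  */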
4684 static void
4685 gen_far_branch (struct far_branch *bp)
4686 {
4687 rtx insn = bp->insert_place;
4688 rtx jump;
4689 rtx label = gen_label_rtx ();
4690 int ok;
4691
4692 emit_label_after (label, insn);
4693 if (bp->far_label)
4694 {
4695 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4696 LABEL_NUSES (bp->far_label)++;
4697 }
4698 else
4699 jump = emit_jump_insn_after (gen_return (), insn);
4700 /* Emit a barrier so that reorg knows that any following instructions
4701 are not reachable via a fall-through path.
4702 But don't do this when not optimizing, since we wouldn't suppress the
4703 alignment for the barrier then, and could end up with out-of-range
4704 pc-relative loads. */
4705 if (optimize)
4706 emit_barrier_after (jump);
4707 emit_label_after (bp->near_label, insn);
4708 JUMP_LABEL (jump) = bp->far_label;
4709 ok = invert_jump (insn, label, 1);
4710 gcc_assert (ok);
4711
4712 /* If we are branching around a jump (rather than a return), prevent
4713 reorg from using an insn from the jump target as the delay slot insn -
4714 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4715 and it could cause branches to go out of range. */
4716 if (bp->far_label)
4717 (emit_insn_after
4718 (gen_stuff_delay_slot
4719 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4720 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4721 insn));
4722 /* Prevent reorg from undoing our splits. */
4723 gen_block_redirect (jump, bp->address += 2, 2);
4724 }
4725
4726 /* Fix up ADDR_DIFF_VECs: emit each braf reference label right after its casesi_jump_2 and make the ADDR_DIFF_VEC relative to that label. */
4727 void
4728 fixup_addr_diff_vecs (rtx first)
4729 {
4730 rtx insn;
4731
4732 for (insn = first; insn; insn = NEXT_INSN (insn))
4733 {
4734 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4735
4736 if (GET_CODE (insn) != JUMP_INSN
4737 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4738 continue;
4739 pat = PATTERN (insn);
4740 vec_lab = XEXP (XEXP (pat, 0), 0);
4741
4742 /* Search for the matching casesi_jump_2. */
4743 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4744 {
4745 if (GET_CODE (prev) != JUMP_INSN)
4746 continue;
4747 prevpat = PATTERN (prev);
4748 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4749 continue;
4750 x = XVECEXP (prevpat, 0, 1);
4751 if (GET_CODE (x) != USE)
4752 continue;
4753 x = XEXP (x, 0);
4754 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4755 break;
4756 }
4757 /* FIXME: This is a bug in the optimizer, but it seems harmless
4758 to just avoid panicking. */
4759 if (!prev)
4760 continue;
4761
4762 /* Emit the reference label of the braf where it belongs, right after
4763 the casesi_jump_2 (i.e. braf). */
4764 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4765 emit_label_after (braf_label, prev);
4766
4767 /* Fix up the ADDR_DIF_VEC to be relative
4768 to the reference address of the braf. */
4769 XEXP (XEXP (pat, 0), 0) = braf_label;
4770 }
4771 }
4772
4773 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4774 a barrier. Return the base 2 logarithm of the desired alignment. */
4775 int
4776 barrier_align (rtx barrier_or_label)
4777 {
4778 rtx next = next_real_insn (barrier_or_label), pat, prev;
4779 int slot, credit, jump_to_next = 0;
4780
4781 if (! next)
4782 return 0;
4783
4784 pat = PATTERN (next);
4785
4786 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4787 return 2;
4788
4789 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4790 /* This is a barrier in front of a constant table. */
4791 return 0;
4792
4793 prev = prev_real_insn (barrier_or_label);
4794 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4795 {
4796 pat = PATTERN (prev);
4797 /* If this is a very small table, we want to keep the alignment after
4798 the table to the minimum for proper code alignment. */
4799 return ((TARGET_SMALLCODE
4800 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4801 <= (unsigned) 1 << (CACHE_LOG - 2)))
4802 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4803 }
4804
4805 if (TARGET_SMALLCODE)
4806 return 0;
4807
4808 if (! TARGET_SH2 || ! optimize)
4809 return align_jumps_log;
4810
4811 /* When fixing up pcloads, a constant table might be inserted just before
4812 the basic block that ends with the barrier. Thus, we can't trust the
4813 instruction lengths before that. */
4814 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4815 {
4816 /* Check if there is an immediately preceding branch to the insn beyond
4817 the barrier. We must weigh the cost of discarding useful information
4818 from the current cache line when executing this branch and there is
4819 an alignment, against that of fetching unneeded insns in front of the
4820 branch target when there is no alignment. */
4821
4822 /* There are two delay_slot cases to consider. One is the simple case
4823 where the preceding branch is to the insn beyond the barrier (simple
4824 delay slot filling), and the other is where the preceding branch has
4825 a delay slot that is a duplicate of the insn after the barrier
4826 (fill_eager_delay_slots) and the branch is to the insn after the insn
4827 after the barrier. */
4828
4829 /* PREV is presumed to be the JUMP_INSN for the barrier under
4830 investigation. Skip to the insn before it. */
4831 prev = prev_real_insn (prev);
4832
4833 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4834 credit >= 0 && prev && GET_CODE (prev) == INSN;
4835 prev = prev_real_insn (prev))
4836 {
4837 jump_to_next = 0;
4838 if (GET_CODE (PATTERN (prev)) == USE
4839 || GET_CODE (PATTERN (prev)) == CLOBBER)
4840 continue;
4841 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4842 {
4843 prev = XVECEXP (PATTERN (prev), 0, 1);
4844 if (INSN_UID (prev) == INSN_UID (next))
4845 {
4846 /* Delay slot was filled with insn at jump target. */
4847 jump_to_next = 1;
4848 continue;
4849 }
4850 }
4851
4852 if (slot &&
4853 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4854 slot = 0;
4855 credit -= get_attr_length (prev);
4856 }
4857 if (prev
4858 && GET_CODE (prev) == JUMP_INSN
4859 && JUMP_LABEL (prev))
4860 {
4861 rtx x;
4862 if (jump_to_next
4863 || next_real_insn (JUMP_LABEL (prev)) == next
4864 /* If relax_delay_slots() decides NEXT was redundant
4865 with some previous instruction, it will have
4866 redirected PREV's jump to the following insn. */
4867 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4868 /* There is no upper bound on redundant instructions
4869 that might have been skipped, but we must not put an
4870 alignment where none had been before. */
4871 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4872 (INSN_P (x)
4873 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4874 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4875 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4876 {
4877 rtx pat = PATTERN (prev);
4878 if (GET_CODE (pat) == PARALLEL)
4879 pat = XVECEXP (pat, 0, 0);
4880 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4881 return 0;
4882 }
4883 }
4884 }
4885
4886 return align_jumps_log;
4887 }
4888
4889 /* If we are inside a phony loop, almost any kind of label can turn up as the
4890 first one in the loop. Aligning a braf label causes incorrect switch
4891 destination addresses; we can detect braf labels because they are
4892 followed by a BARRIER.
4893 Applying loop alignment to small constant or switch tables is a waste
4894 of space, so we suppress this too. */
4895 int
4896 sh_loop_align (rtx label)
4897 {
4898 rtx next = label;
4899
4900 do
4901 next = next_nonnote_insn (next);
4902 while (next && GET_CODE (next) == CODE_LABEL);
4903
4904 if (! next
4905 || ! INSN_P (next)
4906 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4907 || recog_memoized (next) == CODE_FOR_consttable_2)
4908 return 0;
4909
4910 return align_loops_log;
4911 }
4912
4913 /* Do a final pass over the function, just before delayed branch
4914 scheduling. */
4915
4916 static void
4917 sh_reorg (void)
4918 {
4919 rtx first, insn, mova = NULL_RTX;
4920 int num_mova;
4921 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4922 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4923
4924 first = get_insns ();
4925 max_labelno_before_reorg = max_label_num ();
4926
4927 /* We must split call insns before introducing `mova's. If we're
4928 optimizing, they'll have already been split. Otherwise, make
4929 sure we don't split them too late. */
4930 if (! optimize)
4931 split_all_insns_noflow ();
4932
4933 if (TARGET_SHMEDIA)
4934 return;
4935
4936 /* If relaxing, generate pseudo-ops to associate function calls with
4937 the symbols they call. It does no harm to not generate these
4938 pseudo-ops. However, when we can generate them, it enables the
4939 linker to potentially relax the jsr to a bsr, and eliminate the
4940 register load and, possibly, the constant pool entry. */
4941
4942 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4943 if (TARGET_RELAX)
4944 {
4945 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
4946 own purposes. This works because none of the remaining passes
4947 need to look at them.
4948
4949 ??? But it may break in the future. We should use a machine
4950 dependent REG_NOTE, or some other approach entirely. */
4951 for (insn = first; insn; insn = NEXT_INSN (insn))
4952 {
4953 if (INSN_P (insn))
4954 {
4955 rtx note;
4956
4957 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
4958 NULL_RTX)) != 0)
4959 remove_note (insn, note);
4960 }
4961 }
4962
4963 for (insn = first; insn; insn = NEXT_INSN (insn))
4964 {
4965 rtx pattern, reg, link, set, scan, dies, label;
4966 int rescan = 0, foundinsn = 0;
4967
4968 if (GET_CODE (insn) == CALL_INSN)
4969 {
4970 pattern = PATTERN (insn);
4971
4972 if (GET_CODE (pattern) == PARALLEL)
4973 pattern = XVECEXP (pattern, 0, 0);
4974 if (GET_CODE (pattern) == SET)
4975 pattern = SET_SRC (pattern);
4976
4977 if (GET_CODE (pattern) != CALL
4978 || GET_CODE (XEXP (pattern, 0)) != MEM)
4979 continue;
4980
4981 reg = XEXP (XEXP (pattern, 0), 0);
4982 }
4983 else
4984 {
4985 reg = sfunc_uses_reg (insn);
4986 if (! reg)
4987 continue;
4988 }
4989
4990 if (GET_CODE (reg) != REG)
4991 continue;
4992
4993 /* Try scanning backward to find where the register is set. */
4994 link = NULL;
4995 for (scan = PREV_INSN (insn);
4996 scan && GET_CODE (scan) != CODE_LABEL;
4997 scan = PREV_INSN (scan))
4998 {
4999 if (! INSN_P (scan))
5000 continue;
5001
5002 if (! reg_mentioned_p (reg, scan))
5003 continue;
5004
5005 if (noncall_uses_reg (reg, scan, &set))
5006 break;
5007
5008 if (set)
5009 {
5010 link = scan;
5011 break;
5012 }
5013 }
5014
5015 if (! link)
5016 continue;
5017
5018 /* The register is set at LINK. */
5019
5020 /* We can only optimize the function call if the register is
5021 being set to a symbol. In theory, we could sometimes
5022 optimize calls to a constant location, but the assembler
5023 and linker do not support that at present. */
5024 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5025 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5026 continue;
5027
5028 /* Scan forward from LINK to the place where REG dies, and
5029 make sure that the only insns which use REG are
5030 themselves function calls. */
5031
5032 /* ??? This doesn't work for call targets that were allocated
5033 by reload, since there may not be a REG_DEAD note for the
5034 register. */
5035
5036 dies = NULL_RTX;
5037 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5038 {
5039 rtx scanset;
5040
5041 /* Don't try to trace forward past a CODE_LABEL if we haven't
5042 seen INSN yet. Ordinarily, we will only find the setting insn
5043 if it is in the same basic block. However,
5044 cross-jumping can insert code labels in between the load and
5045 the call, and can result in situations where a single call
5046 insn may have two targets depending on where we came from. */
5047
5048 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
5049 break;
5050
5051 if (! INSN_P (scan))
5052 continue;
5053
5054 /* Don't try to trace forward past a JUMP. To optimize
5055 safely, we would have to check that all the
5056 instructions at the jump destination did not use REG. */
5057
5058 if (GET_CODE (scan) == JUMP_INSN)
5059 break;
5060
5061 if (! reg_mentioned_p (reg, scan))
5062 continue;
5063
5064 if (noncall_uses_reg (reg, scan, &scanset))
5065 break;
5066
5067 if (scan == insn)
5068 foundinsn = 1;
5069
5070 if (scan != insn
5071 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
5072 {
5073 /* There is a function call to this register other
5074 than the one we are checking. If we optimize
5075 this call, we need to rescan again below. */
5076 rescan = 1;
5077 }
5078
5079 /* ??? We shouldn't have to worry about SCANSET here.
5080 We should just be able to check for a REG_DEAD note
5081 on a function call. However, the REG_DEAD notes are
5082 apparently not dependable around libcalls; c-torture
5083 execute/920501-2 is a test case. If SCANSET is set,
5084 then this insn sets the register, so it must have
5085 died earlier. Unfortunately, this will only handle
5086 the cases in which the register is, in fact, set in a
5087 later insn. */
5088
5089 /* ??? We shouldn't have to use FOUNDINSN here.
5090 This dates back to when we used LOG_LINKS to find
5091 the most recent insn which sets the register. */
5092
5093 if (foundinsn
5094 && (scanset
5095 || find_reg_note (scan, REG_DEAD, reg)))
5096 {
5097 dies = scan;
5098 break;
5099 }
5100 }
5101
5102 if (! dies)
5103 {
5104 /* Either there was a branch, or some insn used REG
5105 other than as a function call address. */
5106 continue;
5107 }
5108
5109 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5110 on the insn which sets the register, and on each call insn
5111 which uses the register. In final_prescan_insn we look for
5112 the REG_LABEL_OPERAND notes, and output the appropriate label
5113 or pseudo-op. */
5114
5115 label = gen_label_rtx ();
5116 add_reg_note (link, REG_LABEL_OPERAND, label);
5117 add_reg_note (insn, REG_LABEL_OPERAND, label);
5118 if (rescan)
5119 {
5120 scan = link;
5121 do
5122 {
5123 rtx reg2;
5124
5125 scan = NEXT_INSN (scan);
5126 if (scan != insn
5127 && ((GET_CODE (scan) == CALL_INSN
5128 && reg_mentioned_p (reg, scan))
5129 || ((reg2 = sfunc_uses_reg (scan))
5130 && REGNO (reg2) == REGNO (reg))))
5131 add_reg_note (scan, REG_LABEL_OPERAND, label);
5132 }
5133 while (scan != dies);
5134 }
5135 }
5136 }
5137
5138 if (TARGET_SH2)
5139 fixup_addr_diff_vecs (first);
5140
5141 if (optimize)
5142 {
5143 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5144 shorten_branches (first);
5145 }
5146
5147 /* Scan the function looking for move instructions which have to be
5148 changed to pc-relative loads and insert the literal tables. */
5149 label_ref_list_pool = create_alloc_pool ("label references list",
5150 sizeof (struct label_ref_list_d),
5151 30);
5152 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5153 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5154 {
5155 if (mova_p (insn))
5156 {
5157 /* ??? basic block reordering can move a switch table dispatch
5158 below the switch table. Check if that has happened.
5159 We only have the addresses available when optimizing; but then,
5160 this check shouldn't be needed when not optimizing. */
5161 if (!untangle_mova (&num_mova, &mova, insn))
5162 {
5163 insn = mova;
5164 num_mova = 0;
5165 }
5166 }
5167 else if (GET_CODE (insn) == JUMP_INSN
5168 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5169 && num_mova
5170 /* ??? loop invariant motion can also move a mova out of a
5171 loop. Since loop does this code motion anyway, maybe we
5172 should wrap UNSPEC_MOVA into a CONST, so that reload can
5173 move it back. */
5174 && ((num_mova > 1
5175 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5176 || (prev_nonnote_insn (insn)
5177 == XEXP (MOVA_LABELREF (mova), 0))))
5178 {
5179 rtx scan;
5180 int total;
5181
5182 num_mova--;
5183
5184 /* Some code might have been inserted between the mova and
5185 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5186 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5187 total += get_attr_length (scan);
5188
5189 /* The range of mova is 1020, add 4 because pc counts from address of
5190 second instruction after this one, subtract 2 in case pc is 2
5191 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5192 cancels out with alignment effects of the mova itself. */
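/* That is, 1020 + 4 - 2 = 1022 bytes, hence the comparison below.  */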
5193 if (total > 1022)
5194 {
5195 /* Change the mova into a load, and restart scanning
5196 there. broken_move will then return true for mova. */
5197 fixup_mova (mova);
5198 insn = mova;
5199 }
5200 }
5201 if (broken_move (insn)
5202 || (GET_CODE (insn) == INSN
5203 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5204 {
5205 rtx scan;
5206 /* Scan ahead looking for a barrier to stick the constant table
5207 behind. */
5208 rtx barrier = find_barrier (num_mova, mova, insn);
5209 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5210 int need_aligned_label = 0;
5211
5212 if (num_mova && ! mova_p (mova))
5213 {
5214 /* find_barrier had to change the first mova into a
5215 pcload; thus, we have to start with this new pcload. */
5216 insn = mova;
5217 num_mova = 0;
5218 }
5219 /* Now find all the moves between the points and modify them. */
5220 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5221 {
5222 if (GET_CODE (scan) == CODE_LABEL)
5223 last_float = 0;
5224 if (GET_CODE (scan) == INSN
5225 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5226 need_aligned_label = 1;
5227 if (broken_move (scan))
5228 {
5229 rtx *patp = &PATTERN (scan), pat = *patp;
5230 rtx src, dst;
5231 rtx lab;
5232 rtx newsrc;
5233 enum machine_mode mode;
5234
5235 if (GET_CODE (pat) == PARALLEL)
5236 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5237 src = SET_SRC (pat);
5238 dst = SET_DEST (pat);
5239 mode = GET_MODE (dst);
5240
5241 if (mode == SImode && hi_const (src)
5242 && REGNO (dst) != FPUL_REG)
5243 {
5244 int offset = 0;
5245
5246 mode = HImode;
5247 while (GET_CODE (dst) == SUBREG)
5248 {
5249 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5250 GET_MODE (SUBREG_REG (dst)),
5251 SUBREG_BYTE (dst),
5252 GET_MODE (dst));
5253 dst = SUBREG_REG (dst);
5254 }
5255 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5256 }
5257 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5258 {
5259 /* This must be an insn that clobbers r0. */
5260 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5261 XVECLEN (PATTERN (scan), 0)
5262 - 1);
5263 rtx clobber = *clobberp;
5264
5265 gcc_assert (GET_CODE (clobber) == CLOBBER
5266 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5267
5268 if (last_float
5269 && reg_set_between_p (r0_rtx, last_float_move, scan))
5270 last_float = 0;
5271 if (last_float
5272 && TARGET_SHCOMPACT
5273 && GET_MODE_SIZE (mode) != 4
5274 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5275 last_float = 0;
5276 lab = add_constant (src, mode, last_float);
5277 if (lab)
5278 emit_insn_before (gen_mova (lab), scan);
5279 else
5280 {
5281 /* There will be a REG_UNUSED note for r0 on
5282 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5283 lest reorg's mark_target_live_regs fail to
5284 consider r0 to be used and we end up with a delay
5285 slot insn in front of SCAN that clobbers r0. */
5286 rtx note
5287 = find_regno_note (last_float_move, REG_UNUSED, 0);
5288
5289 /* If we are not optimizing, then there may not be
5290 a note. */
5291 if (note)
5292 PUT_REG_NOTE_KIND (note, REG_INC);
5293
5294 *last_float_addr = r0_inc_rtx;
5295 }
5296 last_float_move = scan;
5297 last_float = src;
5298 newsrc = gen_const_mem (mode,
5299 (((TARGET_SH4 && ! TARGET_FMOVD)
5300 || REGNO (dst) == FPUL_REG)
5301 ? r0_inc_rtx
5302 : r0_rtx));
5303 last_float_addr = &XEXP (newsrc, 0);
5304
5305 /* Remove the clobber of r0. */
5306 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5307 gen_rtx_SCRATCH (Pmode));
5308 }
5309 /* This is a mova needing a label. Create it. */
5310 else if (GET_CODE (src) == UNSPEC
5311 && XINT (src, 1) == UNSPEC_MOVA
5312 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5313 {
5314 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5315 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5316 newsrc = gen_rtx_UNSPEC (SImode,
5317 gen_rtvec (1, newsrc),
5318 UNSPEC_MOVA);
5319 }
5320 else
5321 {
5322 lab = add_constant (src, mode, 0);
5323 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5324 newsrc = gen_const_mem (mode, newsrc);
5325 }
5326 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5327 INSN_CODE (scan) = -1;
5328 }
5329 }
5330 dump_table (need_aligned_label ? insn : 0, barrier);
5331 insn = barrier;
5332 }
5333 }
5334 free_alloc_pool (label_ref_list_pool);
5335 for (insn = first; insn; insn = NEXT_INSN (insn))
5336 PUT_MODE (insn, VOIDmode);
5337
5338 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5339 INSN_ADDRESSES_FREE ();
5340 split_branches (first);
5341
5342 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5343 also has an effect on the register that holds the address of the sfunc.
5344 Insert an extra dummy insn in front of each sfunc that pretends to
5345 use this register. */
5346 if (flag_delayed_branch)
5347 {
5348 for (insn = first; insn; insn = NEXT_INSN (insn))
5349 {
5350 rtx reg = sfunc_uses_reg (insn);
5351
5352 if (! reg)
5353 continue;
5354 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5355 }
5356 }
5357 #if 0
5358 /* fpscr is not actually a user variable, but we pretend it is for the
5359 sake of the previous optimization passes, since we want it handled like
5360 one. However, we don't have any debugging information for it, so turn
5361 it into a non-user variable now. */
5362 if (TARGET_SH4)
5363 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5364 #endif
5365 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5366 }
5367
5368 int
5369 get_dest_uid (rtx label, int max_uid)
5370 {
5371 rtx dest = next_real_insn (label);
5372 int dest_uid;
5373 if (! dest)
5374 /* This can happen for an undefined label. */
5375 return 0;
5376 dest_uid = INSN_UID (dest);
5377 /* If this is a newly created branch redirection blocking instruction,
5378 we cannot index the branch_uid or insn_addresses arrays with its
5379 uid. But then, we won't need to, because the actual destination is
5380 the following branch. */
5381 while (dest_uid >= max_uid)
5382 {
5383 dest = NEXT_INSN (dest);
5384 dest_uid = INSN_UID (dest);
5385 }
5386 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5387 return 0;
5388 return dest_uid;
5389 }
5390
5391 /* Split condbranches that are out of range. Also add clobbers for
5392 scratch registers that are needed in far jumps.
5393 We do this before delay slot scheduling, so that it can take our
5394 newly created instructions into account. It also allows us to
5395 find branches with common targets more easily. */
5396
5397 static void
5398 split_branches (rtx first)
5399 {
5400 rtx insn;
5401 struct far_branch **uid_branch, *far_branch_list = 0;
5402 int max_uid = get_max_uid ();
5403 int ok;
5404
5405 /* Find out which branches are out of range. */
5406 shorten_branches (first);
5407
5408 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5409 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5410
5411 for (insn = first; insn; insn = NEXT_INSN (insn))
5412 if (! INSN_P (insn))
5413 continue;
5414 else if (INSN_DELETED_P (insn))
5415 {
5416 /* Shorten_branches would split this instruction again,
5417 so transform it into a note. */
5418 SET_INSN_DELETED (insn);
5419 }
5420 else if (GET_CODE (insn) == JUMP_INSN
5421 /* Don't mess with ADDR_DIFF_VEC */
5422 && (GET_CODE (PATTERN (insn)) == SET
5423 || GET_CODE (PATTERN (insn)) == RETURN))
5424 {
5425 enum attr_type type = get_attr_type (insn);
5426 if (type == TYPE_CBRANCH)
5427 {
5428 rtx next, beyond;
5429
5430 if (get_attr_length (insn) > 4)
5431 {
5432 rtx src = SET_SRC (PATTERN (insn));
5433 rtx olabel = XEXP (XEXP (src, 1), 0);
5434 int addr = INSN_ADDRESSES (INSN_UID (insn));
5435 rtx label = 0;
5436 int dest_uid = get_dest_uid (olabel, max_uid);
5437 struct far_branch *bp = uid_branch[dest_uid];
5438
5439 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5440 the label if the LABEL_NUSES count drops to zero. There is
5441 always a jump_optimize pass that sets these values, but it
5442 proceeds to delete unreferenced code, and then if not
5443 optimizing, to un-delete the deleted instructions, thus
5444 leaving labels with too low uses counts. */
5445 if (! optimize)
5446 {
5447 JUMP_LABEL (insn) = olabel;
5448 LABEL_NUSES (olabel)++;
5449 }
5450 if (! bp)
5451 {
5452 bp = (struct far_branch *) alloca (sizeof *bp);
5453 uid_branch[dest_uid] = bp;
5454 bp->prev = far_branch_list;
5455 far_branch_list = bp;
5456 bp->far_label
5457 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5458 LABEL_NUSES (bp->far_label)++;
5459 }
5460 else
5461 {
5462 label = bp->near_label;
5463 if (! label && bp->address - addr >= CONDJUMP_MIN)
5464 {
5465 rtx block = bp->insert_place;
5466
5467 if (GET_CODE (PATTERN (block)) == RETURN)
5468 block = PREV_INSN (block);
5469 else
5470 block = gen_block_redirect (block,
5471 bp->address, 2);
5472 label = emit_label_after (gen_label_rtx (),
5473 PREV_INSN (block));
5474 bp->near_label = label;
5475 }
5476 else if (label && ! NEXT_INSN (label))
5477 {
5478 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5479 bp->insert_place = insn;
5480 else
5481 gen_far_branch (bp);
5482 }
5483 }
5484 if (! label
5485 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5486 {
5487 bp->near_label = label = gen_label_rtx ();
5488 bp->insert_place = insn;
5489 bp->address = addr;
5490 }
5491 ok = redirect_jump (insn, label, 0);
5492 gcc_assert (ok);
5493 }
5494 else
5495 {
5496 /* get_attr_length (insn) == 2 */
5497 /* Check if we have a pattern where reorg wants to redirect
5498 the branch to a label from an unconditional branch that
5499 is too far away. */
5500 /* We can't use JUMP_LABEL here because it might be undefined
5501 when not optimizing. */
5502 /* A syntax error might cause beyond to be NULL_RTX. */
5503 beyond
5504 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5505 0));
5506
5507 if (beyond
5508 && (GET_CODE (beyond) == JUMP_INSN
5509 || ((beyond = next_active_insn (beyond))
5510 && GET_CODE (beyond) == JUMP_INSN))
5511 && GET_CODE (PATTERN (beyond)) == SET
5512 && recog_memoized (beyond) == CODE_FOR_jump_compact
5513 && ((INSN_ADDRESSES
5514 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5515 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5516 > 252 + 258 + 2))
5517 gen_block_redirect (beyond,
5518 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5519 }
5520
5521 next = next_active_insn (insn);
5522
5523 if ((GET_CODE (next) == JUMP_INSN
5524 || ((next = next_active_insn (next))
5525 && GET_CODE (next) == JUMP_INSN))
5526 && GET_CODE (PATTERN (next)) == SET
5527 && recog_memoized (next) == CODE_FOR_jump_compact
5528 && ((INSN_ADDRESSES
5529 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5530 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5531 > 252 + 258 + 2))
5532 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5533 }
5534 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5535 {
5536 int addr = INSN_ADDRESSES (INSN_UID (insn));
5537 rtx far_label = 0;
5538 int dest_uid = 0;
5539 struct far_branch *bp;
5540
5541 if (type == TYPE_JUMP)
5542 {
5543 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5544 dest_uid = get_dest_uid (far_label, max_uid);
5545 if (! dest_uid)
5546 {
5547 /* Parse errors can lead to labels outside
5548 the insn stream. */
5549 if (! NEXT_INSN (far_label))
5550 continue;
5551
5552 if (! optimize)
5553 {
5554 JUMP_LABEL (insn) = far_label;
5555 LABEL_NUSES (far_label)++;
5556 }
5557 redirect_jump (insn, NULL_RTX, 1);
5558 far_label = 0;
5559 }
5560 }
5561 bp = uid_branch[dest_uid];
5562 if (! bp)
5563 {
5564 bp = (struct far_branch *) alloca (sizeof *bp);
5565 uid_branch[dest_uid] = bp;
5566 bp->prev = far_branch_list;
5567 far_branch_list = bp;
5568 bp->near_label = 0;
5569 bp->far_label = far_label;
5570 if (far_label)
5571 LABEL_NUSES (far_label)++;
5572 }
5573 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5574 if (addr - bp->address <= CONDJUMP_MAX)
5575 emit_label_after (bp->near_label, PREV_INSN (insn));
5576 else
5577 {
5578 gen_far_branch (bp);
5579 bp->near_label = 0;
5580 }
5581 else
5582 bp->near_label = 0;
5583 bp->address = addr;
5584 bp->insert_place = insn;
5585 if (! far_label)
5586 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5587 else
5588 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5589 }
5590 }
5591 /* Generate all pending far branches,
5592 and free our references to the far labels. */
5593 while (far_branch_list)
5594 {
5595 if (far_branch_list->near_label
5596 && ! NEXT_INSN (far_branch_list->near_label))
5597 gen_far_branch (far_branch_list);
5598 if (optimize
5599 && far_branch_list->far_label
5600 && ! --LABEL_NUSES (far_branch_list->far_label))
5601 delete_insn (far_branch_list->far_label);
5602 far_branch_list = far_branch_list->prev;
5603 }
5604
5605 /* Instruction length information is no longer valid due to the new
5606 instructions that have been generated. */
5607 init_insn_lengths ();
5608 }
5609
5610 /* Dump out instruction addresses, which is useful for debugging the
5611 constant pool table stuff.
5612
5613 If relaxing, output the label and pseudo-ops used to link together
5614 calls and the instruction which set the registers. */
5615
5616 /* ??? The addresses printed by this routine for insns are nonsense for
5617 insns which are inside of a sequence where none of the inner insns have
5618 variable length. This is because the second pass of shorten_branches
5619 does not bother to update them. */
5620
5621 void
5622 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5623 int noperands ATTRIBUTE_UNUSED)
5624 {
5625 if (TARGET_DUMPISIZE)
5626 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5627
5628 if (TARGET_RELAX)
5629 {
5630 rtx note;
5631
5632 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5633 if (note)
5634 {
5635 rtx pattern;
5636
5637 pattern = PATTERN (insn);
5638 if (GET_CODE (pattern) == PARALLEL)
5639 pattern = XVECEXP (pattern, 0, 0);
5640 switch (GET_CODE (pattern))
5641 {
5642 case SET:
5643 if (GET_CODE (SET_SRC (pattern)) != CALL
5644 && get_attr_type (insn) != TYPE_SFUNC)
5645 {
5646 targetm.asm_out.internal_label
5647 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5648 break;
5649 }
5650 /* else FALLTHROUGH */
5651 case CALL:
5652 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5653 CODE_LABEL_NUMBER (XEXP (note, 0)));
5654 break;
5655
5656 default:
5657 gcc_unreachable ();
5658 }
5659 }
5660 }
5661 }
5662
5663 /* Dump out any constants accumulated in the final pass. These will
5664 only be labels. */
5665
5666 const char *
5667 output_jump_label_table (void)
5668 {
5669 int i;
5670
5671 if (pool_size)
5672 {
5673 fprintf (asm_out_file, "\t.align 2\n");
5674 for (i = 0; i < pool_size; i++)
5675 {
5676 pool_node *p = &pool_vector[i];
5677
5678 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5679 CODE_LABEL_NUMBER (p->label));
5680 output_asm_insn (".long %O0", &p->value);
5681 }
5682 pool_size = 0;
5683 }
5684
5685 return "";
5686 }
5687 \f
5688 /* A full frame looks like:
5689
5690 arg-5
5691 arg-4
5692 [ if current_function_anonymous_args
5693 arg-3
5694 arg-2
5695 arg-1
5696 arg-0 ]
5697 saved-fp
5698 saved-r10
5699 saved-r11
5700 saved-r12
5701 saved-pr
5702 local-n
5703 ..
5704 local-1
5705 local-0 <- fp points here. */
5706
5707 /* Number of bytes pushed for anonymous args, used to pass information
5708 between expand_prologue and expand_epilogue. */
5709
5710 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5711 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5712 for an epilogue and a negative value means that it's for a sibcall
5713 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5714 all the registers that are about to be restored, and hence dead. */
5715
5716 static void
5717 output_stack_adjust (int size, rtx reg, int epilogue_p,
5718 HARD_REG_SET *live_regs_mask)
5719 {
5720 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5721 if (size)
5722 {
5723 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5724
5725 /* This test is bogus, as output_stack_adjust is used to re-align the
5726 stack. */
5727 #if 0
5728 gcc_assert (!(size % align));
5729 #endif
5730
5731 if (CONST_OK_FOR_ADD (size))
5732 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5733 /* Try to do it with two partial adjustments; however, we must make
5734 sure that the stack is properly aligned at all times, in case
5735 an interrupt occurs between the two partial adjustments. */
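/* Illustrative example, not from the original comments: assuming a
   32-bit STACK_BOUNDARY (align == 4) and the 8-bit signed add-immediate
   range of SH (-128..127), a request of size == 200 splits into
   (200 / 2 & -4) == 100 followed by 200 - 100 == 100; both steps fit
   the immediate range, and the stack pointer stays 4-byte aligned
   after the first step. */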
5736 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5737 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5738 {
5739 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5740 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5741 }
5742 else
5743 {
5744 rtx const_reg;
5745 rtx insn;
5746 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5747 int i;
5748
5749 /* If TEMP is invalid, we could temporarily save a general
5750 register to MACL. However, there is currently no need
5751 to handle this case, so just die when we see it. */
5752 if (epilogue_p < 0
5753 || current_function_interrupt
5754 || ! call_really_used_regs[temp] || fixed_regs[temp])
5755 temp = -1;
5756 if (temp < 0 && ! current_function_interrupt
5757 && (TARGET_SHMEDIA || epilogue_p >= 0))
5758 {
5759 HARD_REG_SET temps;
5760 COPY_HARD_REG_SET (temps, call_used_reg_set);
5761 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5762 if (epilogue_p > 0)
5763 {
5764 int nreg = 0;
5765 if (crtl->return_rtx)
5766 {
5767 enum machine_mode mode;
5768 mode = GET_MODE (crtl->return_rtx);
5769 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5770 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5771 }
5772 for (i = 0; i < nreg; i++)
5773 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5774 if (crtl->calls_eh_return)
5775 {
5776 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5777 for (i = 0; i <= 3; i++)
5778 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5779 }
5780 }
5781 if (TARGET_SHMEDIA && epilogue_p < 0)
5782 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5783 CLEAR_HARD_REG_BIT (temps, i);
5784 if (epilogue_p <= 0)
5785 {
5786 for (i = FIRST_PARM_REG;
5787 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5788 CLEAR_HARD_REG_BIT (temps, i);
5789 if (cfun->static_chain_decl != NULL)
5790 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5791 }
5792 temp = scavenge_reg (&temps);
5793 }
5794 if (temp < 0 && live_regs_mask)
5795 {
5796 HARD_REG_SET temps;
5797
5798 COPY_HARD_REG_SET (temps, *live_regs_mask);
5799 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5800 temp = scavenge_reg (&temps);
5801 }
5802 if (temp < 0)
5803 {
5804 rtx adj_reg, tmp_reg, mem;
5805
5806 /* If we reached here, the most likely case is the (sibcall)
5807 epilogue for non SHmedia. Put a special push/pop sequence
5808 for such case as the last resort. This looks lengthy but
5809 would not be a problem because it seems to be very
5810 rare. */
5811
5812 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5813
5814
5815 /* ??? There is still the slight possibility that r4 or
5816 r5 have been reserved as fixed registers or assigned
5817 as global registers, and they change during an
5818 interrupt. There are possible ways to handle this:
5819
5820 - If we are adjusting the frame pointer (r14), we can do
5821 with a single temp register and an ordinary push / pop
5822 on the stack.
5823 - Grab any call-used or call-saved registers (i.e. not
5824 fixed or globals) for the temps we need. We might
5825 also grab r14 if we are adjusting the stack pointer.
5826 If we can't find enough available registers, issue
5827 a diagnostic and die - the user must have reserved
5828 way too many registers.
5829 But since all this is rather unlikely to happen and
5830 would require extra testing, we just die if r4 / r5
5831 are not available. */
5832 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5833 && !global_regs[4] && !global_regs[5]);
5834
5835 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5836 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5837 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5838 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5839 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5840 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5841 emit_move_insn (mem, tmp_reg);
5842 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5843 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5844 emit_move_insn (mem, tmp_reg);
5845 emit_move_insn (reg, adj_reg);
5846 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5847 emit_move_insn (adj_reg, mem);
5848 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5849 emit_move_insn (tmp_reg, mem);
5850 /* Tell flow the insns that pop r4/r5 aren't dead. */
5851 emit_use (tmp_reg);
5852 emit_use (adj_reg);
5853 return;
5854 }
5855 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5856
5857 /* If SIZE is negative, subtract the positive value.
5858 This sometimes allows a constant pool entry to be shared
5859 between prologue and epilogue code. */
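/* Illustrative example, not from the original comments: a prologue
   call with SIZE == -256 loads 256 into CONST_REG and subtracts it,
   while the matching epilogue call with SIZE == 256 loads the same
   constant 256 and adds it, so one constant pool entry can serve
   both sequences. */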
5860 if (size < 0)
5861 {
5862 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5863 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5864 }
5865 else
5866 {
5867 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5868 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5869 }
5870 if (! epilogue_p)
5871 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5872 gen_rtx_SET (VOIDmode, reg,
5873 gen_rtx_PLUS (SImode, reg,
5874 GEN_INT (size))));
5875 }
5876 }
5877 }
5878
5879 static rtx
5880 frame_insn (rtx x)
5881 {
5882 x = emit_insn (x);
5883 RTX_FRAME_RELATED_P (x) = 1;
5884 return x;
5885 }
5886
5887 /* Output RTL to push register RN onto the stack. */
5888
5889 static rtx
5890 push (int rn)
5891 {
5892 rtx x;
5893 if (rn == FPUL_REG)
5894 x = gen_push_fpul ();
5895 else if (rn == FPSCR_REG)
5896 x = gen_push_fpscr ();
5897 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5898 && FP_OR_XD_REGISTER_P (rn))
5899 {
5900 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5901 return NULL_RTX;
5902 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5903 }
5904 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5905 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5906 else
5907 x = gen_push (gen_rtx_REG (SImode, rn));
5908
5909 x = frame_insn (x);
5910 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
5911 return x;
5912 }
5913
5914 /* Output RTL to pop register RN from the stack. */
5915
5916 static void
5917 pop (int rn)
5918 {
5919 rtx x;
5920 if (rn == FPUL_REG)
5921 x = gen_pop_fpul ();
5922 else if (rn == FPSCR_REG)
5923 x = gen_pop_fpscr ();
5924 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5925 && FP_OR_XD_REGISTER_P (rn))
5926 {
5927 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5928 return;
5929 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5930 }
5931 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5932 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5933 else
5934 x = gen_pop (gen_rtx_REG (SImode, rn));
5935
5936 x = emit_insn (x);
5937 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
5938 }
5939
5940 /* Generate code to push the regs specified in the mask. */
5941
5942 static void
5943 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5944 {
5945 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
5946 int skip_fpscr = 0;
5947
5948 /* Push PR last; this gives better latencies after the prologue, and
5949 candidates for the return delay slot when there are no general
5950 registers pushed. */
5951 for (; i < FIRST_PSEUDO_REGISTER; i++)
5952 {
5953 /* If this is an interrupt handler, and the SZ bit varies,
5954 and we have to push any floating point register, we need
5955 to switch to the correct precision first. */
5956 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5957 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
5958 {
5959 HARD_REG_SET unsaved;
5960
5961 push (FPSCR_REG);
5962 COMPL_HARD_REG_SET (unsaved, *mask);
5963 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5964 skip_fpscr = 1;
5965 }
5966 if (i != PR_REG
5967 && (i != FPSCR_REG || ! skip_fpscr)
5968 && TEST_HARD_REG_BIT (*mask, i))
5969 {
5970 /* If the ISR has the RESBANK attribute assigned, don't push any of
5971 the following registers: R0-R14, MACH, MACL and GBR. */
5972 if (! (sh_cfun_resbank_handler_p ()
5973 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
5974 || i == MACH_REG
5975 || i == MACL_REG
5976 || i == GBR_REG)))
5977 push (i);
5978 }
5979 }
5980
5981 /* Push banked registers last to improve delay slot opportunities. */
5982 if (interrupt_handler)
5983 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
5984 if (TEST_HARD_REG_BIT (*mask, i))
5985 push (i);
5986
5987 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
5988 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
5989 push (PR_REG);
5990 }
5991
5992 /* Calculate how much extra space is needed to save all callee-saved
5993 target registers.
5994 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5995
5996 static int
5997 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5998 {
5999 int reg;
6000 int stack_space = 0;
6001 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6002
6003 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6004 if ((! call_really_used_regs[reg] || interrupt_handler)
6005 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6006 /* Leave space to save this target register on the stack,
6007 in case target register allocation wants to use it. */
6008 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6009 return stack_space;
6010 }
6011
6012 /* Decide whether we should reserve space for callee-save target registers,
6013 in case target register allocation wants to use them. REGS_SAVED is
6014 the space, in bytes, that is already required for register saves.
6015 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6016
6017 static int
6018 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6019 HARD_REG_SET *live_regs_mask)
6020 {
6021 if (optimize_size)
6022 return 0;
6023 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6024 }
6025
6026 /* Decide how much space to reserve for callee-save target registers
6027 in case target register allocation wants to use them.
6028 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6029
6030 static int
6031 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6032 {
6033 if (shmedia_space_reserved_for_target_registers)
6034 return shmedia_target_regs_stack_space (live_regs_mask);
6035 else
6036 return 0;
6037 }
6038
6039 /* Work out the registers which need to be saved, both as a mask and a
6040 count of saved words. Return the count.
6041
6042 If doing a pragma interrupt function, then push all regs used by the
6043 function, and if we call another function (we can tell by looking at PR),
6044 make sure that all the regs it clobbers are safe too. */
6045
6046 static int
6047 calc_live_regs (HARD_REG_SET *live_regs_mask)
6048 {
6049 unsigned int reg;
6050 int count;
6051 tree attrs;
6052 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6053 bool nosave_low_regs;
6054 int pr_live, has_call;
6055
6056 attrs = DECL_ATTRIBUTES (current_function_decl);
6057 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6058 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6059 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6060 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6061
6062 CLEAR_HARD_REG_SET (*live_regs_mask);
6063 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6064 && df_regs_ever_live_p (FPSCR_REG))
6065 target_flags &= ~MASK_FPU_SINGLE;
6066 /* If switching to double mode saves a lot of register saves, do that. */
6067 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6068 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6069 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6070 && (! call_really_used_regs[reg]
6071 || interrupt_handler)
6072 && ++count > 2)
6073 {
6074 target_flags &= ~MASK_FPU_SINGLE;
6075 break;
6076 }
6077 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6078 knows how to use it. That means the pseudo originally allocated for
6079 the initial value can become the PR_MEDIA_REG hard register, as seen for
6080 execute/20010122-1.c:test9. */
6081 if (TARGET_SHMEDIA)
6082 /* ??? this function is called from initial_elimination_offset, hence we
6083 can't use the result of sh_media_register_for_return here. */
6084 pr_live = sh_pr_n_sets ();
6085 else
6086 {
6087 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6088 pr_live = (pr_initial
6089 ? (GET_CODE (pr_initial) != REG
6090 || REGNO (pr_initial) != (PR_REG))
6091 : df_regs_ever_live_p (PR_REG));
6092 /* For SHcompact, if not optimizing, we end up with a memory reference
6093 using the return address pointer for __builtin_return_address even
6094 though there is no actual need to put the PR register on the stack. */
6095 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6096 }
6097 /* Force PR to be live if the prologue has to call the SHmedia
6098 argument decoder or register saver. */
6099 if (TARGET_SHCOMPACT
6100 && ((crtl->args.info.call_cookie
6101 & ~ CALL_COOKIE_RET_TRAMP (1))
6102 || crtl->saves_all_registers))
6103 pr_live = 1;
6104 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6105 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6106 {
6107 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6108 ? pr_live
6109 : interrupt_handler
6110 ? (/* Need to save all the regs ever live. */
6111 (df_regs_ever_live_p (reg)
6112 || (call_really_used_regs[reg]
6113 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6114 || reg == PIC_OFFSET_TABLE_REGNUM)
6115 && has_call)
6116 || (TARGET_SHMEDIA && has_call
6117 && REGISTER_NATURAL_MODE (reg) == SImode
6118 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6119 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6120 && reg != RETURN_ADDRESS_POINTER_REGNUM
6121 && reg != T_REG && reg != GBR_REG
6122 /* Push fpscr only on targets which have an FPU. */
6123 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6124 : (/* Only push those regs which are used and need to be saved. */
6125 (TARGET_SHCOMPACT
6126 && flag_pic
6127 && crtl->args.info.call_cookie
6128 && reg == PIC_OFFSET_TABLE_REGNUM)
6129 || (df_regs_ever_live_p (reg)
6130 && ((!call_really_used_regs[reg]
6131 && !(reg != PIC_OFFSET_TABLE_REGNUM
6132 && fixed_regs[reg] && call_used_regs[reg]))
6133 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6134 || (crtl->calls_eh_return
6135 && (reg == EH_RETURN_DATA_REGNO (0)
6136 || reg == EH_RETURN_DATA_REGNO (1)
6137 || reg == EH_RETURN_DATA_REGNO (2)
6138 || reg == EH_RETURN_DATA_REGNO (3)))
6139 || ((reg == MACL_REG || reg == MACH_REG)
6140 && df_regs_ever_live_p (reg)
6141 && sh_cfun_attr_renesas_p ())
6142 ))
6143 {
6144 SET_HARD_REG_BIT (*live_regs_mask, reg);
6145 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6146
6147 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6148 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6149 {
6150 if (FP_REGISTER_P (reg))
6151 {
6152 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6153 {
6154 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6155 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6156 }
6157 }
6158 else if (XD_REGISTER_P (reg))
6159 {
6160 /* Must switch to double mode to access these registers. */
6161 target_flags &= ~MASK_FPU_SINGLE;
6162 }
6163 }
6164 }
6165 if (nosave_low_regs && reg == R8_REG)
6166 break;
6167 }
6168 /* If we have a target register optimization pass after prologue / epilogue
6169 threading, we need to assume all target registers will be live even if
6170 they aren't now. */
6171 if (flag_branch_target_load_optimize2
6172 && TARGET_SAVE_ALL_TARGET_REGS
6173 && shmedia_space_reserved_for_target_registers)
6174 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6175 if ((! call_really_used_regs[reg] || interrupt_handler)
6176 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6177 {
6178 SET_HARD_REG_BIT (*live_regs_mask, reg);
6179 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6180 }
6181 /* If this is an interrupt handler, we don't have any call-clobbered
6182 registers we can conveniently use for target register save/restore.
6183 Make sure we save at least one general purpose register when we need
6184 to save target registers. */
6185 if (interrupt_handler
6186 && hard_reg_set_intersect_p (*live_regs_mask,
6187 reg_class_contents[TARGET_REGS])
6188 && ! hard_reg_set_intersect_p (*live_regs_mask,
6189 reg_class_contents[GENERAL_REGS]))
6190 {
6191 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6192 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6193 }
6194
6195 return count;
6196 }
6197
6198 /* Code to generate prologue and epilogue sequences */
6199
6200 /* PUSHED is the number of bytes that are being pushed on the
6201 stack for register saves. Return the frame size, padded
6202 appropriately so that the stack stays properly aligned. */
6203 static HOST_WIDE_INT
6204 rounded_frame_size (int pushed)
6205 {
6206 HOST_WIDE_INT size = get_frame_size ();
6207 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6208
6209 return ((size + pushed + align - 1) & -align) - pushed;
6210 }
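/* A minimal, self-contained sketch (not part of the port, compiled out
   via #if 0) that mirrors the rounding arithmetic above with plain ints;
   ALIGN stands in for STACK_BOUNDARY / BITS_PER_UNIT. */
#if 0
static int
rounded_frame_size_example (int frame, int pushed, int align)
{
  /* E.g. frame == 10, pushed == 12, align == 4:
     ((10 + 12 + 3) & -4) - 12 == 24 - 12 == 12. */
  return ((frame + pushed + align - 1) & -align) - pushed;
}
#endif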
6211
6212 /* Choose a call-clobbered target-branch register that remains
6213 unchanged along the whole function. We set it up as the return
6214 value in the prologue. */
6215 int
6216 sh_media_register_for_return (void)
6217 {
6218 int regno;
6219 int tr0_used;
6220
6221 if (! current_function_is_leaf)
6222 return -1;
6223 if (lookup_attribute ("interrupt_handler",
6224 DECL_ATTRIBUTES (current_function_decl)))
6225 return -1;
6226 if (sh_cfun_interrupt_handler_p ())
6227 return -1;
6228
6229 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6230
6231 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6232 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6233 return regno;
6234
6235 return -1;
6236 }
6237
6238 /* The maximum registers we need to save are:
6239 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6240 - 32 floating point registers (for each pair, we save none,
6241 one single precision value, or a double precision value).
6242 - 8 target registers
6243 - add 1 entry for a delimiter. */
6244 #define MAX_SAVED_REGS (62+32+8)
6245
6246 typedef struct save_entry_s
6247 {
6248 unsigned char reg;
6249 unsigned char mode;
6250 short offset;
6251 } save_entry;
6252
6253 #define MAX_TEMPS 4
6254
6255 /* There will be a delimiter entry with VOIDmode both at the start and the
6256 end of a filled in schedule. The end delimiter has the offset of the
6257 save with the smallest (i.e. most negative) offset. */
6258 typedef struct save_schedule_s
6259 {
6260 save_entry entries[MAX_SAVED_REGS + 2];
6261 int temps[MAX_TEMPS+1];
6262 } save_schedule;
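/* Illustrative shape of a filled-in schedule, derived from
   sh5_schedule_saves below: entries[] begins with a delimiter
   { reg = -1, mode = VOIDmode, offset = offset_base }, continues with
   one entry per saved register at strictly decreasing offsets, and ends
   with another VOIDmode delimiter whose offset is the lowest (most
   negative) save offset; temps[] is terminated by -1. */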
6263
6264 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6265 use reverse order. Returns the last entry written to (not counting
6266 the delimiter). OFFSET_BASE is a number to be added to all offset
6267 entries. */
6268
6269 static save_entry *
6270 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6271 int offset_base)
6272 {
6273 int align, i;
6274 save_entry *entry = schedule->entries;
6275 int tmpx = 0;
6276 int offset;
6277
6278 if (! current_function_interrupt)
6279 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6280 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6281 && ! FUNCTION_ARG_REGNO_P (i)
6282 && i != FIRST_RET_REG
6283 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6284 && ! (crtl->calls_eh_return
6285 && (i == EH_RETURN_STACKADJ_REGNO
6286 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6287 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6288 schedule->temps[tmpx++] = i;
6289 entry->reg = -1;
6290 entry->mode = VOIDmode;
6291 entry->offset = offset_base;
6292 entry++;
6293 /* We loop twice: first, we save 8-byte aligned registers in the
6294 higher addresses, that are known to be aligned. Then, we
6295 proceed to saving 32-bit registers that don't need 8-byte
6296 alignment.
6297 If this is an interrupt function, all registers that need saving
6298 need to be saved in full. Moreover, we need to postpone saving
6299 target registers till we have saved some general purpose registers
6300 we can then use as scratch registers. */
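/* Concretely (illustrative, assuming an 8-byte STACK_BOUNDARY): the
   first pass (align == 1) only schedules saves whose mode size is a
   multiple of 8, e.g. DImode and DFmode, so their offsets stay aligned;
   the second pass (align == 0) then schedules the remaining SImode and
   SFmode saves below them. */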
6301 offset = offset_base;
6302 for (align = 1; align >= 0; align--)
6303 {
6304 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6305 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6306 {
6307 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6308 int reg = i;
6309
6310 if (current_function_interrupt)
6311 {
6312 if (TARGET_REGISTER_P (i))
6313 continue;
6314 if (GENERAL_REGISTER_P (i))
6315 mode = DImode;
6316 }
6317 if (mode == SFmode && (i % 2) == 1
6318 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6319 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6320 {
6321 mode = DFmode;
6322 i--;
6323 reg--;
6324 }
6325
6326 /* If we're doing the aligned pass and this is not aligned,
6327 or we're doing the unaligned pass and this is aligned,
6328 skip it. */
6329 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6330 != align)
6331 continue;
6332
6333 if (current_function_interrupt
6334 && GENERAL_REGISTER_P (i)
6335 && tmpx < MAX_TEMPS)
6336 schedule->temps[tmpx++] = i;
6337
6338 offset -= GET_MODE_SIZE (mode);
6339 entry->reg = i;
6340 entry->mode = mode;
6341 entry->offset = offset;
6342 entry++;
6343 }
6344 if (align && current_function_interrupt)
6345 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6346 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6347 {
6348 offset -= GET_MODE_SIZE (DImode);
6349 entry->reg = i;
6350 entry->mode = DImode;
6351 entry->offset = offset;
6352 entry++;
6353 }
6354 }
6355 entry->reg = -1;
6356 entry->mode = VOIDmode;
6357 entry->offset = offset;
6358 schedule->temps[tmpx] = -1;
6359 return entry - 1;
6360 }
6361
6362 void
6363 sh_expand_prologue (void)
6364 {
6365 HARD_REG_SET live_regs_mask;
6366 int d, i;
6367 int d_rounding = 0;
6368 int save_flags = target_flags;
6369 int pretend_args;
6370 tree sp_switch_attr
6371 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6372
6373 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6374
6375 /* We have pretend args if we had an object sent partially in registers
6376 and partially on the stack, e.g. a large structure. */
6377 pretend_args = crtl->args.pretend_args_size;
6378 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6379 && (NPARM_REGS(SImode)
6380 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6381 pretend_args = 0;
6382 output_stack_adjust (-pretend_args
6383 - crtl->args.info.stack_regs * 8,
6384 stack_pointer_rtx, 0, NULL);
6385
6386 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6387 /* We're going to use the PIC register to load the address of the
6388 incoming-argument decoder and/or of the return trampoline from
6389 the GOT, so make sure the PIC register is preserved and
6390 initialized. */
6391 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6392
6393 if (TARGET_SHCOMPACT
6394 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6395 {
6396 int reg;
6397
6398 /* First, make all registers with incoming arguments that will
6399 be pushed onto the stack live, so that register renaming
6400 doesn't overwrite them. */
6401 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6402 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6403 >= NPARM_REGS (SImode) - reg)
6404 for (; reg < NPARM_REGS (SImode); reg++)
6405 emit_insn (gen_shcompact_preserve_incoming_args
6406 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6407 else if (CALL_COOKIE_INT_REG_GET
6408 (crtl->args.info.call_cookie, reg) == 1)
6409 emit_insn (gen_shcompact_preserve_incoming_args
6410 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6411
6412 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6413 stack_pointer_rtx);
6414 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6415 GEN_INT (crtl->args.info.call_cookie));
6416 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6417 gen_rtx_REG (SImode, R0_REG));
6418 }
6419 else if (TARGET_SHMEDIA)
6420 {
6421 int tr = sh_media_register_for_return ();
6422
6423 if (tr >= 0)
6424 emit_move_insn (gen_rtx_REG (DImode, tr),
6425 gen_rtx_REG (DImode, PR_MEDIA_REG));
6426 }
6427
6428 /* Emit the code for SETUP_VARARGS. */
6429 if (cfun->stdarg)
6430 {
6431 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6432 {
6433 /* Push arg regs as if they'd been provided by the caller on the stack. */
6434 for (i = 0; i < NPARM_REGS(SImode); i++)
6435 {
6436 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6437 rtx insn;
6438
6439 if (i >= (NPARM_REGS(SImode)
6440 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6441 ))
6442 break;
6443 insn = push (rn);
6444 }
6445 }
6446 }
6447
6448 /* If we're supposed to switch stacks at function entry, do so now. */
6449 if (sp_switch_attr)
6450 {
6451 /* The argument specifies a variable holding the address of the
6452 stack the interrupt function should switch to/from at entry/exit. */
6453 const char *s
6454 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6455 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6456
6457 emit_insn (gen_sp_switch_1 (sp_switch));
6458 }
6459
6460 d = calc_live_regs (&live_regs_mask);
6461 /* ??? Maybe we could save some switching if we can move a mode switch
6462 that already happens to be at the function start into the prologue. */
6463 if (target_flags != save_flags && ! current_function_interrupt)
6464 emit_insn (gen_toggle_sz ());
6465
6466 if (TARGET_SH5)
6467 {
6468 int offset_base, offset;
6469 rtx r0 = NULL_RTX;
6470 int offset_in_r0 = -1;
6471 int sp_in_r0 = 0;
6472 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6473 int total_size, save_size;
6474 save_schedule schedule;
6475 save_entry *entry;
6476 int *tmp_pnt;
6477
6478 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6479 && ! current_function_interrupt)
6480 r0 = gen_rtx_REG (Pmode, R0_REG);
6481
6482 /* D is the actual number of bytes that we need for saving registers;
6483 however, in initial_elimination_offset we have committed to using
6484 an additional TREGS_SPACE amount of bytes. In order to keep both
6485 the addresses of arguments supplied by the caller and the local
6486 variables valid, we must keep this gap. Place it between the incoming
6487 arguments and the actually saved registers in a bid to optimize
6488 locality of reference. */
6489 total_size = d + tregs_space;
6490 total_size += rounded_frame_size (total_size);
6491 save_size = total_size - rounded_frame_size (d);
6492 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6493 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6494 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
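/* E.g. (illustrative numbers only): with an 8-byte STACK_BOUNDARY and
   save_size == 20, save_size % 8 == 4, so d_rounding becomes 8 - 4 == 4
   and the register save area is padded out to 24 bytes. */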
6495
6496 /* If adjusting the stack in a single step costs nothing extra, do so.
6497 I.e. either if a single addi is enough, or we need a movi anyway,
6498 and we don't exceed the maximum offset range (the test for the
6499 latter is conservative for simplicity). */
6500 if (TARGET_SHMEDIA
6501 && (CONST_OK_FOR_I10 (-total_size)
6502 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6503 && total_size <= 2044)))
6504 d_rounding = total_size - save_size;
6505
6506 offset_base = d + d_rounding;
6507
6508 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6509 0, NULL);
6510
6511 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6512 tmp_pnt = schedule.temps;
6513 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6514 {
6515 enum machine_mode mode = (enum machine_mode) entry->mode;
6516 unsigned int reg = entry->reg;
6517 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6518 rtx orig_reg_rtx;
6519
6520 offset = entry->offset;
6521
6522 reg_rtx = gen_rtx_REG (mode, reg);
6523
6524 mem_rtx = gen_frame_mem (mode,
6525 gen_rtx_PLUS (Pmode,
6526 stack_pointer_rtx,
6527 GEN_INT (offset)));
6528
6529 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6530 {
6531 gcc_assert (r0);
6532 mem_rtx = NULL_RTX;
6533 }
6534
6535 if (HAVE_PRE_DECREMENT
6536 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6537 || mem_rtx == NULL_RTX
6538 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6539 {
6540 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6541
6542 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6543 pre_dec = NULL_RTX;
6544 else
6545 {
6546 mem_rtx = NULL_RTX;
6547 offset += GET_MODE_SIZE (mode);
6548 }
6549 }
6550
6551 if (mem_rtx != NULL_RTX)
6552 goto addr_ok;
6553
6554 if (offset_in_r0 == -1)
6555 {
6556 emit_move_insn (r0, GEN_INT (offset));
6557 offset_in_r0 = offset;
6558 }
6559 else if (offset != offset_in_r0)
6560 {
6561 emit_move_insn (r0,
6562 gen_rtx_PLUS
6563 (Pmode, r0,
6564 GEN_INT (offset - offset_in_r0)));
6565 offset_in_r0 += offset - offset_in_r0;
6566 }
6567
6568 if (pre_dec != NULL_RTX)
6569 {
6570 if (! sp_in_r0)
6571 {
6572 emit_move_insn (r0,
6573 gen_rtx_PLUS
6574 (Pmode, r0, stack_pointer_rtx));
6575 sp_in_r0 = 1;
6576 }
6577
6578 offset -= GET_MODE_SIZE (mode);
6579 offset_in_r0 -= GET_MODE_SIZE (mode);
6580
6581 mem_rtx = pre_dec;
6582 }
6583 else if (sp_in_r0)
6584 mem_rtx = gen_frame_mem (mode, r0);
6585 else
6586 mem_rtx = gen_frame_mem (mode,
6587 gen_rtx_PLUS (Pmode,
6588 stack_pointer_rtx,
6589 r0));
6590
6591 /* We must not use an r0-based address for target-branch
6592 registers or for special registers without pre-dec
6593 memory addresses, since we store their values in r0
6594 first. */
6595 gcc_assert (!TARGET_REGISTER_P (reg)
6596 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6597 || mem_rtx == pre_dec));
6598
6599 addr_ok:
6600 orig_reg_rtx = reg_rtx;
6601 if (TARGET_REGISTER_P (reg)
6602 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6603 && mem_rtx != pre_dec))
6604 {
6605 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6606
6607 emit_move_insn (tmp_reg, reg_rtx);
6608
6609 if (REGNO (tmp_reg) == R0_REG)
6610 {
6611 offset_in_r0 = -1;
6612 sp_in_r0 = 0;
6613 gcc_assert (!refers_to_regno_p
6614 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6615 }
6616
6617 if (*++tmp_pnt <= 0)
6618 tmp_pnt = schedule.temps;
6619
6620 reg_rtx = tmp_reg;
6621 }
6622 {
6623 rtx insn;
6624
6625 /* Mark as interesting for dwarf cfi generator */
6626 insn = emit_move_insn (mem_rtx, reg_rtx);
6627 RTX_FRAME_RELATED_P (insn) = 1;
6628 /* If we use an intermediate register for the save, we can't
6629 describe this exactly in cfi as a copy of the to-be-saved
6630 register into the temporary register followed by a store of the
6631 temporary register to the stack, because the temporary register can
6632 have a different natural size than the to-be-saved register.
6633 Thus, we gloss over the intermediate copy and pretend we do
6634 a direct save from the to-be-saved register. */
6635 if (REGNO (reg_rtx) != reg)
6636 {
6637 rtx set;
6638
6639 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6640 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6641 }
6642
6643 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6644 {
6645 rtx reg_rtx = gen_rtx_REG (mode, reg);
6646 rtx set;
6647 rtx mem_rtx = gen_frame_mem (mode,
6648 gen_rtx_PLUS (Pmode,
6649 stack_pointer_rtx,
6650 GEN_INT (offset)));
6651
6652 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6653 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6654 }
6655 }
6656 }
6657
6658 gcc_assert (entry->offset == d_rounding);
6659 }
6660 else
6661 push_regs (&live_regs_mask, current_function_interrupt);
6662
6663 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6664 emit_insn (gen_GOTaddr2picreg ());
6665
6666 if (SHMEDIA_REGS_STACK_ADJUST ())
6667 {
6668 /* This must NOT go through the PLT, otherwise mach and macl
6669 may be clobbered. */
6670 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6671 (TARGET_FPU_ANY
6672 ? "__GCC_push_shmedia_regs"
6673 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6674 emit_insn (gen_shmedia_save_restore_regs_compact
6675 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6676 }
6677
6678 if (target_flags != save_flags && ! current_function_interrupt)
6679 emit_insn (gen_toggle_sz ());
6680
6681 target_flags = save_flags;
6682
6683 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6684 stack_pointer_rtx, 0, NULL);
6685
6686 if (frame_pointer_needed)
6687 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6688
6689 if (TARGET_SHCOMPACT
6690 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6691 {
6692 /* This must NOT go through the PLT, otherwise mach and macl
6693 may be clobbered. */
6694 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6695 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6696 emit_insn (gen_shcompact_incoming_args ());
6697 }
6698 }
6699
6700 void
6701 sh_expand_epilogue (bool sibcall_p)
6702 {
6703 HARD_REG_SET live_regs_mask;
6704 int d, i;
6705 int d_rounding = 0;
6706
6707 int save_flags = target_flags;
6708 int frame_size, save_size;
6709 int fpscr_deferred = 0;
6710 int e = sibcall_p ? -1 : 1;
6711
6712 d = calc_live_regs (&live_regs_mask);
6713
6714 save_size = d;
6715 frame_size = rounded_frame_size (d);
6716
6717 if (TARGET_SH5)
6718 {
6719 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6720 int total_size;
6721 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6722 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6723 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6724
6725 total_size = d + tregs_space;
6726 total_size += rounded_frame_size (total_size);
6727 save_size = total_size - frame_size;
6728
6729 /* If adjusting the stack in a single step costs nothing extra, do so.
6730 I.e. either if a single addi is enough, or we need a movi anyway,
6731 and we don't exceed the maximum offset range (the test for the
6732 latter is conservative for simplicity). */
6733 if (TARGET_SHMEDIA
6734 && ! frame_pointer_needed
6735 && (CONST_OK_FOR_I10 (total_size)
6736 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6737 && total_size <= 2044)))
6738 d_rounding = frame_size;
6739
6740 frame_size -= d_rounding;
6741 }
6742
6743 if (frame_pointer_needed)
6744 {
6745 /* We must avoid scheduling the epilogue with previous basic blocks
6746 when exception handling is enabled. See PR/18032. */
6747 if (flag_exceptions)
6748 emit_insn (gen_blockage ());
6749 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6750 &live_regs_mask);
6751
6752 /* We must avoid moving the stack pointer adjustment past code
6753 which reads from the local frame, else an interrupt could
6754 occur after the SP adjustment and clobber data in the local
6755 frame. */
6756 emit_insn (gen_blockage ());
6757 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6758 }
6759 else if (frame_size)
6760 {
6761 /* We must avoid moving the stack pointer adjustment past code
6762 which reads from the local frame, else an interrupt could
6763 occur after the SP adjustment and clobber data in the local
6764 frame. */
6765 emit_insn (gen_blockage ());
6766 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6767 }
6768
6769 if (SHMEDIA_REGS_STACK_ADJUST ())
6770 {
6771 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6772 (TARGET_FPU_ANY
6773 ? "__GCC_pop_shmedia_regs"
6774 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6775 /* This must NOT go through the PLT, otherwise mach and macl
6776 may be clobbered. */
6777 emit_insn (gen_shmedia_save_restore_regs_compact
6778 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6779 }
6780
6781 /* Pop all the registers. */
6782
6783 if (target_flags != save_flags && ! current_function_interrupt)
6784 emit_insn (gen_toggle_sz ());
6785 if (TARGET_SH5)
6786 {
6787 int offset_base, offset;
6788 int offset_in_r0 = -1;
6789 int sp_in_r0 = 0;
6790 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6791 save_schedule schedule;
6792 save_entry *entry;
6793 int *tmp_pnt;
6794
6795 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6796 offset_base = -entry[1].offset + d_rounding;
6797 tmp_pnt = schedule.temps;
6798 for (; entry->mode != VOIDmode; entry--)
6799 {
6800 enum machine_mode mode = (enum machine_mode) entry->mode;
6801 int reg = entry->reg;
6802 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6803
6804 offset = offset_base + entry->offset;
6805 reg_rtx = gen_rtx_REG (mode, reg);
6806
6807 mem_rtx = gen_frame_mem (mode,
6808 gen_rtx_PLUS (Pmode,
6809 stack_pointer_rtx,
6810 GEN_INT (offset)));
6811
6812 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6813 mem_rtx = NULL_RTX;
6814
6815 if (HAVE_POST_INCREMENT
6816 && (offset == offset_in_r0
6817 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6818 && mem_rtx == NULL_RTX)
6819 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6820 {
6821 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6822
6823 if (!memory_address_p (mode, XEXP (post_inc, 0)))
6824 post_inc = NULL_RTX;
6825 else
6826 mem_rtx = NULL_RTX;
6827 }
6828
6829 if (mem_rtx != NULL_RTX)
6830 goto addr_ok;
6831
6832 if (offset_in_r0 == -1)
6833 {
6834 emit_move_insn (r0, GEN_INT (offset));
6835 offset_in_r0 = offset;
6836 }
6837 else if (offset != offset_in_r0)
6838 {
6839 emit_move_insn (r0,
6840 gen_rtx_PLUS
6841 (Pmode, r0,
6842 GEN_INT (offset - offset_in_r0)));
6843 offset_in_r0 += offset - offset_in_r0;
6844 }
6845
6846 if (post_inc != NULL_RTX)
6847 {
6848 if (! sp_in_r0)
6849 {
6850 emit_move_insn (r0,
6851 gen_rtx_PLUS
6852 (Pmode, r0, stack_pointer_rtx));
6853 sp_in_r0 = 1;
6854 }
6855
6856 mem_rtx = post_inc;
6857
6858 offset_in_r0 += GET_MODE_SIZE (mode);
6859 }
6860 else if (sp_in_r0)
6861 mem_rtx = gen_frame_mem (mode, r0);
6862 else
6863 mem_rtx = gen_frame_mem (mode,
6864 gen_rtx_PLUS (Pmode,
6865 stack_pointer_rtx,
6866 r0));
6867
6868 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6869 || mem_rtx == post_inc);
6870
6871 addr_ok:
6872 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6873 && mem_rtx != post_inc)
6874 {
6875 insn = emit_move_insn (r0, mem_rtx);
6876 mem_rtx = r0;
6877 }
6878 else if (TARGET_REGISTER_P (reg))
6879 {
6880 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6881
6882 /* Give the scheduler a bit of freedom by using up to
6883 MAX_TEMPS registers in a round-robin fashion. */
6884 insn = emit_move_insn (tmp_reg, mem_rtx);
6885 mem_rtx = tmp_reg;
6886 if (*++tmp_pnt < 0)
6887 tmp_pnt = schedule.temps;
6888 }
6889
6890 insn = emit_move_insn (reg_rtx, mem_rtx);
6891 }
6892
6893 gcc_assert (entry->offset + offset_base == d + d_rounding);
6894 }
6895 else /* ! TARGET_SH5 */
6896 {
6897 int last_reg;
6898
6899 save_size = 0;
6900 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
6901 register. */
6902 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
6903 && !sh_cfun_resbank_handler_p ())
6904 {
6905 if (!frame_pointer_needed)
6906 emit_insn (gen_blockage ());
6907 pop (PR_REG);
6908 }
6909
6910 /* Banked registers are popped first to avoid being scheduled in the
6911 delay slot. RTE switches banks before the delay slot instruction. */
6912 if (current_function_interrupt)
6913 {
6914 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6915 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6916 pop (LAST_BANKED_REG - i);
6917
6918 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
6919 }
6920 else
6921 last_reg = FIRST_PSEUDO_REGISTER;
6922
6923 for (i = 0; i < last_reg; i++)
6924 {
6925 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6926
6927 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6928 && hard_reg_set_intersect_p (live_regs_mask,
6929 reg_class_contents[DF_REGS]))
6930 fpscr_deferred = 1;
6931 /* For an ISR with the RESBANK attribute assigned, don't pop the
6932 following registers: R0-R14, MACH, MACL and GBR. */
6933 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
6934 && ! (sh_cfun_resbank_handler_p ()
6935 && ((j >= FIRST_GENERAL_REG
6936 && j < LAST_GENERAL_REG)
6937 || j == MACH_REG
6938 || j == MACL_REG
6939 || j == GBR_REG)))
6940 pop (j);
6941
6942 if (j == FIRST_FP_REG && fpscr_deferred)
6943 pop (FPSCR_REG);
6944 }
6945 }
6946 if (target_flags != save_flags && ! current_function_interrupt)
6947 emit_insn (gen_toggle_sz ());
6948 target_flags = save_flags;
6949
6950 output_stack_adjust (crtl->args.pretend_args_size
6951 + save_size + d_rounding
6952 + crtl->args.info.stack_regs * 8,
6953 stack_pointer_rtx, e, NULL);
6954
6955 if (crtl->calls_eh_return)
6956 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6957 EH_RETURN_STACKADJ_RTX));
6958
6959 /* Switch back to the normal stack if necessary. */
6960 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6961 emit_insn (gen_sp_switch_2 ());
6962
6963 /* Tell flow the insn that pops PR isn't dead. */
6964 /* PR_REG will never be live in SHmedia mode, and we don't need to
6965 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6966 by the return pattern. */
6967 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6968 emit_use (gen_rtx_REG (SImode, PR_REG));
6969 }
6970
6971 static int sh_need_epilogue_known = 0;
6972
6973 int
6974 sh_need_epilogue (void)
6975 {
6976 if (! sh_need_epilogue_known)
6977 {
6978 rtx epilogue;
6979
6980 start_sequence ();
6981 sh_expand_epilogue (0);
6982 epilogue = get_insns ();
6983 end_sequence ();
6984 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6985 }
6986 return sh_need_epilogue_known > 0;
6987 }
6988
6989 /* Emit code to change the current function's return address to RA.
6990 TEMP is available as a scratch register, if needed. */
6991
6992 void
6993 sh_set_return_address (rtx ra, rtx tmp)
6994 {
6995 HARD_REG_SET live_regs_mask;
6996 int d;
6997 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6998 int pr_offset;
6999
7000 d = calc_live_regs (&live_regs_mask);
7001
7002 /* If pr_reg isn't live, we can set it (or the register given in
7003 sh_media_register_for_return) directly. */
7004 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7005 {
7006 rtx rr;
7007
7008 if (TARGET_SHMEDIA)
7009 {
7010 int rr_regno = sh_media_register_for_return ();
7011
7012 if (rr_regno < 0)
7013 rr_regno = pr_reg;
7014
7015 rr = gen_rtx_REG (DImode, rr_regno);
7016 }
7017 else
7018 rr = gen_rtx_REG (SImode, pr_reg);
7019
7020 emit_insn (GEN_MOV (rr, ra));
7021 /* Tell flow the register for return isn't dead. */
7022 emit_use (rr);
7023 return;
7024 }
7025
7026 if (TARGET_SH5)
7027 {
7028 int offset;
7029 save_schedule schedule;
7030 save_entry *entry;
7031
7032 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7033 offset = entry[1].offset;
7034 for (; entry->mode != VOIDmode; entry--)
7035 if (entry->reg == pr_reg)
7036 goto found;
7037
7038 /* We can't find the PR register. */
7039 gcc_unreachable ();
7040
7041 found:
7042 offset = entry->offset - offset;
7043 pr_offset = (rounded_frame_size (d) + offset
7044 + SHMEDIA_REGS_STACK_ADJUST ());
7045 }
7046 else
7047 pr_offset = rounded_frame_size (d);
7048
7049 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7050 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7051
7052 tmp = gen_frame_mem (Pmode, tmp);
7053 emit_insn (GEN_MOV (tmp, ra));
7054 }
7055
7056 /* Clear variables at function end. */
7057
7058 static void
7059 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7060 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7061 {
7062 sh_need_epilogue_known = 0;
7063 }
7064
7065 static rtx
7066 sh_builtin_saveregs (void)
7067 {
7068 /* First unnamed integer register. */
7069 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7070 /* Number of integer registers we need to save. */
7071 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7072 /* First unnamed SFmode float reg */
7073 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7074 /* Number of SFmode float regs to save. */
7075 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7076 rtx regbuf, fpregs;
7077 int bufsize, regno;
7078 alias_set_type alias_set;
7079
7080 if (TARGET_SH5)
7081 {
7082 if (n_intregs)
7083 {
7084 int pushregs = n_intregs;
7085
7086 while (pushregs < NPARM_REGS (SImode) - 1
7087 && (CALL_COOKIE_INT_REG_GET
7088 (crtl->args.info.call_cookie,
7089 NPARM_REGS (SImode) - pushregs)
7090 == 1))
7091 {
7092 crtl->args.info.call_cookie
7093 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7094 - pushregs, 1);
7095 pushregs++;
7096 }
7097
7098 if (pushregs == NPARM_REGS (SImode))
7099 crtl->args.info.call_cookie
7100 |= (CALL_COOKIE_INT_REG (0, 1)
7101 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7102 else
7103 crtl->args.info.call_cookie
7104 |= CALL_COOKIE_STACKSEQ (pushregs);
7105
7106 crtl->args.pretend_args_size += 8 * n_intregs;
7107 }
7108 if (TARGET_SHCOMPACT)
7109 return const0_rtx;
7110 }
7111
7112 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7113 {
7114 error ("__builtin_saveregs not supported by this subtarget");
7115 return const0_rtx;
7116 }
7117
7118 if (TARGET_SHMEDIA)
7119 n_floatregs = 0;
7120
7121 /* Allocate block of memory for the regs. */
7122 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7123 Or can assign_stack_local accept a 0 SIZE argument? */
7124 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7125
7126 if (TARGET_SHMEDIA)
7127 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7128 else if (n_floatregs & 1)
7129 {
7130 rtx addr;
7131
7132 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7133 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7134 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7135 regbuf = change_address (regbuf, BLKmode, addr);
7136 }
7137 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7138 {
7139 rtx addr, mask;
7140
7141 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7142 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7143 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7144 emit_insn (gen_andsi3 (addr, addr, mask));
7145 regbuf = change_address (regbuf, BLKmode, addr);
7146 }
7147 else
7148 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7149 alias_set = get_varargs_alias_set ();
7150 set_mem_alias_set (regbuf, alias_set);
7151
7152 /* Save int args.
7153 This is optimized to only save the regs that are necessary. Explicitly
7154 named args need not be saved. */
7155 if (n_intregs > 0)
7156 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7157 adjust_address (regbuf, BLKmode,
7158 n_floatregs * UNITS_PER_WORD),
7159 n_intregs);
7160
7161 if (TARGET_SHMEDIA)
7162 /* Return the address of the regbuf. */
7163 return XEXP (regbuf, 0);
7164
7165 /* Save float args.
7166 This is optimized to only save the regs that are necessary. Explicitly
7167 named args need not be saved.
7168 We explicitly build a pointer to the buffer because it halves the insn
7169 count when not optimizing (otherwise the pointer is built for each reg
7170 saved).
7171 We emit the moves in reverse order so that we can use predecrement. */
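/* An assumed illustration of the resulting buffer layout, with a 4-byte
   UNITS_PER_WORD: bytes [0, n_floatregs * 4) receive the unnamed FP
   argument registers, filled downwards from FPREGS via predecrement,
   while the unnamed integer argument registers were saved earlier at
   offsets starting at n_floatregs * 4. */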
7172
7173 fpregs = copy_to_mode_reg (Pmode,
7174 plus_constant (XEXP (regbuf, 0),
7175 n_floatregs * UNITS_PER_WORD));
7176 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7177 {
7178 rtx mem;
7179 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7180 {
7181 emit_insn (gen_addsi3 (fpregs, fpregs,
7182 GEN_INT (-2 * UNITS_PER_WORD)));
7183 mem = change_address (regbuf, DFmode, fpregs);
7184 emit_move_insn (mem,
7185 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7186 }
7187 regno = first_floatreg;
7188 if (regno & 1)
7189 {
7190 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7191 mem = change_address (regbuf, SFmode, fpregs);
7192 emit_move_insn (mem,
7193 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7194 - (TARGET_LITTLE_ENDIAN != 0)));
7195 }
7196 }
7197 else
7198 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7199 {
7200 rtx mem;
7201
7202 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7203 mem = change_address (regbuf, SFmode, fpregs);
7204 emit_move_insn (mem,
7205 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7206 }
7207
7208 /* Return the address of the regbuf. */
7209 return XEXP (regbuf, 0);
7210 }
7211
7212 /* Define the `__builtin_va_list' type for the ABI. */
7213
7214 static tree
7215 sh_build_builtin_va_list (void)
7216 {
7217 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7218 tree record;
7219
7220 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7221 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7222 return ptr_type_node;
7223
7224 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7225
7226 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7227 ptr_type_node);
7228 f_next_o_limit = build_decl (FIELD_DECL,
7229 get_identifier ("__va_next_o_limit"),
7230 ptr_type_node);
7231 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7232 ptr_type_node);
7233 f_next_fp_limit = build_decl (FIELD_DECL,
7234 get_identifier ("__va_next_fp_limit"),
7235 ptr_type_node);
7236 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7237 ptr_type_node);
7238
7239 DECL_FIELD_CONTEXT (f_next_o) = record;
7240 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7241 DECL_FIELD_CONTEXT (f_next_fp) = record;
7242 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7243 DECL_FIELD_CONTEXT (f_next_stack) = record;
7244
7245 TYPE_FIELDS (record) = f_next_o;
7246 TREE_CHAIN (f_next_o) = f_next_o_limit;
7247 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7248 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7249 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7250
7251 layout_type (record);
7252
7253 return record;
7254 }
7255
7256 /* Implement `va_start' for varargs and stdarg. */
7257
7258 static void
7259 sh_va_start (tree valist, rtx nextarg)
7260 {
7261 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7262 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7263 tree t, u;
7264 int nfp, nint;
7265
7266 if (TARGET_SH5)
7267 {
7268 expand_builtin_saveregs ();
7269 std_expand_builtin_va_start (valist, nextarg);
7270 return;
7271 }
7272
7273 if ((! TARGET_SH2E && ! TARGET_SH4)
7274 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7275 {
7276 std_expand_builtin_va_start (valist, nextarg);
7277 return;
7278 }
7279
7280 f_next_o = TYPE_FIELDS (va_list_type_node);
7281 f_next_o_limit = TREE_CHAIN (f_next_o);
7282 f_next_fp = TREE_CHAIN (f_next_o_limit);
7283 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7284 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7285
7286 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7287 NULL_TREE);
7288 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7289 valist, f_next_o_limit, NULL_TREE);
7290 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7291 NULL_TREE);
7292 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7293 valist, f_next_fp_limit, NULL_TREE);
7294 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7295 valist, f_next_stack, NULL_TREE);
7296
7297 /* Call __builtin_saveregs. */
7298 u = make_tree (sizetype, expand_builtin_saveregs ());
7299 u = fold_convert (ptr_type_node, u);
7300 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7301 TREE_SIDE_EFFECTS (t) = 1;
7302 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7303
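/* nfp becomes the number of FP argument registers not taken by named
   arguments; the FP register save area spans that many words starting at
   the address returned by __builtin_saveregs.  */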
7304 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7305 if (nfp < 8)
7306 nfp = 8 - nfp;
7307 else
7308 nfp = 0;
7309 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7310 size_int (UNITS_PER_WORD * nfp));
7311 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7312 TREE_SIDE_EFFECTS (t) = 1;
7313 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7314
7315 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7316 TREE_SIDE_EFFECTS (t) = 1;
7317 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7318
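/* Likewise nint is the number of leftover integer argument registers;
   their save area follows the FP area and ends at next_o_limit.  */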
7319 nint = crtl->args.info.arg_count[SH_ARG_INT];
7320 if (nint < 4)
7321 nint = 4 - nint;
7322 else
7323 nint = 0;
7324 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7325 size_int (UNITS_PER_WORD * nint));
7326 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7327 TREE_SIDE_EFFECTS (t) = 1;
7328 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7329
7330 u = make_tree (ptr_type_node, nextarg);
7331 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7332 TREE_SIDE_EFFECTS (t) = 1;
7333 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7334 }
7335
7336 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7337 member, return it. */
7338 static tree
7339 find_sole_member (tree type)
7340 {
7341 tree field, member = NULL_TREE;
7342
7343 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7344 {
7345 if (TREE_CODE (field) != FIELD_DECL)
7346 continue;
7347 if (!DECL_SIZE (field))
7348 return NULL_TREE;
7349 if (integer_zerop (DECL_SIZE (field)))
7350 continue;
7351 if (member)
7352 return NULL_TREE;
7353 member = field;
7354 }
7355 return member;
7356 }
7357 /* Implement `va_arg'. */
7358
7359 static tree
7360 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7361 gimple_seq *post_p ATTRIBUTE_UNUSED)
7362 {
7363 HOST_WIDE_INT size, rsize;
7364 tree tmp, pptr_type_node;
7365 tree addr, lab_over = NULL, result = NULL;
7366 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7367 tree eff_type;
7368
7369 if (pass_by_ref)
7370 type = build_pointer_type (type);
7371
7372 size = int_size_in_bytes (type);
7373 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
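/* rsize is SIZE rounded up to a whole number of words.  */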
7374 pptr_type_node = build_pointer_type (ptr_type_node);
7375
7376 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7377 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7378 {
7379 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7380 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7381 int pass_as_float;
7382 tree lab_false;
7383 tree member;
7384
7385 f_next_o = TYPE_FIELDS (va_list_type_node);
7386 f_next_o_limit = TREE_CHAIN (f_next_o);
7387 f_next_fp = TREE_CHAIN (f_next_o_limit);
7388 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7389 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7390
7391 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7392 NULL_TREE);
7393 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7394 valist, f_next_o_limit, NULL_TREE);
7395 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7396 valist, f_next_fp, NULL_TREE);
7397 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7398 valist, f_next_fp_limit, NULL_TREE);
7399 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7400 valist, f_next_stack, NULL_TREE);
7401
7402 /* Structures with a single member with a distinct mode are passed
7403 like their member. This is relevant if the latter has a REAL_TYPE
7404 or COMPLEX_TYPE type. */
7405 eff_type = type;
7406 while (TREE_CODE (eff_type) == RECORD_TYPE
7407 && (member = find_sole_member (eff_type))
7408 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7409 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7410 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7411 {
7412 tree field_type = TREE_TYPE (member);
7413
7414 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7415 eff_type = field_type;
7416 else
7417 {
7418 gcc_assert ((TYPE_ALIGN (eff_type)
7419 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7420 || (TYPE_ALIGN (eff_type)
7421 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7422 break;
7423 }
7424 }
7425
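/* Decide whether this argument would have been passed in FP registers:
   with a double-precision FPU, real types up to 8 bytes and complex real
   types up to 16 bytes qualify; otherwise only 4-byte reals do.  */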
7426 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7427 {
7428 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7429 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7430 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7431 && size <= 16));
7432 }
7433 else
7434 {
7435 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7436 }
7437
7438 addr = create_tmp_var (pptr_type_node, NULL);
7439 lab_false = create_artificial_label ();
7440 lab_over = create_artificial_label ();
7441
7442 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7443
7444 if (pass_as_float)
7445 {
7446 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7447 tree cmp;
7448 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7449
7450 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7451 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7452
7453 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7454 tmp = next_fp_limit;
7455 if (size > 4 && !is_double)
7456 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7457 unshare_expr (tmp), size_int (4 - size));
7458 tmp = build2 (GE_EXPR, boolean_type_node,
7459 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7460 cmp = build3 (COND_EXPR, void_type_node, tmp,
7461 build1 (GOTO_EXPR, void_type_node,
7462 unshare_expr (lab_false)), NULL_TREE);
7463 if (!is_double)
7464 gimplify_and_add (cmp, pre_p);
7465
7466 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7467 || (is_double || size == 16))
7468 {
7469 tmp = fold_convert (sizetype, next_fp_tmp);
7470 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7471 size_int (UNITS_PER_WORD));
7472 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7473 unshare_expr (next_fp_tmp), tmp);
7474 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7475 }
7476 if (is_double)
7477 gimplify_and_add (cmp, pre_p);
7478
7479 #ifdef FUNCTION_ARG_SCmode_WART
7480 if (TYPE_MODE (eff_type) == SCmode
7481 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7482 {
7483 tree subtype = TREE_TYPE (eff_type);
7484 tree real, imag;
7485
7486 imag
7487 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7488 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7489
7490 real
7491 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7492 real = get_initialized_tmp_var (real, pre_p, NULL);
7493
7494 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7495 if (type != eff_type)
7496 result = build1 (VIEW_CONVERT_EXPR, type, result);
7497 result = get_initialized_tmp_var (result, pre_p, NULL);
7498 }
7499 #endif /* FUNCTION_ARG_SCmode_WART */
7500
7501 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7502 gimplify_and_add (tmp, pre_p);
7503
7504 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7505 gimplify_and_add (tmp, pre_p);
7506
7507 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7508 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7509 gimplify_assign (unshare_expr (next_fp_tmp),
7510 unshare_expr (valist), pre_p);
7511
7512 gimplify_assign (unshare_expr (valist),
7513 unshare_expr (next_fp_tmp), post_p);
7514 valist = next_fp_tmp;
7515 }
7516 else
7517 {
7518 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7519 unshare_expr (next_o), size_int (rsize));
7520 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7521 unshare_expr (next_o_limit));
7522 tmp = build3 (COND_EXPR, void_type_node, tmp,
7523 build1 (GOTO_EXPR, void_type_node,
7524 unshare_expr (lab_false)),
7525 NULL_TREE);
7526 gimplify_and_add (tmp, pre_p);
7527
7528 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7529 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7530
7531 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7532 gimplify_and_add (tmp, pre_p);
7533
7534 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7535 gimplify_and_add (tmp, pre_p);
7536
7537 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7538 gimplify_assign (unshare_expr (next_o),
7539 unshare_expr (next_o_limit), pre_p);
7540
7541 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7542 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7543 }
7544
7545 if (!result)
7546 {
7547 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7548 gimplify_and_add (tmp, pre_p);
7549 }
7550 }
7551
7552 /* ??? In va-sh.h, there had been code to make values larger than
7553 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7554
7555 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7556 if (result)
7557 {
7558 gimplify_assign (result, tmp, pre_p);
7559
7560 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7561 gimplify_and_add (tmp, pre_p);
7562 }
7563 else
7564 result = tmp;
7565
7566 if (pass_by_ref)
7567 result = build_va_arg_indirect_ref (result);
7568
7569 return result;
7570 }
7571
7572 /* 64-bit floating point memory transfers are paired single precision loads
7573 or stores. So DWARF information needs fixing in little endian (unless
7574 PR=SZ=1 in FPSCR). */
7575 rtx
7576 sh_dwarf_register_span (rtx reg)
7577 {
7578 unsigned regno = REGNO (reg);
7579
7580 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7581 return NULL_RTX;
7582
7583 return
7584 gen_rtx_PARALLEL (VOIDmode,
7585 gen_rtvec (2,
7586 gen_rtx_REG (SFmode,
7587 DBX_REGISTER_NUMBER (regno+1)),
7588 gen_rtx_REG (SFmode,
7589 DBX_REGISTER_NUMBER (regno))));
7590 }
7591
7592 bool
7593 sh_promote_prototypes (const_tree type)
7594 {
7595 if (TARGET_HITACHI)
7596 return 0;
7597 if (! type)
7598 return 1;
7599 return ! sh_attr_renesas_p (type);
7600 }
7601
7602 /* Whether an argument must be passed by reference. On SHcompact, we
7603 pretend arguments wider than 32 bits that would have been passed in
7604 registers are passed by reference, so that an SHmedia trampoline
7605 loads them into the full 64-bit registers. */
7606
7607 static int
7608 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7609 const_tree type, bool named)
7610 {
7611 unsigned HOST_WIDE_INT size;
7612
7613 if (type)
7614 size = int_size_in_bytes (type);
7615 else
7616 size = GET_MODE_SIZE (mode);
7617
7618 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7619 && (!named
7620 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7621 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7622 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7623 && size > 4
7624 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7625 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7626 return size;
7627 else
7628 return 0;
7629 }
7630
7631 static bool
7632 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7633 const_tree type, bool named)
7634 {
7635 if (targetm.calls.must_pass_in_stack (mode, type))
7636 return true;
7637
7638 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7639 wants to know about pass-by-reference semantics for incoming
7640 arguments. */
7641 if (! cum)
7642 return false;
7643
7644 if (TARGET_SHCOMPACT)
7645 {
7646 cum->byref = shcompact_byref (cum, mode, type, named);
7647 return cum->byref != 0;
7648 }
7649
7650 return false;
7651 }
7652
7653 static bool
7654 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7655 const_tree type, bool named ATTRIBUTE_UNUSED)
7656 {
7657 /* ??? How can it possibly be correct to return true only on the
7658 caller side of the equation? Is there someplace else in the
7659 sh backend that's magically producing the copies? */
7660 return (cum->outgoing
7661 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7662 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7663 }
7664
7665 static int
7666 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7667 tree type, bool named ATTRIBUTE_UNUSED)
7668 {
7669 int words = 0;
7670
7671 if (!TARGET_SH5
7672 && PASS_IN_REG_P (*cum, mode, type)
7673 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7674 && (ROUND_REG (*cum, mode)
7675 + (mode != BLKmode
7676 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7677 : ROUND_ADVANCE (int_size_in_bytes (type)))
7678 > NPARM_REGS (mode)))
7679 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7680
7681 else if (!TARGET_SHCOMPACT
7682 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7683 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7684
7685 return words * UNITS_PER_WORD;
7686 }
7687
7688
7689 /* Define where to put the arguments to a function.
7690 Value is zero to push the argument on the stack,
7691 or a hard register in which to store the argument.
7692
7693 MODE is the argument's machine mode.
7694 TYPE is the data type of the argument (as a tree).
7695 This is null for libcalls where that information may
7696 not be available.
7697 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7698 the preceding args and about the function being called.
7699 NAMED is nonzero if this argument is a named parameter
7700 (otherwise it is an extra parameter matching an ellipsis).
7701
7702 On SH the first args are normally in registers
7703 and the rest are pushed. Any arg that starts within the first
7704 NPARM_REGS words is at least partially passed in a register unless
7705 its data type forbids. */
7706
7707
7708 rtx
7709 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7710 tree type, int named)
7711 {
7712 if (! TARGET_SH5 && mode == VOIDmode)
7713 return GEN_INT (ca->renesas_abi ? 1 : 0);
7714
7715 if (! TARGET_SH5
7716 && PASS_IN_REG_P (*ca, mode, type)
7717 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7718 {
7719 int regno;
7720
7721 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7722 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7723 {
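/* Build a PARALLEL that passes the SCmode value in two SFmode registers
   with the members of the register pair swapped (the
   FUNCTION_ARG_SCmode_WART for little endian SH4).  */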
7724 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7725 gen_rtx_REG (SFmode,
7726 BASE_ARG_REG (mode)
7727 + (ROUND_REG (*ca, mode) ^ 1)),
7728 const0_rtx);
7729 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7730 gen_rtx_REG (SFmode,
7731 BASE_ARG_REG (mode)
7732 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7733 GEN_INT (4));
7734 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7735 }
7736
7737 /* If the alignment of a DF value causes an SF register to be
7738 skipped, we will use that skipped register for the next SF
7739 value. */
7740 if ((TARGET_HITACHI || ca->renesas_abi)
7741 && ca->free_single_fp_reg
7742 && mode == SFmode)
7743 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7744
7745 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7746 ^ (mode == SFmode && TARGET_SH4
7747 && TARGET_LITTLE_ENDIAN != 0
7748 && ! TARGET_HITACHI && ! ca->renesas_abi);
7749 return gen_rtx_REG (mode, regno);
7750
7751 }
7752
7753 if (TARGET_SH5)
7754 {
7755 if (mode == VOIDmode && TARGET_SHCOMPACT)
7756 return GEN_INT (ca->call_cookie);
7757
7758 /* The following test assumes unnamed arguments are promoted to
7759 DFmode. */
7760 if (mode == SFmode && ca->free_single_fp_reg)
7761 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7762
7763 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7764 && (named || ! ca->prototype_p)
7765 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7766 {
7767 if (! ca->prototype_p && TARGET_SHMEDIA)
7768 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7769
7770 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7771 FIRST_FP_PARM_REG
7772 + ca->arg_count[(int) SH_ARG_FLOAT]);
7773 }
7774
7775 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7776 && (! TARGET_SHCOMPACT
7777 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7778 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7779 type, named))))
7780 {
7781 return gen_rtx_REG (mode, (FIRST_PARM_REG
7782 + ca->arg_count[(int) SH_ARG_INT]));
7783 }
7784
7785 return 0;
7786 }
7787
7788 return 0;
7789 }
7790
7791 /* Update the data in CUM to advance over an argument
7792 of mode MODE and data type TYPE.
7793 (TYPE is null for libcalls where that information may not be
7794 available.) */
7795
7796 void
7797 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7798 tree type, int named)
7799 {
7800 if (ca->force_mem)
7801 ca->force_mem = 0;
7802 else if (TARGET_SH5)
7803 {
7804 tree type2 = (ca->byref && type
7805 ? TREE_TYPE (type)
7806 : type);
7807 enum machine_mode mode2 = (ca->byref && type
7808 ? TYPE_MODE (type2)
7809 : mode);
7810 int dwords = ((ca->byref
7811 ? ca->byref
7812 : mode2 == BLKmode
7813 ? int_size_in_bytes (type2)
7814 : GET_MODE_SIZE (mode2)) + 7) / 8;
7815 int numregs = MIN (dwords, NPARM_REGS (SImode)
7816 - ca->arg_count[(int) SH_ARG_INT]);
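/* dwords is the argument's size in 8-byte units; numregs is how many of
   those units still fit into the remaining integer argument registers.  */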
7817
7818 if (numregs)
7819 {
7820 ca->arg_count[(int) SH_ARG_INT] += numregs;
7821 if (TARGET_SHCOMPACT
7822 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7823 {
7824 ca->call_cookie
7825 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7826 - numregs, 1);
7827 /* N.B. We want this also for outgoing. */
7828 ca->stack_regs += numregs;
7829 }
7830 else if (ca->byref)
7831 {
7832 if (! ca->outgoing)
7833 ca->stack_regs += numregs;
7834 ca->byref_regs += numregs;
7835 ca->byref = 0;
7836 do
7837 ca->call_cookie
7838 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7839 - numregs, 2);
7840 while (--numregs);
7841 ca->call_cookie
7842 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7843 - 1, 1);
7844 }
7845 else if (dwords > numregs)
7846 {
7847 int pushregs = numregs;
7848
7849 if (TARGET_SHCOMPACT)
7850 ca->stack_regs += numregs;
7851 while (pushregs < NPARM_REGS (SImode) - 1
7852 && (CALL_COOKIE_INT_REG_GET
7853 (ca->call_cookie,
7854 NPARM_REGS (SImode) - pushregs)
7855 == 1))
7856 {
7857 ca->call_cookie
7858 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7859 - pushregs, 1);
7860 pushregs++;
7861 }
7862 if (numregs == NPARM_REGS (SImode))
7863 ca->call_cookie
7864 |= CALL_COOKIE_INT_REG (0, 1)
7865 | CALL_COOKIE_STACKSEQ (numregs - 1);
7866 else
7867 ca->call_cookie
7868 |= CALL_COOKIE_STACKSEQ (numregs);
7869 }
7870 }
7871 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7872 && (named || ! ca->prototype_p))
7873 {
7874 if (mode2 == SFmode && ca->free_single_fp_reg)
7875 ca->free_single_fp_reg = 0;
7876 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7877 < NPARM_REGS (SFmode))
7878 {
7879 int numfpregs
7880 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7881 NPARM_REGS (SFmode)
7882 - ca->arg_count[(int) SH_ARG_FLOAT]);
7883
7884 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7885
7886 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7887 {
7888 if (ca->outgoing && numregs > 0)
7889 do
7890 {
7891 ca->call_cookie
7892 |= (CALL_COOKIE_INT_REG
7893 (ca->arg_count[(int) SH_ARG_INT]
7894 - numregs + ((numfpregs - 2) / 2),
7895 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7896 - numfpregs) / 2));
7897 }
7898 while (numfpregs -= 2);
7899 }
7900 else if (mode2 == SFmode && (named)
7901 && (ca->arg_count[(int) SH_ARG_FLOAT]
7902 < NPARM_REGS (SFmode)))
7903 ca->free_single_fp_reg
7904 = FIRST_FP_PARM_REG - numfpregs
7905 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7906 }
7907 }
7908 return;
7909 }
7910
7911 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7912 {
7913 /* Note that we've used the skipped register. */
7914 if (mode == SFmode && ca->free_single_fp_reg)
7915 {
7916 ca->free_single_fp_reg = 0;
7917 return;
7918 }
7919 /* When we have a DF after an SF, there's an SF register that gets
7920 skipped in order to align the DF value. We note this skipped
7921 register, because the next SF value will use it, and not the
7922 SF that follows the DF. */
7923 if (mode == DFmode
7924 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7925 {
7926 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7927 + BASE_ARG_REG (mode));
7928 }
7929 }
7930
7931 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7932 || PASS_IN_REG_P (*ca, mode, type))
7933 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7934 = (ROUND_REG (*ca, mode)
7935 + (mode == BLKmode
7936 ? ROUND_ADVANCE (int_size_in_bytes (type))
7937 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7938 }
7939
7940 /* The Renesas calling convention doesn't quite fit into this scheme since
7941 the address is passed like an invisible argument, but one that is always
7942 passed in memory. */
7943 static rtx
7944 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7945 {
7946 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7947 return 0;
7948 return gen_rtx_REG (Pmode, 2);
7949 }
7950
7951 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7952
7953 static bool
7954 sh_return_in_memory (const_tree type, const_tree fndecl)
7955 {
7956 if (TARGET_SH5)
7957 {
7958 if (TYPE_MODE (type) == BLKmode)
7959 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7960 else
7961 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7962 }
7963 else
7964 {
7965 return (TYPE_MODE (type) == BLKmode
7966 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7967 && TREE_CODE (type) == RECORD_TYPE));
7968 }
7969 }
7970
7971 /* We actually emit the code in sh_expand_prologue. We used to use
7972 a static variable to flag that we need to emit this code, but that
7973 doesn't work when inlining, when functions are deferred and then emitted
7974 later. Fortunately, we already have two flags that are part of struct
7975 function that tell if a function uses varargs or stdarg. */
7976 static void
7977 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7978 enum machine_mode mode,
7979 tree type,
7980 int *pretend_arg_size,
7981 int second_time ATTRIBUTE_UNUSED)
7982 {
7983 gcc_assert (cfun->stdarg);
7984 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7985 {
7986 int named_parm_regs, anon_parm_regs;
7987
7988 named_parm_regs = (ROUND_REG (*ca, mode)
7989 + (mode == BLKmode
7990 ? ROUND_ADVANCE (int_size_in_bytes (type))
7991 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7992 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7993 if (anon_parm_regs > 0)
7994 *pretend_arg_size = anon_parm_regs * 4;
7995 }
7996 }
7997
7998 static bool
7999 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8000 {
8001 return TARGET_SH5;
8002 }
8003
8004 static bool
8005 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8006 {
8007 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8008 }
8009
8010
8011 /* Define the offset between two registers, one to be eliminated, and
8012 the other its replacement, at the start of a routine. */
8013
8014 int
8015 initial_elimination_offset (int from, int to)
8016 {
8017 int regs_saved;
8018 int regs_saved_rounding = 0;
8019 int total_saved_regs_space;
8020 int total_auto_space;
8021 int save_flags = target_flags;
8022 int copy_flags;
8023 HARD_REG_SET live_regs_mask;
8024
8025 shmedia_space_reserved_for_target_registers = false;
8026 regs_saved = calc_live_regs (&live_regs_mask);
8027 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8028
8029 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8030 {
8031 shmedia_space_reserved_for_target_registers = true;
8032 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8033 }
8034
8035 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8036 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8037 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8038
8039 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8040 copy_flags = target_flags;
8041 target_flags = save_flags;
8042
8043 total_saved_regs_space = regs_saved + regs_saved_rounding;
8044
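/* The argument pointer is the same distance from the hard frame pointer
   and from the stack pointer, since the initial gap between them is zero;
   saved registers, the local frame and any by-reference argument copies
   lie in between.  */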
8045 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8046 return total_saved_regs_space + total_auto_space
8047 + crtl->args.info.byref_regs * 8;
8048
8049 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8050 return total_saved_regs_space + total_auto_space
8051 + crtl->args.info.byref_regs * 8;
8052
8053 /* Initial gap between fp and sp is 0. */
8054 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8055 return 0;
8056
8057 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8058 return rounded_frame_size (0);
8059
8060 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8061 return rounded_frame_size (0);
8062
8063 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8064 && (to == HARD_FRAME_POINTER_REGNUM
8065 || to == STACK_POINTER_REGNUM));
8066 if (TARGET_SH5)
8067 {
8068 int n = total_saved_regs_space;
8069 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8070 save_schedule schedule;
8071 save_entry *entry;
8072
8073 n += total_auto_space;
8074
8075 /* If it wasn't saved, there's not much we can do. */
8076 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8077 return n;
8078
8079 target_flags = copy_flags;
8080
8081 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8082 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8083 if (entry->reg == pr_reg)
8084 {
8085 target_flags = save_flags;
8086 return entry->offset;
8087 }
8088 gcc_unreachable ();
8089 }
8090 else
8091 return total_auto_space;
8092 }
8093
8094 /* Parse the -mfixed-range= option string. */
8095 void
8096 sh_fix_range (const char *const_str)
8097 {
8098 int i, first, last;
8099 char *str, *dash, *comma;
8100
8101 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8102 REG2 are either register names or register numbers. The effect
8103 of this option is to mark the registers in the range from REG1 to
8104 REG2 as ``fixed'' so they won't be used by the compiler. */
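/* For example (illustrative only), -mfixed-range=r4-r7,r10-r11 would mark
   r4..r7 and r10..r11 as fixed.  */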
8105
8106 i = strlen (const_str);
8107 str = (char *) alloca (i + 1);
8108 memcpy (str, const_str, i + 1);
8109
8110 while (1)
8111 {
8112 dash = strchr (str, '-');
8113 if (!dash)
8114 {
8115 warning (0, "value of -mfixed-range must have form REG1-REG2");
8116 return;
8117 }
8118 *dash = '\0';
8119 comma = strchr (dash + 1, ',');
8120 if (comma)
8121 *comma = '\0';
8122
8123 first = decode_reg_name (str);
8124 if (first < 0)
8125 {
8126 warning (0, "unknown register name: %s", str);
8127 return;
8128 }
8129
8130 last = decode_reg_name (dash + 1);
8131 if (last < 0)
8132 {
8133 warning (0, "unknown register name: %s", dash + 1);
8134 return;
8135 }
8136
8137 *dash = '-';
8138
8139 if (first > last)
8140 {
8141 warning (0, "%s-%s is an empty range", str, dash + 1);
8142 return;
8143 }
8144
8145 for (i = first; i <= last; ++i)
8146 fixed_regs[i] = call_used_regs[i] = 1;
8147
8148 if (!comma)
8149 break;
8150
8151 *comma = ',';
8152 str = comma + 1;
8153 }
8154 }
8155 \f
8156 /* Insert any deferred function attributes from earlier pragmas. */
8157 static void
8158 sh_insert_attributes (tree node, tree *attributes)
8159 {
8160 tree attrs;
8161
8162 if (TREE_CODE (node) != FUNCTION_DECL)
8163 return;
8164
8165 /* We are only interested in fields. */
8166 if (!DECL_P (node))
8167 return;
8168
8169 /* Append the attributes to the deferred attributes. */
8170 *sh_deferred_function_attributes_tail = *attributes;
8171 attrs = sh_deferred_function_attributes;
8172 if (!attrs)
8173 return;
8174
8175 /* Some attributes imply or require the interrupt attribute. */
8176 if (!lookup_attribute ("interrupt_handler", attrs)
8177 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8178 {
8179 /* If we have a trapa_handler, but no interrupt_handler attribute,
8180 insert an interrupt_handler attribute. */
8181 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8182 /* We can't use sh_pr_interrupt here because that's not in the
8183 java frontend. */
8184 attrs
8185 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8186 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8187 if the interrupt attribute is missing, we ignore the attribute
8188 and warn. */
8189 else if (lookup_attribute ("sp_switch", attrs)
8190 || lookup_attribute ("trap_exit", attrs)
8191 || lookup_attribute ("nosave_low_regs", attrs)
8192 || lookup_attribute ("resbank", attrs))
8193 {
8194 tree *tail;
8195
8196 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8197 {
8198 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8199 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8200 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8201 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8202 warning (OPT_Wattributes,
8203 "%qE attribute only applies to interrupt functions",
8204 TREE_PURPOSE (attrs));
8205 else
8206 {
8207 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8208 NULL_TREE);
8209 tail = &TREE_CHAIN (*tail);
8210 }
8211 }
8212 attrs = *attributes;
8213 }
8214 }
8215
8216 /* Install the processed list. */
8217 *attributes = attrs;
8218
8219 /* Clear deferred attributes. */
8220 sh_deferred_function_attributes = NULL_TREE;
8221 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8222
8223 return;
8224 }
8225
8226 /* Supported attributes:
8227
8228 interrupt_handler -- specifies this function is an interrupt handler.
8229
8230 trapa_handler -- like above, but don't save all registers.
8231
8232 sp_switch -- specifies an alternate stack for an interrupt handler
8233 to run on.
8234
8235 trap_exit -- use a trapa to exit an interrupt function instead of
8236 an rte instruction.
8237
8238 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
8239 This is useful on the SH3 and upwards,
8240 which have a separate set of low regs for User and Supervisor modes.
8241 This should only be used for the lowest level of interrupts. Higher levels
8242 of interrupts must save the registers in case they themselves are
8243 interrupted.
8244
8245 renesas -- use Renesas calling/layout conventions (functions and
8246 structures).
8247
8248 resbank -- In case of an ISR, use a register bank to save registers
8249 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8250 */
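/* As an illustration (not taken from the original sources), an SH interrupt
   handler combining several of these attributes might be declared as:

     void handler (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11)));

   where "alt_stack" and 11 are placeholder values.  */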
8251
8252 const struct attribute_spec sh_attribute_table[] =
8253 {
8254 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
8255 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8256 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
8257 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
8258 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
8259 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8260 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8261 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
8262 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
8263 #ifdef SYMBIAN
8264 /* Symbian support adds three new attributes:
8265 dllexport - for exporting a function/variable that will live in a dll
8266 dllimport - for importing a function/variable from a dll
8267
8268 Microsoft allows multiple declspecs in one __declspec, separating
8269 them with spaces. We do NOT support this. Instead, use __declspec
8270 multiple times. */
8271 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8272 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8273 #endif
8274 { NULL, 0, 0, false, false, false, NULL }
8275 };
8276
8277 /* Handle a 'resbank' attribute. */
8278 static tree
8279 sh_handle_resbank_handler_attribute (tree * node, tree name,
8280 tree args ATTRIBUTE_UNUSED,
8281 int flags ATTRIBUTE_UNUSED,
8282 bool * no_add_attrs)
8283 {
8284 if (!TARGET_SH2A)
8285 {
8286 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8287 name);
8288 *no_add_attrs = true;
8289 }
8290 if (TREE_CODE (*node) != FUNCTION_DECL)
8291 {
8292 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8293 name);
8294 *no_add_attrs = true;
8295 }
8296
8297 return NULL_TREE;
8298 }
8299
8300 /* Handle an "interrupt_handler" attribute; arguments as in
8301 struct attribute_spec.handler. */
8302 static tree
8303 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8304 tree args ATTRIBUTE_UNUSED,
8305 int flags ATTRIBUTE_UNUSED,
8306 bool *no_add_attrs)
8307 {
8308 if (TREE_CODE (*node) != FUNCTION_DECL)
8309 {
8310 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8311 name);
8312 *no_add_attrs = true;
8313 }
8314 else if (TARGET_SHCOMPACT)
8315 {
8316 error ("attribute interrupt_handler is not compatible with -m5-compact");
8317 *no_add_attrs = true;
8318 }
8319
8320 return NULL_TREE;
8321 }
8322
8323 /* Handle a 'function_vector' attribute; arguments as in
8324 struct attribute_spec.handler. */
8325 static tree
8326 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8327 tree args ATTRIBUTE_UNUSED,
8328 int flags ATTRIBUTE_UNUSED,
8329 bool * no_add_attrs)
8330 {
8331 if (!TARGET_SH2A)
8332 {
8333 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8334 name);
8335 *no_add_attrs = true;
8336 }
8337 else if (TREE_CODE (*node) != FUNCTION_DECL)
8338 {
8339 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8340 name);
8341 *no_add_attrs = true;
8342 }
8343 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8344 {
8345 /* The argument must be a constant integer. */
8346 warning (OPT_Wattributes,
8347 "%qE attribute argument not an integer constant",
8348 name);
8349 *no_add_attrs = true;
8350 }
8351 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8352 {
8353 /* The argument value must be between 0 and 255. */
8354 warning (OPT_Wattributes,
8355 "%qE attribute argument should be between 0 to 255",
8356 name);
8357 *no_add_attrs = true;
8358 }
8359 return NULL_TREE;
8360 }
8361
8362 /* Returns 1 if the symbol X refers to a function that has been assigned
8363 the attribute 'function_vector'. */
8364 int
8365 sh2a_is_function_vector_call (rtx x)
8366 {
8367 if (GET_CODE (x) == SYMBOL_REF
8368 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8369 {
8370 tree tr = SYMBOL_REF_DECL (x);
8371
8372 if (sh2a_function_vector_p (tr))
8373 return 1;
8374 }
8375
8376 return 0;
8377 }
8378
8379 /* Returns the function vector number, if the attribute
8380 'function_vector' is assigned, otherwise returns zero. */
8381 int
8382 sh2a_get_function_vector_number (rtx x)
8383 {
8384 int num;
8385 tree list, t;
8386
8387 if ((GET_CODE (x) == SYMBOL_REF)
8388 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8389 {
8390 t = SYMBOL_REF_DECL (x);
8391
8392 if (TREE_CODE (t) != FUNCTION_DECL)
8393 return 0;
8394
8395 list = SH_ATTRIBUTES (t);
8396 while (list)
8397 {
8398 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8399 {
8400 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8401 return num;
8402 }
8403
8404 list = TREE_CHAIN (list);
8405 }
8406
8407 return 0;
8408 }
8409 else
8410 return 0;
8411 }
8412
8413 /* Handle an "sp_switch" attribute; arguments as in
8414 struct attribute_spec.handler. */
8415 static tree
8416 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8417 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8418 {
8419 if (TREE_CODE (*node) != FUNCTION_DECL)
8420 {
8421 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8422 name);
8423 *no_add_attrs = true;
8424 }
8425 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8426 {
8427 /* The argument must be a constant string. */
8428 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8429 name);
8430 *no_add_attrs = true;
8431 }
8432
8433 return NULL_TREE;
8434 }
8435
8436 /* Handle an "trap_exit" attribute; arguments as in
8437 struct attribute_spec.handler. */
8438 static tree
8439 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8440 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8441 {
8442 if (TREE_CODE (*node) != FUNCTION_DECL)
8443 {
8444 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8445 name);
8446 *no_add_attrs = true;
8447 }
8448 /* The argument specifies a trap number to be used in a trapa instruction
8449 at function exit (instead of an rte instruction). */
8450 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8451 {
8452 /* The argument must be a constant integer. */
8453 warning (OPT_Wattributes, "%qE attribute argument not an "
8454 "integer constant", name);
8455 *no_add_attrs = true;
8456 }
8457
8458 return NULL_TREE;
8459 }
8460
8461 static tree
8462 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8463 tree name ATTRIBUTE_UNUSED,
8464 tree args ATTRIBUTE_UNUSED,
8465 int flags ATTRIBUTE_UNUSED,
8466 bool *no_add_attrs ATTRIBUTE_UNUSED)
8467 {
8468 return NULL_TREE;
8469 }
8470
8471 /* True if __attribute__((renesas)) or -mrenesas. */
8472 int
8473 sh_attr_renesas_p (const_tree td)
8474 {
8475 if (TARGET_HITACHI)
8476 return 1;
8477 if (td == 0)
8478 return 0;
8479 if (DECL_P (td))
8480 td = TREE_TYPE (td);
8481 if (td == error_mark_node)
8482 return 0;
8483 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8484 != NULL_TREE);
8485 }
8486
8487 /* True if __attribute__((renesas)) or -mrenesas, for the current
8488 function. */
8489 int
8490 sh_cfun_attr_renesas_p (void)
8491 {
8492 return sh_attr_renesas_p (current_function_decl);
8493 }
8494
8495 int
8496 sh_cfun_interrupt_handler_p (void)
8497 {
8498 return (lookup_attribute ("interrupt_handler",
8499 DECL_ATTRIBUTES (current_function_decl))
8500 != NULL_TREE);
8501 }
8502
8503 /* Returns 1 if FUNC has been assigned the attribute
8504 "function_vector". */
8505 int
8506 sh2a_function_vector_p (tree func)
8507 {
8508 tree list;
8509 if (TREE_CODE (func) != FUNCTION_DECL)
8510 return 0;
8511
8512 list = SH_ATTRIBUTES (func);
8513 while (list)
8514 {
8515 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8516 return 1;
8517
8518 list = TREE_CHAIN (list);
8519 }
8520 return 0;
8521 }
8522
8523 /* Returns TRUE if the current function has the "resbank" attribute. */
8524
8525 int
8526 sh_cfun_resbank_handler_p (void)
8527 {
8528 return ((lookup_attribute ("resbank",
8529 DECL_ATTRIBUTES (current_function_decl))
8530 != NULL_TREE)
8531 && (lookup_attribute ("interrupt_handler",
8532 DECL_ATTRIBUTES (current_function_decl))
8533 != NULL_TREE) && TARGET_SH2A);
8534 }
8535
8536 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8537
8538 static const char *
8539 sh_check_pch_target_flags (int old_flags)
8540 {
8541 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8542 | MASK_SH_E | MASK_HARD_SH4
8543 | MASK_FPU_SINGLE | MASK_SH4))
8544 return _("created and used with different architectures / ABIs");
8545 if ((old_flags ^ target_flags) & MASK_HITACHI)
8546 return _("created and used with different ABIs");
8547 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8548 return _("created and used with different endianness");
8549 return NULL;
8550 }
8551 \f
8552 /* Predicates used by the templates. */
8553
8554 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8555 Used only in general_movsrc_operand. */
8556
8557 int
8558 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8559 {
8560 switch (REGNO (op))
8561 {
8562 case PR_REG:
8563 case MACL_REG:
8564 case MACH_REG:
8565 return 1;
8566 }
8567 return 0;
8568 }
8569
8570 /* Nonzero if OP is a floating point value with value 0.0. */
8571
8572 int
8573 fp_zero_operand (rtx op)
8574 {
8575 REAL_VALUE_TYPE r;
8576
8577 if (GET_MODE (op) != SFmode)
8578 return 0;
8579
8580 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8581 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8582 }
8583
8584 /* Nonzero if OP is a floating point value with value 1.0. */
8585
8586 int
8587 fp_one_operand (rtx op)
8588 {
8589 REAL_VALUE_TYPE r;
8590
8591 if (GET_MODE (op) != SFmode)
8592 return 0;
8593
8594 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8595 return REAL_VALUES_EQUAL (r, dconst1);
8596 }
8597
8598 /* For -m4 and -m4-single-only, mode switching is used. If we are
8599 compiling without -mfmovd, movsf_ie isn't taken into account for
8600 mode switching. We could check in machine_dependent_reorg for
8601 cases where we know we are in single precision mode, but there is
8602 interface to find that out during reload, so we must avoid
8603 choosing an fldi alternative during reload and thus failing to
8604 allocate a scratch register for the constant loading. */
8605 int
8606 fldi_ok (void)
8607 {
8608 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8609 }
8610
8611 int
8612 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8613 {
8614 enum rtx_code code = GET_CODE (op);
8615 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8616 }
8617
8618 /* Return the TLS type for TLS symbols, 0 otherwise. */
8619 enum tls_model
8620 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8621 {
8622 if (GET_CODE (op) != SYMBOL_REF)
8623 return TLS_MODEL_NONE;
8624 return SYMBOL_REF_TLS_MODEL (op);
8625 }
8626 \f
8627 /* Return the destination address of a branch. */
8628
8629 static int
8630 branch_dest (rtx branch)
8631 {
8632 rtx dest = SET_SRC (PATTERN (branch));
8633 int dest_uid;
8634
8635 if (GET_CODE (dest) == IF_THEN_ELSE)
8636 dest = XEXP (dest, 1);
8637 dest = XEXP (dest, 0);
8638 dest_uid = INSN_UID (dest);
8639 return INSN_ADDRESSES (dest_uid);
8640 }
8641 \f
8642 /* Return nonzero if REG is not used after INSN.
8643 We assume REG is a reload reg, and therefore does
8644 not live past labels. It may live past calls or jumps though. */
8645 int
8646 reg_unused_after (rtx reg, rtx insn)
8647 {
8648 enum rtx_code code;
8649 rtx set;
8650
8651 /* If the reg is set by this instruction, then it is safe for our
8652 case. Disregard the case where this is a store to memory, since
8653 we are checking a register used in the store address. */
8654 set = single_set (insn);
8655 if (set && GET_CODE (SET_DEST (set)) != MEM
8656 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8657 return 1;
8658
8659 while ((insn = NEXT_INSN (insn)))
8660 {
8661 rtx set;
8662 if (!INSN_P (insn))
8663 continue;
8664
8665 code = GET_CODE (insn);
8666
8667 #if 0
8668 /* If this is a label that existed before reload, then the register
8669 is dead here. However, if this is a label added by reorg, then
8670 the register may still be live here. We can't tell the difference,
8671 so we just ignore labels completely. */
8672 if (code == CODE_LABEL)
8673 return 1;
8674 /* else */
8675 #endif
8676
8677 if (code == JUMP_INSN)
8678 return 0;
8679
8680 /* If this is a sequence, we must handle them all at once.
8681 We could have for instance a call that sets the target register,
8682 and an insn in a delay slot that uses the register. In this case,
8683 we must return 0. */
8684 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8685 {
8686 int i;
8687 int retval = 0;
8688
8689 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8690 {
8691 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8692 rtx set = single_set (this_insn);
8693
8694 if (GET_CODE (this_insn) == CALL_INSN)
8695 code = CALL_INSN;
8696 else if (GET_CODE (this_insn) == JUMP_INSN)
8697 {
8698 if (INSN_ANNULLED_BRANCH_P (this_insn))
8699 return 0;
8700 code = JUMP_INSN;
8701 }
8702
8703 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8704 return 0;
8705 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8706 {
8707 if (GET_CODE (SET_DEST (set)) != MEM)
8708 retval = 1;
8709 else
8710 return 0;
8711 }
8712 if (set == 0
8713 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8714 return 0;
8715 }
8716 if (retval == 1)
8717 return 1;
8718 else if (code == JUMP_INSN)
8719 return 0;
8720 }
8721
8722 set = single_set (insn);
8723 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8724 return 0;
8725 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8726 return GET_CODE (SET_DEST (set)) != MEM;
8727 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8728 return 0;
8729
8730 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8731 return 1;
8732 }
8733 return 1;
8734 }
8735 \f
8736 #include "ggc.h"
8737
8738 static GTY(()) rtx fpscr_rtx;
8739 rtx
8740 get_fpscr_rtx (void)
8741 {
8742 if (! fpscr_rtx)
8743 {
8744 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8745 REG_USERVAR_P (fpscr_rtx) = 1;
8746 mark_user_reg (fpscr_rtx);
8747 }
8748 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8749 mark_user_reg (fpscr_rtx);
8750 return fpscr_rtx;
8751 }
8752
8753 static GTY(()) tree fpscr_values;
8754
8755 static void
8756 emit_fpu_switch (rtx scratch, int index)
8757 {
8758 rtx dst, src;
8759
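/* Lazily build a declaration for the external two-element array
   __fpscr_values, which holds the FPSCR settings used to switch the FPU
   between single and double precision mode.  */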
8760 if (fpscr_values == NULL)
8761 {
8762 tree t;
8763
8764 t = build_index_type (integer_one_node);
8765 t = build_array_type (integer_type_node, t);
8766 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8767 DECL_ARTIFICIAL (t) = 1;
8768 DECL_IGNORED_P (t) = 1;
8769 DECL_EXTERNAL (t) = 1;
8770 TREE_STATIC (t) = 1;
8771 TREE_PUBLIC (t) = 1;
8772 TREE_USED (t) = 1;
8773
8774 fpscr_values = t;
8775 }
8776
8777 src = DECL_RTL (fpscr_values);
8778 if (!can_create_pseudo_p ())
8779 {
8780 emit_move_insn (scratch, XEXP (src, 0));
8781 if (index != 0)
8782 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8783 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8784 }
8785 else
8786 src = adjust_address (src, PSImode, index * 4);
8787
8788 dst = get_fpscr_rtx ();
8789 emit_move_insn (dst, src);
8790 }
8791
8792 void
8793 emit_sf_insn (rtx pat)
8794 {
8795 emit_insn (pat);
8796 }
8797
8798 void
8799 emit_df_insn (rtx pat)
8800 {
8801 emit_insn (pat);
8802 }
8803
8804 void
8805 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8806 {
8807 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8808 }
8809
8810 void
8811 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8812 {
8813 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8814 get_fpscr_rtx ()));
8815 }
8816
8817 void
8818 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8819 {
8820 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8821 }
8822
8823 void
8824 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8825 {
8826 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8827 get_fpscr_rtx ()));
8828 }
8829 \f
8830 static rtx get_free_reg (HARD_REG_SET);
8831
8832 /* This function returns a register to use for loading the address from
8833 which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
8834 able to use pseudo registers after combine, or have a better mechanism
8835 for choosing a register, it should be done here. */
8836 /* REGS_LIVE is the liveness information for the point for which we
8837 need this allocation. In some bare-bones exit blocks, r1 is live at the
8838 start. We can even have all of r0..r3 being live:
8839 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8840 The insn before which new insns are placed will clobber the register
8841 we return. If a basic block consists only of setting the return value
8842 register to a pseudo and using that register, the return value is not
8843 live before or after this block, yet we'll insert our insns right in
8844 the middle. */
8845
8846 static rtx
8847 get_free_reg (HARD_REG_SET regs_live)
8848 {
8849 if (! TEST_HARD_REG_BIT (regs_live, 1))
8850 return gen_rtx_REG (Pmode, 1);
8851
8852 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8853 there shouldn't be anything but a jump before the function end. */
8854 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8855 return gen_rtx_REG (Pmode, 7);
8856 }
8857
8858 /* This function will set the fpscr from memory.
8859 MODE is the mode we are setting it to. */
8860 void
8861 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8862 {
8863 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8864 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8865 rtx addr_reg;
8866
8867 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8868 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8869 }
8870
8871 /* Is the given character a logical line separator for the assembler? */
8872 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8873 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8874 #endif
8875
8876 int
8877 sh_insn_length_adjustment (rtx insn)
8878 {
8879 /* Instructions with unfilled delay slots take up an extra two bytes for
8880 the nop in the delay slot. */
8881 if (((GET_CODE (insn) == INSN
8882 && GET_CODE (PATTERN (insn)) != USE
8883 && GET_CODE (PATTERN (insn)) != CLOBBER)
8884 || GET_CODE (insn) == CALL_INSN
8885 || (GET_CODE (insn) == JUMP_INSN
8886 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8887 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8888 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8889 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8890 return 2;
8891
8892 /* SH2e has a bug that prevents the use of annulled branches, so if
8893 the delay slot is not filled, we'll have to put a NOP in it. */
8894 if (sh_cpu_attr == CPU_SH2E
8895 && GET_CODE (insn) == JUMP_INSN
8896 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8897 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8898 && get_attr_type (insn) == TYPE_CBRANCH
8899 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8900 return 2;
8901
8902 /* sh-dsp parallel processing insns take four bytes instead of two. */
8903
8904 if (GET_CODE (insn) == INSN)
8905 {
8906 int sum = 0;
8907 rtx body = PATTERN (insn);
8908 const char *templ;
8909 char c;
8910 int maybe_label = 1;
8911
8912 if (GET_CODE (body) == ASM_INPUT)
8913 templ = XSTR (body, 0);
8914 else if (asm_noperands (body) >= 0)
8915 templ
8916 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8917 else
8918 return 0;
8919 do
8920 {
8921 int ppi_adjust = 0;
8922
8923 do
8924 c = *templ++;
8925 while (c == ' ' || c == '\t');
8926 /* all sh-dsp parallel-processing insns start with p.
8927 The only non-ppi sh insn starting with p is pref.
8928 The only ppi starting with pr is prnd. */
8929 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8930 ppi_adjust = 2;
8931 /* The repeat pseudo-insn expands to three insns, a total of
8932 six bytes in size. */
8933 else if ((c == 'r' || c == 'R')
8934 && ! strncasecmp ("epeat", templ, 5))
8935 ppi_adjust = 4;
8936 while (c && c != '\n'
8937 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8938 {
8939 /* If this is a label, it is obviously not a ppi insn. */
8940 if (c == ':' && maybe_label)
8941 {
8942 ppi_adjust = 0;
8943 break;
8944 }
8945 else if (c == '\'' || c == '"')
8946 maybe_label = 0;
8947 c = *templ++;
8948 }
8949 sum += ppi_adjust;
8950 maybe_label = c != ':';
8951 }
8952 while (c);
8953 return sum;
8954 }
8955 return 0;
8956 }
8957 \f
8958 /* Return TRUE for a valid displacement for the REG+disp addressing
8959 with MODE. */
8960
8961 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
8962 into the FRx registers. We implement this by setting the maximum offset
8963 to zero when the value is SFmode. This also restricts loading of SFmode
8964 values into the integer registers, but that can't be helped. */
8965
8966 /* The SH allows a displacement in a QI or HI addressing mode, but only when
8967 the other operand is R0. GCC doesn't handle this very well, so we forgo
8968 all of that.
8969
8970 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
8971 DI can be any number 0..60. */
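/* For example (illustrative), @(8,Rn) is a valid SImode address while
   @(2,Rn) is not, since SImode displacements must be multiples of 4 and
   smaller than 64.  */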
8972
8973 bool
8974 sh_legitimate_index_p (enum machine_mode mode, rtx op)
8975 {
8976 if (GET_CODE (op) == CONST_INT)
8977 {
8978 if (TARGET_SHMEDIA)
8979 {
8980 int size;
8981
8982 /* Check if this is the address of an unaligned load / store. */
8983 if (mode == VOIDmode)
8984 return CONST_OK_FOR_I06 (INTVAL (op));
8985
8986 size = GET_MODE_SIZE (mode);
8987 return (!(INTVAL (op) & (size - 1))
8988 && INTVAL (op) >= -512 * size
8989 && INTVAL (op) < 512 * size);
8990 }
8991
8992 if (TARGET_SH2A)
8993 {
8994 if (GET_MODE_SIZE (mode) == 1
8995 && (unsigned) INTVAL (op) < 4096)
8996 return true;
8997 }
8998
8999 if ((GET_MODE_SIZE (mode) == 4
9000 && (unsigned) INTVAL (op) < 64
9001 && !(INTVAL (op) & 3)
9002 && !(TARGET_SH2E && mode == SFmode))
9003 || (GET_MODE_SIZE (mode) == 4
9004 && (unsigned) INTVAL (op) < 16383
9005 && !(INTVAL (op) & 3) && TARGET_SH2A))
9006 return true;
9007
9008 if ((GET_MODE_SIZE (mode) == 8
9009 && (unsigned) INTVAL (op) < 60
9010 && !(INTVAL (op) & 3)
9011 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9012 || ((GET_MODE_SIZE (mode)==8)
9013 && (unsigned) INTVAL (op) < 8192
9014 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9015 && (TARGET_SH2A && mode == DFmode)))
9016 return true;
9017 }
9018
9019 return false;
9020 }
9021
9022 /* Recognize an RTL expression that is a valid memory address for
9023 an instruction.
9024 The MODE argument is the machine mode for the MEM expression
9025 that wants to use this address.
9026 Allow REG
9027 REG+disp
9028 REG+r0
9029 REG++
9030 --REG */
9031
9032 bool
9033 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9034 {
9035 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9036 return true;
9037 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9038 && ! TARGET_SHMEDIA
9039 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9040 return true;
9041 else if (GET_CODE (x) == PLUS
9042 && (mode != PSImode || reload_completed))
9043 {
9044 rtx xop0 = XEXP (x, 0);
9045 rtx xop1 = XEXP (x, 1);
9046
9047 if (GET_MODE_SIZE (mode) <= 8
9048 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9049 && sh_legitimate_index_p (mode, xop1))
9050 return true;
9051
9052 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9053 || ((xop0 == stack_pointer_rtx
9054 || xop0 == hard_frame_pointer_rtx)
9055 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9056 || ((xop1 == stack_pointer_rtx
9057 || xop1 == hard_frame_pointer_rtx)
9058 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9059 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9060 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9061 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9062 && TARGET_FMOVD && mode == DFmode)))
9063 {
9064 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9065 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9066 return true;
9067 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9068 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9069 return true;
9070 }
9071 }
9072
9073 return false;
9074 }
9075 \f
9076 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9077 isn't protected by a PIC unspec. */
9078 int
9079 nonpic_symbol_mentioned_p (rtx x)
9080 {
9081 register const char *fmt;
9082 register int i;
9083
9084 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9085 || GET_CODE (x) == PC)
9086 return 1;
9087
9088 /* We don't want to look into the possible MEM location of a
9089 CONST_DOUBLE, since we're not going to use it, in general. */
9090 if (GET_CODE (x) == CONST_DOUBLE)
9091 return 0;
9092
9093 if (GET_CODE (x) == UNSPEC
9094 && (XINT (x, 1) == UNSPEC_PIC
9095 || XINT (x, 1) == UNSPEC_GOT
9096 || XINT (x, 1) == UNSPEC_GOTOFF
9097 || XINT (x, 1) == UNSPEC_GOTPLT
9098 || XINT (x, 1) == UNSPEC_GOTTPOFF
9099 || XINT (x, 1) == UNSPEC_DTPOFF
9100 || XINT (x, 1) == UNSPEC_PLT
9101 || XINT (x, 1) == UNSPEC_SYMOFF
9102 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9103 return 0;
9104
9105 fmt = GET_RTX_FORMAT (GET_CODE (x));
9106 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9107 {
9108 if (fmt[i] == 'E')
9109 {
9110 register int j;
9111
9112 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9113 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9114 return 1;
9115 }
9116 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9117 return 1;
9118 }
9119
9120 return 0;
9121 }
9122
9123 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9124 @GOTOFF in `reg'. */
9125 rtx
9126 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9127 rtx reg)
9128 {
9129 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9130 return orig;
9131
9132 if (GET_CODE (orig) == LABEL_REF
9133 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9134 {
9135 if (reg == 0)
9136 reg = gen_reg_rtx (Pmode);
9137
9138 emit_insn (gen_symGOTOFF2reg (reg, orig));
9139 return reg;
9140 }
9141 else if (GET_CODE (orig) == SYMBOL_REF)
9142 {
9143 if (reg == 0)
9144 reg = gen_reg_rtx (Pmode);
9145
9146 emit_insn (gen_symGOT2reg (reg, orig));
9147 return reg;
9148 }
9149 return orig;
9150 }
9151
9152 /* Try machine-dependent ways of modifying an illegitimate address
9153 to be legitimate. If we find one, return the new, valid address.
9154 Otherwise, return X.
9155
9156 For the SH, if X is almost suitable for indexing, but the offset is
9157 out of range, convert it into a normal form so that CSE has a chance
9158 of reducing the number of address registers used. */
9159
9160 static rtx
9161 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9162 {
9163 if (flag_pic)
9164 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9165
9166 if (GET_CODE (x) == PLUS
9167 && (GET_MODE_SIZE (mode) == 4
9168 || GET_MODE_SIZE (mode) == 8)
9169 && GET_CODE (XEXP (x, 1)) == CONST_INT
9170 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9171 && ! TARGET_SHMEDIA
9172 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9173 && ! (TARGET_SH2E && mode == SFmode))
9174 {
9175 rtx index_rtx = XEXP (x, 1);
9176 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9177 rtx sum;
9178
9179 /* On rare occasions, we might get an unaligned pointer
9180 that is indexed in a way to give an aligned address.
9181 Therefore, keep the lower two bits in offset_base. */
9182 /* Instead of offset_base 128..131 use 124..127, so that
9183 simple add suffices. */
9184 if (offset > 127)
9185 offset_base = ((offset + 4) & ~60) - 4;
9186 else
9187 offset_base = offset & ~60;
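/* Worked example (illustrative, not from the original sources): for
   offset == 130 the computation above yields
   offset_base == ((130 + 4) & ~60) - 4 == 126, so the lower two bits of
   the offset stay in offset_base and the residual displacement handed
   back below is 130 - 126 == 4.  */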
9188
9189 /* Sometimes the normal form does not suit DImode. We
9190 could avoid that by using smaller ranges, but that
9191 would give less optimized code when SImode is
9192 prevalent. */
9193 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9194 {
9195 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9196 GEN_INT (offset_base), NULL_RTX, 0,
9197 OPTAB_LIB_WIDEN);
9198
9199 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9200 }
9201 }
9202
9203 return x;
9204 }
9205
9206 /* Mark the use of a constant in the literal table. If the constant
9207 has multiple labels, make it unique. */
9208 static rtx
9209 mark_constant_pool_use (rtx x)
9210 {
9211 rtx insn, lab, pattern;
9212
9213 if (x == NULL)
9214 return x;
9215
9216 switch (GET_CODE (x))
9217 {
9218 case LABEL_REF:
9219 x = XEXP (x, 0);
9220 case CODE_LABEL:
9221 break;
9222 default:
9223 return x;
9224 }
9225
9226 /* Get the first label in the list of labels for the same constant
9227 and delete the other labels in the list. */
9228 lab = x;
9229 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9230 {
9231 if (GET_CODE (insn) != CODE_LABEL
9232 || LABEL_REFS (insn) != NEXT_INSN (insn))
9233 break;
9234 lab = insn;
9235 }
9236
9237 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9238 INSN_DELETED_P (insn) = 1;
9239
9240 /* Mark constants in a window. */
9241 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9242 {
9243 if (GET_CODE (insn) != INSN)
9244 continue;
9245
9246 pattern = PATTERN (insn);
9247 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9248 continue;
9249
9250 switch (XINT (pattern, 1))
9251 {
9252 case UNSPECV_CONST2:
9253 case UNSPECV_CONST4:
9254 case UNSPECV_CONST8:
9255 XVECEXP (pattern, 0, 1) = const1_rtx;
9256 break;
9257 case UNSPECV_WINDOW_END:
9258 if (XVECEXP (pattern, 0, 0) == x)
9259 return lab;
9260 break;
9261 case UNSPECV_CONST_END:
9262 return lab;
9263 default:
9264 break;
9265 }
9266 }
9267
9268 return lab;
9269 }
9270 \f
9271 /* Return true if it's possible to redirect BRANCH1 to the destination
9272 of an unconditional jump BRANCH2. We only want to do this if the
9273 resulting branch will have a short displacement. */
9274 int
9275 sh_can_redirect_branch (rtx branch1, rtx branch2)
9276 {
9277 if (flag_expensive_optimizations && simplejump_p (branch2))
9278 {
9279 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9280 rtx insn;
9281 int distance;
9282
9283 for (distance = 0, insn = NEXT_INSN (branch1);
9284 insn && distance < 256;
9285 insn = PREV_INSN (insn))
9286 {
9287 if (insn == dest)
9288 return 1;
9289 else
9290 distance += get_attr_length (insn);
9291 }
9292 for (distance = 0, insn = NEXT_INSN (branch1);
9293 insn && distance < 256;
9294 insn = NEXT_INSN (insn))
9295 {
9296 if (insn == dest)
9297 return 1;
9298 else
9299 distance += get_attr_length (insn);
9300 }
9301 }
9302 return 0;
9303 }
9304
9305 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9306 int
9307 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9308 unsigned int new_reg)
9309 {
9310 /* Interrupt functions can only use registers that have already been
9311 saved by the prologue, even if they would normally be
9312 call-clobbered. */
9313
9314 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9315 return 0;
9316
9317 return 1;
9318 }
9319
9320 /* Function to update the integer COST
9321 based on the relationship between INSN that is dependent on
9322 DEP_INSN through the dependence LINK. The default is to make no
9323 adjustment to COST. This can be used for example to specify to
9324 the scheduler that an output- or anti-dependence does not incur
9325 the same cost as a data-dependence. The return value should be
9326 the new value for COST. */
9327 static int
9328 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9329 {
9330 rtx reg, use_pat;
9331
9332 if (TARGET_SHMEDIA)
9333 {
9334 /* On SHmedia, if the dependence is an anti-dependence or
9335 output-dependence, there is no cost. */
9336 if (REG_NOTE_KIND (link) != 0)
9337 {
9338 /* However, dependencies between target register loads and
9339 uses of the register in a subsequent block that are separated
9340 by a conditional branch are not modelled - we have to make do with
9341 the anti-dependency between the target register load and the
9342 conditional branch that ends the current block. */
9343 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9344 && GET_CODE (PATTERN (dep_insn)) == SET
9345 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9346 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9347 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9348 {
9349 int orig_cost = cost;
9350 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9351 rtx target = ((! note
9352 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9353 ? insn : JUMP_LABEL (insn));
9354 /* On the likely path, the branch costs 1; on the unlikely path,
9355 it costs 3. */
9356 cost--;
9357 do
9358 target = next_active_insn (target);
9359 while (target && ! flow_dependent_p (target, dep_insn)
9360 && --cost > 0);
9361 /* If two branches are executed in immediate succession, with the
9362 first branch properly predicted, this causes a stall at the
9363 second branch, hence we won't need the target for the
9364 second branch for two cycles after the launch of the first
9365 branch. */
9366 if (cost > orig_cost - 2)
9367 cost = orig_cost - 2;
9368 }
9369 else
9370 cost = 0;
9371 }
9372
9373 else if (get_attr_is_mac_media (insn)
9374 && get_attr_is_mac_media (dep_insn))
9375 cost = 1;
9376
9377 else if (! reload_completed
9378 && GET_CODE (PATTERN (insn)) == SET
9379 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9380 && GET_CODE (PATTERN (dep_insn)) == SET
9381 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9382 && cost < 4)
9383 cost = 4;
9384 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9385 that is needed at the target. */
9386 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9387 && ! flow_dependent_p (insn, dep_insn))
9388 cost--;
9389 }
9390 else if (REG_NOTE_KIND (link) == 0)
9391 {
9392 enum attr_type type;
9393 rtx dep_set;
9394
9395 if (recog_memoized (insn) < 0
9396 || recog_memoized (dep_insn) < 0)
9397 return cost;
9398
9399 dep_set = single_set (dep_insn);
9400
9401 /* The latency that we specify in the scheduling description refers
9402 to the actual output, not to an auto-increment register; for that,
9403 the latency is one. */
9404 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9405 {
9406 rtx set = single_set (insn);
9407
9408 if (set
9409 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9410 && (!MEM_P (SET_DEST (set))
9411 || !reg_mentioned_p (SET_DEST (dep_set),
9412 XEXP (SET_DEST (set), 0))))
9413 cost = 1;
9414 }
9415 /* The only input for a call that is timing-critical is the
9416 function's address. */
9417 if (GET_CODE (insn) == CALL_INSN)
9418 {
9419 rtx call = PATTERN (insn);
9420
9421 if (GET_CODE (call) == PARALLEL)
9422 call = XVECEXP (call, 0, 0);
9423 if (GET_CODE (call) == SET)
9424 call = SET_SRC (call);
9425 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
9426 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9427 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9428 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9429 cost -= TARGET_SH4_300 ? 3 : 6;
9430 }
9431 /* Likewise, the most timing critical input for an sfuncs call
9432 is the function address. However, sfuncs typically start
9433 using their arguments pretty quickly.
9434 Assume a four cycle delay for SH4 before they are needed.
9435 Cached ST40-300 calls are quicker, so assume only a one
9436 cycle delay there.
9437 ??? Maybe we should encode the delays till input registers
9438 are needed by sfuncs into the sfunc call insn. */
9439 /* All sfunc calls are parallels with at least four components.
9440 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9441 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9442 && XVECLEN (PATTERN (insn), 0) >= 4
9443 && (reg = sfunc_uses_reg (insn)))
9444 {
9445 if (! reg_set_p (reg, dep_insn))
9446 cost -= TARGET_SH4_300 ? 1 : 4;
9447 }
9448 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9449 {
9450 enum attr_type dep_type = get_attr_type (dep_insn);
9451
9452 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9453 cost--;
9454 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9455 && (type = get_attr_type (insn)) != TYPE_CALL
9456 && type != TYPE_SFUNC)
9457 cost--;
9458 /* When the preceding instruction loads the shift amount of
9459 the following SHAD/SHLD, the latency of the load is increased
9460 by 1 cycle. */
9461 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9462 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9463 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9464 XEXP (SET_SRC (single_set (insn)),
9465 1)))
9466 cost++;
9467 /* When an LS group instruction with a latency of less than
9468 3 cycles is followed by a double-precision floating-point
9469 instruction, FIPR, or FTRV, the latency of the first
9470 instruction is increased to 3 cycles. */
9471 else if (cost < 3
9472 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9473 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9474 cost = 3;
9475 /* The lsw register of a double-precision computation is ready one
9476 cycle earlier. */
9477 else if (reload_completed
9478 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9479 && (use_pat = single_set (insn))
9480 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9481 SET_SRC (use_pat)))
9482 cost -= 1;
9483
9484 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9485 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9486 cost -= 1;
9487 }
9488 else if (TARGET_SH4_300)
9489 {
9490 /* Stores need their input register two cycles later. */
9491 if (dep_set && cost >= 1
9492 && ((type = get_attr_type (insn)) == TYPE_STORE
9493 || type == TYPE_PSTORE
9494 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9495 {
9496 rtx set = single_set (insn);
9497
9498 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9499 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9500 {
9501 cost -= 2;
9502 /* But don't reduce the cost below 1 if the address depends
9503 on a side effect of dep_insn. */
9504 if (cost < 1
9505 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9506 cost = 1;
9507 }
9508 }
9509 }
9510 }
9511 /* An anti-dependence penalty of two applies if the first insn is a double
9512 precision fadd / fsub / fmul. */
9513 else if (!TARGET_SH4_300
9514 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9515 && recog_memoized (dep_insn) >= 0
9516 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9517 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9518 /* A lot of alleged anti-flow dependences are fake,
9519 so check this one is real. */
9520 && flow_dependent_p (dep_insn, insn))
9521 cost = 2;
9522
9523 return cost;
9524 }
9525
9526 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9527 if DEP_INSN is anti-flow dependent on INSN. */
9528 static int
9529 flow_dependent_p (rtx insn, rtx dep_insn)
9530 {
9531 rtx tmp = PATTERN (insn);
9532
9533 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9534 return tmp == NULL_RTX;
9535 }
9536
9537 /* A helper function for flow_dependent_p called through note_stores. */
9538 static void
9539 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9540 {
9541 rtx * pinsn = (rtx *) data;
9542
9543 if (*pinsn && reg_referenced_p (x, *pinsn))
9544 *pinsn = NULL_RTX;
9545 }
9546
9547 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9548 'special function' patterns (type sfunc) that clobber pr, but that
9549 do not look like function calls to leaf_function_p. Hence we must
9550 do this extra check. */
9551 static int
9552 sh_pr_n_sets (void)
9553 {
9554 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9555 }
9556
9557 /* Return where to allocate pseudo for a given hard register initial
9558 value. */
9559 static rtx
9560 sh_allocate_initial_value (rtx hard_reg)
9561 {
9562 rtx x;
9563
9564 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9565 {
9566 if (current_function_is_leaf
9567 && ! sh_pr_n_sets ()
9568 && ! (TARGET_SHCOMPACT
9569 && ((crtl->args.info.call_cookie
9570 & ~ CALL_COOKIE_RET_TRAMP (1))
9571 || crtl->saves_all_registers)))
9572 x = hard_reg;
9573 else
9574 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9575 }
9576 else
9577 x = NULL_RTX;
9578
9579 return x;
9580 }
9581
9582 /* This function returns 2 to indicate dual issue for superscalar SH4
9583 processors and 1 otherwise. To be used by the DFA pipeline description. */
9584 static int
9585 sh_issue_rate (void)
9586 {
9587 if (TARGET_SUPERSCALAR)
9588 return 2;
9589 else
9590 return 1;
9591 }
9592
9593 /* Functions for ready queue reordering for sched1. */
9594
9595 /* Get weight for mode for a set x. */
9596 static short
9597 find_set_regmode_weight (rtx x, enum machine_mode mode)
9598 {
9599 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9600 return 1;
9601 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9602 {
9603 if (GET_CODE (SET_DEST (x)) == REG)
9604 {
9605 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9606 return 1;
9607 else
9608 return 0;
9609 }
9610 return 1;
9611 }
9612 return 0;
9613 }
9614
9615 /* Get regmode weight for insn. */
9616 static short
9617 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9618 {
9619 short reg_weight = 0;
9620 rtx x;
9621
9622 /* Increment weight for each register born here. */
9623 x = PATTERN (insn);
9624 reg_weight += find_set_regmode_weight (x, mode);
9625 if (GET_CODE (x) == PARALLEL)
9626 {
9627 int j;
9628 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9629 {
9630 x = XVECEXP (PATTERN (insn), 0, j);
9631 reg_weight += find_set_regmode_weight (x, mode);
9632 }
9633 }
9634 /* Decrement weight for each register that dies here. */
9635 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9636 {
9637 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9638 {
9639 rtx note = XEXP (x, 0);
9640 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9641 reg_weight--;
9642 }
9643 }
9644 return reg_weight;
9645 }
9646
9647 /* Calculate regmode weights for all insns of a basic block. */
9648 static void
9649 find_regmode_weight (basic_block b, enum machine_mode mode)
9650 {
9651 rtx insn, next_tail, head, tail;
9652
9653 get_ebb_head_tail (b, b, &head, &tail);
9654 next_tail = NEXT_INSN (tail);
9655
9656 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9657 {
9658 /* Handle register life information. */
9659 if (!INSN_P (insn))
9660 continue;
9661
9662 if (mode == SFmode)
9663 INSN_REGMODE_WEIGHT (insn, mode) =
9664 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9665 else if (mode == SImode)
9666 INSN_REGMODE_WEIGHT (insn, mode) =
9667 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9668 }
9669 }
9670
9671 /* Comparison function for ready queue sorting. */
9672 static int
9673 rank_for_reorder (const void *x, const void *y)
9674 {
9675 rtx tmp = *(const rtx *) y;
9676 rtx tmp2 = *(const rtx *) x;
9677
9678 /* The insn in a schedule group should be issued first. */
9679 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9680 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9681
9682 /* If insns are equally good, sort by INSN_LUID (original insn order). This
9683 minimizes instruction movement, thus minimizing sched's effect on
9684 register pressure. */
9685 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9686 }
9687
9688 /* Resort the array A in which only element at index N may be out of order. */
9689 static void
9690 swap_reorder (rtx *a, int n)
9691 {
9692 rtx insn = a[n - 1];
9693 int i = n - 2;
9694
9695 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9696 {
9697 a[i + 1] = a[i];
9698 i -= 1;
9699 }
9700 a[i + 1] = insn;
9701 }
9702
9703 #define SCHED_REORDER(READY, N_READY) \
9704 do \
9705 { \
9706 if ((N_READY) == 2) \
9707 swap_reorder (READY, N_READY); \
9708 else if ((N_READY) > 2) \
9709 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9710 } \
9711 while (0)
9712
9713 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9714 macro. */
9715 static void
9716 ready_reorder (rtx *ready, int nready)
9717 {
9718 SCHED_REORDER (ready, nready);
9719 }
9720
9721 /* Count life regions of r0 for a block. */
9722 static int
9723 find_r0_life_regions (basic_block b)
9724 {
9725 rtx end, insn;
9726 rtx pset;
9727 rtx r0_reg;
9728 int live;
9729 int set;
9730 int death = 0;
9731
9732 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9733 {
9734 set = 1;
9735 live = 1;
9736 }
9737 else
9738 {
9739 set = 0;
9740 live = 0;
9741 }
9742
9743 insn = BB_HEAD (b);
9744 end = BB_END (b);
9745 r0_reg = gen_rtx_REG (SImode, R0_REG);
9746 while (1)
9747 {
9748 if (INSN_P (insn))
9749 {
9750 if (find_regno_note (insn, REG_DEAD, R0_REG))
9751 {
9752 death++;
9753 live = 0;
9754 }
9755 if (!live
9756 && (pset = single_set (insn))
9757 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9758 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9759 {
9760 set++;
9761 live = 1;
9762 }
9763 }
9764 if (insn == end)
9765 break;
9766 insn = NEXT_INSN (insn);
9767 }
9768 return set - death;
9769 }
9770
9771 /* Calculate regmode weights for all insns of all basic blocks. */
9772 static void
9773 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9774 int verbose ATTRIBUTE_UNUSED,
9775 int old_max_uid)
9776 {
9777 basic_block b;
9778
9779 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9780 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9781 r0_life_regions = 0;
9782
9783 FOR_EACH_BB_REVERSE (b)
9784 {
9785 find_regmode_weight (b, SImode);
9786 find_regmode_weight (b, SFmode);
9787 if (!reload_completed)
9788 r0_life_regions += find_r0_life_regions (b);
9789 }
9790
9791 CURR_REGMODE_PRESSURE (SImode) = 0;
9792 CURR_REGMODE_PRESSURE (SFmode) = 0;
9793
9794 }
9795
9796 /* Cleanup. */
9797 static void
9798 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9799 int verbose ATTRIBUTE_UNUSED)
9800 {
9801 if (regmode_weight[0])
9802 {
9803 free (regmode_weight[0]);
9804 regmode_weight[0] = NULL;
9805 }
9806 if (regmode_weight[1])
9807 {
9808 free (regmode_weight[1]);
9809 regmode_weight[1] = NULL;
9810 }
9811 }
9812
9813 /* The set of supported scalar modes differs from the default only in that
9814 TImode is not supported for 32-bit SHMEDIA. */
9815 static bool
9816 sh_scalar_mode_supported_p (enum machine_mode mode)
9817 {
9818 if (TARGET_SHMEDIA32 && mode == TImode)
9819 return false;
9820
9821 return default_scalar_mode_supported_p (mode);
9822 }
9823
9824 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9825 keep count of register pressures on SImode and SFmode. */
9826 static int
9827 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9828 int sched_verbose ATTRIBUTE_UNUSED,
9829 rtx insn,
9830 int can_issue_more)
9831 {
9832 if (GET_CODE (PATTERN (insn)) != USE
9833 && GET_CODE (PATTERN (insn)) != CLOBBER)
9834 cached_can_issue_more = can_issue_more - 1;
9835 else
9836 cached_can_issue_more = can_issue_more;
9837
9838 if (reload_completed)
9839 return cached_can_issue_more;
9840
9841 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9842 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9843
9844 return cached_can_issue_more;
9845 }
9846
9847 static void
9848 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9849 int verbose ATTRIBUTE_UNUSED,
9850 int veclen ATTRIBUTE_UNUSED)
9851 {
9852 CURR_REGMODE_PRESSURE (SImode) = 0;
9853 CURR_REGMODE_PRESSURE (SFmode) = 0;
9854 }
9855
9856 /* Some magic numbers. */
9857 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9858 functions that already have high pressure on r0. */
9859 #define R0_MAX_LIFE_REGIONS 2
9860 /* Register Pressure thresholds for SImode and SFmode registers. */
9861 #define SIMODE_MAX_WEIGHT 5
9862 #define SFMODE_MAX_WEIGHT 10
9863
9864 /* Return true if the pressure is high for MODE. */
9865 static short
9866 high_pressure (enum machine_mode mode)
9867 {
9868 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9869 functions that already have high pressure on r0. */
9870 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9871 return 1;
9872
9873 if (mode == SFmode)
9874 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9875 else
9876 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9877 }
9878
9879 /* Reorder ready queue if register pressure is high. */
9880 static int
9881 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9882 int sched_verbose ATTRIBUTE_UNUSED,
9883 rtx *ready,
9884 int *n_readyp,
9885 int clock_var ATTRIBUTE_UNUSED)
9886 {
9887 if (reload_completed)
9888 return sh_issue_rate ();
9889
9890 if (high_pressure (SFmode) || high_pressure (SImode))
9891 {
9892 ready_reorder (ready, *n_readyp);
9893 }
9894
9895 return sh_issue_rate ();
9896 }
9897
9898 /* Skip cycles if the current register pressure is high. */
9899 static int
9900 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9901 int sched_verbose ATTRIBUTE_UNUSED,
9902 rtx *ready ATTRIBUTE_UNUSED,
9903 int *n_readyp ATTRIBUTE_UNUSED,
9904 int clock_var ATTRIBUTE_UNUSED)
9905 {
9906 if (reload_completed)
9907 return cached_can_issue_more;
9908
9909 if (high_pressure (SFmode) || high_pressure (SImode))
9910 skip_cycles = 1;
9911
9912 return cached_can_issue_more;
9913 }
9914
9915 /* Skip cycles without sorting the ready queue. This will move insns from
9916 Q->R. If this is the last cycle we are skipping, allow sorting of the
9917 ready queue by sh_reorder. */
9918
9919 /* Generally, skipping this many cycles is sufficient for all insns to move
9920 from Q -> R. */
9921 #define MAX_SKIPS 8
9922
9923 static int
9924 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9925 int sched_verbose ATTRIBUTE_UNUSED,
9926 rtx insn ATTRIBUTE_UNUSED,
9927 int last_clock_var,
9928 int clock_var,
9929 int *sort_p)
9930 {
9931 if (reload_completed)
9932 return 0;
9933
9934 if (skip_cycles)
9935 {
9936 if ((clock_var - last_clock_var) < MAX_SKIPS)
9937 {
9938 *sort_p = 0;
9939 return 1;
9940 }
9941 /* If this is the last cycle we are skipping, allow reordering of R. */
9942 if ((clock_var - last_clock_var) == MAX_SKIPS)
9943 {
9944 *sort_p = 1;
9945 return 1;
9946 }
9947 }
9948
9949 skip_cycles = 0;
9950
9951 return 0;
9952 }
9953
9954 /* SHmedia requires registers for branches, so we can't generate new
9955 branches past reload. */
9956 static bool
9957 sh_cannot_modify_jumps_p (void)
9958 {
9959 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9960 }
9961
9962 static enum reg_class
9963 sh_target_reg_class (void)
9964 {
9965 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9966 }
9967
9968 static bool
9969 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9970 {
9971 HARD_REG_SET dummy;
9972 #if 0
9973 rtx insn;
9974 #endif
9975
9976 if (! shmedia_space_reserved_for_target_registers)
9977 return 0;
9978 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9979 return 0;
9980 if (calc_live_regs (&dummy) >= 6 * 8)
9981 return 1;
9982 return 0;
9983 }
9984
9985 static bool
9986 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
9987 {
9988 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9989 }
9990 \f
9991 /*
9992 On the SH1..SH4, the trampoline looks like
9993 2 0002 D202 mov.l l2,r2
9994 1 0000 D301 mov.l l1,r3
9995 3 0004 422B jmp @r2
9996 4 0006 0009 nop
9997 5 0008 00000000 l1: .long area
9998 6 000c 00000000 l2: .long function
9999
10000 SH5 (compact) uses r1 instead of r3 for the static chain. */
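/* Note (not in the original sources): for the SH1..SH4 case at the end of
   sh_initialize_trampoline below, the two insn pairs above are emitted as
   the SImode constants 0xd301d202 and 0x0009422b on little endian targets
   (the two halfwords are swapped on big endian targets), followed by the
   l1 word holding the static chain and the l2 word holding the function
   address.  */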
10001
10002
10003 /* Emit RTL insns to initialize the variable parts of a trampoline.
10004 FNADDR is an RTX for the address of the function's pure code.
10005 CXT is an RTX for the static chain value for the function. */
10006
10007 void
10008 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
10009 {
10010 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
10011
10012 if (TARGET_SHMEDIA64)
10013 {
10014 rtx tramp_templ;
10015 int fixed_len;
10016
10017 rtx movi1 = GEN_INT (0xcc000010);
10018 rtx shori1 = GEN_INT (0xc8000010);
10019 rtx src, dst;
10020
10021 /* The following trampoline works within a +- 128 KB range for cxt:
10022 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10023 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10024 gettr tr1,r1; blink tr0,r63 */
10025 /* Address rounding makes it hard to compute the exact bounds of the
10026 offset for this trampoline, but we have a rather generous offset
10027 range, so frame_offset should do fine as an upper bound. */
10028 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10029 {
10030 /* ??? could optimize this trampoline initialization
10031 by writing DImode words with two insns each. */
10032 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10033 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10034 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10 - 2));
10035 insn = gen_rtx_AND (DImode, insn, mask);
10036 /* Or in ptb/u .,tr1 pattern */
10037 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10038 insn = force_operand (insn, NULL_RTX);
10039 insn = gen_lowpart (SImode, insn);
10040 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10041 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10042 insn = gen_rtx_AND (DImode, insn, mask);
10043 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10044 insn = gen_lowpart (SImode, insn);
10045 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10046 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10047 insn = gen_rtx_AND (DImode, insn, mask);
10048 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10049 insn = gen_lowpart (SImode, insn);
10050 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10051 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10052 insn = gen_rtx_AND (DImode, insn, mask);
10053 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10054 insn = gen_lowpart (SImode, insn);
10055 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10056 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10057 insn = gen_rtx_AND (DImode, insn, mask);
10058 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10059 insn = gen_lowpart (SImode, insn);
10060 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10061 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10062 GEN_INT (0x6bf10600));
10063 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10064 GEN_INT (0x4415fc10));
10065 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10066 GEN_INT (0x4401fff0));
10067 emit_insn (gen_ic_invalidate_line (tramp));
10068 return;
10069 }
10070 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
10071 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10072
10073 tramp_templ = gen_datalabel_ref (tramp_templ);
10074 dst = tramp_mem;
10075 src = gen_const_mem (BLKmode, tramp_templ);
10076 set_mem_align (dst, 256);
10077 set_mem_align (src, 64);
10078 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10079
10080 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10081 emit_move_insn (adjust_address (tramp_mem, Pmode,
10082 fixed_len + GET_MODE_SIZE (Pmode)),
10083 cxt);
10084 emit_insn (gen_ic_invalidate_line (tramp));
10085 return;
10086 }
10087 else if (TARGET_SHMEDIA)
10088 {
10089 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10090 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10091 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10092 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10093 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10094 rotated 10 right, and the higher 16 bits of every 32 selected. */
10095 rtx movishori
10096 = force_reg (V2HImode, (simplify_gen_subreg
10097 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10098 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10099 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10100
10101 tramp = force_reg (Pmode, tramp);
10102 fnaddr = force_reg (SImode, fnaddr);
10103 cxt = force_reg (SImode, cxt);
10104 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10105 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10106 movishori));
10107 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10108 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10109 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10110 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10111 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10112 gen_rtx_SUBREG (V2HImode, cxt, 0),
10113 movishori));
10114 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10115 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10116 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10117 if (TARGET_LITTLE_ENDIAN)
10118 {
10119 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10120 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10121 }
10122 else
10123 {
10124 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10125 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10126 }
10127 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10128 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10129 emit_insn (gen_ic_invalidate_line (tramp));
10130 return;
10131 }
10132 else if (TARGET_SHCOMPACT)
10133 {
10134 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10135 return;
10136 }
10137 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10138 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10139 SImode));
10140 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10141 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10142 SImode));
10143 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10144 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10145 if (TARGET_HARVARD)
10146 {
10147 if (!TARGET_INLINE_IC_INVALIDATE
10148 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10149 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10150 FUNCTION_ORDINARY),
10151 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10152 else
10153 emit_insn (gen_ic_invalidate_line (tramp));
10154 }
10155 }
10156
10157 /* FIXME: This is overly conservative. A SHcompact function that
10158 receives arguments ``by reference'' will have them stored in its
10159 own stack frame, so it must not pass pointers or references to
10160 these arguments to other functions by means of sibling calls. */
10161 /* If PIC, we cannot make sibling calls to global functions
10162 because the PLT requires r12 to be live. */
10163 static bool
10164 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10165 {
10166 return (1
10167 && (! TARGET_SHCOMPACT
10168 || crtl->args.info.stack_regs == 0)
10169 && ! sh_cfun_interrupt_handler_p ()
10170 && (! flag_pic
10171 || (decl && ! TREE_PUBLIC (decl))
10172 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10173 }
10174 \f
10175 /* Machine specific built-in functions. */
10176
10177 struct builtin_description
10178 {
10179 const enum insn_code icode;
10180 const char *const name;
10181 int signature;
10182 };
10183
10184 /* Describe number and signedness of arguments; arg[0] == result
10185 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10186 /* 9: 64-bit pointer, 10: 32-bit pointer */
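/* For example (explanatory note, not in the original sources):
   SH_BLTIN_SH_HI below is { 4, 4, 1 }, i.e. the result and the first
   argument have don't-care signedness while the second argument - the
   shift count - is unsigned; SH_BLTIN_PV is { 0, 8 }, i.e. no result
   and a single pointer argument.  */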
10187 static const char signature_args[][4] =
10188 {
10189 #define SH_BLTIN_V2SI2 0
10190 { 4, 4 },
10191 #define SH_BLTIN_V4HI2 1
10192 { 4, 4 },
10193 #define SH_BLTIN_V2SI3 2
10194 { 4, 4, 4 },
10195 #define SH_BLTIN_V4HI3 3
10196 { 4, 4, 4 },
10197 #define SH_BLTIN_V8QI3 4
10198 { 4, 4, 4 },
10199 #define SH_BLTIN_MAC_HISI 5
10200 { 1, 4, 4, 1 },
10201 #define SH_BLTIN_SH_HI 6
10202 { 4, 4, 1 },
10203 #define SH_BLTIN_SH_SI 7
10204 { 4, 4, 1 },
10205 #define SH_BLTIN_V4HI2V2SI 8
10206 { 4, 4, 4 },
10207 #define SH_BLTIN_V4HI2V8QI 9
10208 { 4, 4, 4 },
10209 #define SH_BLTIN_SISF 10
10210 { 4, 2 },
10211 #define SH_BLTIN_LDUA_L 11
10212 { 2, 10 },
10213 #define SH_BLTIN_LDUA_Q 12
10214 { 1, 10 },
10215 #define SH_BLTIN_STUA_L 13
10216 { 0, 10, 2 },
10217 #define SH_BLTIN_STUA_Q 14
10218 { 0, 10, 1 },
10219 #define SH_BLTIN_LDUA_L64 15
10220 { 2, 9 },
10221 #define SH_BLTIN_LDUA_Q64 16
10222 { 1, 9 },
10223 #define SH_BLTIN_STUA_L64 17
10224 { 0, 9, 2 },
10225 #define SH_BLTIN_STUA_Q64 18
10226 { 0, 9, 1 },
10227 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10228 #define SH_BLTIN_2 19
10229 #define SH_BLTIN_SU 19
10230 { 1, 2 },
10231 #define SH_BLTIN_3 20
10232 #define SH_BLTIN_SUS 20
10233 { 2, 2, 1 },
10234 #define SH_BLTIN_PSSV 21
10235 { 0, 8, 2, 2 },
10236 #define SH_BLTIN_XXUU 22
10237 #define SH_BLTIN_UUUU 22
10238 { 1, 1, 1, 1 },
10239 #define SH_BLTIN_PV 23
10240 { 0, 8 },
10241 };
10242 /* mcmv: operands considered unsigned. */
10243 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10244 /* mperm: control value considered unsigned int. */
10245 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10246 /* mshards_q: returns signed short. */
10247 /* nsb: takes long long arg, returns unsigned char. */
10248 static const struct builtin_description bdesc[] =
10249 {
10250 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
10251 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
10252 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
10253 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
10254 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
10255 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
10256 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
10257 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
10258 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
10259 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
10260 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
10261 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
10262 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
10263 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
10264 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
10265 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
10266 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
10267 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
10268 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
10269 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
10270 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
10271 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
10272 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
10273 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
10274 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
10275 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
10276 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
10277 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
10278 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
10279 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
10280 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
10281 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
10282 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
10283 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
10284 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
10285 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
10286 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
10287 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
10288 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
10289 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
10290 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
10291 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
10292 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
10293 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
10294 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
10295 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
10296 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
10297 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
10298 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
10299 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
10300 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
10301 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
10302 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
10303 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
10304 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
10305 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
10306 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
10307 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
10308 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
10309 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
10310 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
10311 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
10312 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
10313 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
10314 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
10315 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
10316 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
10317 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
10318 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
10319 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
10320 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
10321 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
10322 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
10323 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
10324 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
10325 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
10326 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
10327 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
10328 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
10329 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
10330 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
10331 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
10332 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
10333 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
10334 };
10335
10336 static void
10337 sh_media_init_builtins (void)
10338 {
10339 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10340 const struct builtin_description *d;
10341
10342 memset (shared, 0, sizeof shared);
10343 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10344 {
10345 tree type, arg_type = 0;
10346 int signature = d->signature;
10347 int i;
10348
10349 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10350 type = shared[signature];
10351 else
10352 {
10353 int has_result = signature_args[signature][0] != 0;
10354
10355 if ((signature_args[signature][1] & 8)
10356 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10357 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10358 continue;
10359 if (! TARGET_FPU_ANY
10360 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10361 continue;
10362 type = void_list_node;
10363 for (i = 3; ; i--)
10364 {
10365 int arg = signature_args[signature][i];
10366 int opno = i - 1 + has_result;
10367
10368 if (arg & 8)
10369 arg_type = ptr_type_node;
10370 else if (arg)
10371 arg_type = (*lang_hooks.types.type_for_mode)
10372 (insn_data[d->icode].operand[opno].mode,
10373 (arg & 1));
10374 else if (i)
10375 continue;
10376 else
10377 arg_type = void_type_node;
10378 if (i == 0)
10379 break;
10380 type = tree_cons (NULL_TREE, arg_type, type);
10381 }
10382 type = build_function_type (arg_type, type);
10383 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10384 shared[signature] = type;
10385 }
10386 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10387 NULL, NULL_TREE);
10388 }
10389 }
10390
10391 /* Implements target hook vector_mode_supported_p. */
10392 bool
10393 sh_vector_mode_supported_p (enum machine_mode mode)
10394 {
10395 if (TARGET_FPU_ANY
10396 && ((mode == V2SFmode)
10397 || (mode == V4SFmode)
10398 || (mode == V16SFmode)))
10399 return true;
10400
10401 else if (TARGET_SHMEDIA
10402 && ((mode == V8QImode)
10403 || (mode == V2HImode)
10404 || (mode == V4HImode)
10405 || (mode == V2SImode)))
10406 return true;
10407
10408 return false;
10409 }
10410
10411 /* Implements target hook dwarf_calling_convention. Return an enum
10412 of dwarf_calling_convention. */
10413 int
10414 sh_dwarf_calling_convention (const_tree func)
10415 {
10416 if (sh_attr_renesas_p (func))
10417 return DW_CC_GNU_renesas_sh;
10418
10419 return DW_CC_normal;
10420 }
10421
10422 static void
10423 sh_init_builtins (void)
10424 {
10425 if (TARGET_SHMEDIA)
10426 sh_media_init_builtins ();
10427 }
10428
10429 /* Expand an expression EXP that calls a built-in function,
10430 with result going to TARGET if that's convenient
10431 (and in mode MODE if that's convenient).
10432 SUBTARGET may be used as the target for computing one of EXP's operands.
10433 IGNORE is nonzero if the value is to be ignored. */
10434
10435 static rtx
10436 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10437 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10438 {
10439 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10440 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10441 const struct builtin_description *d = &bdesc[fcode];
10442 enum insn_code icode = d->icode;
10443 int signature = d->signature;
10444 enum machine_mode tmode = VOIDmode;
10445 int nop = 0, i;
10446 rtx op[4];
10447 rtx pat = 0;
10448
10449 if (signature_args[signature][0])
10450 {
10451 if (ignore)
10452 return 0;
10453
10454 tmode = insn_data[icode].operand[0].mode;
10455 if (! target
10456 || GET_MODE (target) != tmode
10457 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10458 target = gen_reg_rtx (tmode);
10459 op[nop++] = target;
10460 }
10461 else
10462 target = 0;
10463
10464 for (i = 1; i <= 3; i++, nop++)
10465 {
10466 tree arg;
10467 enum machine_mode opmode, argmode;
10468 tree optype;
10469
10470 if (! signature_args[signature][i])
10471 break;
10472 arg = CALL_EXPR_ARG (exp, i - 1);
10473 if (arg == error_mark_node)
10474 return const0_rtx;
10475 if (signature_args[signature][i] & 8)
10476 {
10477 opmode = ptr_mode;
10478 optype = ptr_type_node;
10479 }
10480 else
10481 {
10482 opmode = insn_data[icode].operand[nop].mode;
10483 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10484 }
10485 argmode = TYPE_MODE (TREE_TYPE (arg));
10486 if (argmode != opmode)
10487 arg = build1 (NOP_EXPR, optype, arg);
10488 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10489 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10490 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10491 }
10492
10493 switch (nop)
10494 {
10495 case 1:
10496 pat = (*insn_data[d->icode].genfun) (op[0]);
10497 break;
10498 case 2:
10499 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10500 break;
10501 case 3:
10502 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10503 break;
10504 case 4:
10505 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10506 break;
10507 default:
10508 gcc_unreachable ();
10509 }
10510 if (! pat)
10511 return 0;
10512 emit_insn (pat);
10513 return target;
10514 }
10515
10516 void
10517 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10518 {
10519 rtx sel0 = const0_rtx;
10520 rtx sel1 = const1_rtx;
10521 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10522 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10523
10524 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10525 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
10526 }
10527
10528 void
10529 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10530 {
10531 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10532
10533 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10534 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
10535 }
10536
10537 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
10538 We can allow any mode in any general register. The special registers
10539 only allow SImode. Don't allow any mode in the PR.
10540
10541 We cannot hold DCmode values in the XD registers because alter_reg
10542 handles subregs of them incorrectly. We could work around this by
10543 spacing the XD registers like the DR registers, but this would require
10544 additional memory in every compilation to hold larger register vectors.
10545 We could hold SFmode / SCmode values in XD registers, but that
10546 would require a tertiary reload when reloading from / to memory,
10547 and a secondary reload to reload from / to general regs; that
10548 seems to be a loosing proposition.
10549
10550 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10551 it won't be ferried through GP registers first. */
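/* Illustrative consequence of the rules below (not in the original
   sources): V2SFmode is accepted in any general register or in an FP
   register whose offset from FIRST_FP_REG is even, while V4SFmode needs
   either a general register or an FP register whose offset from
   FIRST_FP_REG is a multiple of four.  */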
10552
10553 bool
10554 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
10555 {
10556 if (SPECIAL_REGISTER_P (regno))
10557 return mode == SImode;
10558
10559 if (regno == FPUL_REG)
10560 return (mode == SImode || mode == SFmode);
10561
10562 if (FP_REGISTER_P (regno) && mode == SFmode)
10563 return true;
10564
10565 if (mode == V2SFmode)
10566 {
10567 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10568 || GENERAL_REGISTER_P (regno)))
10569 return true;
10570 else
10571 return false;
10572 }
10573
10574 if (mode == V4SFmode)
10575 {
10576 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10577 || GENERAL_REGISTER_P (regno))
10578 return true;
10579 else
10580 return false;
10581 }
10582
10583 if (mode == V16SFmode)
10584 {
10585 if (TARGET_SHMEDIA)
10586 {
10587 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
10588 return true;
10589 else
10590 return false;
10591 }
10592 else
10593 return regno == FIRST_XD_REG;
10594 }
10595
10596 if (FP_REGISTER_P (regno))
10597 {
10598 if (mode == SFmode
10599 || mode == SImode
10600 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
10601 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10602 || mode == DCmode
10603 || (TARGET_SHMEDIA
10604 && (mode == DFmode || mode == DImode
10605 || mode == V2SFmode || mode == TImode)))
10606 && ((regno - FIRST_FP_REG) & 1) == 0)
10607 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
10608 && ((regno - FIRST_FP_REG) & 3) == 0))
10609 return true;
10610 else
10611 return false;
10612 }
10613
10614 if (XD_REGISTER_P (regno))
10615 return mode == DFmode;
10616
10617 if (TARGET_REGISTER_P (regno))
10618 return (mode == DImode || mode == SImode || mode == PDImode);
10619
10620 if (regno == PR_REG)
10621 return mode == SImode;
10622
10623 if (regno == FPSCR_REG)
10624 return mode == PSImode;
10625
10626 /* FIXME. This works around PR target/37633 for -O0. */
10627 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
10628 {
10629 unsigned int n = GET_MODE_SIZE (mode) / 8;
10630
10631 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
10632 && regno <= FIRST_GENERAL_REG + 14)
10633 return false;
10634 }
10635
10636 return true;
10637 }
10638
10639 /* Return true if a mode change from FROM to TO is not allowed for
10640 registers in class RCLASS. */
10641 bool
10642 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10643 enum reg_class rclass)
10644 {
10645 /* We want to enable the use of SUBREGs as a means to
10646 VEC_SELECT a single element of a vector. */
10647 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10648 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
10649
10650 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10651 {
10652 if (TARGET_LITTLE_ENDIAN)
10653 {
10654 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10655 return reg_classes_intersect_p (DF_REGS, rclass);
10656 }
10657 else
10658 {
10659 if (GET_MODE_SIZE (from) < 8)
10660 return reg_classes_intersect_p (DF_HI_REGS, rclass);
10661 }
10662 }
10663 return 0;
10664 }
10665
10666
10667 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10668 that label is used. */
10669
10670 void
10671 sh_mark_label (rtx address, int nuses)
10672 {
10673 if (GOTOFF_P (address))
10674 {
10675 /* Extract the label or symbol. */
10676 address = XEXP (address, 0);
10677 if (GET_CODE (address) == PLUS)
10678 address = XEXP (address, 0);
10679 address = XVECEXP (address, 0, 0);
10680 }
10681 if (GET_CODE (address) == LABEL_REF
10682 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
10683 LABEL_NUSES (XEXP (address, 0)) += nuses;
10684 }
10685
10686 /* Compute extra cost of moving data between one register class
10687 and another. */
10688
10689 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10690 uses this information. Hence, the general register <-> floating point
10691 register information here is not used for SFmode. */
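/* For instance (illustrative note, not in the original sources): moving a
   DFmode value between a general register and a floating point register
   costs 12 below, 8 when TARGET_FMOVD is in effect, and 4 on SHmedia;
   larger modes scale with their size in 8-byte units.  */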
10692
10693 int
10694 sh_register_move_cost (enum machine_mode mode,
10695 enum reg_class srcclass, enum reg_class dstclass)
10696 {
10697 if (dstclass == T_REGS || dstclass == PR_REGS)
10698 return 10;
10699
10700 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10701 return 4;
10702
10703 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10704 && REGCLASS_HAS_FP_REG (srcclass)
10705 && REGCLASS_HAS_FP_REG (dstclass))
10706 return 4;
10707
10708 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10709 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10710
10711 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10712 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10713 return 9;
10714
10715 if ((REGCLASS_HAS_FP_REG (dstclass)
10716 && REGCLASS_HAS_GENERAL_REG (srcclass))
10717 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10718 && REGCLASS_HAS_FP_REG (srcclass)))
10719 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10720 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10721
10722 if ((dstclass == FPUL_REGS
10723 && REGCLASS_HAS_GENERAL_REG (srcclass))
10724 || (srcclass == FPUL_REGS
10725 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10726 return 5;
10727
10728 if ((dstclass == FPUL_REGS
10729 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10730 || (srcclass == FPUL_REGS
10731 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10732 return 7;
10733
10734 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10735 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10736 return 20;
10737
10738 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10739 if (TARGET_SHMEDIA
10740 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10741 {
10742 if (sh_gettrcost >= 0)
10743 return sh_gettrcost;
10744 else if (!TARGET_PT_FIXED)
10745 return 100;
10746 }
10747
10748 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10749 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10750 return 4;
10751
10752 if (TARGET_SHMEDIA
10753 || (TARGET_FMOVD
10754 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10755 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10756 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10757
10758 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10759 }
10760
10761 static rtx emit_load_ptr (rtx, rtx);
10762
10763 static rtx
10764 emit_load_ptr (rtx reg, rtx addr)
10765 {
10766 rtx mem = gen_const_mem (ptr_mode, addr);
10767
10768 if (Pmode != ptr_mode)
10769 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10770 return emit_move_insn (reg, mem);
10771 }
10772
10773 static void
10774 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10775 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10776 tree function)
10777 {
10778 CUMULATIVE_ARGS cum;
10779 int structure_value_byref = 0;
10780 rtx this_rtx, this_value, sibcall, insns, funexp;
10781 tree funtype = TREE_TYPE (function);
10782 int simple_add = CONST_OK_FOR_ADD (delta);
10783 int did_load = 0;
10784 rtx scratch0, scratch1, scratch2;
10785 unsigned i;
10786
10787 reload_completed = 1;
10788 epilogue_completed = 1;
10789 current_function_uses_only_leaf_regs = 1;
10790
10791 emit_note (NOTE_INSN_PROLOGUE_END);
10792
10793 /* Find the "this" pointer. We have such a wide range of ABIs for the
10794 SH that it's best to do this completely machine independently.
10795 "this" is passed as first argument, unless a structure return pointer
10796 comes first, in which case "this" comes second. */
10797 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10798 #ifndef PCC_STATIC_STRUCT_RETURN
10799 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10800 structure_value_byref = 1;
10801 #endif /* not PCC_STATIC_STRUCT_RETURN */
10802 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10803 {
10804 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10805
10806 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10807 }
10808 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10809
10810 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10811 static chain pointer (even if you can't have nested virtual functions
10812 right now, someone might implement them sometime), and the rest of the
10813 registers are used for argument passing, are callee-saved, or reserved. */
10814 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10815 -ffixed-reg has been used. */
10816 if (! call_used_regs[0] || fixed_regs[0])
10817 error ("r0 needs to be available as a call-clobbered register");
10818 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10819 if (! TARGET_SH5)
10820 {
10821 if (call_used_regs[1] && ! fixed_regs[1])
10822 scratch1 = gen_rtx_REG (ptr_mode, 1);
10823 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10824 to the location where struct values are to be returned. */
10825 if (call_used_regs[3] && ! fixed_regs[3])
10826 scratch2 = gen_rtx_REG (Pmode, 3);
10827 }
10828 else if (TARGET_SHMEDIA)
10829 {
10830 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10831 if (i != REGNO (scratch0) &&
10832 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10833 {
10834 scratch1 = gen_rtx_REG (ptr_mode, i);
10835 break;
10836 }
10837 if (scratch1 == scratch0)
10838 error ("Need a second call-clobbered general purpose register");
10839 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10840 if (call_used_regs[i] && ! fixed_regs[i])
10841 {
10842 scratch2 = gen_rtx_REG (Pmode, i);
10843 break;
10844 }
10845 if (scratch2 == scratch0)
10846 error ("Need a call-clobbered target register");
10847 }
10848
10849 this_value = plus_constant (this_rtx, delta);
10850 if (vcall_offset
10851 && (simple_add || scratch0 != scratch1)
10852 && strict_memory_address_p (ptr_mode, this_value))
10853 {
10854 emit_load_ptr (scratch0, this_value);
10855 did_load = 1;
10856 }
10857
10858 if (!delta)
10859 ; /* Do nothing. */
10860 else if (simple_add)
10861 emit_move_insn (this_rtx, this_value);
10862 else
10863 {
10864 emit_move_insn (scratch1, GEN_INT (delta));
10865 emit_insn (gen_add2_insn (this_rtx, scratch1));
10866 }
10867
10868 if (vcall_offset)
10869 {
10870 rtx offset_addr;
10871
10872 if (!did_load)
10873 emit_load_ptr (scratch0, this_rtx);
10874
10875 offset_addr = plus_constant (scratch0, vcall_offset);
10876 if (strict_memory_address_p (ptr_mode, offset_addr))
10877 ; /* Do nothing. */
10878 else if (! TARGET_SH5 && scratch0 != scratch1)
10879 {
10880 /* scratch0 != scratch1, and we have indexed loads. Get a better
10881 schedule by loading the offset into r1 and using an indexed
10882 load - then the load of r1 can issue before the load from
10883 (this_rtx + delta) finishes. */
10884 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10885 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10886 }
10887 else if (CONST_OK_FOR_ADD (vcall_offset))
10888 {
10889 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10890 offset_addr = scratch0;
10891 }
10892 else if (scratch0 != scratch1)
10893 {
10894 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10895 emit_insn (gen_add2_insn (scratch0, scratch1));
10896 offset_addr = scratch0;
10897 }
10898 else
10899 gcc_unreachable (); /* FIXME */
10900 emit_load_ptr (scratch0, offset_addr);
10901
10902 if (Pmode != ptr_mode)
10903 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10904 emit_insn (gen_add2_insn (this_rtx, scratch0));
10905 }
10906
10907 /* Generate a tail call to the target function. */
10908 if (! TREE_USED (function))
10909 {
10910 assemble_external (function);
10911 TREE_USED (function) = 1;
10912 }
10913 funexp = XEXP (DECL_RTL (function), 0);
10914 /* If the function is overridden, so is the thunk, hence we don't
10915 need GOT addressing even if this is a public symbol. */
10916 #if 0
10917 if (TARGET_SH1 && ! flag_weak)
10918 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10919 else
10920 #endif
10921 if (TARGET_SH2 && flag_pic)
10922 {
10923 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10924 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10925 }
10926 else
10927 {
10928 if (TARGET_SHMEDIA && flag_pic)
10929 {
10930 funexp = gen_sym2PIC (funexp);
10931 PUT_MODE (funexp, Pmode);
10932 }
10933 emit_move_insn (scratch2, funexp);
10934 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10935 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10936 }
10937 sibcall = emit_call_insn (sibcall);
10938 SIBLING_CALL_P (sibcall) = 1;
10939 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10940 emit_barrier ();
10941
10942 /* Run just enough of rest_of_compilation to do scheduling and get
10943 the insns emitted. Note that use_thunk calls
10944 assemble_start_function and assemble_end_function. */
10945
10946 insn_locators_alloc ();
10947 insns = get_insns ();
10948
10949 if (optimize > 0)
10950 {
10951 if (! cfun->cfg)
10952 init_flow (cfun);
10953 split_all_insns_noflow ();
10954 }
10955
10956 sh_reorg ();
10957
10958 if (optimize > 0 && flag_delayed_branch)
10959 dbr_schedule (insns);
10960
10961 shorten_branches (insns);
10962 final_start_function (insns, file, 1);
10963 final (insns, file, 1);
10964 final_end_function ();
10965 free_after_compilation (cfun);
10966
10967 reload_completed = 0;
10968 epilogue_completed = 0;
10969 }
10970
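/* Return an rtx for the address of the function named NAME. KIND says how
   the address has to be formed: directly for an ordinary function, or via
   the GOT or a GOTOFF relocation for special functions when generating
   PIC. If TARGET is nonnull, the address is loaded into TARGET and TARGET
   is returned.  */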
10971 rtx
10972 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10973 {
10974 rtx sym;
10975
10976 /* If this is not an ordinary function, the name usually comes from a
10977 string literal or an sprintf buffer. Make sure we use the same
10978 string consistently, so that cse will be able to unify address loads. */
10979 if (kind != FUNCTION_ORDINARY)
10980 name = IDENTIFIER_POINTER (get_identifier (name));
10981 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10982 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10983 if (flag_pic)
10984 switch (kind)
10985 {
10986 case FUNCTION_ORDINARY:
10987 break;
10988 case SFUNC_GOT:
10989 {
10990 rtx reg = target ? target : gen_reg_rtx (Pmode);
10991
10992 emit_insn (gen_symGOT2reg (reg, sym));
10993 sym = reg;
10994 break;
10995 }
10996 case SFUNC_STATIC:
10997 {
10998 /* ??? To allow cse to work, we use GOTOFF relocations.
10999 We could add combiner patterns to transform this into
11000 straight pc-relative calls with sym2PIC / bsrf when
11001 label load and function call are still 1:1 and in the
11002 same basic block during combine. */
11003 rtx reg = target ? target : gen_reg_rtx (Pmode);
11004
11005 emit_insn (gen_symGOTOFF2reg (reg, sym));
11006 sym = reg;
11007 break;
11008 }
11009 }
11010 if (target && sym != target)
11011 {
11012 emit_move_insn (target, sym);
11013 return target;
11014 }
11015 return sym;
11016 }
11017
11018 /* Find the number of a general purpose register in S. */
11019 static int
11020 scavenge_reg (HARD_REG_SET *s)
11021 {
11022 int r;
11023 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11024 if (TEST_HARD_REG_BIT (*s, r))
11025 return r;
11026 return -1;
11027 }
11028
11029 rtx
11030 sh_get_pr_initial_val (void)
11031 {
11032 rtx val;
11033
11034 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11035 PR register on SHcompact, because it might be clobbered by the prologue.
11036 We check first if that is known to be the case. */
11037 if (TARGET_SHCOMPACT
11038 && ((crtl->args.info.call_cookie
11039 & ~ CALL_COOKIE_RET_TRAMP (1))
11040 || crtl->saves_all_registers))
11041 return gen_frame_mem (SImode, return_address_pointer_rtx);
11042
11043 /* If we haven't finished rtl generation, there might be a nonlocal label
11044 that we haven't seen yet.
11045 ??? get_hard_reg_initial_val fails if it is called after register
11046 allocation has started, unless it has been called before for the
11047 same register. And even then, we run into trouble if we didn't use
11048 the register in the same basic block before. So call
11049 get_hard_reg_initial_val now and wrap it in an unspec if we might
11050 need to replace it. */
11051 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11052 combine can put the pseudo returned by get_hard_reg_initial_val into
11053 instructions that need a general purpose register, which will fail to
11054 be recognized when the pseudo becomes allocated to PR. */
11055 val
11056 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11057 if (TARGET_SH1)
11058 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11059 return val;
11060 }
11061
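/* Try to expand an scc operation whose first operand is the T register:
   set OPERANDS[0] to the result of comparing OPERANDS[2] (which must be
   T_REG) against the constant OPERANDS[3] with the code in OPERANDS[1].
   Return nonzero on success, zero if the caller must fall back to the
   generic expansion.  */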
11062 int
11063 sh_expand_t_scc (rtx operands[])
11064 {
11065 enum rtx_code code = GET_CODE (operands[1]);
11066 rtx target = operands[0];
11067 rtx op0 = operands[2];
11068 rtx op1 = operands[3];
11069 rtx result = target;
11070 HOST_WIDE_INT val;
11071
11072 if (GET_CODE (op0) != REG || REGNO (op0) != T_REG
11073 || GET_CODE (op1) != CONST_INT)
11074 return 0;
11075 if (GET_CODE (result) != REG)
11076 result = gen_reg_rtx (SImode);
11077 val = INTVAL (op1);
11078 if ((code == EQ && val == 1) || (code == NE && val == 0))
11079 emit_insn (gen_movt (result));
11080 else if (TARGET_SH2A && ((code == EQ && val == 0)
11081 || (code == NE && val == 1)))
11082 emit_insn (gen_movrt (result));
11083 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11084 {
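/* With identical operands, subc computes 0 - 0 - T, so RESULT becomes -T;
   adding 1 then yields 1 - T, i.e. the complement of the T bit. The
   clobber tells dataflow that the old contents of RESULT are
   irrelevant.  */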
11085 emit_clobber (result);
11086 emit_insn (gen_subc (result, result, result));
11087 emit_insn (gen_addsi3 (result, result, const1_rtx));
11088 }
11089 else if (code == EQ || code == NE)
11090 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11091 else
11092 return 0;
11093 if (result != target)
11094 emit_move_insn (target, result);
11095 return 1;
11096 }
11097
11098 /* INSN is an sfunc; return the rtx that describes the address used. */
11099 static rtx
11100 extract_sfunc_addr (rtx insn)
11101 {
11102 rtx pattern, part = NULL_RTX;
11103 int len, i;
11104
11105 pattern = PATTERN (insn);
11106 len = XVECLEN (pattern, 0);
11107 for (i = 0; i < len; i++)
11108 {
11109 part = XVECEXP (pattern, 0, i);
11110 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11111 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11112 return XEXP (part, 0);
11113 }
11114 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11115 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11116 }
11117
11118 /* Verify that the register in use_sfunc_addr still agrees with the address
11119 used in the sfunc. This prevents fill_slots_from_thread from changing
11120 use_sfunc_addr.
11121 INSN is the use_sfunc_addr instruction, and REG is the register it
11122 guards. */
11123 int
11124 check_use_sfunc_addr (rtx insn, rtx reg)
11125 {
11126 /* Search for the sfunc. It should really come right after INSN. */
11127 while ((insn = NEXT_INSN (insn)))
11128 {
11129 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
11130 break;
11131 if (! INSN_P (insn))
11132 continue;
11133
11134 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11135 insn = XVECEXP (PATTERN (insn), 0, 0);
11136 if (GET_CODE (PATTERN (insn)) != PARALLEL
11137 || get_attr_type (insn) != TYPE_SFUNC)
11138 continue;
11139 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11140 }
11141 gcc_unreachable ();
11142 }
11143
11144 /* This function returns a constant rtx that represents 2**15 / pi in
11145 SFmode. It's used to scale SFmode angles, in radians, to a
11146 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11147 maps to 0x10000). */
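/* For example, an angle of pi/2 radians multiplied by 2**15 / pi
   (about 10430.378) gives 16384 = 0x4000, i.e. a quarter of the full
   circle in this fixed-point representation.  */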
11148
11149 static GTY(()) rtx sh_fsca_sf2int_rtx;
11150
11151 rtx
11152 sh_fsca_sf2int (void)
11153 {
11154 if (! sh_fsca_sf2int_rtx)
11155 {
11156 REAL_VALUE_TYPE rv;
11157
11158 real_from_string (&rv, "10430.378350470453");
11159 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11160 }
11161
11162 return sh_fsca_sf2int_rtx;
11163 }
11164
11165 /* This function returns a constant rtx that represents 2**15 / pi in
11166 DFmode. It's used to scale DFmode angles, in radians, to a
11167 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11168 maps to 0x10000). */
11169
11170 static GTY(()) rtx sh_fsca_df2int_rtx;
11171
11172 rtx
11173 sh_fsca_df2int (void)
11174 {
11175 if (! sh_fsca_df2int_rtx)
11176 {
11177 REAL_VALUE_TYPE rv;
11178
11179 real_from_string (&rv, "10430.378350470453");
11180 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11181 }
11182
11183 return sh_fsca_df2int_rtx;
11184 }
11185
11186 /* This function returns a constant rtx that represents pi / 2**15 in
11187 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11188 of a full circle back to an SFmode value (i.e., 0x10000 maps to
11189 2*pi). */
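/* For example, 0x4000 (a quarter of the full circle) multiplied by
   pi / 2**15 (about 9.5874e-5) gives back approximately 1.5708, i.e.
   pi/2 radians.  */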
11190
11191 static GTY(()) rtx sh_fsca_int2sf_rtx;
11192
11193 rtx
11194 sh_fsca_int2sf (void)
11195 {
11196 if (! sh_fsca_int2sf_rtx)
11197 {
11198 REAL_VALUE_TYPE rv;
11199
11200 real_from_string (&rv, "9.587379924285257e-5");
11201 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11202 }
11203
11204 return sh_fsca_int2sf_rtx;
11205 }
11206
11207 /* Initialize the CUMULATIVE_ARGS structure. */
11208
11209 void
11210 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11211 tree fntype,
11212 rtx libname ATTRIBUTE_UNUSED,
11213 tree fndecl,
11214 signed int n_named_args,
11215 enum machine_mode mode)
11216 {
11217 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11218 pcum->free_single_fp_reg = 0;
11219 pcum->stack_regs = 0;
11220 pcum->byref_regs = 0;
11221 pcum->byref = 0;
11222 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11223
11224 /* XXX - Should we check TARGET_HITACHI here ??? */
11225 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11226
11227 if (fntype)
11228 {
11229 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11230 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11231 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11232 pcum->arg_count [(int) SH_ARG_INT]
11233 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11234
11235 pcum->call_cookie
11236 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11237 && pcum->arg_count [(int) SH_ARG_INT] == 0
11238 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11239 ? int_size_in_bytes (TREE_TYPE (fntype))
11240 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11241 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11242 == FIRST_RET_REG));
11243 }
11244 else
11245 {
11246 pcum->arg_count [(int) SH_ARG_INT] = 0;
11247 pcum->prototype_p = FALSE;
11248 if (mode != VOIDmode)
11249 {
11250 pcum->call_cookie =
11251 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11252 && GET_MODE_SIZE (mode) > 4
11253 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11254
11255 /* If the default ABI is the Renesas ABI then all library
11256 calls must assume that the library will be using the
11257 Renesas ABI. So if the function would return its result
11258 in memory then we must force the address of this memory
11259 block onto the stack. Ideally we would like to call
11260 targetm.calls.return_in_memory() here but we do not have
11261 the TYPE or the FNDECL available so we synthesize the
11262 contents of that function as best we can. */
11263 pcum->force_mem =
11264 (TARGET_DEFAULT & MASK_HITACHI)
11265 && (mode == BLKmode
11266 || (GET_MODE_SIZE (mode) > 4
11267 && !(mode == DFmode
11268 && TARGET_FPU_DOUBLE)));
11269 }
11270 else
11271 {
11272 pcum->call_cookie = 0;
11273 pcum->force_mem = FALSE;
11274 }
11275 }
11276 }
11277
11278 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11279 not recurse into CONST_DOUBLE expressions when replacing.
11280
11281 Note that copying is not done so X must not be shared unless all copies
11282 are to be modified.
11283
11284 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11285 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11286 replacements[n*2+1] - and that we take mode changes into account.
11287
11288 If a replacement is ambiguous, return NULL_RTX.
11289
11290 If MODIFY is zero, don't modify any rtl in place,
11291 just return zero or nonzero for failure / success. */
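/* For example, with N_REPLACEMENTS == 1 this behaves much like
   replace_rtx (X, FROM(0), TO(0)), except that a hard register which only
   partially overlaps FROM(0) makes the replacement ambiguous and NULL_RTX
   is returned.  */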
11292
11293 rtx
11294 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11295 {
11296 int i, j;
11297 const char *fmt;
11298
11299 /* The following prevents infinite loops when we change a MEM inside a
11300 CONST_DOUBLE into the same CONST_DOUBLE. */
11301 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11302 return x;
11303
11304 for (i = n_replacements - 1; i >= 0 ; i--)
11305 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11306 return replacements[i*2+1];
11307
11308 /* Allow this function to make replacements in EXPR_LISTs. */
11309 if (x == 0)
11310 return 0;
11311
11312 if (GET_CODE (x) == SUBREG)
11313 {
11314 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11315 n_replacements, modify);
11316
11317 if (GET_CODE (new_rtx) == CONST_INT)
11318 {
11319 x = simplify_subreg (GET_MODE (x), new_rtx,
11320 GET_MODE (SUBREG_REG (x)),
11321 SUBREG_BYTE (x));
11322 if (! x)
11323 abort ();
11324 }
11325 else if (modify)
11326 SUBREG_REG (x) = new_rtx;
11327
11328 return x;
11329 }
11330 else if (GET_CODE (x) == REG)
11331 {
11332 unsigned regno = REGNO (x);
11333 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11334 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11335 rtx result = NULL_RTX;
11336
11337 for (i = n_replacements - 1; i >= 0; i--)
11338 {
11339 rtx from = replacements[i*2];
11340 rtx to = replacements[i*2+1];
11341 unsigned from_regno, from_nregs, to_regno, new_regno;
11342
11343 if (GET_CODE (from) != REG)
11344 continue;
11345 from_regno = REGNO (from);
11346 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11347 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11348 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11349 {
11350 if (regno < from_regno
11351 || regno + nregs > from_regno + from_nregs
11352 || GET_CODE (to) != REG
11353 || result)
11354 return NULL_RTX;
11355 to_regno = REGNO (to);
11356 if (to_regno < FIRST_PSEUDO_REGISTER)
11357 {
11358 new_regno = regno + to_regno - from_regno;
11359 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11360 != nregs)
11361 return NULL_RTX;
11362 result = gen_rtx_REG (GET_MODE (x), new_regno);
11363 }
11364 else if (GET_MODE (x) <= GET_MODE (to))
11365 result = gen_lowpart_common (GET_MODE (x), to);
11366 else
11367 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11368 }
11369 }
11370 return result ? result : x;
11371 }
11372 else if (GET_CODE (x) == ZERO_EXTEND)
11373 {
11374 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11375 n_replacements, modify);
11376
11377 if (GET_CODE (new_rtx) == CONST_INT)
11378 {
11379 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11380 new_rtx, GET_MODE (XEXP (x, 0)));
11381 if (! x)
11382 abort ();
11383 }
11384 else if (modify)
11385 XEXP (x, 0) = new_rtx;
11386
11387 return x;
11388 }
11389
11390 fmt = GET_RTX_FORMAT (GET_CODE (x));
11391 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11392 {
11393 rtx new_rtx;
11394
11395 if (fmt[i] == 'e')
11396 {
11397 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11398 n_replacements, modify);
11399 if (!new_rtx)
11400 return NULL_RTX;
11401 if (modify)
11402 XEXP (x, i) = new_rtx;
11403 }
11404 else if (fmt[i] == 'E')
11405 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11406 {
11407 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11408 n_replacements, modify);
11409 if (!new_rtx)
11410 return NULL_RTX;
11411 if (modify)
11412 XVECEXP (x, i, j) = new_rtx;
11413 }
11414 }
11415
11416 return x;
11417 }
11418
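/* Build an rtx that narrows X to MODE. If X is a ZERO_EXTEND or
   SIGN_EXTEND, look through it: return the inner value directly when it
   already has MODE, truncate it when it is wider, or re-extend it to MODE
   when it is narrower - in the latter case only if the extension kind is
   acceptable (any kind, or SIGN_EXTEND when NEED_SIGN_EXT is set).  */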
11419 rtx
11420 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11421 {
11422 enum rtx_code code = TRUNCATE;
11423
11424 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11425 {
11426 rtx inner = XEXP (x, 0);
11427 enum machine_mode inner_mode = GET_MODE (inner);
11428
11429 if (inner_mode == mode)
11430 return inner;
11431 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11432 x = inner;
11433 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11434 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11435 {
11436 code = GET_CODE (x);
11437 x = inner;
11438 }
11439 }
11440 return gen_rtx_fmt_e (code, mode, x);
11441 }
11442
11443 /* Called via for_each_rtx after reload, to clean up truncates of
11444 registers that span multiple actual hard registers. */
11445 int
11446 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11447 {
11448 rtx x = *p, reg;
11449
11450 if (GET_CODE (x) != TRUNCATE)
11451 return 0;
11452 reg = XEXP (x, 0);
11453 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
11454 {
11455 enum machine_mode reg_mode = GET_MODE (reg);
11456 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11457 subreg_lowpart_offset (DImode, reg_mode));
11458 *(int*) n_changes += 1;
11459 return -1;
11460 }
11461 return 0;
11462 }
11463
11464 /* Loads and stores depend on the highpart of the address. However,
11465 set_attr_alternative does not give well-defined results before reload,
11466 so we must look at the rtl ourselves to see if any of the feeding
11467 registers is used in a memref. */
11468
11469 /* Called by sh_contains_memref_p via for_each_rtx. */
11470 static int
11471 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11472 {
11473 return (GET_CODE (*loc) == MEM);
11474 }
11475
11476 /* Return nonzero iff INSN contains a MEM. */
11477 int
11478 sh_contains_memref_p (rtx insn)
11479 {
11480 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11481 }
11482
11483 /* Return nonzero iff INSN loads a banked register. */
11484 int
11485 sh_loads_bankedreg_p (rtx insn)
11486 {
11487 if (GET_CODE (PATTERN (insn)) == SET)
11488 {
11489 rtx op = SET_DEST (PATTERN(insn));
11490 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11491 return 1;
11492 }
11493
11494 return 0;
11495 }
11496
11497 /* FNADDR is the MEM expression from a call expander. Return an address
11498 to use in an SHmedia insn pattern. */
11499 rtx
11500 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11501 {
11502 int is_sym;
11503
11504 fnaddr = XEXP (fnaddr, 0);
11505 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11506 if (flag_pic && is_sym)
11507 {
11508 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11509 {
11510 rtx reg = gen_reg_rtx (Pmode);
11511
11512 /* We must not use GOTPLT for sibcalls, because PIC_REG
11513 must be restored before the PLT code gets to run. */
11514 if (is_sibcall)
11515 emit_insn (gen_symGOT2reg (reg, fnaddr));
11516 else
11517 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11518 fnaddr = reg;
11519 }
11520 else
11521 {
11522 fnaddr = gen_sym2PIC (fnaddr);
11523 PUT_MODE (fnaddr, Pmode);
11524 }
11525 }
11526 /* If ptabs might trap, make this visible to the rest of the compiler.
11527 We generally assume that symbols pertain to valid locations, but
11528 it is possible to generate invalid symbols with asm or linker tricks.
11529 In a list of functions where each returns its successor, an invalid
11530 symbol might denote an empty list. */
11531 if (!TARGET_PT_FIXED
11532 && (!is_sym || TARGET_INVALID_SYMBOLS)
11533 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11534 {
11535 rtx tr = gen_reg_rtx (PDImode);
11536
11537 emit_insn (gen_ptabs (tr, fnaddr));
11538 fnaddr = tr;
11539 }
11540 else if (! target_reg_operand (fnaddr, Pmode))
11541 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11542 return fnaddr;
11543 }
11544
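/* Return the class of registers that is needed as an intermediate when
   copying X of mode MODE into (IN_P true) or out of a register of class
   RCLASS, or NO_REGS when no intermediate register is required. When a
   dedicated reload pattern should handle the copy instead, set SRI->icode
   and return NO_REGS.  */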
11545 enum reg_class
11546 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
11547 enum machine_mode mode, secondary_reload_info *sri)
11548 {
11549 if (in_p)
11550 {
11551 if (REGCLASS_HAS_FP_REG (rclass)
11552 && ! TARGET_SHMEDIA
11553 && immediate_operand ((x), mode)
11554 && ! ((fp_zero_operand (x) || fp_one_operand (x))
11555 && mode == SFmode && fldi_ok ()))
11556 switch (mode)
11557 {
11558 case SFmode:
11559 sri->icode = CODE_FOR_reload_insf__frn;
11560 return NO_REGS;
11561 case DFmode:
11562 sri->icode = CODE_FOR_reload_indf__frn;
11563 return NO_REGS;
11564 case SImode:
11565 /* ??? If we knew that we were in the appropriate mode -
11566 single precision - we could use a reload pattern directly. */
11567 return FPUL_REGS;
11568 default:
11569 abort ();
11570 }
11571 if (rclass == FPUL_REGS
11572 && ((GET_CODE (x) == REG
11573 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11574 || REGNO (x) == T_REG))
11575 || GET_CODE (x) == PLUS))
11576 return GENERAL_REGS;
11577 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11578 {
11579 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11580 return GENERAL_REGS;
11581 else if (mode == SFmode)
11582 return FP_REGS;
11583 sri->icode = CODE_FOR_reload_insi__i_fpul;
11584 return NO_REGS;
11585 }
11586 if (rclass == FPSCR_REGS
11587 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11588 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
11589 return GENERAL_REGS;
11590 if (REGCLASS_HAS_FP_REG (rclass)
11591 && TARGET_SHMEDIA
11592 && immediate_operand (x, mode)
11593 && x != CONST0_RTX (GET_MODE (x))
11594 && GET_MODE (x) != V4SFmode)
11595 return GENERAL_REGS;
11596 if ((mode == QImode || mode == HImode)
11597 && TARGET_SHMEDIA && inqhi_operand (x, mode))
11598 {
11599 sri->icode = ((mode == QImode)
11600 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
11601 return NO_REGS;
11602 }
11603 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
11604 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
11605 return TARGET_REGS;
11606 } /* end of input-only processing. */
11607
11608 if (((REGCLASS_HAS_FP_REG (rclass)
11609 && (GET_CODE (x) == REG
11610 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11611 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11612 && TARGET_FMOVD))))
11613 || (REGCLASS_HAS_GENERAL_REG (rclass)
11614 && GET_CODE (x) == REG
11615 && FP_REGISTER_P (REGNO (x))))
11616 && ! TARGET_SHMEDIA
11617 && (mode == SFmode || mode == SImode))
11618 return FPUL_REGS;
11619 if ((rclass == FPUL_REGS
11620 || (REGCLASS_HAS_FP_REG (rclass)
11621 && ! TARGET_SHMEDIA && mode == SImode))
11622 && (GET_CODE (x) == MEM
11623 || (GET_CODE (x) == REG
11624 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11625 || REGNO (x) == T_REG
11626 || system_reg_operand (x, VOIDmode)))))
11627 {
11628 if (rclass == FPUL_REGS)
11629 return GENERAL_REGS;
11630 return FPUL_REGS;
11631 }
11632 if ((rclass == TARGET_REGS
11633 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
11634 && !satisfies_constraint_Csy (x)
11635 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
11636 return GENERAL_REGS;
11637 if ((rclass == MAC_REGS || rclass == PR_REGS)
11638 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
11639 && rclass != REGNO_REG_CLASS (REGNO (x)))
11640 return GENERAL_REGS;
11641 if (rclass != GENERAL_REGS && GET_CODE (x) == REG
11642 && TARGET_REGISTER_P (REGNO (x)))
11643 return GENERAL_REGS;
11644 return NO_REGS;
11645 }
11646
11647 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11648
11649 #include "gt-sh.h"