sh.c: Declare the prototype of sh_adjust_unroll_max only when...
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
56
57
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
59
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
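/* MSW / LSW give the word index of the most / least significant SImode half
   of a multi-word value held in consecutive registers; e.g. in the 'R' and
   'S' cases of print_operand below, reg_names[REGNO (x) + LSW] names the
   register holding the low word.  */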
62
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
69
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
72
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
76 int pragma_interrupt;
77
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
81 int trap_exit;
82
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
86 rtx sp_switch;
87
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa;
91
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
96 interrupted. */
97 int pragma_nosave_low_regs;
98
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args;
102
103 /* Global variables for machine-dependent things. */
104
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
107
108 /* Definitions used in ready queue reordering for first scheduling pass. */
109
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
112
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
115
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles = 0;
118
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more;
122
123 /* Saved operands from the last compare to use when we generate an scc
124 or bcc insn. */
125
126 rtx sh_compare_op0;
127 rtx sh_compare_op1;
128
129 /* Provides the class number of the smallest class containing
130 reg number. */
131
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
133 {
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
172 GENERAL_REGS,
173 };
174
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
177
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
181
182 /* Provide reg_class from a letter such as appears in the machine
183 description. *: target independently reserved letter.
184 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
185
186 enum reg_class reg_class_from_letter[] =
187 {
188 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
189 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
190 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
191 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
192 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
193 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
194 /* y */ FPUL_REGS, /* z */ R0_REGS
195 };
196
197 int assembler_dialect;
198
199 static bool shmedia_space_reserved_for_target_registers;
200
201 static void split_branches (rtx);
202 static int branch_dest (rtx);
203 static void force_into (rtx, rtx);
204 static void print_slot (rtx);
205 static rtx add_constant (rtx, enum machine_mode, rtx);
206 static void dump_table (rtx, rtx);
207 static int hi_const (rtx);
208 static int broken_move (rtx);
209 static int mova_p (rtx);
210 static rtx find_barrier (int, rtx, rtx);
211 static int noncall_uses_reg (rtx, rtx, rtx *);
212 static rtx gen_block_redirect (rtx, int, int);
213 static void sh_reorg (void);
214 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
215 static rtx frame_insn (rtx);
216 static rtx push (int);
217 static void pop (int);
218 static void push_regs (HARD_REG_SET *, int);
219 static int calc_live_regs (HARD_REG_SET *);
220 static void mark_use (rtx, rtx *);
221 static HOST_WIDE_INT rounded_frame_size (int);
222 static rtx mark_constant_pool_use (rtx);
223 const struct attribute_spec sh_attribute_table[];
224 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
225 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
227 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
228 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
229 static void sh_insert_attributes (tree, tree *);
230 static int sh_adjust_cost (rtx, rtx, rtx, int);
231 static int sh_issue_rate (void);
232 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
233 static short find_set_regmode_weight (rtx, enum machine_mode);
234 static short find_insn_regmode_weight (rtx, enum machine_mode);
235 static void find_regmode_weight (int, enum machine_mode);
236 static void sh_md_init_global (FILE *, int, int);
237 static void sh_md_finish_global (FILE *, int);
238 static int rank_for_reorder (const void *, const void *);
239 static void swap_reorder (rtx *, int);
240 static void ready_reorder (rtx *, int);
241 static short high_pressure (enum machine_mode);
242 static int sh_reorder (FILE *, int, rtx *, int *, int);
243 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
244 static void sh_md_init (FILE *, int, int);
245 static int sh_variable_issue (FILE *, int, rtx, int);
246
247 static bool sh_function_ok_for_sibcall (tree, tree);
248
249 static bool sh_cannot_modify_jumps_p (void);
250 static int sh_target_reg_class (void);
251 static bool sh_optimize_target_register_callee_saved (bool);
252 static bool sh_ms_bitfield_layout_p (tree);
253
254 static void sh_init_builtins (void);
255 static void sh_media_init_builtins (void);
256 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
257 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
258 static void sh_file_start (void);
259 static int flow_dependent_p (rtx, rtx);
260 static void flow_dependent_p_1 (rtx, rtx, void *);
261 static int shiftcosts (rtx);
262 static int andcosts (rtx);
263 static int addsubcosts (rtx);
264 static int multcosts (rtx);
265 static bool unspec_caller_rtx_p (rtx);
266 static bool sh_cannot_copy_insn_p (rtx);
267 static bool sh_rtx_costs (rtx, int, int, int *);
268 static int sh_address_cost (rtx);
269 #ifdef TARGET_ADJUST_UNROLL_MAX
270 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
271 #endif
272 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
273 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
274 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
275 static int scavenge_reg (HARD_REG_SET *s);
276 struct save_schedule_s;
277 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
278 struct save_schedule_s *, int);
279
280 static rtx sh_struct_value_rtx (tree, int);
281 static bool sh_return_in_memory (tree, tree);
282 static rtx sh_builtin_saveregs (void);
283 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
284 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
285 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
286 static tree sh_build_builtin_va_list (void);
287 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
288 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
289 tree, bool);
290 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
291 tree, bool);
292 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
293 tree, bool);
294 static int sh_dwarf_calling_convention (tree);
295 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
296
297 \f
298 /* Initialize the GCC target structure. */
299 #undef TARGET_ATTRIBUTE_TABLE
300 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
301
302 /* The next two are used for debug info when compiling with -gdwarf. */
303 #undef TARGET_ASM_UNALIGNED_HI_OP
304 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
305 #undef TARGET_ASM_UNALIGNED_SI_OP
306 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
307
308 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
309 #undef TARGET_ASM_UNALIGNED_DI_OP
310 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
311 #undef TARGET_ASM_ALIGNED_DI_OP
312 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
313
314 #undef TARGET_ASM_FUNCTION_EPILOGUE
315 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
316
317 #undef TARGET_ASM_OUTPUT_MI_THUNK
318 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
319
320 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
321 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
322
323 #undef TARGET_ASM_FILE_START
324 #define TARGET_ASM_FILE_START sh_file_start
325 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
326 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
327
328 #undef TARGET_INSERT_ATTRIBUTES
329 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
330
331 #undef TARGET_SCHED_ADJUST_COST
332 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
333
334 #undef TARGET_SCHED_ISSUE_RATE
335 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
336
337 /* The following hooks have been implemented for reenabling sched1. With the
338 help of these macros we are limiting the movement of insns in sched1 to
339 reduce the register pressure. The overall idea is to keep count of SImode
340 and SFmode regs required by already scheduled insns. When these counts
341 cross some threshold values, give priority to insns that free registers.
342 The insn that frees registers is most likely to be the insn with lowest
343 LUID (original insn order), but such an insn might be sitting in the stalled
344 queue (Q) instead of the ready queue (R). To solve this, we skip up to
345 a maximum of 8 cycles so that such insns may move from Q -> R.
346
347 The hooks are described below:
348
349 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
350 scheduler; it is called inside the sched_init function just after
351 find_insn_reg_weights function call. It is used to calculate the SImode
352 and SFmode weights of insns of basic blocks, much like what
353 find_insn_reg_weights does.
354 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
355
356 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
357 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
358 (Q)->(R).
359
360 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
361 high; reorder the ready queue so that the insn with lowest LUID will be
362 issued next.
363
364 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
365 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
366
367 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
368 can be returned from TARGET_SCHED_REORDER2.
369
370 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
371
372 #undef TARGET_SCHED_DFA_NEW_CYCLE
373 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
374
375 #undef TARGET_SCHED_INIT_GLOBAL
376 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
377
378 #undef TARGET_SCHED_FINISH_GLOBAL
379 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
380
381 #undef TARGET_SCHED_VARIABLE_ISSUE
382 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
383
384 #undef TARGET_SCHED_REORDER
385 #define TARGET_SCHED_REORDER sh_reorder
386
387 #undef TARGET_SCHED_REORDER2
388 #define TARGET_SCHED_REORDER2 sh_reorder2
389
390 #undef TARGET_SCHED_INIT
391 #define TARGET_SCHED_INIT sh_md_init
392
393 #undef TARGET_CANNOT_MODIFY_JUMPS_P
394 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
395 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
396 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
397 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
398 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
399 sh_optimize_target_register_callee_saved
400
401 #undef TARGET_MS_BITFIELD_LAYOUT_P
402 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
403
404 #undef TARGET_INIT_BUILTINS
405 #define TARGET_INIT_BUILTINS sh_init_builtins
406 #undef TARGET_EXPAND_BUILTIN
407 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
408
409 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
410 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
411
412 #undef TARGET_CANNOT_COPY_INSN_P
413 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
414 #undef TARGET_RTX_COSTS
415 #define TARGET_RTX_COSTS sh_rtx_costs
416 #undef TARGET_ADDRESS_COST
417 #define TARGET_ADDRESS_COST sh_address_cost
418
419 #undef TARGET_MACHINE_DEPENDENT_REORG
420 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
421
422 #ifdef HAVE_AS_TLS
423 #undef TARGET_HAVE_TLS
424 #define TARGET_HAVE_TLS true
425 #endif
426
427 #undef TARGET_PROMOTE_PROTOTYPES
428 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
429 #undef TARGET_PROMOTE_FUNCTION_ARGS
430 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
431 #undef TARGET_PROMOTE_FUNCTION_RETURN
432 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
433
434 #undef TARGET_STRUCT_VALUE_RTX
435 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
436 #undef TARGET_RETURN_IN_MEMORY
437 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
438
439 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
440 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
441 #undef TARGET_SETUP_INCOMING_VARARGS
442 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
443 #undef TARGET_STRICT_ARGUMENT_NAMING
444 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
445 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
446 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
447 #undef TARGET_MUST_PASS_IN_STACK
448 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
449 #undef TARGET_PASS_BY_REFERENCE
450 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
451 #undef TARGET_CALLEE_COPIES
452 #define TARGET_CALLEE_COPIES sh_callee_copies
453 #undef TARGET_ARG_PARTIAL_BYTES
454 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
455
456 #undef TARGET_BUILD_BUILTIN_VA_LIST
457 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
458 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
459 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
460
461 #undef TARGET_VECTOR_MODE_SUPPORTED_P
462 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
463
464 #undef TARGET_PCH_VALID_P
465 #define TARGET_PCH_VALID_P sh_pch_valid_p
466
467 #undef TARGET_DWARF_CALLING_CONVENTION
468 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
469
470 /* Return regmode weight for insn. */
471 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
472
473 /* Return current register pressure for regmode. */
474 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
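/* Purely illustrative sketch of how these macros are meant to be used (the
   real bookkeeping lives in the sh_md_* / sh_reorder* / sh_variable_issue
   functions further below): when an insn is issued, its cached weight is
   added to the running pressure count, roughly

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);

   and the reorder hooks compare CURR_REGMODE_PRESSURE against a threshold
   to decide when to favor register-freeing insns.  */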
475
476 #ifdef SYMBIAN
477
478 #undef TARGET_ENCODE_SECTION_INFO
479 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
480 #undef TARGET_STRIP_NAME_ENCODING
481 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
482 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
483 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
484
485 #endif /* SYMBIAN */
486
487 #ifdef TARGET_ADJUST_UNROLL_MAX
488 #undef TARGET_ADJUST_UNROLL_MAX
489 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
490 #endif
491
492 struct gcc_target targetm = TARGET_INITIALIZER;
493 \f
494 /* Print the operand address in x to the stream. */
495
496 void
497 print_operand_address (FILE *stream, rtx x)
498 {
499 switch (GET_CODE (x))
500 {
501 case REG:
502 case SUBREG:
503 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
504 break;
505
506 case PLUS:
507 {
508 rtx base = XEXP (x, 0);
509 rtx index = XEXP (x, 1);
510
511 switch (GET_CODE (index))
512 {
513 case CONST_INT:
514 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
515 reg_names[true_regnum (base)]);
516 break;
517
518 case REG:
519 case SUBREG:
520 {
521 int base_num = true_regnum (base);
522 int index_num = true_regnum (index);
523
524 fprintf (stream, "@(r0,%s)",
525 reg_names[MAX (base_num, index_num)]);
526 break;
527 }
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533 break;
534
535 case PRE_DEC:
536 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
537 break;
538
539 case POST_INC:
540 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
541 break;
542
543 default:
544 x = mark_constant_pool_use (x);
545 output_addr_const (stream, x);
546 break;
547 }
548 }
549
550 /* Print operand x (an rtx) in assembler syntax to file stream
551 according to modifier code.
552
553 '.' print a .s if insn needs delay slot
554 ',' print LOCAL_LABEL_PREFIX
555 '@' print trap, rte or rts depending upon pragma interruptness
556 '#' output a nop if there is nothing to put in the delay slot
557 ''' print likelihood suffix (/u for unlikely).
558 '>' print branch target if -fverbose-asm
559 'O' print a constant without the #
560 'R' print the LSW of a dp value - changes if in little endian
561 'S' print the MSW of a dp value - changes if in little endian
562 'T' print the next word of a dp value - same as 'R' in big endian mode.
563 'M' print an `x' if `m' will print `base,index'.
564 'N' print 'r63' if the operand is (const_int 0).
565 'd' print a V2SF reg as dN instead of fpN.
566 'm' print a pair `base,offset' or `base,index', for LD and ST.
567 'U' Likewise for {LD,ST}{HI,LO}.
568 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
569 'o' output an operator. */
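/* Illustrative example, not tied to any particular pattern: for a DImode
   value held in the register pair r2/r3, '%S0' prints the MSW register and
   '%R0' the LSW register, i.e. "r2" and "r3" when compiling big endian, and
   "r3" and "r2" with -ml (little endian), following the MSW / LSW macros
   defined near the top of this file.  */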
570
571 void
572 print_operand (FILE *stream, rtx x, int code)
573 {
574 int regno;
575 enum machine_mode mode;
576
577 switch (code)
578 {
579 case '.':
580 if (final_sequence
581 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
582 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
583 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
584 break;
585 case ',':
586 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
587 break;
588 case '@':
589 if (trap_exit)
590 fprintf (stream, "trapa #%d", trap_exit);
591 else if (sh_cfun_interrupt_handler_p ())
592 fprintf (stream, "rte");
593 else
594 fprintf (stream, "rts");
595 break;
596 case '#':
597 /* Output a nop if there's nothing in the delay slot. */
598 if (dbr_sequence_length () == 0)
599 fprintf (stream, "\n\tnop");
600 break;
601 case '\'':
602 {
603 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
604
605 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
606 fputs ("/u", stream);
607 break;
608 }
609 case '>':
610 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
611 {
612 fputs ("\t! target: ", stream);
613 output_addr_const (stream, JUMP_LABEL (current_output_insn));
614 }
615 break;
616 case 'O':
617 x = mark_constant_pool_use (x);
618 output_addr_const (stream, x);
619 break;
620 case 'R':
621 fputs (reg_names[REGNO (x) + LSW], (stream));
622 break;
623 case 'S':
624 fputs (reg_names[REGNO (x) + MSW], (stream));
625 break;
626 case 'T':
627 /* Next word of a double. */
628 switch (GET_CODE (x))
629 {
630 case REG:
631 fputs (reg_names[REGNO (x) + 1], (stream));
632 break;
633 case MEM:
634 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
635 && GET_CODE (XEXP (x, 0)) != POST_INC)
636 x = adjust_address (x, SImode, 4);
637 print_operand_address (stream, XEXP (x, 0));
638 break;
639 default:
640 break;
641 }
642 break;
643 case 'o':
644 switch (GET_CODE (x))
645 {
646 case PLUS: fputs ("add", stream); break;
647 case MINUS: fputs ("sub", stream); break;
648 case MULT: fputs ("mul", stream); break;
649 case DIV: fputs ("div", stream); break;
650 case EQ: fputs ("eq", stream); break;
651 case NE: fputs ("ne", stream); break;
652 case GT: case LT: fputs ("gt", stream); break;
653 case GE: case LE: fputs ("ge", stream); break;
654 case GTU: case LTU: fputs ("gtu", stream); break;
655 case GEU: case LEU: fputs ("geu", stream); break;
656 default:
657 break;
658 }
659 break;
660 case 'M':
661 if (GET_CODE (x) == MEM
662 && GET_CODE (XEXP (x, 0)) == PLUS
663 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
664 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
665 fputc ('x', stream);
666 break;
667
668 case 'm':
669 gcc_assert (GET_CODE (x) == MEM);
670 x = XEXP (x, 0);
671 /* Fall through. */
672 case 'U':
673 switch (GET_CODE (x))
674 {
675 case REG:
676 case SUBREG:
677 print_operand (stream, x, 0);
678 fputs (", 0", stream);
679 break;
680
681 case PLUS:
682 print_operand (stream, XEXP (x, 0), 0);
683 fputs (", ", stream);
684 print_operand (stream, XEXP (x, 1), 0);
685 break;
686
687 default:
688 gcc_unreachable ();
689 }
690 break;
691
692 case 'd':
693 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
694
695 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
696 break;
697
698 case 'N':
699 if (x == CONST0_RTX (GET_MODE (x)))
700 {
701 fprintf ((stream), "r63");
702 break;
703 }
704 goto default_output;
705 case 'u':
706 if (GET_CODE (x) == CONST_INT)
707 {
708 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
709 break;
710 }
711 /* Fall through. */
712
713 default_output:
714 default:
715 regno = 0;
716 mode = GET_MODE (x);
717
718 switch (GET_CODE (x))
719 {
720 case TRUNCATE:
721 {
722 rtx inner = XEXP (x, 0);
723 int offset = 0;
724 enum machine_mode inner_mode;
725
726 /* We might see SUBREGs with vector mode registers inside. */
727 if (GET_CODE (inner) == SUBREG
728 && (GET_MODE_SIZE (GET_MODE (inner))
729 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
730 && subreg_lowpart_p (inner))
731 inner = SUBREG_REG (inner);
732 if (GET_CODE (inner) == CONST_INT)
733 {
734 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
735 goto default_output;
736 }
737 inner_mode = GET_MODE (inner);
738 if (GET_CODE (inner) == SUBREG
739 && (GET_MODE_SIZE (GET_MODE (inner))
740 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
741 && GET_CODE (SUBREG_REG (inner)) == REG)
742 {
743 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
744 GET_MODE (SUBREG_REG (inner)),
745 SUBREG_BYTE (inner),
746 GET_MODE (inner));
747 inner = SUBREG_REG (inner);
748 }
749 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
750 abort ();
751 /* Floating point register pairs are always big endian;
752 general purpose registers are 64 bit wide. */
753 regno = REGNO (inner);
754 regno = (HARD_REGNO_NREGS (regno, inner_mode)
755 - HARD_REGNO_NREGS (regno, mode))
756 + offset;
757 x = inner;
758 goto reg;
759 }
760 case SIGN_EXTEND:
761 x = XEXP (x, 0);
762 goto reg;
763 /* FIXME: We need this on SHmedia32 because reload generates
764 some sign-extended HI or QI loads into DImode registers
765 but, because Pmode is SImode, the address ends up with a
766 subreg:SI of the DImode register. Maybe reload should be
767 fixed so as to apply alter_subreg to such loads? */
768 case IF_THEN_ELSE:
769 gcc_assert (trapping_target_operand (x, VOIDmode));
770 x = XEXP (XEXP (x, 2), 0);
771 goto default_output;
772 case SUBREG:
773 gcc_assert (SUBREG_BYTE (x) == 0
774 && GET_CODE (SUBREG_REG (x)) == REG);
775
776 x = SUBREG_REG (x);
777 /* Fall through. */
778
779 reg:
780 case REG:
781 regno += REGNO (x);
782 if (FP_REGISTER_P (regno)
783 && mode == V16SFmode)
784 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
785 else if (FP_REGISTER_P (REGNO (x))
786 && mode == V4SFmode)
787 fprintf ((stream), "fv%s", reg_names[regno] + 2);
788 else if (GET_CODE (x) == REG
789 && mode == V2SFmode)
790 fprintf ((stream), "fp%s", reg_names[regno] + 2);
791 else if (FP_REGISTER_P (REGNO (x))
792 && GET_MODE_SIZE (mode) > 4)
793 fprintf ((stream), "d%s", reg_names[regno] + 1);
794 else
795 fputs (reg_names[regno], (stream));
796 break;
797
798 case MEM:
799 output_address (XEXP (x, 0));
800 break;
801
802 case CONST:
803 if (TARGET_SHMEDIA
804 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
805 && (GET_MODE (XEXP (x, 0)) == DImode
806 || GET_MODE (XEXP (x, 0)) == SImode)
807 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
808 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
809 {
810 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
811
812 fputc ('(', stream);
813 if (GET_CODE (val) == ASHIFTRT)
814 {
815 fputc ('(', stream);
816 if (GET_CODE (XEXP (val, 0)) == CONST)
817 fputc ('(', stream);
818 output_addr_const (stream, XEXP (val, 0));
819 if (GET_CODE (XEXP (val, 0)) == CONST)
820 fputc (')', stream);
821 fputs (" >> ", stream);
822 output_addr_const (stream, XEXP (val, 1));
823 fputc (')', stream);
824 }
825 else
826 {
827 if (GET_CODE (val) == CONST)
828 fputc ('(', stream);
829 output_addr_const (stream, val);
830 if (GET_CODE (val) == CONST)
831 fputc (')', stream);
832 }
833 fputs (" & 65535)", stream);
834 break;
835 }
836
837 /* Fall through. */
838 default:
839 if (TARGET_SH1)
840 fputc ('#', stream);
841 output_addr_const (stream, x);
842 break;
843 }
844 break;
845 }
846 }
847 \f
848 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
849 static void
850 force_into (rtx value, rtx target)
851 {
852 value = force_operand (value, target);
853 if (! rtx_equal_p (value, target))
854 emit_insn (gen_move_insn (target, value));
855 }
856
857 /* Emit code to perform a block move. Choose the best method.
858
859 OPERANDS[0] is the destination.
860 OPERANDS[1] is the source.
861 OPERANDS[2] is the size.
862 OPERANDS[3] is the alignment safe to use. */
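/* As an illustration of the paths below: a constant 16-byte copy with 4-byte
   alignment on a target without TARGET_HARD_SH4 takes the "bytes < 64" case
   and becomes a call to the __movmemSI16 support routine, with the
   destination address forced into r4 and the source address into r5.  */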
863
864 int
865 expand_block_move (rtx *operands)
866 {
867 int align = INTVAL (operands[3]);
868 int constp = (GET_CODE (operands[2]) == CONST_INT);
869 int bytes = (constp ? INTVAL (operands[2]) : 0);
870
871 if (! constp)
872 return 0;
873
874 /* If we could use mov.l to move words and dest is word-aligned, we
875 can use movua.l for loads and still generate a relatively short
876 and efficient sequence. */
877 if (TARGET_SH4A_ARCH && align < 4
878 && MEM_ALIGN (operands[0]) >= 32
879 && can_move_by_pieces (bytes, 32))
880 {
881 rtx dest = copy_rtx (operands[0]);
882 rtx src = copy_rtx (operands[1]);
883 /* We could use different pseudos for each copied word, but
884 since movua can only load into r0, it's kind of
885 pointless. */
886 rtx temp = gen_reg_rtx (SImode);
887 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
888 int copied = 0;
889
890 while (copied + 4 <= bytes)
891 {
892 rtx to = adjust_address (dest, SImode, copied);
893 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
894
895 emit_insn (gen_movua (temp, from));
896 emit_move_insn (src_addr, plus_constant (src_addr, 4));
897 emit_move_insn (to, temp);
898 copied += 4;
899 }
900
901 if (copied < bytes)
902 move_by_pieces (adjust_address (dest, BLKmode, copied),
903 adjust_automodify_address (src, BLKmode,
904 src_addr, copied),
905 bytes - copied, align, 0);
906
907 return 1;
908 }
909
910 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
911 alignment, or if it isn't a multiple of 4 bytes, then fail. */
912 if (align < 4 || (bytes % 4 != 0))
913 return 0;
914
915 if (TARGET_HARD_SH4)
916 {
917 if (bytes < 12)
918 return 0;
919 else if (bytes == 12)
920 {
921 rtx func_addr_rtx = gen_reg_rtx (Pmode);
922 rtx r4 = gen_rtx_REG (SImode, 4);
923 rtx r5 = gen_rtx_REG (SImode, 5);
924
925 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
926 force_into (XEXP (operands[0], 0), r4);
927 force_into (XEXP (operands[1], 0), r5);
928 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
929 return 1;
930 }
931 else if (! TARGET_SMALLCODE)
932 {
933 const char *entry_name;
934 rtx func_addr_rtx = gen_reg_rtx (Pmode);
935 int dwords;
936 rtx r4 = gen_rtx_REG (SImode, 4);
937 rtx r5 = gen_rtx_REG (SImode, 5);
938 rtx r6 = gen_rtx_REG (SImode, 6);
939
940 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
941 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
942 force_into (XEXP (operands[0], 0), r4);
943 force_into (XEXP (operands[1], 0), r5);
944
945 dwords = bytes >> 3;
946 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
947 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
948 return 1;
949 }
950 else
951 return 0;
952 }
953 if (bytes < 64)
954 {
955 char entry[30];
956 rtx func_addr_rtx = gen_reg_rtx (Pmode);
957 rtx r4 = gen_rtx_REG (SImode, 4);
958 rtx r5 = gen_rtx_REG (SImode, 5);
959
960 sprintf (entry, "__movmemSI%d", bytes);
961 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
962 force_into (XEXP (operands[0], 0), r4);
963 force_into (XEXP (operands[1], 0), r5);
964 emit_insn (gen_block_move_real (func_addr_rtx));
965 return 1;
966 }
967
968 /* This is the same number of bytes as a memcpy call, but to a different
969 less common function name, so this will occasionally use more space. */
970 if (! TARGET_SMALLCODE)
971 {
972 rtx func_addr_rtx = gen_reg_rtx (Pmode);
973 int final_switch, while_loop;
974 rtx r4 = gen_rtx_REG (SImode, 4);
975 rtx r5 = gen_rtx_REG (SImode, 5);
976 rtx r6 = gen_rtx_REG (SImode, 6);
977
978 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
979 force_into (XEXP (operands[0], 0), r4);
980 force_into (XEXP (operands[1], 0), r5);
981
982 /* r6 controls the size of the move. 16 is decremented from it
983 for each 64 bytes moved. Then the negative bit left over is used
984 as an index into a list of move instructions. e.g., a 72 byte move
985 would be set up with size(r6) = 14, for one iteration through the
986 big while loop, and a switch of -2 for the last part. */
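/* A second, purely arithmetic illustration of the two expressions below:
   a 200 byte move is 50 words, so final_switch = 16 - (50 % 16) = 14 and
   while_loop = ((50 / 16) - 1) * 16 = 32, giving an initial r6 of 46.  */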
987
988 final_switch = 16 - ((bytes / 4) % 16);
989 while_loop = ((bytes / 4) / 16 - 1) * 16;
990 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
991 emit_insn (gen_block_lump_real (func_addr_rtx));
992 return 1;
993 }
994
995 return 0;
996 }
997
998 /* Prepare operands for a move define_expand; specifically, one of the
999 operands must be in a register. */
1000
1001 int
1002 prepare_move_operands (rtx operands[], enum machine_mode mode)
1003 {
1004 if ((mode == SImode || mode == DImode)
1005 && flag_pic
1006 && ! ((mode == Pmode || mode == ptr_mode)
1007 && tls_symbolic_operand (operands[1], Pmode) != 0))
1008 {
1009 rtx temp;
1010 if (SYMBOLIC_CONST_P (operands[1]))
1011 {
1012 if (GET_CODE (operands[0]) == MEM)
1013 operands[1] = force_reg (Pmode, operands[1]);
1014 else if (TARGET_SHMEDIA
1015 && GET_CODE (operands[1]) == LABEL_REF
1016 && target_reg_operand (operands[0], mode))
1017 /* It's ok. */;
1018 else
1019 {
1020 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1021 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1022 }
1023 }
1024 else if (GET_CODE (operands[1]) == CONST
1025 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1026 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1027 {
1028 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1029 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1030 mode, temp);
1031 operands[1] = expand_binop (mode, add_optab, temp,
1032 XEXP (XEXP (operands[1], 0), 1),
1033 no_new_pseudos ? temp
1034 : gen_reg_rtx (Pmode),
1035 0, OPTAB_LIB_WIDEN);
1036 }
1037 }
1038
1039 if (! reload_in_progress && ! reload_completed)
1040 {
1041 /* Copy the source to a register if both operands aren't registers. */
1042 if (! register_operand (operands[0], mode)
1043 && ! sh_register_operand (operands[1], mode))
1044 operands[1] = copy_to_mode_reg (mode, operands[1]);
1045
1046 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1047 {
1048 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1049 except that we can't use that function because it is static. */
1050 rtx new = change_address (operands[0], mode, 0);
1051 MEM_COPY_ATTRIBUTES (new, operands[0]);
1052 operands[0] = new;
1053 }
1054
1055 /* This case can happen while generating code to move the result
1056 of a library call to the target. Reject `st r0,@(rX,rY)' because
1057 reload will fail to find a spill register for rX, since r0 is already
1058 being used for the source. */
1059 else if (TARGET_SH1
1060 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1061 && GET_CODE (operands[0]) == MEM
1062 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1063 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1064 operands[1] = copy_to_mode_reg (mode, operands[1]);
1065 }
1066
1067 if (mode == Pmode || mode == ptr_mode)
1068 {
1069 rtx op0, op1;
1070 enum tls_model tls_kind;
1071
1072 op0 = operands[0];
1073 op1 = operands[1];
1074 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1075 {
1076 rtx tga_op1, tga_ret, tmp, tmp2;
1077
1078 switch (tls_kind)
1079 {
1080 case TLS_MODEL_GLOBAL_DYNAMIC:
1081 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1082 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1083 op1 = tga_ret;
1084 break;
1085
1086 case TLS_MODEL_LOCAL_DYNAMIC:
1087 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1088 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1089
1090 tmp = gen_reg_rtx (Pmode);
1091 emit_move_insn (tmp, tga_ret);
1092
1093 if (register_operand (op0, Pmode))
1094 tmp2 = op0;
1095 else
1096 tmp2 = gen_reg_rtx (Pmode);
1097
1098 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1099 op1 = tmp2;
1100 break;
1101
1102 case TLS_MODEL_INITIAL_EXEC:
1103 if (! flag_pic)
1104 {
1105 /* Don't schedule insns for getting GOT address when
1106 the first scheduling pass is enabled, to avoid spill
1107 failures for R0. */
1108 if (flag_schedule_insns)
1109 emit_insn (gen_blockage ());
1110 emit_insn (gen_GOTaddr2picreg ());
1111 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1112 PIC_REG)));
1113 if (flag_schedule_insns)
1114 emit_insn (gen_blockage ());
1115 }
1116 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1117 tmp = gen_sym2GOTTPOFF (op1);
1118 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1119 op1 = tga_op1;
1120 break;
1121
1122 case TLS_MODEL_LOCAL_EXEC:
1123 tmp2 = gen_reg_rtx (Pmode);
1124 emit_insn (gen_load_gbr (tmp2));
1125 tmp = gen_reg_rtx (Pmode);
1126 emit_insn (gen_symTPOFF2reg (tmp, op1));
1127
1128 if (register_operand (op0, Pmode))
1129 op1 = op0;
1130 else
1131 op1 = gen_reg_rtx (Pmode);
1132
1133 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1134 break;
1135
1136 default:
1137 gcc_unreachable ();
1138 }
1139 operands[1] = op1;
1140 }
1141 }
1142
1143 return 0;
1144 }
1145
1146 /* Prepare the operands for an scc instruction; make sure that the
1147 compare has been done. */
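/* For example, since SH has no cmp/lt style compare instructions, an LT
   comparison of (op0, op1) is canonicalized below into a GT comparison of
   (op1, op0) before the compare insn that sets the T register is emitted.  */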
1148 rtx
1149 prepare_scc_operands (enum rtx_code code)
1150 {
1151 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1152 enum rtx_code oldcode = code;
1153 enum machine_mode mode;
1154
1155 /* First need a compare insn. */
1156 switch (code)
1157 {
1158 case NE:
1159 /* It isn't possible to handle this case. */
1160 gcc_unreachable ();
1161 case LT:
1162 code = GT;
1163 break;
1164 case LE:
1165 code = GE;
1166 break;
1167 case LTU:
1168 code = GTU;
1169 break;
1170 case LEU:
1171 code = GEU;
1172 break;
1173 default:
1174 break;
1175 }
1176 if (code != oldcode)
1177 {
1178 rtx tmp = sh_compare_op0;
1179 sh_compare_op0 = sh_compare_op1;
1180 sh_compare_op1 = tmp;
1181 }
1182
1183 mode = GET_MODE (sh_compare_op0);
1184 if (mode == VOIDmode)
1185 mode = GET_MODE (sh_compare_op1);
1186
1187 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1188 if ((code != EQ && code != NE
1189 && (sh_compare_op1 != const0_rtx
1190 || code == GTU || code == GEU || code == LTU || code == LEU))
1191 || (mode == DImode && sh_compare_op1 != const0_rtx)
1192 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1193 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1194
1195 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1196 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1197 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1198 gen_rtx_SET (VOIDmode, t_reg,
1199 gen_rtx_fmt_ee (code, SImode,
1200 sh_compare_op0, sh_compare_op1)),
1201 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1202 else
1203 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1204 gen_rtx_fmt_ee (code, SImode,
1205 sh_compare_op0, sh_compare_op1)));
1206
1207 return t_reg;
1208 }
1209
1210 /* Called from the md file, set up the operands of a compare instruction. */
1211
1212 void
1213 from_compare (rtx *operands, int code)
1214 {
1215 enum machine_mode mode = GET_MODE (sh_compare_op0);
1216 rtx insn;
1217 if (mode == VOIDmode)
1218 mode = GET_MODE (sh_compare_op1);
1219 if (code != EQ
1220 || mode == DImode
1221 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1222 {
1223 /* Force args into regs, since we can't use constants here. */
1224 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1225 if (sh_compare_op1 != const0_rtx
1226 || code == GTU || code == GEU
1227 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1228 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1229 }
1230 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1231 {
1232 from_compare (operands, GT);
1233 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1234 }
1235 else
1236 insn = gen_rtx_SET (VOIDmode,
1237 gen_rtx_REG (SImode, T_REG),
1238 gen_rtx_fmt_ee (code, SImode,
1239 sh_compare_op0, sh_compare_op1));
1240 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1241 {
1242 insn = gen_rtx_PARALLEL (VOIDmode,
1243 gen_rtvec (2, insn,
1244 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1245 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1246 }
1247 else
1248 emit_insn (insn);
1249 }
1250 \f
1251 /* Functions to output assembly code. */
1252
1253 /* Return a sequence of instructions to perform DI or DF move.
1254
1255 Since the SH cannot move a DI or DF in one instruction, we have
1256 to take care when we see overlapping source and dest registers. */
1257
1258 const char *
1259 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1260 enum machine_mode mode)
1261 {
1262 rtx dst = operands[0];
1263 rtx src = operands[1];
1264
1265 if (GET_CODE (dst) == MEM
1266 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1267 return "mov.l %T1,%0\n\tmov.l %1,%0";
1268
1269 if (register_operand (dst, mode)
1270 && register_operand (src, mode))
1271 {
1272 if (REGNO (src) == MACH_REG)
1273 return "sts mach,%S0\n\tsts macl,%R0";
1274
1275 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1276 when mov.d r1,r0 do r1->r0 then r2->r1. */
1277
1278 if (REGNO (src) + 1 == REGNO (dst))
1279 return "mov %T1,%T0\n\tmov %1,%0";
1280 else
1281 return "mov %1,%0\n\tmov %T1,%T0";
1282 }
1283 else if (GET_CODE (src) == CONST_INT)
1284 {
1285 if (INTVAL (src) < 0)
1286 output_asm_insn ("mov #-1,%S0", operands);
1287 else
1288 output_asm_insn ("mov #0,%S0", operands);
1289
1290 return "mov %1,%R0";
1291 }
1292 else if (GET_CODE (src) == MEM)
1293 {
1294 int ptrreg = -1;
1295 int dreg = REGNO (dst);
1296 rtx inside = XEXP (src, 0);
1297
1298 switch (GET_CODE (inside))
1299 {
1300 case REG:
1301 ptrreg = REGNO (inside);
1302 break;
1303
1304 case SUBREG:
1305 ptrreg = subreg_regno (inside);
1306 break;
1307
1308 case PLUS:
1309 ptrreg = REGNO (XEXP (inside, 0));
1310 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1311 an offsettable address. Unfortunately, offsettable addresses use
1312 QImode to check the offset, and a QImode offsettable address
1313 requires r0 for the other operand, which is not currently
1314 supported, so we can't use the 'o' constraint.
1315 Thus we must check for and handle r0+REG addresses here.
1316 We punt for now, since this is likely very rare. */
1317 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1318 break;
1319
1320 case LABEL_REF:
1321 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1322 case POST_INC:
1323 return "mov.l %1,%0\n\tmov.l %1,%T0";
1324 default:
1325 gcc_unreachable ();
1326 }
1327
1328 /* Work out the safe way to copy. Copy into the second half first. */
1329 if (dreg == ptrreg)
1330 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1331 }
1332
1333 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1334 }
1335
1336 /* Print an instruction which would have gone into a delay slot after
1337 another instruction, but couldn't because the other instruction expanded
1338 into a sequence where putting the slot insn at the end wouldn't work. */
1339
1340 static void
1341 print_slot (rtx insn)
1342 {
1343 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1344
1345 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1346 }
1347
1348 const char *
1349 output_far_jump (rtx insn, rtx op)
1350 {
1351 struct { rtx lab, reg, op; } this;
1352 rtx braf_base_lab = NULL_RTX;
1353 const char *jump;
1354 int far;
1355 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1356 rtx prev;
1357
1358 this.lab = gen_label_rtx ();
1359
1360 if (TARGET_SH2
1361 && offset >= -32764
1362 && offset - get_attr_length (insn) <= 32766)
1363 {
1364 far = 0;
1365 jump = "mov.w %O0,%1; braf %1";
1366 }
1367 else
1368 {
1369 far = 1;
1370 if (flag_pic)
1371 {
1372 if (TARGET_SH2)
1373 jump = "mov.l %O0,%1; braf %1";
1374 else
1375 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1376 }
1377 else
1378 jump = "mov.l %O0,%1; jmp @%1";
1379 }
1380 /* If we have a scratch register available, use it. */
1381 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1382 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1383 {
1384 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1385 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1386 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1387 output_asm_insn (jump, &this.lab);
1388 if (dbr_sequence_length ())
1389 print_slot (final_sequence);
1390 else
1391 output_asm_insn ("nop", 0);
1392 }
1393 else
1394 {
1395 /* Output the delay slot insn first if any. */
1396 if (dbr_sequence_length ())
1397 print_slot (final_sequence);
1398
1399 this.reg = gen_rtx_REG (SImode, 13);
1400 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1401 Fortunately, MACL is fixed and call-clobbered, and we never
1402 need its value across jumps, so save r13 in it instead of in
1403 the stack. */
1404 if (TARGET_SH5)
1405 output_asm_insn ("lds r13, macl", 0);
1406 else
1407 output_asm_insn ("mov.l r13,@-r15", 0);
1408 output_asm_insn (jump, &this.lab);
1409 if (TARGET_SH5)
1410 output_asm_insn ("sts macl, r13", 0);
1411 else
1412 output_asm_insn ("mov.l @r15+,r13", 0);
1413 }
1414 if (far && flag_pic && TARGET_SH2)
1415 {
1416 braf_base_lab = gen_label_rtx ();
1417 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1418 CODE_LABEL_NUMBER (braf_base_lab));
1419 }
1420 if (far)
1421 output_asm_insn (".align 2", 0);
1422 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1423 this.op = op;
1424 if (far && flag_pic)
1425 {
1426 if (TARGET_SH2)
1427 this.lab = braf_base_lab;
1428 output_asm_insn (".long %O2-%O0", &this.lab);
1429 }
1430 else
1431 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1432 return "";
1433 }
1434
1435 /* Local label counter, used for constants in the pool and inside
1436 pattern branches. */
1437
1438 static int lf = 100;
1439
1440 /* Output code for ordinary branches. */
1441
1442 const char *
1443 output_branch (int logic, rtx insn, rtx *operands)
1444 {
1445 switch (get_attr_length (insn))
1446 {
1447 case 6:
1448 /* This can happen if filling the delay slot has caused a forward
1449 branch to exceed its range (we could reverse it, but only
1450 when we know we won't overextend other branches; this should
1451 best be handled by relaxation).
1452 It can also happen when other condbranches hoist delay slot insn
1453 from their destination, thus leading to code size increase.
1454 But the branch will still be in the range -4092..+4098 bytes. */
1455
1456 if (! TARGET_RELAX)
1457 {
1458 int label = lf++;
1459 /* The call to print_slot will clobber the operands. */
1460 rtx op0 = operands[0];
1461
1462 /* If the instruction in the delay slot is annulled (true), then
1463 there is no delay slot where we can put it now. The only safe
1464 place for it is after the label. final will do that by default. */
1465
1466 if (final_sequence
1467 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1468 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1469 {
1470 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1471 ASSEMBLER_DIALECT ? "/" : ".", label);
1472 print_slot (final_sequence);
1473 }
1474 else
1475 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1476
1477 output_asm_insn ("bra\t%l0", &op0);
1478 fprintf (asm_out_file, "\tnop\n");
1479 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1480
1481 return "";
1482 }
1483 /* When relaxing, handle this like a short branch. The linker
1484 will fix it up if it still doesn't fit after relaxation. */
1485 case 2:
1486 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1487
1488 /* These are for SH2e, in which we have to account for the
1489 extra nop because of the hardware bug in annulled branches. */
1490 case 8:
1491 if (! TARGET_RELAX)
1492 {
1493 int label = lf++;
1494
1495 gcc_assert (!final_sequence
1496 || !(INSN_ANNULLED_BRANCH_P
1497 (XVECEXP (final_sequence, 0, 0))));
1498 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1499 logic ? "f" : "t",
1500 ASSEMBLER_DIALECT ? "/" : ".", label);
1501 fprintf (asm_out_file, "\tnop\n");
1502 output_asm_insn ("bra\t%l0", operands);
1503 fprintf (asm_out_file, "\tnop\n");
1504 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1505
1506 return "";
1507 }
1508 /* When relaxing, fall through. */
1509 case 4:
1510 {
1511 char buffer[10];
1512
1513 sprintf (buffer, "b%s%ss\t%%l0",
1514 logic ? "t" : "f",
1515 ASSEMBLER_DIALECT ? "/" : ".");
1516 output_asm_insn (buffer, &operands[0]);
1517 return "nop";
1518 }
1519
1520 default:
1521 /* There should be no longer branches now - that would
1522 indicate that something has destroyed the branches set
1523 up in machine_dependent_reorg. */
1524 gcc_unreachable ();
1525 }
1526 }
1527
1528 const char *
1529 output_branchy_insn (enum rtx_code code, const char *template,
1530 rtx insn, rtx *operands)
1531 {
1532 rtx next_insn = NEXT_INSN (insn);
1533
1534 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1535 {
1536 rtx src = SET_SRC (PATTERN (next_insn));
1537 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1538 {
1539 /* Following branch not taken */
1540 operands[9] = gen_label_rtx ();
1541 emit_label_after (operands[9], next_insn);
1542 INSN_ADDRESSES_NEW (operands[9],
1543 INSN_ADDRESSES (INSN_UID (next_insn))
1544 + get_attr_length (next_insn));
1545 return template;
1546 }
1547 else
1548 {
1549 int offset = (branch_dest (next_insn)
1550 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1551 if (offset >= -252 && offset <= 258)
1552 {
1553 if (GET_CODE (src) == IF_THEN_ELSE)
1554 /* branch_true */
1555 src = XEXP (src, 1);
1556 operands[9] = src;
1557 return template;
1558 }
1559 }
1560 }
1561 operands[9] = gen_label_rtx ();
1562 emit_label_after (operands[9], insn);
1563 INSN_ADDRESSES_NEW (operands[9],
1564 INSN_ADDRESSES (INSN_UID (insn))
1565 + get_attr_length (insn));
1566 return template;
1567 }
1568
1569 const char *
1570 output_ieee_ccmpeq (rtx insn, rtx *operands)
1571 {
1572 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1573 }
1574 \f
1575 /* Output the start of the assembler file. */
1576
1577 static void
1578 sh_file_start (void)
1579 {
1580 default_file_start ();
1581
1582 #ifdef SYMBIAN
1583 /* Declare the .directive section before it is used. */
1584 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1585 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1586 #endif
1587
1588 if (TARGET_ELF)
1589 /* We need to show the text section with the proper
1590 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1591 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1592 will complain. We can teach GAS specifically about the
1593 default attributes for our choice of text section, but
1594 then we would have to change GAS again if/when we change
1595 the text section name. */
1596 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1597 else
1598 /* Switch to the data section so that the coffsem symbol
1599 isn't in the text section. */
1600 data_section ();
1601
1602 if (TARGET_LITTLE_ENDIAN)
1603 fputs ("\t.little\n", asm_out_file);
1604
1605 if (!TARGET_ELF)
1606 {
1607 if (TARGET_SHCOMPACT)
1608 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1609 else if (TARGET_SHMEDIA)
1610 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1611 TARGET_SHMEDIA64 ? 64 : 32);
1612 }
1613 }
1614 \f
1615 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1616
1617 static bool
1618 unspec_caller_rtx_p (rtx pat)
1619 {
1620 switch (GET_CODE (pat))
1621 {
1622 case CONST:
1623 return unspec_caller_rtx_p (XEXP (pat, 0));
1624 case PLUS:
1625 case MINUS:
1626 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1627 return true;
1628 return unspec_caller_rtx_p (XEXP (pat, 1));
1629 case UNSPEC:
1630 if (XINT (pat, 1) == UNSPEC_CALLER)
1631 return true;
1632 default:
1633 break;
1634 }
1635
1636 return false;
1637 }
1638
1639 /* Indicate that INSN cannot be duplicated. This is true for an insn
1640 that generates a unique label. */
1641
1642 static bool
1643 sh_cannot_copy_insn_p (rtx insn)
1644 {
1645 rtx pat;
1646
1647 if (!reload_completed || !flag_pic)
1648 return false;
1649
1650 if (GET_CODE (insn) != INSN)
1651 return false;
1652 if (asm_noperands (insn) >= 0)
1653 return false;
1654
1655 pat = PATTERN (insn);
1656 if (GET_CODE (pat) != SET)
1657 return false;
1658 pat = SET_SRC (pat);
1659
1660 if (unspec_caller_rtx_p (pat))
1661 return true;
1662
1663 return false;
1664 }
1665 \f
1666 /* Actual number of instructions used to make a shift by N. */
1667 static const char ashiftrt_insns[] =
1668 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1669
1670 /* Left shift and logical right shift are the same. */
1671 static const char shift_insns[] =
1672 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1673
1674 /* Individual shift amounts needed to get the above length sequences.
1675 One bit right shifts clobber the T bit, so when possible, put one bit
1676 shifts in the middle of the sequence, so the ends are eligible for
1677 branch delay slots. */
1678 static const short shift_amounts[32][5] = {
1679 {0}, {1}, {2}, {2, 1},
1680 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1681 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1682 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1683 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1684 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1685 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1686 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
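/* For instance (purely illustrative), a constant left shift by 7 uses
   shift_amounts[7] = {2, 2, 1, 2}, i.e. shll2; shll2; shll; shll2: four
   insns, matching shift_insns[7], with the single-bit shift kept away from
   the ends of the sequence as described above.  */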
1687
1688 /* Likewise, but for shift amounts < 16, up to three highmost bits
1689 might be clobbered. This is typically used when combined with some
1690 kind of sign or zero extension. */
1691
1692 static const char ext_shift_insns[] =
1693 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1694
1695 static const short ext_shift_amounts[32][4] = {
1696 {0}, {1}, {2}, {2, 1},
1697 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1698 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1699 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1700 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1701 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1702 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1703 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1704
1705 /* Assuming we have a value that has been sign-extended by at least one bit,
1706 can we use ext_shift_amounts, with the last shift turned into an arithmetic
1707 shift, to shift it by N without losing data, and more quickly than by other means? */
1708 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
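/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15, so EXT_SHIFT_SIGNED
   accepts just those two shift counts.  */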
1709
1710 /* This is used in length attributes in sh.md to help compute the length
1711 of arbitrary constant shift instructions. */
1712
1713 int
1714 shift_insns_rtx (rtx insn)
1715 {
1716 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1717 int shift_count = INTVAL (XEXP (set_src, 1));
1718 enum rtx_code shift_code = GET_CODE (set_src);
1719
1720 switch (shift_code)
1721 {
1722 case ASHIFTRT:
1723 return ashiftrt_insns[shift_count];
1724 case LSHIFTRT:
1725 case ASHIFT:
1726 return shift_insns[shift_count];
1727 default:
1728 gcc_unreachable ();
1729 }
1730 }
1731
1732 /* Return the cost of a shift. */
1733
1734 static inline int
1735 shiftcosts (rtx x)
1736 {
1737 int value;
1738
1739 if (TARGET_SHMEDIA)
1740 return 1;
1741
1742 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1743 {
1744 if (GET_MODE (x) == DImode
1745 && GET_CODE (XEXP (x, 1)) == CONST_INT
1746 && INTVAL (XEXP (x, 1)) == 1)
1747 return 2;
1748
1749 /* Everything else is invalid, because there is no pattern for it. */
1750 return 10000;
1751 }
1752 /* If the shift is by a non-constant, then it will be expensive. */
1753 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1754 return SH_DYNAMIC_SHIFT_COST;
1755
1756 value = INTVAL (XEXP (x, 1));
1757
1758 /* Otherwise, return the true cost in instructions. */
1759 if (GET_CODE (x) == ASHIFTRT)
1760 {
1761 int cost = ashiftrt_insns[value];
1762 /* If SH3, then we put the constant in a reg and use shad. */
1763 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1764 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1765 return cost;
1766 }
1767 else
1768 return shift_insns[value];
1769 }
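/* For example, (ashift:SI (reg) (const_int 16)) costs shift_insns[16] == 1
   (a single shll16), while a shift by a register operand costs
   SH_DYNAMIC_SHIFT_COST.  */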
1770
1771 /* Return the cost of an AND operation. */
1772
1773 static inline int
1774 andcosts (rtx x)
1775 {
1776 int i;
1777
1778 /* ANDing with a register is a single-cycle AND instruction. */
1779 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1780 return 1;
1781
1782 i = INTVAL (XEXP (x, 1));
1783
1784 if (TARGET_SHMEDIA)
1785 {
1786 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1787 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1788 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1789 return 1;
1790 else
1791 return 2;
1792 }
1793
1794 /* These constants are single cycle extu.[bw] instructions. */
1795 if (i == 0xff || i == 0xffff)
1796 return 1;
1797 /* Constants that can be used in an and immediate instruction in a single
1798 cycle, but this requires r0, so make it a little more expensive. */
1799 if (CONST_OK_FOR_K08 (i))
1800 return 2;
1801 /* Constants that can be loaded with a mov immediate and an and.
1802 This case is probably unnecessary. */
1803 if (CONST_OK_FOR_I08 (i))
1804 return 2;
1805 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1806 This case is probably unnecessary. */
1807 return 3;
1808 }
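/* For example, (and:SI (reg) (const_int 0xff)) costs 1 (extu.b), while
   (and:SI (reg) (const_int 0x3f)) costs 2, since the and #imm,r0 form
   ties up r0 (assuming the usual K08 range of an unsigned 8-bit
   constant).  */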
1809
1810 /* Return the cost of an addition or a subtraction. */
1811
1812 static inline int
1813 addsubcosts (rtx x)
1814 {
1815 /* Adding a register is a single cycle insn. */
1816 if (GET_CODE (XEXP (x, 1)) == REG
1817 || GET_CODE (XEXP (x, 1)) == SUBREG)
1818 return 1;
1819
1820 /* Likewise for small constants. */
1821 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1822 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1823 return 1;
1824
1825 if (TARGET_SHMEDIA)
1826 switch (GET_CODE (XEXP (x, 1)))
1827 {
1828 case CONST:
1829 case LABEL_REF:
1830 case SYMBOL_REF:
1831 return TARGET_SHMEDIA64 ? 5 : 3;
1832
1833 case CONST_INT:
1834 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1835 return 2;
1836 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1837 return 3;
1838 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1839 return 4;
1840
1841 /* Fall through. */
1842 default:
1843 return 5;
1844 }
1845
1846 /* Any other constant requires a 2 cycle pc-relative load plus an
1847 addition. */
1848 return 3;
1849 }
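/* For example, outside SHmedia (plus:SI (reg) (const_int 100)) costs 1
   because the constant fits the add #imm,rn range, while adding 0x12345
   costs 3 because that constant must first be loaded from the constant
   pool.  */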
1850
1851 /* Return the cost of a multiply. */
1852 static inline int
1853 multcosts (rtx x ATTRIBUTE_UNUSED)
1854 {
1855 if (*sh_multcost_str)
1856 return atoi (sh_multcost_str);
1857 if (TARGET_SHMEDIA)
1858 /* ??? We have a mul insn, but it has a latency of three, and doesn't
1859 accept constants. Ideally, we would use a cost of one or two and
1860 add the cost of the operand, but disregard the latter when inside loops
1861 and loop invariant code motion is still to follow.
1862 Using a multiply first and splitting it later if it's a loss
1863 doesn't work because of different sign / zero extension semantics
1864 of multiplies vs. shifts. */
1865 return TARGET_SMALLCODE ? 2 : 3;
1866
1867 if (TARGET_SH2)
1868 {
1869 /* We have a mul insn, so we can never take more than the mul and the
1870 read of the mac reg, but count more because of the latency and extra
1871 reg usage. */
1872 if (TARGET_SMALLCODE)
1873 return 2;
1874 return 3;
1875 }
1876
1877 /* If we're aiming at small code, then just count the number of
1878 insns in a multiply call sequence. */
1879 if (TARGET_SMALLCODE)
1880 return 5;
1881
1882 /* Otherwise count all the insns in the routine we'd be calling, too. */
1883 return 20;
1884 }
1885
1886 /* Compute a (partial) cost for rtx X. Return true if the complete
1887 cost has been computed, and false if subexpressions should be
1888 scanned. In either case, *TOTAL contains the cost result. */
1889
1890 static bool
1891 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1892 {
1893 switch (code)
1894 {
1895 case CONST_INT:
1896 if (TARGET_SHMEDIA)
1897 {
1898 if (INTVAL (x) == 0)
1899 *total = 0;
1900 else if (outer_code == AND && and_operand ((x), DImode))
1901 *total = 0;
1902 else if ((outer_code == IOR || outer_code == XOR
1903 || outer_code == PLUS)
1904 && CONST_OK_FOR_I10 (INTVAL (x)))
1905 *total = 0;
1906 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1907 *total = COSTS_N_INSNS (outer_code != SET);
1908 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1909 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
1910 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1911 *total = COSTS_N_INSNS (3);
1912 else
1913 *total = COSTS_N_INSNS (4);
1914 return true;
1915 }
1916 if (CONST_OK_FOR_I08 (INTVAL (x)))
1917 *total = 0;
1918 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1919 && CONST_OK_FOR_K08 (INTVAL (x)))
1920 *total = 1;
1921 else
1922 *total = 8;
1923 return true;
1924
1925 case CONST:
1926 case LABEL_REF:
1927 case SYMBOL_REF:
1928 if (TARGET_SHMEDIA64)
1929 *total = COSTS_N_INSNS (4);
1930 else if (TARGET_SHMEDIA32)
1931 *total = COSTS_N_INSNS (2);
1932 else
1933 *total = 5;
1934 return true;
1935
1936 case CONST_DOUBLE:
1937 if (TARGET_SHMEDIA)
1938 *total = COSTS_N_INSNS (4);
1939 else
1940 *total = 10;
1941 return true;
1942 case CONST_VECTOR:
1943 if (x == CONST0_RTX (GET_MODE (x)))
1944 *total = 0;
1945 else if (sh_1el_vec (x, VOIDmode))
1946 *total = outer_code != SET;
1947 else if (sh_rep_vec (x, VOIDmode))
1948 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
1949 + (outer_code != SET));
1950 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
1951 return true;
1952
1953 case PLUS:
1954 case MINUS:
1955 *total = COSTS_N_INSNS (addsubcosts (x));
1956 return true;
1957
1958 case AND:
1959 *total = COSTS_N_INSNS (andcosts (x));
1960 return true;
1961
1962 case MULT:
1963 *total = COSTS_N_INSNS (multcosts (x));
1964 return true;
1965
1966 case ASHIFT:
1967 case ASHIFTRT:
1968 case LSHIFTRT:
1969 *total = COSTS_N_INSNS (shiftcosts (x));
1970 return true;
1971
1972 case DIV:
1973 case UDIV:
1974 case MOD:
1975 case UMOD:
1976 *total = COSTS_N_INSNS (20);
1977 return true;
1978
1979 case PARALLEL:
1980 if (sh_1el_vec (x, VOIDmode))
1981 *total = outer_code != SET;
1982 else if (sh_rep_vec (x, VOIDmode))
1983 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
1984 + (outer_code != SET));
1985 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
1986 return true;
1987
1988 case FLOAT:
1989 case FIX:
1990 *total = 100;
1991 return true;
1992
1993 default:
1994 return false;
1995 }
1996 }
1997
1998 /* Compute the cost of an address. For the SH, all valid addresses are
1999 the same cost. Use a slightly higher cost for reg + reg addressing,
2000 since it increases pressure on r0. */
2001
2002 static int
2003 sh_address_cost (rtx X)
2004 {
2005 return (GET_CODE (X) == PLUS
2006 && ! CONSTANT_P (XEXP (X, 1))
2007 && ! TARGET_SHMEDIA ? 1 : 0);
2008 }
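/* Thus a reg+reg address like @(r0,rn) gets a cost of 1 on non-SHmedia
   targets, while @rn and @(disp,rn) addresses cost 0.  */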
2009
2010 /* Code to expand a shift. */
2011
2012 void
2013 gen_ashift (int type, int n, rtx reg)
2014 {
2015 /* Negative values here come from the shift_amounts array. */
2016 if (n < 0)
2017 {
2018 if (type == ASHIFT)
2019 type = LSHIFTRT;
2020 else
2021 type = ASHIFT;
2022 n = -n;
2023 }
2024
2025 switch (type)
2026 {
2027 case ASHIFTRT:
2028 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2029 break;
2030 case LSHIFTRT:
2031 if (n == 1)
2032 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2033 else
2034 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2035 break;
2036 case ASHIFT:
2037 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2038 break;
2039 }
2040 }
2041
2042 /* Same for HImode */
2043
2044 void
2045 gen_ashift_hi (int type, int n, rtx reg)
2046 {
2047 /* Negative values here come from the shift_amounts array. */
2048 if (n < 0)
2049 {
2050 if (type == ASHIFT)
2051 type = LSHIFTRT;
2052 else
2053 type = ASHIFT;
2054 n = -n;
2055 }
2056
2057 switch (type)
2058 {
2059 case ASHIFTRT:
2060 case LSHIFTRT:
2061 /* We don't have HImode right shift operations because using the
2062 ordinary 32 bit shift instructions for that doesn't generate proper
2063 zero/sign extension.
2064 gen_ashift_hi is only called in contexts where we know that the
2065 sign extension works out correctly. */
2066 {
2067 int offset = 0;
2068 if (GET_CODE (reg) == SUBREG)
2069 {
2070 offset = SUBREG_BYTE (reg);
2071 reg = SUBREG_REG (reg);
2072 }
2073 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2074 break;
2075 }
2076 case ASHIFT:
2077 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2078 break;
2079 }
2080 }
2081
2082 /* Output RTL to split a constant shift into its component SH constant
2083 shift instructions. */
2084
2085 void
2086 gen_shifty_op (int code, rtx *operands)
2087 {
2088 int value = INTVAL (operands[2]);
2089 int max, i;
2090
2091 /* Truncate the shift count in case it is out of bounds. */
2092 value = value & 0x1f;
2093
2094 if (value == 31)
2095 {
2096 if (code == LSHIFTRT)
2097 {
2098 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2099 emit_insn (gen_movt (operands[0]));
2100 return;
2101 }
2102 else if (code == ASHIFT)
2103 {
2104 /* There is a two instruction sequence for 31 bit left shifts,
2105 but it requires r0. */
2106 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2107 {
2108 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2109 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2110 return;
2111 }
2112 }
2113 }
2114 else if (value == 0)
2115 {
2116 /* This can happen even when optimizing, if there were subregs before
2117 reload. Don't output a nop here, as this is never optimized away;
2118 use a no-op move instead. */
2119 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2120 return;
2121 }
2122
2123 max = shift_insns[value];
2124 for (i = 0; i < max; i++)
2125 gen_ashift (code, shift_amounts[value][i], operands[0]);
2126 }
2127
2128 /* Same as above, but optimized for values where the topmost bits don't
2129 matter. */
2130
2131 void
2132 gen_shifty_hi_op (int code, rtx *operands)
2133 {
2134 int value = INTVAL (operands[2]);
2135 int max, i;
2136 void (*gen_fun) (int, int, rtx);
2137
2138 /* This operation is used by and_shl for SImode values with a few
2139 high bits known to be cleared. */
2140 value &= 31;
2141 if (value == 0)
2142 {
2143 emit_insn (gen_nop ());
2144 return;
2145 }
2146
2147 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2148 if (code == ASHIFT)
2149 {
2150 max = ext_shift_insns[value];
2151 for (i = 0; i < max; i++)
2152 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2153 }
2154 else
2155 /* When shifting right, emit the shifts in reverse order, so that
2156 solitary negative values come first. */
2157 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2158 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2159 }
2160
2161 /* Output RTL for an arithmetic right shift. */
2162
2163 /* ??? Rewrite to use super-optimizer sequences. */
2164
2165 int
2166 expand_ashiftrt (rtx *operands)
2167 {
2168 rtx wrk;
2169 char func[18];
2170 int value;
2171
2172 if (TARGET_SH3)
2173 {
2174 if (GET_CODE (operands[2]) != CONST_INT)
2175 {
2176 rtx count = copy_to_mode_reg (SImode, operands[2]);
2177 emit_insn (gen_negsi2 (count, count));
2178 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2179 return 1;
2180 }
2181 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2182 > 1 + SH_DYNAMIC_SHIFT_COST)
2183 {
2184 rtx count
2185 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2186 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2187 return 1;
2188 }
2189 }
2190 if (GET_CODE (operands[2]) != CONST_INT)
2191 return 0;
2192
2193 value = INTVAL (operands[2]) & 31;
2194
2195 if (value == 31)
2196 {
2197 /* If we are called from abs expansion, arrange things so that we
2198 can use a single MT instruction that doesn't clobber the source,
2199 if LICM can hoist out the load of the constant zero. */
2200 if (currently_expanding_to_rtl)
2201 {
2202 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2203 operands[1]));
2204 emit_insn (gen_mov_neg_si_t (operands[0]));
2205 return 1;
2206 }
2207 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2208 return 1;
2209 }
2210 else if (value >= 16 && value <= 19)
2211 {
2212 wrk = gen_reg_rtx (SImode);
2213 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2214 value -= 16;
2215 while (value--)
2216 gen_ashift (ASHIFTRT, 1, wrk);
2217 emit_move_insn (operands[0], wrk);
2218 return 1;
2219 }
2220 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2221 else if (value <= 5)
2222 {
2223 wrk = gen_reg_rtx (SImode);
2224 emit_move_insn (wrk, operands[1]);
2225 while (value--)
2226 gen_ashift (ASHIFTRT, 1, wrk);
2227 emit_move_insn (operands[0], wrk);
2228 return 1;
2229 }
2230
2231 wrk = gen_reg_rtx (Pmode);
2232
2233 /* Load the value into an arg reg and call a helper. */
2234 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2235 sprintf (func, "__ashiftrt_r4_%d", value);
2236 function_symbol (wrk, func, SFUNC_STATIC);
2237 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2238 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2239 return 1;
2240 }
2241
2242 int
2243 sh_dynamicalize_shift_p (rtx count)
2244 {
2245 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2246 }
2247
2248 /* Try to find a good way to implement the combiner pattern
2249 [(set (match_operand:SI 0 "register_operand" "r")
2250 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2251 (match_operand:SI 2 "const_int_operand" "n"))
2252 (match_operand:SI 3 "const_int_operand" "n"))) .
2253 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2254 return 0 for simple right / left or left/right shift combination.
2255 return 1 for a combination of shifts with zero_extend.
2256 return 2 for a combination of shifts with an AND that needs r0.
2257 return 3 for a combination of shifts with an AND that needs an extra
2258 scratch register, when the three highmost bits of the AND mask are clear.
2259 return 4 for a combination of shifts with an AND that needs an extra
2260 scratch register, when any of the three highmost bits of the AND mask
2261 is set.
2262 If ATTRP is set, store an initial right shift width in ATTRP[0],
2263 and the instruction length in ATTRP[1]. These values are not valid
2264 when returning 0.
2265 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2266 shift_amounts for the last shift value that is to be used before the
2267 sign extend. */
2268 int
2269 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2270 {
2271 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2272 int left = INTVAL (left_rtx), right;
2273 int best = 0;
2274 int cost, best_cost = 10000;
2275 int best_right = 0, best_len = 0;
2276 int i;
2277 int can_ext;
2278
2279 if (left < 0 || left > 31)
2280 return 0;
2281 if (GET_CODE (mask_rtx) == CONST_INT)
2282 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2283 else
2284 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2285 /* Can this be expressed as a right shift / left shift pair? */
2286 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2287 right = exact_log2 (lsb);
2288 mask2 = ~(mask + lsb - 1);
2289 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2290 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2291 if (! mask2)
2292 best_cost = shift_insns[right] + shift_insns[right + left];
2293 /* mask has no trailing zeroes <==> ! right */
2294 else if (! right && mask2 == ~(lsb2 - 1))
2295 {
2296 int late_right = exact_log2 (lsb2);
2297 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2298 }
2299 /* Try to use zero extend. */
2300 if (mask2 == ~(lsb2 - 1))
2301 {
2302 int width, first;
2303
2304 for (width = 8; width <= 16; width += 8)
2305 {
2306 /* Can we zero-extend right away? */
2307 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2308 {
2309 cost
2310 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2311 if (cost < best_cost)
2312 {
2313 best = 1;
2314 best_cost = cost;
2315 best_right = right;
2316 best_len = cost;
2317 if (attrp)
2318 attrp[2] = -1;
2319 }
2320 continue;
2321 }
2322 /* ??? Could try to put zero extend into initial right shift,
2323 or even shift a bit left before the right shift. */
2324 /* Determine value of first part of left shift, to get to the
2325 zero extend cut-off point. */
2326 first = width - exact_log2 (lsb2) + right;
2327 if (first >= 0 && right + left - first >= 0)
2328 {
2329 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2330 + ext_shift_insns[right + left - first];
2331 if (cost < best_cost)
2332 {
2333 best = 1;
2334 best_cost = cost;
2335 best_right = right;
2336 best_len = cost;
2337 if (attrp)
2338 attrp[2] = first;
2339 }
2340 }
2341 }
2342 }
2343 /* Try to use r0 AND pattern */
2344 for (i = 0; i <= 2; i++)
2345 {
2346 if (i > right)
2347 break;
2348 if (! CONST_OK_FOR_K08 (mask >> i))
2349 continue;
2350 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2351 if (cost < best_cost)
2352 {
2353 best = 2;
2354 best_cost = cost;
2355 best_right = i;
2356 best_len = cost - 1;
2357 }
2358 }
2359 /* Try to use a scratch register to hold the AND operand. */
2360 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2361 for (i = 0; i <= 2; i++)
2362 {
2363 if (i > right)
2364 break;
2365 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2366 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2367 if (cost < best_cost)
2368 {
2369 best = 4 - can_ext;
2370 best_cost = cost;
2371 best_right = i;
2372 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2373 }
2374 }
2375
2376 if (attrp)
2377 {
2378 attrp[0] = best_right;
2379 attrp[1] = best_len;
2380 }
2381 return best;
2382 }
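/* For example, for (and:SI (ashift:SI (reg) (const_int 2)) (const_int 0x3fc))
   the mask reduces to 0xff once the left shift is accounted for, so the
   zero-extend alternative wins: kind 1, with a length of 2 insns
   (extu.b followed by shll2).  */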
2383
2384 /* This is used in length attributes of the unnamed instructions
2385 corresponding to shl_and_kind return values of 1 and 2. */
2386 int
2387 shl_and_length (rtx insn)
2388 {
2389 rtx set_src, left_rtx, mask_rtx;
2390 int attributes[3];
2391
2392 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2393 left_rtx = XEXP (XEXP (set_src, 0), 1);
2394 mask_rtx = XEXP (set_src, 1);
2395 shl_and_kind (left_rtx, mask_rtx, attributes);
2396 return attributes[1];
2397 }
2398
2399 /* This is used in length attribute of the and_shl_scratch instruction. */
2400
2401 int
2402 shl_and_scr_length (rtx insn)
2403 {
2404 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2405 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2406 rtx op = XEXP (set_src, 0);
2407 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2408 op = XEXP (XEXP (op, 0), 0);
2409 return len + shift_insns[INTVAL (XEXP (op, 1))];
2410 }
2411
2412 /* Generate rtl for instructions for which shl_and_kind advised a particular
2413 method of generating them, i.e. returned a nonzero kind; return 0 on success, -1 otherwise. */
2414
2415 int
2416 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2417 {
2418 int attributes[3];
2419 unsigned HOST_WIDE_INT mask;
2420 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2421 int right, total_shift;
2422 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2423
2424 right = attributes[0];
2425 total_shift = INTVAL (left_rtx) + right;
2426 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2427 switch (kind)
2428 {
2429 default:
2430 return -1;
2431 case 1:
2432 {
2433 int first = attributes[2];
2434 rtx operands[3];
2435
2436 if (first < 0)
2437 {
2438 emit_insn ((mask << right) <= 0xff
2439 ? gen_zero_extendqisi2 (dest,
2440 gen_lowpart (QImode, source))
2441 : gen_zero_extendhisi2 (dest,
2442 gen_lowpart (HImode, source)));
2443 source = dest;
2444 }
2445 if (source != dest)
2446 emit_insn (gen_movsi (dest, source));
2447 operands[0] = dest;
2448 if (right)
2449 {
2450 operands[2] = GEN_INT (right);
2451 gen_shifty_hi_op (LSHIFTRT, operands);
2452 }
2453 if (first > 0)
2454 {
2455 operands[2] = GEN_INT (first);
2456 gen_shifty_hi_op (ASHIFT, operands);
2457 total_shift -= first;
2458 mask <<= first;
2459 }
2460 if (first >= 0)
2461 emit_insn (mask <= 0xff
2462 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2463 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2464 if (total_shift > 0)
2465 {
2466 operands[2] = GEN_INT (total_shift);
2467 gen_shifty_hi_op (ASHIFT, operands);
2468 }
2469 break;
2470 }
2471 case 4:
2472 shift_gen_fun = gen_shifty_op;
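/* Fall through. */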
2473 case 3:
2474 /* If the topmost bit that matters is set, set the topmost bits
2475 that don't matter. This way, we might be able to get a shorter
2476 signed constant. */
2477 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2478 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
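/* Fall through. */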
2479 case 2:
2480 /* Don't expand fine-grained when combining, because that will
2481 make the pattern fail. */
2482 if (currently_expanding_to_rtl
2483 || reload_in_progress || reload_completed)
2484 {
2485 rtx operands[3];
2486
2487 /* Cases 3 and 4 should be handled by this split
2488 only while combining */
2489 gcc_assert (kind <= 2);
2490 if (right)
2491 {
2492 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2493 source = dest;
2494 }
2495 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2496 if (total_shift)
2497 {
2498 operands[0] = dest;
2499 operands[1] = dest;
2500 operands[2] = GEN_INT (total_shift);
2501 shift_gen_fun (ASHIFT, operands);
2502 }
2503 break;
2504 }
2505 else
2506 {
2507 int neg = 0;
2508 if (kind != 4 && total_shift < 16)
2509 {
2510 neg = -ext_shift_amounts[total_shift][1];
2511 if (neg > 0)
2512 neg -= ext_shift_amounts[total_shift][2];
2513 else
2514 neg = 0;
2515 }
2516 emit_insn (gen_and_shl_scratch (dest, source,
2517 GEN_INT (right),
2518 GEN_INT (mask),
2519 GEN_INT (total_shift + neg),
2520 GEN_INT (neg)));
2521 emit_insn (gen_movsi (dest, dest));
2522 break;
2523 }
2524 }
2525 return 0;
2526 }
2527
2528 /* Try to find a good way to implement the combiner pattern
2529 [(set (match_operand:SI 0 "register_operand" "=r")
2530 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2531 (match_operand:SI 2 "const_int_operand" "n")
2532 (match_operand:SI 3 "const_int_operand" "n")
2533 (const_int 0)))
2534 (clobber (reg:SI T_REG))]
2535 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2536 return 0 for simple left / right shift combination.
2537 return 1 for left shift / 8 bit sign extend / left shift.
2538 return 2 for left shift / 16 bit sign extend / left shift.
2539 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2540 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2541 return 5 for left shift / 16 bit sign extend / right shift
2542 return 6 for < 8 bit sign extend / left shift.
2543 return 7 for < 8 bit sign extend / left shift / single right shift.
2544 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2545
2546 int
2547 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2548 {
2549 int left, size, insize, ext;
2550 int cost = 0, best_cost;
2551 int kind;
2552
2553 left = INTVAL (left_rtx);
2554 size = INTVAL (size_rtx);
2555 insize = size - left;
2556 gcc_assert (insize > 0);
2557 /* Default to left / right shift. */
2558 kind = 0;
2559 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2560 if (size <= 16)
2561 {
2562 /* 16 bit shift / sign extend / 16 bit shift */
2563 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2564 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2565 below, by alternative 3 or something even better. */
2566 if (cost < best_cost)
2567 {
2568 kind = 5;
2569 best_cost = cost;
2570 }
2571 }
2572 /* Try a plain sign extend between two shifts. */
2573 for (ext = 16; ext >= insize; ext -= 8)
2574 {
2575 if (ext <= size)
2576 {
2577 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2578 if (cost < best_cost)
2579 {
2580 kind = ext / (unsigned) 8;
2581 best_cost = cost;
2582 }
2583 }
2584 /* Check if we can do a sloppy shift with a final signed shift
2585 restoring the sign. */
2586 if (EXT_SHIFT_SIGNED (size - ext))
2587 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2588 /* If not, maybe it's still cheaper to do the second shift sloppy,
2589 and do a final sign extend? */
2590 else if (size <= 16)
2591 cost = ext_shift_insns[ext - insize] + 1
2592 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2593 else
2594 continue;
2595 if (cost < best_cost)
2596 {
2597 kind = ext / (unsigned) 8 + 2;
2598 best_cost = cost;
2599 }
2600 }
2601 /* Check if we can sign extend in r0 */
2602 if (insize < 8)
2603 {
2604 cost = 3 + shift_insns[left];
2605 if (cost < best_cost)
2606 {
2607 kind = 6;
2608 best_cost = cost;
2609 }
2610 /* Try the same with a final signed shift. */
2611 if (left < 31)
2612 {
2613 cost = 3 + ext_shift_insns[left + 1] + 1;
2614 if (cost < best_cost)
2615 {
2616 kind = 7;
2617 best_cost = cost;
2618 }
2619 }
2620 }
2621 if (TARGET_SH3)
2622 {
2623 /* Try to use a dynamic shift. */
2624 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2625 if (cost < best_cost)
2626 {
2627 kind = 0;
2628 best_cost = cost;
2629 }
2630 }
2631 if (costp)
2632 *costp = cost;
2633 return kind;
2634 }
2635
2636 /* Function to be used in the length attribute of the instructions
2637 implementing this pattern. */
2638
2639 int
2640 shl_sext_length (rtx insn)
2641 {
2642 rtx set_src, left_rtx, size_rtx;
2643 int cost;
2644
2645 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2646 left_rtx = XEXP (XEXP (set_src, 0), 1);
2647 size_rtx = XEXP (set_src, 1);
2648 shl_sext_kind (left_rtx, size_rtx, &cost);
2649 return cost;
2650 }
2651
2652 /* Generate rtl for this pattern */
2653
2654 int
2655 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2656 {
2657 int kind;
2658 int left, size, insize, cost;
2659 rtx operands[3];
2660
2661 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2662 left = INTVAL (left_rtx);
2663 size = INTVAL (size_rtx);
2664 insize = size - left;
2665 switch (kind)
2666 {
2667 case 1:
2668 case 2:
2669 case 3:
2670 case 4:
2671 {
2672 int ext = kind & 1 ? 8 : 16;
2673 int shift2 = size - ext;
2674
2675 /* Don't expand fine-grained when combining, because that will
2676 make the pattern fail. */
2677 if (! currently_expanding_to_rtl
2678 && ! reload_in_progress && ! reload_completed)
2679 {
2680 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2681 emit_insn (gen_movsi (dest, source));
2682 break;
2683 }
2684 if (dest != source)
2685 emit_insn (gen_movsi (dest, source));
2686 operands[0] = dest;
2687 if (ext - insize)
2688 {
2689 operands[2] = GEN_INT (ext - insize);
2690 gen_shifty_hi_op (ASHIFT, operands);
2691 }
2692 emit_insn (kind & 1
2693 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2694 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2695 if (kind <= 2)
2696 {
2697 if (shift2)
2698 {
2699 operands[2] = GEN_INT (shift2);
2700 gen_shifty_op (ASHIFT, operands);
2701 }
2702 }
2703 else
2704 {
2705 if (shift2 > 0)
2706 {
2707 if (EXT_SHIFT_SIGNED (shift2))
2708 {
2709 operands[2] = GEN_INT (shift2 + 1);
2710 gen_shifty_op (ASHIFT, operands);
2711 operands[2] = const1_rtx;
2712 gen_shifty_op (ASHIFTRT, operands);
2713 break;
2714 }
2715 operands[2] = GEN_INT (shift2);
2716 gen_shifty_hi_op (ASHIFT, operands);
2717 }
2718 else if (shift2)
2719 {
2720 operands[2] = GEN_INT (-shift2);
2721 gen_shifty_hi_op (LSHIFTRT, operands);
2722 }
2723 emit_insn (size <= 8
2724 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2725 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2726 }
2727 break;
2728 }
2729 case 5:
2730 {
2731 int i = 16 - size;
2732 if (! currently_expanding_to_rtl
2733 && ! reload_in_progress && ! reload_completed)
2734 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2735 else
2736 {
2737 operands[0] = dest;
2738 operands[2] = GEN_INT (16 - insize);
2739 gen_shifty_hi_op (ASHIFT, operands);
2740 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2741 }
2742 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2743 while (--i >= 0)
2744 gen_ashift (ASHIFTRT, 1, dest);
2745 break;
2746 }
2747 case 6:
2748 case 7:
2749 /* Don't expand fine-grained when combining, because that will
2750 make the pattern fail. */
2751 if (! currently_expanding_to_rtl
2752 && ! reload_in_progress && ! reload_completed)
2753 {
2754 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2755 emit_insn (gen_movsi (dest, source));
2756 break;
2757 }
2758 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2759 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2760 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2761 operands[0] = dest;
2762 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2763 gen_shifty_op (ASHIFT, operands);
2764 if (kind == 7)
2765 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2766 break;
2767 default:
2768 return -1;
2769 }
2770 return 0;
2771 }
2772
2773 /* Prefix a symbol_ref name with "datalabel". */
2774
2775 rtx
2776 gen_datalabel_ref (rtx sym)
2777 {
2778 const char *str;
2779
2780 if (GET_CODE (sym) == LABEL_REF)
2781 return gen_rtx_CONST (GET_MODE (sym),
2782 gen_rtx_UNSPEC (GET_MODE (sym),
2783 gen_rtvec (1, sym),
2784 UNSPEC_DATALABEL));
2785
2786 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2787
2788 str = XSTR (sym, 0);
2789 /* Share all SYMBOL_REF strings with the same value - that is important
2790 for cse. */
2791 str = IDENTIFIER_POINTER (get_identifier (str));
2792 XSTR (sym, 0) = str;
2793
2794 return sym;
2795 }
2796
2797 \f
2798 /* The SH cannot load a large constant into a register; constants have to
2799 come from a pc relative load. The reference of a pc relative load
2800 instruction must be less than 1k in front of the instruction. This
2801 means that we often have to dump a constant inside a function, and
2802 generate code to branch around it.
2803
2804 It is important to minimize this, since the branches will slow things
2805 down and make things bigger.
2806
2807 Worst case code looks like:
2808
2809 mov.l L1,rn
2810 bra L2
2811 nop
2812 align
2813 L1: .long value
2814 L2:
2815 ..
2816
2817 mov.l L3,rn
2818 bra L4
2819 nop
2820 align
2821 L3: .long value
2822 L4:
2823 ..
2824
2825 We fix this by performing a scan before scheduling, which notices which
2826 instructions need to have their operands fetched from the constant table
2827 and builds the table.
2828
2829 The algorithm is:
2830
2831 scan, find an instruction which needs a pcrel move. Look forward, find the
2832 last barrier which is within MAX_COUNT bytes of the requirement.
2833 If there isn't one, make one. Process all the instructions between
2834 the find and the barrier.
2835
2836 In the above example, we can tell that L3 is within 1k of L1, so
2837 the first move can be shrunk from the 3 insn+constant sequence into
2838 just 1 insn, and the constant moved to L3 to make:
2839
2840 mov.l L1,rn
2841 ..
2842 mov.l L3,rn
2843 bra L4
2844 nop
2845 align
2846 L3:.long value
2847 L4:.long value
2848
2849 Then the second move becomes the target for the shortening process. */
2850
2851 typedef struct
2852 {
2853 rtx value; /* Value in table. */
2854 rtx label; /* Label of value. */
2855 rtx wend; /* End of window. */
2856 enum machine_mode mode; /* Mode of value. */
2857
2858 /* True if this constant is accessed as part of a post-increment
2859 sequence. Note that HImode constants are never accessed in this way. */
2860 bool part_of_sequence_p;
2861 } pool_node;
2862
2863 /* The maximum number of constants that can fit into one pool, since
2864 constants in the range 0..510 are at least 2 bytes long, and in the
2865 range from there to 1018 at least 4 bytes. */
2866
2867 #define MAX_POOL_SIZE 372
2868 static pool_node pool_vector[MAX_POOL_SIZE];
2869 static int pool_size;
2870 static rtx pool_window_label;
2871 static int pool_window_last;
2872
2873 /* ??? If we need a constant in HImode which is the truncated value of a
2874 constant we need in SImode, we could combine the two entries thus saving
2875 two bytes. Is this common enough to be worth the effort of implementing
2876 it? */
2877
2878 /* ??? This stuff should be done at the same time that we shorten branches.
2879 As it is now, we must assume that all branches are the maximum size, and
2880 this causes us to almost always output constant pools sooner than
2881 necessary. */
2882
2883 /* Add a constant to the pool and return its label. */
2884
2885 static rtx
2886 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2887 {
2888 int i;
2889 rtx lab, new, ref, newref;
2890
2891 /* First see if we've already got it. */
2892 for (i = 0; i < pool_size; i++)
2893 {
2894 if (x->code == pool_vector[i].value->code
2895 && mode == pool_vector[i].mode)
2896 {
2897 if (x->code == CODE_LABEL)
2898 {
2899 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2900 continue;
2901 }
2902 if (rtx_equal_p (x, pool_vector[i].value))
2903 {
2904 lab = new = 0;
2905 if (! last_value
2906 || ! i
2907 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2908 {
2909 new = gen_label_rtx ();
2910 LABEL_REFS (new) = pool_vector[i].label;
2911 pool_vector[i].label = lab = new;
2912 }
2913 if (lab && pool_window_label)
2914 {
2915 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2916 ref = pool_vector[pool_window_last].wend;
2917 LABEL_NEXTREF (newref) = ref;
2918 pool_vector[pool_window_last].wend = newref;
2919 }
2920 if (new)
2921 pool_window_label = new;
2922 pool_window_last = i;
2923 return lab;
2924 }
2925 }
2926 }
2927
2928 /* Need a new one. */
2929 pool_vector[pool_size].value = x;
2930 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2931 {
2932 lab = 0;
2933 pool_vector[pool_size - 1].part_of_sequence_p = true;
2934 }
2935 else
2936 lab = gen_label_rtx ();
2937 pool_vector[pool_size].mode = mode;
2938 pool_vector[pool_size].label = lab;
2939 pool_vector[pool_size].wend = NULL_RTX;
2940 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2941 if (lab && pool_window_label)
2942 {
2943 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2944 ref = pool_vector[pool_window_last].wend;
2945 LABEL_NEXTREF (newref) = ref;
2946 pool_vector[pool_window_last].wend = newref;
2947 }
2948 if (lab)
2949 pool_window_label = lab;
2950 pool_window_last = pool_size;
2951 pool_size++;
2952 return lab;
2953 }
2954
2955 /* Output the literal table. START, if nonzero, is the first instruction
2956 this table is needed for, and also indicates that there is at least one
2957 casesi_worker_2 instruction; we have to emit the operand3 labels from
2958 these insns at a 4-byte aligned position. BARRIER is the barrier
2959 after which we are to place the table. */
2960
2961 static void
2962 dump_table (rtx start, rtx barrier)
2963 {
2964 rtx scan = barrier;
2965 int i;
2966 int need_align = 1;
2967 rtx lab, ref;
2968 int have_df = 0;
2969
2970 /* Do two passes; the first time, dump out the HI sized constants. */
2971
2972 for (i = 0; i < pool_size; i++)
2973 {
2974 pool_node *p = &pool_vector[i];
2975
2976 if (p->mode == HImode)
2977 {
2978 if (need_align)
2979 {
2980 scan = emit_insn_after (gen_align_2 (), scan);
2981 need_align = 0;
2982 }
2983 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2984 scan = emit_label_after (lab, scan);
2985 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2986 scan);
2987 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2988 {
2989 lab = XEXP (ref, 0);
2990 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2991 }
2992 }
2993 else if (p->mode == DFmode)
2994 have_df = 1;
2995 }
2996
2997 need_align = 1;
2998
2999 if (start)
3000 {
3001 scan = emit_insn_after (gen_align_4 (), scan);
3002 need_align = 0;
3003 for (; start != barrier; start = NEXT_INSN (start))
3004 if (GET_CODE (start) == INSN
3005 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3006 {
3007 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3008 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3009
3010 scan = emit_label_after (lab, scan);
3011 }
3012 }
3013 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3014 {
3015 rtx align_insn = NULL_RTX;
3016
3017 scan = emit_label_after (gen_label_rtx (), scan);
3018 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3019 need_align = 0;
3020
3021 for (i = 0; i < pool_size; i++)
3022 {
3023 pool_node *p = &pool_vector[i];
3024
3025 switch (p->mode)
3026 {
3027 case HImode:
3028 break;
3029 case SImode:
3030 case SFmode:
3031 if (align_insn && !p->part_of_sequence_p)
3032 {
3033 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3034 emit_label_before (lab, align_insn);
3035 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3036 align_insn);
3037 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3038 {
3039 lab = XEXP (ref, 0);
3040 emit_insn_before (gen_consttable_window_end (lab),
3041 align_insn);
3042 }
3043 delete_insn (align_insn);
3044 align_insn = NULL_RTX;
3045 continue;
3046 }
3047 else
3048 {
3049 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3050 scan = emit_label_after (lab, scan);
3051 scan = emit_insn_after (gen_consttable_4 (p->value,
3052 const0_rtx), scan);
3053 need_align = ! need_align;
3054 }
3055 break;
3056 case DFmode:
3057 if (need_align)
3058 {
3059 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3060 align_insn = scan;
3061 need_align = 0;
3062 }
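/* Fall through. */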
3063 case DImode:
3064 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3065 scan = emit_label_after (lab, scan);
3066 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3067 scan);
3068 break;
3069 default:
3070 gcc_unreachable ();
3071 }
3072
3073 if (p->mode != HImode)
3074 {
3075 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3076 {
3077 lab = XEXP (ref, 0);
3078 scan = emit_insn_after (gen_consttable_window_end (lab),
3079 scan);
3080 }
3081 }
3082 }
3083
3084 pool_size = 0;
3085 }
3086
3087 for (i = 0; i < pool_size; i++)
3088 {
3089 pool_node *p = &pool_vector[i];
3090
3091 switch (p->mode)
3092 {
3093 case HImode:
3094 break;
3095 case SImode:
3096 case SFmode:
3097 if (need_align)
3098 {
3099 need_align = 0;
3100 scan = emit_label_after (gen_label_rtx (), scan);
3101 scan = emit_insn_after (gen_align_4 (), scan);
3102 }
3103 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3104 scan = emit_label_after (lab, scan);
3105 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3106 scan);
3107 break;
3108 case DFmode:
3109 case DImode:
3110 if (need_align)
3111 {
3112 need_align = 0;
3113 scan = emit_label_after (gen_label_rtx (), scan);
3114 scan = emit_insn_after (gen_align_4 (), scan);
3115 }
3116 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3117 scan = emit_label_after (lab, scan);
3118 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3119 scan);
3120 break;
3121 default:
3122 gcc_unreachable ();
3123 }
3124
3125 if (p->mode != HImode)
3126 {
3127 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3128 {
3129 lab = XEXP (ref, 0);
3130 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3131 }
3132 }
3133 }
3134
3135 scan = emit_insn_after (gen_consttable_end (), scan);
3136 scan = emit_barrier_after (scan);
3137 pool_size = 0;
3138 pool_window_label = NULL_RTX;
3139 pool_window_last = 0;
3140 }
3141
3142 /* Return nonzero if constant would be an ok source for a
3143 mov.w instead of a mov.l. */
3144
3145 static int
3146 hi_const (rtx src)
3147 {
3148 return (GET_CODE (src) == CONST_INT
3149 && INTVAL (src) >= -32768
3150 && INTVAL (src) <= 32767);
3151 }
3152
3153 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3154
3155 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3156 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3157 need to fix it if the input value is CONST_OK_FOR_I08. */
3158
3159 static int
3160 broken_move (rtx insn)
3161 {
3162 if (GET_CODE (insn) == INSN)
3163 {
3164 rtx pat = PATTERN (insn);
3165 if (GET_CODE (pat) == PARALLEL)
3166 pat = XVECEXP (pat, 0, 0);
3167 if (GET_CODE (pat) == SET
3168 /* We can load any 8 bit value if we don't care what the high
3169 order bits end up as. */
3170 && GET_MODE (SET_DEST (pat)) != QImode
3171 && (CONSTANT_P (SET_SRC (pat))
3172 /* Match mova_const. */
3173 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3174 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3175 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3176 && ! (TARGET_SH2E
3177 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3178 && (fp_zero_operand (SET_SRC (pat))
3179 || fp_one_operand (SET_SRC (pat)))
3180 /* ??? If this is a -m4 or -m4-single compilation, in general
3181 we don't know the current setting of fpscr, so disable fldi.
3182 There is an exception if this was a register-register move
3183 before reload - and hence it was ascertained that we have
3184 single precision setting - and in a post-reload optimization
3185 we changed this to do a constant load. In that case
3186 we don't have an r0 clobber, hence we must use fldi. */
3187 && (! TARGET_SH4 || TARGET_FMOVD
3188 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3189 == SCRATCH))
3190 && GET_CODE (SET_DEST (pat)) == REG
3191 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3192 && ! (TARGET_SH2A
3193 && GET_MODE (SET_DEST (pat)) == SImode
3194 && GET_CODE (SET_SRC (pat)) == CONST_INT
3195 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3196 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3197 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3198 return 1;
3199 }
3200
3201 return 0;
3202 }
3203
3204 static int
3205 mova_p (rtx insn)
3206 {
3207 return (GET_CODE (insn) == INSN
3208 && GET_CODE (PATTERN (insn)) == SET
3209 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3210 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3211 /* Don't match mova_const. */
3212 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3213 }
3214
3215 /* Fix up a mova from a switch that went out of range. */
3216 static void
3217 fixup_mova (rtx mova)
3218 {
3219 if (! flag_pic)
3220 {
3221 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3222 INSN_CODE (mova) = -1;
3223 }
3224 else
3225 {
3226 rtx worker = mova;
3227 rtx lab = gen_label_rtx ();
3228 rtx wpat, wpat0, wpat1, wsrc, diff;
3229
3230 do
3231 {
3232 worker = NEXT_INSN (worker);
3233 gcc_assert (worker
3234 && GET_CODE (worker) != CODE_LABEL
3235 && GET_CODE (worker) != JUMP_INSN);
3236 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3237 wpat = PATTERN (worker);
3238 wpat0 = XVECEXP (wpat, 0, 0);
3239 wpat1 = XVECEXP (wpat, 0, 1);
3240 wsrc = SET_SRC (wpat0);
3241 PATTERN (worker) = (gen_casesi_worker_2
3242 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3243 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3244 XEXP (wpat1, 0)));
3245 INSN_CODE (worker) = -1;
3246 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3247 gen_rtx_LABEL_REF (Pmode, lab));
3248 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3249 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3250 INSN_CODE (mova) = -1;
3251 }
3252 }
3253
3254 /* Find the last barrier from insn FROM which is close enough to hold the
3255 constant pool. If we can't find one, then create one near the end of
3256 the range. */
3257
3258 static rtx
3259 find_barrier (int num_mova, rtx mova, rtx from)
3260 {
3261 int count_si = 0;
3262 int count_hi = 0;
3263 int found_hi = 0;
3264 int found_si = 0;
3265 int found_di = 0;
3266 int hi_align = 2;
3267 int si_align = 2;
3268 int leading_mova = num_mova;
3269 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3270 int si_limit;
3271 int hi_limit;
3272
3273 /* For HImode: range is 510, add 4 because pc counts from address of
3274 second instruction after this one, subtract 2 for the jump instruction
3275 that we may need to emit before the table, subtract 2 for the instruction
3276 that fills the jump delay slot (in very rare cases, reorg will take an
3277 instruction from after the constant pool or will leave the delay slot
3278 empty). This gives 510.
3279 For SImode: range is 1020, add 4 because pc counts from address of
3280 second instruction after this one, subtract 2 in case pc is 2 byte
3281 aligned, subtract 2 for the jump instruction that we may need to emit
3282 before the table, subtract 2 for the instruction that fills the jump
3283 delay slot. This gives 1018. */
3284
3285 /* The branch will always be shortened now that the reference address for
3286 forward branches is the successor address, thus we need no longer make
3287 adjustments to the [sh]i_limit for -O0. */
3288
3289 si_limit = 1018;
3290 hi_limit = 510;
3291
3292 while (from && count_si < si_limit && count_hi < hi_limit)
3293 {
3294 int inc = get_attr_length (from);
3295 int new_align = 1;
3296
3297 if (GET_CODE (from) == CODE_LABEL)
3298 {
3299 if (optimize)
3300 new_align = 1 << label_to_alignment (from);
3301 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3302 new_align = 1 << barrier_align (from);
3303 else
3304 new_align = 1;
3305 inc = 0;
3306 }
3307
3308 if (GET_CODE (from) == BARRIER)
3309 {
3310
3311 found_barrier = from;
3312
3313 /* If we are at the end of the function, or in front of an alignment
3314 instruction, we need not insert an extra alignment. We prefer
3315 this kind of barrier. */
3316 if (barrier_align (from) > 2)
3317 good_barrier = from;
3318 }
3319
3320 if (broken_move (from))
3321 {
3322 rtx pat, src, dst;
3323 enum machine_mode mode;
3324
3325 pat = PATTERN (from);
3326 if (GET_CODE (pat) == PARALLEL)
3327 pat = XVECEXP (pat, 0, 0);
3328 src = SET_SRC (pat);
3329 dst = SET_DEST (pat);
3330 mode = GET_MODE (dst);
3331
3332 /* We must explicitly check the mode, because sometimes the
3333 front end will generate code to load unsigned constants into
3334 HImode targets without properly sign extending them. */
3335 if (mode == HImode
3336 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3337 {
3338 found_hi += 2;
3339 /* We put the short constants before the long constants, so
3340 we must count the length of short constants in the range
3341 for the long constants. */
3342 /* ??? This isn't optimal, but is easy to do. */
3343 si_limit -= 2;
3344 }
3345 else
3346 {
3347 /* We dump DF/DI constants before SF/SI ones, because
3348 the limit is the same, but the alignment requirements
3349 are higher. We may waste up to 4 additional bytes
3350 for alignment, and the DF/DI constant may have
3351 another SF/SI constant placed before it. */
3352 if (TARGET_SHCOMPACT
3353 && ! found_di
3354 && (mode == DFmode || mode == DImode))
3355 {
3356 found_di = 1;
3357 si_limit -= 8;
3358 }
3359 while (si_align > 2 && found_si + si_align - 2 > count_si)
3360 si_align >>= 1;
3361 if (found_si > count_si)
3362 count_si = found_si;
3363 found_si += GET_MODE_SIZE (mode);
3364 if (num_mova)
3365 si_limit -= GET_MODE_SIZE (mode);
3366 }
3367 }
3368
3369 if (mova_p (from))
3370 {
3371 if (! num_mova++)
3372 {
3373 leading_mova = 0;
3374 mova = from;
3375 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3376 }
3377 if (found_si > count_si)
3378 count_si = found_si;
3379 }
3380 else if (GET_CODE (from) == JUMP_INSN
3381 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3382 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3383 {
3384 if (num_mova)
3385 num_mova--;
3386 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3387 {
3388 /* We have just passed the barrier in front of the
3389 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3390 the ADDR_DIFF_VEC is accessed as data, just like our pool
3391 constants, this is a good opportunity to accommodate what
3392 we have gathered so far.
3393 If we waited any longer, we could end up at a barrier in
3394 front of code, which gives worse cache usage for separated
3395 instruction / data caches. */
3396 good_barrier = found_barrier;
3397 break;
3398 }
3399 else
3400 {
3401 rtx body = PATTERN (from);
3402 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3403 }
3404 }
3405 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3406 else if (GET_CODE (from) == JUMP_INSN
3407 && ! TARGET_SH2
3408 && ! TARGET_SMALLCODE)
3409 new_align = 4;
3410
3411 if (found_si)
3412 {
3413 count_si += inc;
3414 if (new_align > si_align)
3415 {
3416 si_limit -= (count_si - 1) & (new_align - si_align);
3417 si_align = new_align;
3418 }
3419 count_si = (count_si + new_align - 1) & -new_align;
3420 }
3421 if (found_hi)
3422 {
3423 count_hi += inc;
3424 if (new_align > hi_align)
3425 {
3426 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3427 hi_align = new_align;
3428 }
3429 count_hi = (count_hi + new_align - 1) & -new_align;
3430 }
3431 from = NEXT_INSN (from);
3432 }
3433
3434 if (num_mova)
3435 {
3436 if (leading_mova)
3437 {
3438 /* Try as we might, the leading mova is out of range. Change
3439 it into a load (which will become a pcload) and retry. */
3440 fixup_mova (mova);
3441 return find_barrier (0, 0, mova);
3442 }
3443 else
3444 {
3445 /* Insert the constant pool table before the mova instruction,
3446 to prevent the mova label reference from going out of range. */
3447 from = mova;
3448 good_barrier = found_barrier = barrier_before_mova;
3449 }
3450 }
3451
3452 if (found_barrier)
3453 {
3454 if (good_barrier && next_real_insn (found_barrier))
3455 found_barrier = good_barrier;
3456 }
3457 else
3458 {
3459 /* We didn't find a barrier in time to dump our stuff,
3460 so we'll make one. */
3461 rtx label = gen_label_rtx ();
3462
3463 /* If we exceeded the range, then we must back up over the last
3464 instruction we looked at. Otherwise, we just need to undo the
3465 NEXT_INSN at the end of the loop. */
3466 if (count_hi > hi_limit || count_si > si_limit)
3467 from = PREV_INSN (PREV_INSN (from));
3468 else
3469 from = PREV_INSN (from);
3470
3471 /* Walk back to be just before any jump or label.
3472 Putting it before a label reduces the number of times the branch
3473 around the constant pool table will be hit. Putting it before
3474 a jump makes it more likely that the bra delay slot will be
3475 filled. */
3476 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3477 || GET_CODE (from) == CODE_LABEL)
3478 from = PREV_INSN (from);
3479
3480 from = emit_jump_insn_after (gen_jump (label), from);
3481 JUMP_LABEL (from) = label;
3482 LABEL_NUSES (label) = 1;
3483 found_barrier = emit_barrier_after (from);
3484 emit_label_after (label, found_barrier);
3485 }
3486
3487 return found_barrier;
3488 }
3489
3490 /* If the instruction INSN is implemented by a special function, and we can
3491 positively find the register that is used to call the sfunc, and this
3492 register is not used anywhere else in this instruction - except as the
3493 destination of a set, return this register; else, return 0. */
3494 rtx
3495 sfunc_uses_reg (rtx insn)
3496 {
3497 int i;
3498 rtx pattern, part, reg_part, reg;
3499
3500 if (GET_CODE (insn) != INSN)
3501 return 0;
3502 pattern = PATTERN (insn);
3503 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3504 return 0;
3505
3506 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3507 {
3508 part = XVECEXP (pattern, 0, i);
3509 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3510 reg_part = part;
3511 }
3512 if (! reg_part)
3513 return 0;
3514 reg = XEXP (reg_part, 0);
3515 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3516 {
3517 part = XVECEXP (pattern, 0, i);
3518 if (part == reg_part || GET_CODE (part) == CLOBBER)
3519 continue;
3520 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3521 && GET_CODE (SET_DEST (part)) == REG)
3522 ? SET_SRC (part) : part)))
3523 return 0;
3524 }
3525 return reg;
3526 }
3527
3528 /* See if the only way in which INSN uses REG is by calling it, or by
3529 setting it while calling it. Set *SET to a SET rtx if the register
3530 is set by INSN. */
3531
3532 static int
3533 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3534 {
3535 rtx pattern, reg2;
3536
3537 *set = NULL_RTX;
3538
3539 reg2 = sfunc_uses_reg (insn);
3540 if (reg2 && REGNO (reg2) == REGNO (reg))
3541 {
3542 pattern = single_set (insn);
3543 if (pattern
3544 && GET_CODE (SET_DEST (pattern)) == REG
3545 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3546 *set = pattern;
3547 return 0;
3548 }
3549 if (GET_CODE (insn) != CALL_INSN)
3550 {
3551 /* We don't use rtx_equal_p because we don't care if the mode is
3552 different. */
3553 pattern = single_set (insn);
3554 if (pattern
3555 && GET_CODE (SET_DEST (pattern)) == REG
3556 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3557 {
3558 rtx par, part;
3559 int i;
3560
3561 *set = pattern;
3562 par = PATTERN (insn);
3563 if (GET_CODE (par) == PARALLEL)
3564 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3565 {
3566 part = XVECEXP (par, 0, i);
3567 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3568 return 1;
3569 }
3570 return reg_mentioned_p (reg, SET_SRC (pattern));
3571 }
3572
3573 return 1;
3574 }
3575
3576 pattern = PATTERN (insn);
3577
3578 if (GET_CODE (pattern) == PARALLEL)
3579 {
3580 int i;
3581
3582 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3583 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3584 return 1;
3585 pattern = XVECEXP (pattern, 0, 0);
3586 }
3587
3588 if (GET_CODE (pattern) == SET)
3589 {
3590 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3591 {
3592 /* We don't use rtx_equal_p, because we don't care if the
3593 mode is different. */
3594 if (GET_CODE (SET_DEST (pattern)) != REG
3595 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3596 return 1;
3597
3598 *set = pattern;
3599 }
3600
3601 pattern = SET_SRC (pattern);
3602 }
3603
3604 if (GET_CODE (pattern) != CALL
3605 || GET_CODE (XEXP (pattern, 0)) != MEM
3606 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3607 return 1;
3608
3609 return 0;
3610 }
3611
3612 /* Given X, a pattern of an insn or a part of it, return a mask of used
3613 general registers. Bits 0..15 mean that the respective registers
3614 are used as inputs in the instruction. Bits 16..31 mean that the
3615 registers 0..15, respectively, are used as outputs, or are clobbered.
3616 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3617 int
3618 regs_used (rtx x, int is_dest)
3619 {
3620 enum rtx_code code;
3621 const char *fmt;
3622 int i, used = 0;
3623
3624 if (! x)
3625 return used;
3626 code = GET_CODE (x);
3627 switch (code)
3628 {
3629 case REG:
3630 if (REGNO (x) < 16)
3631 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3632 << (REGNO (x) + is_dest));
3633 return 0;
3634 case SUBREG:
3635 {
3636 rtx y = SUBREG_REG (x);
3637
3638 if (GET_CODE (y) != REG)
3639 break;
3640 if (REGNO (y) < 16)
3641 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3642 << (REGNO (y) +
3643 subreg_regno_offset (REGNO (y),
3644 GET_MODE (y),
3645 SUBREG_BYTE (x),
3646 GET_MODE (x)) + is_dest));
3647 return 0;
3648 }
3649 case SET:
3650 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3651 case RETURN:
3652 /* If there was a return value, it must have been indicated with USE. */
3653 return 0x00ffff00;
3654 case CLOBBER:
3655 is_dest = 1;
3656 break;
3657 case MEM:
3658 is_dest = 0;
3659 break;
3660 case CALL:
3661 used |= 0x00ff00f0;
3662 break;
3663 default:
3664 break;
3665 }
3666
3667 fmt = GET_RTX_FORMAT (code);
3668
3669 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3670 {
3671 if (fmt[i] == 'E')
3672 {
3673 register int j;
3674 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3675 used |= regs_used (XVECEXP (x, i, j), is_dest);
3676 }
3677 else if (fmt[i] == 'e')
3678 used |= regs_used (XEXP (x, i), is_dest);
3679 }
3680 return used;
3681 }
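
/* Illustrative example (not part of the original code; assumes a 32-bit
   SH target where SImode occupies a single hard register): for an insn
   pattern such as

       (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))

   regs_used returns 0x0002000c - bits 2 and 3 mark r2 and r3 as inputs,
   and bit 1 + 16 marks r1 as an output.  The 0x00ff00f0 mask in the CALL
   case likewise records r4..r7 as inputs and r0..r7 as clobbered, which
   matches the usual SH calling convention.  */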
3682
3683 /* Create an instruction that prevents redirection of a conditional branch
3684 to the destination of the JUMP with address ADDR.
3685 If the branch needs to be implemented as an indirect jump, try to find
3686 a scratch register for it.
3687 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3688     Pass 1 if any preceding insn that doesn't fit into a delay slot is good
3689     enough; pass 2 if a definite blocking insn is needed.
3690 -1 is used internally to avoid deep recursion.
3691 If a blocking instruction is made or recognized, return it. */
3692
3693 static rtx
3694 gen_block_redirect (rtx jump, int addr, int need_block)
3695 {
3696 int dead = 0;
3697 rtx prev = prev_nonnote_insn (jump);
3698 rtx dest;
3699
3700 /* First, check if we already have an instruction that satisfies our need. */
3701 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3702 {
3703 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3704 return prev;
3705 if (GET_CODE (PATTERN (prev)) == USE
3706 || GET_CODE (PATTERN (prev)) == CLOBBER
3707 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3708 prev = jump;
3709 else if ((need_block &= ~1) < 0)
3710 return prev;
3711 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3712 need_block = 0;
3713 }
3714 if (GET_CODE (PATTERN (jump)) == RETURN)
3715 {
3716 if (! need_block)
3717 return prev;
3718 /* Reorg even does nasty things with return insns that cause branches
3719 to go out of range - see find_end_label and callers. */
3720 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3721 }
3722 /* We can't use JUMP_LABEL here because it might be undefined
3723 when not optimizing. */
3724 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3725 /* If the branch is out of range, try to find a scratch register for it. */
3726 if (optimize
3727 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3728 > 4092 + 4098))
3729 {
3730 rtx scan;
3731 /* Don't look for the stack pointer as a scratch register,
3732 it would cause trouble if an interrupt occurred. */
3733 unsigned try = 0x7fff, used;
3734 int jump_left = flag_expensive_optimizations + 1;
3735
3736 /* It is likely that the most recent eligible instruction is wanted for
3737 the delay slot. Therefore, find out which registers it uses, and
3738 try to avoid using them. */
3739
3740 for (scan = jump; (scan = PREV_INSN (scan)); )
3741 {
3742 enum rtx_code code;
3743
3744 if (INSN_DELETED_P (scan))
3745 continue;
3746 code = GET_CODE (scan);
3747 if (code == CODE_LABEL || code == JUMP_INSN)
3748 break;
3749 if (code == INSN
3750 && GET_CODE (PATTERN (scan)) != USE
3751 && GET_CODE (PATTERN (scan)) != CLOBBER
3752 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3753 {
3754 try &= ~regs_used (PATTERN (scan), 0);
3755 break;
3756 }
3757 }
3758 for (used = dead = 0, scan = JUMP_LABEL (jump);
3759 (scan = NEXT_INSN (scan)); )
3760 {
3761 enum rtx_code code;
3762
3763 if (INSN_DELETED_P (scan))
3764 continue;
3765 code = GET_CODE (scan);
3766 if (INSN_P (scan))
3767 {
3768 used |= regs_used (PATTERN (scan), 0);
3769 if (code == CALL_INSN)
3770 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3771 dead |= (used >> 16) & ~used;
3772 if (dead & try)
3773 {
3774 dead &= try;
3775 break;
3776 }
3777 if (code == JUMP_INSN)
3778 {
3779 if (jump_left-- && simplejump_p (scan))
3780 scan = JUMP_LABEL (scan);
3781 else
3782 break;
3783 }
3784 }
3785 }
3786 /* Mask out the stack pointer again, in case it was
3787 the only 'free' register we have found. */
3788 dead &= 0x7fff;
3789 }
3790 /* If the immediate destination is still in range, check for possible
3791 threading with a jump beyond the delay slot insn.
3792     Don't check if we are called recursively; the jump has been or will be
3793     checked in a different invocation in that case.  */
3794
3795 else if (optimize && need_block >= 0)
3796 {
3797 rtx next = next_active_insn (next_active_insn (dest));
3798 if (next && GET_CODE (next) == JUMP_INSN
3799 && GET_CODE (PATTERN (next)) == SET
3800 && recog_memoized (next) == CODE_FOR_jump_compact)
3801 {
3802 dest = JUMP_LABEL (next);
3803 if (dest
3804 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3805 > 4092 + 4098))
3806 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3807 }
3808 }
3809
3810 if (dead)
3811 {
3812 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3813
3814       /* It would be nice if we could convert the jump into an indirect
3815 	 jump / far branch right now, thus exposing all constituent
3816 instructions to further optimization. However, reorg uses
3817 simplejump_p to determine if there is an unconditional jump where
3818 it should try to schedule instructions from the target of the
3819 branch; simplejump_p fails for indirect jumps even if they have
3820 a JUMP_LABEL. */
3821 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3822 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3823 , jump);
3824 /* ??? We would like this to have the scope of the jump, but that
3825 scope will change when a delay slot insn of an inner scope is added.
3826 Hence, after delay slot scheduling, we'll have to expect
3827 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3828 the jump. */
3829
3830 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3831 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3832 return insn;
3833 }
3834 else if (need_block)
3835 /* We can't use JUMP_LABEL here because it might be undefined
3836 when not optimizing. */
3837 return emit_insn_before (gen_block_branch_redirect
3838 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3839 , jump);
3840 return prev;
3841 }
3842
3843 #define CONDJUMP_MIN -252
3844 #define CONDJUMP_MAX 262
3845 struct far_branch
3846 {
3847 /* A label (to be placed) in front of the jump
3848 that jumps to our ultimate destination. */
3849 rtx near_label;
3850 /* Where we are going to insert it if we cannot move the jump any farther,
3851 or the jump itself if we have picked up an existing jump. */
3852 rtx insert_place;
3853 /* The ultimate destination. */
3854 rtx far_label;
3855 struct far_branch *prev;
3856 /* If the branch has already been created, its address;
3857 else the address of its first prospective user. */
3858 int address;
3859 };
3860
3861 static void gen_far_branch (struct far_branch *);
3862 enum mdep_reorg_phase_e mdep_reorg_phase;
3863 static void
3864 gen_far_branch (struct far_branch *bp)
3865 {
3866 rtx insn = bp->insert_place;
3867 rtx jump;
3868 rtx label = gen_label_rtx ();
3869 int ok;
3870
3871 emit_label_after (label, insn);
3872 if (bp->far_label)
3873 {
3874 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3875 LABEL_NUSES (bp->far_label)++;
3876 }
3877 else
3878 jump = emit_jump_insn_after (gen_return (), insn);
3879 /* Emit a barrier so that reorg knows that any following instructions
3880 are not reachable via a fall-through path.
3881 But don't do this when not optimizing, since we wouldn't suppress the
3882 alignment for the barrier then, and could end up with out-of-range
3883 pc-relative loads. */
3884 if (optimize)
3885 emit_barrier_after (jump);
3886 emit_label_after (bp->near_label, insn);
3887 JUMP_LABEL (jump) = bp->far_label;
3888 ok = invert_jump (insn, label, 1);
3889 gcc_assert (ok);
3890
3891 /* If we are branching around a jump (rather than a return), prevent
3892 reorg from using an insn from the jump target as the delay slot insn -
3893      when reorg did this, it pessimized code (we'd rather hide the delay slot)
3894 and it could cause branches to go out of range. */
3895 if (bp->far_label)
3896 (emit_insn_after
3897 (gen_stuff_delay_slot
3898 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3899 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3900 insn));
3901 /* Prevent reorg from undoing our splits. */
3902 gen_block_redirect (jump, bp->address += 2, 2);
3903 }
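
/* Rough sketch of the transformation (hypothetical label names, not part
   of the original code): a conditional branch whose target is out of
   range, e.g.

       bt      far_label          ! displacement too large

   is rewritten by gen_far_branch into roughly

       bf      .L_skip            ! inverted condition, short range
   .L_near:                       ! near_label; other branches can hop here
       bra     far_label          ! or an indirect jump via a scratch reg
       (barrier)
   .L_skip:

   after which gen_block_redirect is called so that reorg cannot undo
   the split.  */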
3904
3905 /* Fix up ADDR_DIFF_VECs. */
3906 void
3907 fixup_addr_diff_vecs (rtx first)
3908 {
3909 rtx insn;
3910
3911 for (insn = first; insn; insn = NEXT_INSN (insn))
3912 {
3913 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3914
3915 if (GET_CODE (insn) != JUMP_INSN
3916 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3917 continue;
3918 pat = PATTERN (insn);
3919 vec_lab = XEXP (XEXP (pat, 0), 0);
3920
3921 /* Search the matching casesi_jump_2. */
3922 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3923 {
3924 if (GET_CODE (prev) != JUMP_INSN)
3925 continue;
3926 prevpat = PATTERN (prev);
3927 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3928 continue;
3929 x = XVECEXP (prevpat, 0, 1);
3930 if (GET_CODE (x) != USE)
3931 continue;
3932 x = XEXP (x, 0);
3933 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3934 break;
3935 }
3936 /* FIXME: This is a bug in the optimizer, but it seems harmless
3937 	 to just avoid panicking.  */
3938 if (!prev)
3939 continue;
3940
3941 /* Emit the reference label of the braf where it belongs, right after
3942 the casesi_jump_2 (i.e. braf). */
3943 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3944 emit_label_after (braf_label, prev);
3945
3946       /* Fix up the ADDR_DIFF_VEC to be relative
3947 to the reference address of the braf. */
3948 XEXP (XEXP (pat, 0), 0) = braf_label;
3949 }
3950 }
3951
3952 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3953 a barrier. Return the base 2 logarithm of the desired alignment. */
3954 int
3955 barrier_align (rtx barrier_or_label)
3956 {
3957 rtx next = next_real_insn (barrier_or_label), pat, prev;
3958 int slot, credit, jump_to_next = 0;
3959
3960 if (! next)
3961 return 0;
3962
3963 pat = PATTERN (next);
3964
3965 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3966 return 2;
3967
3968 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3969 /* This is a barrier in front of a constant table. */
3970 return 0;
3971
3972 prev = prev_real_insn (barrier_or_label);
3973 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3974 {
3975 pat = PATTERN (prev);
3976 /* If this is a very small table, we want to keep the alignment after
3977 the table to the minimum for proper code alignment. */
3978 return ((TARGET_SMALLCODE
3979 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3980 <= (unsigned) 1 << (CACHE_LOG - 2)))
3981 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3982 }
3983
3984 if (TARGET_SMALLCODE)
3985 return 0;
3986
3987 if (! TARGET_SH2 || ! optimize)
3988 return align_jumps_log;
3989
3990 /* When fixing up pcloads, a constant table might be inserted just before
3991 the basic block that ends with the barrier. Thus, we can't trust the
3992 instruction lengths before that. */
3993 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3994 {
3995 /* Check if there is an immediately preceding branch to the insn beyond
3996 	 the barrier.  We must weigh the cost of discarding useful information
3997 	 from the current cache line when executing this branch and there is
3998 	 an alignment, against that of fetching unneeded insns in front of the
3999 branch target when there is no alignment. */
4000
4001 /* There are two delay_slot cases to consider. One is the simple case
4002 where the preceding branch is to the insn beyond the barrier (simple
4003 delay slot filling), and the other is where the preceding branch has
4004 a delay slot that is a duplicate of the insn after the barrier
4005 (fill_eager_delay_slots) and the branch is to the insn after the insn
4006 after the barrier. */
4007
4008 /* PREV is presumed to be the JUMP_INSN for the barrier under
4009 investigation. Skip to the insn before it. */
4010 prev = prev_real_insn (prev);
4011
4012 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4013 credit >= 0 && prev && GET_CODE (prev) == INSN;
4014 prev = prev_real_insn (prev))
4015 {
4016 jump_to_next = 0;
4017 if (GET_CODE (PATTERN (prev)) == USE
4018 || GET_CODE (PATTERN (prev)) == CLOBBER)
4019 continue;
4020 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4021 {
4022 prev = XVECEXP (PATTERN (prev), 0, 1);
4023 if (INSN_UID (prev) == INSN_UID (next))
4024 {
4025 /* Delay slot was filled with insn at jump target. */
4026 jump_to_next = 1;
4027 continue;
4028 }
4029 }
4030
4031 if (slot &&
4032 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4033 slot = 0;
4034 credit -= get_attr_length (prev);
4035 }
4036 if (prev
4037 && GET_CODE (prev) == JUMP_INSN
4038 && JUMP_LABEL (prev))
4039 {
4040 rtx x;
4041 if (jump_to_next
4042 || next_real_insn (JUMP_LABEL (prev)) == next
4043 /* If relax_delay_slots() decides NEXT was redundant
4044 with some previous instruction, it will have
4045 redirected PREV's jump to the following insn. */
4046 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4047 /* There is no upper bound on redundant instructions
4048 that might have been skipped, but we must not put an
4049 alignment where none had been before. */
4050 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4051 (INSN_P (x)
4052 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4053 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4054 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4055 {
4056 rtx pat = PATTERN (prev);
4057 if (GET_CODE (pat) == PARALLEL)
4058 pat = XVECEXP (pat, 0, 0);
4059 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4060 return 0;
4061 }
4062 }
4063 }
4064
4065 return align_jumps_log;
4066 }
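
/* Worked example (not part of the original code): the result is a base-2
   logarithm, so the "return 2" for an ADDR_DIFF_VEC above requests
   2**2 = 4-byte alignment, while "return 0" requests no extra alignment
   at all.  */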
4067
4068 /* If we are inside a phony loop, almost any kind of label can turn up as the
4069 first one in the loop. Aligning a braf label causes incorrect switch
4070 destination addresses; we can detect braf labels because they are
4071 followed by a BARRIER.
4072 Applying loop alignment to small constant or switch tables is a waste
4073 of space, so we suppress this too. */
4074 int
4075 sh_loop_align (rtx label)
4076 {
4077 rtx next = label;
4078
4079 do
4080 next = next_nonnote_insn (next);
4081 while (next && GET_CODE (next) == CODE_LABEL);
4082
4083 if (! next
4084 || ! INSN_P (next)
4085 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4086 || recog_memoized (next) == CODE_FOR_consttable_2)
4087 return 0;
4088
4089 return align_loops_log;
4090 }
4091
4092 /* Do a final pass over the function, just before delayed branch
4093 scheduling. */
4094
4095 static void
4096 sh_reorg (void)
4097 {
4098 rtx first, insn, mova = NULL_RTX;
4099 int num_mova;
4100 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4101 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4102
4103 first = get_insns ();
4104
4105 /* We must split call insns before introducing `mova's. If we're
4106 optimizing, they'll have already been split. Otherwise, make
4107 sure we don't split them too late. */
4108 if (! optimize)
4109 split_all_insns_noflow ();
4110
4111 if (TARGET_SHMEDIA)
4112 return;
4113
4114 /* If relaxing, generate pseudo-ops to associate function calls with
4115 the symbols they call. It does no harm to not generate these
4116      pseudo-ops.  However, when we can generate them, it enables the
4117      linker to potentially relax the jsr to a bsr, and eliminate the
4118 register load and, possibly, the constant pool entry. */
4119
4120 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4121 if (TARGET_RELAX)
4122 {
4123 /* Remove all REG_LABEL notes. We want to use them for our own
4124 purposes. This works because none of the remaining passes
4125 need to look at them.
4126
4127 ??? But it may break in the future. We should use a machine
4128 dependent REG_NOTE, or some other approach entirely. */
4129 for (insn = first; insn; insn = NEXT_INSN (insn))
4130 {
4131 if (INSN_P (insn))
4132 {
4133 rtx note;
4134
4135 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4136 remove_note (insn, note);
4137 }
4138 }
4139
4140 for (insn = first; insn; insn = NEXT_INSN (insn))
4141 {
4142 rtx pattern, reg, link, set, scan, dies, label;
4143 int rescan = 0, foundinsn = 0;
4144
4145 if (GET_CODE (insn) == CALL_INSN)
4146 {
4147 pattern = PATTERN (insn);
4148
4149 if (GET_CODE (pattern) == PARALLEL)
4150 pattern = XVECEXP (pattern, 0, 0);
4151 if (GET_CODE (pattern) == SET)
4152 pattern = SET_SRC (pattern);
4153
4154 if (GET_CODE (pattern) != CALL
4155 || GET_CODE (XEXP (pattern, 0)) != MEM)
4156 continue;
4157
4158 reg = XEXP (XEXP (pattern, 0), 0);
4159 }
4160 else
4161 {
4162 reg = sfunc_uses_reg (insn);
4163 if (! reg)
4164 continue;
4165 }
4166
4167 if (GET_CODE (reg) != REG)
4168 continue;
4169
4170 /* This is a function call via REG. If the only uses of REG
4171 between the time that it is set and the time that it dies
4172 are in function calls, then we can associate all the
4173 function calls with the setting of REG. */
4174
4175 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4176 {
4177 if (REG_NOTE_KIND (link) != 0)
4178 continue;
4179 set = single_set (XEXP (link, 0));
4180 if (set && rtx_equal_p (reg, SET_DEST (set)))
4181 {
4182 link = XEXP (link, 0);
4183 break;
4184 }
4185 }
4186
4187 if (! link)
4188 {
4189 /* ??? Sometimes global register allocation will have
4190 deleted the insn pointed to by LOG_LINKS. Try
4191 scanning backward to find where the register is set. */
4192 for (scan = PREV_INSN (insn);
4193 scan && GET_CODE (scan) != CODE_LABEL;
4194 scan = PREV_INSN (scan))
4195 {
4196 if (! INSN_P (scan))
4197 continue;
4198
4199 if (! reg_mentioned_p (reg, scan))
4200 continue;
4201
4202 if (noncall_uses_reg (reg, scan, &set))
4203 break;
4204
4205 if (set)
4206 {
4207 link = scan;
4208 break;
4209 }
4210 }
4211 }
4212
4213 if (! link)
4214 continue;
4215
4216 /* The register is set at LINK. */
4217
4218 /* We can only optimize the function call if the register is
4219 being set to a symbol. In theory, we could sometimes
4220 optimize calls to a constant location, but the assembler
4221 and linker do not support that at present. */
4222 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4223 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4224 continue;
4225
4226 /* Scan forward from LINK to the place where REG dies, and
4227 make sure that the only insns which use REG are
4228 themselves function calls. */
4229
4230 /* ??? This doesn't work for call targets that were allocated
4231 by reload, since there may not be a REG_DEAD note for the
4232 register. */
4233
4234 dies = NULL_RTX;
4235 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4236 {
4237 rtx scanset;
4238
4239 /* Don't try to trace forward past a CODE_LABEL if we haven't
4240 seen INSN yet. Ordinarily, we will only find the setting insn
4241 in LOG_LINKS if it is in the same basic block. However,
4242 cross-jumping can insert code labels in between the load and
4243 the call, and can result in situations where a single call
4244 insn may have two targets depending on where we came from. */
4245
4246 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4247 break;
4248
4249 if (! INSN_P (scan))
4250 continue;
4251
4252 /* Don't try to trace forward past a JUMP. To optimize
4253 safely, we would have to check that all the
4254 instructions at the jump destination did not use REG. */
4255
4256 if (GET_CODE (scan) == JUMP_INSN)
4257 break;
4258
4259 if (! reg_mentioned_p (reg, scan))
4260 continue;
4261
4262 if (noncall_uses_reg (reg, scan, &scanset))
4263 break;
4264
4265 if (scan == insn)
4266 foundinsn = 1;
4267
4268 if (scan != insn
4269 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4270 {
4271 /* There is a function call to this register other
4272 than the one we are checking. If we optimize
4273 this call, we need to rescan again below. */
4274 rescan = 1;
4275 }
4276
4277 /* ??? We shouldn't have to worry about SCANSET here.
4278 We should just be able to check for a REG_DEAD note
4279 on a function call. However, the REG_DEAD notes are
4280 apparently not dependable around libcalls; c-torture
4281 execute/920501-2 is a test case. If SCANSET is set,
4282 then this insn sets the register, so it must have
4283 died earlier. Unfortunately, this will only handle
4284 the cases in which the register is, in fact, set in a
4285 later insn. */
4286
4287 /* ??? We shouldn't have to use FOUNDINSN here.
4288 However, the LOG_LINKS fields are apparently not
4289 entirely reliable around libcalls;
4290 newlib/libm/math/e_pow.c is a test case. Sometimes
4291 an insn will appear in LOG_LINKS even though it is
4292 not the most recent insn which sets the register. */
4293
4294 if (foundinsn
4295 && (scanset
4296 || find_reg_note (scan, REG_DEAD, reg)))
4297 {
4298 dies = scan;
4299 break;
4300 }
4301 }
4302
4303 if (! dies)
4304 {
4305 /* Either there was a branch, or some insn used REG
4306 other than as a function call address. */
4307 continue;
4308 }
4309
4310 /* Create a code label, and put it in a REG_LABEL note on
4311 the insn which sets the register, and on each call insn
4312 which uses the register. In final_prescan_insn we look
4313 for the REG_LABEL notes, and output the appropriate label
4314 or pseudo-op. */
4315
4316 label = gen_label_rtx ();
4317 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4318 REG_NOTES (link));
4319 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4320 REG_NOTES (insn));
4321 if (rescan)
4322 {
4323 scan = link;
4324 do
4325 {
4326 rtx reg2;
4327
4328 scan = NEXT_INSN (scan);
4329 if (scan != insn
4330 && ((GET_CODE (scan) == CALL_INSN
4331 && reg_mentioned_p (reg, scan))
4332 || ((reg2 = sfunc_uses_reg (scan))
4333 && REGNO (reg2) == REGNO (reg))))
4334 REG_NOTES (scan)
4335 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4336 }
4337 while (scan != dies);
4338 }
4339 }
4340 }
4341
4342 if (TARGET_SH2)
4343 fixup_addr_diff_vecs (first);
4344
4345 if (optimize)
4346 {
4347 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4348 shorten_branches (first);
4349 }
4350 /* Scan the function looking for move instructions which have to be
4351 changed to pc-relative loads and insert the literal tables. */
4352
4353 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4354 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4355 {
4356 if (mova_p (insn))
4357 {
4358 /* ??? basic block reordering can move a switch table dispatch
4359 below the switch table. Check if that has happened.
4360 We only have the addresses available when optimizing; but then,
4361 this check shouldn't be needed when not optimizing. */
4362 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4363 if (optimize
4364 && (INSN_ADDRESSES (INSN_UID (insn))
4365 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4366 {
4367 /* Change the mova into a load.
4368 broken_move will then return true for it. */
4369 fixup_mova (insn);
4370 }
4371 else if (! num_mova++)
4372 mova = insn;
4373 }
4374 else if (GET_CODE (insn) == JUMP_INSN
4375 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4376 && num_mova)
4377 {
4378 rtx scan;
4379 int total;
4380
4381 num_mova--;
4382
4383 /* Some code might have been inserted between the mova and
4384 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4385 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4386 total += get_attr_length (scan);
4387
4388 	  /* The range of mova is 1020; add 4 because the pc counts from the
4389 	     address of the second insn after this one, and subtract 2 in case
4390 	     the pc is 2-byte aligned.  Possible alignment needed for the
4391 	     ADDR_DIFF_VEC cancels out with alignment effects of the mova itself.  */
4392 if (total > 1022)
4393 {
4394 /* Change the mova into a load, and restart scanning
4395 there. broken_move will then return true for mova. */
4396 fixup_mova (mova);
4397 insn = mova;
4398 }
4399 }
4400 if (broken_move (insn)
4401 || (GET_CODE (insn) == INSN
4402 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4403 {
4404 rtx scan;
4405 /* Scan ahead looking for a barrier to stick the constant table
4406 behind. */
4407 rtx barrier = find_barrier (num_mova, mova, insn);
4408 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4409 int need_aligned_label = 0;
4410
4411 if (num_mova && ! mova_p (mova))
4412 {
4413 /* find_barrier had to change the first mova into a
4414 pcload; thus, we have to start with this new pcload. */
4415 insn = mova;
4416 num_mova = 0;
4417 }
4418 /* Now find all the moves between the points and modify them. */
4419 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4420 {
4421 if (GET_CODE (scan) == CODE_LABEL)
4422 last_float = 0;
4423 if (GET_CODE (scan) == INSN
4424 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4425 need_aligned_label = 1;
4426 if (broken_move (scan))
4427 {
4428 rtx *patp = &PATTERN (scan), pat = *patp;
4429 rtx src, dst;
4430 rtx lab;
4431 rtx newsrc;
4432 enum machine_mode mode;
4433
4434 if (GET_CODE (pat) == PARALLEL)
4435 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4436 src = SET_SRC (pat);
4437 dst = SET_DEST (pat);
4438 mode = GET_MODE (dst);
4439
4440 if (mode == SImode && hi_const (src)
4441 && REGNO (dst) != FPUL_REG)
4442 {
4443 int offset = 0;
4444
4445 mode = HImode;
4446 while (GET_CODE (dst) == SUBREG)
4447 {
4448 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4449 GET_MODE (SUBREG_REG (dst)),
4450 SUBREG_BYTE (dst),
4451 GET_MODE (dst));
4452 dst = SUBREG_REG (dst);
4453 }
4454 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4455 }
4456 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4457 {
4458 /* This must be an insn that clobbers r0. */
4459 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4460 XVECLEN (PATTERN (scan), 0)
4461 - 1);
4462 rtx clobber = *clobberp;
4463
4464 gcc_assert (GET_CODE (clobber) == CLOBBER
4465 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4466
4467 if (last_float
4468 && reg_set_between_p (r0_rtx, last_float_move, scan))
4469 last_float = 0;
4470 if (last_float
4471 && TARGET_SHCOMPACT
4472 && GET_MODE_SIZE (mode) != 4
4473 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4474 last_float = 0;
4475 lab = add_constant (src, mode, last_float);
4476 if (lab)
4477 emit_insn_before (gen_mova (lab), scan);
4478 else
4479 {
4480 /* There will be a REG_UNUSED note for r0 on
4481 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4482 		     lest reorg's mark_target_live_regs fail to
4483 		     consider r0 to be used, and we end up with a delay
4484 		     slot insn in front of SCAN that clobbers r0.  */
4485 rtx note
4486 = find_regno_note (last_float_move, REG_UNUSED, 0);
4487
4488 /* If we are not optimizing, then there may not be
4489 a note. */
4490 if (note)
4491 PUT_MODE (note, REG_INC);
4492
4493 *last_float_addr = r0_inc_rtx;
4494 }
4495 last_float_move = scan;
4496 last_float = src;
4497 newsrc = gen_rtx_MEM (mode,
4498 (((TARGET_SH4 && ! TARGET_FMOVD)
4499 || REGNO (dst) == FPUL_REG)
4500 ? r0_inc_rtx
4501 : r0_rtx));
4502 last_float_addr = &XEXP (newsrc, 0);
4503
4504 /* Remove the clobber of r0. */
4505 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4506 gen_rtx_SCRATCH (Pmode));
4507 }
4508 /* This is a mova needing a label. Create it. */
4509 else if (GET_CODE (src) == UNSPEC
4510 && XINT (src, 1) == UNSPEC_MOVA
4511 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4512 {
4513 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4514 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4515 newsrc = gen_rtx_UNSPEC (SImode,
4516 gen_rtvec (1, newsrc),
4517 UNSPEC_MOVA);
4518 }
4519 else
4520 {
4521 lab = add_constant (src, mode, 0);
4522 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4523 newsrc = gen_const_mem (mode, newsrc);
4524 }
4525 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4526 INSN_CODE (scan) = -1;
4527 }
4528 }
4529 dump_table (need_aligned_label ? insn : 0, barrier);
4530 insn = barrier;
4531 }
4532 }
4533
4534 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4535 INSN_ADDRESSES_FREE ();
4536 split_branches (first);
4537
4538 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4539 also has an effect on the register that holds the address of the sfunc.
4540 Insert an extra dummy insn in front of each sfunc that pretends to
4541 use this register. */
4542 if (flag_delayed_branch)
4543 {
4544 for (insn = first; insn; insn = NEXT_INSN (insn))
4545 {
4546 rtx reg = sfunc_uses_reg (insn);
4547
4548 if (! reg)
4549 continue;
4550 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4551 }
4552 }
4553 #if 0
4554 /* fpscr is not actually a user variable, but we pretend it is for the
4555 sake of the previous optimization passes, since we want it handled like
4556 one. However, we don't have any debugging information for it, so turn
4557 it into a non-user variable now. */
4558 if (TARGET_SH4)
4559 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4560 #endif
4561 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4562 }
4563
4564 int
4565 get_dest_uid (rtx label, int max_uid)
4566 {
4567 rtx dest = next_real_insn (label);
4568 int dest_uid;
4569 if (! dest)
4570 /* This can happen for an undefined label. */
4571 return 0;
4572 dest_uid = INSN_UID (dest);
4573 /* If this is a newly created branch redirection blocking instruction,
4574 we cannot index the branch_uid or insn_addresses arrays with its
4575 uid. But then, we won't need to, because the actual destination is
4576 the following branch. */
4577 while (dest_uid >= max_uid)
4578 {
4579 dest = NEXT_INSN (dest);
4580 dest_uid = INSN_UID (dest);
4581 }
4582 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4583 return 0;
4584 return dest_uid;
4585 }
4586
4587 /* Split condbranches that are out of range. Also add clobbers for
4588 scratch registers that are needed in far jumps.
4589 We do this before delay slot scheduling, so that it can take our
4590 newly created instructions into account. It also allows us to
4591 find branches with common targets more easily. */
4592
4593 static void
4594 split_branches (rtx first)
4595 {
4596 rtx insn;
4597 struct far_branch **uid_branch, *far_branch_list = 0;
4598 int max_uid = get_max_uid ();
4599 int ok;
4600
4601 /* Find out which branches are out of range. */
4602 shorten_branches (first);
4603
4604 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4605 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4606
4607 for (insn = first; insn; insn = NEXT_INSN (insn))
4608 if (! INSN_P (insn))
4609 continue;
4610 else if (INSN_DELETED_P (insn))
4611 {
4612 /* Shorten_branches would split this instruction again,
4613 so transform it into a note. */
4614 PUT_CODE (insn, NOTE);
4615 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4616 NOTE_SOURCE_FILE (insn) = 0;
4617 }
4618 else if (GET_CODE (insn) == JUMP_INSN
4619 /* Don't mess with ADDR_DIFF_VEC */
4620 && (GET_CODE (PATTERN (insn)) == SET
4621 || GET_CODE (PATTERN (insn)) == RETURN))
4622 {
4623 enum attr_type type = get_attr_type (insn);
4624 if (type == TYPE_CBRANCH)
4625 {
4626 rtx next, beyond;
4627
4628 if (get_attr_length (insn) > 4)
4629 {
4630 rtx src = SET_SRC (PATTERN (insn));
4631 rtx olabel = XEXP (XEXP (src, 1), 0);
4632 int addr = INSN_ADDRESSES (INSN_UID (insn));
4633 rtx label = 0;
4634 int dest_uid = get_dest_uid (olabel, max_uid);
4635 struct far_branch *bp = uid_branch[dest_uid];
4636
4637 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4638 the label if the LABEL_NUSES count drops to zero. There is
4639 always a jump_optimize pass that sets these values, but it
4640 proceeds to delete unreferenced code, and then if not
4641 optimizing, to un-delete the deleted instructions, thus
4642 	       leaving labels with use counts that are too low.  */
4643 if (! optimize)
4644 {
4645 JUMP_LABEL (insn) = olabel;
4646 LABEL_NUSES (olabel)++;
4647 }
4648 if (! bp)
4649 {
4650 bp = (struct far_branch *) alloca (sizeof *bp);
4651 uid_branch[dest_uid] = bp;
4652 bp->prev = far_branch_list;
4653 far_branch_list = bp;
4654 bp->far_label
4655 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4656 LABEL_NUSES (bp->far_label)++;
4657 }
4658 else
4659 {
4660 label = bp->near_label;
4661 if (! label && bp->address - addr >= CONDJUMP_MIN)
4662 {
4663 rtx block = bp->insert_place;
4664
4665 if (GET_CODE (PATTERN (block)) == RETURN)
4666 block = PREV_INSN (block);
4667 else
4668 block = gen_block_redirect (block,
4669 bp->address, 2);
4670 label = emit_label_after (gen_label_rtx (),
4671 PREV_INSN (block));
4672 bp->near_label = label;
4673 }
4674 else if (label && ! NEXT_INSN (label))
4675 {
4676 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4677 bp->insert_place = insn;
4678 else
4679 gen_far_branch (bp);
4680 }
4681 }
4682 if (! label
4683 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4684 {
4685 bp->near_label = label = gen_label_rtx ();
4686 bp->insert_place = insn;
4687 bp->address = addr;
4688 }
4689 ok = redirect_jump (insn, label, 1);
4690 gcc_assert (ok);
4691 }
4692 else
4693 {
4694 /* get_attr_length (insn) == 2 */
4695 /* Check if we have a pattern where reorg wants to redirect
4696 the branch to a label from an unconditional branch that
4697 is too far away. */
4698 /* We can't use JUMP_LABEL here because it might be undefined
4699 when not optimizing. */
4700 /* A syntax error might cause beyond to be NULL_RTX. */
4701 beyond
4702 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4703 0));
4704
4705 if (beyond
4706 && (GET_CODE (beyond) == JUMP_INSN
4707 || ((beyond = next_active_insn (beyond))
4708 && GET_CODE (beyond) == JUMP_INSN))
4709 && GET_CODE (PATTERN (beyond)) == SET
4710 && recog_memoized (beyond) == CODE_FOR_jump_compact
4711 && ((INSN_ADDRESSES
4712 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4713 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4714 > 252 + 258 + 2))
4715 gen_block_redirect (beyond,
4716 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4717 }
4718
4719 next = next_active_insn (insn);
4720
4721 if ((GET_CODE (next) == JUMP_INSN
4722 || ((next = next_active_insn (next))
4723 && GET_CODE (next) == JUMP_INSN))
4724 && GET_CODE (PATTERN (next)) == SET
4725 && recog_memoized (next) == CODE_FOR_jump_compact
4726 && ((INSN_ADDRESSES
4727 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4728 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4729 > 252 + 258 + 2))
4730 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4731 }
4732 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4733 {
4734 int addr = INSN_ADDRESSES (INSN_UID (insn));
4735 rtx far_label = 0;
4736 int dest_uid = 0;
4737 struct far_branch *bp;
4738
4739 if (type == TYPE_JUMP)
4740 {
4741 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4742 dest_uid = get_dest_uid (far_label, max_uid);
4743 if (! dest_uid)
4744 {
4745 /* Parse errors can lead to labels outside
4746 the insn stream. */
4747 if (! NEXT_INSN (far_label))
4748 continue;
4749
4750 if (! optimize)
4751 {
4752 JUMP_LABEL (insn) = far_label;
4753 LABEL_NUSES (far_label)++;
4754 }
4755 redirect_jump (insn, NULL_RTX, 1);
4756 far_label = 0;
4757 }
4758 }
4759 bp = uid_branch[dest_uid];
4760 if (! bp)
4761 {
4762 bp = (struct far_branch *) alloca (sizeof *bp);
4763 uid_branch[dest_uid] = bp;
4764 bp->prev = far_branch_list;
4765 far_branch_list = bp;
4766 bp->near_label = 0;
4767 bp->far_label = far_label;
4768 if (far_label)
4769 LABEL_NUSES (far_label)++;
4770 }
4771 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4772 if (addr - bp->address <= CONDJUMP_MAX)
4773 emit_label_after (bp->near_label, PREV_INSN (insn));
4774 else
4775 {
4776 gen_far_branch (bp);
4777 bp->near_label = 0;
4778 }
4779 else
4780 bp->near_label = 0;
4781 bp->address = addr;
4782 bp->insert_place = insn;
4783 if (! far_label)
4784 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4785 else
4786 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4787 }
4788 }
4789 /* Generate all pending far branches,
4790 and free our references to the far labels. */
4791 while (far_branch_list)
4792 {
4793 if (far_branch_list->near_label
4794 && ! NEXT_INSN (far_branch_list->near_label))
4795 gen_far_branch (far_branch_list);
4796 if (optimize
4797 && far_branch_list->far_label
4798 && ! --LABEL_NUSES (far_branch_list->far_label))
4799 delete_insn (far_branch_list->far_label);
4800 far_branch_list = far_branch_list->prev;
4801 }
4802
4803 /* Instruction length information is no longer valid due to the new
4804 instructions that have been generated. */
4805 init_insn_lengths ();
4806 }
4807
4808 /* Dump out instruction addresses, which is useful for debugging the
4809 constant pool table stuff.
4810
4811 If relaxing, output the label and pseudo-ops used to link together
4812 calls and the instruction which set the registers. */
4813
4814 /* ??? The addresses printed by this routine for insns are nonsense for
4815 insns which are inside of a sequence where none of the inner insns have
4816 variable length. This is because the second pass of shorten_branches
4817 does not bother to update them. */
4818
4819 void
4820 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4821 int noperands ATTRIBUTE_UNUSED)
4822 {
4823 if (TARGET_DUMPISIZE)
4824 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4825
4826 if (TARGET_RELAX)
4827 {
4828 rtx note;
4829
4830 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4831 if (note)
4832 {
4833 rtx pattern;
4834
4835 pattern = PATTERN (insn);
4836 switch (GET_CODE (pattern))
4837 {
4838 case PARALLEL:
4839 pattern = XVECEXP (pattern, 0, 0);
4840 break;
4841
4842 case SET:
4843 if (GET_CODE (SET_SRC (pattern)) != CALL
4844 && get_attr_type (insn) != TYPE_SFUNC)
4845 {
4846 targetm.asm_out.internal_label
4847 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
4848 break;
4849 }
4850 /* else FALLTHROUGH */
4851 case CALL:
4852 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4853 CODE_LABEL_NUMBER (XEXP (note, 0)));
4854 break;
4855
4856 default:
4857 gcc_unreachable ();
4858 }
4859 }
4860 }
4861 }
4862
4863 /* Dump out any constants accumulated in the final pass. These will
4864 only be labels. */
4865
4866 const char *
4867 output_jump_label_table (void)
4868 {
4869 int i;
4870
4871 if (pool_size)
4872 {
4873 fprintf (asm_out_file, "\t.align 2\n");
4874 for (i = 0; i < pool_size; i++)
4875 {
4876 pool_node *p = &pool_vector[i];
4877
4878 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4879 CODE_LABEL_NUMBER (p->label));
4880 output_asm_insn (".long %O0", &p->value);
4881 }
4882 pool_size = 0;
4883 }
4884
4885 return "";
4886 }
4887 \f
4888 /* A full frame looks like:
4889
4890 arg-5
4891 arg-4
4892 [ if current_function_anonymous_args
4893 arg-3
4894 arg-2
4895 arg-1
4896 arg-0 ]
4897 saved-fp
4898 saved-r10
4899 saved-r11
4900 saved-r12
4901 saved-pr
4902 local-n
4903 ..
4904 local-1
4905 local-0 <- fp points here. */
4906
4907 /* Number of bytes pushed for anonymous args, used to pass information
4908 between expand_prologue and expand_epilogue. */
4909
4910 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4911 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4912 for an epilogue and a negative value means that it's for a sibcall
4913 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4914 all the registers that are about to be restored, and hence dead. */
4915
4916 static void
4917 output_stack_adjust (int size, rtx reg, int epilogue_p,
4918 HARD_REG_SET *live_regs_mask)
4919 {
4920 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4921 if (size)
4922 {
4923 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4924
4925 /* This test is bogus, as output_stack_adjust is used to re-align the
4926 stack. */
4927 #if 0
4928 gcc_assert (!(size % align));
4929 #endif
4930
4931 if (CONST_OK_FOR_ADD (size))
4932 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4933 /* Try to do it with two partial adjustments; however, we must make
4934 sure that the stack is properly aligned at all times, in case
4935 an interrupt occurs between the two partial adjustments. */
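      /* Worked example (hypothetical numbers, not part of the original code):
	 for size = 200 and align = 8, the first adjustment is
	 (200 / 2) & -8 = 96 and the second is 200 - 96 = 104.  Both values
	 are small enough for CONST_OK_FOR_ADD on typical SH targets, and the
	 stack stays 8-byte aligned after the first step because 96 is a
	 multiple of the alignment.  */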
4936 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4937 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4938 {
4939 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4940 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4941 }
4942 else
4943 {
4944 rtx const_reg;
4945 rtx insn;
4946 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4947 int i;
4948
4949 /* If TEMP is invalid, we could temporarily save a general
4950 register to MACL. However, there is currently no need
4951 to handle this case, so just die when we see it. */
4952 if (epilogue_p < 0
4953 || current_function_interrupt
4954 || ! call_really_used_regs[temp] || fixed_regs[temp])
4955 temp = -1;
4956 if (temp < 0 && ! current_function_interrupt
4957 && (TARGET_SHMEDIA || epilogue_p >= 0))
4958 {
4959 HARD_REG_SET temps;
4960 COPY_HARD_REG_SET (temps, call_used_reg_set);
4961 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4962 if (epilogue_p > 0)
4963 {
4964 int nreg = 0;
4965 if (current_function_return_rtx)
4966 {
4967 enum machine_mode mode;
4968 mode = GET_MODE (current_function_return_rtx);
4969 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4970 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4971 }
4972 for (i = 0; i < nreg; i++)
4973 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4974 if (current_function_calls_eh_return)
4975 {
4976 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4977 for (i = 0; i <= 3; i++)
4978 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4979 }
4980 }
4981 if (TARGET_SHMEDIA && epilogue_p < 0)
4982 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4983 CLEAR_HARD_REG_BIT (temps, i);
4984 if (epilogue_p <= 0)
4985 {
4986 for (i = FIRST_PARM_REG;
4987 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4988 CLEAR_HARD_REG_BIT (temps, i);
4989 if (cfun->static_chain_decl != NULL)
4990 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4991 }
4992 temp = scavenge_reg (&temps);
4993 }
4994 if (temp < 0 && live_regs_mask)
4995 temp = scavenge_reg (live_regs_mask);
4996 if (temp < 0)
4997 {
4998 rtx adj_reg, tmp_reg, mem;
4999
5000 /* If we reached here, the most likely case is the (sibcall)
5001 	     epilogue for non-SHmedia.  Put a special push/pop sequence
5002 	     for such a case as the last resort.  This looks lengthy but
5003 	     would not be a problem because it seems to be very
5004 rare. */
5005
5006 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5007
5008
5009 /* ??? There is still the slight possibility that r4 or
5010 r5 have been reserved as fixed registers or assigned
5011 as global registers, and they change during an
5012 interrupt. There are possible ways to handle this:
5013
5014 - If we are adjusting the frame pointer (r14), we can do
5015 with a single temp register and an ordinary push / pop
5016 on the stack.
5017 - Grab any call-used or call-saved registers (i.e. not
5018 fixed or globals) for the temps we need. We might
5019 also grab r14 if we are adjusting the stack pointer.
5020 If we can't find enough available registers, issue
5021 a diagnostic and die - the user must have reserved
5022 way too many registers.
5023 But since all this is rather unlikely to happen and
5024 would require extra testing, we just die if r4 / r5
5025 are not available. */
5026 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5027 && !global_regs[4] && !global_regs[5]);
5028
5029 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5030 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5031 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
5032 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5033 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5034 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5035 emit_move_insn (mem, tmp_reg);
5036 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
5037 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5038 emit_move_insn (mem, tmp_reg);
5039 emit_move_insn (reg, adj_reg);
5040 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
5041 emit_move_insn (adj_reg, mem);
5042 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
5043 emit_move_insn (tmp_reg, mem);
5044 return;
5045 }
5046 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5047
5048 /* If SIZE is negative, subtract the positive value.
5049 This sometimes allows a constant pool entry to be shared
5050 between prologue and epilogue code. */
5051 if (size < 0)
5052 {
5053 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5054 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5055 }
5056 else
5057 {
5058 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5059 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5060 }
5061 if (! epilogue_p)
5062 REG_NOTES (insn)
5063 = (gen_rtx_EXPR_LIST
5064 (REG_FRAME_RELATED_EXPR,
5065 gen_rtx_SET (VOIDmode, reg,
5066 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5067 REG_NOTES (insn)));
5068 }
5069 }
5070 }
5071
5072 static rtx
5073 frame_insn (rtx x)
5074 {
5075 x = emit_insn (x);
5076 RTX_FRAME_RELATED_P (x) = 1;
5077 return x;
5078 }
5079
5080 /* Output RTL to push register RN onto the stack. */
5081
5082 static rtx
5083 push (int rn)
5084 {
5085 rtx x;
5086 if (rn == FPUL_REG)
5087 x = gen_push_fpul ();
5088 else if (rn == FPSCR_REG)
5089 x = gen_push_fpscr ();
5090 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5091 && FP_OR_XD_REGISTER_P (rn))
5092 {
5093 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5094 return NULL_RTX;
5095 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5096 }
5097 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5098 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5099 else
5100 x = gen_push (gen_rtx_REG (SImode, rn));
5101
5102 x = frame_insn (x);
5103 REG_NOTES (x)
5104 = gen_rtx_EXPR_LIST (REG_INC,
5105 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5106 return x;
5107 }
5108
5109 /* Output RTL to pop register RN from the stack. */
5110
5111 static void
5112 pop (int rn)
5113 {
5114 rtx x;
5115 if (rn == FPUL_REG)
5116 x = gen_pop_fpul ();
5117 else if (rn == FPSCR_REG)
5118 x = gen_pop_fpscr ();
5119 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5120 && FP_OR_XD_REGISTER_P (rn))
5121 {
5122 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5123 return;
5124 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5125 }
5126 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5127 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5128 else
5129 x = gen_pop (gen_rtx_REG (SImode, rn));
5130
5131 x = emit_insn (x);
5132 REG_NOTES (x)
5133 = gen_rtx_EXPR_LIST (REG_INC,
5134 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5135 }
5136
5137 /* Generate code to push the regs specified in the mask. */
5138
5139 static void
5140 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5141 {
5142 int i;
5143 int skip_fpscr = 0;
5144
5145 /* Push PR last; this gives better latencies after the prologue, and
5146 candidates for the return delay slot when there are no general
5147 registers pushed. */
5148 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5149 {
5150 /* If this is an interrupt handler, and the SZ bit varies,
5151 and we have to push any floating point register, we need
5152 to switch to the correct precision first. */
5153 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5154 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5155 {
5156 HARD_REG_SET unsaved;
5157
5158 push (FPSCR_REG);
5159 COMPL_HARD_REG_SET (unsaved, *mask);
5160 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5161 skip_fpscr = 1;
5162 }
5163 if (i != PR_REG
5164 && (i != FPSCR_REG || ! skip_fpscr)
5165 && TEST_HARD_REG_BIT (*mask, i))
5166 push (i);
5167 }
5168 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5169 push (PR_REG);
5170 }
5171
5172 /* Calculate how much extra space is needed to save all callee-saved
5173 target registers.
5174 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5175
5176 static int
5177 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5178 {
5179 int reg;
5180 int stack_space = 0;
5181 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5182
5183 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5184 if ((! call_really_used_regs[reg] || interrupt_handler)
5185 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5186 /* Leave space to save this target register on the stack,
5187 in case target register allocation wants to use it. */
5188 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5189 return stack_space;
5190 }
5191
5192 /* Decide whether we should reserve space for callee-save target registers,
5193 in case target register allocation wants to use them. REGS_SAVED is
5194 the space, in bytes, that is already required for register saves.
5195 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5196
5197 static int
5198 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5199 HARD_REG_SET *live_regs_mask)
5200 {
5201 if (optimize_size)
5202 return 0;
5203 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5204 }
5205
5206 /* Decide how much space to reserve for callee-save target registers
5207 in case target register allocation wants to use them.
5208 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5209
5210 static int
5211 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5212 {
5213 if (shmedia_space_reserved_for_target_registers)
5214 return shmedia_target_regs_stack_space (live_regs_mask);
5215 else
5216 return 0;
5217 }
5218
5219 /* Work out the registers which need to be saved, both as a mask and a
5220 count of saved words. Return the count.
5221
5222 If doing a pragma interrupt function, then push all regs used by the
5223 function, and if we call another function (we can tell by looking at PR),
5224 make sure that all the regs it clobbers are safe too. */
5225
5226 static int
5227 calc_live_regs (HARD_REG_SET *live_regs_mask)
5228 {
5229 unsigned int reg;
5230 int count;
5231 int interrupt_handler;
5232 int pr_live, has_call;
5233
5234 interrupt_handler = sh_cfun_interrupt_handler_p ();
5235
5236 CLEAR_HARD_REG_SET (*live_regs_mask);
5237 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5238 && regs_ever_live[FPSCR_REG])
5239 target_flags &= ~FPU_SINGLE_BIT;
5240   /* If switching to double mode saves a lot of register saves, do that.  */
5241 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5242 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5243 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5244 && (! call_really_used_regs[reg]
5245 || (interrupt_handler && ! pragma_trapa))
5246 && ++count > 2)
5247 {
5248 target_flags &= ~FPU_SINGLE_BIT;
5249 break;
5250 }
5251 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5252 knows how to use it. That means the pseudo originally allocated for
5253 the initial value can become the PR_MEDIA_REG hard register, as seen for
5254 execute/20010122-1.c:test9. */
5255 if (TARGET_SHMEDIA)
5256 /* ??? this function is called from initial_elimination_offset, hence we
5257 can't use the result of sh_media_register_for_return here. */
5258 pr_live = sh_pr_n_sets ();
5259 else
5260 {
5261 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5262 pr_live = (pr_initial
5263 ? (GET_CODE (pr_initial) != REG
5264 || REGNO (pr_initial) != (PR_REG))
5265 : regs_ever_live[PR_REG]);
5266       /* For SHcompact, if not optimizing, we end up with a memory reference
5267 using the return address pointer for __builtin_return_address even
5268 though there is no actual need to put the PR register on the stack. */
5269 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5270 }
5271 /* Force PR to be live if the prologue has to call the SHmedia
5272 argument decoder or register saver. */
5273 if (TARGET_SHCOMPACT
5274 && ((current_function_args_info.call_cookie
5275 & ~ CALL_COOKIE_RET_TRAMP (1))
5276 || current_function_has_nonlocal_label))
5277 pr_live = 1;
5278 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5279 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5280 {
5281 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5282 ? pr_live
5283 : (interrupt_handler && ! pragma_trapa)
5284 ? (/* Need to save all the regs ever live. */
5285 (regs_ever_live[reg]
5286 || (call_really_used_regs[reg]
5287 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5288 || reg == PIC_OFFSET_TABLE_REGNUM)
5289 && has_call)
5290 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5291 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5292 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5293 && reg != RETURN_ADDRESS_POINTER_REGNUM
5294 && reg != T_REG && reg != GBR_REG
5295 	     /* Push fpscr only on targets which have an FPU.  */
5296 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5297 : (/* Only push those regs which are used and need to be saved. */
5298 (TARGET_SHCOMPACT
5299 && flag_pic
5300 && current_function_args_info.call_cookie
5301 && reg == PIC_OFFSET_TABLE_REGNUM)
5302 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5303 || (current_function_calls_eh_return
5304 && (reg == EH_RETURN_DATA_REGNO (0)
5305 || reg == EH_RETURN_DATA_REGNO (1)
5306 || reg == EH_RETURN_DATA_REGNO (2)
5307 || reg == EH_RETURN_DATA_REGNO (3)))
5308 || ((reg == MACL_REG || reg == MACH_REG)
5309 && regs_ever_live[reg]
5310 && sh_cfun_attr_renesas_p ())
5311 ))
5312 {
5313 SET_HARD_REG_BIT (*live_regs_mask, reg);
5314 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5315
5316 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5317 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5318 {
5319 if (FP_REGISTER_P (reg))
5320 {
5321 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5322 {
5323 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5324 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5325 }
5326 }
5327 else if (XD_REGISTER_P (reg))
5328 {
5329 /* Must switch to double mode to access these registers. */
5330 target_flags &= ~FPU_SINGLE_BIT;
5331 }
5332 }
5333 }
5334 }
5335 /* If we have a target register optimization pass after prologue / epilogue
5336 threading, we need to assume all target registers will be live even if
5337 they aren't now. */
5338 if (flag_branch_target_load_optimize2
5339 && TARGET_SAVE_ALL_TARGET_REGS
5340 && shmedia_space_reserved_for_target_registers)
5341 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5342 if ((! call_really_used_regs[reg] || interrupt_handler)
5343 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5344 {
5345 SET_HARD_REG_BIT (*live_regs_mask, reg);
5346 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5347 }
5348 /* If this is an interrupt handler, we don't have any call-clobbered
5349 registers we can conveniently use for target register save/restore.
5350 Make sure we save at least one general purpose register when we need
5351 to save target registers. */
5352 if (interrupt_handler
5353 && hard_regs_intersect_p (live_regs_mask,
5354 &reg_class_contents[TARGET_REGS])
5355 && ! hard_regs_intersect_p (live_regs_mask,
5356 &reg_class_contents[GENERAL_REGS]))
5357 {
5358 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5359 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5360 }
5361
5362 return count;
5363 }
5364
5365 /* Code to generate prologue and epilogue sequences */
5366
5367 /* PUSHED is the number of bytes that are being pushed on the
5368 stack for register saves. Return the frame size, padded
5369 appropriately so that the stack stays properly aligned. */
5370 static HOST_WIDE_INT
5371 rounded_frame_size (int pushed)
5372 {
5373 HOST_WIDE_INT size = get_frame_size ();
5374 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5375
5376 return ((size + pushed + align - 1) & -align) - pushed;
5377 }
5378
5379 /* Choose a call-clobbered target-branch register that remains
5380 unchanged along the whole function. We set it up as the return
5381 value in the prologue. */
5382 int
5383 sh_media_register_for_return (void)
5384 {
5385 int regno;
5386 int tr0_used;
5387
5388 if (! current_function_is_leaf)
5389 return -1;
5390 if (lookup_attribute ("interrupt_handler",
5391 DECL_ATTRIBUTES (current_function_decl)))
5392 return -1;
5393 if (sh_cfun_interrupt_handler_p ())
5394 return -1;
5395
5396 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5397
5398 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5399 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5400 return regno;
5401
5402 return -1;
5403 }
5404
5405 /* The maximum registers we need to save are:
5406 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5407 - 32 floating point registers (for each pair, we save none,
5408 one single precision value, or a double precision value).
5409 - 8 target registers
5410 - add 1 entry for a delimiter. */
5411 #define MAX_SAVED_REGS (62+32+8)
5412
5413 typedef struct save_entry_s
5414 {
5415 unsigned char reg;
5416 unsigned char mode;
5417 short offset;
5418 } save_entry;
5419
5420 #define MAX_TEMPS 4
5421
5422 /* There will be a delimiter entry with VOIDmode both at the start and the
5423 end of a filled in schedule. The end delimiter has the offset of the
5424 save with the smallest (i.e. most negative) offset. */
5425 typedef struct save_schedule_s
5426 {
5427 save_entry entries[MAX_SAVED_REGS + 2];
5428 int temps[MAX_TEMPS+1];
5429 } save_schedule;
5430
5431 /* Fill in SCHEDULE according to LIVE_REGS_MASK; the epilogue walks the
5432 resulting entries in reverse order to restore.  Returns a pointer to the
5433 last entry written (not counting the end delimiter).  OFFSET_BASE is a
5434 number to be added to all offset entries. */
5435
5436 static save_entry *
5437 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5438 int offset_base)
5439 {
5440 int align, i;
5441 save_entry *entry = schedule->entries;
5442 int tmpx = 0;
5443 int offset;
5444
5445 if (! current_function_interrupt)
5446 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5447 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5448 && ! FUNCTION_ARG_REGNO_P (i)
5449 && i != FIRST_RET_REG
5450 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5451 && ! (current_function_calls_eh_return
5452 && (i == EH_RETURN_STACKADJ_REGNO
5453 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5454 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5455 schedule->temps[tmpx++] = i;
5456 entry->reg = -1;
5457 entry->mode = VOIDmode;
5458 entry->offset = offset_base;
5459 entry++;
5460 /* We loop twice: first, we save 8-byte aligned registers in the
5461 higher addresses, which are known to be aligned. Then, we
5462 proceed to saving 32-bit registers that don't need 8-byte
5463 alignment.
5464 If this is an interrupt function, all registers that need saving
5465 need to be saved in full. Moreover, we need to postpone saving
5466 target registers till we have saved some general purpose registers
5467 we can then use as scratch registers. */
5468 offset = offset_base;
5469 for (align = 1; align >= 0; align--)
5470 {
5471 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5472 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5473 {
5474 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5475 int reg = i;
5476
5477 if (current_function_interrupt)
5478 {
5479 if (TARGET_REGISTER_P (i))
5480 continue;
5481 if (GENERAL_REGISTER_P (i))
5482 mode = DImode;
5483 }
5484 if (mode == SFmode && (i % 2) == 1
5485 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5486 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5487 {
5488 mode = DFmode;
5489 i--;
5490 reg--;
5491 }
5492
5493 /* If we're doing the aligned pass and this is not aligned,
5494 or we're doing the unaligned pass and this is aligned,
5495 skip it. */
5496 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5497 != align)
5498 continue;
5499
5500 if (current_function_interrupt
5501 && GENERAL_REGISTER_P (i)
5502 && tmpx < MAX_TEMPS)
5503 schedule->temps[tmpx++] = i;
5504
5505 offset -= GET_MODE_SIZE (mode);
5506 entry->reg = i;
5507 entry->mode = mode;
5508 entry->offset = offset;
5509 entry++;
5510 }
5511 if (align && current_function_interrupt)
5512 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5513 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5514 {
5515 offset -= GET_MODE_SIZE (DImode);
5516 entry->reg = i;
5517 entry->mode = DImode;
5518 entry->offset = offset;
5519 entry++;
5520 }
5521 }
5522 entry->reg = -1;
5523 entry->mode = VOIDmode;
5524 entry->offset = offset;
5525 schedule->temps[tmpx] = -1;
5526 return entry - 1;
5527 }
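
/* For illustration only (not part of the original sources): a schedule
   filled in above is consumed in both directions.  sh_expand_prologue
   walks it forward, skipping the leading delimiter,

     for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
       ... emit a save of entry->reg at entry->offset ...

   while sh_expand_epilogue starts from the pointer returned here (the
   last real entry) and walks backward, restoring the registers in the
   reverse of the order in which they were saved.  */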
5528
5529 void
5530 sh_expand_prologue (void)
5531 {
5532 HARD_REG_SET live_regs_mask;
5533 int d, i;
5534 int d_rounding = 0;
5535 int save_flags = target_flags;
5536 int pretend_args;
5537
5538 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5539
5540 /* We have pretend args if we had an object sent partially in registers
5541 and partially on the stack, e.g. a large structure. */
5542 pretend_args = current_function_pretend_args_size;
5543 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5544 && (NPARM_REGS(SImode)
5545 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5546 pretend_args = 0;
5547 output_stack_adjust (-pretend_args
5548 - current_function_args_info.stack_regs * 8,
5549 stack_pointer_rtx, 0, NULL);
5550
5551 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5552 /* We're going to use the PIC register to load the address of the
5553 incoming-argument decoder and/or of the return trampoline from
5554 the GOT, so make sure the PIC register is preserved and
5555 initialized. */
5556 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5557
5558 if (TARGET_SHCOMPACT
5559 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5560 {
5561 int reg;
5562
5563 /* First, make all registers with incoming arguments that will
5564 be pushed onto the stack live, so that register renaming
5565 doesn't overwrite them. */
5566 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5567 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5568 >= NPARM_REGS (SImode) - reg)
5569 for (; reg < NPARM_REGS (SImode); reg++)
5570 emit_insn (gen_shcompact_preserve_incoming_args
5571 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5572 else if (CALL_COOKIE_INT_REG_GET
5573 (current_function_args_info.call_cookie, reg) == 1)
5574 emit_insn (gen_shcompact_preserve_incoming_args
5575 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5576
5577 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5578 stack_pointer_rtx);
5579 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5580 GEN_INT (current_function_args_info.call_cookie));
5581 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5582 gen_rtx_REG (SImode, R0_REG));
5583 }
5584 else if (TARGET_SHMEDIA)
5585 {
5586 int tr = sh_media_register_for_return ();
5587
5588 if (tr >= 0)
5589 {
5590 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5591 gen_rtx_REG (DImode, PR_MEDIA_REG));
5592
5593 /* ??? We should suppress saving pr when we don't need it, but this
5594 is tricky because of builtin_return_address. */
5595
5596 /* If this function only exits with sibcalls, this copy
5597 will be flagged as dead. */
5598 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5599 const0_rtx,
5600 REG_NOTES (insn));
5601 }
5602 }
5603
5604 /* Emit the code for SETUP_VARARGS. */
5605 if (current_function_stdarg)
5606 {
5607 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5608 {
5609 /* Push arg regs as if they'd been provided by the caller on the stack. */
5610 for (i = 0; i < NPARM_REGS(SImode); i++)
5611 {
5612 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5613 rtx insn;
5614
5615 if (i >= (NPARM_REGS(SImode)
5616 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5617 ))
5618 break;
5619 insn = push (rn);
5620 RTX_FRAME_RELATED_P (insn) = 0;
5621 }
5622 }
5623 }
5624
5625 /* If we're supposed to switch stacks at function entry, do so now. */
5626 if (sp_switch)
5627 emit_insn (gen_sp_switch_1 ());
5628
5629 d = calc_live_regs (&live_regs_mask);
5630 /* ??? Maybe we could save some switching if we can move a mode switch
5631 that already happens to be at the function start into the prologue. */
5632 if (target_flags != save_flags && ! current_function_interrupt)
5633 emit_insn (gen_toggle_sz ());
5634
5635 if (TARGET_SH5)
5636 {
5637 int offset_base, offset;
5638 rtx r0 = NULL_RTX;
5639 int offset_in_r0 = -1;
5640 int sp_in_r0 = 0;
5641 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5642 int total_size, save_size;
5643 save_schedule schedule;
5644 save_entry *entry;
5645 int *tmp_pnt;
5646
5647 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5648 && ! current_function_interrupt)
5649 r0 = gen_rtx_REG (Pmode, R0_REG);
5650
5651 /* D is the actual number of bytes that we need for saving registers;
5652 however, in initial_elimination_offset we have committed to using
5653 an additional TREGS_SPACE amount of bytes.  In order to keep both
5654 addresses to arguments supplied by the caller and local variables
5655 valid, we must keep this gap.  Place it between the incoming
5656 arguments and the actually saved registers in a bid to optimize
5657 locality of reference. */
5658 total_size = d + tregs_space;
5659 total_size += rounded_frame_size (total_size);
5660 save_size = total_size - rounded_frame_size (d);
5661 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5662 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5663 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5664
5665 /* If adjusting the stack in a single step costs nothing extra, do so.
5666 I.e. either if a single addi is enough, or we need a movi anyway,
5667 and we don't exceed the maximum offset range (the test for the
5668 latter is conservative for simplicity). */
5669 if (TARGET_SHMEDIA
5670 && (CONST_OK_FOR_I10 (-total_size)
5671 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5672 && total_size <= 2044)))
5673 d_rounding = total_size - save_size;
5674
5675 offset_base = d + d_rounding;
5676
5677 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5678 0, NULL);
5679
5680 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5681 tmp_pnt = schedule.temps;
5682 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5683 {
5684 enum machine_mode mode = entry->mode;
5685 unsigned int reg = entry->reg;
5686 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5687 rtx orig_reg_rtx;
5688
5689 offset = entry->offset;
5690
5691 reg_rtx = gen_rtx_REG (mode, reg);
5692
5693 mem_rtx = gen_rtx_MEM (mode,
5694 gen_rtx_PLUS (Pmode,
5695 stack_pointer_rtx,
5696 GEN_INT (offset)));
5697
5698 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5699
5700 gcc_assert (r0);
5701 mem_rtx = NULL_RTX;
5702
5703 try_pre_dec:
5704 do
5705 if (HAVE_PRE_DECREMENT
5706 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5707 || mem_rtx == NULL_RTX
5708 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5709 {
5710 pre_dec = gen_rtx_MEM (mode,
5711 gen_rtx_PRE_DEC (Pmode, r0));
5712
5713 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5714 pre_dec_ok);
5715
5716 pre_dec = NULL_RTX;
5717
5718 break;
5719
5720 pre_dec_ok:
5721 mem_rtx = NULL_RTX;
5722 offset += GET_MODE_SIZE (mode);
5723 }
5724 while (0);
5725
5726 if (mem_rtx != NULL_RTX)
5727 goto addr_ok;
5728
5729 if (offset_in_r0 == -1)
5730 {
5731 emit_move_insn (r0, GEN_INT (offset));
5732 offset_in_r0 = offset;
5733 }
5734 else if (offset != offset_in_r0)
5735 {
5736 emit_move_insn (r0,
5737 gen_rtx_PLUS
5738 (Pmode, r0,
5739 GEN_INT (offset - offset_in_r0)));
5740 offset_in_r0 += offset - offset_in_r0;
5741 }
5742
5743 if (pre_dec != NULL_RTX)
5744 {
5745 if (! sp_in_r0)
5746 {
5747 emit_move_insn (r0,
5748 gen_rtx_PLUS
5749 (Pmode, r0, stack_pointer_rtx));
5750 sp_in_r0 = 1;
5751 }
5752
5753 offset -= GET_MODE_SIZE (mode);
5754 offset_in_r0 -= GET_MODE_SIZE (mode);
5755
5756 mem_rtx = pre_dec;
5757 }
5758 else if (sp_in_r0)
5759 mem_rtx = gen_rtx_MEM (mode, r0);
5760 else
5761 mem_rtx = gen_rtx_MEM (mode,
5762 gen_rtx_PLUS (Pmode,
5763 stack_pointer_rtx,
5764 r0));
5765
5766 /* We must not use an r0-based address for target-branch
5767 registers or for special registers without pre-dec
5768 memory addresses, since we store their values in r0
5769 first. */
5770 gcc_assert (!TARGET_REGISTER_P (reg)
5771 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5772 || mem_rtx == pre_dec));
5773
5774 addr_ok:
5775 orig_reg_rtx = reg_rtx;
5776 if (TARGET_REGISTER_P (reg)
5777 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5778 && mem_rtx != pre_dec))
5779 {
5780 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5781
5782 emit_move_insn (tmp_reg, reg_rtx);
5783
5784 if (REGNO (tmp_reg) == R0_REG)
5785 {
5786 offset_in_r0 = -1;
5787 sp_in_r0 = 0;
5788 gcc_assert (!refers_to_regno_p
5789 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5790 }
5791
5792 if (*++tmp_pnt <= 0)
5793 tmp_pnt = schedule.temps;
5794
5795 reg_rtx = tmp_reg;
5796 }
5797 {
5798 rtx insn;
5799
5800 /* Mark as interesting for dwarf cfi generator */
5801 insn = emit_move_insn (mem_rtx, reg_rtx);
5802 RTX_FRAME_RELATED_P (insn) = 1;
5803 /* If we use an intermediate register for the save, we can't
5804 describe this exactly in cfi as a copy of the to-be-saved
5805 register into the temporary register and then a store of the
5806 temporary register to the stack, because the temporary register can
5807 have a different natural size than the to-be-saved register.
5808 Thus, we gloss over the intermediate copy and pretend we do
5809 a direct save from the to-be-saved register. */
5810 if (REGNO (reg_rtx) != reg)
5811 {
5812 rtx set, note_rtx;
5813
5814 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5815 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5816 REG_NOTES (insn));
5817 REG_NOTES (insn) = note_rtx;
5818 }
5819
5820 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5821 {
5822 rtx reg_rtx = gen_rtx_REG (mode, reg);
5823 rtx set, note_rtx;
5824 rtx mem_rtx = gen_rtx_MEM (mode,
5825 gen_rtx_PLUS (Pmode,
5826 stack_pointer_rtx,
5827 GEN_INT (offset)));
5828
5829 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5830 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5831 REG_NOTES (insn));
5832 REG_NOTES (insn) = note_rtx;
5833 }
5834 }
5835 }
5836
5837 gcc_assert (entry->offset == d_rounding);
5838 }
5839 else
5840 push_regs (&live_regs_mask, current_function_interrupt);
5841
5842 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5843 {
5844 rtx insn = get_last_insn ();
5845 rtx last = emit_insn (gen_GOTaddr2picreg ());
5846
5847 /* Mark these insns as possibly dead. Sometimes, flow2 may
5848 delete all uses of the PIC register. In this case, let it
5849 delete the initialization too. */
5850 do
5851 {
5852 insn = NEXT_INSN (insn);
5853
5854 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5855 const0_rtx,
5856 REG_NOTES (insn));
5857 }
5858 while (insn != last);
5859 }
5860
5861 if (SHMEDIA_REGS_STACK_ADJUST ())
5862 {
5863 /* This must NOT go through the PLT, otherwise mach and macl
5864 may be clobbered. */
5865 function_symbol (gen_rtx_REG (Pmode, R0_REG),
5866 (TARGET_FPU_ANY
5867 ? "__GCC_push_shmedia_regs"
5868 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
5869 emit_insn (gen_shmedia_save_restore_regs_compact
5870 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5871 }
5872
5873 if (target_flags != save_flags && ! current_function_interrupt)
5874 {
5875 rtx insn = emit_insn (gen_toggle_sz ());
5876
5877 /* If we're lucky, a mode switch in the function body will
5878 overwrite fpscr, turning this insn dead. Tell flow this
5879 insn is ok to delete. */
5880 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5881 const0_rtx,
5882 REG_NOTES (insn));
5883 }
5884
5885 target_flags = save_flags;
5886
5887 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5888 stack_pointer_rtx, 0, NULL);
5889
5890 if (frame_pointer_needed)
5891 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5892
5893 if (TARGET_SHCOMPACT
5894 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5895 {
5896 /* This must NOT go through the PLT, otherwise mach and macl
5897 may be clobbered. */
5898 function_symbol (gen_rtx_REG (Pmode, R0_REG),
5899 "__GCC_shcompact_incoming_args", SFUNC_GOT);
5900 emit_insn (gen_shcompact_incoming_args ());
5901 }
5902 }
5903
5904 void
5905 sh_expand_epilogue (bool sibcall_p)
5906 {
5907 HARD_REG_SET live_regs_mask;
5908 int d, i;
5909 int d_rounding = 0;
5910
5911 int save_flags = target_flags;
5912 int frame_size, save_size;
5913 int fpscr_deferred = 0;
5914 int e = sibcall_p ? -1 : 1;
5915
5916 d = calc_live_regs (&live_regs_mask);
5917
5918 save_size = d;
5919 frame_size = rounded_frame_size (d);
5920
5921 if (TARGET_SH5)
5922 {
5923 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5924 int total_size;
5925 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5926 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5927 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5928
5929 total_size = d + tregs_space;
5930 total_size += rounded_frame_size (total_size);
5931 save_size = total_size - frame_size;
5932
5933 /* If adjusting the stack in a single step costs nothing extra, do so.
5934 I.e. either if a single addi is enough, or we need a movi anyway,
5935 and we don't exceed the maximum offset range (the test for the
5936 latter is conservative for simplicity). */
5937 if (TARGET_SHMEDIA
5938 && ! frame_pointer_needed
5939 && (CONST_OK_FOR_I10 (total_size)
5940 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5941 && total_size <= 2044)))
5942 d_rounding = frame_size;
5943
5944 frame_size -= d_rounding;
5945 }
5946
5947 if (frame_pointer_needed)
5948 {
5949 /* We must avoid scheduling the epilogue with previous basic blocks
5950 when exception handling is enabled. See PR/18032. */
5951 if (flag_exceptions)
5952 emit_insn (gen_blockage ());
5953 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5954
5955 /* We must avoid moving the stack pointer adjustment past code
5956 which reads from the local frame, else an interrupt could
5957 occur after the SP adjustment and clobber data in the local
5958 frame. */
5959 emit_insn (gen_blockage ());
5960 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5961 }
5962 else if (frame_size)
5963 {
5964 /* We must avoid moving the stack pointer adjustment past code
5965 which reads from the local frame, else an interrupt could
5966 occur after the SP adjustment and clobber data in the local
5967 frame. */
5968 emit_insn (gen_blockage ());
5969 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5970 }
5971
5972 if (SHMEDIA_REGS_STACK_ADJUST ())
5973 {
5974 function_symbol (gen_rtx_REG (Pmode, R0_REG),
5975 (TARGET_FPU_ANY
5976 ? "__GCC_pop_shmedia_regs"
5977 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
5978 /* This must NOT go through the PLT, otherwise mach and macl
5979 may be clobbered. */
5980 emit_insn (gen_shmedia_save_restore_regs_compact
5981 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5982 }
5983
5984 /* Pop all the registers. */
5985
5986 if (target_flags != save_flags && ! current_function_interrupt)
5987 emit_insn (gen_toggle_sz ());
5988 if (TARGET_SH5)
5989 {
5990 int offset_base, offset;
5991 int offset_in_r0 = -1;
5992 int sp_in_r0 = 0;
5993 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5994 save_schedule schedule;
5995 save_entry *entry;
5996 int *tmp_pnt;
5997
5998 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5999 offset_base = -entry[1].offset + d_rounding;
6000 tmp_pnt = schedule.temps;
6001 for (; entry->mode != VOIDmode; entry--)
6002 {
6003 enum machine_mode mode = entry->mode;
6004 int reg = entry->reg;
6005 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6006
6007 offset = offset_base + entry->offset;
6008 reg_rtx = gen_rtx_REG (mode, reg);
6009
6010 mem_rtx = gen_rtx_MEM (mode,
6011 gen_rtx_PLUS (Pmode,
6012 stack_pointer_rtx,
6013 GEN_INT (offset)));
6014
6015 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6016
6017 mem_rtx = NULL_RTX;
6018
6019 try_post_inc:
6020 do
6021 if (HAVE_POST_INCREMENT
6022 && (offset == offset_in_r0
6023 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6024 && mem_rtx == NULL_RTX)
6025 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6026 {
6027 post_inc = gen_rtx_MEM (mode,
6028 gen_rtx_POST_INC (Pmode, r0));
6029
6030 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6031 post_inc_ok);
6032
6033 post_inc = NULL_RTX;
6034
6035 break;
6036
6037 post_inc_ok:
6038 mem_rtx = NULL_RTX;
6039 }
6040 while (0);
6041
6042 if (mem_rtx != NULL_RTX)
6043 goto addr_ok;
6044
6045 if (offset_in_r0 == -1)
6046 {
6047 emit_move_insn (r0, GEN_INT (offset));
6048 offset_in_r0 = offset;
6049 }
6050 else if (offset != offset_in_r0)
6051 {
6052 emit_move_insn (r0,
6053 gen_rtx_PLUS
6054 (Pmode, r0,
6055 GEN_INT (offset - offset_in_r0)));
6056 offset_in_r0 += offset - offset_in_r0;
6057 }
6058
6059 if (post_inc != NULL_RTX)
6060 {
6061 if (! sp_in_r0)
6062 {
6063 emit_move_insn (r0,
6064 gen_rtx_PLUS
6065 (Pmode, r0, stack_pointer_rtx));
6066 sp_in_r0 = 1;
6067 }
6068
6069 mem_rtx = post_inc;
6070
6071 offset_in_r0 += GET_MODE_SIZE (mode);
6072 }
6073 else if (sp_in_r0)
6074 mem_rtx = gen_rtx_MEM (mode, r0);
6075 else
6076 mem_rtx = gen_rtx_MEM (mode,
6077 gen_rtx_PLUS (Pmode,
6078 stack_pointer_rtx,
6079 r0));
6080
6081 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6082 || mem_rtx == post_inc);
6083
6084 addr_ok:
6085 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6086 && mem_rtx != post_inc)
6087 {
6088 insn = emit_move_insn (r0, mem_rtx);
6089 mem_rtx = r0;
6090 }
6091 else if (TARGET_REGISTER_P (reg))
6092 {
6093 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6094
6095 /* Give the scheduler a bit of freedom by using up to
6096 MAX_TEMPS registers in a round-robin fashion. */
6097 insn = emit_move_insn (tmp_reg, mem_rtx);
6098 mem_rtx = tmp_reg;
6099 if (*++tmp_pnt < 0)
6100 tmp_pnt = schedule.temps;
6101 }
6102
6103 insn = emit_move_insn (reg_rtx, mem_rtx);
6104 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6105 /* This is dead, unless we return with a sibcall. */
6106 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6107 const0_rtx,
6108 REG_NOTES (insn));
6109 }
6110
6111 gcc_assert (entry->offset + offset_base == d + d_rounding);
6112 }
6113 else /* ! TARGET_SH5 */
6114 {
6115 save_size = 0;
6116 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6117 pop (PR_REG);
6118 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6119 {
6120 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6121
6122 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6123 && hard_regs_intersect_p (&live_regs_mask,
6124 &reg_class_contents[DF_REGS]))
6125 fpscr_deferred = 1;
6126 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6127 pop (j);
6128 if (j == FIRST_FP_REG && fpscr_deferred)
6129 pop (FPSCR_REG);
6130
6131 }
6132 }
6133 if (target_flags != save_flags && ! current_function_interrupt)
6134 emit_insn (gen_toggle_sz ());
6135 target_flags = save_flags;
6136
6137 output_stack_adjust (current_function_pretend_args_size
6138 + save_size + d_rounding
6139 + current_function_args_info.stack_regs * 8,
6140 stack_pointer_rtx, e, NULL);
6141
6142 if (current_function_calls_eh_return)
6143 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6144 EH_RETURN_STACKADJ_RTX));
6145
6146 /* Switch back to the normal stack if necessary. */
6147 if (sp_switch)
6148 emit_insn (gen_sp_switch_2 ());
6149
6150 /* Tell flow the insn that pops PR isn't dead. */
6151 /* PR_REG will never be live in SHmedia mode, and we don't need to
6152 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6153 by the return pattern. */
6154 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6155 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6156 }
6157
6158 static int sh_need_epilogue_known = 0;
6159
6160 int
6161 sh_need_epilogue (void)
6162 {
6163 if (! sh_need_epilogue_known)
6164 {
6165 rtx epilogue;
6166
6167 start_sequence ();
6168 sh_expand_epilogue (0);
6169 epilogue = get_insns ();
6170 end_sequence ();
6171 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6172 }
6173 return sh_need_epilogue_known > 0;
6174 }
6175
6176 /* Emit code to change the current function's return address to RA.
6177 TMP is available as a scratch register, if needed. */
6178
6179 void
6180 sh_set_return_address (rtx ra, rtx tmp)
6181 {
6182 HARD_REG_SET live_regs_mask;
6183 int d;
6184 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6185 int pr_offset;
6186
6187 d = calc_live_regs (&live_regs_mask);
6188
6189 /* If pr_reg isn't live, we can set it (or the register given in
6190 sh_media_register_for_return) directly. */
6191 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6192 {
6193 rtx rr;
6194
6195 if (TARGET_SHMEDIA)
6196 {
6197 int rr_regno = sh_media_register_for_return ();
6198
6199 if (rr_regno < 0)
6200 rr_regno = pr_reg;
6201
6202 rr = gen_rtx_REG (DImode, rr_regno);
6203 }
6204 else
6205 rr = gen_rtx_REG (SImode, pr_reg);
6206
6207 emit_insn (GEN_MOV (rr, ra));
6208 /* Tell flow the register for return isn't dead. */
6209 emit_insn (gen_rtx_USE (VOIDmode, rr));
6210 return;
6211 }
6212
6213 if (TARGET_SH5)
6214 {
6215 int offset;
6216 save_schedule schedule;
6217 save_entry *entry;
6218
6219 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6220 offset = entry[1].offset;
6221 for (; entry->mode != VOIDmode; entry--)
6222 if (entry->reg == pr_reg)
6223 goto found;
6224
6225 /* We can't find the PR register. */
6226 gcc_unreachable ();
6227
6228 found:
6229 offset = entry->offset - offset;
6230 pr_offset = (rounded_frame_size (d) + offset
6231 + SHMEDIA_REGS_STACK_ADJUST ());
6232 }
6233 else
6234 pr_offset = rounded_frame_size (d);
6235
6236 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6237 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6238
6239 tmp = gen_rtx_MEM (Pmode, tmp);
6240 emit_insn (GEN_MOV (tmp, ra));
6241 }
6242
6243 /* Clear variables at function end. */
6244
6245 static void
6246 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6247 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6248 {
6249 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6250 sh_need_epilogue_known = 0;
6251 sp_switch = NULL_RTX;
6252 }
6253
6254 static rtx
6255 sh_builtin_saveregs (void)
6256 {
6257 /* First unnamed integer register. */
6258 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6259 /* Number of integer registers we need to save. */
6260 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6261 /* First unnamed SFmode float reg */
6262 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6263 /* Number of SFmode float regs to save. */
6264 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6265 rtx regbuf, fpregs;
6266 int bufsize, regno;
6267 HOST_WIDE_INT alias_set;
6268
6269 if (TARGET_SH5)
6270 {
6271 if (n_intregs)
6272 {
6273 int pushregs = n_intregs;
6274
6275 while (pushregs < NPARM_REGS (SImode) - 1
6276 && (CALL_COOKIE_INT_REG_GET
6277 (current_function_args_info.call_cookie,
6278 NPARM_REGS (SImode) - pushregs)
6279 == 1))
6280 {
6281 current_function_args_info.call_cookie
6282 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6283 - pushregs, 1);
6284 pushregs++;
6285 }
6286
6287 if (pushregs == NPARM_REGS (SImode))
6288 current_function_args_info.call_cookie
6289 |= (CALL_COOKIE_INT_REG (0, 1)
6290 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6291 else
6292 current_function_args_info.call_cookie
6293 |= CALL_COOKIE_STACKSEQ (pushregs);
6294
6295 current_function_pretend_args_size += 8 * n_intregs;
6296 }
6297 if (TARGET_SHCOMPACT)
6298 return const0_rtx;
6299 }
6300
6301 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6302 {
6303 error ("__builtin_saveregs not supported by this subtarget");
6304 return const0_rtx;
6305 }
6306
6307 if (TARGET_SHMEDIA)
6308 n_floatregs = 0;
6309
6310 /* Allocate block of memory for the regs. */
6311 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6312 Or can assign_stack_local accept a 0 SIZE argument? */
6313 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6314
6315 if (TARGET_SHMEDIA)
6316 regbuf = gen_rtx_MEM (BLKmode,
6317 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6318 else if (n_floatregs & 1)
6319 {
6320 rtx addr;
6321
6322 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6323 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6324 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6325 regbuf = change_address (regbuf, BLKmode, addr);
6326 }
6327 else
6328 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6329 alias_set = get_varargs_alias_set ();
6330 set_mem_alias_set (regbuf, alias_set);
6331
6332 /* Save int args.
6333 This is optimized to only save the regs that are necessary. Explicitly
6334 named args need not be saved. */
6335 if (n_intregs > 0)
6336 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6337 adjust_address (regbuf, BLKmode,
6338 n_floatregs * UNITS_PER_WORD),
6339 n_intregs);
6340
6341 if (TARGET_SHMEDIA)
6342 /* Return the address of the regbuf. */
6343 return XEXP (regbuf, 0);
6344
6345 /* Save float args.
6346 This is optimized to only save the regs that are necessary. Explicitly
6347 named args need not be saved.
6348 We explicitly build a pointer to the buffer because it halves the insn
6349 count when not optimizing (otherwise the pointer is built for each reg
6350 saved).
6351 We emit the moves in reverse order so that we can use predecrement. */
6352
6353 fpregs = copy_to_mode_reg (Pmode,
6354 plus_constant (XEXP (regbuf, 0),
6355 n_floatregs * UNITS_PER_WORD));
6356 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6357 {
6358 rtx mem;
6359 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6360 {
6361 emit_insn (gen_addsi3 (fpregs, fpregs,
6362 GEN_INT (-2 * UNITS_PER_WORD)));
6363 mem = gen_rtx_MEM (DFmode, fpregs);
6364 set_mem_alias_set (mem, alias_set);
6365 emit_move_insn (mem,
6366 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6367 }
6368 regno = first_floatreg;
6369 if (regno & 1)
6370 {
6371 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6372 mem = gen_rtx_MEM (SFmode, fpregs);
6373 set_mem_alias_set (mem, alias_set);
6374 emit_move_insn (mem,
6375 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6376 - (TARGET_LITTLE_ENDIAN != 0)));
6377 }
6378 }
6379 else
6380 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6381 {
6382 rtx mem;
6383
6384 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6385 mem = gen_rtx_MEM (SFmode, fpregs);
6386 set_mem_alias_set (mem, alias_set);
6387 emit_move_insn (mem,
6388 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6389 }
6390
6391 /* Return the address of the regbuf. */
6392 return XEXP (regbuf, 0);
6393 }
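
/* For illustration only (not part of the original sources): for the
   non-SHmedia targets handled above, REGBUF is roughly laid out as

     regbuf + 0                             FP argument register save area
                                            (n_floatregs * UNITS_PER_WORD bytes)
     regbuf + n_floatregs * UNITS_PER_WORD  integer argument register save area
                                            (n_intregs * UNITS_PER_WORD bytes)

   The integer block is filled upward by move_block_from_reg, while the
   FP registers are stored downward through the decremented FPREGS
   pointer, so the two blocks meet at the boundary between them.  */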
6394
6395 /* Define the `__builtin_va_list' type for the ABI. */
6396
6397 static tree
6398 sh_build_builtin_va_list (void)
6399 {
6400 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6401 tree record;
6402
6403 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6404 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6405 return ptr_type_node;
6406
6407 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6408
6409 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6410 ptr_type_node);
6411 f_next_o_limit = build_decl (FIELD_DECL,
6412 get_identifier ("__va_next_o_limit"),
6413 ptr_type_node);
6414 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6415 ptr_type_node);
6416 f_next_fp_limit = build_decl (FIELD_DECL,
6417 get_identifier ("__va_next_fp_limit"),
6418 ptr_type_node);
6419 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6420 ptr_type_node);
6421
6422 DECL_FIELD_CONTEXT (f_next_o) = record;
6423 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6424 DECL_FIELD_CONTEXT (f_next_fp) = record;
6425 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6426 DECL_FIELD_CONTEXT (f_next_stack) = record;
6427
6428 TYPE_FIELDS (record) = f_next_o;
6429 TREE_CHAIN (f_next_o) = f_next_o_limit;
6430 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6431 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6432 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6433
6434 layout_type (record);
6435
6436 return record;
6437 }
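
/* For illustration only (not part of the original sources): on the
   SH2E/SH4 targets that take the record branch above, the resulting
   __builtin_va_list is roughly equivalent to

     struct {
       void *__va_next_o;         -- next integer arg in the reg save area
       void *__va_next_o_limit;   -- end of the integer reg save area
       void *__va_next_fp;        -- next FP arg in the reg save area
       void *__va_next_fp_limit;  -- end of the FP reg save area
       void *__va_next_stack;     -- next argument passed on the stack
     };

   (Field roles are inferred from sh_va_start and sh_gimplify_va_arg_expr
   below.)  */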
6438
6439 /* Implement `va_start' for varargs and stdarg. */
6440
6441 void
6442 sh_va_start (tree valist, rtx nextarg)
6443 {
6444 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6445 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6446 tree t, u;
6447 int nfp, nint;
6448
6449 if (TARGET_SH5)
6450 {
6451 expand_builtin_saveregs ();
6452 std_expand_builtin_va_start (valist, nextarg);
6453 return;
6454 }
6455
6456 if ((! TARGET_SH2E && ! TARGET_SH4)
6457 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6458 {
6459 std_expand_builtin_va_start (valist, nextarg);
6460 return;
6461 }
6462
6463 f_next_o = TYPE_FIELDS (va_list_type_node);
6464 f_next_o_limit = TREE_CHAIN (f_next_o);
6465 f_next_fp = TREE_CHAIN (f_next_o_limit);
6466 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6467 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6468
6469 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6470 NULL_TREE);
6471 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6472 valist, f_next_o_limit, NULL_TREE);
6473 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6474 NULL_TREE);
6475 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6476 valist, f_next_fp_limit, NULL_TREE);
6477 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6478 valist, f_next_stack, NULL_TREE);
6479
6480 /* Call __builtin_saveregs. */
6481 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6482 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6483 TREE_SIDE_EFFECTS (t) = 1;
6484 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6485
6486 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6487 if (nfp < 8)
6488 nfp = 8 - nfp;
6489 else
6490 nfp = 0;
6491 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6492 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6493 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6494 TREE_SIDE_EFFECTS (t) = 1;
6495 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6496
6497 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6498 TREE_SIDE_EFFECTS (t) = 1;
6499 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6500
6501 nint = current_function_args_info.arg_count[SH_ARG_INT];
6502 if (nint < 4)
6503 nint = 4 - nint;
6504 else
6505 nint = 0;
6506 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6507 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6508 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6509 TREE_SIDE_EFFECTS (t) = 1;
6510 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6511
6512 u = make_tree (ptr_type_node, nextarg);
6513 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6514 TREE_SIDE_EFFECTS (t) = 1;
6515 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6516 }
6517
6518 /* Implement `va_arg'. */
6519
6520 static tree
6521 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6522 tree *post_p ATTRIBUTE_UNUSED)
6523 {
6524 HOST_WIDE_INT size, rsize;
6525 tree tmp, pptr_type_node;
6526 tree addr, lab_over = NULL, result = NULL;
6527 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6528
6529 if (pass_by_ref)
6530 type = build_pointer_type (type);
6531
6532 size = int_size_in_bytes (type);
6533 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6534 pptr_type_node = build_pointer_type (ptr_type_node);
6535
6536 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6537 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6538 {
6539 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6540 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6541 int pass_as_float;
6542 tree lab_false;
6543
6544 f_next_o = TYPE_FIELDS (va_list_type_node);
6545 f_next_o_limit = TREE_CHAIN (f_next_o);
6546 f_next_fp = TREE_CHAIN (f_next_o_limit);
6547 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6548 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6549
6550 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6551 NULL_TREE);
6552 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6553 valist, f_next_o_limit, NULL_TREE);
6554 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6555 valist, f_next_fp, NULL_TREE);
6556 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6557 valist, f_next_fp_limit, NULL_TREE);
6558 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6559 valist, f_next_stack, NULL_TREE);
6560
6561 /* Structures with a single member with a distinct mode are passed
6562 like their member. This is relevant if the latter has a REAL_TYPE
6563 or COMPLEX_TYPE type. */
6564 if (TREE_CODE (type) == RECORD_TYPE
6565 && TYPE_FIELDS (type)
6566 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6567 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6568 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6569 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6570 type = TREE_TYPE (TYPE_FIELDS (type));
6571
6572 if (TARGET_SH4)
6573 {
6574 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6575 || (TREE_CODE (type) == COMPLEX_TYPE
6576 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6577 && size <= 16));
6578 }
6579 else
6580 {
6581 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6582 }
6583
6584 addr = create_tmp_var (pptr_type_node, NULL);
6585 lab_false = create_artificial_label ();
6586 lab_over = create_artificial_label ();
6587
6588 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6589
6590 if (pass_as_float)
6591 {
6592 int first_floatreg
6593 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6594 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6595
6596 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6597 tmp = build (COND_EXPR, void_type_node, tmp,
6598 build (GOTO_EXPR, void_type_node, lab_false),
6599 NULL);
6600 gimplify_and_add (tmp, pre_p);
6601
6602 if (TYPE_ALIGN (type) > BITS_PER_WORD
6603 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6604 && (n_floatregs & 1)))
6605 {
6606 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6607 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6608 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6609 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6610 gimplify_and_add (tmp, pre_p);
6611 }
6612
6613 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6614 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6615 gimplify_and_add (tmp, pre_p);
6616
6617 #ifdef FUNCTION_ARG_SCmode_WART
6618 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6619 {
6620 tree subtype = TREE_TYPE (type);
6621 tree real, imag;
6622
6623 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6624 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6625
6626 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6627 real = get_initialized_tmp_var (real, pre_p, NULL);
6628
6629 result = build (COMPLEX_EXPR, type, real, imag);
6630 result = get_initialized_tmp_var (result, pre_p, NULL);
6631 }
6632 #endif /* FUNCTION_ARG_SCmode_WART */
6633
6634 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6635 gimplify_and_add (tmp, pre_p);
6636
6637 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6638 gimplify_and_add (tmp, pre_p);
6639
6640 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6641 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6642 gimplify_and_add (tmp, pre_p);
6643 }
6644 else
6645 {
6646 tmp = fold_convert (ptr_type_node, size_int (rsize));
6647 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6648 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6649 tmp = build (COND_EXPR, void_type_node, tmp,
6650 build (GOTO_EXPR, void_type_node, lab_false),
6651 NULL);
6652 gimplify_and_add (tmp, pre_p);
6653
6654 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6655 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6656 gimplify_and_add (tmp, pre_p);
6657
6658 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6659 gimplify_and_add (tmp, pre_p);
6660
6661 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6662 gimplify_and_add (tmp, pre_p);
6663
6664 if (size > 4 && ! TARGET_SH4)
6665 {
6666 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6667 gimplify_and_add (tmp, pre_p);
6668 }
6669
6670 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6671 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6672 gimplify_and_add (tmp, pre_p);
6673 }
6674
6675 if (!result)
6676 {
6677 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6678 gimplify_and_add (tmp, pre_p);
6679 }
6680 }
6681
6682 /* ??? In va-sh.h, there had been code to make values larger than
6683 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6684
6685 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6686 if (result)
6687 {
6688 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6689 gimplify_and_add (tmp, pre_p);
6690
6691 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6692 gimplify_and_add (tmp, pre_p);
6693 }
6694 else
6695 result = tmp;
6696
6697 if (pass_by_ref)
6698 result = build_fold_indirect_ref (result);
6699
6700 return result;
6701 }
6702
6703 bool
6704 sh_promote_prototypes (tree type)
6705 {
6706 if (TARGET_HITACHI)
6707 return 0;
6708 if (! type)
6709 return 1;
6710 return ! sh_attr_renesas_p (type);
6711 }
6712
6713 /* Whether an argument must be passed by reference. On SHcompact, we
6714 pretend arguments wider than 32 bits that would have been passed in
6715 registers are passed by reference, so that an SHmedia trampoline
6716 loads them into the full 64-bit registers. */
6717
6718 static int
6719 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6720 tree type, bool named)
6721 {
6722 unsigned HOST_WIDE_INT size;
6723
6724 if (type)
6725 size = int_size_in_bytes (type);
6726 else
6727 size = GET_MODE_SIZE (mode);
6728
6729 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6730 && (!named
6731 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6732 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6733 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6734 && size > 4
6735 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6736 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6737 return size;
6738 else
6739 return 0;
6740 }
6741
6742 static bool
6743 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6744 tree type, bool named)
6745 {
6746 if (targetm.calls.must_pass_in_stack (mode, type))
6747 return true;
6748
6749 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6750 wants to know about pass-by-reference semantics for incoming
6751 arguments. */
6752 if (! cum)
6753 return false;
6754
6755 if (TARGET_SHCOMPACT)
6756 {
6757 cum->byref = shcompact_byref (cum, mode, type, named);
6758 return cum->byref != 0;
6759 }
6760
6761 return false;
6762 }
6763
6764 static bool
6765 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6766 tree type, bool named ATTRIBUTE_UNUSED)
6767 {
6768 /* ??? How can it possibly be correct to return true only on the
6769 caller side of the equation? Is there someplace else in the
6770 sh backend that's magically producing the copies? */
6771 return (cum->outgoing
6772 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6773 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6774 }
6775
6776 static int
6777 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6778 tree type, bool named ATTRIBUTE_UNUSED)
6779 {
6780 int words = 0;
6781
6782 if (!TARGET_SH5
6783 && PASS_IN_REG_P (*cum, mode, type)
6784 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
6785 && (ROUND_REG (*cum, mode)
6786 + (mode != BLKmode
6787 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6788 : ROUND_ADVANCE (int_size_in_bytes (type)))
6789 > NPARM_REGS (mode)))
6790 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
6791
6792 else if (!TARGET_SHCOMPACT
6793 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6794 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
6795
6796 return words * UNITS_PER_WORD;
6797 }
6798
6799
6800 /* Define where to put the arguments to a function.
6801 Value is zero to push the argument on the stack,
6802 or a hard register in which to store the argument.
6803
6804 MODE is the argument's machine mode.
6805 TYPE is the data type of the argument (as a tree).
6806 This is null for libcalls where that information may
6807 not be available.
6808 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6809 the preceding args and about the function being called.
6810 NAMED is nonzero if this argument is a named parameter
6811 (otherwise it is an extra parameter matching an ellipsis).
6812
6813 On SH the first args are normally in registers
6814 and the rest are pushed. Any arg that starts within the first
6815 NPARM_REGS words is at least partially passed in a register unless
6816 its data type forbids. */
6817
6818
6819 rtx
6820 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6821 tree type, int named)
6822 {
6823 if (! TARGET_SH5 && mode == VOIDmode)
6824 return GEN_INT (ca->renesas_abi ? 1 : 0);
6825
6826 if (! TARGET_SH5
6827 && PASS_IN_REG_P (*ca, mode, type)
6828 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6829 {
6830 int regno;
6831
6832 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6833 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6834 {
6835 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6836 gen_rtx_REG (SFmode,
6837 BASE_ARG_REG (mode)
6838 + (ROUND_REG (*ca, mode) ^ 1)),
6839 const0_rtx);
6840 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6841 gen_rtx_REG (SFmode,
6842 BASE_ARG_REG (mode)
6843 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6844 GEN_INT (4));
6845 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6846 }
6847
6848 /* If the alignment of a DF value causes an SF register to be
6849 skipped, we will use that skipped register for the next SF
6850 value. */
6851 if ((TARGET_HITACHI || ca->renesas_abi)
6852 && ca->free_single_fp_reg
6853 && mode == SFmode)
6854 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6855
6856 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6857 ^ (mode == SFmode && TARGET_SH4
6858 && TARGET_LITTLE_ENDIAN != 0
6859 && ! TARGET_HITACHI && ! ca->renesas_abi);
6860 return gen_rtx_REG (mode, regno);
6861
6862 }
6863
6864 if (TARGET_SH5)
6865 {
6866 if (mode == VOIDmode && TARGET_SHCOMPACT)
6867 return GEN_INT (ca->call_cookie);
6868
6869 /* The following test assumes unnamed arguments are promoted to
6870 DFmode. */
6871 if (mode == SFmode && ca->free_single_fp_reg)
6872 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6873
6874 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6875 && (named || ! ca->prototype_p)
6876 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6877 {
6878 if (! ca->prototype_p && TARGET_SHMEDIA)
6879 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6880
6881 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6882 FIRST_FP_PARM_REG
6883 + ca->arg_count[(int) SH_ARG_FLOAT]);
6884 }
6885
6886 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6887 && (! TARGET_SHCOMPACT
6888 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6889 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6890 type, named))))
6891 {
6892 return gen_rtx_REG (mode, (FIRST_PARM_REG
6893 + ca->arg_count[(int) SH_ARG_INT]));
6894 }
6895
6896 return 0;
6897 }
6898
6899 return 0;
6900 }
6901
6902 /* Update the data in CUM to advance over an argument
6903 of mode MODE and data type TYPE.
6904 (TYPE is null for libcalls where that information may not be
6905 available.) */
6906
6907 void
6908 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6909 tree type, int named)
6910 {
6911 if (ca->force_mem)
6912 ca->force_mem = 0;
6913 else if (TARGET_SH5)
6914 {
6915 tree type2 = (ca->byref && type
6916 ? TREE_TYPE (type)
6917 : type);
6918 enum machine_mode mode2 = (ca->byref && type
6919 ? TYPE_MODE (type2)
6920 : mode);
6921 int dwords = ((ca->byref
6922 ? ca->byref
6923 : mode2 == BLKmode
6924 ? int_size_in_bytes (type2)
6925 : GET_MODE_SIZE (mode2)) + 7) / 8;
6926 int numregs = MIN (dwords, NPARM_REGS (SImode)
6927 - ca->arg_count[(int) SH_ARG_INT]);
6928
6929 if (numregs)
6930 {
6931 ca->arg_count[(int) SH_ARG_INT] += numregs;
6932 if (TARGET_SHCOMPACT
6933 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6934 {
6935 ca->call_cookie
6936 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6937 - numregs, 1);
6938 /* N.B. We want this also for outgoing. */
6939 ca->stack_regs += numregs;
6940 }
6941 else if (ca->byref)
6942 {
6943 if (! ca->outgoing)
6944 ca->stack_regs += numregs;
6945 ca->byref_regs += numregs;
6946 ca->byref = 0;
6947 do
6948 ca->call_cookie
6949 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6950 - numregs, 2);
6951 while (--numregs);
6952 ca->call_cookie
6953 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6954 - 1, 1);
6955 }
6956 else if (dwords > numregs)
6957 {
6958 int pushregs = numregs;
6959
6960 if (TARGET_SHCOMPACT)
6961 ca->stack_regs += numregs;
6962 while (pushregs < NPARM_REGS (SImode) - 1
6963 && (CALL_COOKIE_INT_REG_GET
6964 (ca->call_cookie,
6965 NPARM_REGS (SImode) - pushregs)
6966 == 1))
6967 {
6968 ca->call_cookie
6969 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6970 - pushregs, 1);
6971 pushregs++;
6972 }
6973 if (numregs == NPARM_REGS (SImode))
6974 ca->call_cookie
6975 |= CALL_COOKIE_INT_REG (0, 1)
6976 | CALL_COOKIE_STACKSEQ (numregs - 1);
6977 else
6978 ca->call_cookie
6979 |= CALL_COOKIE_STACKSEQ (numregs);
6980 }
6981 }
6982 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6983 && (named || ! ca->prototype_p))
6984 {
6985 if (mode2 == SFmode && ca->free_single_fp_reg)
6986 ca->free_single_fp_reg = 0;
6987 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6988 < NPARM_REGS (SFmode))
6989 {
6990 int numfpregs
6991 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6992 NPARM_REGS (SFmode)
6993 - ca->arg_count[(int) SH_ARG_FLOAT]);
6994
6995 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6996
6997 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6998 {
6999 if (ca->outgoing && numregs > 0)
7000 do
7001 {
7002 ca->call_cookie
7003 |= (CALL_COOKIE_INT_REG
7004 (ca->arg_count[(int) SH_ARG_INT]
7005 - numregs + ((numfpregs - 2) / 2),
7006 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7007 - numfpregs) / 2));
7008 }
7009 while (numfpregs -= 2);
7010 }
7011 else if (mode2 == SFmode && (named)
7012 && (ca->arg_count[(int) SH_ARG_FLOAT]
7013 < NPARM_REGS (SFmode)))
7014 ca->free_single_fp_reg
7015 = FIRST_FP_PARM_REG - numfpregs
7016 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7017 }
7018 }
7019 return;
7020 }
7021
7022 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7023 {
7024 /* Note that we've used the skipped register. */
7025 if (mode == SFmode && ca->free_single_fp_reg)
7026 {
7027 ca->free_single_fp_reg = 0;
7028 return;
7029 }
7030 /* When we have a DF after an SF, there's an SF register that gets
7031 skipped in order to align the DF value. We note this skipped
7032 register, because the next SF value will use it, and not the
7033 SF that follows the DF. */
7034 if (mode == DFmode
7035 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7036 {
7037 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7038 + BASE_ARG_REG (mode));
7039 }
7040 }
7041
7042 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7043 || PASS_IN_REG_P (*ca, mode, type))
7044 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7045 = (ROUND_REG (*ca, mode)
7046 + (mode == BLKmode
7047 ? ROUND_ADVANCE (int_size_in_bytes (type))
7048 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7049 }
7050
7051 /* The Renesas calling convention doesn't quite fit into this scheme since
7052 the address is passed like an invisible argument, but one that is always
7053 passed in memory. */
7054 static rtx
7055 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7056 {
7057 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7058 return 0;
7059 return gen_rtx_REG (Pmode, 2);
7060 }
7061
7062 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7063
7064 static bool
7065 sh_return_in_memory (tree type, tree fndecl)
7066 {
7067 if (TARGET_SH5)
7068 {
7069 if (TYPE_MODE (type) == BLKmode)
7070 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7071 else
7072 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7073 }
7074 else
7075 {
7076 return (TYPE_MODE (type) == BLKmode
7077 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7078 && TREE_CODE (type) == RECORD_TYPE));
7079 }
7080 }
7081
7082 /* We actually emit the code in sh_expand_prologue. We used to use
7083 a static variable to flag that we need to emit this code, but that
7084 doesn't work when inlining, since functions are deferred and then emitted
7085 later. Fortunately, we already have two flags that are part of struct
7086 function that tell if a function uses varargs or stdarg. */
7087 static void
7088 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7089 enum machine_mode mode,
7090 tree type,
7091 int *pretend_arg_size,
7092 int second_time ATTRIBUTE_UNUSED)
7093 {
7094 gcc_assert (current_function_stdarg);
7095 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7096 {
7097 int named_parm_regs, anon_parm_regs;
7098
7099 named_parm_regs = (ROUND_REG (*ca, mode)
7100 + (mode == BLKmode
7101 ? ROUND_ADVANCE (int_size_in_bytes (type))
7102 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7103 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7104 if (anon_parm_regs > 0)
7105 *pretend_arg_size = anon_parm_regs * 4;
7106 }
7107 }
7108
7109 static bool
7110 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7111 {
7112 return TARGET_SH5;
7113 }
7114
7115 static bool
7116 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7117 {
7118 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7119 }
7120
7121
7122 /* Define the offset between two registers, one to be eliminated, and
7123 the other its replacement, at the start of a routine. */
7124
7125 int
7126 initial_elimination_offset (int from, int to)
7127 {
7128 int regs_saved;
7129 int regs_saved_rounding = 0;
7130 int total_saved_regs_space;
7131 int total_auto_space;
7132 int save_flags = target_flags;
7133 int copy_flags;
7134 HARD_REG_SET live_regs_mask;
7135
7136 shmedia_space_reserved_for_target_registers = false;
7137 regs_saved = calc_live_regs (&live_regs_mask);
7138 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7139
7140 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7141 {
7142 shmedia_space_reserved_for_target_registers = true;
7143 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7144 }
7145
7146 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7147 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7148 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7149
7150 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7151 copy_flags = target_flags;
7152 target_flags = save_flags;
7153
7154 total_saved_regs_space = regs_saved + regs_saved_rounding;
7155
7156 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
7157 return total_saved_regs_space + total_auto_space
7158 + current_function_args_info.byref_regs * 8;
7159
7160 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7161 return total_saved_regs_space + total_auto_space
7162 + current_function_args_info.byref_regs * 8;
7163
7164 /* Initial gap between fp and sp is 0. */
7165 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7166 return 0;
7167
7168 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7169 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM));
7170 if (TARGET_SH5)
7171 {
7172 int n = total_saved_regs_space;
7173 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7174 save_schedule schedule;
7175 save_entry *entry;
7176
7177 n += total_auto_space;
7178
7179 /* If it wasn't saved, there's not much we can do. */
7180 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7181 return n;
7182
7183 target_flags = copy_flags;
7184
7185 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7186 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7187 if (entry->reg == pr_reg)
7188 {
7189 target_flags = save_flags;
7190 return entry->offset;
7191 }
7192 gcc_unreachable ();
7193 }
7194 else
7195 return total_auto_space;
7196 }
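
/* For illustration only (not part of the original sources), the offsets
   computed above are:
     ARG_POINTER -> FRAME_POINTER and ARG_POINTER -> STACK_POINTER:
       saved register space (including rounding) + local frame
       + 8 bytes per by-reference register;
     FRAME_POINTER -> STACK_POINTER: 0, since the two coincide at entry;
     RETURN_ADDRESS_POINTER -> FRAME/STACK_POINTER: the location of the
       saved PR, taken from the SH5 save schedule when applicable and
       equal to the rounded local frame size otherwise.  */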
7197 \f
7198 /* Handle machine-specific pragmas to be semi-compatible with the Renesas
7199 compiler. */
7200
7201 void
7202 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7203 {
7204 pragma_interrupt = 1;
7205 }
7206
7207 void
7208 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7209 {
7210 pragma_interrupt = pragma_trapa = 1;
7211 }
7212
7213 void
7214 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7215 {
7216 pragma_nosave_low_regs = 1;
7217 }
7218
7219 /* Generate the 'interrupt_handler' attribute for decls. */
7220
7221 static void
7222 sh_insert_attributes (tree node, tree *attributes)
7223 {
7224 if (! pragma_interrupt
7225 || TREE_CODE (node) != FUNCTION_DECL)
7226 return;
7227
7228 /* We are only interested in declarations. */
7229 if (!DECL_P (node))
7230 return;
7231
7232 /* Add a 'handle_interrupt' attribute. */
7233 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7234
7235 return;
7236 }
7237
7238 /* Supported attributes:
7239
7240 interrupt_handler -- specifies this function is an interrupt handler.
7241
7242 sp_switch -- specifies an alternate stack for an interrupt handler
7243 to run on.
7244
7245 trap_exit -- use a trapa to exit an interrupt function instead of
7246 an rte instruction.
7247
7248 renesas -- use Renesas calling/layout conventions (functions and
7249 structures).
7250
7251 */
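
/* A brief usage sketch (not part of this file; the names "alt_stack" and
 "isr" and the trap number 4 are arbitrary examples chosen here):

 void *alt_stack;

 void isr (void)
 __attribute__ ((interrupt_handler,
 sp_switch ("alt_stack"),
 trap_exit (4)));
*/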
7252
7253 const struct attribute_spec sh_attribute_table[] =
7254 {
7255 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7256 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7257 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7258 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7259 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7260 #ifdef SYMBIAN
7261 /* Symbian support adds three new attributes:
7262 dllexport - for exporting a function/variable that will live in a dll
7263 dllimport - for importing a function/variable from a dll
7264
7265 Microsoft allows multiple declspecs in one __declspec, separating
7266 them with spaces. We do NOT support this. Instead, use __declspec
7267 multiple times. */
7268 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7269 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7270 #endif
7271 { NULL, 0, 0, false, false, false, NULL }
7272 };
7273
7274 /* Handle an "interrupt_handler" attribute; arguments as in
7275 struct attribute_spec.handler. */
7276 static tree
7277 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7278 tree args ATTRIBUTE_UNUSED,
7279 int flags ATTRIBUTE_UNUSED,
7280 bool *no_add_attrs)
7281 {
7282 if (TREE_CODE (*node) != FUNCTION_DECL)
7283 {
7284 warning (0, "%qs attribute only applies to functions",
7285 IDENTIFIER_POINTER (name));
7286 *no_add_attrs = true;
7287 }
7288 else if (TARGET_SHCOMPACT)
7289 {
7290 error ("attribute interrupt_handler is not compatible with -m5-compact");
7291 *no_add_attrs = true;
7292 }
7293
7294 return NULL_TREE;
7295 }
7296
7297 /* Handle an "sp_switch" attribute; arguments as in
7298 struct attribute_spec.handler. */
7299 static tree
7300 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7301 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7302 {
7303 if (TREE_CODE (*node) != FUNCTION_DECL)
7304 {
7305 warning (0, "%qs attribute only applies to functions",
7306 IDENTIFIER_POINTER (name));
7307 *no_add_attrs = true;
7308 }
7309 else if (!pragma_interrupt)
7310 {
7311 /* The sp_switch attribute only has meaning for interrupt functions. */
7312 warning (0, "%qs attribute only applies to interrupt functions",
7313 IDENTIFIER_POINTER (name));
7314 *no_add_attrs = true;
7315 }
7316 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7317 {
7318 /* The argument must be a constant string. */
7319 warning (0, "%qs attribute argument not a string constant",
7320 IDENTIFIER_POINTER (name));
7321 *no_add_attrs = true;
7322 }
7323 else
7324 {
7325 const char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7326 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7327 }
7328
7329 return NULL_TREE;
7330 }
7331
7332 /* Handle a "trap_exit" attribute; arguments as in
7333 struct attribute_spec.handler. */
7334 static tree
7335 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7336 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7337 {
7338 if (TREE_CODE (*node) != FUNCTION_DECL)
7339 {
7340 warning (0, "%qs attribute only applies to functions",
7341 IDENTIFIER_POINTER (name));
7342 *no_add_attrs = true;
7343 }
7344 else if (!pragma_interrupt)
7345 {
7346 /* The trap_exit attribute only has meaning for interrupt functions. */
7347 warning (0, "%qs attribute only applies to interrupt functions",
7348 IDENTIFIER_POINTER (name));
7349 *no_add_attrs = true;
7350 }
7351 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7352 {
7353 /* The argument must be a constant integer. */
7354 warning (0, "%qs attribute argument not an integer constant",
7355 IDENTIFIER_POINTER (name));
7356 *no_add_attrs = true;
7357 }
7358 else
7359 {
7360 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7361 }
7362
7363 return NULL_TREE;
7364 }
7365
7366 static tree
7367 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7368 tree name ATTRIBUTE_UNUSED,
7369 tree args ATTRIBUTE_UNUSED,
7370 int flags ATTRIBUTE_UNUSED,
7371 bool *no_add_attrs ATTRIBUTE_UNUSED)
7372 {
7373 return NULL_TREE;
7374 }
7375
7376 /* True if __attribute__((renesas)) or -mrenesas. */
7377 int
7378 sh_attr_renesas_p (tree td)
7379 {
7380 if (TARGET_HITACHI)
7381 return 1;
7382 if (td == 0)
7383 return 0;
7384 if (DECL_P (td))
7385 td = TREE_TYPE (td);
7386 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7387 != NULL_TREE);
7388 }
7389
7390 /* True if __attribute__((renesas)) or -mrenesas, for the current
7391 function. */
7392 int
7393 sh_cfun_attr_renesas_p (void)
7394 {
7395 return sh_attr_renesas_p (current_function_decl);
7396 }
7397
7398 int
7399 sh_cfun_interrupt_handler_p (void)
7400 {
7401 return (lookup_attribute ("interrupt_handler",
7402 DECL_ATTRIBUTES (current_function_decl))
7403 != NULL_TREE);
7404 }
7405
7406 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7407 static const struct
7408 {
7409 const char *const name;
7410 const int value;
7411 const char *const description;
7412 }
7413 sh_target_switches[] = TARGET_SWITCHES;
7414 #define target_switches sh_target_switches
7415
7416 /* Like default_pch_valid_p, but take flag_mask into account. */
7417 const char *
7418 sh_pch_valid_p (const void *data_p, size_t len)
7419 {
7420 #ifdef TARGET_OPTIONS
7421 /* ??? We have a copy of this in toplev.c, but it is static. */
7422 static const struct
7423 {
7424 const char *const prefix;
7425 const char **const variable;
7426 const char *const description;
7427 const char *const value;
7428 }
7429 target_options[] = TARGET_OPTIONS;
7430 #endif
7431
7432 const char *data = (const char *)data_p;
7433 const char *flag_that_differs = NULL;
7434 size_t i;
7435 int old_flags;
7436 int flag_mask
7437 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7438 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7439
7440 /* -fpic and -fpie also usually make a PCH invalid. */
7441 if (data[0] != flag_pic)
7442 return _("created and used with different settings of -fpic");
7443 if (data[1] != flag_pie)
7444 return _("created and used with different settings of -fpie");
7445 data += 2;
7446
7447 /* Check target_flags. */
7448 memcpy (&old_flags, data, sizeof (target_flags));
7449 if (((old_flags ^ target_flags) & flag_mask) != 0)
7450 {
7451 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7452 {
7453 int bits;
7454
7455 bits = target_switches[i].value;
7456 if (bits < 0)
7457 bits = -bits;
7458 bits &= flag_mask;
7459 if ((target_flags & bits) != (old_flags & bits))
7460 {
7461 flag_that_differs = target_switches[i].name;
7462 goto make_message;
7463 }
7464 }
7465 gcc_unreachable ();
7466 }
7467 data += sizeof (target_flags);
7468 len -= sizeof (target_flags);
7469
7470 /* Check string options. */
7471 #ifdef TARGET_OPTIONS
7472 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7473 {
7474 const char *str = *target_options[i].variable;
7475 size_t l;
7476 if (! str)
7477 str = "";
7478 l = strlen (str) + 1;
7479 if (len < l || memcmp (data, str, l) != 0)
7480 {
7481 flag_that_differs = target_options[i].prefix;
7482 goto make_message;
7483 }
7484 data += l;
7485 len -= l;
7486 }
7487 #endif
7488
7489 return NULL;
7490
7491 make_message:
7492 {
7493 char *r;
7494 asprintf (&r, _("created and used with differing settings of '-m%s'"),
7495 flag_that_differs);
7496 if (r == NULL)
7497 return _("out of memory");
7498 return r;
7499 }
7500 }
7501 \f
7502 /* Predicates used by the templates. */
7503
7504 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7505 Used only in general_movsrc_operand. */
7506
7507 int
7508 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7509 {
7510 switch (REGNO (op))
7511 {
7512 case PR_REG:
7513 case MACL_REG:
7514 case MACH_REG:
7515 return 1;
7516 }
7517 return 0;
7518 }
7519
7520 /* Returns 1 if OP can be the source of a simple move operation.
7521 Same as general_operand, but a LABEL_REF is valid, while PRE_DEC is
7522 invalid, as are subregs of system registers. */
7523
7524 int
7525 general_movsrc_operand (rtx op, enum machine_mode mode)
7526 {
7527 if (GET_CODE (op) == MEM)
7528 {
7529 rtx inside = XEXP (op, 0);
7530 if (GET_CODE (inside) == CONST)
7531 inside = XEXP (inside, 0);
7532
7533 if (GET_CODE (inside) == LABEL_REF)
7534 return 1;
7535
7536 if (GET_CODE (inside) == PLUS
7537 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7538 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7539 return 1;
7540
7541 /* Only post inc allowed. */
7542 if (GET_CODE (inside) == PRE_DEC)
7543 return 0;
7544 }
7545
7546 if ((mode == QImode || mode == HImode)
7547 && (GET_CODE (op) == SUBREG
7548 && GET_CODE (XEXP (op, 0)) == REG
7549 && system_reg_operand (XEXP (op, 0), mode)))
7550 return 0;
7551
7552 if (TARGET_SHMEDIA
7553 && (GET_CODE (op) == PARALLEL || GET_CODE (op) == CONST_VECTOR)
7554 && sh_rep_vec (op, mode))
7555 return 1;
7556 if (TARGET_SHMEDIA && 1
7557 && GET_CODE (op) == SUBREG && GET_MODE (op) == mode
7558 && SUBREG_REG (op) == const0_rtx && subreg_lowpart_p (op))
7559 /* FIXME */ abort (); /* return 1; */
7560 return general_operand (op, mode);
7561 }
7562
7563 /* Returns 1 if OP can be the destination of a move.
7564 Same as general_operand, but no post-increment allowed. */
7565
7566 int
7567 general_movdst_operand (rtx op, enum machine_mode mode)
7568 {
7569 /* Only pre dec allowed. */
7570 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7571 return 0;
7572 if (mode == DImode && TARGET_SHMEDIA && GET_CODE (op) == SUBREG
7573 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8
7574 && ! (high_life_started || reload_completed))
7575 return 0;
7576
7577 return general_operand (op, mode);
7578 }
7579
7580 /* Returns 1 if OP is a normal arithmetic register. */
7581
7582 int
7583 arith_reg_operand (rtx op, enum machine_mode mode)
7584 {
7585 if (register_operand (op, mode))
7586 {
7587 int regno;
7588
7589 if (GET_CODE (op) == REG)
7590 regno = REGNO (op);
7591 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7592 regno = REGNO (SUBREG_REG (op));
7593 else
7594 return 1;
7595
7596 return (regno != T_REG && regno != PR_REG
7597 && ! TARGET_REGISTER_P (regno)
7598 && (regno != FPUL_REG || TARGET_SH4)
7599 && regno != MACH_REG && regno != MACL_REG);
7600 }
7601 /* Allow a no-op sign extension - compare LOAD_EXTEND_OP.
7602 We allow SImode here, as not using an FP register is just a matter of
7603 proper register allocation. */
7604 if (TARGET_SHMEDIA
7605 && GET_MODE (op) == DImode && GET_CODE (op) == SIGN_EXTEND
7606 && GET_MODE (XEXP (op, 0)) == SImode
7607 && GET_CODE (XEXP (op, 0)) != SUBREG)
7608 return register_operand (XEXP (op, 0), VOIDmode);
7609 #if 0 /* Can't do this because of PROMOTE_MODE for unsigned vars. */
7610 if (GET_MODE (op) == SImode && GET_CODE (op) == SIGN_EXTEND
7611 && GET_MODE (XEXP (op, 0)) == HImode
7612 && GET_CODE (XEXP (op, 0)) == REG
7613 && REGNO (XEXP (op, 0)) <= LAST_GENERAL_REG)
7614 return register_operand (XEXP (op, 0), VOIDmode);
7615 #endif
7616 if (GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_INT
7617 && GET_CODE (op) == SUBREG
7618 && GET_MODE (SUBREG_REG (op)) == DImode
7619 && GET_CODE (SUBREG_REG (op)) == SIGN_EXTEND
7620 && GET_MODE (XEXP (SUBREG_REG (op), 0)) == SImode
7621 && GET_CODE (XEXP (SUBREG_REG (op), 0)) != SUBREG)
7622 return register_operand (XEXP (SUBREG_REG (op), 0), VOIDmode);
7623 return 0;
7624 }
7625
7626 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7627 because this would lead to missing sign extensions when truncating from
7628 DImode to SImode. */
7629 int
7630 arith_reg_dest (rtx op, enum machine_mode mode)
7631 {
7632 if (mode == DImode && GET_CODE (op) == SUBREG
7633 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8
7634 && TARGET_SHMEDIA)
7635 return 0;
7636 return arith_reg_operand (op, mode);
7637 }
7638
7639 /* Like arith_reg_operand, but for register source operands of narrow
7640 logical SHMEDIA operations: forbid subregs of DImode / TImode regs. */
7641 int
7642 logical_reg_operand (rtx op, enum machine_mode mode)
7643 {
7644 if (TARGET_SHMEDIA
7645 && GET_CODE (op) == SUBREG
7646 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4
7647 && mode != DImode)
7648 return 0;
7649 return arith_reg_operand (op, mode);
7650 }
7651
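/* Returns 1 if OP is an integer value narrower than a word that, after
 reload, resides in a general purpose register. */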
7652 int
7653 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7654 {
7655 enum machine_mode op_mode = GET_MODE (op);
7656
7657 if (GET_MODE_CLASS (op_mode) != MODE_INT
7658 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7659 return 0;
7660 if (! reload_completed)
7661 return 0;
7662 return true_regnum (op) <= LAST_GENERAL_REG;
7663 }
7664
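/* Returns 1 if OP is a floating point arithmetic register: either a
 pseudo or a hard floating point register. */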
7665 int
7666 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7667 {
7668 if (register_operand (op, mode))
7669 {
7670 int regno;
7671
7672 if (GET_CODE (op) == REG)
7673 regno = REGNO (op);
7674 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7675 regno = REGNO (SUBREG_REG (op));
7676 else
7677 return 1;
7678
7679 return (regno >= FIRST_PSEUDO_REGISTER
7680 || FP_REGISTER_P (regno));
7681 }
7682 return 0;
7683 }
7684
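/* Like fp_arith_reg_operand, but forbid paradoxical DImode subregs as
 destinations; compare arith_reg_dest above. */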
7685 int
7686 fp_arith_reg_dest (rtx op, enum machine_mode mode)
7687 {
7688 if (mode == DImode && GET_CODE (op) == SUBREG
7689 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7690 return 0;
7691 return fp_arith_reg_operand (op, mode);
7692 }
7693
7694 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7695
7696 int
7697 arith_operand (rtx op, enum machine_mode mode)
7698 {
7699 if (arith_reg_operand (op, mode))
7700 return 1;
7701
7702 if (TARGET_SHMEDIA)
7703 {
7704 /* FIXME: We should be checking whether the CONST_INT fits in a
7705 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7706 attempting to transform a sequence of two 64-bit sets of the
7707 same register from literal constants into a set and an add,
7708 when the difference is too wide for an add. */
7709 if (GET_CODE (op) == CONST_INT
7710 || EXTRA_CONSTRAINT_C16 (op))
7711 return 1;
7712 else if (GET_CODE (op) == TRUNCATE
7713 && ! system_reg_operand (XEXP (op, 0), VOIDmode)
7714 && (mode == VOIDmode || mode == GET_MODE (op))
7715 && (GET_MODE_SIZE (GET_MODE (op))
7716 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
7717 && (! FP_REGISTER_P (REGNO (XEXP (op, 0)))
7718 || GET_MODE_SIZE (GET_MODE (op)) == 4))
7719 return register_operand (XEXP (op, 0), VOIDmode);
7720 else
7721 return 0;
7722 }
7723 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7724 return 1;
7725
7726 return 0;
7727 }
7728
7729 /* Returns 1 if OP is a valid source operand for a compare insn. */
7730
7731 int
7732 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7733 {
7734 if (arith_reg_operand (op, mode))
7735 return 1;
7736
7737 if (EXTRA_CONSTRAINT_Z (op))
7738 return 1;
7739
7740 return 0;
7741 }
7742
7743 /* Return 1 if OP is a valid source operand for xor. */
7744
7745 int
7746 xor_operand (rtx op, enum machine_mode mode)
7747 {
7748 if (GET_CODE (op) == CONST_INT)
7749 return (TARGET_SHMEDIA
7750 ? (CONST_OK_FOR_I06 (INTVAL (op))
7751 || (no_new_pseudos && INTVAL (op) == 0xff))
7752 : CONST_OK_FOR_K08 (INTVAL (op)));
7753 if (TARGET_SHMEDIA
7754 && mode != DImode && GET_CODE (op) == SUBREG
7755 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
7756 return 0;
7757 return arith_reg_operand (op, mode);
7758 }
7759
7760 /* Return 1 if OP is a valid source operand for shmedia cmpgt / cmpgtu. */
7761 int
7762 cmp_operand (rtx op, enum machine_mode mode)
7763 {
7764 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
7765 return 1;
7766 if (TARGET_SHMEDIA
7767 && mode != DImode && GET_CODE (op) == SUBREG
7768 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
7769 return 0;
7770 return arith_reg_operand (op, mode);
7771 }
7772
7773 /* Returns 1 if OP is a valid source operand for a logical operation. */
7774
7775 int
7776 logical_operand (rtx op, enum machine_mode mode)
7777 {
7778 if (TARGET_SHMEDIA
7779 && mode != DImode && GET_CODE (op) == SUBREG
7780 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
7781 return 0;
7782
7783 if (arith_reg_operand (op, mode))
7784 return 1;
7785
7786 if (TARGET_SHMEDIA)
7787 {
7788 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7789 return 1;
7790 else
7791 return 0;
7792 }
7793 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7794 return 1;
7795
7796 return 0;
7797 }
7798
7799 int
7800 and_operand (rtx op, enum machine_mode mode)
7801 {
7802 if (logical_operand (op, mode))
7803 return 1;
7804
7805 /* Check mshflo.l / mshflhi.l opportunities. */
7806 if (TARGET_SHMEDIA
7807 && mode == DImode
7808 && GET_CODE (op) == CONST_INT
7809 && CONST_OK_FOR_J16 (INTVAL (op)))
7810 return 1;
7811
7812 return 0;
7813 }
7814
7815 /* Nonzero if OP is a floating point value with value 0.0. */
7816
7817 int
7818 fp_zero_operand (rtx op)
7819 {
7820 REAL_VALUE_TYPE r;
7821
7822 if (GET_MODE (op) != SFmode)
7823 return 0;
7824
7825 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7826 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7827 }
7828
7829 /* Nonzero if OP is a floating point value with value 1.0. */
7830
7831 int
7832 fp_one_operand (rtx op)
7833 {
7834 REAL_VALUE_TYPE r;
7835
7836 if (GET_MODE (op) != SFmode)
7837 return 0;
7838
7839 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7840 return REAL_VALUES_EQUAL (r, dconst1);
7841 }
7842
7843 /* For -m4 and -m4-single-only, mode switching is used. If we are
7844 compiling without -mfmovd, movsf_ie isn't taken into account for
7845 mode switching. We could check in machine_dependent_reorg for
7846 cases where we know we are in single precision mode, but there is
7847 no interface to find that out during reload, so we must avoid
7848 choosing an fldi alternative during reload and thus failing to
7849 allocate a scratch register for the constant loading. */
7850 int
7851 fldi_ok (void)
7852 {
7853 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7854 }
7855
7856 int
7857 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7858 {
7859 enum rtx_code code = GET_CODE (op);
7860 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7861 }
7862
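/* Returns 1 if OP is the FPSCR register in PSImode, or, before reload
 completes, a pseudo register in PSImode. */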
7863 int
7864 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7865 {
7866 return (GET_CODE (op) == REG
7867 && (REGNO (op) == FPSCR_REG
7868 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7869 && !(reload_in_progress || reload_completed)))
7870 && GET_MODE (op) == PSImode);
7871 }
7872
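/* Returns 1 if OP is the FPUL register (or a pseudo) in mode MODE; on
 SHmedia, any floating point arithmetic register is accepted. */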
7873 int
7874 fpul_operand (rtx op, enum machine_mode mode)
7875 {
7876 if (TARGET_SHMEDIA)
7877 return fp_arith_reg_operand (op, mode);
7878
7879 return (GET_CODE (op) == REG
7880 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7881 && GET_MODE (op) == mode);
7882 }
7883
7884 int
7885 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7886 {
7887 return (GET_CODE (op) == SYMBOL_REF);
7888 }
7889
7890 /* Return the TLS type for TLS symbols, 0 otherwise. */
7891 int
7892 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7893 {
7894 if (GET_CODE (op) != SYMBOL_REF)
7895 return 0;
7896 return SYMBOL_REF_TLS_MODEL (op);
7897 }
7898
7899 int
7900 commutative_float_operator (rtx op, enum machine_mode mode)
7901 {
7902 if (GET_MODE (op) != mode)
7903 return 0;
7904 switch (GET_CODE (op))
7905 {
7906 case PLUS:
7907 case MULT:
7908 return 1;
7909 default:
7910 break;
7911 }
7912 return 0;
7913 }
7914
7915 int
7916 noncommutative_float_operator (rtx op, enum machine_mode mode)
7917 {
7918 if (GET_MODE (op) != mode)
7919 return 0;
7920 switch (GET_CODE (op))
7921 {
7922 case MINUS:
7923 case DIV:
7924 return 1;
7925 default:
7926 break;
7927 }
7928 return 0;
7929 }
7930
7931 int
7932 unary_float_operator (rtx op, enum machine_mode mode)
7933 {
7934 if (GET_MODE (op) != mode)
7935 return 0;
7936 switch (GET_CODE (op))
7937 {
7938 case ABS:
7939 case NEG:
7940 case SQRT:
7941 return 1;
7942 default:
7943 break;
7944 }
7945 return 0;
7946 }
7947
7948 int
7949 binary_float_operator (rtx op, enum machine_mode mode)
7950 {
7951 if (GET_MODE (op) != mode)
7952 return 0;
7953 switch (GET_CODE (op))
7954 {
7955 case PLUS:
7956 case MINUS:
7957 case MULT:
7958 case DIV:
7959 return 1;
7960 default:
7961 break;
7962 }
7963 return 0;
7964 }
7965
7966 int
7967 binary_logical_operator (rtx op, enum machine_mode mode)
7968 {
7969 if (GET_MODE (op) != mode)
7970 return 0;
7971 switch (GET_CODE (op))
7972 {
7973 case IOR:
7974 case AND:
7975 case XOR:
7976 return 1;
7977 default:
7978 break;
7979 }
7980 return 0;
7981 }
7982
7983 int
7984 equality_comparison_operator (rtx op, enum machine_mode mode)
7985 {
7986 return ((mode == VOIDmode || GET_MODE (op) == mode)
7987 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7988 }
7989
7990 int
7991 greater_comparison_operator (rtx op, enum machine_mode mode)
7992 {
7993 if (mode != VOIDmode && GET_MODE (op) != mode)
7994 return 0;
7995 switch (GET_CODE (op))
7996 {
7997 case GT:
7998 case GE:
7999 case GTU:
8000 case GEU:
8001 return 1;
8002 default:
8003 return 0;
8004 }
8005 }
8006
8007 int
8008 less_comparison_operator (rtx op, enum machine_mode mode)
8009 {
8010 if (mode != VOIDmode && GET_MODE (op) != mode)
8011 return 0;
8012 switch (GET_CODE (op))
8013 {
8014 case LT:
8015 case LE:
8016 case LTU:
8017 case LEU:
8018 return 1;
8019 default:
8020 return 0;
8021 }
8022 }
8023
8024 int
8025 shift_operator (rtx op, enum machine_mode mode)
8026 {
8027 if (mode != VOIDmode && GET_MODE (op) != mode)
8028 return 0;
8029 switch (GET_CODE (op))
8030 {
8031 case ASHIFT:
8032 case ASHIFTRT:
8033 case LSHIFTRT:
8034 return 1;
8035 default:
8036 return 0;
8037 }
8038 }
8039
8040 int
8041 logical_operator (rtx op, enum machine_mode mode)
8042 {
8043 if (mode != VOIDmode && GET_MODE (op) != mode)
8044 return 0;
8045 switch (GET_CODE (op))
8046 {
8047 case AND:
8048 case IOR:
8049 case XOR:
8050 return 1;
8051 default:
8052 return 0;
8053 }
8054 }
8055
8056 /* Accept pseudos and branch target registers. */
8057 int
8058 target_reg_operand (rtx op, enum machine_mode mode)
8059 {
8060 if (mode == VOIDmode
8061 ? GET_MODE (op) != Pmode && GET_MODE (op) != PDImode
8062 : mode != GET_MODE (op))
8063 return 0;
8064
8065 if (GET_CODE (op) == SUBREG)
8066 op = XEXP (op, 0);
8067
8068 if (GET_CODE (op) != REG)
8069 return 0;
8070
8071 /* We must protect ourselves from matching pseudos that are virtual
8072 registers, because they will eventually be replaced with hardware
8073 registers that aren't branch-target registers. */
8074 if (REGNO (op) > LAST_VIRTUAL_REGISTER
8075 || TARGET_REGISTER_P (REGNO (op)))
8076 return 1;
8077
8078 return 0;
8079 }
8080
8081 /* Same as target_reg_operand, except that label_refs and symbol_refs
8082 are accepted before reload. */
8083 int
8084 target_operand (rtx op, enum machine_mode mode)
8085 {
8086 if (mode != VOIDmode && mode != Pmode)
8087 return 0;
8088
8089 if ((GET_MODE (op) == Pmode || GET_MODE (op) == VOIDmode)
8090 && EXTRA_CONSTRAINT_Csy (op))
8091 return ! reload_completed;
8092
8093 return target_reg_operand (op, mode);
8094 }
8095
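/* Returns 1 if OP is a CONST_INT that is a multiple of 8 between 8 and 56,
 i.e. a byte-aligned bit offset as used by the SHmedia mextr patterns. */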
8096 int
8097 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8098 {
8099 HOST_WIDE_INT i;
8100
8101 if (GET_CODE (op) != CONST_INT)
8102 return 0;
8103 i = INTVAL (op);
8104 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
8105 }
8106
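/* Accept the source operand of an extension: a TRUNCATE is checked with
 arith_operand, anything else with arith_reg_operand. */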
8107 int
8108 extend_reg_operand (rtx op, enum machine_mode mode)
8109 {
8110 return (GET_CODE (op) == TRUNCATE
8111 ? arith_operand
8112 : arith_reg_operand) (op, mode);
8113 }
8114
8115 int
8116 trunc_hi_operand (rtx op, enum machine_mode mode)
8117 {
8118 enum machine_mode op_mode = GET_MODE (op);
8119
8120 if (op_mode != SImode && op_mode != DImode
8121 && op_mode != V4HImode && op_mode != V2SImode)
8122 return 0;
8123 return extend_reg_operand (op, mode);
8124 }
8125
8126 int
8127 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
8128 {
8129 return (GET_CODE (op) == TRUNCATE
8130 ? arith_operand
8131 : arith_reg_or_0_operand) (op, mode);
8132 }
8133
8134 int
8135 minuend_operand (rtx op, enum machine_mode mode)
8136 {
8137 return op == constm1_rtx || extend_reg_or_0_operand (op, mode);
8138 }
8139
8140 int
8141 general_extend_operand (rtx op, enum machine_mode mode)
8142 {
8143 return (GET_CODE (op) == TRUNCATE
8144 ? arith_operand
8145 : nonimmediate_operand) (op, mode);
8146 }
8147
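/* Returns 1 if OP is a valid address for an unaligned access: any QImode
 address whose constant offset, if any, satisfies CONST_OK_FOR_I06. */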
8148 int
8149 ua_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8150 {
8151 if (GET_CODE (op) == PLUS
8152 && (GET_CODE (XEXP (op, 1)) != CONST_INT
8153 || ! CONST_OK_FOR_I06 (INTVAL (XEXP (op, 1)))))
8154 return 0;
8155 return address_operand (op, QImode);
8156 }
8157
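/* Returns 1 if OP is an address suitable for a cache-line operation: a
 register, or a register plus a constant multiple of 32. */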
8158 int
8159 cache_address_operand (rtx op, enum machine_mode mode)
8160 {
8161 if (GET_CODE (op) == PLUS)
8162 {
8163 if (GET_CODE (XEXP (op, 0)) != REG)
8164 return 0;
8165 if (GET_CODE (XEXP (op, 1)) != CONST_INT
8166 || (INTVAL (XEXP (op, 1)) & 31))
8167 return 0;
8168 }
8169 else if (GET_CODE (op) != REG)
8170 return 0;
8171 return address_operand (op, mode);
8172 }
8173
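/* Returns 1 if OP is a TRUNCATE in mode MODE whose operand is a hard
 floating point register. */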
8174 int
8175 inqhi_operand (rtx op, enum machine_mode mode)
8176 {
8177 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
8178 return 0;
8179 op = XEXP (op, 0);
8180 /* Can't use true_regnum here because copy_cost wants to know about
8181 SECONDARY_INPUT_RELOAD_CLASS. */
8182 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
8183 }
8184
8185 int
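/* Determine if V is a CONST_VECTOR or PARALLEL of mode MODE whose elements
 form a repeating pattern: for byte-sized elements a repeating pair, for
 larger elements all elements identical. */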
8186 sh_rep_vec (rtx v, enum machine_mode mode)
8187 {
8188 int i;
8189 rtx x, y;
8190
8191 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
8192 || (GET_MODE (v) != mode && mode != VOIDmode))
8193 return 0;
8194 i = XVECLEN (v, 0) - 2;
8195 x = XVECEXP (v, 0, i + 1);
8196 if (GET_MODE_UNIT_SIZE (mode) == 1)
8197 {
8198 y = XVECEXP (v, 0, i);
8199 for (i -= 2; i >= 0; i -= 2)
8200 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
8201 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
8202 return 0;
8203 }
8204 else
8205 for (; i >= 0; i--)
8206 if (XVECEXP (v, 0, i) != x)
8207 return 0;
8208 return 1;
8209 }
8210
8211 /* Determine if V is a constant vector matching MODE with only one element
8212 that is not a sign extension. Two byte-sized elements count as one. */
8213 int
8214 sh_1el_vec (rtx v, enum machine_mode mode)
8215 {
8216 int unit_size;
8217 int i, last, least, sign_ix;
8218 rtx sign;
8219
8220 if (GET_CODE (v) != CONST_VECTOR
8221 || (GET_MODE (v) != mode && mode != VOIDmode))
8222 return 0;
8223 /* Determine numbers of last and of least significant elements. */
8224 last = XVECLEN (v, 0) - 1;
8225 least = TARGET_LITTLE_ENDIAN ? 0 : last;
8226 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
8227 return 0;
8228 sign_ix = least;
8229 if (GET_MODE_UNIT_SIZE (mode) == 1)
8230 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
8231 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
8232 return 0;
8233 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
8234 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
8235 ? constm1_rtx : const0_rtx);
8236 i = XVECLEN (v, 0) - 1;
8237 do
8238 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
8239 return 0;
8240 while (--i);
8241 return 1;
8242 }
8243
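/* Determine if V is a CONST_VECTOR of mode MODE in which every element is
 a CONST_INT. */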
8244 int
8245 sh_const_vec (rtx v, enum machine_mode mode)
8246 {
8247 int i;
8248
8249 if (GET_CODE (v) != CONST_VECTOR
8250 || (GET_MODE (v) != mode && mode != VOIDmode))
8251 return 0;
8252 i = XVECLEN (v, 0) - 1;
8253 for (; i >= 0; i--)
8254 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
8255 return 0;
8256 return 1;
8257 }
8258 \f
8259 /* Return the destination address of a branch. */
8260
8261 static int
8262 branch_dest (rtx branch)
8263 {
8264 rtx dest = SET_SRC (PATTERN (branch));
8265 int dest_uid;
8266
8267 if (GET_CODE (dest) == IF_THEN_ELSE)
8268 dest = XEXP (dest, 1);
8269 dest = XEXP (dest, 0);
8270 dest_uid = INSN_UID (dest);
8271 return INSN_ADDRESSES (dest_uid);
8272 }
8273 \f
8274 /* Return nonzero if REG is not used after INSN.
8275 We assume REG is a reload reg, and therefore does
8276 not live past labels. It may live past calls or jumps though. */
8277 int
8278 reg_unused_after (rtx reg, rtx insn)
8279 {
8280 enum rtx_code code;
8281 rtx set;
8282
8283 /* If the reg is set by this instruction, then it is safe for our
8284 case. Disregard the case where this is a store to memory, since
8285 we are checking a register used in the store address. */
8286 set = single_set (insn);
8287 if (set && GET_CODE (SET_DEST (set)) != MEM
8288 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8289 return 1;
8290
8291 while ((insn = NEXT_INSN (insn)))
8292 {
8293 rtx set;
8294 if (!INSN_P (insn))
8295 continue;
8296
8297 code = GET_CODE (insn);
8298
8299 #if 0
8300 /* If this is a label that existed before reload, then the register
8301 is dead here. However, if this is a label added by reorg, then
8302 the register may still be live here. We can't tell the difference,
8303 so we just ignore labels completely. */
8304 if (code == CODE_LABEL)
8305 return 1;
8306 /* else */
8307 #endif
8308
8309 if (code == JUMP_INSN)
8310 return 0;
8311
8312 /* If this is a sequence, we must handle them all at once.
8313 We could have for instance a call that sets the target register,
8314 and an insn in a delay slot that uses the register. In this case,
8315 we must return 0. */
8316 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8317 {
8318 int i;
8319 int retval = 0;
8320
8321 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8322 {
8323 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8324 rtx set = single_set (this_insn);
8325
8326 if (GET_CODE (this_insn) == CALL_INSN)
8327 code = CALL_INSN;
8328 else if (GET_CODE (this_insn) == JUMP_INSN)
8329 {
8330 if (INSN_ANNULLED_BRANCH_P (this_insn))
8331 return 0;
8332 code = JUMP_INSN;
8333 }
8334
8335 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8336 return 0;
8337 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8338 {
8339 if (GET_CODE (SET_DEST (set)) != MEM)
8340 retval = 1;
8341 else
8342 return 0;
8343 }
8344 if (set == 0
8345 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8346 return 0;
8347 }
8348 if (retval == 1)
8349 return 1;
8350 else if (code == JUMP_INSN)
8351 return 0;
8352 }
8353
8354 set = single_set (insn);
8355 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8356 return 0;
8357 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8358 return GET_CODE (SET_DEST (set)) != MEM;
8359 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8360 return 0;
8361
8362 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8363 return 1;
8364 }
8365 return 1;
8366 }
8367 \f
8368 #include "ggc.h"
8369
8370 static GTY(()) rtx fpscr_rtx;
8371 rtx
8372 get_fpscr_rtx (void)
8373 {
8374 if (! fpscr_rtx)
8375 {
8376 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8377 REG_USERVAR_P (fpscr_rtx) = 1;
8378 mark_user_reg (fpscr_rtx);
8379 }
8380 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8381 mark_user_reg (fpscr_rtx);
8382 return fpscr_rtx;
8383 }
8384
8385 void
8386 emit_sf_insn (rtx pat)
8387 {
8388 emit_insn (pat);
8389 }
8390
8391 void
8392 emit_df_insn (rtx pat)
8393 {
8394 emit_insn (pat);
8395 }
8396
8397 void
8398 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8399 {
8400 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8401 }
8402
8403 void
8404 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8405 {
8406 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8407 get_fpscr_rtx ()));
8408 }
8409
8410 void
8411 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8412 {
8413 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8414 }
8415
8416 void
8417 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8418 {
8419 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8420 get_fpscr_rtx ()));
8421 }
8422 \f
8423 /* ??? gcc does flow analysis strictly after common subexpression
8424 elimination. As a result, common subexpression elimination fails
8425 when there are some intervening statements setting the same register.
8426 If we did nothing about this, this would hurt the precision switching
8427 for SH4 badly. There is some cse after reload, but it is unable to
8428 undo the extra register pressure from the unused instructions, and
8429 it cannot remove auto-increment loads.
8430
8431 A C code example that shows this flow/cse weakness for (at least) SH
8432 and sparc (as of gcc ss-970706) is this:
8433
8434 double
8435 f(double a)
8436 {
8437 double d;
8438 d = 0.1;
8439 a += d;
8440 d = 1.1;
8441 d = 0.1;
8442 a *= d;
8443 return a;
8444 }
8445
8446 So we add another pass before common subexpression elimination, to
8447 remove assignments that are dead due to a following assignment in the
8448 same basic block. */
8449
8450 static void
8451 mark_use (rtx x, rtx *reg_set_block)
8452 {
8453 enum rtx_code code;
8454
8455 if (! x)
8456 return;
8457 code = GET_CODE (x);
8458 switch (code)
8459 {
8460 case REG:
8461 {
8462 int regno = REGNO (x);
8463 int nregs = (regno < FIRST_PSEUDO_REGISTER
8464 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8465 : 1);
8466 do
8467 {
8468 reg_set_block[regno + nregs - 1] = 0;
8469 }
8470 while (--nregs);
8471 break;
8472 }
8473 case SET:
8474 {
8475 rtx dest = SET_DEST (x);
8476
8477 if (GET_CODE (dest) == SUBREG)
8478 dest = SUBREG_REG (dest);
8479 if (GET_CODE (dest) != REG)
8480 mark_use (dest, reg_set_block);
8481 mark_use (SET_SRC (x), reg_set_block);
8482 break;
8483 }
8484 case CLOBBER:
8485 break;
8486 default:
8487 {
8488 const char *fmt = GET_RTX_FORMAT (code);
8489 int i, j;
8490 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8491 {
8492 if (fmt[i] == 'e')
8493 mark_use (XEXP (x, i), reg_set_block);
8494 else if (fmt[i] == 'E')
8495 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8496 mark_use (XVECEXP (x, i, j), reg_set_block);
8497 }
8498 break;
8499 }
8500 }
8501 }
8502 \f
8503 static rtx get_free_reg (HARD_REG_SET);
8504
8505 /* This function returns a register to use for loading the address from
8506 which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
8507 able to use pseudo registers after combine, or have a better mechanism
8508 for choosing a register, it should be done here. */
8509 /* REGS_LIVE is the liveness information for the point for which we
8510 need this allocation. In some bare-bones exit blocks, r1 is live at the
8511 start. We can even have all of r0..r3 being live:
8512 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8513 The INSN before which new insns are placed will clobber the register
8514 we return. If a basic block consists only of setting the return value
8515 register to a pseudo and using that register, the return value is not
8516 live before or after this block, yet we'll insert our insns right in
8517 the middle. */
8518
8519 static rtx
8520 get_free_reg (HARD_REG_SET regs_live)
8521 {
8522 if (! TEST_HARD_REG_BIT (regs_live, 1))
8523 return gen_rtx_REG (Pmode, 1);
8524
8525 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8526 there shouldn't be anything but a jump before the function end. */
8527 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8528 return gen_rtx_REG (Pmode, 7);
8529 }
8530
8531 /* This function will set the fpscr from memory.
8532 MODE is the mode we are setting it to. */
8533 void
8534 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8535 {
8536 enum attr_fp_mode fp_mode = mode;
8537 rtx addr_reg = get_free_reg (regs_live);
8538
8539 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8540 emit_insn (gen_fpu_switch1 (addr_reg));
8541 else
8542 emit_insn (gen_fpu_switch0 (addr_reg));
8543 }
8544
8545 /* Is the given character a logical line separator for the assembler? */
8546 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8547 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8548 #endif
8549
8550 int
8551 sh_insn_length_adjustment (rtx insn)
8552 {
8553 /* Instructions with unfilled delay slots take up an extra two bytes for
8554 the nop in the delay slot. */
8555 if (((GET_CODE (insn) == INSN
8556 && GET_CODE (PATTERN (insn)) != USE
8557 && GET_CODE (PATTERN (insn)) != CLOBBER)
8558 || GET_CODE (insn) == CALL_INSN
8559 || (GET_CODE (insn) == JUMP_INSN
8560 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8561 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8562 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8563 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8564 return 2;
8565
8566 /* SH2e has a bug that prevents the use of annulled branches, so if
8567 the delay slot is not filled, we'll have to put a NOP in it. */
8568 if (sh_cpu == CPU_SH2E
8569 && GET_CODE (insn) == JUMP_INSN
8570 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8571 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8572 && get_attr_type (insn) == TYPE_CBRANCH
8573 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8574 return 2;
8575
8576 /* sh-dsp parallel processing insns take four bytes instead of two. */
8577
8578 if (GET_CODE (insn) == INSN)
8579 {
8580 int sum = 0;
8581 rtx body = PATTERN (insn);
8582 const char *template;
8583 char c;
8584 int maybe_label = 1;
8585
8586 if (GET_CODE (body) == ASM_INPUT)
8587 template = XSTR (body, 0);
8588 else if (asm_noperands (body) >= 0)
8589 template
8590 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8591 else
8592 return 0;
8593 do
8594 {
8595 int ppi_adjust = 0;
8596
8597 do
8598 c = *template++;
8599 while (c == ' ' || c == '\t');
8600 /* all sh-dsp parallel-processing insns start with p.
8601 The only non-ppi sh insn starting with p is pref.
8602 The only ppi starting with pr is prnd. */
8603 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8604 ppi_adjust = 2;
8605 /* The repeat pseudo-insn expands to three insns, a total of
8606 six bytes in size. */
8607 else if ((c == 'r' || c == 'R')
8608 && ! strncasecmp ("epeat", template, 5))
8609 ppi_adjust = 4;
8610 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8611 {
8612 /* If this is a label, it is obviously not a ppi insn. */
8613 if (c == ':' && maybe_label)
8614 {
8615 ppi_adjust = 0;
8616 break;
8617 }
8618 else if (c == '\'' || c == '"')
8619 maybe_label = 0;
8620 c = *template++;
8621 }
8622 sum += ppi_adjust;
8623 maybe_label = c != ':';
8624 }
8625 while (c);
8626 return sum;
8627 }
8628 return 0;
8629 }
8630 \f
8631 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8632 isn't protected by a PIC unspec. */
8633 int
8634 nonpic_symbol_mentioned_p (rtx x)
8635 {
8636 register const char *fmt;
8637 register int i;
8638
8639 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8640 || GET_CODE (x) == PC)
8641 return 1;
8642
8643 /* We don't want to look into the possible MEM location of a
8644 CONST_DOUBLE, since we're not going to use it, in general. */
8645 if (GET_CODE (x) == CONST_DOUBLE)
8646 return 0;
8647
8648 if (GET_CODE (x) == UNSPEC
8649 && (XINT (x, 1) == UNSPEC_PIC
8650 || XINT (x, 1) == UNSPEC_GOT
8651 || XINT (x, 1) == UNSPEC_GOTOFF
8652 || XINT (x, 1) == UNSPEC_GOTPLT
8653 || XINT (x, 1) == UNSPEC_GOTTPOFF
8654 || XINT (x, 1) == UNSPEC_DTPOFF
8655 || XINT (x, 1) == UNSPEC_PLT))
8656 return 0;
8657
8658 fmt = GET_RTX_FORMAT (GET_CODE (x));
8659 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8660 {
8661 if (fmt[i] == 'E')
8662 {
8663 register int j;
8664
8665 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8666 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8667 return 1;
8668 }
8669 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8670 return 1;
8671 }
8672
8673 return 0;
8674 }
8675
8676 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8677 @GOTOFF in `reg'. */
8678 rtx
8679 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8680 rtx reg)
8681 {
8682 if (tls_symbolic_operand (orig, Pmode))
8683 return orig;
8684
8685 if (GET_CODE (orig) == LABEL_REF
8686 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8687 {
8688 if (reg == 0)
8689 reg = gen_reg_rtx (Pmode);
8690
8691 emit_insn (gen_symGOTOFF2reg (reg, orig));
8692 return reg;
8693 }
8694 else if (GET_CODE (orig) == SYMBOL_REF)
8695 {
8696 if (reg == 0)
8697 reg = gen_reg_rtx (Pmode);
8698
8699 emit_insn (gen_symGOT2reg (reg, orig));
8700 return reg;
8701 }
8702 return orig;
8703 }
8704
8705 /* Mark the use of a constant in the literal table. If the constant
8706 has multiple labels, make it unique. */
8707 static rtx
8708 mark_constant_pool_use (rtx x)
8709 {
8710 rtx insn, lab, pattern;
8711
8712 if (x == NULL)
8713 return x;
8714
8715 switch (GET_CODE (x))
8716 {
8717 case LABEL_REF:
8718 x = XEXP (x, 0);
8719 case CODE_LABEL:
8720 break;
8721 default:
8722 return x;
8723 }
8724
8725 /* Get the first label in the list of labels for the same constant
8726 and delete the other labels in the list. */
8727 lab = x;
8728 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8729 {
8730 if (GET_CODE (insn) != CODE_LABEL
8731 || LABEL_REFS (insn) != NEXT_INSN (insn))
8732 break;
8733 lab = insn;
8734 }
8735
8736 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8737 INSN_DELETED_P (insn) = 1;
8738
8739 /* Mark constants in a window. */
8740 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8741 {
8742 if (GET_CODE (insn) != INSN)
8743 continue;
8744
8745 pattern = PATTERN (insn);
8746 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8747 continue;
8748
8749 switch (XINT (pattern, 1))
8750 {
8751 case UNSPECV_CONST2:
8752 case UNSPECV_CONST4:
8753 case UNSPECV_CONST8:
8754 XVECEXP (pattern, 0, 1) = const1_rtx;
8755 break;
8756 case UNSPECV_WINDOW_END:
8757 if (XVECEXP (pattern, 0, 0) == x)
8758 return lab;
8759 break;
8760 case UNSPECV_CONST_END:
8761 return lab;
8762 default:
8763 break;
8764 }
8765 }
8766
8767 return lab;
8768 }
8769
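/* Returns 1 if C is a CONST_INT that can be used as the offset of an
 unaligned access, i.e. it satisfies CONST_OK_FOR_I06. */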
8770 int
8771 ua_offset (rtx c, enum machine_mode mode ATTRIBUTE_UNUSED)
8772 {
8773 return GET_CODE (c) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (c));
8774 }
8775 \f
8776 /* Return true if it's possible to redirect BRANCH1 to the destination
8777 of an unconditional jump BRANCH2. We only want to do this if the
8778 resulting branch will have a short displacement. */
8779 int
8780 sh_can_redirect_branch (rtx branch1, rtx branch2)
8781 {
8782 if (flag_expensive_optimizations && simplejump_p (branch2))
8783 {
8784 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8785 rtx insn;
8786 int distance;
8787
8788 for (distance = 0, insn = NEXT_INSN (branch1);
8789 insn && distance < 256;
8790 insn = PREV_INSN (insn))
8791 {
8792 if (insn == dest)
8793 return 1;
8794 else
8795 distance += get_attr_length (insn);
8796 }
8797 for (distance = 0, insn = NEXT_INSN (branch1);
8798 insn && distance < 256;
8799 insn = NEXT_INSN (insn))
8800 {
8801 if (insn == dest)
8802 return 1;
8803 else
8804 distance += get_attr_length (insn);
8805 }
8806 }
8807 return 0;
8808 }
8809
8810 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8811 int
8812 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8813 unsigned int new_reg)
8814 {
8815 /* Interrupt functions can only use registers that have already been
8816 saved by the prologue, even if they would normally be
8817 call-clobbered. */
8818
8819 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8820 return 0;
8821
8822 return 1;
8823 }
8824
8825 /* Function to update the integer COST
8826 based on the relationship between INSN that is dependent on
8827 DEP_INSN through the dependence LINK. The default is to make no
8828 adjustment to COST. This can be used for example to specify to
8829 the scheduler that an output- or anti-dependence does not incur
8830 the same cost as a data-dependence. The return value should be
8831 the new value for COST. */
8832 static int
8833 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8834 {
8835 rtx reg, use_pat;
8836
8837 if (TARGET_SHMEDIA)
8838 {
8839 /* On SHmedia, if the dependence is an anti-dependence or
8840 output-dependence, there is no cost. */
8841 if (REG_NOTE_KIND (link) != 0)
8842 {
8843 /* However, dependencies between target register loads and
8844 uses of the register in a subsequent block that are separated
8845 by a conditional branch are not modelled - we have to make do with
8846 the anti-dependency between the target register load and the
8847 conditional branch that ends the current block. */
8848 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8849 && GET_CODE (PATTERN (dep_insn)) == SET
8850 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8851 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8852 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8853 {
8854 int orig_cost = cost;
8855 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8856 rtx target = ((! note
8857 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8858 ? insn : JUMP_LABEL (insn));
8859 /* On the likely path, the branch costs 1, on the unlikely path,
8860 it costs 3. */
8861 cost--;
8862 do
8863 target = next_active_insn (target);
8864 while (target && ! flow_dependent_p (target, dep_insn)
8865 && --cost > 0);
8866 /* If two branches are executed in immediate succession, with the
8867 first branch properly predicted, this causes a stall at the
8868 second branch, hence we won't need the target for the
8869 second branch for two cycles after the launch of the first
8870 branch. */
8871 if (cost > orig_cost - 2)
8872 cost = orig_cost - 2;
8873 }
8874 else
8875 cost = 0;
8876 }
8877
8878 else if (get_attr_is_mac_media (insn)
8879 && get_attr_is_mac_media (dep_insn))
8880 cost = 1;
8881
8882 else if (! reload_completed
8883 && GET_CODE (PATTERN (insn)) == SET
8884 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8885 && GET_CODE (PATTERN (dep_insn)) == SET
8886 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8887 && cost < 4)
8888 cost = 4;
8889 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8890 that is needed at the target. */
8891 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8892 && ! flow_dependent_p (insn, dep_insn))
8893 cost--;
8894 }
8895 else if (REG_NOTE_KIND (link) == 0)
8896 {
8897 enum attr_type dep_type, type;
8898
8899 if (recog_memoized (insn) < 0
8900 || recog_memoized (dep_insn) < 0)
8901 return cost;
8902
8903 dep_type = get_attr_type (dep_insn);
8904 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8905 cost--;
8906 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8907 && (type = get_attr_type (insn)) != TYPE_CALL
8908 && type != TYPE_SFUNC)
8909 cost--;
8910
8911 /* The only input for a call that is timing-critical is the
8912 function's address. */
8913 if (GET_CODE(insn) == CALL_INSN)
8914 {
8915 rtx call = PATTERN (insn);
8916
8917 if (GET_CODE (call) == PARALLEL)
8918 call = XVECEXP (call, 0 ,0);
8919 if (GET_CODE (call) == SET)
8920 call = SET_SRC (call);
8921 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8922 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8923 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8924 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8925 cost = 0;
8926 }
8927 /* Likewise, the most timing-critical input for an sfunc call
8928 is the function address. However, sfuncs typically start
8929 using their arguments pretty quickly.
8930 Assume a four cycle delay before they are needed. */
8931 /* All sfunc calls are parallels with at least four components.
8932 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8933 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8934 && XVECLEN (PATTERN (insn), 0) >= 4
8935 && (reg = sfunc_uses_reg (insn)))
8936 {
8937 if (! reg_set_p (reg, dep_insn))
8938 cost -= 4;
8939 }
8940 /* When the preceding instruction loads the shift amount of
8941 the following SHAD/SHLD, the latency of the load is increased
8942 by 1 cycle. */
8943 else if (TARGET_SH4
8944 && get_attr_type (insn) == TYPE_DYN_SHIFT
8945 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8946 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8947 XEXP (SET_SRC (single_set (insn)),
8948 1)))
8949 cost++;
8950 /* When an LS group instruction with a latency of less than
8951 3 cycles is followed by a double-precision floating-point
8952 instruction, FIPR, or FTRV, the latency of the first
8953 instruction is increased to 3 cycles. */
8954 else if (cost < 3
8955 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8956 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8957 cost = 3;
8958 /* The lsw register of a double-precision computation is ready one
8959 cycle earlier. */
8960 else if (reload_completed
8961 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8962 && (use_pat = single_set (insn))
8963 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8964 SET_SRC (use_pat)))
8965 cost -= 1;
8966
8967 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8968 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8969 cost -= 1;
8970 }
8971 /* An anti-dependence penalty of two applies if the first insn is a double
8972 precision fadd / fsub / fmul. */
8973 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8974 && recog_memoized (dep_insn) >= 0
8975 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8976 /* A lot of alleged anti-flow dependences are fake,
8977 so check that this one is real. */
8978 && flow_dependent_p (dep_insn, insn))
8979 cost = 2;
8980
8981
8982 return cost;
8983 }
8984
8985 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8986 if DEP_INSN is anti-flow dependent on INSN. */
8987 static int
8988 flow_dependent_p (rtx insn, rtx dep_insn)
8989 {
8990 rtx tmp = PATTERN (insn);
8991
8992 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8993 return tmp == NULL_RTX;
8994 }
8995
8996 /* A helper function for flow_dependent_p called through note_stores. */
8997 static void
8998 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8999 {
9000 rtx * pinsn = (rtx *) data;
9001
9002 if (*pinsn && reg_referenced_p (x, *pinsn))
9003 *pinsn = NULL_RTX;
9004 }
9005
9006 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
9007 'special function' patterns (type sfunc) that clobber pr, but that
9008 do not look like function calls to leaf_function_p. Hence we must
9009 do this extra check. */
9010 int
9011 sh_pr_n_sets (void)
9012 {
9013 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9014 }
9015
9016 /* This function returns "2" to indicate dual issue for the SH4
9017 processor. To be used by the DFA pipeline description. */
9018 static int
9019 sh_issue_rate (void)
9020 {
9021 if (TARGET_SUPERSCALAR)
9022 return 2;
9023 else
9024 return 1;
9025 }
9026
9027 /* Functions for ready queue reordering for sched1. */
9028
9029 /* Get the weight for mode MODE of a SET X. */
9030 static short
9031 find_set_regmode_weight (rtx x, enum machine_mode mode)
9032 {
9033 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9034 return 1;
9035 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9036 {
9037 if (GET_CODE (SET_DEST (x)) == REG)
9038 {
9039 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9040 return 1;
9041 else
9042 return 0;
9043 }
9044 return 1;
9045 }
9046 return 0;
9047 }
9048
9049 /* Get regmode weight for insn. */
9050 static short
9051 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9052 {
9053 short reg_weight = 0;
9054 rtx x;
9055
9056 /* Increment weight for each register born here. */
9057 x = PATTERN (insn);
9058 reg_weight += find_set_regmode_weight (x, mode);
9059 if (GET_CODE (x) == PARALLEL)
9060 {
9061 int j;
9062 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9063 {
9064 x = XVECEXP (PATTERN (insn), 0, j);
9065 reg_weight += find_set_regmode_weight (x, mode);
9066 }
9067 }
9068 /* Decrement weight for each register that dies here. */
9069 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9070 {
9071 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9072 {
9073 rtx note = XEXP (x, 0);
9074 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9075 reg_weight--;
9076 }
9077 }
9078 return reg_weight;
9079 }
9080
9081 /* Calculate regmode weights for all insns of a basic block. */
9082 static void
9083 find_regmode_weight (int b, enum machine_mode mode)
9084 {
9085 rtx insn, next_tail, head, tail;
9086
9087 get_block_head_tail (b, &head, &tail);
9088 next_tail = NEXT_INSN (tail);
9089
9090 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9091 {
9092 /* Handle register life information. */
9093 if (!INSN_P (insn))
9094 continue;
9095
9096 if (mode == SFmode)
9097 INSN_REGMODE_WEIGHT (insn, mode) =
9098 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9099 else if (mode == SImode)
9100 INSN_REGMODE_WEIGHT (insn, mode) =
9101 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9102 }
9103 }
9104
9105 /* Comparison function for ready queue sorting. */
9106 static int
9107 rank_for_reorder (const void *x, const void *y)
9108 {
9109 rtx tmp = *(const rtx *) y;
9110 rtx tmp2 = *(const rtx *) x;
9111
9112 /* The insn in a schedule group should be issued first. */
9113 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9114 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9115
9116 /* If insns are equally good, sort by INSN_LUID (original insn order). This
9117 minimizes instruction movement, thus minimizing sched's effect on
9118 register pressure. */
9119 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9120 }
9121
9122 /* Resort the array A in which only the element at index N may be out of order. */
9123 static void
9124 swap_reorder (rtx *a, int n)
9125 {
9126 rtx insn = a[n - 1];
9127 int i = n - 2;
9128
9129 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9130 {
9131 a[i + 1] = a[i];
9132 i -= 1;
9133 }
9134 a[i + 1] = insn;
9135 }
9136
9137 #define SCHED_REORDER(READY, N_READY) \
9138 do \
9139 { \
9140 if ((N_READY) == 2) \
9141 swap_reorder (READY, N_READY); \
9142 else if ((N_READY) > 2) \
9143 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9144 } \
9145 while (0)
9146
9147 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9148 macro. */
9149 static void
9150 ready_reorder (rtx *ready, int nready)
9151 {
9152 SCHED_REORDER (ready, nready);
9153 }
9154
9155 /* Calculate regmode weights for all insns of all basic blocks. */
9156 static void
9157 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9158 int verbose ATTRIBUTE_UNUSED,
9159 int old_max_uid)
9160 {
9161 basic_block b;
9162
9163 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9164 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9165
9166 FOR_EACH_BB_REVERSE (b)
9167 {
9168 find_regmode_weight (b->index, SImode);
9169 find_regmode_weight (b->index, SFmode);
9170 }
9171
9172 CURR_REGMODE_PRESSURE (SImode) = 0;
9173 CURR_REGMODE_PRESSURE (SFmode) = 0;
9174
9175 }
9176
9177 /* Cleanup. */
9178 static void
9179 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9180 int verbose ATTRIBUTE_UNUSED)
9181 {
9182 if (regmode_weight[0])
9183 {
9184 free (regmode_weight[0]);
9185 regmode_weight[0] = NULL;
9186 }
9187 if (regmode_weight[1])
9188 {
9189 free (regmode_weight[1]);
9190 regmode_weight[1] = NULL;
9191 }
9192 }
9193
9194 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9195 keep count of register pressures on SImode and SFmode. */
9196 static int
9197 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9198 int sched_verbose ATTRIBUTE_UNUSED,
9199 rtx insn,
9200 int can_issue_more)
9201 {
9202 if (GET_CODE (PATTERN (insn)) != USE
9203 && GET_CODE (PATTERN (insn)) != CLOBBER)
9204 cached_can_issue_more = can_issue_more - 1;
9205 else
9206 cached_can_issue_more = can_issue_more;
9207
9208 if (reload_completed)
9209 return cached_can_issue_more;
9210
9211 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9212 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9213
9214 return cached_can_issue_more;
9215 }
9216
9217 static void
9218 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9219 int verbose ATTRIBUTE_UNUSED,
9220 int veclen ATTRIBUTE_UNUSED)
9221 {
9222 CURR_REGMODE_PRESSURE (SImode) = 0;
9223 CURR_REGMODE_PRESSURE (SFmode) = 0;
9224 }
9225
9226 /* Some magic numbers. */
9227 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9228 functions that already have high pressure on r0. */
9229 #define R0_MAX_LIFE_REGIONS 2
9230 #define R0_MAX_LIVE_LENGTH 12
9231 /* Register Pressure thresholds for SImode and SFmode registers. */
9232 #define SIMODE_MAX_WEIGHT 5
9233 #define SFMODE_MAX_WEIGHT 10
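/* These thresholds are compared against the CURR_REGMODE_PRESSURE totals
   that sh_variable_issue accumulates from the per-insn regmode weights.  */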
9234
9235 /* Return true if the pressure is high for MODE. */
9236 static short
9237 high_pressure (enum machine_mode mode)
9238 {
9239 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9240 functions that already have high pressure on r0. */
9241 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
9242 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
9243 return 1;
9244
9245 if (mode == SFmode)
9246 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9247 else
9248 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9249 }
9250
9251 /* Reorder ready queue if register pressure is high. */
9252 static int
9253 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9254 int sched_verbose ATTRIBUTE_UNUSED,
9255 rtx *ready,
9256 int *n_readyp,
9257 int clock_var ATTRIBUTE_UNUSED)
9258 {
9259 if (reload_completed)
9260 return sh_issue_rate ();
9261
9262 if (high_pressure (SFmode) || high_pressure (SImode))
9263 {
9264 ready_reorder (ready, *n_readyp);
9265 }
9266
9267 return sh_issue_rate ();
9268 }
9269
9270 /* Skip cycles if the current register pressure is high. */
9271 static int
9272 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9273 int sched_verbose ATTRIBUTE_UNUSED,
9274 rtx *ready ATTRIBUTE_UNUSED,
9275 int *n_readyp ATTRIBUTE_UNUSED,
9276 int clock_var ATTRIBUTE_UNUSED)
9277 {
9278 if (reload_completed)
9279 return cached_can_issue_more;
9280
9281 if (high_pressure (SFmode) || high_pressure (SImode))
9282 skip_cycles = 1;
9283
9284 return cached_can_issue_more;
9285 }
9286
9287 /* Skip cycles without sorting the ready queue. This will move insns from
9288 Q->R. If this is the last cycle we are skipping, allow sorting of the
9289 ready queue by sh_reorder. */
9290 
9291 /* Generally, skipping this many cycles is sufficient for all insns to move
9292 from Q -> R. */
9293 #define MAX_SKIPS 8
9294
9295 static int
9296 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9297 int sched_verbose ATTRIBUTE_UNUSED,
9298 rtx insn ATTRIBUTE_UNUSED,
9299 int last_clock_var,
9300 int clock_var,
9301 int *sort_p)
9302 {
9303 if (reload_completed)
9304 return 0;
9305
9306 if (skip_cycles)
9307 {
9308 if ((clock_var - last_clock_var) < MAX_SKIPS)
9309 {
9310 *sort_p = 0;
9311 return 1;
9312 }
9313 /* If this is the last cycle we are skipping, allow reordering of R. */
9314 if ((clock_var - last_clock_var) == MAX_SKIPS)
9315 {
9316 *sort_p = 1;
9317 return 1;
9318 }
9319 }
9320
9321 skip_cycles = 0;
9322
9323 return 0;
9324 }
9325
9326 /* SHmedia requires registers for branches, so we can't generate new
9327 branches past reload. */
9328 static bool
9329 sh_cannot_modify_jumps_p (void)
9330 {
9331 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9332 }
9333
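/* Return the class of registers usable as branch targets: on SHmedia these
   are the target registers; other SH variants have none.  */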
9334 static int
9335 sh_target_reg_class (void)
9336 {
9337 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9338 }
9339
9340 static bool
9341 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9342 {
9343 HARD_REG_SET dummy;
9344 rtx insn;
9345
9346 if (! shmedia_space_reserved_for_target_registers)
9347 return 0;
9348 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9349 return 0;
9350 if (calc_live_regs (&dummy) >= 6 * 8)
9351 return 1;
9352 /* This is a borderline case. See if we have a nested loop, or a loop
9353 with a call, or with more than 4 labels inside. */
9354 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
9355 {
9356 if (GET_CODE (insn) == NOTE
9357 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9358 {
9359 int labels = 0;
9360
9361 do
9362 {
9363 insn = NEXT_INSN (insn);
9364 if ((GET_CODE (insn) == NOTE
9365 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9366 || GET_CODE (insn) == CALL_INSN
9367 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
9368 return 1;
9369 }
9370 while (GET_CODE (insn) != NOTE
9371 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
9372 }
9373 }
9374 return 0;
9375 }
9376
9377 static bool
9378 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9379 {
9380 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9381 }
9382 \f
9383 /*
9384 On the SH1..SH4, the trampoline looks like
9385 2 0002 D202 mov.l l2,r2
9386 1 0000 D301 mov.l l1,r3
9387 3 0004 422B jmp @r2
9388 4 0006 0009 nop
9389 5 0008 00000000 l1: .long area
9390 6 000c 00000000 l2: .long function
9391
9392 SH5 (compact) uses r1 instead of r3 for the static chain. */
9393
9394
9395 /* Emit RTL insns to initialize the variable parts of a trampoline.
9396 FNADDR is an RTX for the address of the function's pure code.
9397 CXT is an RTX for the static chain value for the function. */
9398
9399 void
9400 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9401 {
9402 if (TARGET_SHMEDIA64)
9403 {
9404 rtx tramp_templ;
9405 int fixed_len;
9406
9407 rtx movi1 = GEN_INT (0xcc000010);
9408 rtx shori1 = GEN_INT (0xc8000010);
9409 rtx src, dst;
9410
9411 /* The following trampoline works within a +- 128 KB range for cxt:
9412 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9413 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9414 gettr tr1,r1; blink tr0,r63 */
9415 /* Address rounding makes it hard to compute the exact bounds of the
9416 offset for this trampoline, but we have a rather generous offset
9417 range, so frame_offset should do fine as an upper bound. */
9418 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9419 {
9420 /* ??? could optimize this trampoline initialization
9421 by writing DImode words with two insns each. */
9422 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9423 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9424 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9425 insn = gen_rtx_AND (DImode, insn, mask);
9426 /* Or in ptb/u .,tr1 pattern */
9427 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9428 insn = force_operand (insn, NULL_RTX);
9429 insn = gen_lowpart (SImode, insn);
9430 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
9431 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9432 insn = gen_rtx_AND (DImode, insn, mask);
9433 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9434 insn = gen_lowpart (SImode, insn);
9435 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9436 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9437 insn = gen_rtx_AND (DImode, insn, mask);
9438 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9439 insn = gen_lowpart (SImode, insn);
9440 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9441 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9442 insn = gen_rtx_AND (DImode, insn, mask);
9443 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9444 insn = gen_lowpart (SImode, insn);
9445 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9446 insn);
9447 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9448 insn = gen_rtx_AND (DImode, insn, mask);
9449 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9450 insn = gen_lowpart (SImode, insn);
9451 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9452 insn);
9453 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9454 GEN_INT (0x6bf10600));
9455 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9456 GEN_INT (0x4415fc10));
9457 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9458 GEN_INT (0x4401fff0));
9459 emit_insn (gen_ic_invalidate_line (tramp));
9460 return;
9461 }
9462 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
9463 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9464
9465 tramp_templ = gen_datalabel_ref (tramp_templ);
9466 dst = gen_rtx_MEM (BLKmode, tramp);
9467 src = gen_rtx_MEM (BLKmode, tramp_templ);
9468 set_mem_align (dst, 256);
9469 set_mem_align (src, 64);
9470 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9471
9472 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9473 fnaddr);
9474 emit_move_insn (gen_rtx_MEM (Pmode,
9475 plus_constant (tramp,
9476 fixed_len
9477 + GET_MODE_SIZE (Pmode))),
9478 cxt);
9479 emit_insn (gen_ic_invalidate_line (tramp));
9480 return;
9481 }
9482 else if (TARGET_SHMEDIA)
9483 {
9484 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9485 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9486 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9487 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9488 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9489 rotated 10 right, and the higher 16 bits of every 32 selected. */
9490 rtx movishori
9491 = force_reg (V2HImode, (simplify_gen_subreg
9492 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9493 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9494 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9495
9496 tramp = force_reg (Pmode, tramp);
9497 fnaddr = force_reg (SImode, fnaddr);
9498 cxt = force_reg (SImode, cxt);
9499 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9500 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9501 movishori));
9502 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9503 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9504 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9505 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9506 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9507 gen_rtx_SUBREG (V2HImode, cxt, 0),
9508 movishori));
9509 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9510 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9511 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9512 if (TARGET_LITTLE_ENDIAN)
9513 {
9514 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9515 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9516 }
9517 else
9518 {
9519 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9520 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9521 }
9522 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9523 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9524 emit_insn (gen_ic_invalidate_line (tramp));
9525 return;
9526 }
9527 else if (TARGET_SHCOMPACT)
9528 {
9529 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9530 return;
9531 }
9532 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9533 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9534 SImode));
9535 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9536 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9537 SImode));
9538 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9539 cxt);
9540 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9541 fnaddr);
9542 if (TARGET_HARVARD)
9543 {
9544 if (TARGET_USERMODE)
9545 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9546 FUNCTION_ORDINARY),
9547 0, VOIDmode, 1, tramp, SImode);
9548 else
9549 emit_insn (gen_ic_invalidate_line (tramp));
9550 }
9551 }
9552
9553 /* FIXME: This is overly conservative. A SHcompact function that
9554 receives arguments ``by reference'' will have them stored in its
9555 own stack frame, so it must not pass pointers or references to
9556 these arguments to other functions by means of sibling calls. */
9557 /* If PIC, we cannot make sibling calls to global functions
9558 because the PLT requires r12 to be live. */
9559 static bool
9560 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9561 {
9562 return (1
9563 && (! TARGET_SHCOMPACT
9564 || current_function_args_info.stack_regs == 0)
9565 && ! sh_cfun_interrupt_handler_p ()
9566 && (! flag_pic
9567 || (decl && ! TREE_PUBLIC (decl))
9568 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9569 }
9570 \f
9571 /* Machine specific built-in functions. */
9572
9573 struct builtin_description
9574 {
9575 const enum insn_code icode;
9576 const char *const name;
9577 int signature;
9578 };
9579
9580 /* Describe the number and signedness of arguments; arg[0] == result
9581 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9582 /* 9: 64 bit pointer, 10: 32 bit pointer */
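/* For example, the SH_BLTIN_MAC_HISI entry { 1, 4, 4, 1 } below describes a
   builtin with an unsigned result, two arguments whose signedness we don't
   care about, and an unsigned third argument.  */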
9583 static const char signature_args[][4] =
9584 {
9585 #define SH_BLTIN_V2SI2 0
9586 { 4, 4 },
9587 #define SH_BLTIN_V4HI2 1
9588 { 4, 4 },
9589 #define SH_BLTIN_V2SI3 2
9590 { 4, 4, 4 },
9591 #define SH_BLTIN_V4HI3 3
9592 { 4, 4, 4 },
9593 #define SH_BLTIN_V8QI3 4
9594 { 4, 4, 4 },
9595 #define SH_BLTIN_MAC_HISI 5
9596 { 1, 4, 4, 1 },
9597 #define SH_BLTIN_SH_HI 6
9598 { 4, 4, 1 },
9599 #define SH_BLTIN_SH_SI 7
9600 { 4, 4, 1 },
9601 #define SH_BLTIN_V4HI2V2SI 8
9602 { 4, 4, 4 },
9603 #define SH_BLTIN_V4HI2V8QI 9
9604 { 4, 4, 4 },
9605 #define SH_BLTIN_SISF 10
9606 { 4, 2 },
9607 #define SH_BLTIN_LDUA_L 11
9608 { 2, 10 },
9609 #define SH_BLTIN_LDUA_Q 12
9610 { 1, 10 },
9611 #define SH_BLTIN_STUA_L 13
9612 { 0, 10, 2 },
9613 #define SH_BLTIN_STUA_Q 14
9614 { 0, 10, 1 },
9615 #define SH_BLTIN_LDUA_L64 15
9616 { 2, 9 },
9617 #define SH_BLTIN_LDUA_Q64 16
9618 { 1, 9 },
9619 #define SH_BLTIN_STUA_L64 17
9620 { 0, 9, 2 },
9621 #define SH_BLTIN_STUA_Q64 18
9622 { 0, 9, 1 },
9623 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9624 #define SH_BLTIN_2 19
9625 #define SH_BLTIN_SU 19
9626 { 1, 2 },
9627 #define SH_BLTIN_3 20
9628 #define SH_BLTIN_SUS 20
9629 { 2, 2, 1 },
9630 #define SH_BLTIN_PSSV 21
9631 { 0, 8, 2, 2 },
9632 #define SH_BLTIN_XXUU 22
9633 #define SH_BLTIN_UUUU 22
9634 { 1, 1, 1, 1 },
9635 #define SH_BLTIN_PV 23
9636 { 0, 8 },
9637 };
9638 /* mcmv: operands considered unsigned. */
9639 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9640 /* mperm: control value considered unsigned int. */
9641 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9642 /* mshards_q: returns signed short. */
9643 /* nsb: takes long long arg, returns unsigned char. */
9644 static const struct builtin_description bdesc[] =
9645 {
9646 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9647 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9648 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9649 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9650 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9651 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9652 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9653 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9654 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9655 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9656 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9657 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9658 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9659 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9660 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9661 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9662 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9663 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9664 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9665 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9666 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9667 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9668 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9669 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9670 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9671 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9672 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9673 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
9674 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9675 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9676 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9677 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9678 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9679 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9680 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9681 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9682 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9683 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9684 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9685 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9686 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9687 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9688 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9689 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9690 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9691 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9692 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9693 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9694 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9695 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9696 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9697 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9698 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9699 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9700 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9701 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9702 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9703 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9704 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9705 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9706 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9707 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9708 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9709 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9710 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9711 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9712 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9713 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9714 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9715 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9716 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9717 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9718 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9719 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9720 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9721 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9722 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9723 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9724 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9725 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9726 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9727 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9728 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9729 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9730 };
9731
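/* Register the SHmedia builtin functions listed in bdesc, constructing each
   function type from its signature_args entry; types for shared signatures
   are built once and cached.  */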
9732 static void
9733 sh_media_init_builtins (void)
9734 {
9735 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9736 const struct builtin_description *d;
9737
9738 memset (shared, 0, sizeof shared);
9739 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9740 {
9741 tree type, arg_type = 0;
9742 int signature = d->signature;
9743 int i;
9744
9745 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9746 type = shared[signature];
9747 else
9748 {
9749 int has_result = signature_args[signature][0] != 0;
9750
9751 if ((signature_args[signature][1] & 8)
9752 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9753 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9754 continue;
9755 if (! TARGET_FPU_ANY
9756 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9757 continue;
9758 type = void_list_node;
9759 for (i = 3; ; i--)
9760 {
9761 int arg = signature_args[signature][i];
9762 int opno = i - 1 + has_result;
9763
9764 if (arg & 8)
9765 arg_type = ptr_type_node;
9766 else if (arg)
9767 arg_type = (*lang_hooks.types.type_for_mode)
9768 (insn_data[d->icode].operand[opno].mode,
9769 (arg & 1));
9770 else if (i)
9771 continue;
9772 else
9773 arg_type = void_type_node;
9774 if (i == 0)
9775 break;
9776 type = tree_cons (NULL_TREE, arg_type, type);
9777 }
9778 type = build_function_type (arg_type, type);
9779 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9780 shared[signature] = type;
9781 }
9782 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9783 NULL, NULL_TREE);
9784 }
9785 }
9786
9787 /* Implements target hook vector_mode_supported_p. */
9788 bool
9789 sh_vector_mode_supported_p (enum machine_mode mode)
9790 {
9791 if (TARGET_FPU_ANY
9792 && ((mode == V2SFmode)
9793 || (mode == V4SFmode)
9794 || (mode == V16SFmode)))
9795 return true;
9796
9797 else if (TARGET_SHMEDIA
9798 && ((mode == V8QImode)
9799 || (mode == V2HImode)
9800 || (mode == V4HImode)
9801 || (mode == V2SImode)))
9802 return true;
9803
9804 return false;
9805 }
9806
9807 /* Implements target hook dwarf_calling_convention. Return an enum
9808 of dwarf_calling_convention. */
9809 int
9810 sh_dwarf_calling_convention (tree func)
9811 {
9812 if (sh_attr_renesas_p (func))
9813 return DW_CC_GNU_renesas_sh;
9814
9815 return DW_CC_normal;
9816 }
9817
9818 static void
9819 sh_init_builtins (void)
9820 {
9821 if (TARGET_SHMEDIA)
9822 sh_media_init_builtins ();
9823 }
9824
9825 /* Expand an expression EXP that calls a built-in function,
9826 with result going to TARGET if that's convenient
9827 (and in mode MODE if that's convenient).
9828 SUBTARGET may be used as the target for computing one of EXP's operands.
9829 IGNORE is nonzero if the value is to be ignored. */
9830
9831 static rtx
9832 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9833 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9834 {
9835 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9836 tree arglist = TREE_OPERAND (exp, 1);
9837 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9838 const struct builtin_description *d = &bdesc[fcode];
9839 enum insn_code icode = d->icode;
9840 int signature = d->signature;
9841 enum machine_mode tmode = VOIDmode;
9842 int nop = 0, i;
9843 rtx op[4];
9844 rtx pat = 0;
9845
9846 if (signature_args[signature][0])
9847 {
9848 if (ignore)
9849 return 0;
9850
9851 tmode = insn_data[icode].operand[0].mode;
9852 if (! target
9853 || GET_MODE (target) != tmode
9854 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9855 target = gen_reg_rtx (tmode);
9856 op[nop++] = target;
9857 }
9858 else
9859 target = 0;
9860
9861 for (i = 1; i <= 3; i++, nop++)
9862 {
9863 tree arg;
9864 enum machine_mode opmode, argmode;
9865 tree optype;
9866
9867 if (! signature_args[signature][i])
9868 break;
9869 arg = TREE_VALUE (arglist);
9870 if (arg == error_mark_node)
9871 return const0_rtx;
9872 arglist = TREE_CHAIN (arglist);
9873 if (signature_args[signature][i] & 8)
9874 {
9875 opmode = ptr_mode;
9876 optype = ptr_type_node;
9877 }
9878 else
9879 {
9880 opmode = insn_data[icode].operand[nop].mode;
9881 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9882 }
9883 argmode = TYPE_MODE (TREE_TYPE (arg));
9884 if (argmode != opmode)
9885 arg = build1 (NOP_EXPR, optype, arg);
9886 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9887 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9888 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9889 }
9890
9891 switch (nop)
9892 {
9893 case 1:
9894 pat = (*insn_data[d->icode].genfun) (op[0]);
9895 break;
9896 case 2:
9897 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9898 break;
9899 case 3:
9900 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9901 break;
9902 case 4:
9903 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9904 break;
9905 default:
9906 gcc_unreachable ();
9907 }
9908 if (! pat)
9909 return 0;
9910 emit_insn (pat);
9911 return target;
9912 }
9913
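/* Expand the V2SF unary operation CODE on operand OP1 into OP0 by applying
   the corresponding SFmode operation to each of the two elements in turn.  */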
9914 void
9915 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9916 {
9917 rtx sel0 = const0_rtx;
9918 rtx sel1 = const1_rtx;
9919 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9920 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9921
9922 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9923 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9924 }
9925
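/* Expand the V2SF binary operation CODE on operands OP1 and OP2 into OP0,
   again applying the SFmode operation elementwise.  */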
9926 void
9927 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9928 {
9929 rtx sel0 = const0_rtx;
9930 rtx sel1 = const1_rtx;
9931 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9932 = gen_binary_sf_op;
9933 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9934
9935 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9936 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9937 }
9938
9939 /* Return true if a mode change from FROM to TO is invalid for registers
9940 in CLASS. */
9941 bool
9942 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9943 enum reg_class class)
9944 {
9945 /* We want to enable the use of SUBREGs as a means to
9946 VEC_SELECT a single element of a vector. */
9947 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9948 return (reg_classes_intersect_p (GENERAL_REGS, class));
9949
9950 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9951 {
9952 if (TARGET_LITTLE_ENDIAN)
9953 {
9954 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9955 return reg_classes_intersect_p (DF_REGS, class);
9956 }
9957 else
9958 {
9959 if (GET_MODE_SIZE (from) < 8)
9960 return reg_classes_intersect_p (DF_HI_REGS, class);
9961 }
9962 }
9963 return 0;
9964 }
9965
9966
9967 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9968 that label is used. */
9969
9970 void
9971 sh_mark_label (rtx address, int nuses)
9972 {
9973 if (GOTOFF_P (address))
9974 {
9975 /* Extract the label or symbol. */
9976 address = XEXP (address, 0);
9977 if (GET_CODE (address) == PLUS)
9978 address = XEXP (address, 0);
9979 address = XVECEXP (address, 0, 0);
9980 }
9981 if (GET_CODE (address) == LABEL_REF
9982 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9983 LABEL_NUSES (XEXP (address, 0)) += nuses;
9984 }
9985
9986 /* Compute extra cost of moving data between one register class
9987 and another. */
9988
9989 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9990 uses this information. Hence, the general register <-> floating point
9991 register information here is not used for SFmode. */
9992
9993 int
9994 sh_register_move_cost (enum machine_mode mode,
9995 enum reg_class srcclass, enum reg_class dstclass)
9996 {
9997 if (dstclass == T_REGS || dstclass == PR_REGS)
9998 return 10;
9999
10000 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10001 return 4;
10002
10003 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10004 && REGCLASS_HAS_FP_REG (srcclass)
10005 && REGCLASS_HAS_FP_REG (dstclass))
10006 return 4;
10007
10008 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10009 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10010 return 9;
10011
10012 if ((REGCLASS_HAS_FP_REG (dstclass)
10013 && REGCLASS_HAS_GENERAL_REG (srcclass))
10014 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10015 && REGCLASS_HAS_FP_REG (srcclass)))
10016 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10017 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10018
10019 if ((dstclass == FPUL_REGS
10020 && REGCLASS_HAS_GENERAL_REG (srcclass))
10021 || (srcclass == FPUL_REGS
10022 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10023 return 5;
10024
10025 if ((dstclass == FPUL_REGS
10026 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10027 || (srcclass == FPUL_REGS
10028 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10029 return 7;
10030
10031 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10032 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10033 return 20;
10034
10035 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10036 if (TARGET_SHMEDIA
10037 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10038 {
10039 if (*sh_gettrcost_str)
10040 return atoi (sh_gettrcost_str);
10041 else if (!TARGET_PT_FIXED)
10042 return 100;
10043 }
10044
10045 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10046 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10047 return 4;
10048
10049 if (TARGET_SHMEDIA
10050 || (TARGET_FMOVD
10051 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10052 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10053 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10054
10055 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10056 }
10057
10058 /* Like register_operand, but take into account that SHMEDIA can use
10059 the constant zero like a general register. */
10060 int
10061 sh_register_operand (rtx op, enum machine_mode mode)
10062 {
10063 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
10064 return 1;
10065 return register_operand (op, mode);
10066 }
10067
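/* Like arith_operand, but also accept the T register in SImode on SH1 and
   up (for use in cmpsi patterns).  */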
10068 int
10069 cmpsi_operand (rtx op, enum machine_mode mode)
10070 {
10071 if (GET_CODE (op) == REG && REGNO (op) == T_REG
10072 && GET_MODE (op) == SImode
10073 && TARGET_SH1)
10074 return 1;
10075 return arith_operand (op, mode);
10076 }
10077
10078 int
10079 shift_count_reg_operand (rtx op, enum machine_mode mode)
10080 {
10081 if ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND
10082 || (GET_CODE (op) == SUBREG && SUBREG_BYTE (op) == 0))
10083 && (mode == VOIDmode || mode == GET_MODE (op))
10084 && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6
10085 && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT)
10086 {
10087 mode = VOIDmode;
10088 do
10089 op = XEXP (op, 0);
10090 while ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND
10091 || GET_CODE (op) == TRUNCATE)
10092 && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6
10093 && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT);
10094
10095 }
10096 return arith_reg_operand (op, mode);
10097 }
10098
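/* Accept a shift count for a shift in MODE: a CONST_INT smaller than the
   width of MODE, some other constant accepted by nonmemory_operand, or a
   register form accepted by shift_count_reg_operand.  */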
10099 int
10100 shift_count_operand (rtx op, enum machine_mode mode)
10101 {
10102 return (CONSTANT_P (op)
10103 ? (GET_CODE (op) == CONST_INT
10104 ? (unsigned) INTVAL (op) < GET_MODE_BITSIZE (mode)
10105 : nonmemory_operand (op, mode))
10106 : shift_count_reg_operand (op, mode));
10107 }
10108
10109 static rtx emit_load_ptr (rtx, rtx);
10110
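/* Load a pointer-sized value from address ADDR into REG, sign-extending
   from ptr_mode to Pmode when the two differ.  */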
10111 static rtx
10112 emit_load_ptr (rtx reg, rtx addr)
10113 {
10114 rtx mem = gen_rtx_MEM (ptr_mode, addr);
10115
10116 if (Pmode != ptr_mode)
10117 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10118 return emit_move_insn (reg, mem);
10119 }
10120
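/* Output the assembler code for a thunk to FUNCTION: add the constant DELTA
   to the incoming `this' pointer, and, if VCALL_OFFSET is nonzero, also add
   the value loaded from *(*this + VCALL_OFFSET); then tail-call FUNCTION.  */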
10121 static void
10122 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10123 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10124 tree function)
10125 {
10126 CUMULATIVE_ARGS cum;
10127 int structure_value_byref = 0;
10128 rtx this, this_value, sibcall, insns, funexp;
10129 tree funtype = TREE_TYPE (function);
10130 int simple_add = CONST_OK_FOR_ADD (delta);
10131 int did_load = 0;
10132 rtx scratch0, scratch1, scratch2;
10133 unsigned i;
10134
10135 reload_completed = 1;
10136 epilogue_completed = 1;
10137 no_new_pseudos = 1;
10138 current_function_uses_only_leaf_regs = 1;
10139 reset_block_changes ();
10140
10141 emit_note (NOTE_INSN_PROLOGUE_END);
10142
10143 /* Find the "this" pointer. We have such a wide range of ABIs for the
10144 SH that it's best to do this completely machine independently.
10145 "this" is passed as first argument, unless a structure return pointer
10146 comes first, in which case "this" comes second. */
10147 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10148 #ifndef PCC_STATIC_STRUCT_RETURN
10149 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10150 structure_value_byref = 1;
10151 #endif /* not PCC_STATIC_STRUCT_RETURN */
10152 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10153 {
10154 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10155
10156 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10157 }
10158 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10159
10160 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10161 static chain pointer (even if you can't have nested virtual functions
10162 right now, someone might implement them sometime), and the rest of the
10163 registers are used for argument passing, are callee-saved, or reserved. */
10164 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10165 -ffixed-reg has been used. */
10166 if (! call_used_regs[0] || fixed_regs[0])
10167 error ("r0 needs to be available as a call-clobbered register");
10168 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10169 if (! TARGET_SH5)
10170 {
10171 if (call_used_regs[1] && ! fixed_regs[1])
10172 scratch1 = gen_rtx_REG (ptr_mode, 1);
10173 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10174 to the location where struct return values are to be stored. */
10175 if (call_used_regs[3] && ! fixed_regs[3])
10176 scratch2 = gen_rtx_REG (Pmode, 3);
10177 }
10178 else if (TARGET_SHMEDIA)
10179 {
10180 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10181 if (i != REGNO (scratch0) &&
10182 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10183 {
10184 scratch1 = gen_rtx_REG (ptr_mode, i);
10185 break;
10186 }
10187 if (scratch1 == scratch0)
10188 error ("Need a second call-clobbered general purpose register");
10189 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10190 if (call_used_regs[i] && ! fixed_regs[i])
10191 {
10192 scratch2 = gen_rtx_REG (Pmode, i);
10193 break;
10194 }
10195 if (scratch2 == scratch0)
10196 error ("Need a call-clobbered target register");
10197 }
10198
10199 this_value = plus_constant (this, delta);
10200 if (vcall_offset
10201 && (simple_add || scratch0 != scratch1)
10202 && strict_memory_address_p (ptr_mode, this_value))
10203 {
10204 emit_load_ptr (scratch0, this_value);
10205 did_load = 1;
10206 }
10207
10208 if (!delta)
10209 ; /* Do nothing. */
10210 else if (simple_add)
10211 emit_move_insn (this, this_value);
10212 else
10213 {
10214 emit_move_insn (scratch1, GEN_INT (delta));
10215 emit_insn (gen_add2_insn (this, scratch1));
10216 }
10217
10218 if (vcall_offset)
10219 {
10220 rtx offset_addr;
10221
10222 if (!did_load)
10223 emit_load_ptr (scratch0, this);
10224
10225 offset_addr = plus_constant (scratch0, vcall_offset);
10226 if (strict_memory_address_p (ptr_mode, offset_addr))
10227 ; /* Do nothing. */
10228 else if (! TARGET_SH5 && scratch0 != scratch1)
10229 {
10230 /* scratch0 != scratch1, and we have indexed loads. Get a better
10231 schedule by loading the offset into r1 and using an indexed
10232 load - then the load of r1 can issue before the load from
10233 (this + delta) finishes. */
10234 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10235 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10236 }
10237 else if (CONST_OK_FOR_ADD (vcall_offset))
10238 {
10239 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10240 offset_addr = scratch0;
10241 }
10242 else if (scratch0 != scratch1)
10243 {
10244 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10245 emit_insn (gen_add2_insn (scratch0, scratch1));
10246 offset_addr = scratch0;
10247 }
10248 else
10249 gcc_unreachable (); /* FIXME */
10250 emit_load_ptr (scratch0, offset_addr);
10251
10252 if (Pmode != ptr_mode)
10253 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10254 emit_insn (gen_add2_insn (this, scratch0));
10255 }
10256
10257 /* Generate a tail call to the target function. */
10258 if (! TREE_USED (function))
10259 {
10260 assemble_external (function);
10261 TREE_USED (function) = 1;
10262 }
10263 funexp = XEXP (DECL_RTL (function), 0);
10264 /* If the function is overridden, so is the thunk, hence we don't
10265 need GOT addressing even if this is a public symbol. */
10266 #if 0
10267 if (TARGET_SH1 && ! flag_weak)
10268 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10269 else
10270 #endif
10271 if (TARGET_SH2 && flag_pic)
10272 {
10273 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10274 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10275 }
10276 else
10277 {
10278 if (TARGET_SHMEDIA && flag_pic)
10279 {
10280 funexp = gen_sym2PIC (funexp);
10281 PUT_MODE (funexp, Pmode);
10282 }
10283 emit_move_insn (scratch2, funexp);
10284 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10285 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10286 }
10287 sibcall = emit_call_insn (sibcall);
10288 SIBLING_CALL_P (sibcall) = 1;
10289 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10290 emit_barrier ();
10291
10292 /* Run just enough of rest_of_compilation to do scheduling and get
10293 the insns emitted. Note that use_thunk calls
10294 assemble_start_function and assemble_end_function. */
10295
10296 insn_locators_initialize ();
10297 insns = get_insns ();
10298
10299 if (optimize > 0 && flag_schedule_insns_after_reload)
10300 {
10301 /* Initialize the bitmap obstacks. */
10302 bitmap_obstack_initialize (NULL);
10303 bitmap_obstack_initialize (&reg_obstack);
10304 if (! basic_block_info)
10305 init_flow ();
10306 rtl_register_cfg_hooks ();
10307 find_basic_blocks (insns);
10308 life_analysis (dump_file, PROP_FINAL);
10309
10310 split_all_insns (1);
10311
10312 schedule_insns (dump_file);
10313 }
10314
10315 sh_reorg ();
10316
10317 if (optimize > 0 && flag_delayed_branch)
10318 dbr_schedule (insns, dump_file);
10319 shorten_branches (insns);
10320 final_start_function (insns, file, 1);
10321 final (insns, file, 1);
10322 final_end_function ();
10323
10324 if (optimize > 0 && flag_schedule_insns_after_reload)
10325 {
10326 /* Release all memory allocated by flow. */
10327 free_basic_block_vars ();
10328
10329 /* Release the bitmap obstacks. */
10330 bitmap_obstack_release (&reg_obstack);
10331 bitmap_obstack_release (NULL);
10332 }
10333
10334 reload_completed = 0;
10335 epilogue_completed = 0;
10336 no_new_pseudos = 0;
10337 }
10338
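/* Return an rtx for the address of the function NAME, to be used as a call
   target of kind KIND. When TARGET is non-NULL the address is loaded into
   it. For PIC, SFUNC_GOT names are loaded through the GOT and SFUNC_STATIC
   names use GOTOFF relocations.  */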
10339 rtx
10340 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10341 {
10342 rtx sym;
10343
10344 /* If this is not an ordinary function, the name usually comes from a
10345 string literal or an sprintf buffer. Make sure we use the same
10346 string consistently, so that cse will be able to unify address loads. */
10347 if (kind != FUNCTION_ORDINARY)
10348 name = IDENTIFIER_POINTER (get_identifier (name));
10349 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10350 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10351 if (flag_pic)
10352 switch (kind)
10353 {
10354 case FUNCTION_ORDINARY:
10355 break;
10356 case SFUNC_GOT:
10357 {
10358 rtx reg = target ? target : gen_reg_rtx (Pmode);
10359
10360 emit_insn (gen_symGOT2reg (reg, sym));
10361 sym = reg;
10362 break;
10363 }
10364 case SFUNC_STATIC:
10365 {
10366 /* ??? To allow cse to work, we use GOTOFF relocations.
10367 We could add combiner patterns to transform this into
10368 straight pc-relative calls with sym2PIC / bsrf when
10369 label load and function call are still 1:1 and in the
10370 same basic block during combine. */
10371 rtx reg = target ? target : gen_reg_rtx (Pmode);
10372
10373 emit_insn (gen_symGOTOFF2reg (reg, sym));
10374 sym = reg;
10375 break;
10376 }
10377 }
10378 if (target && sym != target)
10379 {
10380 emit_move_insn (target, sym);
10381 return target;
10382 }
10383 return sym;
10384 }
10385
10386 /* Find the number of a general purpose register in S. */
10387 static int
10388 scavenge_reg (HARD_REG_SET *s)
10389 {
10390 int r;
10391 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10392 if (TEST_HARD_REG_BIT (*s, r))
10393 return r;
10394 return -1;
10395 }
10396
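/* Return an rtx representing the value the return address register (PR)
   had on entry to the current function.  */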
10397 rtx
10398 sh_get_pr_initial_val (void)
10399 {
10400 rtx val;
10401
10402 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10403 PR register on SHcompact, because it might be clobbered by the prologue.
10404 We check first if that is known to be the case. */
10405 if (TARGET_SHCOMPACT
10406 && ((current_function_args_info.call_cookie
10407 & ~ CALL_COOKIE_RET_TRAMP (1))
10408 || current_function_has_nonlocal_label))
10409 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
10410
10411 /* If we haven't finished rtl generation, there might be a nonlocal label
10412 that we haven't seen yet.
10413 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
10414 is set, unless it has been called before for the same register. And even
10415 then, we end up in trouble if we didn't use the register in the same
10416 basic block before. So call get_hard_reg_initial_val now and wrap it
10417 in an unspec if we might need to replace it. */
10418 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10419 combine can put the pseudo returned by get_hard_reg_initial_val into
10420 instructions that need a general purpose register, which will fail to
10421 be recognized when the pseudo becomes allocated to PR. */
10422 val
10423 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10424 if (TARGET_SH1)
10425 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10426 return val;
10427 }
10428
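/* Try to expand a comparison CODE of the T register against the constant in
   sh_compare_op1 into a store of 0 or 1 in TARGET. Return 1 on success, 0 if
   this combination is not handled here.  */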
10429 int
10430 sh_expand_t_scc (enum rtx_code code, rtx target)
10431 {
10432 rtx result = target;
10433 HOST_WIDE_INT val;
10434
10435 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10436 || GET_CODE (sh_compare_op1) != CONST_INT)
10437 return 0;
10438 if (GET_CODE (result) != REG)
10439 result = gen_reg_rtx (SImode);
10440 val = INTVAL (sh_compare_op1);
10441 if ((code == EQ && val == 1) || (code == NE && val == 0))
10442 emit_insn (gen_movt (result));
10443 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10444 {
10445 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10446 emit_insn (gen_subc (result, result, result));
10447 emit_insn (gen_addsi3 (result, result, const1_rtx));
10448 }
10449 else if (code == EQ || code == NE)
10450 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10451 else
10452 return 0;
10453 if (result != target)
10454 emit_move_insn (target, result);
10455 return 1;
10456 }
10457
10458 /* INSN is an sfunc; return the rtx that describes the address used. */
10459 static rtx
10460 extract_sfunc_addr (rtx insn)
10461 {
10462 rtx pattern, part = NULL_RTX;
10463 int len, i;
10464
10465 pattern = PATTERN (insn);
10466 len = XVECLEN (pattern, 0);
10467 for (i = 0; i < len; i++)
10468 {
10469 part = XVECEXP (pattern, 0, i);
10470 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10471 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10472 return XEXP (part, 0);
10473 }
10474 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10475 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10476 }
10477
10478 /* Verify that the register in use_sfunc_addr still agrees with the address
10479 used in the sfunc. This prevents fill_slots_from_thread from changing
10480 use_sfunc_addr.
10481 INSN is the use_sfunc_addr instruction, and REG is the register it
10482 guards. */
10483 int
10484 check_use_sfunc_addr (rtx insn, rtx reg)
10485 {
10486 /* Search for the sfunc. It should really come right after INSN. */
10487 while ((insn = NEXT_INSN (insn)))
10488 {
10489 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10490 break;
10491 if (! INSN_P (insn))
10492 continue;
10493
10494 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10495 insn = XVECEXP (PATTERN (insn), 0, 0);
10496 if (GET_CODE (PATTERN (insn)) != PARALLEL
10497 || get_attr_type (insn) != TYPE_SFUNC)
10498 continue;
10499 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10500 }
10501 gcc_unreachable ();
10502 }
10503
10504 /* Returns 1 if OP is a MEM that can be source of a simple move operation. */
10505
10506 int
10507 unaligned_load_operand (rtx op, enum machine_mode mode)
10508 {
10509 rtx inside;
10510
10511 if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
10512 return 0;
10513
10514 inside = XEXP (op, 0);
10515
10516 if (GET_CODE (inside) == POST_INC)
10517 inside = XEXP (inside, 0);
10518
10519 if (GET_CODE (inside) == REG)
10520 return 1;
10521
10522 return 0;
10523 }
10524
10525 /* This function returns a constant rtx that represents 2**15 / pi in
10526 SFmode. It's used to scale SFmode angles, in radians, to a
10527 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10528 maps to 0x10000). */
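/* 2**15 / pi = 32768 / 3.14159...  ~= 10430.378350470453, which is the
   literal used below.  */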
10529
10530 static GTY(()) rtx sh_fsca_sf2int_rtx;
10531
10532 rtx
10533 sh_fsca_sf2int (void)
10534 {
10535 if (! sh_fsca_sf2int_rtx)
10536 {
10537 REAL_VALUE_TYPE rv;
10538
10539 real_from_string (&rv, "10430.378350470453");
10540 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10541 }
10542
10543 return sh_fsca_sf2int_rtx;
10544 }
10545
10546 /* This function returns a constant rtx that represents 2**15 / pi in
10547 DFmode. It's used to scale DFmode angles, in radians, to a
10548 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10549 maps to 0x10000). */
10550
10551 static GTY(()) rtx sh_fsca_df2int_rtx;
10552
10553 rtx
10554 sh_fsca_df2int (void)
10555 {
10556 if (! sh_fsca_df2int_rtx)
10557 {
10558 REAL_VALUE_TYPE rv;
10559
10560 real_from_string (&rv, "10430.378350470453");
10561 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10562 }
10563
10564 return sh_fsca_df2int_rtx;
10565 }
10566
10567 /* This function returns a constant rtx that represents pi / 2**15 in
10568 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10569 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10570 2*pi). */
10571
10572 static GTY(()) rtx sh_fsca_int2sf_rtx;
10573
10574 rtx
10575 sh_fsca_int2sf (void)
10576 {
10577 if (! sh_fsca_int2sf_rtx)
10578 {
10579 REAL_VALUE_TYPE rv;
10580
10581 real_from_string (&rv, "9.587379924285257e-5");
10582 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10583 }
10584
10585 return sh_fsca_int2sf_rtx;
10586 }
10587
10588 /* Initialize the CUMULATIVE_ARGS structure. */
10589
10590 void
10591 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10592 tree fntype,
10593 rtx libname ATTRIBUTE_UNUSED,
10594 tree fndecl,
10595 signed int n_named_args,
10596 enum machine_mode mode)
10597 {
10598 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10599 pcum->free_single_fp_reg = 0;
10600 pcum->stack_regs = 0;
10601 pcum->byref_regs = 0;
10602 pcum->byref = 0;
10603 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10604
10605 /* XXX - Should we check TARGET_HITACHI here ??? */
10606 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10607
10608 if (fntype)
10609 {
10610 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10611 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10612 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10613 pcum->arg_count [(int) SH_ARG_INT]
10614 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10615
10616 pcum->call_cookie
10617 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10618 && pcum->arg_count [(int) SH_ARG_INT] == 0
10619 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10620 ? int_size_in_bytes (TREE_TYPE (fntype))
10621 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10622 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10623 == FIRST_RET_REG));
10624 }
10625 else
10626 {
10627 pcum->arg_count [(int) SH_ARG_INT] = 0;
10628 pcum->prototype_p = FALSE;
10629 if (mode != VOIDmode)
10630 {
10631 pcum->call_cookie =
10632 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10633 && GET_MODE_SIZE (mode) > 4
10634 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10635
10636 /* If the default ABI is the Renesas ABI then all library
10637 calls must assume that the library will be using the
10638 Renesas ABI. So if the function would return its result
10639 in memory then we must force the address of this memory
10640 block onto the stack. Ideally we would like to call
10641 targetm.calls.return_in_memory() here but we do not have
10642 the TYPE or the FNDECL available so we synthesize the
10643 contents of that function as best we can. */
10644 pcum->force_mem =
10645 (TARGET_DEFAULT & HITACHI_BIT)
10646 && (mode == BLKmode
10647 || (GET_MODE_SIZE (mode) > 4
10648 && !(mode == DFmode
10649 && TARGET_FPU_DOUBLE)));
10650 }
10651 else
10652 {
10653 pcum->call_cookie = 0;
10654 pcum->force_mem = FALSE;
10655 }
10656 }
10657 }
10658
10659 /* Determine if two hard register sets intersect.
10660 Return 1 if they do. */
10661
10662 static int
10663 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10664 {
10665 HARD_REG_SET c;
10666 COPY_HARD_REG_SET (c, *a);
10667 AND_HARD_REG_SET (c, *b);
10668 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10669 return 1;
10670 lose:
10671 return 0;
10672 }
10673
10674 #ifdef TARGET_ADJUST_UNROLL_MAX
10675 static int
10676 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10677 int max_unrolled_insns, int strength_reduce_p,
10678 int unroll_type)
10679 {
10680 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10681 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10682 {
10683 /* Throttle back loop unrolling so that the costs of using more
10684 targets than the eight target register we have don't outweigh
10685 the benefits of unrolling. */
10686 rtx insn;
10687 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10688 int n_barriers = 0;
10689 rtx dest;
10690 int i;
10691 rtx exit_dest[8];
10692 int threshold;
10693 int unroll_benefit = 0, mem_latency = 0;
10694 int base_cost, best_cost, cost;
10695 int factor, best_factor;
10696 int n_dest;
10697 unsigned max_iterations = 32767;
10698 int n_iterations;
10699 int need_precond = 0, precond = 0;
10700 basic_block * bbs = get_loop_body (loop);
10701 struct niter_desc *desc;
10702
10703 /* Assume that all labels inside the loop are used from inside the
10704 loop. If the loop has multiple entry points, it is unlikely to
10705 be unrolled anyway.
10706 Also assume that all calls are to different functions. That is
10707 somewhat pessimistic, but if you have lots of calls, unrolling the
10708 loop is not likely to gain you much in the first place. */
10709 i = loop->num_nodes - 1;
10710 for (insn = BB_HEAD (bbs[i]); ; )
10711 {
10712 if (GET_CODE (insn) == CODE_LABEL)
10713 n_labels++;
10714 else if (GET_CODE (insn) == CALL_INSN)
10715 n_calls++;
10716 else if (GET_CODE (insn) == NOTE
10717 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10718 n_inner_loops++;
10719 else if (GET_CODE (insn) == BARRIER)
10720 n_barriers++;
10721 if (insn != BB_END (bbs[i]))
10722 insn = NEXT_INSN (insn);
10723 else if (--i >= 0)
10724 insn = BB_HEAD (bbs[i]);
10725 else
10726 break;
10727 }
10728 free (bbs);
10729 /* One label for the loop top is normal, and it won't be duplicated by
10730 unrolling. */
10731 if (n_labels <= 1)
10732 return max_unrolled_insns;
10733 if (n_inner_loops > 0)
10734 return 0;
10735 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10736 dest = LABEL_NEXTREF (dest))
10737 {
10738 for (i = n_exit_dest - 1;
10739 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10740 if (i < 0)
10741 exit_dest[n_exit_dest++] = dest;
10742 }
10743 /* If the loop top and call and exit destinations are enough to fill up
10744 the target registers, we're unlikely to do any more damage by
10745 unrolling. */
10746 if (n_calls + n_exit_dest >= 7)
10747 return max_unrolled_insns;
10748
10749 /* ??? In the new loop unroller, there is no longer any strength
10750 reduction information available. Thus, when it comes to unrolling,
10751 we know the cost of everything, but we know the value of nothing. */
10752 #if 0
10753 if (strength_reduce_p
10754 && (unroll_type == LPT_UNROLL_RUNTIME
10755 || unroll_type == LPT_UNROLL_CONSTANT
10756 || unroll_type == LPT_PEEL_COMPLETELY))
10757 {
10758 struct loop_ivs *ivs = LOOP_IVS (loop);
10759 struct iv_class *bl;
10760
10761 /* We'll save one compare-and-branch in each loop body copy
10762 but the last one. */
10763 unroll_benefit = 1;
10764 /* Assess the benefit of removing biv & giv updates. */
10765 for (bl = ivs->list; bl; bl = bl->next)
10766 {
10767 rtx increment = biv_total_increment (bl);
10768 struct induction *v;
10769
10770 if (increment && GET_CODE (increment) == CONST_INT)
10771 {
10772 unroll_benefit++;
10773 for (v = bl->giv; v; v = v->next_iv)
10774 {
10775 if (! v->ignore && v->same == 0
10776 && GET_CODE (v->mult_val) == CONST_INT)
10777 unroll_benefit++;
10778 /* If this giv uses an array, try to determine
10779 a maximum iteration count from the size of the
10780 array. This need not be correct all the time,
10781 but should not be too far off the mark too often. */
10782 while (v->giv_type == DEST_ADDR)
10783 {
10784 rtx mem = PATTERN (v->insn);
10785 tree mem_expr, type, size_tree;
10786
10787 if (GET_CODE (SET_SRC (mem)) == MEM)
10788 mem = SET_SRC (mem);
10789 else if (GET_CODE (SET_DEST (mem)) == MEM)
10790 mem = SET_DEST (mem);
10791 else
10792 break;
10793 mem_expr = MEM_EXPR (mem);
10794 if (! mem_expr)
10795 break;
10796 type = TREE_TYPE (mem_expr);
10797 if (TREE_CODE (type) != ARRAY_TYPE
10798 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10799 break;
10800 size_tree = fold (build (TRUNC_DIV_EXPR,
10801 bitsizetype,
10802 TYPE_SIZE (type),
10803 TYPE_SIZE_UNIT (type)));
10804 if (TREE_CODE (size_tree) == INTEGER_CST
10805 && ! TREE_INT_CST_HIGH (size_tree)
10806 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10807 max_iterations = TREE_INT_CST_LOW (size_tree);
10808 break;
10809 }
10810 }
10811 }
10812 }
10813 }
10814 #else /* 0 */
10815 /* Assume there is at least some benefit. */
10816 unroll_benefit = 1;
10817 #endif /* 0 */
10818
10819 desc = get_simple_loop_desc (loop);
10820 n_iterations = desc->const_iter ? desc->niter : 0;
10821 max_iterations
10822 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10823
10824 if (! strength_reduce_p || ! n_iterations)
10825 need_precond = 1;
10826 if (! n_iterations)
10827 {
10828 n_iterations
10829 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10830 if (! n_iterations)
10831 return 0;
10832 }
10833 #if 0 /* ??? See above - missing induction variable information. */
10834 while (unroll_benefit > 1) /* no loop */
10835 {
10836 /* We include the benefit of biv/ giv updates. Check if some or
10837 all of these updates are likely to fit into a scheduling
10838 bubble of a load.
10839 We check for the following case:
10840 - All the insns leading to the first JUMP_INSN are in a strict
10841 dependency chain.
10842 - there is at least one memory reference in them.
10843
10844 When we find such a pattern, we assume that we can hide as many
10845 updates as the total of the load latency is, if we have an
10846 unroll factor of at least two. We might or might not also do
10847 this without unrolling, so rather than considering this as an
10848 extra unroll benefit, discount it in the unroll benefits of unroll
10849 factors higher than two. */
10850
10851 rtx set, last_set;
10852
10853 insn = next_active_insn (loop->start);
10854 last_set = single_set (insn);
10855 if (! last_set)
10856 break;
10857 if (GET_CODE (SET_SRC (last_set)) == MEM)
10858 mem_latency += 2;
10859 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10860 {
10861 if (! INSN_P (insn))
10862 continue;
10863 if (GET_CODE (insn) == JUMP_INSN)
10864 break;
10865 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10866 {
10867 /* Check if this is a to-be-reduced giv insn. */
10868 struct loop_ivs *ivs = LOOP_IVS (loop);
10869 struct iv_class *bl;
10870 struct induction *v;
10871 for (bl = ivs->list; bl; bl = bl->next)
10872 {
10873 if (bl->biv->insn == insn)
10874 goto is_biv;
10875 for (v = bl->giv; v; v = v->next_iv)
10876 if (v->insn == insn)
10877 goto is_giv;
10878 }
10879 mem_latency--;
10880 is_biv:
10881 is_giv:
10882 continue;
10883 }
10884 set = single_set (insn);
10885 if (! set)
10886 continue;
10887 if (GET_CODE (SET_SRC (set)) == MEM)
10888 mem_latency += 2;
10889 last_set = set;
10890 }
10891 if (mem_latency < 0)
10892 mem_latency = 0;
10893 else if (mem_latency > unroll_benefit - 1)
10894 mem_latency = unroll_benefit - 1;
10895 break;
10896 }
10897 #endif /* 0 */
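/* If the estimated saving per loop body copy covers the cost of the
   duplicated labels even after amortizing the remaining overhead over
   the iteration count, leave the incoming limit alone.  */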
10898 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10899 <= unroll_benefit)
10900 return max_unrolled_insns;
10901
10902 n_dest = n_labels + n_calls + n_exit_dest;
10903 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10904 best_cost = 0;
10905 best_factor = 1;
10906 if (n_barriers * 2 > n_labels - 1)
10907 n_barriers = (n_labels - 1) / 2;
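/* Try unroll factors from two to eight and remember the cheapest one.
   A negative cost means that the factor is expected to beat leaving the
   loop alone (FACTOR == 1, cost 0).  */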
10908 for (factor = 2; factor <= 8; factor++)
10909 {
10910 /* Bump up preconditioning cost for each power of two. */
10911 if (! (factor & (factor-1)))
10912 precond += 4;
10913 /* When preconditioning, only powers of two will be considered. */
10914 else if (need_precond)
10915 continue;
10916 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10917 + (n_labels - 1) * factor + n_calls + n_exit_dest
10918 - (n_barriers * factor >> 1)
10919 + need_precond);
10920 cost
10921 = ((n_dest <= 8 ? 0 : n_dest - 7)
10922 - base_cost * factor
10923 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10924 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10925 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10926 / n_iterations));
10927 if (need_precond)
10928 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10929 if (cost < best_cost)
10930 {
10931 best_cost = cost;
10932 best_factor = factor;
10933 }
10934 }
10935 threshold = best_factor * insn_count;
10936 if (max_unrolled_insns > threshold)
10937 max_unrolled_insns = threshold;
10938 }
10939 return max_unrolled_insns;
10940 }
10941 #endif /* TARGET_ADJUST_UNROLL_MAX */
10942
10943 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10944 not descend into CONST_DOUBLE expressions when replacing.
10945
10946 Note that no copying is done, so X must not be shared unless all copies
10947 are to be modified.
10948
10949 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10950 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10951 replacements[n*2+1] - and that we take mode changes into account.
10952
10953 If a replacement is ambiguous, return NULL_RTX.
10954
10955 If MODIFY is zero, don't modify any rtl in place,
10956 just return zero or nonzero for failure / success. */
10957
10958 rtx
10959 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10960 {
10961 int i, j;
10962 const char *fmt;
10963
10964 /* The following prevents infinite loops when we would change a MEM
10965 inside a CONST_DOUBLE into the same CONST_DOUBLE. */
10966 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10967 return x;
10968
10969 for (i = n_replacements - 1; i >= 0 ; i--)
10970 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10971 return replacements[i*2+1];
10972
10973 /* Allow this function to make replacements in EXPR_LISTs. */
10974 if (x == 0)
10975 return 0;
10976
10977 if (GET_CODE (x) == SUBREG)
10978 {
10979 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10980 n_replacements, modify);
10981
10982 if (GET_CODE (new) == CONST_INT)
10983 {
10984 x = simplify_subreg (GET_MODE (x), new,
10985 GET_MODE (SUBREG_REG (x)),
10986 SUBREG_BYTE (x));
10987 if (! x)
10988 abort ();
10989 }
10990 else if (modify)
10991 SUBREG_REG (x) = new;
10992
10993 return x;
10994 }
10995 else if (GET_CODE (x) == REG)
10996 {
10997 unsigned regno = REGNO (x);
10998 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10999 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11000 rtx result = NULL_RTX;
11001
11002 for (i = n_replacements - 1; i >= 0; i--)
11003 {
11004 rtx from = replacements[i*2];
11005 rtx to = replacements[i*2+1];
11006 unsigned from_regno, from_nregs, to_regno, new_regno;
11007
11008 if (GET_CODE (from) != REG)
11009 continue;
11010 from_regno = REGNO (from);
11011 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11012 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11013 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11014 {
11015 if (regno < from_regno
11016 || regno + nregs > from_regno + nregs
11017 || GET_CODE (to) != REG
11018 || result)
11019 return NULL_RTX;
11020 to_regno = REGNO (to);
11021 if (to_regno < FIRST_PSEUDO_REGISTER)
11022 {
11023 new_regno = regno + to_regno - from_regno;
11024 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11025 != nregs)
11026 return NULL_RTX;
11027 result = gen_rtx_REG (GET_MODE (x), new_regno);
11028 }
11029 else if (GET_MODE (x) <= GET_MODE (to))
11030 result = gen_lowpart_common (GET_MODE (x), to);
11031 else
11032 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11033 }
11034 }
11035 return result ? result : x;
11036 }
11037 else if (GET_CODE (x) == ZERO_EXTEND)
11038 {
11039 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
11040 n_replacements, modify);
11041
11042 if (GET_CODE (new) == CONST_INT)
11043 {
11044 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11045 new, GET_MODE (XEXP (x, 0)));
11046 if (! x)
11047 abort ();
11048 }
11049 else if (modify)
11050 XEXP (x, 0) = new;
11051
11052 return x;
11053 }
11054
11055 fmt = GET_RTX_FORMAT (GET_CODE (x));
11056 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11057 {
11058 rtx new;
11059
11060 if (fmt[i] == 'e')
11061 {
11062 new = replace_n_hard_rtx (XEXP (x, i), replacements,
11063 n_replacements, modify);
11064 if (!new)
11065 return NULL_RTX;
11066 if (modify)
11067 XEXP (x, i) = new;
11068 }
11069 else if (fmt[i] == 'E')
11070 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11071 {
11072 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11073 n_replacements, modify);
11074 if (!new)
11075 return NULL_RTX;
11076 if (modify)
11077 XVECEXP (x, i, j) = new;
11078 }
11079 }
11080
11081 return x;
11082 }
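/* A minimal usage sketch of the function above; it is illustrative only
   and not part of the port. The helper name and the register choice
   (replace r4 by r5 in SImode) are hypothetical.  */
#if 0
static rtx
example_replace_r4_with_r5 (rtx x)
{
  rtx replacements[2];

  replacements[0] = gen_rtx_REG (SImode, 4);	/* FROM(0): r4 */
  replacements[1] = gen_rtx_REG (SImode, 5);	/* TO(0): r5 */
  /* E.g. (plus:SI (reg:SI 4) (const_int 8)) is rewritten in place to
     (plus:SI (reg:SI 5) (const_int 8)); NULL_RTX is returned if any
     needed replacement would be ambiguous.  */
  return replace_n_hard_rtx (x, replacements, 1, 1);
}
#endif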
11083
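/* Return an rtx that computes the MODE value of X, normally a TRUNCATE.
   If X is itself a sign or zero extension, use its operand directly, or
   re-extend the operand to MODE, when that yields the same value; if
   NEED_SIGN_EXT is nonzero, only a SIGN_EXTEND may replace the TRUNCATE.  */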
11084 rtx
11085 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11086 {
11087 enum rtx_code code = TRUNCATE;
11088
11089 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11090 {
11091 rtx inner = XEXP (x, 0);
11092 enum machine_mode inner_mode = GET_MODE (inner);
11093
11094 if (inner_mode == mode)
11095 return inner;
11096 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11097 x = inner;
11098 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11099 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11100 {
11101 code = GET_CODE (x);
11102 x = inner;
11103 }
11104 }
11105 return gen_rtx_fmt_e (code, mode, x);
11106 }
11107
11108 /* Called via for_each_rtx after reload to clean up truncates of
11109 registers that span multiple actual hard registers. */
11110 int
11111 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11112 {
11113 rtx x = *p, reg;
11114
11115 if (GET_CODE (x) != TRUNCATE)
11116 return 0;
11117 reg = XEXP (x, 0);
11118 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
11119 {
11120 enum machine_mode reg_mode = GET_MODE (reg);
11121 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11122 subreg_lowpart_offset (DImode, reg_mode));
11123 *(int*) n_changes += 1;
11124 return -1;
11125 }
11126 return 0;
11127 }
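/* Illustrative sketch only (INSN here is a hypothetical variable): the
   shape of a for_each_rtx call that drives the callback above and counts
   how many truncates were narrowed.  */
#if 0
  {
    int n_changes = 0;

    for_each_rtx (&PATTERN (insn), shmedia_cleanup_truncate, &n_changes);
  }
#endif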
11128
11129 /* Load and store depend on the highpart of the address. However,
11130 set_attr_alternative does not give well-defined results before reload,
11131 so we must look at the rtl ourselves to see if any of the feeding
11132 registers is used in a memref. */
11133
11134 /* Called by sh_contains_memref_p via for_each_rtx. */
11135 static int
11136 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11137 {
11138 return (GET_CODE (*loc) == MEM);
11139 }
11140
11141 /* Return nonzero iff INSN contains a MEM. */
11142 int
11143 sh_contains_memref_p (rtx insn)
11144 {
11145 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11146 }
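/* For example, an insn whose pattern is
     (set (reg:SI 0) (mem:SI (reg:SI 1)))
   makes the function above return nonzero, while a register-to-register
   copy does not.  */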
11147
11148 /* FNADDR is the MEM expression from a call expander. Return an address
11149 to use in an SHmedia insn pattern. */
11150 rtx
11151 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11152 {
11153 int is_sym;
11154
11155 fnaddr = XEXP (fnaddr, 0);
11156 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11157 if (flag_pic && is_sym)
11158 {
11159 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11160 {
11161 rtx reg = gen_reg_rtx (Pmode);
11162
11163 /* We must not use GOTPLT for sibcalls, because PIC_REG
11164 must be restored before the PLT code gets to run. */
11165 if (is_sibcall)
11166 emit_insn (gen_symGOT2reg (reg, fnaddr));
11167 else
11168 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11169 fnaddr = reg;
11170 }
11171 else
11172 {
11173 fnaddr = gen_sym2PIC (fnaddr);
11174 PUT_MODE (fnaddr, Pmode);
11175 }
11176 }
11177 /* If ptabs might trap, make this visible to the rest of the compiler.
11178 We generally assume that symbols pertain to valid locations, but
11179 it is possible to generate invalid symbols with asm or linker tricks.
11180 In a list of functions where each returns its successor, an invalid
11181 symbol might denote an empty list. */
11182 if (!TARGET_PT_FIXED
11183 && (!is_sym || TARGET_INVALID_SYMBOLS)
11184 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11185 {
11186 rtx tr = gen_reg_rtx (PDImode);
11187
11188 emit_insn (gen_ptabs (tr, fnaddr));
11189 fnaddr = tr;
11190 }
11191 else if (! target_reg_operand (fnaddr, Pmode))
11192 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11193 return fnaddr;
11194 }
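/* A hypothetical sketch of the intended use from a call expander; the
   operand numbering and the ADDR variable are illustrative only.  */
#if 0
  if (TARGET_SHMEDIA)
    {
      rtx addr = shmedia_prepare_call_address (operands[0], 0);

      operands[0] = gen_rtx_MEM (GET_MODE (operands[0]), addr);
    }
#endif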
11195
11196 const char *sh_multcost_str = "";
11197 const char *sh_gettrcost_str = "";
11198 const char *sh_div_str = "";
11199 const char *sh_divsi3_libfunc = "";
11200 const char *cut2_workaround_str = "";
11201 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11202
11203 /* This defines the storage for the variable part of a -mboard= option.
11204 It is only required when using the sh-superh-elf target. */
11205 #ifdef _SUPERH_H
11206 const char * boardtype = "7750p2";
11207 const char * osruntime = "bare";
11208 #endif
11209
11210 #include "gt-sh.h"