Introduce sh2a support.
[gcc.git] / gcc / config / sh / sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "ra.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55
56
57 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
58
59 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
60 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
61
62 /* These are some macros to abstract register modes. */
63 #define CONST_OK_FOR_ADD(size) \
64 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
65 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
66 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
67 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
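/* An illustrative sketch (not code from this file; the scratch register
   and size below are hypothetical) of how these mode-abstracting macros
   are meant to be used, so the same source works whether the pointer
   mode is SImode or DImode:

     if (CONST_OK_FOR_ADD (-16))
       // Constant fits the target's add-immediate range (I08 on SH,
       // I10 on SHmedia): a single add of the pointer-sized mode.
       emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                            GEN_INT (-16)));
     else
       {
         // Otherwise load the constant into a scratch reg first.
         rtx tmp = gen_rtx_REG (Pmode, 3);      // hypothetical scratch
         emit_insn (GEN_MOV (tmp, GEN_INT (-16)));
         emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx, tmp));
       }
*/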
68
69 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
70 int current_function_interrupt;
71
72 /* ??? The pragma interrupt support will not work for SH3. */
73 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
74 output code for the next function appropriate for an interrupt handler. */
75 int pragma_interrupt;
76
77 /* This is set by the trap_exit attribute for functions. It specifies
78 a trap number to be used in a trapa instruction at function exit
79 (instead of an rte instruction). */
80 int trap_exit;
81
82 /* This is used by the sp_switch attribute for functions. It specifies
83 a variable holding the address of the stack the interrupt function
84 should switch to/from at entry/exit. */
85 rtx sp_switch;
86
87 /* This is set by #pragma trapa, and is similar to the above, except that
88 the compiler doesn't emit code to preserve all registers. */
89 static int pragma_trapa;
90
91 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
92 which has a separate set of low regs for User and Supervisor modes.
93 This should only be used for the lowest level of interrupts. Higher levels
94 of interrupts must save the registers in case they themselves are
95 interrupted. */
96 int pragma_nosave_low_regs;
97
98 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
99 sh_expand_prologue. */
100 int current_function_anonymous_args;
101
102 /* Global variables for machine-dependent things. */
103
104 /* Which CPU we are scheduling for. */
105 enum processor_type sh_cpu;
106
107 /* Definitions used in ready queue reordering for first scheduling pass. */
108
109 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
110 static short *regmode_weight[2];
111
112 /* Total SFmode and SImode weights of scheduled insns. */
113 static int curr_regmode_pressure[2];
114
115 /* If true, skip cycles for Q -> R movement. */
116 static int skip_cycles = 0;
117
118 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
119 and returned from sh_reorder2. */
120 static short cached_can_issue_more;
121
122 /* Saved operands from the last compare to use when we generate an scc
123 or bcc insn. */
124
125 rtx sh_compare_op0;
126 rtx sh_compare_op1;
127
128 /* Provides the class number of the smallest class containing
129 reg number. */
130
131 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
132 {
133 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
170 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
171 GENERAL_REGS,
172 };
173
174 char sh_register_names[FIRST_PSEUDO_REGISTER] \
175 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
176
177 char sh_additional_register_names[ADDREGNAMES_SIZE] \
178 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
179 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
180
181 /* Provide reg_class from a letter such as appears in the machine
182 description. *: letter reserved by the target-independent parts of the compiler.
183 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
184
185 enum reg_class reg_class_from_letter[] =
186 {
187 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
188 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
189 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
190 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
191 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
192 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
193 /* y */ FPUL_REGS, /* z */ R0_REGS
194 };
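/* Example of how the table above is consulted (a sketch, not code taken
   from this file): for the 'z' constraint used in sh.md,

     reg_class_from_letter['z' - 'a'] == R0_REGS

   so an operand constrained with "z" must end up in r0, while letters
   marked with ** above map to NO_REGS because they are reserved by the
   target-independent parts of the compiler.  */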
195
196 int assembler_dialect;
197
198 static bool shmedia_space_reserved_for_target_registers;
199
200 static void split_branches (rtx);
201 static int branch_dest (rtx);
202 static void force_into (rtx, rtx);
203 static void print_slot (rtx);
204 static rtx add_constant (rtx, enum machine_mode, rtx);
205 static void dump_table (rtx, rtx);
206 static int hi_const (rtx);
207 static int broken_move (rtx);
208 static int mova_p (rtx);
209 static rtx find_barrier (int, rtx, rtx);
210 static int noncall_uses_reg (rtx, rtx, rtx *);
211 static rtx gen_block_redirect (rtx, int, int);
212 static void sh_reorg (void);
213 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
214 static rtx frame_insn (rtx);
215 static rtx push (int);
216 static void pop (int);
217 static void push_regs (HARD_REG_SET *, int);
218 static int calc_live_regs (HARD_REG_SET *);
219 static void mark_use (rtx, rtx *);
220 static HOST_WIDE_INT rounded_frame_size (int);
221 static rtx mark_constant_pool_use (rtx);
222 const struct attribute_spec sh_attribute_table[];
223 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
224 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
225 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
227 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
228 static void sh_insert_attributes (tree, tree *);
229 static int sh_adjust_cost (rtx, rtx, rtx, int);
230 static int sh_issue_rate (void);
231 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
232 static short find_set_regmode_weight (rtx, enum machine_mode);
233 static short find_insn_regmode_weight (rtx, enum machine_mode);
234 static void find_regmode_weight (int, enum machine_mode);
235 static void sh_md_init_global (FILE *, int, int);
236 static void sh_md_finish_global (FILE *, int);
237 static int rank_for_reorder (const void *, const void *);
238 static void swap_reorder (rtx *, int);
239 static void ready_reorder (rtx *, int);
240 static short high_pressure (enum machine_mode);
241 static int sh_reorder (FILE *, int, rtx *, int *, int);
242 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
243 static void sh_md_init (FILE *, int, int);
244 static int sh_variable_issue (FILE *, int, rtx, int);
245
246 static bool sh_function_ok_for_sibcall (tree, tree);
247
248 static bool sh_cannot_modify_jumps_p (void);
249 static int sh_target_reg_class (void);
250 static bool sh_optimize_target_register_callee_saved (bool);
251 static bool sh_ms_bitfield_layout_p (tree);
252
253 static void sh_init_builtins (void);
254 static void sh_media_init_builtins (void);
255 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
256 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
257 static void sh_file_start (void);
258 static int flow_dependent_p (rtx, rtx);
259 static void flow_dependent_p_1 (rtx, rtx, void *);
260 static int shiftcosts (rtx);
261 static int andcosts (rtx);
262 static int addsubcosts (rtx);
263 static int multcosts (rtx);
264 static bool unspec_caller_rtx_p (rtx);
265 static bool sh_cannot_copy_insn_p (rtx);
266 static bool sh_rtx_costs (rtx, int, int, int *);
267 static int sh_address_cost (rtx);
268 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
269 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
270 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
271 static int scavenge_reg (HARD_REG_SET *s);
272 struct save_schedule_s;
273 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
274 struct save_schedule_s *, int);
275
276 static rtx sh_struct_value_rtx (tree, int);
277 static bool sh_return_in_memory (tree, tree);
278 static rtx sh_builtin_saveregs (void);
279 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
280 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
281 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
282 static tree sh_build_builtin_va_list (void);
283 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
284 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
285 tree, bool);
286
287 \f
288 /* Initialize the GCC target structure. */
289 #undef TARGET_ATTRIBUTE_TABLE
290 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
291
292 /* The next two are used for debug info when compiling with -gdwarf. */
293 #undef TARGET_ASM_UNALIGNED_HI_OP
294 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
295 #undef TARGET_ASM_UNALIGNED_SI_OP
296 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
297
298 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
299 #undef TARGET_ASM_UNALIGNED_DI_OP
300 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
301 #undef TARGET_ASM_ALIGNED_DI_OP
302 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
303
304 #undef TARGET_ASM_FUNCTION_EPILOGUE
305 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
306
307 #undef TARGET_ASM_OUTPUT_MI_THUNK
308 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
309
310 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
311 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
312
313 #undef TARGET_ASM_FILE_START
314 #define TARGET_ASM_FILE_START sh_file_start
315 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
316 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
317
318 #undef TARGET_INSERT_ATTRIBUTES
319 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
320
321 #undef TARGET_SCHED_ADJUST_COST
322 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
323
324 #undef TARGET_SCHED_ISSUE_RATE
325 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
326
327 /* The next 5 hooks have been implemented for reenabling sched1. With the
328 help of these macros we are limiting the movement of insns in sched1 to
329 reduce the register pressure. The overall idea is to keep count of SImode
330 and SFmode regs required by already scheduled insns. When these counts
331 cross some threshold values, give priority to insns that free registers.
332 The insn that frees registers is most likely to be the insn with the lowest
333 LUID (original insn order); but such an insn might be in the stalled
334 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
335 up to a maximum of 8 cycles so that such insns may move from Q -> R.
336
337 The descriptions of the hooks are as follows:
338
339 TARGET_SCHED_INIT_GLOBAL: A new hook added to the generic
340 scheduler; it is called inside the sched_init function just after the
341 find_insn_reg_weights function call. It is used to calculate the SImode
342 and SFmode weights of the insns of basic blocks; very similar to what
343 find_insn_reg_weights does.
344 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
345
346 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
347 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
348 (Q)->(R).
349
350 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
351 high, reorder the ready queue so that the insn with the lowest LUID will be
352 issued next.
353
354 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
355 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
356
357 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
358 can be returned from TARGET_SCHED_REORDER2.
359
360 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
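/* A minimal sketch of the shape of the check these hooks perform
   (illustrative only; it assumes high_pressure () acts as a predicate on
   the given mode, see its definition later in this file):

     if (high_pressure (SImode) || high_pressure (SFmode))
       {
         // sh_reorder: move the lowest-LUID ready insn to the front.
         // sh_reorder2: ask sh_dfa_new_cycle to skip cycles (at most 8)
         // so stalled insns can migrate from queue Q into queue R.
       }
*/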
361
362 #undef TARGET_SCHED_DFA_NEW_CYCLE
363 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
364
365 #undef TARGET_SCHED_INIT_GLOBAL
366 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
367
368 #undef TARGET_SCHED_FINISH_GLOBAL
369 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
370
371 #undef TARGET_SCHED_VARIABLE_ISSUE
372 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
373
374 #undef TARGET_SCHED_REORDER
375 #define TARGET_SCHED_REORDER sh_reorder
376
377 #undef TARGET_SCHED_REORDER2
378 #define TARGET_SCHED_REORDER2 sh_reorder2
379
380 #undef TARGET_SCHED_INIT
381 #define TARGET_SCHED_INIT sh_md_init
382
383 #undef TARGET_CANNOT_MODIFY_JUMPS_P
384 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
385 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
386 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
387 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
388 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
389 sh_optimize_target_register_callee_saved
390
391 #undef TARGET_MS_BITFIELD_LAYOUT_P
392 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
393
394 #undef TARGET_INIT_BUILTINS
395 #define TARGET_INIT_BUILTINS sh_init_builtins
396 #undef TARGET_EXPAND_BUILTIN
397 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
398
399 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
400 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
401
402 #undef TARGET_CANNOT_COPY_INSN_P
403 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
404 #undef TARGET_RTX_COSTS
405 #define TARGET_RTX_COSTS sh_rtx_costs
406 #undef TARGET_ADDRESS_COST
407 #define TARGET_ADDRESS_COST sh_address_cost
408
409 #undef TARGET_MACHINE_DEPENDENT_REORG
410 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
411
412 #ifdef HAVE_AS_TLS
413 #undef TARGET_HAVE_TLS
414 #define TARGET_HAVE_TLS true
415 #endif
416
417 #undef TARGET_PROMOTE_PROTOTYPES
418 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
419 #undef TARGET_PROMOTE_FUNCTION_ARGS
420 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
421 #undef TARGET_PROMOTE_FUNCTION_RETURN
422 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
423
424 #undef TARGET_STRUCT_VALUE_RTX
425 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
426 #undef TARGET_RETURN_IN_MEMORY
427 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
428
429 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
430 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
431 #undef TARGET_SETUP_INCOMING_VARARGS
432 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
433 #undef TARGET_STRICT_ARGUMENT_NAMING
434 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
435 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
436 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
437 #undef TARGET_MUST_PASS_IN_STACK
438 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
439 #undef TARGET_PASS_BY_REFERENCE
440 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
441
442 #undef TARGET_BUILD_BUILTIN_VA_LIST
443 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
444 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
445 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
446
447 #undef TARGET_PCH_VALID_P
448 #define TARGET_PCH_VALID_P sh_pch_valid_p
449
450 /* Return regmode weight for insn. */
451 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
452
453 /* Return current register pressure for regmode. */
454 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
455
456 #ifdef SYMBIAN
457
458 #undef TARGET_ENCODE_SECTION_INFO
459 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
460 #undef TARGET_STRIP_NAME_ENCODING
461 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
462 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
463 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
464
465 #endif /* SYMBIAN */
466
467 struct gcc_target targetm = TARGET_INITIALIZER;
468 \f
469 /* Print the operand address in x to the stream. */
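/* Illustrative examples of the address syntax produced below (register
   numbers are hypothetical):

     (reg r4)                       ->  @r4
     (plus (reg r4) (const_int 8))  ->  @(8,r4)
     (plus (reg r0) (reg r4))       ->  @(r0,r4)
     (pre_dec (reg r15))            ->  @-r15
     (post_inc (reg r4))            ->  @r4+
*/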
470
471 void
472 print_operand_address (FILE *stream, rtx x)
473 {
474 switch (GET_CODE (x))
475 {
476 case REG:
477 case SUBREG:
478 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
479 break;
480
481 case PLUS:
482 {
483 rtx base = XEXP (x, 0);
484 rtx index = XEXP (x, 1);
485
486 switch (GET_CODE (index))
487 {
488 case CONST_INT:
489 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
490 reg_names[true_regnum (base)]);
491 break;
492
493 case REG:
494 case SUBREG:
495 {
496 int base_num = true_regnum (base);
497 int index_num = true_regnum (index);
498
499 fprintf (stream, "@(r0,%s)",
500 reg_names[MAX (base_num, index_num)]);
501 break;
502 }
503
504 default:
505 debug_rtx (x);
506 abort ();
507 }
508 }
509 break;
510
511 case PRE_DEC:
512 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
513 break;
514
515 case POST_INC:
516 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
517 break;
518
519 default:
520 x = mark_constant_pool_use (x);
521 output_addr_const (stream, x);
522 break;
523 }
524 }
525
526 /* Print operand x (an rtx) in assembler syntax to file stream
527 according to modifier code.
528
529 '.' print a .s if insn needs delay slot
530 ',' print LOCAL_LABEL_PREFIX
531 '@' print trap, rte or rts depending upon whether the function is an interrupt handler
532 '#' output a nop if there is nothing to put in the delay slot
533 ''' print likelihood suffix (/u for unlikely).
534 'O' print a constant without the #
535 'R' print the LSW of a dp value - changes if in little endian
536 'S' print the MSW of a dp value - changes if in little endian
537 'T' print the next word of a dp value - same as 'R' in big endian mode.
538 'M' print an `x' if `m' will print `base,index'.
539 'N' print 'r63' if the operand is (const_int 0).
540 'd' print a V2SF reg as dN instead of fpN.
541 'm' print a pair `base,offset' or `base,index', for LD and ST.
542 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
543 'o' output an operator. */
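/* A few illustrative expansions of the modifiers above (hypothetical
   operands, little-endian configuration assumed):

     %R of (reg:DI r4)            ->  r4     (LSW of the pair r4/r5)
     %S of (reg:DI r4)            ->  r5     (MSW of the pair)
     %T of (reg:DI r4)            ->  r5     (next word)
     %O of (const_int 42)         ->  42     (no leading #)
     %u of (const_int 0x12345678) ->  22136  (low 16 bits, 0x5678)
*/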
544
545 void
546 print_operand (FILE *stream, rtx x, int code)
547 {
548 switch (code)
549 {
550 case '.':
551 if (final_sequence
552 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
553 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
554 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
555 break;
556 case ',':
557 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
558 break;
559 case '@':
560 if (trap_exit)
561 fprintf (stream, "trapa #%d", trap_exit);
562 else if (sh_cfun_interrupt_handler_p ())
563 fprintf (stream, "rte");
564 else
565 fprintf (stream, "rts");
566 break;
567 case '#':
568 /* Output a nop if there's nothing in the delay slot. */
569 if (dbr_sequence_length () == 0)
570 fprintf (stream, "\n\tnop");
571 break;
572 case '\'':
573 {
574 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
575
576 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
577 fputs ("/u", stream);
578 break;
579 }
580 case 'O':
581 x = mark_constant_pool_use (x);
582 output_addr_const (stream, x);
583 break;
584 case 'R':
585 fputs (reg_names[REGNO (x) + LSW], (stream));
586 break;
587 case 'S':
588 fputs (reg_names[REGNO (x) + MSW], (stream));
589 break;
590 case 'T':
591 /* Next word of a double. */
592 switch (GET_CODE (x))
593 {
594 case REG:
595 fputs (reg_names[REGNO (x) + 1], (stream));
596 break;
597 case MEM:
598 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
599 && GET_CODE (XEXP (x, 0)) != POST_INC)
600 x = adjust_address (x, SImode, 4);
601 print_operand_address (stream, XEXP (x, 0));
602 break;
603 default:
604 break;
605 }
606 break;
607 case 'o':
608 switch (GET_CODE (x))
609 {
610 case PLUS: fputs ("add", stream); break;
611 case MINUS: fputs ("sub", stream); break;
612 case MULT: fputs ("mul", stream); break;
613 case DIV: fputs ("div", stream); break;
614 case EQ: fputs ("eq", stream); break;
615 case NE: fputs ("ne", stream); break;
616 case GT: case LT: fputs ("gt", stream); break;
617 case GE: case LE: fputs ("ge", stream); break;
618 case GTU: case LTU: fputs ("gtu", stream); break;
619 case GEU: case LEU: fputs ("geu", stream); break;
620 default:
621 break;
622 }
623 break;
624 case 'M':
625 if (GET_CODE (x) == MEM
626 && GET_CODE (XEXP (x, 0)) == PLUS
627 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
628 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
629 fputc ('x', stream);
630 break;
631
632 case 'm':
633 if (GET_CODE (x) != MEM)
634 abort ();
635 x = XEXP (x, 0);
636 switch (GET_CODE (x))
637 {
638 case REG:
639 case SUBREG:
640 print_operand (stream, x, 0);
641 fputs (", 0", stream);
642 break;
643
644 case PLUS:
645 print_operand (stream, XEXP (x, 0), 0);
646 fputs (", ", stream);
647 print_operand (stream, XEXP (x, 1), 0);
648 break;
649
650 default:
651 abort ();
652 }
653 break;
654
655 case 'd':
656 if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
657 abort ();
658
659 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
660 break;
661
662 case 'N':
663 if (x == CONST0_RTX (GET_MODE (x)))
664 {
665 fprintf ((stream), "r63");
666 break;
667 }
668 goto default_output;
669 case 'u':
670 if (GET_CODE (x) == CONST_INT)
671 {
672 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
673 break;
674 }
675 /* Fall through. */
676
677 default_output:
678 default:
679 switch (GET_CODE (x))
680 {
681 /* FIXME: We need this on SHmedia32 because reload generates
682 some sign-extended HI or QI loads into DImode registers
683 but, because Pmode is SImode, the address ends up with a
684 subreg:SI of the DImode register. Maybe reload should be
685 fixed so as to apply alter_subreg to such loads? */
686 case SUBREG:
687 if (SUBREG_BYTE (x) != 0
688 || GET_CODE (SUBREG_REG (x)) != REG)
689 abort ();
690
691 x = SUBREG_REG (x);
692 /* Fall through. */
693
694 case REG:
695 if (FP_REGISTER_P (REGNO (x))
696 && GET_MODE (x) == V16SFmode)
697 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
698 else if (FP_REGISTER_P (REGNO (x))
699 && GET_MODE (x) == V4SFmode)
700 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
701 else if (GET_CODE (x) == REG
702 && GET_MODE (x) == V2SFmode)
703 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
704 else if (FP_REGISTER_P (REGNO (x))
705 && GET_MODE_SIZE (GET_MODE (x)) > 4)
706 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
707 else
708 fputs (reg_names[REGNO (x)], (stream));
709 break;
710
711 case MEM:
712 output_address (XEXP (x, 0));
713 break;
714
715 case CONST:
716 if (TARGET_SHMEDIA
717 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
718 && GET_MODE (XEXP (x, 0)) == DImode
719 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
720 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
721 {
722 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
723
724 fputc ('(', stream);
725 if (GET_CODE (val) == ASHIFTRT)
726 {
727 fputc ('(', stream);
728 if (GET_CODE (XEXP (val, 0)) == CONST)
729 fputc ('(', stream);
730 output_addr_const (stream, XEXP (val, 0));
731 if (GET_CODE (XEXP (val, 0)) == CONST)
732 fputc (')', stream);
733 fputs (" >> ", stream);
734 output_addr_const (stream, XEXP (val, 1));
735 fputc (')', stream);
736 }
737 else
738 {
739 if (GET_CODE (val) == CONST)
740 fputc ('(', stream);
741 output_addr_const (stream, val);
742 if (GET_CODE (val) == CONST)
743 fputc (')', stream);
744 }
745 fputs (" & 65535)", stream);
746 break;
747 }
748
749 /* Fall through. */
750 default:
751 if (TARGET_SH1)
752 fputc ('#', stream);
753 output_addr_const (stream, x);
754 break;
755 }
756 break;
757 }
758 }
759 \f
760 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
761 static void
762 force_into (rtx value, rtx target)
763 {
764 value = force_operand (value, target);
765 if (! rtx_equal_p (value, target))
766 emit_insn (gen_move_insn (target, value));
767 }
768
769 /* Emit code to perform a block move. Choose the best method.
770
771 OPERANDS[0] is the destination.
772 OPERANDS[1] is the source.
773 OPERANDS[2] is the size.
774 OPERANDS[3] is the alignment safe to use. */
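/* Illustrative dispatch for a few constant sizes (assuming at least
   4-byte alignment; see the code below for the exact conditions):

     bytes == 12, TARGET_HARD_SH4    ->  call __movmemSI12_i4
                                         (r4 = dest address, r5 = src)
     bytes == 20, !TARGET_HARD_SH4   ->  call __movmemSI20
     large sizes, !TARGET_SMALLCODE  ->  call __movmem with the combined
                                         loop/switch count in r6

   A non-constant size, or alignment below 4 bytes, generally makes this
   function return 0 so the caller falls back to a generic expansion.  */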
775
776 int
777 expand_block_move (rtx *operands)
778 {
779 int align = INTVAL (operands[3]);
780 int constp = (GET_CODE (operands[2]) == CONST_INT);
781 int bytes = (constp ? INTVAL (operands[2]) : 0);
782
783 if (! constp)
784 return 0;
785
786 /* If we could use mov.l to move words and dest is word-aligned, we
787 can use movua.l for loads and still generate a relatively short
788 and efficient sequence. */
789 if (TARGET_SH4A_ARCH && align < 4
790 && MEM_ALIGN (operands[0]) >= 32
791 && can_move_by_pieces (bytes, 32))
792 {
793 rtx dest = copy_rtx (operands[0]);
794 rtx src = copy_rtx (operands[1]);
795 /* We could use different pseudos for each copied word, but
796 since movua can only load into r0, it's kind of
797 pointless. */
798 rtx temp = gen_reg_rtx (SImode);
799 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
800 int copied = 0;
801
802 while (copied + 4 <= bytes)
803 {
804 rtx to = adjust_address (dest, SImode, copied);
805 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
806
807 emit_insn (gen_movua (temp, from));
808 emit_move_insn (src_addr, plus_constant (src_addr, 4));
809 emit_move_insn (to, temp);
810 copied += 4;
811 }
812
813 if (copied < bytes)
814 move_by_pieces (adjust_address (dest, BLKmode, copied),
815 adjust_automodify_address (src, BLKmode,
816 src_addr, copied),
817 bytes - copied, align, 0);
818
819 return 1;
820 }
821
822 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
823 alignment, or if it isn't a multiple of 4 bytes, then fail. */
824 if (align < 4 || (bytes % 4 != 0))
825 return 0;
826
827 if (TARGET_HARD_SH4)
828 {
829 if (bytes < 12)
830 return 0;
831 else if (bytes == 12)
832 {
833 tree entry_name;
834 rtx sym;
835 rtx func_addr_rtx;
836 rtx r4 = gen_rtx_REG (SImode, 4);
837 rtx r5 = gen_rtx_REG (SImode, 5);
838
839 entry_name = get_identifier ("__movmemSI12_i4");
840
841 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
842 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
843 force_into (XEXP (operands[0], 0), r4);
844 force_into (XEXP (operands[1], 0), r5);
845 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
846 return 1;
847 }
848 else if (! TARGET_SMALLCODE)
849 {
850 tree entry_name;
851 rtx sym;
852 rtx func_addr_rtx;
853 int dwords;
854 rtx r4 = gen_rtx_REG (SImode, 4);
855 rtx r5 = gen_rtx_REG (SImode, 5);
856 rtx r6 = gen_rtx_REG (SImode, 6);
857
858 entry_name = get_identifier (bytes & 4
859 ? "__movmem_i4_odd"
860 : "__movmem_i4_even");
861 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
862 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
863 force_into (XEXP (operands[0], 0), r4);
864 force_into (XEXP (operands[1], 0), r5);
865
866 dwords = bytes >> 3;
867 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
868 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
869 return 1;
870 }
871 else
872 return 0;
873 }
874 if (bytes < 64)
875 {
876 char entry[30];
877 tree entry_name;
878 rtx sym;
879 rtx func_addr_rtx;
880 rtx r4 = gen_rtx_REG (SImode, 4);
881 rtx r5 = gen_rtx_REG (SImode, 5);
882
883 sprintf (entry, "__movmemSI%d", bytes);
884 entry_name = get_identifier (entry);
885 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
886 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
887 force_into (XEXP (operands[0], 0), r4);
888 force_into (XEXP (operands[1], 0), r5);
889 emit_insn (gen_block_move_real (func_addr_rtx));
890 return 1;
891 }
892
893 /* This is the same number of bytes as a memcpy call, but to a different,
894 less common function name, so this will occasionally use more space. */
895 if (! TARGET_SMALLCODE)
896 {
897 tree entry_name;
898 rtx sym;
899 rtx func_addr_rtx;
900 int final_switch, while_loop;
901 rtx r4 = gen_rtx_REG (SImode, 4);
902 rtx r5 = gen_rtx_REG (SImode, 5);
903 rtx r6 = gen_rtx_REG (SImode, 6);
904
905 entry_name = get_identifier ("__movmem");
906 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
907 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
908 force_into (XEXP (operands[0], 0), r4);
909 force_into (XEXP (operands[1], 0), r5);
910
911 /* r6 controls the size of the move. 16 is decremented from it
912 for each 64 bytes moved. Then the negative bit left over is used
913 as an index into a list of move instructions. e.g., a 72 byte move
914 would be set up with size(r6) = 14, for one iteration through the
915 big while loop, and a switch of -2 for the last part. */
916
917 final_switch = 16 - ((bytes / 4) % 16);
918 while_loop = ((bytes / 4) / 16 - 1) * 16;
919 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
920 emit_insn (gen_block_lump_real (func_addr_rtx));
921 return 1;
922 }
923
924 return 0;
925 }
926
927 /* Prepare operands for a move define_expand; specifically, one of the
928 operands must be in a register. */
929
930 int
931 prepare_move_operands (rtx operands[], enum machine_mode mode)
932 {
933 if ((mode == SImode || mode == DImode)
934 && flag_pic
935 && ! ((mode == Pmode || mode == ptr_mode)
936 && tls_symbolic_operand (operands[1], Pmode) != 0))
937 {
938 rtx temp;
939 if (SYMBOLIC_CONST_P (operands[1]))
940 {
941 if (GET_CODE (operands[0]) == MEM)
942 operands[1] = force_reg (Pmode, operands[1]);
943 else if (TARGET_SHMEDIA
944 && GET_CODE (operands[1]) == LABEL_REF
945 && target_reg_operand (operands[0], mode))
946 /* It's ok. */;
947 else
948 {
949 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
950 operands[1] = legitimize_pic_address (operands[1], mode, temp);
951 }
952 }
953 else if (GET_CODE (operands[1]) == CONST
954 && GET_CODE (XEXP (operands[1], 0)) == PLUS
955 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
956 {
957 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
958 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
959 mode, temp);
960 operands[1] = expand_binop (mode, add_optab, temp,
961 XEXP (XEXP (operands[1], 0), 1),
962 no_new_pseudos ? temp
963 : gen_reg_rtx (Pmode),
964 0, OPTAB_LIB_WIDEN);
965 }
966 }
967
968 if (! reload_in_progress && ! reload_completed)
969 {
970 /* Copy the source to a register if both operands aren't registers. */
971 if (! register_operand (operands[0], mode)
972 && ! sh_register_operand (operands[1], mode))
973 operands[1] = copy_to_mode_reg (mode, operands[1]);
974
975 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
976 {
977 /* This is like change_address_1 (operands[0], mode, 0, 1),
978 except that we can't use that function because it is static. */
979 rtx new = change_address (operands[0], mode, 0);
980 MEM_COPY_ATTRIBUTES (new, operands[0]);
981 operands[0] = new;
982 }
983
984 /* This case can happen while generating code to move the result
985 of a library call to the target. Reject `st r0,@(rX,rY)' because
986 reload will fail to find a spill register for rX, since r0 is already
987 being used for the source. */
988 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
989 && GET_CODE (operands[0]) == MEM
990 && GET_CODE (XEXP (operands[0], 0)) == PLUS
991 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
992 operands[1] = copy_to_mode_reg (mode, operands[1]);
993 }
994
995 if (mode == Pmode || mode == ptr_mode)
996 {
997 rtx op0, op1;
998 enum tls_model tls_kind;
999
1000 op0 = operands[0];
1001 op1 = operands[1];
1002 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1003 {
1004 rtx tga_op1, tga_ret, tmp, tmp2;
1005
1006
1007 switch (tls_kind)
1008 {
1009 case TLS_MODEL_GLOBAL_DYNAMIC:
1010 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1011 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1012 op1 = tga_ret;
1013 break;
1014
1015 case TLS_MODEL_LOCAL_DYNAMIC:
1016 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1017 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1018
1019 tmp = gen_reg_rtx (Pmode);
1020 emit_move_insn (tmp, tga_ret);
1021
1022 if (register_operand (op0, Pmode))
1023 tmp2 = op0;
1024 else
1025 tmp2 = gen_reg_rtx (Pmode);
1026
1027 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1028 op1 = tmp2;
1029 break;
1030
1031 case TLS_MODEL_INITIAL_EXEC:
1032 if (! flag_pic)
1033 emit_insn (gen_GOTaddr2picreg ());
1034 tga_op1 = gen_reg_rtx (Pmode);
1035 tmp = gen_sym2GOTTPOFF (op1);
1036 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1037 op1 = tga_op1;
1038 break;
1039
1040 case TLS_MODEL_LOCAL_EXEC:
1041 tmp2 = gen_reg_rtx (Pmode);
1042 emit_insn (gen_load_gbr (tmp2));
1043 tmp = gen_reg_rtx (Pmode);
1044 emit_insn (gen_symTPOFF2reg (tmp, op1));
1045 RTX_UNCHANGING_P (tmp) = 1;
1046
1047 if (register_operand (op0, Pmode))
1048 op1 = op0;
1049 else
1050 op1 = gen_reg_rtx (Pmode);
1051
1052 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1053 break;
1054
1055 default:
1056 abort ();
1057 }
1058 operands[1] = op1;
1059 }
1060 }
1061
1062 return 0;
1063 }
1064
1065 /* Prepare the operands for an scc instruction; make sure that the
1066 compare has been done. */
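/* Worked example (hypothetical operands): for an SImode "a < b", the
   switch below turns LT into GT and swaps sh_compare_op0/op1, so a single

     (set (reg T_REG) (gt (reg b) (reg a)))

   comparison is emitted and the T register is returned as the scc
   result.  */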
1067 rtx
1068 prepare_scc_operands (enum rtx_code code)
1069 {
1070 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1071 enum rtx_code oldcode = code;
1072 enum machine_mode mode;
1073
1074 /* First need a compare insn. */
1075 switch (code)
1076 {
1077 case NE:
1078 /* It isn't possible to handle this case. */
1079 abort ();
1080 case LT:
1081 code = GT;
1082 break;
1083 case LE:
1084 code = GE;
1085 break;
1086 case LTU:
1087 code = GTU;
1088 break;
1089 case LEU:
1090 code = GEU;
1091 break;
1092 default:
1093 break;
1094 }
1095 if (code != oldcode)
1096 {
1097 rtx tmp = sh_compare_op0;
1098 sh_compare_op0 = sh_compare_op1;
1099 sh_compare_op1 = tmp;
1100 }
1101
1102 mode = GET_MODE (sh_compare_op0);
1103 if (mode == VOIDmode)
1104 mode = GET_MODE (sh_compare_op1);
1105
1106 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1107 if ((code != EQ && code != NE
1108 && (sh_compare_op1 != const0_rtx
1109 || code == GTU || code == GEU || code == LTU || code == LEU))
1110 || (mode == DImode && sh_compare_op1 != const0_rtx)
1111 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1112 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1113
1114 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1115 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1116 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1117 gen_rtx_SET (VOIDmode, t_reg,
1118 gen_rtx_fmt_ee (code, SImode,
1119 sh_compare_op0, sh_compare_op1)),
1120 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1121 else
1122 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1123 gen_rtx_fmt_ee (code, SImode,
1124 sh_compare_op0, sh_compare_op1)));
1125
1126 return t_reg;
1127 }
1128
1129 /* Called from the md file, set up the operands of a compare instruction. */
1130
1131 void
1132 from_compare (rtx *operands, int code)
1133 {
1134 enum machine_mode mode = GET_MODE (sh_compare_op0);
1135 rtx insn;
1136 if (mode == VOIDmode)
1137 mode = GET_MODE (sh_compare_op1);
1138 if (code != EQ
1139 || mode == DImode
1140 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1141 {
1142 /* Force args into regs, since we can't use constants here. */
1143 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1144 if (sh_compare_op1 != const0_rtx
1145 || code == GTU || code == GEU
1146 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1147 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1148 }
1149 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1150 {
1151 from_compare (operands, GT);
1152 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1153 }
1154 else
1155 insn = gen_rtx_SET (VOIDmode,
1156 gen_rtx_REG (SImode, T_REG),
1157 gen_rtx_fmt_ee (code, SImode,
1158 sh_compare_op0, sh_compare_op1));
1159 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1160 {
1161 insn = gen_rtx_PARALLEL (VOIDmode,
1162 gen_rtvec (2, insn,
1163 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1164 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1165 }
1166 else
1167 emit_insn (insn);
1168 }
1169 \f
1170 /* Functions to output assembly code. */
1171
1172 /* Return a sequence of instructions to perform DI or DF move.
1173
1174 Since the SH cannot move a DI or DF in one instruction, we have
1175 to take care when we see overlapping source and dest registers. */
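/* Illustrative outputs (register numbers are hypothetical):

     mov.d r1,r2  (dest overlaps the top half of the source)
        ->  "mov r2,r3"  then  "mov r1,r2"
     mov.d r4,r1  (no problematic overlap)
        ->  "mov r4,r1"  then  "mov r5,r2"
     store to a pre-decremented address
        ->  "mov.l %T1,%0"  then  "mov.l %1,%0"  (high word pushed first)
*/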
1176
1177 const char *
1178 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1179 enum machine_mode mode)
1180 {
1181 rtx dst = operands[0];
1182 rtx src = operands[1];
1183
1184 if (GET_CODE (dst) == MEM
1185 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1186 return "mov.l %T1,%0\n\tmov.l %1,%0";
1187
1188 if (register_operand (dst, mode)
1189 && register_operand (src, mode))
1190 {
1191 if (REGNO (src) == MACH_REG)
1192 return "sts mach,%S0\n\tsts macl,%R0";
1193
1194 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1195 when mov.d r1,r0 do r1->r0 then r2->r1. */
1196
1197 if (REGNO (src) + 1 == REGNO (dst))
1198 return "mov %T1,%T0\n\tmov %1,%0";
1199 else
1200 return "mov %1,%0\n\tmov %T1,%T0";
1201 }
1202 else if (GET_CODE (src) == CONST_INT)
1203 {
1204 if (INTVAL (src) < 0)
1205 output_asm_insn ("mov #-1,%S0", operands);
1206 else
1207 output_asm_insn ("mov #0,%S0", operands);
1208
1209 return "mov %1,%R0";
1210 }
1211 else if (GET_CODE (src) == MEM)
1212 {
1213 int ptrreg = -1;
1214 int dreg = REGNO (dst);
1215 rtx inside = XEXP (src, 0);
1216
1217 if (GET_CODE (inside) == REG)
1218 ptrreg = REGNO (inside);
1219 else if (GET_CODE (inside) == SUBREG)
1220 ptrreg = subreg_regno (inside);
1221 else if (GET_CODE (inside) == PLUS)
1222 {
1223 ptrreg = REGNO (XEXP (inside, 0));
1224 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1225 an offsettable address. Unfortunately, offsettable addresses use
1226 QImode to check the offset, and a QImode offsettable address
1227 requires r0 for the other operand, which is not currently
1228 supported, so we can't use the 'o' constraint.
1229 Thus we must check for and handle r0+REG addresses here.
1230 We punt for now, since this is likely very rare. */
1231 if (GET_CODE (XEXP (inside, 1)) == REG)
1232 abort ();
1233 }
1234 else if (GET_CODE (inside) == LABEL_REF)
1235 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1236 else if (GET_CODE (inside) == POST_INC)
1237 return "mov.l %1,%0\n\tmov.l %1,%T0";
1238 else
1239 abort ();
1240
1241 /* Work out the safe way to copy. Copy into the second half first. */
1242 if (dreg == ptrreg)
1243 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1244 }
1245
1246 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1247 }
1248
1249 /* Print an instruction which would have gone into a delay slot after
1250 another instruction, but couldn't because the other instruction expanded
1251 into a sequence where putting the slot insn at the end wouldn't work. */
1252
1253 static void
1254 print_slot (rtx insn)
1255 {
1256 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1257
1258 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1259 }
1260
1261 const char *
1262 output_far_jump (rtx insn, rtx op)
1263 {
1264 struct { rtx lab, reg, op; } this;
1265 rtx braf_base_lab = NULL_RTX;
1266 const char *jump;
1267 int far;
1268 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1269 rtx prev;
1270
1271 this.lab = gen_label_rtx ();
1272
1273 if (TARGET_SH2
1274 && offset >= -32764
1275 && offset - get_attr_length (insn) <= 32766)
1276 {
1277 far = 0;
1278 jump = "mov.w %O0,%1; braf %1";
1279 }
1280 else
1281 {
1282 far = 1;
1283 if (flag_pic)
1284 {
1285 if (TARGET_SH2)
1286 jump = "mov.l %O0,%1; braf %1";
1287 else
1288 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1289 }
1290 else
1291 jump = "mov.l %O0,%1; jmp @%1";
1292 }
1293 /* If we have a scratch register available, use it. */
1294 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1295 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1296 {
1297 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1298 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1299 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1300 output_asm_insn (jump, &this.lab);
1301 if (dbr_sequence_length ())
1302 print_slot (final_sequence);
1303 else
1304 output_asm_insn ("nop", 0);
1305 }
1306 else
1307 {
1308 /* Output the delay slot insn first if any. */
1309 if (dbr_sequence_length ())
1310 print_slot (final_sequence);
1311
1312 this.reg = gen_rtx_REG (SImode, 13);
1313 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1314 Fortunately, MACL is fixed and call-clobbered, and we never
1315 need its value across jumps, so save r13 in it instead of in
1316 the stack. */
1317 if (TARGET_SH5)
1318 output_asm_insn ("lds r13, macl", 0);
1319 else
1320 output_asm_insn ("mov.l r13,@-r15", 0);
1321 output_asm_insn (jump, &this.lab);
1322 if (TARGET_SH5)
1323 output_asm_insn ("sts macl, r13", 0);
1324 else
1325 output_asm_insn ("mov.l @r15+,r13", 0);
1326 }
1327 if (far && flag_pic && TARGET_SH2)
1328 {
1329 braf_base_lab = gen_label_rtx ();
1330 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1331 CODE_LABEL_NUMBER (braf_base_lab));
1332 }
1333 if (far)
1334 output_asm_insn (".align 2", 0);
1335 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1336 this.op = op;
1337 if (far && flag_pic)
1338 {
1339 if (TARGET_SH2)
1340 this.lab = braf_base_lab;
1341 output_asm_insn (".long %O2-%O0", &this.lab);
1342 }
1343 else
1344 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1345 return "";
1346 }
1347
1348 /* Local label counter, used for constants in the pool and inside
1349 pattern branches. */
1350
1351 static int lf = 100;
1352
1353 /* Output code for ordinary branches. */
1354
1355 const char *
1356 output_branch (int logic, rtx insn, rtx *operands)
1357 {
1358 switch (get_attr_length (insn))
1359 {
1360 case 6:
1361 /* This can happen if filling the delay slot has caused a forward
1362 branch to exceed its range (we could reverse it, but only
1363 when we know we won't overextend other branches; this should
1364 best be handled by relaxation).
1365 It can also happen when other condbranches hoist delay slot insns
1366 from their destination, thus leading to code size increase.
1367 But the branch will still be in the range -4092..+4098 bytes. */
1368
1369 if (! TARGET_RELAX)
1370 {
1371 int label = lf++;
1372 /* The call to print_slot will clobber the operands. */
1373 rtx op0 = operands[0];
1374
1375 /* If the instruction in the delay slot is annulled (true), then
1376 there is no delay slot where we can put it now. The only safe
1377 place for it is after the label. final will do that by default. */
1378
1379 if (final_sequence
1380 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1381 {
1382 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1383 ASSEMBLER_DIALECT ? "/" : ".", label);
1384 print_slot (final_sequence);
1385 }
1386 else
1387 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1388
1389 output_asm_insn ("bra\t%l0", &op0);
1390 fprintf (asm_out_file, "\tnop\n");
1391 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1392
1393 return "";
1394 }
1395 /* When relaxing, handle this like a short branch. The linker
1396 will fix it up if it still doesn't fit after relaxation. */
1397 case 2:
1398 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1399
1400 /* These are for SH2e, in which we have to account for the
1401 extra nop because of the hardware bug in annulled branches. */
1402 case 8:
1403 if (! TARGET_RELAX)
1404 {
1405 int label = lf++;
1406
1407 if (final_sequence
1408 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1409 abort ();
1410 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1411 logic ? "f" : "t",
1412 ASSEMBLER_DIALECT ? "/" : ".", label);
1413 fprintf (asm_out_file, "\tnop\n");
1414 output_asm_insn ("bra\t%l0", operands);
1415 fprintf (asm_out_file, "\tnop\n");
1416 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1417
1418 return "";
1419 }
1420 /* When relaxing, fall through. */
1421 case 4:
1422 {
1423 char buffer[10];
1424
1425 sprintf (buffer, "b%s%ss\t%%l0",
1426 logic ? "t" : "f",
1427 ASSEMBLER_DIALECT ? "/" : ".");
1428 output_asm_insn (buffer, &operands[0]);
1429 return "nop";
1430 }
1431
1432 default:
1433 /* There should be no longer branches now - that would
1434 indicate that something has destroyed the branches set
1435 up in machine_dependent_reorg. */
1436 abort ();
1437 }
1438 }
1439
1440 const char *
1441 output_branchy_insn (enum rtx_code code, const char *template,
1442 rtx insn, rtx *operands)
1443 {
1444 rtx next_insn = NEXT_INSN (insn);
1445
1446 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1447 {
1448 rtx src = SET_SRC (PATTERN (next_insn));
1449 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1450 {
1451 /* Following branch not taken */
1452 operands[9] = gen_label_rtx ();
1453 emit_label_after (operands[9], next_insn);
1454 INSN_ADDRESSES_NEW (operands[9],
1455 INSN_ADDRESSES (INSN_UID (next_insn))
1456 + get_attr_length (next_insn));
1457 return template;
1458 }
1459 else
1460 {
1461 int offset = (branch_dest (next_insn)
1462 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1463 if (offset >= -252 && offset <= 258)
1464 {
1465 if (GET_CODE (src) == IF_THEN_ELSE)
1466 /* branch_true */
1467 src = XEXP (src, 1);
1468 operands[9] = src;
1469 return template;
1470 }
1471 }
1472 }
1473 operands[9] = gen_label_rtx ();
1474 emit_label_after (operands[9], insn);
1475 INSN_ADDRESSES_NEW (operands[9],
1476 INSN_ADDRESSES (INSN_UID (insn))
1477 + get_attr_length (insn));
1478 return template;
1479 }
1480
1481 const char *
1482 output_ieee_ccmpeq (rtx insn, rtx *operands)
1483 {
1484 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1485 }
1486 \f
1487 /* Output the start of the assembler file. */
1488
1489 static void
1490 sh_file_start (void)
1491 {
1492 default_file_start ();
1493
1494 #ifdef SYMBIAN
1495 /* Declare the .directive section before it is used. */
1496 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1497 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1498 #endif
1499
1500 if (TARGET_ELF)
1501 /* We need to show the text section with the proper
1502 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1503 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1504 will complain. We can teach GAS specifically about the
1505 default attributes for our choice of text section, but
1506 then we would have to change GAS again if/when we change
1507 the text section name. */
1508 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1509 else
1510 /* Switch to the data section so that the coffsem symbol
1511 isn't in the text section. */
1512 data_section ();
1513
1514 if (TARGET_LITTLE_ENDIAN)
1515 fputs ("\t.little\n", asm_out_file);
1516
1517 if (!TARGET_ELF)
1518 {
1519 if (TARGET_SHCOMPACT)
1520 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1521 else if (TARGET_SHMEDIA)
1522 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1523 TARGET_SHMEDIA64 ? 64 : 32);
1524 }
1525 }
1526 \f
1527 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1528
1529 static bool
1530 unspec_caller_rtx_p (rtx pat)
1531 {
1532 switch (GET_CODE (pat))
1533 {
1534 case CONST:
1535 return unspec_caller_rtx_p (XEXP (pat, 0));
1536 case PLUS:
1537 case MINUS:
1538 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1539 return true;
1540 return unspec_caller_rtx_p (XEXP (pat, 1));
1541 case UNSPEC:
1542 if (XINT (pat, 1) == UNSPEC_CALLER)
1543 return true;
1544 default:
1545 break;
1546 }
1547
1548 return false;
1549 }
1550
1551 /* Indicate that INSN cannot be duplicated. This is true for insns
1552 that generate a unique label. */
1553
1554 static bool
1555 sh_cannot_copy_insn_p (rtx insn)
1556 {
1557 rtx pat;
1558
1559 if (!reload_completed || !flag_pic)
1560 return false;
1561
1562 if (GET_CODE (insn) != INSN)
1563 return false;
1564 if (asm_noperands (insn) >= 0)
1565 return false;
1566
1567 pat = PATTERN (insn);
1568 if (GET_CODE (pat) != SET)
1569 return false;
1570 pat = SET_SRC (pat);
1571
1572 if (unspec_caller_rtx_p (pat))
1573 return true;
1574
1575 return false;
1576 }
1577 \f
1578 /* Actual number of instructions used to make a shift by N. */
1579 static const char ashiftrt_insns[] =
1580 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1581
1582 /* Left shift and logical right shift are the same. */
1583 static const char shift_insns[] =
1584 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1585
1586 /* Individual shift amounts needed to get the above length sequences.
1587 One bit right shifts clobber the T bit, so when possible, put one bit
1588 shifts in the middle of the sequence, so the ends are eligible for
1589 branch delay slots. */
1590 static const short shift_amounts[32][5] = {
1591 {0}, {1}, {2}, {2, 1},
1592 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1593 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1594 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1595 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1596 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1597 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1598 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
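/* Worked example: a left (or logical right) shift by 7 takes
   shift_insns[7] == 4 instructions, using the amounts
   shift_amounts[7] == {2, 2, 1, 2} (2 + 2 + 1 + 2 == 7).  The single-bit
   shift, which clobbers the T bit, is deliberately placed in the middle
   so the first and last insns remain eligible for branch delay slots.  */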
1599
1600 /* Likewise, but for shift amounts < 16, up to three highmost bits
1601 might be clobbered. This is typically used when combined with some
1602 kind of sign or zero extension. */
1603
1604 static const char ext_shift_insns[] =
1605 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1606
1607 static const short ext_shift_amounts[32][4] = {
1608 {0}, {1}, {2}, {2, 1},
1609 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1610 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1611 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1612 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1613 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1614 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1615 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1616
1617 /* Assuming we have a value that has been sign-extended by at least one bit,
1618 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1619 to shift it by N without data loss, and quicker than by other means? */
1620 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
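/* For example, ((n) | 8) == 15 holds exactly for n == 7 and n == 15:
   the low three bits of N must all be set, bit 3 is forced on by the OR,
   and no higher bits may be set.  */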
1621
1622 /* This is used in length attributes in sh.md to help compute the length
1623 of arbitrary constant shift instructions. */
1624
1625 int
1626 shift_insns_rtx (rtx insn)
1627 {
1628 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1629 int shift_count = INTVAL (XEXP (set_src, 1));
1630 enum rtx_code shift_code = GET_CODE (set_src);
1631
1632 switch (shift_code)
1633 {
1634 case ASHIFTRT:
1635 return ashiftrt_insns[shift_count];
1636 case LSHIFTRT:
1637 case ASHIFT:
1638 return shift_insns[shift_count];
1639 default:
1640 abort ();
1641 }
1642 }
1643
1644 /* Return the cost of a shift. */
1645
1646 static inline int
1647 shiftcosts (rtx x)
1648 {
1649 int value;
1650
1651 if (TARGET_SHMEDIA)
1652 return 1;
1653
1654 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1655 {
1656 if (GET_MODE (x) == DImode
1657 && GET_CODE (XEXP (x, 1)) == CONST_INT
1658 && INTVAL (XEXP (x, 1)) == 1)
1659 return 2;
1660
1661 /* Everything else is invalid, because there is no pattern for it. */
1662 return 10000;
1663 }
1664 /* If shifting by a non-constant, then this will be expensive. */
1665 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1666 return SH_DYNAMIC_SHIFT_COST;
1667
1668 value = INTVAL (XEXP (x, 1));
1669
1670 /* Otherwise, return the true cost in instructions. */
1671 if (GET_CODE (x) == ASHIFTRT)
1672 {
1673 int cost = ashiftrt_insns[value];
1674 /* If SH3, then we put the constant in a reg and use shad. */
1675 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1676 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1677 return cost;
1678 }
1679 else
1680 return shift_insns[value];
1681 }
1682
1683 /* Return the cost of an AND operation. */
1684
1685 static inline int
1686 andcosts (rtx x)
1687 {
1688 int i;
1689
1690 /* ANDing with a register is a single-cycle 'and' instruction. */
1691 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1692 return 1;
1693
1694 i = INTVAL (XEXP (x, 1));
1695
1696 if (TARGET_SHMEDIA)
1697 {
1698 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1699 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1700 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1701 return 1;
1702 else
1703 return 2;
1704 }
1705
1706 /* These constants are single cycle extu.[bw] instructions. */
1707 if (i == 0xff || i == 0xffff)
1708 return 1;
1709 /* Constants that can be used in an and immediate instruction in a single
1710 cycle, but this requires r0, so make it a little more expensive. */
1711 if (CONST_OK_FOR_K08 (i))
1712 return 2;
1713 /* Constants that can be loaded with a mov immediate and an and.
1714 This case is probably unnecessary. */
1715 if (CONST_OK_FOR_I08 (i))
1716 return 2;
1717 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1718 This case is probably unnecessary. */
1719 return 3;
1720 }
1721
1722 /* Return the cost of an addition or a subtraction. */
1723
1724 static inline int
1725 addsubcosts (rtx x)
1726 {
1727 /* Adding a register is a single cycle insn. */
1728 if (GET_CODE (XEXP (x, 1)) == REG
1729 || GET_CODE (XEXP (x, 1)) == SUBREG)
1730 return 1;
1731
1732 /* Likewise for small constants. */
1733 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1734 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1735 return 1;
1736
1737 if (TARGET_SHMEDIA)
1738 switch (GET_CODE (XEXP (x, 1)))
1739 {
1740 case CONST:
1741 case LABEL_REF:
1742 case SYMBOL_REF:
1743 return TARGET_SHMEDIA64 ? 5 : 3;
1744
1745 case CONST_INT:
1746 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1747 return 2;
1748 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1749 return 3;
1750 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1751 return 4;
1752
1753 /* Fall through. */
1754 default:
1755 return 5;
1756 }
1757
1758 /* Any other constant requires a 2 cycle pc-relative load plus an
1759 addition. */
1760 return 3;
1761 }
1762
1763 /* Return the cost of a multiply. */
1764 static inline int
1765 multcosts (rtx x ATTRIBUTE_UNUSED)
1766 {
1767 if (TARGET_SHMEDIA)
1768 return 3;
1769
1770 if (TARGET_SH2)
1771 {
1772 /* We have a mul insn, so we can never take more than the mul and the
1773 read of the mac reg, but count more because of the latency and extra
1774 reg usage. */
1775 if (TARGET_SMALLCODE)
1776 return 2;
1777 return 3;
1778 }
1779
1780 /* If we're aiming at small code, then just count the number of
1781 insns in a multiply call sequence. */
1782 if (TARGET_SMALLCODE)
1783 return 5;
1784
1785 /* Otherwise count all the insns in the routine we'd be calling too. */
1786 return 20;
1787 }
1788
1789 /* Compute a (partial) cost for rtx X. Return true if the complete
1790 cost has been computed, and false if subexpressions should be
1791 scanned. In either case, *TOTAL contains the cost result. */
1792
1793 static bool
1794 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1795 {
1796 switch (code)
1797 {
1798 case CONST_INT:
1799 if (TARGET_SHMEDIA)
1800 {
1801 if (INTVAL (x) == 0)
1802 *total = 0;
1803 else if (outer_code == AND && and_operand ((x), DImode))
1804 *total = 0;
1805 else if ((outer_code == IOR || outer_code == XOR
1806 || outer_code == PLUS)
1807 && CONST_OK_FOR_I10 (INTVAL (x)))
1808 *total = 0;
1809 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1810 *total = COSTS_N_INSNS (outer_code != SET);
1811 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1812 *total = COSTS_N_INSNS (2);
1813 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1814 *total = COSTS_N_INSNS (3);
1815 else
1816 *total = COSTS_N_INSNS (4);
1817 return true;
1818 }
1819 if (CONST_OK_FOR_I08 (INTVAL (x)))
1820 *total = 0;
1821 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1822 && CONST_OK_FOR_K08 (INTVAL (x)))
1823 *total = 1;
1824 else
1825 *total = 8;
1826 return true;
1827
1828 case CONST:
1829 case LABEL_REF:
1830 case SYMBOL_REF:
1831 if (TARGET_SHMEDIA64)
1832 *total = COSTS_N_INSNS (4);
1833 else if (TARGET_SHMEDIA32)
1834 *total = COSTS_N_INSNS (2);
1835 else
1836 *total = 5;
1837 return true;
1838
1839 case CONST_DOUBLE:
1840 if (TARGET_SHMEDIA)
1841 *total = COSTS_N_INSNS (4);
1842 else
1843 *total = 10;
1844 return true;
1845
1846 case PLUS:
1847 *total = COSTS_N_INSNS (addsubcosts (x));
1848 return true;
1849
1850 case AND:
1851 *total = COSTS_N_INSNS (andcosts (x));
1852 return true;
1853
1854 case MULT:
1855 *total = COSTS_N_INSNS (multcosts (x));
1856 return true;
1857
1858 case ASHIFT:
1859 case ASHIFTRT:
1860 case LSHIFTRT:
1861 *total = COSTS_N_INSNS (shiftcosts (x));
1862 return true;
1863
1864 case DIV:
1865 case UDIV:
1866 case MOD:
1867 case UMOD:
1868 *total = COSTS_N_INSNS (20);
1869 return true;
1870
1871 case FLOAT:
1872 case FIX:
1873 *total = 100;
1874 return true;
1875
1876 default:
1877 return false;
1878 }
1879 }
1880
1881 /* Compute the cost of an address. For the SH, all valid addresses are
1882 the same cost. Use a slightly higher cost for reg + reg addressing,
1883 since it increases pressure on r0. */
1884
1885 static int
1886 sh_address_cost (rtx X)
1887 {
1888 return (GET_CODE (X) == PLUS
1889 && ! CONSTANT_P (XEXP (X, 1))
1890 && ! TARGET_SHMEDIA ? 1 : 0);
1891 }
1892
1893 /* Code to expand a shift. */
1894
1895 void
1896 gen_ashift (int type, int n, rtx reg)
1897 {
1898 /* Negative values here come from the shift_amounts array. */
1899 if (n < 0)
1900 {
1901 if (type == ASHIFT)
1902 type = LSHIFTRT;
1903 else
1904 type = ASHIFT;
1905 n = -n;
1906 }
1907
1908 switch (type)
1909 {
1910 case ASHIFTRT:
1911 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1912 break;
1913 case LSHIFTRT:
1914 if (n == 1)
1915 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1916 else
1917 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1918 break;
1919 case ASHIFT:
1920 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1921 break;
1922 }
1923 }
1924
1925 /* Same for HImode */
1926
1927 void
1928 gen_ashift_hi (int type, int n, rtx reg)
1929 {
1930 /* Negative values here come from the shift_amounts array. */
1931 if (n < 0)
1932 {
1933 if (type == ASHIFT)
1934 type = LSHIFTRT;
1935 else
1936 type = ASHIFT;
1937 n = -n;
1938 }
1939
1940 switch (type)
1941 {
1942 case ASHIFTRT:
1943 case LSHIFTRT:
1944 /* We don't have HImode right shift operations because using the
1945 ordinary 32 bit shift instructions for that doesn't generate proper
1946 zero/sign extension.
1947 gen_ashift_hi is only called in contexts where we know that the
1948 sign extension works out correctly. */
1949 {
1950 int offset = 0;
1951 if (GET_CODE (reg) == SUBREG)
1952 {
1953 offset = SUBREG_BYTE (reg);
1954 reg = SUBREG_REG (reg);
1955 }
1956 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1957 break;
1958 }
1959 case ASHIFT:
1960 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1961 break;
1962 }
1963 }
1964
1965 /* Output RTL to split a constant shift into its component SH constant
1966 shift instructions. */
1967
1968 void
1969 gen_shifty_op (int code, rtx *operands)
1970 {
1971 int value = INTVAL (operands[2]);
1972 int max, i;
1973
1974 /* Truncate the shift count in case it is out of bounds. */
1975 value = value & 0x1f;
1976
1977 if (value == 31)
1978 {
1979 if (code == LSHIFTRT)
1980 {
1981 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1982 emit_insn (gen_movt (operands[0]));
1983 return;
1984 }
1985 else if (code == ASHIFT)
1986 {
1987 /* There is a two instruction sequence for 31 bit left shifts,
1988 but it requires r0. */
1989 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1990 {
1991 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1992 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1993 return;
1994 }
1995 }
1996 }
1997 else if (value == 0)
1998 {
1999 /* This can happen when not optimizing. We must output something here
2000 to prevent the compiler from aborting in final.c after the try_split
2001 call. */
2002 emit_insn (gen_nop ());
2003 return;
2004 }
2005
2006 max = shift_insns[value];
2007 for (i = 0; i < max; i++)
2008 gen_ashift (code, shift_amounts[value][i], operands[0]);
2009 }
2010
2011 /* Same as above, but optimized for values where the topmost bits don't
2012 matter. */
2013
2014 void
2015 gen_shifty_hi_op (int code, rtx *operands)
2016 {
2017 int value = INTVAL (operands[2]);
2018 int max, i;
2019 void (*gen_fun) (int, int, rtx);
2020
2021 /* This operation is used by and_shl for SImode values with a few
2022 high bits known to be cleared. */
2023 value &= 31;
2024 if (value == 0)
2025 {
2026 emit_insn (gen_nop ());
2027 return;
2028 }
2029
2030 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2031 if (code == ASHIFT)
2032 {
2033 max = ext_shift_insns[value];
2034 for (i = 0; i < max; i++)
2035 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2036 }
2037 else
2038 /* When shifting right, emit the shifts in reverse order, so that
2039 solitary negative values come first. */
2040 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2041 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2042 }
2043
2044 /* Output RTL for an arithmetic right shift. */
2045
2046 /* ??? Rewrite to use super-optimizer sequences. */
2047
2048 int
2049 expand_ashiftrt (rtx *operands)
2050 {
2051 rtx sym;
2052 rtx wrk;
2053 char func[18];
2054 tree func_name;
2055 int value;
2056
2057 if (TARGET_SH3)
2058 {
2059 if (GET_CODE (operands[2]) != CONST_INT)
2060 {
2061 rtx count = copy_to_mode_reg (SImode, operands[2]);
2062 emit_insn (gen_negsi2 (count, count));
2063 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2064 return 1;
2065 }
2066 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2067 > 1 + SH_DYNAMIC_SHIFT_COST)
2068 {
2069 rtx count
2070 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2071 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2072 return 1;
2073 }
2074 }
2075 if (GET_CODE (operands[2]) != CONST_INT)
2076 return 0;
2077
2078 value = INTVAL (operands[2]) & 31;
2079
2080 if (value == 31)
2081 {
2082 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2083 return 1;
2084 }
2085 else if (value >= 16 && value <= 19)
2086 {
2087 wrk = gen_reg_rtx (SImode);
2088 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2089 value -= 16;
2090 while (value--)
2091 gen_ashift (ASHIFTRT, 1, wrk);
2092 emit_move_insn (operands[0], wrk);
2093 return 1;
2094 }
2095 /* Expand a short sequence inline; for longer counts, call a library routine. */
2096 else if (value <= 5)
2097 {
2098 wrk = gen_reg_rtx (SImode);
2099 emit_move_insn (wrk, operands[1]);
2100 while (value--)
2101 gen_ashift (ASHIFTRT, 1, wrk);
2102 emit_move_insn (operands[0], wrk);
2103 return 1;
2104 }
2105
2106 wrk = gen_reg_rtx (Pmode);
2107
2108 /* Load the value into an arg reg and call a helper. */
2109 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2110 sprintf (func, "__ashiftrt_r4_%d", value);
2111 func_name = get_identifier (func);
2112 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2113 emit_move_insn (wrk, sym);
2114 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2115 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2116 return 1;
2117 }
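/* For example, a constant count of 18 takes the 16..19 branch above: the
16-bit arithmetic-shift pattern followed by two single-bit ashiftrt steps
on a scratch register. Counts of 6..15 and 20..30 (when the SH3 dynamic
shift path does not apply) fall through to the __ashiftrt_r4_<n> library
helper. */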
2118
2119 int
2120 sh_dynamicalize_shift_p (rtx count)
2121 {
2122 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2123 }
2124
2125 /* Try to find a good way to implement the combiner pattern
2126 [(set (match_operand:SI 0 "register_operand" "r")
2127 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2128 (match_operand:SI 2 "const_int_operand" "n"))
2129 (match_operand:SI 3 "const_int_operand" "n"))) .
2130 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2131 return 0 for simple right / left or left/right shift combination.
2132 return 1 for a combination of shifts with zero_extend.
2133 return 2 for a combination of shifts with an AND that needs r0.
2134 return 3 for a combination of shifts with an AND that needs an extra
2135 scratch register, when the three highmost bits of the AND mask are clear.
2136 return 4 for a combination of shifts with an AND that needs an extra
2137 scratch register, when any of the three highmost bits of the AND mask
2138 is set.
2139 If ATTRP is set, store an initial right shift width in ATTRP[0],
2140 and the instruction length in ATTRP[1]. These values are not valid
2141 when returning 0.
2142 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2143 shift_amounts for the last shift value that is to be used before the
2144 sign extend. */
2145 int
2146 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2147 {
2148 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2149 int left = INTVAL (left_rtx), right;
2150 int best = 0;
2151 int cost, best_cost = 10000;
2152 int best_right = 0, best_len = 0;
2153 int i;
2154 int can_ext;
2155
2156 if (left < 0 || left > 31)
2157 return 0;
2158 if (GET_CODE (mask_rtx) == CONST_INT)
2159 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2160 else
2161 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2162 /* Can this be expressed as a right shift / left shift pair? */
2163 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2164 right = exact_log2 (lsb);
2165 mask2 = ~(mask + lsb - 1);
2166 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2167 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2168 if (! mask2)
2169 best_cost = shift_insns[right] + shift_insns[right + left];
2170 /* mask has no trailing zeroes <==> ! right */
2171 else if (! right && mask2 == ~(lsb2 - 1))
2172 {
2173 int late_right = exact_log2 (lsb2);
2174 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2175 }
2176 /* Try to use zero extend. */
2177 if (mask2 == ~(lsb2 - 1))
2178 {
2179 int width, first;
2180
2181 for (width = 8; width <= 16; width += 8)
2182 {
2183 /* Can we zero-extend right away? */
2184 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2185 {
2186 cost
2187 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2188 if (cost < best_cost)
2189 {
2190 best = 1;
2191 best_cost = cost;
2192 best_right = right;
2193 best_len = cost;
2194 if (attrp)
2195 attrp[2] = -1;
2196 }
2197 continue;
2198 }
2199 /* ??? Could try to put zero extend into initial right shift,
2200 or even shift a bit left before the right shift. */
2201 /* Determine value of first part of left shift, to get to the
2202 zero extend cut-off point. */
2203 first = width - exact_log2 (lsb2) + right;
2204 if (first >= 0 && right + left - first >= 0)
2205 {
2206 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2207 + ext_shift_insns[right + left - first];
2208 if (cost < best_cost)
2209 {
2210 best = 1;
2211 best_cost = cost;
2212 best_right = right;
2213 best_len = cost;
2214 if (attrp)
2215 attrp[2] = first;
2216 }
2217 }
2218 }
2219 }
2220 /* Try to use r0 AND pattern */
2221 for (i = 0; i <= 2; i++)
2222 {
2223 if (i > right)
2224 break;
2225 if (! CONST_OK_FOR_K08 (mask >> i))
2226 continue;
2227 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2228 if (cost < best_cost)
2229 {
2230 best = 2;
2231 best_cost = cost;
2232 best_right = i;
2233 best_len = cost - 1;
2234 }
2235 }
2236 /* Try to use a scratch register to hold the AND operand. */
2237 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2238 for (i = 0; i <= 2; i++)
2239 {
2240 if (i > right)
2241 break;
2242 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2243 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2244 if (cost < best_cost)
2245 {
2246 best = 4 - can_ext;
2247 best_cost = cost;
2248 best_right = i;
2249 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2250 }
2251 }
2252
2253 if (attrp)
2254 {
2255 attrp[0] = best_right;
2256 attrp[1] = best_len;
2257 }
2258 return best;
2259 }
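/* Worked example (the winner depends on the shift_insns / ext_shift_insns
tables): for LEFT_RTX == 0 and MASK_RTX == 0xff00 the mask has eight
trailing zeroes, so right == 8; the candidates considered above include
the plain shift-right-8 / shift-left-8 pair (kind 0) and the zero-extend
variant shift-right-8, extu.b, shift-left-8 (kind 1). */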
2260
2261 /* This is used in length attributes of the unnamed instructions
2262 corresponding to shl_and_kind return values of 1 and 2. */
2263 int
2264 shl_and_length (rtx insn)
2265 {
2266 rtx set_src, left_rtx, mask_rtx;
2267 int attributes[3];
2268
2269 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2270 left_rtx = XEXP (XEXP (set_src, 0), 1);
2271 mask_rtx = XEXP (set_src, 1);
2272 shl_and_kind (left_rtx, mask_rtx, attributes);
2273 return attributes[1];
2274 }
2275
2276 /* This is used in the length attribute of the and_shl_scratch instruction. */
2277
2278 int
2279 shl_and_scr_length (rtx insn)
2280 {
2281 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2282 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2283 rtx op = XEXP (set_src, 0);
2284 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2285 op = XEXP (XEXP (op, 0), 0);
2286 return len + shift_insns[INTVAL (XEXP (op, 1))];
2287 }
2288
2289 /* Generating rtl? */
2290 extern int rtx_equal_function_value_matters;
2291
2292 /* Generate rtl for instructions for which shl_and_kind advised a particular
2293 method of generating them, i.e. returned a nonzero kind. */
2294
2295 int
2296 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2297 {
2298 int attributes[3];
2299 unsigned HOST_WIDE_INT mask;
2300 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2301 int right, total_shift;
2302 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2303
2304 right = attributes[0];
2305 total_shift = INTVAL (left_rtx) + right;
2306 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2307 switch (kind)
2308 {
2309 default:
2310 return -1;
2311 case 1:
2312 {
2313 int first = attributes[2];
2314 rtx operands[3];
2315
2316 if (first < 0)
2317 {
2318 emit_insn ((mask << right) <= 0xff
2319 ? gen_zero_extendqisi2 (dest,
2320 gen_lowpart (QImode, source))
2321 : gen_zero_extendhisi2 (dest,
2322 gen_lowpart (HImode, source)));
2323 source = dest;
2324 }
2325 if (source != dest)
2326 emit_insn (gen_movsi (dest, source));
2327 operands[0] = dest;
2328 if (right)
2329 {
2330 operands[2] = GEN_INT (right);
2331 gen_shifty_hi_op (LSHIFTRT, operands);
2332 }
2333 if (first > 0)
2334 {
2335 operands[2] = GEN_INT (first);
2336 gen_shifty_hi_op (ASHIFT, operands);
2337 total_shift -= first;
2338 mask <<= first;
2339 }
2340 if (first >= 0)
2341 emit_insn (mask <= 0xff
2342 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2343 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2344 if (total_shift > 0)
2345 {
2346 operands[2] = GEN_INT (total_shift);
2347 gen_shifty_hi_op (ASHIFT, operands);
2348 }
2349 break;
2350 }
2351 case 4:
2352 shift_gen_fun = gen_shifty_op;
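/* Fall through. */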
2353 case 3:
2354 /* If the topmost bit that matters is set, set the topmost bits
2355 that don't matter. This way, we might be able to get a shorter
2356 signed constant. */
2357 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2358 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
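/* Fall through. */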
2359 case 2:
2360 /* Don't expand fine-grained when combining, because that will
2361 make the pattern fail. */
2362 if (rtx_equal_function_value_matters
2363 || reload_in_progress || reload_completed)
2364 {
2365 rtx operands[3];
2366
2367 /* Cases 3 and 4 should be handled by this split
2368 only while combining */
2369 if (kind > 2)
2370 abort ();
2371 if (right)
2372 {
2373 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2374 source = dest;
2375 }
2376 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2377 if (total_shift)
2378 {
2379 operands[0] = dest;
2380 operands[1] = dest;
2381 operands[2] = GEN_INT (total_shift);
2382 shift_gen_fun (ASHIFT, operands);
2383 }
2384 break;
2385 }
2386 else
2387 {
2388 int neg = 0;
2389 if (kind != 4 && total_shift < 16)
2390 {
2391 neg = -ext_shift_amounts[total_shift][1];
2392 if (neg > 0)
2393 neg -= ext_shift_amounts[total_shift][2];
2394 else
2395 neg = 0;
2396 }
2397 emit_insn (gen_and_shl_scratch (dest, source,
2398 GEN_INT (right),
2399 GEN_INT (mask),
2400 GEN_INT (total_shift + neg),
2401 GEN_INT (neg)));
2402 emit_insn (gen_movsi (dest, dest));
2403 break;
2404 }
2405 }
2406 return 0;
2407 }
2408
2409 /* Try to find a good way to implement the combiner pattern
2410 [(set (match_operand:SI 0 "register_operand" "=r")
2411 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2412 (match_operand:SI 2 "const_int_operand" "n")
2413 (match_operand:SI 3 "const_int_operand" "n")
2414 (const_int 0)))
2415 (clobber (reg:SI T_REG))]
2416 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2417 return 0 for simple left / right shift combination.
2418 return 1 for left shift / 8 bit sign extend / left shift.
2419 return 2 for left shift / 16 bit sign extend / left shift.
2420 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2421 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2422 return 5 for left shift / 16 bit sign extend / right shift
2423 return 6 for < 8 bit sign extend / left shift.
2424 return 7 for < 8 bit sign extend / left shift / single right shift.
2425 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2426
2427 int
2428 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2429 {
2430 int left, size, insize, ext;
2431 int cost = 0, best_cost;
2432 int kind;
2433
2434 left = INTVAL (left_rtx);
2435 size = INTVAL (size_rtx);
2436 insize = size - left;
2437 if (insize <= 0)
2438 abort ();
2439 /* Default to left / right shift. */
2440 kind = 0;
2441 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2442 if (size <= 16)
2443 {
2444 /* 16 bit shift / sign extend / 16 bit shift */
2445 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2446 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2447 below, by alternative 3 or something even better. */
2448 if (cost < best_cost)
2449 {
2450 kind = 5;
2451 best_cost = cost;
2452 }
2453 }
2454 /* Try a plain sign extend between two shifts. */
2455 for (ext = 16; ext >= insize; ext -= 8)
2456 {
2457 if (ext <= size)
2458 {
2459 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2460 if (cost < best_cost)
2461 {
2462 kind = ext / (unsigned) 8;
2463 best_cost = cost;
2464 }
2465 }
2466 /* Check if we can do a sloppy shift with a final signed shift
2467 restoring the sign. */
2468 if (EXT_SHIFT_SIGNED (size - ext))
2469 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2470 /* If not, maybe it's still cheaper to do the second shift sloppy,
2471 and do a final sign extend? */
2472 else if (size <= 16)
2473 cost = ext_shift_insns[ext - insize] + 1
2474 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2475 else
2476 continue;
2477 if (cost < best_cost)
2478 {
2479 kind = ext / (unsigned) 8 + 2;
2480 best_cost = cost;
2481 }
2482 }
2483 /* Check if we can sign extend in r0 */
2484 if (insize < 8)
2485 {
2486 cost = 3 + shift_insns[left];
2487 if (cost < best_cost)
2488 {
2489 kind = 6;
2490 best_cost = cost;
2491 }
2492 /* Try the same with a final signed shift. */
2493 if (left < 31)
2494 {
2495 cost = 3 + ext_shift_insns[left + 1] + 1;
2496 if (cost < best_cost)
2497 {
2498 kind = 7;
2499 best_cost = cost;
2500 }
2501 }
2502 }
2503 if (TARGET_SH3)
2504 {
2505 /* Try to use a dynamic shift. */
2506 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2507 if (cost < best_cost)
2508 {
2509 kind = 0;
2510 best_cost = cost;
2511 }
2512 }
2513 if (costp)
2514 *costp = cost;
2515 return kind;
2516 }
2517
2518 /* Function to be used in the length attribute of the instructions
2519 implementing this pattern. */
2520
2521 int
2522 shl_sext_length (rtx insn)
2523 {
2524 rtx set_src, left_rtx, size_rtx;
2525 int cost;
2526
2527 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2528 left_rtx = XEXP (XEXP (set_src, 0), 1);
2529 size_rtx = XEXP (set_src, 1);
2530 shl_sext_kind (left_rtx, size_rtx, &cost);
2531 return cost;
2532 }
2533
2534 /* Generate rtl for this pattern */
2535
2536 int
2537 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2538 {
2539 int kind;
2540 int left, size, insize, cost;
2541 rtx operands[3];
2542
2543 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2544 left = INTVAL (left_rtx);
2545 size = INTVAL (size_rtx);
2546 insize = size - left;
2547 switch (kind)
2548 {
2549 case 1:
2550 case 2:
2551 case 3:
2552 case 4:
2553 {
2554 int ext = kind & 1 ? 8 : 16;
2555 int shift2 = size - ext;
2556
2557 /* Don't expand fine-grained when combining, because that will
2558 make the pattern fail. */
2559 if (! rtx_equal_function_value_matters
2560 && ! reload_in_progress && ! reload_completed)
2561 {
2562 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2563 emit_insn (gen_movsi (dest, source));
2564 break;
2565 }
2566 if (dest != source)
2567 emit_insn (gen_movsi (dest, source));
2568 operands[0] = dest;
2569 if (ext - insize)
2570 {
2571 operands[2] = GEN_INT (ext - insize);
2572 gen_shifty_hi_op (ASHIFT, operands);
2573 }
2574 emit_insn (kind & 1
2575 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2576 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2577 if (kind <= 2)
2578 {
2579 if (shift2)
2580 {
2581 operands[2] = GEN_INT (shift2);
2582 gen_shifty_op (ASHIFT, operands);
2583 }
2584 }
2585 else
2586 {
2587 if (shift2 > 0)
2588 {
2589 if (EXT_SHIFT_SIGNED (shift2))
2590 {
2591 operands[2] = GEN_INT (shift2 + 1);
2592 gen_shifty_op (ASHIFT, operands);
2593 operands[2] = const1_rtx;
2594 gen_shifty_op (ASHIFTRT, operands);
2595 break;
2596 }
2597 operands[2] = GEN_INT (shift2);
2598 gen_shifty_hi_op (ASHIFT, operands);
2599 }
2600 else if (shift2)
2601 {
2602 operands[2] = GEN_INT (-shift2);
2603 gen_shifty_hi_op (LSHIFTRT, operands);
2604 }
2605 emit_insn (size <= 8
2606 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2607 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2608 }
2609 break;
2610 }
2611 case 5:
2612 {
2613 int i = 16 - size;
2614 if (! rtx_equal_function_value_matters
2615 && ! reload_in_progress && ! reload_completed)
2616 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2617 else
2618 {
2619 operands[0] = dest;
2620 operands[2] = GEN_INT (16 - insize);
2621 gen_shifty_hi_op (ASHIFT, operands);
2622 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2623 }
2624 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2625 while (--i >= 0)
2626 gen_ashift (ASHIFTRT, 1, dest);
2627 break;
2628 }
2629 case 6:
2630 case 7:
2631 /* Don't expand fine-grained when combining, because that will
2632 make the pattern fail. */
2633 if (! rtx_equal_function_value_matters
2634 && ! reload_in_progress && ! reload_completed)
2635 {
2636 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2637 emit_insn (gen_movsi (dest, source));
2638 break;
2639 }
2640 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2641 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2642 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2643 operands[0] = dest;
2644 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2645 gen_shifty_op (ASHIFT, operands);
2646 if (kind == 7)
2647 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2648 break;
2649 default:
2650 return -1;
2651 }
2652 return 0;
2653 }
2654
2655 /* Prefix a symbol_ref name with "datalabel". */
2656
2657 rtx
2658 gen_datalabel_ref (rtx sym)
2659 {
2660 if (GET_CODE (sym) == LABEL_REF)
2661 return gen_rtx_CONST (GET_MODE (sym),
2662 gen_rtx_UNSPEC (GET_MODE (sym),
2663 gen_rtvec (1, sym),
2664 UNSPEC_DATALABEL));
2665
2666 if (GET_CODE (sym) != SYMBOL_REF)
2667 abort ();
2668
2669 return sym;
2670 }
2671
2672 \f
2673 /* The SH cannot load a large constant into a register; constants have to
2674 come from a pc relative load. The reference of a pc relative load
2675 instruction must be less than 1k in front of the instruction. This
2676 means that we often have to dump a constant inside a function, and
2677 generate code to branch around it.
2678
2679 It is important to minimize this, since the branches will slow things
2680 down and make things bigger.
2681
2682 Worst case code looks like:
2683
2684 mov.l L1,rn
2685 bra L2
2686 nop
2687 align
2688 L1: .long value
2689 L2:
2690 ..
2691
2692 mov.l L3,rn
2693 bra L4
2694 nop
2695 align
2696 L3: .long value
2697 L4:
2698 ..
2699
2700 We fix this by performing a scan before scheduling, which notices which
2701 instructions need to have their operands fetched from the constant table
2702 and builds the table.
2703
2704 The algorithm is:
2705
2706 scan, find an instruction which needs a pcrel move. Look forward, find the
2707 last barrier which is within MAX_COUNT bytes of the requirement.
2708 If there isn't one, make one. Process all the instructions between
2709 the find and the barrier.
2710
2711 In the above example, we can tell that L3 is within 1k of L1, so
2712 the first move can be shrunk from the 3 insn+constant sequence into
2713 just 1 insn, and the constant moved to L3 to make:
2714
2715 mov.l L1,rn
2716 ..
2717 mov.l L3,rn
2718 bra L4
2719 nop
2720 align
2721 L3:.long value
2722 L4:.long value
2723
2724 Then the second move becomes the target for the shortening process. */
2725
2726 typedef struct
2727 {
2728 rtx value; /* Value in table. */
2729 rtx label; /* Label of value. */
2730 rtx wend; /* End of window. */
2731 enum machine_mode mode; /* Mode of value. */
2732
2733 /* True if this constant is accessed as part of a post-increment
2734 sequence. Note that HImode constants are never accessed in this way. */
2735 bool part_of_sequence_p;
2736 } pool_node;
2737
2738 /* The maximum number of constants that can fit into one pool, since
2739 the pc relative range is 0...1020 bytes and constants are at least 4
2740 bytes long. */
2741
2742 #define MAX_POOL_SIZE (1020/4)
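/* I.e. at most 255 entries. */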
2743 static pool_node pool_vector[MAX_POOL_SIZE];
2744 static int pool_size;
2745 static rtx pool_window_label;
2746 static int pool_window_last;
2747
2748 /* ??? If we need a constant in HImode which is the truncated value of a
2749 constant we need in SImode, we could combine the two entries thus saving
2750 two bytes. Is this common enough to be worth the effort of implementing
2751 it? */
2752
2753 /* ??? This stuff should be done at the same time that we shorten branches.
2754 As it is now, we must assume that all branches are the maximum size, and
2755 this causes us to almost always output constant pools sooner than
2756 necessary. */
2757
2758 /* Add a constant to the pool and return its label. */
2759
2760 static rtx
2761 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2762 {
2763 int i;
2764 rtx lab, new, ref, newref;
2765
2766 /* First see if we've already got it. */
2767 for (i = 0; i < pool_size; i++)
2768 {
2769 if (x->code == pool_vector[i].value->code
2770 && mode == pool_vector[i].mode)
2771 {
2772 if (x->code == CODE_LABEL)
2773 {
2774 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2775 continue;
2776 }
2777 if (rtx_equal_p (x, pool_vector[i].value))
2778 {
2779 lab = new = 0;
2780 if (! last_value
2781 || ! i
2782 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2783 {
2784 new = gen_label_rtx ();
2785 LABEL_REFS (new) = pool_vector[i].label;
2786 pool_vector[i].label = lab = new;
2787 }
2788 if (lab && pool_window_label)
2789 {
2790 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2791 ref = pool_vector[pool_window_last].wend;
2792 LABEL_NEXTREF (newref) = ref;
2793 pool_vector[pool_window_last].wend = newref;
2794 }
2795 if (new)
2796 pool_window_label = new;
2797 pool_window_last = i;
2798 return lab;
2799 }
2800 }
2801 }
2802
2803 /* Need a new one. */
2804 pool_vector[pool_size].value = x;
2805 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2806 {
2807 lab = 0;
2808 pool_vector[pool_size - 1].part_of_sequence_p = true;
2809 }
2810 else
2811 lab = gen_label_rtx ();
2812 pool_vector[pool_size].mode = mode;
2813 pool_vector[pool_size].label = lab;
2814 pool_vector[pool_size].wend = NULL_RTX;
2815 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2816 if (lab && pool_window_label)
2817 {
2818 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2819 ref = pool_vector[pool_window_last].wend;
2820 LABEL_NEXTREF (newref) = ref;
2821 pool_vector[pool_window_last].wend = newref;
2822 }
2823 if (lab)
2824 pool_window_label = lab;
2825 pool_window_last = pool_size;
2826 pool_size++;
2827 return lab;
2828 }
2829
2830 /* Output the literal table. START, if nonzero, is the first instruction
2831 this table is needed for, and also indicates that there is at least one
2832 casesi_worker_2 instruction; we have to emit the operand3 labels from
2833 these insns at a 4-byte aligned position. BARRIER is the barrier
2834 after which we are to place the table. */
2835
2836 static void
2837 dump_table (rtx start, rtx barrier)
2838 {
2839 rtx scan = barrier;
2840 int i;
2841 int need_align = 1;
2842 rtx lab, ref;
2843 int have_df = 0;
2844
2845 /* Do two passes; the first time, dump out the HI sized constants. */
2846
2847 for (i = 0; i < pool_size; i++)
2848 {
2849 pool_node *p = &pool_vector[i];
2850
2851 if (p->mode == HImode)
2852 {
2853 if (need_align)
2854 {
2855 scan = emit_insn_after (gen_align_2 (), scan);
2856 need_align = 0;
2857 }
2858 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2859 scan = emit_label_after (lab, scan);
2860 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2861 scan);
2862 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2863 {
2864 lab = XEXP (ref, 0);
2865 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2866 }
2867 }
2868 else if (p->mode == DFmode)
2869 have_df = 1;
2870 }
2871
2872 need_align = 1;
2873
2874 if (start)
2875 {
2876 scan = emit_insn_after (gen_align_4 (), scan);
2877 need_align = 0;
2878 for (; start != barrier; start = NEXT_INSN (start))
2879 if (GET_CODE (start) == INSN
2880 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2881 {
2882 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2883 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2884
2885 scan = emit_label_after (lab, scan);
2886 }
2887 }
2888 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2889 {
2890 rtx align_insn = NULL_RTX;
2891
2892 scan = emit_label_after (gen_label_rtx (), scan);
2893 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2894 need_align = 0;
2895
2896 for (i = 0; i < pool_size; i++)
2897 {
2898 pool_node *p = &pool_vector[i];
2899
2900 switch (p->mode)
2901 {
2902 case HImode:
2903 break;
2904 case SImode:
2905 case SFmode:
2906 if (align_insn && !p->part_of_sequence_p)
2907 {
2908 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2909 emit_label_before (lab, align_insn);
2910 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2911 align_insn);
2912 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2913 {
2914 lab = XEXP (ref, 0);
2915 emit_insn_before (gen_consttable_window_end (lab),
2916 align_insn);
2917 }
2918 delete_insn (align_insn);
2919 align_insn = NULL_RTX;
2920 continue;
2921 }
2922 else
2923 {
2924 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2925 scan = emit_label_after (lab, scan);
2926 scan = emit_insn_after (gen_consttable_4 (p->value,
2927 const0_rtx), scan);
2928 need_align = ! need_align;
2929 }
2930 break;
2931 case DFmode:
2932 if (need_align)
2933 {
2934 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2935 align_insn = scan;
2936 need_align = 0;
2937 }
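/* Fall through. */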
2938 case DImode:
2939 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2940 scan = emit_label_after (lab, scan);
2941 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2942 scan);
2943 break;
2944 default:
2945 abort ();
2946 break;
2947 }
2948
2949 if (p->mode != HImode)
2950 {
2951 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2952 {
2953 lab = XEXP (ref, 0);
2954 scan = emit_insn_after (gen_consttable_window_end (lab),
2955 scan);
2956 }
2957 }
2958 }
2959
2960 pool_size = 0;
2961 }
2962
2963 for (i = 0; i < pool_size; i++)
2964 {
2965 pool_node *p = &pool_vector[i];
2966
2967 switch (p->mode)
2968 {
2969 case HImode:
2970 break;
2971 case SImode:
2972 case SFmode:
2973 if (need_align)
2974 {
2975 need_align = 0;
2976 scan = emit_label_after (gen_label_rtx (), scan);
2977 scan = emit_insn_after (gen_align_4 (), scan);
2978 }
2979 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2980 scan = emit_label_after (lab, scan);
2981 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2982 scan);
2983 break;
2984 case DFmode:
2985 case DImode:
2986 if (need_align)
2987 {
2988 need_align = 0;
2989 scan = emit_label_after (gen_label_rtx (), scan);
2990 scan = emit_insn_after (gen_align_4 (), scan);
2991 }
2992 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2993 scan = emit_label_after (lab, scan);
2994 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2995 scan);
2996 break;
2997 default:
2998 abort ();
2999 break;
3000 }
3001
3002 if (p->mode != HImode)
3003 {
3004 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3005 {
3006 lab = XEXP (ref, 0);
3007 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3008 }
3009 }
3010 }
3011
3012 scan = emit_insn_after (gen_consttable_end (), scan);
3013 scan = emit_barrier_after (scan);
3014 pool_size = 0;
3015 pool_window_label = NULL_RTX;
3016 pool_window_last = 0;
3017 }
3018
3019 /* Return nonzero if constant would be an ok source for a
3020 mov.w instead of a mov.l. */
3021
3022 static int
3023 hi_const (rtx src)
3024 {
3025 return (GET_CODE (src) == CONST_INT
3026 && INTVAL (src) >= -32768
3027 && INTVAL (src) <= 32767);
3028 }
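/* I.e. SRC fits the signed 16-bit range that a pc-relative mov.w can
materialize (the loaded value is sign-extended to SImode). */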
3029
3030 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3031
3032 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3033 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3034 need to fix it if the input value is CONST_OK_FOR_I08. */
3035
3036 static int
3037 broken_move (rtx insn)
3038 {
3039 if (GET_CODE (insn) == INSN)
3040 {
3041 rtx pat = PATTERN (insn);
3042 if (GET_CODE (pat) == PARALLEL)
3043 pat = XVECEXP (pat, 0, 0);
3044 if (GET_CODE (pat) == SET
3045 /* We can load any 8 bit value if we don't care what the high
3046 order bits end up as. */
3047 && GET_MODE (SET_DEST (pat)) != QImode
3048 && (CONSTANT_P (SET_SRC (pat))
3049 /* Match mova_const. */
3050 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3051 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3052 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3053 && ! (TARGET_SH2E
3054 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3055 && (fp_zero_operand (SET_SRC (pat))
3056 || fp_one_operand (SET_SRC (pat)))
3057 /* ??? If this is a -m4 or -m4-single compilation, in general
3058 we don't know the current setting of fpscr, so disable fldi.
3059 There is an exception if this was a register-register move
3060 before reload - and hence it was ascertained that we have
3061 single precision setting - and in a post-reload optimization
3062 we changed this to do a constant load. In that case
3063 we don't have an r0 clobber, hence we must use fldi. */
3064 && (! TARGET_SH4 || TARGET_FMOVD
3065 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3066 == SCRATCH))
3067 && GET_CODE (SET_DEST (pat)) == REG
3068 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3069 && ! (TARGET_SH2A
3070 && GET_MODE (SET_DEST (pat)) == SImode
3071 && GET_CODE (SET_SRC (pat)) == CONST_INT
3072 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3073 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3074 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3075 return 1;
3076 }
3077
3078 return 0;
3079 }
3080
3081 static int
3082 mova_p (rtx insn)
3083 {
3084 return (GET_CODE (insn) == INSN
3085 && GET_CODE (PATTERN (insn)) == SET
3086 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3087 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3088 /* Don't match mova_const. */
3089 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3090 }
3091
3092 /* Fix up a mova from a switch that went out of range. */
3093 static void
3094 fixup_mova (rtx mova)
3095 {
3096 if (! flag_pic)
3097 {
3098 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3099 INSN_CODE (mova) = -1;
3100 }
3101 else
3102 {
3103 rtx worker = mova;
3104 rtx lab = gen_label_rtx ();
3105 rtx wpat, wpat0, wpat1, wsrc, diff;
3106
3107 do
3108 {
3109 worker = NEXT_INSN (worker);
3110 if (! worker
3111 || GET_CODE (worker) == CODE_LABEL
3112 || GET_CODE (worker) == JUMP_INSN)
3113 abort ();
3114 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3115 wpat = PATTERN (worker);
3116 wpat0 = XVECEXP (wpat, 0, 0);
3117 wpat1 = XVECEXP (wpat, 0, 1);
3118 wsrc = SET_SRC (wpat0);
3119 PATTERN (worker) = (gen_casesi_worker_2
3120 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3121 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3122 XEXP (wpat1, 0)));
3123 INSN_CODE (worker) = -1;
3124 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3125 gen_rtx_LABEL_REF (Pmode, lab));
3126 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3127 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3128 INSN_CODE (mova) = -1;
3129 }
3130 }
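/* Summary: in the non-PIC case the UNSPEC_MOVA wrapper is stripped, so the
label address is loaded as an ordinary constant (and hence from the
constant pool); in the PIC case the following casesi_worker_1 insn is
rewritten to casesi_worker_2 and the mova operand becomes a pc-relative
difference against a new local label. */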
3131
3132 /* Find the last barrier from insn FROM which is close enough to hold the
3133 constant pool. If we can't find one, then create one near the end of
3134 the range. */
3135
3136 static rtx
3137 find_barrier (int num_mova, rtx mova, rtx from)
3138 {
3139 int count_si = 0;
3140 int count_hi = 0;
3141 int found_hi = 0;
3142 int found_si = 0;
3143 int found_di = 0;
3144 int hi_align = 2;
3145 int si_align = 2;
3146 int leading_mova = num_mova;
3147 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3148 int si_limit;
3149 int hi_limit;
3150
3151 /* For HImode: range is 510, add 4 because pc counts from address of
3152 second instruction after this one, subtract 2 for the jump instruction
3153 that we may need to emit before the table, subtract 2 for the instruction
3154 that fills the jump delay slot (in very rare cases, reorg will take an
3155 instruction from after the constant pool or will leave the delay slot
3156 empty). This gives 510.
3157 For SImode: range is 1020, add 4 because pc counts from address of
3158 second instruction after this one, subtract 2 in case pc is 2 byte
3159 aligned, subtract 2 for the jump instruction that we may need to emit
3160 before the table, subtract 2 for the instruction that fills the jump
3161 delay slot. This gives 1018. */
3162
3163 /* The branch will always be shortened now that the reference address for
3164 forward branches is the successor address, thus we need no longer make
3165 adjustments to the [sh]i_limit for -O0. */
3166
3167 si_limit = 1018;
3168 hi_limit = 510;
3169
3170 while (from && count_si < si_limit && count_hi < hi_limit)
3171 {
3172 int inc = get_attr_length (from);
3173 int new_align = 1;
3174
3175 if (GET_CODE (from) == CODE_LABEL)
3176 {
3177 if (optimize)
3178 new_align = 1 << label_to_alignment (from);
3179 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3180 new_align = 1 << barrier_align (from);
3181 else
3182 new_align = 1;
3183 inc = 0;
3184 }
3185
3186 if (GET_CODE (from) == BARRIER)
3187 {
3188
3189 found_barrier = from;
3190
3191 /* If we are at the end of the function, or in front of an alignment
3192 instruction, we need not insert an extra alignment. We prefer
3193 this kind of barrier. */
3194 if (barrier_align (from) > 2)
3195 good_barrier = from;
3196 }
3197
3198 if (broken_move (from))
3199 {
3200 rtx pat, src, dst;
3201 enum machine_mode mode;
3202
3203 pat = PATTERN (from);
3204 if (GET_CODE (pat) == PARALLEL)
3205 pat = XVECEXP (pat, 0, 0);
3206 src = SET_SRC (pat);
3207 dst = SET_DEST (pat);
3208 mode = GET_MODE (dst);
3209
3210 /* We must explicitly check the mode, because sometimes the
3211 front end will generate code to load unsigned constants into
3212 HImode targets without properly sign extending them. */
3213 if (mode == HImode
3214 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3215 {
3216 found_hi += 2;
3217 /* We put the short constants before the long constants, so
3218 we must count the length of short constants in the range
3219 for the long constants. */
3220 /* ??? This isn't optimal, but is easy to do. */
3221 si_limit -= 2;
3222 }
3223 else
3224 {
3225 /* We dump DF/DI constants before SF/SI ones, because
3226 the limit is the same, but the alignment requirements
3227 are higher. We may waste up to 4 additional bytes
3228 for alignment, and the DF/DI constant may have
3229 another SF/SI constant placed before it. */
3230 if (TARGET_SHCOMPACT
3231 && ! found_di
3232 && (mode == DFmode || mode == DImode))
3233 {
3234 found_di = 1;
3235 si_limit -= 8;
3236 }
3237 while (si_align > 2 && found_si + si_align - 2 > count_si)
3238 si_align >>= 1;
3239 if (found_si > count_si)
3240 count_si = found_si;
3241 found_si += GET_MODE_SIZE (mode);
3242 if (num_mova)
3243 si_limit -= GET_MODE_SIZE (mode);
3244 }
3245
3246 /* See the code in machine_dependent_reorg, which has a similar if
3247 statement that generates a new mova insn in many cases. */
3248 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3249 inc += 2;
3250 }
3251
3252 if (mova_p (from))
3253 {
3254 if (! num_mova++)
3255 {
3256 leading_mova = 0;
3257 mova = from;
3258 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3259 }
3260 if (found_si > count_si)
3261 count_si = found_si;
3262 }
3263 else if (GET_CODE (from) == JUMP_INSN
3264 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3265 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3266 {
3267 if (num_mova)
3268 num_mova--;
3269 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3270 {
3271 /* We have just passed the barrier in front of the
3272 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3273 the ADDR_DIFF_VEC is accessed as data, just like our pool
3274 constants, this is a good opportunity to accommodate what
3275 we have gathered so far.
3276 If we waited any longer, we could end up at a barrier in
3277 front of code, which gives worse cache usage for separated
3278 instruction / data caches. */
3279 good_barrier = found_barrier;
3280 break;
3281 }
3282 else
3283 {
3284 rtx body = PATTERN (from);
3285 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3286 }
3287 }
3288 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3289 else if (GET_CODE (from) == JUMP_INSN
3290 && ! TARGET_SH2
3291 && ! TARGET_SMALLCODE)
3292 new_align = 4;
3293
3294 if (found_si)
3295 {
3296 count_si += inc;
3297 if (new_align > si_align)
3298 {
3299 si_limit -= (count_si - 1) & (new_align - si_align);
3300 si_align = new_align;
3301 }
3302 count_si = (count_si + new_align - 1) & -new_align;
3303 }
3304 if (found_hi)
3305 {
3306 count_hi += inc;
3307 if (new_align > hi_align)
3308 {
3309 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3310 hi_align = new_align;
3311 }
3312 count_hi = (count_hi + new_align - 1) & -new_align;
3313 }
3314 from = NEXT_INSN (from);
3315 }
3316
3317 if (num_mova)
3318 {
3319 if (leading_mova)
3320 {
3321 /* Try as we might, the leading mova is out of range. Change
3322 it into a load (which will become a pcload) and retry. */
3323 fixup_mova (mova);
3324 return find_barrier (0, 0, mova);
3325 }
3326 else
3327 {
3328 /* Insert the constant pool table before the mova instruction,
3329 to prevent the mova label reference from going out of range. */
3330 from = mova;
3331 good_barrier = found_barrier = barrier_before_mova;
3332 }
3333 }
3334
3335 if (found_barrier)
3336 {
3337 if (good_barrier && next_real_insn (found_barrier))
3338 found_barrier = good_barrier;
3339 }
3340 else
3341 {
3342 /* We didn't find a barrier in time to dump our stuff,
3343 so we'll make one. */
3344 rtx label = gen_label_rtx ();
3345
3346 /* If we exceeded the range, then we must back up over the last
3347 instruction we looked at. Otherwise, we just need to undo the
3348 NEXT_INSN at the end of the loop. */
3349 if (count_hi > hi_limit || count_si > si_limit)
3350 from = PREV_INSN (PREV_INSN (from));
3351 else
3352 from = PREV_INSN (from);
3353
3354 /* Walk back to be just before any jump or label.
3355 Putting it before a label reduces the number of times the branch
3356 around the constant pool table will be hit. Putting it before
3357 a jump makes it more likely that the bra delay slot will be
3358 filled. */
3359 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3360 || GET_CODE (from) == CODE_LABEL)
3361 from = PREV_INSN (from);
3362
3363 from = emit_jump_insn_after (gen_jump (label), from);
3364 JUMP_LABEL (from) = label;
3365 LABEL_NUSES (label) = 1;
3366 found_barrier = emit_barrier_after (from);
3367 emit_label_after (label, found_barrier);
3368 }
3369
3370 return found_barrier;
3371 }
3372
3373 /* If the instruction INSN is implemented by a special function, and we can
3374 positively find the register that is used to call the sfunc, and this
3375 register is not used anywhere else in this instruction except as the
3376 destination of a set, return this register; else, return 0. */
3377 rtx
3378 sfunc_uses_reg (rtx insn)
3379 {
3380 int i;
3381 rtx pattern, part, reg_part, reg;
3382
3383 if (GET_CODE (insn) != INSN)
3384 return 0;
3385 pattern = PATTERN (insn);
3386 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3387 return 0;
3388
3389 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3390 {
3391 part = XVECEXP (pattern, 0, i);
3392 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3393 reg_part = part;
3394 }
3395 if (! reg_part)
3396 return 0;
3397 reg = XEXP (reg_part, 0);
3398 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3399 {
3400 part = XVECEXP (pattern, 0, i);
3401 if (part == reg_part || GET_CODE (part) == CLOBBER)
3402 continue;
3403 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3404 && GET_CODE (SET_DEST (part)) == REG)
3405 ? SET_SRC (part) : part)))
3406 return 0;
3407 }
3408 return reg;
3409 }
3410
3411 /* See if the only way in which INSN uses REG is by calling it, or by
3412 setting it while calling it. Set *SET to a SET rtx if the register
3413 is set by INSN. */
3414
3415 static int
3416 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3417 {
3418 rtx pattern, reg2;
3419
3420 *set = NULL_RTX;
3421
3422 reg2 = sfunc_uses_reg (insn);
3423 if (reg2 && REGNO (reg2) == REGNO (reg))
3424 {
3425 pattern = single_set (insn);
3426 if (pattern
3427 && GET_CODE (SET_DEST (pattern)) == REG
3428 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3429 *set = pattern;
3430 return 0;
3431 }
3432 if (GET_CODE (insn) != CALL_INSN)
3433 {
3434 /* We don't use rtx_equal_p because we don't care if the mode is
3435 different. */
3436 pattern = single_set (insn);
3437 if (pattern
3438 && GET_CODE (SET_DEST (pattern)) == REG
3439 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3440 {
3441 rtx par, part;
3442 int i;
3443
3444 *set = pattern;
3445 par = PATTERN (insn);
3446 if (GET_CODE (par) == PARALLEL)
3447 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3448 {
3449 part = XVECEXP (par, 0, i);
3450 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3451 return 1;
3452 }
3453 return reg_mentioned_p (reg, SET_SRC (pattern));
3454 }
3455
3456 return 1;
3457 }
3458
3459 pattern = PATTERN (insn);
3460
3461 if (GET_CODE (pattern) == PARALLEL)
3462 {
3463 int i;
3464
3465 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3466 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3467 return 1;
3468 pattern = XVECEXP (pattern, 0, 0);
3469 }
3470
3471 if (GET_CODE (pattern) == SET)
3472 {
3473 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3474 {
3475 /* We don't use rtx_equal_p, because we don't care if the
3476 mode is different. */
3477 if (GET_CODE (SET_DEST (pattern)) != REG
3478 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3479 return 1;
3480
3481 *set = pattern;
3482 }
3483
3484 pattern = SET_SRC (pattern);
3485 }
3486
3487 if (GET_CODE (pattern) != CALL
3488 || GET_CODE (XEXP (pattern, 0)) != MEM
3489 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3490 return 1;
3491
3492 return 0;
3493 }
3494
3495 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3496 general registers. Bits 0..15 mean that the respective registers
3497 are used as inputs in the instruction. Bits 16..31 mean that the
3498 registers 0..15, respectively, are used as outputs, or are clobbered.
3499 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3500 int
3501 regs_used (rtx x, int is_dest)
3502 {
3503 enum rtx_code code;
3504 const char *fmt;
3505 int i, used = 0;
3506
3507 if (! x)
3508 return used;
3509 code = GET_CODE (x);
3510 switch (code)
3511 {
3512 case REG:
3513 if (REGNO (x) < 16)
3514 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3515 << (REGNO (x) + is_dest));
3516 return 0;
3517 case SUBREG:
3518 {
3519 rtx y = SUBREG_REG (x);
3520
3521 if (GET_CODE (y) != REG)
3522 break;
3523 if (REGNO (y) < 16)
3524 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3525 << (REGNO (y) +
3526 subreg_regno_offset (REGNO (y),
3527 GET_MODE (y),
3528 SUBREG_BYTE (x),
3529 GET_MODE (x)) + is_dest));
3530 return 0;
3531 }
3532 case SET:
3533 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3534 case RETURN:
3535 /* If there was a return value, it must have been indicated with USE. */
3536 return 0x00ffff00;
3537 case CLOBBER:
3538 is_dest = 1;
3539 break;
3540 case MEM:
3541 is_dest = 0;
3542 break;
3543 case CALL:
3544 used |= 0x00ff00f0;
3545 break;
3546 default:
3547 break;
3548 }
3549
3550 fmt = GET_RTX_FORMAT (code);
3551
3552 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3553 {
3554 if (fmt[i] == 'E')
3555 {
3556 register int j;
3557 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3558 used |= regs_used (XVECEXP (x, i, j), is_dest);
3559 }
3560 else if (fmt[i] == 'e')
3561 used |= regs_used (XEXP (x, i), is_dest);
3562 }
3563 return used;
3564 }
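/* Example: for (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))) this
returns (1 << 2) | (1 << 3) | (1 << (1 + 16)), assuming HARD_REGNO_NREGS
is 1 for an SImode general register. */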
3565
3566 /* Create an instruction that prevents redirection of a conditional branch
3567 to the destination of the JUMP with address ADDR.
3568 If the branch needs to be implemented as an indirect jump, try to find
3569 a scratch register for it.
3570 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3571 If any preceding insn that doesn't fit into a delay slot is good enough,
3572 pass 1. Pass 2 if a definite blocking insn is needed.
3573 -1 is used internally to avoid deep recursion.
3574 If a blocking instruction is made or recognized, return it. */
3575
3576 static rtx
3577 gen_block_redirect (rtx jump, int addr, int need_block)
3578 {
3579 int dead = 0;
3580 rtx prev = prev_nonnote_insn (jump);
3581 rtx dest;
3582
3583 /* First, check if we already have an instruction that satisfies our need. */
3584 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3585 {
3586 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3587 return prev;
3588 if (GET_CODE (PATTERN (prev)) == USE
3589 || GET_CODE (PATTERN (prev)) == CLOBBER
3590 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3591 prev = jump;
3592 else if ((need_block &= ~1) < 0)
3593 return prev;
3594 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3595 need_block = 0;
3596 }
3597 if (GET_CODE (PATTERN (jump)) == RETURN)
3598 {
3599 if (! need_block)
3600 return prev;
3601 /* Reorg even does nasty things with return insns that cause branches
3602 to go out of range - see find_end_label and callers. */
3603 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3604 }
3605 /* We can't use JUMP_LABEL here because it might be undefined
3606 when not optimizing. */
3607 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3608 /* If the branch is out of range, try to find a scratch register for it. */
3609 if (optimize
3610 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3611 > 4092 + 4098))
3612 {
3613 rtx scan;
3614 /* Don't look for the stack pointer as a scratch register,
3615 it would cause trouble if an interrupt occurred. */
3616 unsigned try = 0x7fff, used;
3617 int jump_left = flag_expensive_optimizations + 1;
3618
3619 /* It is likely that the most recent eligible instruction is wanted for
3620 the delay slot. Therefore, find out which registers it uses, and
3621 try to avoid using them. */
3622
3623 for (scan = jump; (scan = PREV_INSN (scan)); )
3624 {
3625 enum rtx_code code;
3626
3627 if (INSN_DELETED_P (scan))
3628 continue;
3629 code = GET_CODE (scan);
3630 if (code == CODE_LABEL || code == JUMP_INSN)
3631 break;
3632 if (code == INSN
3633 && GET_CODE (PATTERN (scan)) != USE
3634 && GET_CODE (PATTERN (scan)) != CLOBBER
3635 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3636 {
3637 try &= ~regs_used (PATTERN (scan), 0);
3638 break;
3639 }
3640 }
3641 for (used = dead = 0, scan = JUMP_LABEL (jump);
3642 (scan = NEXT_INSN (scan)); )
3643 {
3644 enum rtx_code code;
3645
3646 if (INSN_DELETED_P (scan))
3647 continue;
3648 code = GET_CODE (scan);
3649 if (INSN_P (scan))
3650 {
3651 used |= regs_used (PATTERN (scan), 0);
3652 if (code == CALL_INSN)
3653 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3654 dead |= (used >> 16) & ~used;
3655 if (dead & try)
3656 {
3657 dead &= try;
3658 break;
3659 }
3660 if (code == JUMP_INSN)
3661 {
3662 if (jump_left-- && simplejump_p (scan))
3663 scan = JUMP_LABEL (scan);
3664 else
3665 break;
3666 }
3667 }
3668 }
3669 /* Mask out the stack pointer again, in case it was
3670 the only 'free' register we have found. */
3671 dead &= 0x7fff;
3672 }
3673 /* If the immediate destination is still in range, check for possible
3674 threading with a jump beyond the delay slot insn.
3675 Don't check if we are called recursively; the jump has been or will be
3676 checked in a different invocation in that case. */
3677
3678 else if (optimize && need_block >= 0)
3679 {
3680 rtx next = next_active_insn (next_active_insn (dest));
3681 if (next && GET_CODE (next) == JUMP_INSN
3682 && GET_CODE (PATTERN (next)) == SET
3683 && recog_memoized (next) == CODE_FOR_jump_compact)
3684 {
3685 dest = JUMP_LABEL (next);
3686 if (dest
3687 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3688 > 4092 + 4098))
3689 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3690 }
3691 }
3692
3693 if (dead)
3694 {
3695 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3696
3697       /* It would be nice if we could convert the jump into an indirect
3698 	 jump / far branch right now, thus exposing all constituent
3699 instructions to further optimization. However, reorg uses
3700 simplejump_p to determine if there is an unconditional jump where
3701 it should try to schedule instructions from the target of the
3702 branch; simplejump_p fails for indirect jumps even if they have
3703 a JUMP_LABEL. */
3704 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3705 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3706 , jump);
3707 /* ??? We would like this to have the scope of the jump, but that
3708 scope will change when a delay slot insn of an inner scope is added.
3709 Hence, after delay slot scheduling, we'll have to expect
3710 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3711 the jump. */
3712
3713 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3714 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3715 return insn;
3716 }
3717 else if (need_block)
3718 /* We can't use JUMP_LABEL here because it might be undefined
3719 when not optimizing. */
3720 return emit_insn_before (gen_block_branch_redirect
3721 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3722 , jump);
3723 return prev;
3724 }
3725
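 /* CONDJUMP_MIN / CONDJUMP_MAX delimit the displacement range within which
    a conditional branch can still reach its target directly; for targets
    outside this range, split_branches emits a far branch sequence.  */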
3726 #define CONDJUMP_MIN -252
3727 #define CONDJUMP_MAX 262
3728 struct far_branch
3729 {
3730 /* A label (to be placed) in front of the jump
3731 that jumps to our ultimate destination. */
3732 rtx near_label;
3733 /* Where we are going to insert it if we cannot move the jump any farther,
3734 or the jump itself if we have picked up an existing jump. */
3735 rtx insert_place;
3736 /* The ultimate destination. */
3737 rtx far_label;
3738 struct far_branch *prev;
3739 /* If the branch has already been created, its address;
3740 else the address of its first prospective user. */
3741 int address;
3742 };
3743
3744 static void gen_far_branch (struct far_branch *);
3745 enum mdep_reorg_phase_e mdep_reorg_phase;
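 /* Emit the far branch described by BP: the conditional jump at
    BP->insert_place is inverted so that it branches around an unconditional
    jump (or return) to BP->far_label; BP->near_label is placed in front of
    that jump so that other out-of-range branches to the same destination
    can be redirected to it.  */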
3746 static void
3747 gen_far_branch (struct far_branch *bp)
3748 {
3749 rtx insn = bp->insert_place;
3750 rtx jump;
3751 rtx label = gen_label_rtx ();
3752
3753 emit_label_after (label, insn);
3754 if (bp->far_label)
3755 {
3756 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3757 LABEL_NUSES (bp->far_label)++;
3758 }
3759 else
3760 jump = emit_jump_insn_after (gen_return (), insn);
3761 /* Emit a barrier so that reorg knows that any following instructions
3762 are not reachable via a fall-through path.
3763 But don't do this when not optimizing, since we wouldn't suppress the
3764 alignment for the barrier then, and could end up with out-of-range
3765 pc-relative loads. */
3766 if (optimize)
3767 emit_barrier_after (jump);
3768 emit_label_after (bp->near_label, insn);
3769 JUMP_LABEL (jump) = bp->far_label;
3770 if (! invert_jump (insn, label, 1))
3771 abort ();
3772 /* If we are branching around a jump (rather than a return), prevent
3773 reorg from using an insn from the jump target as the delay slot insn -
3774      when reorg did this, it pessimized code (we would rather hide the delay slot)
3775 and it could cause branches to go out of range. */
3776 if (bp->far_label)
3777 (emit_insn_after
3778 (gen_stuff_delay_slot
3779 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3780 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3781 insn));
3782 /* Prevent reorg from undoing our splits. */
3783 gen_block_redirect (jump, bp->address += 2, 2);
3784 }
3785
3786 /* Fix up ADDR_DIFF_VECs: emit the braf reference label right after its
3786    casesi_jump_2 and make each vector relative to that label.  */
3787 void
3788 fixup_addr_diff_vecs (rtx first)
3789 {
3790 rtx insn;
3791
3792 for (insn = first; insn; insn = NEXT_INSN (insn))
3793 {
3794 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3795
3796 if (GET_CODE (insn) != JUMP_INSN
3797 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3798 continue;
3799 pat = PATTERN (insn);
3800 vec_lab = XEXP (XEXP (pat, 0), 0);
3801
3802 /* Search the matching casesi_jump_2. */
3803 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3804 {
3805 if (GET_CODE (prev) != JUMP_INSN)
3806 continue;
3807 prevpat = PATTERN (prev);
3808 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3809 continue;
3810 x = XVECEXP (prevpat, 0, 1);
3811 if (GET_CODE (x) != USE)
3812 continue;
3813 x = XEXP (x, 0);
3814 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3815 break;
3816 }
3817 /* FIXME: This is a bug in the optimizer, but it seems harmless
3818 	 to just avoid panicking.  */
3819 if (!prev)
3820 continue;
3821
3822 /* Emit the reference label of the braf where it belongs, right after
3823 the casesi_jump_2 (i.e. braf). */
3824 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3825 emit_label_after (braf_label, prev);
3826
3827       /* Fix up the ADDR_DIFF_VEC to be relative
3828 to the reference address of the braf. */
3829 XEXP (XEXP (pat, 0), 0) = braf_label;
3830 }
3831 }
3832
3833 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3834 a barrier. Return the base 2 logarithm of the desired alignment. */
3835 int
3836 barrier_align (rtx barrier_or_label)
3837 {
3838 rtx next = next_real_insn (barrier_or_label), pat, prev;
3839 int slot, credit, jump_to_next = 0;
3840
3841 if (! next)
3842 return 0;
3843
3844 pat = PATTERN (next);
3845
3846 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3847 return 2;
3848
3849 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3850 /* This is a barrier in front of a constant table. */
3851 return 0;
3852
3853 prev = prev_real_insn (barrier_or_label);
3854 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3855 {
3856 pat = PATTERN (prev);
3857 /* If this is a very small table, we want to keep the alignment after
3858 the table to the minimum for proper code alignment. */
3859 return ((TARGET_SMALLCODE
3860 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3861 <= (unsigned) 1 << (CACHE_LOG - 2)))
3862 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3863 }
3864
3865 if (TARGET_SMALLCODE)
3866 return 0;
3867
3868 if (! TARGET_SH2 || ! optimize)
3869 return align_jumps_log;
3870
3871 /* When fixing up pcloads, a constant table might be inserted just before
3872 the basic block that ends with the barrier. Thus, we can't trust the
3873 instruction lengths before that. */
3874 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3875 {
3876 /* Check if there is an immediately preceding branch to the insn beyond
3877 	 the barrier.  We must weigh the cost of discarding useful information
3878 	 from the current cache line when executing this branch and there is
3879 	 an alignment, against that of fetching unneeded insns in front of the
3880 branch target when there is no alignment. */
3881
3882 /* There are two delay_slot cases to consider. One is the simple case
3883 where the preceding branch is to the insn beyond the barrier (simple
3884 delay slot filling), and the other is where the preceding branch has
3885 a delay slot that is a duplicate of the insn after the barrier
3886 (fill_eager_delay_slots) and the branch is to the insn after the insn
3887 after the barrier. */
3888
3889 /* PREV is presumed to be the JUMP_INSN for the barrier under
3890 investigation. Skip to the insn before it. */
3891 prev = prev_real_insn (prev);
3892
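          /* Scan backwards as long as CREDIT, a byte budget derived from the
	     cache line size, is not exhausted; SLOT is cleared once an insn
	     that could have been placed in a delay slot has been seen.  */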
3893 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3894 credit >= 0 && prev && GET_CODE (prev) == INSN;
3895 prev = prev_real_insn (prev))
3896 {
3897 jump_to_next = 0;
3898 if (GET_CODE (PATTERN (prev)) == USE
3899 || GET_CODE (PATTERN (prev)) == CLOBBER)
3900 continue;
3901 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3902 {
3903 prev = XVECEXP (PATTERN (prev), 0, 1);
3904 if (INSN_UID (prev) == INSN_UID (next))
3905 {
3906 /* Delay slot was filled with insn at jump target. */
3907 jump_to_next = 1;
3908 continue;
3909 }
3910 }
3911
3912 if (slot &&
3913 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3914 slot = 0;
3915 credit -= get_attr_length (prev);
3916 }
3917 if (prev
3918 && GET_CODE (prev) == JUMP_INSN
3919 && JUMP_LABEL (prev))
3920 {
3921 rtx x;
3922 if (jump_to_next
3923 || next_real_insn (JUMP_LABEL (prev)) == next
3924 /* If relax_delay_slots() decides NEXT was redundant
3925 with some previous instruction, it will have
3926 redirected PREV's jump to the following insn. */
3927 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3928 /* There is no upper bound on redundant instructions
3929 that might have been skipped, but we must not put an
3930 alignment where none had been before. */
3931 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3932 (INSN_P (x)
3933 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3934 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3935 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3936 {
3937 rtx pat = PATTERN (prev);
3938 if (GET_CODE (pat) == PARALLEL)
3939 pat = XVECEXP (pat, 0, 0);
3940 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3941 return 0;
3942 }
3943 }
3944 }
3945
3946 return align_jumps_log;
3947 }
3948
3949 /* If we are inside a phony loop, almost any kind of label can turn up as the
3950 first one in the loop. Aligning a braf label causes incorrect switch
3951 destination addresses; we can detect braf labels because they are
3952 followed by a BARRIER.
3953 Applying loop alignment to small constant or switch tables is a waste
3954 of space, so we suppress this too. */
3955 int
3956 sh_loop_align (rtx label)
3957 {
3958 rtx next = label;
3959
3960 do
3961 next = next_nonnote_insn (next);
3962 while (next && GET_CODE (next) == CODE_LABEL);
3963
3964 if (! next
3965 || ! INSN_P (next)
3966 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3967 || recog_memoized (next) == CODE_FOR_consttable_2)
3968 return 0;
3969
3970 return align_loops_log;
3971 }
3972
3973 /* Do a final pass over the function, just before delayed branch
3974 scheduling. */
3975
3976 static void
3977 sh_reorg (void)
3978 {
3979 rtx first, insn, mova = NULL_RTX;
3980 int num_mova;
3981 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3982 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3983
3984 first = get_insns ();
3985
3986 /* We must split call insns before introducing `mova's. If we're
3987 optimizing, they'll have already been split. Otherwise, make
3988 sure we don't split them too late. */
3989 if (! optimize)
3990 split_all_insns_noflow ();
3991
3992 if (TARGET_SHMEDIA)
3993 return;
3994
3995 /* If relaxing, generate pseudo-ops to associate function calls with
3996 the symbols they call. It does no harm to not generate these
3997      pseudo-ops.  However, when we can generate them, it enables the
3998 linker to potentially relax the jsr to a bsr, and eliminate the
3999 register load and, possibly, the constant pool entry. */
4000
4001 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4002 if (TARGET_RELAX)
4003 {
4004 /* Remove all REG_LABEL notes. We want to use them for our own
4005 purposes. This works because none of the remaining passes
4006 need to look at them.
4007
4008 ??? But it may break in the future. We should use a machine
4009 dependent REG_NOTE, or some other approach entirely. */
4010 for (insn = first; insn; insn = NEXT_INSN (insn))
4011 {
4012 if (INSN_P (insn))
4013 {
4014 rtx note;
4015
4016 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4017 remove_note (insn, note);
4018 }
4019 }
4020
4021 for (insn = first; insn; insn = NEXT_INSN (insn))
4022 {
4023 rtx pattern, reg, link, set, scan, dies, label;
4024 int rescan = 0, foundinsn = 0;
4025
4026 if (GET_CODE (insn) == CALL_INSN)
4027 {
4028 pattern = PATTERN (insn);
4029
4030 if (GET_CODE (pattern) == PARALLEL)
4031 pattern = XVECEXP (pattern, 0, 0);
4032 if (GET_CODE (pattern) == SET)
4033 pattern = SET_SRC (pattern);
4034
4035 if (GET_CODE (pattern) != CALL
4036 || GET_CODE (XEXP (pattern, 0)) != MEM)
4037 continue;
4038
4039 reg = XEXP (XEXP (pattern, 0), 0);
4040 }
4041 else
4042 {
4043 reg = sfunc_uses_reg (insn);
4044 if (! reg)
4045 continue;
4046 }
4047
4048 if (GET_CODE (reg) != REG)
4049 continue;
4050
4051 /* This is a function call via REG. If the only uses of REG
4052 between the time that it is set and the time that it dies
4053 are in function calls, then we can associate all the
4054 function calls with the setting of REG. */
4055
4056 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4057 {
4058 if (REG_NOTE_KIND (link) != 0)
4059 continue;
4060 set = single_set (XEXP (link, 0));
4061 if (set && rtx_equal_p (reg, SET_DEST (set)))
4062 {
4063 link = XEXP (link, 0);
4064 break;
4065 }
4066 }
4067
4068 if (! link)
4069 {
4070 /* ??? Sometimes global register allocation will have
4071 deleted the insn pointed to by LOG_LINKS. Try
4072 scanning backward to find where the register is set. */
4073 for (scan = PREV_INSN (insn);
4074 scan && GET_CODE (scan) != CODE_LABEL;
4075 scan = PREV_INSN (scan))
4076 {
4077 if (! INSN_P (scan))
4078 continue;
4079
4080 if (! reg_mentioned_p (reg, scan))
4081 continue;
4082
4083 if (noncall_uses_reg (reg, scan, &set))
4084 break;
4085
4086 if (set)
4087 {
4088 link = scan;
4089 break;
4090 }
4091 }
4092 }
4093
4094 if (! link)
4095 continue;
4096
4097 /* The register is set at LINK. */
4098
4099 /* We can only optimize the function call if the register is
4100 being set to a symbol. In theory, we could sometimes
4101 optimize calls to a constant location, but the assembler
4102 and linker do not support that at present. */
4103 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4104 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4105 continue;
4106
4107 /* Scan forward from LINK to the place where REG dies, and
4108 make sure that the only insns which use REG are
4109 themselves function calls. */
4110
4111 /* ??? This doesn't work for call targets that were allocated
4112 by reload, since there may not be a REG_DEAD note for the
4113 register. */
4114
4115 dies = NULL_RTX;
4116 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4117 {
4118 rtx scanset;
4119
4120 /* Don't try to trace forward past a CODE_LABEL if we haven't
4121 seen INSN yet. Ordinarily, we will only find the setting insn
4122 in LOG_LINKS if it is in the same basic block. However,
4123 cross-jumping can insert code labels in between the load and
4124 the call, and can result in situations where a single call
4125 insn may have two targets depending on where we came from. */
4126
4127 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4128 break;
4129
4130 if (! INSN_P (scan))
4131 continue;
4132
4133 /* Don't try to trace forward past a JUMP. To optimize
4134 safely, we would have to check that all the
4135 instructions at the jump destination did not use REG. */
4136
4137 if (GET_CODE (scan) == JUMP_INSN)
4138 break;
4139
4140 if (! reg_mentioned_p (reg, scan))
4141 continue;
4142
4143 if (noncall_uses_reg (reg, scan, &scanset))
4144 break;
4145
4146 if (scan == insn)
4147 foundinsn = 1;
4148
4149 if (scan != insn
4150 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4151 {
4152 /* There is a function call to this register other
4153 than the one we are checking. If we optimize
4154 this call, we need to rescan again below. */
4155 rescan = 1;
4156 }
4157
4158 /* ??? We shouldn't have to worry about SCANSET here.
4159 We should just be able to check for a REG_DEAD note
4160 on a function call. However, the REG_DEAD notes are
4161 apparently not dependable around libcalls; c-torture
4162 execute/920501-2 is a test case. If SCANSET is set,
4163 then this insn sets the register, so it must have
4164 died earlier. Unfortunately, this will only handle
4165 the cases in which the register is, in fact, set in a
4166 later insn. */
4167
4168 /* ??? We shouldn't have to use FOUNDINSN here.
4169 However, the LOG_LINKS fields are apparently not
4170 entirely reliable around libcalls;
4171 newlib/libm/math/e_pow.c is a test case. Sometimes
4172 an insn will appear in LOG_LINKS even though it is
4173 not the most recent insn which sets the register. */
4174
4175 if (foundinsn
4176 && (scanset
4177 || find_reg_note (scan, REG_DEAD, reg)))
4178 {
4179 dies = scan;
4180 break;
4181 }
4182 }
4183
4184 if (! dies)
4185 {
4186 /* Either there was a branch, or some insn used REG
4187 other than as a function call address. */
4188 continue;
4189 }
4190
4191 /* Create a code label, and put it in a REG_LABEL note on
4192 the insn which sets the register, and on each call insn
4193 which uses the register. In final_prescan_insn we look
4194 for the REG_LABEL notes, and output the appropriate label
4195 or pseudo-op. */
4196
4197 label = gen_label_rtx ();
4198 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4199 REG_NOTES (link));
4200 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4201 REG_NOTES (insn));
4202 if (rescan)
4203 {
4204 scan = link;
4205 do
4206 {
4207 rtx reg2;
4208
4209 scan = NEXT_INSN (scan);
4210 if (scan != insn
4211 && ((GET_CODE (scan) == CALL_INSN
4212 && reg_mentioned_p (reg, scan))
4213 || ((reg2 = sfunc_uses_reg (scan))
4214 && REGNO (reg2) == REGNO (reg))))
4215 REG_NOTES (scan)
4216 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4217 }
4218 while (scan != dies);
4219 }
4220 }
4221 }
4222
4223 if (TARGET_SH2)
4224 fixup_addr_diff_vecs (first);
4225
4226 if (optimize)
4227 {
4228 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4229 shorten_branches (first);
4230 }
4231 /* Scan the function looking for move instructions which have to be
4232 changed to pc-relative loads and insert the literal tables. */
4233
4234 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4235 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4236 {
4237 if (mova_p (insn))
4238 {
4239 /* ??? basic block reordering can move a switch table dispatch
4240 below the switch table. Check if that has happened.
4241 We only have the addresses available when optimizing; but then,
4242 this check shouldn't be needed when not optimizing. */
4243 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4244 if (optimize
4245 && (INSN_ADDRESSES (INSN_UID (insn))
4246 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4247 {
4248 /* Change the mova into a load.
4249 broken_move will then return true for it. */
4250 fixup_mova (insn);
4251 }
4252 else if (! num_mova++)
4253 mova = insn;
4254 }
4255 else if (GET_CODE (insn) == JUMP_INSN
4256 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4257 && num_mova)
4258 {
4259 rtx scan;
4260 int total;
4261
4262 num_mova--;
4263
4264 /* Some code might have been inserted between the mova and
4265 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4266 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4267 total += get_attr_length (scan);
4268
4269 	  /* The range of mova is 1020 bytes; add 4 because the pc counts from
4270 	     the address of the second instruction after this one, and subtract
4271 	     2 in case the pc is 2 byte aligned.  Possible alignment needed for
4272 	     the ADDR_DIFF_VEC cancels out with alignment effects of the mova itself.  */
4273 if (total > 1022)
4274 {
4275 /* Change the mova into a load, and restart scanning
4276 there. broken_move will then return true for mova. */
4277 fixup_mova (mova);
4278 insn = mova;
4279 }
4280 }
4281 if (broken_move (insn)
4282 || (GET_CODE (insn) == INSN
4283 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4284 {
4285 rtx scan;
4286 /* Scan ahead looking for a barrier to stick the constant table
4287 behind. */
4288 rtx barrier = find_barrier (num_mova, mova, insn);
4289 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4290 int need_aligned_label = 0;
4291
4292 if (num_mova && ! mova_p (mova))
4293 {
4294 /* find_barrier had to change the first mova into a
4295 pcload; thus, we have to start with this new pcload. */
4296 insn = mova;
4297 num_mova = 0;
4298 }
4299 /* Now find all the moves between the points and modify them. */
4300 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4301 {
4302 if (GET_CODE (scan) == CODE_LABEL)
4303 last_float = 0;
4304 if (GET_CODE (scan) == INSN
4305 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4306 need_aligned_label = 1;
4307 if (broken_move (scan))
4308 {
4309 rtx *patp = &PATTERN (scan), pat = *patp;
4310 rtx src, dst;
4311 rtx lab;
4312 rtx newsrc;
4313 enum machine_mode mode;
4314
4315 if (GET_CODE (pat) == PARALLEL)
4316 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4317 src = SET_SRC (pat);
4318 dst = SET_DEST (pat);
4319 mode = GET_MODE (dst);
4320
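 		  /* If the constant fits in 16 bits, load it through a HImode
		     pc-relative load; this halves the size of its constant
		     table entry.  FPUL is excluded from this.  */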
4321 if (mode == SImode && hi_const (src)
4322 && REGNO (dst) != FPUL_REG)
4323 {
4324 int offset = 0;
4325
4326 mode = HImode;
4327 while (GET_CODE (dst) == SUBREG)
4328 {
4329 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4330 GET_MODE (SUBREG_REG (dst)),
4331 SUBREG_BYTE (dst),
4332 GET_MODE (dst));
4333 dst = SUBREG_REG (dst);
4334 }
4335 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4336 }
4337 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4338 {
4339 /* This must be an insn that clobbers r0. */
4340 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4341 XVECLEN (PATTERN (scan), 0)
4342 - 1);
4343 rtx clobber = *clobberp;
4344
4345 if (GET_CODE (clobber) != CLOBBER
4346 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4347 abort ();
4348
4349 if (last_float
4350 && reg_set_between_p (r0_rtx, last_float_move, scan))
4351 last_float = 0;
4352 if (last_float
4353 && TARGET_SHCOMPACT
4354 && GET_MODE_SIZE (mode) != 4
4355 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4356 last_float = 0;
4357 lab = add_constant (src, mode, last_float);
4358 if (lab)
4359 emit_insn_before (gen_mova (lab), scan);
4360 else
4361 {
4362 		      /* There will be a REG_UNUSED note for r0 on
4363 			 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4364 			 otherwise reorg's mark_target_live_regs will not
4365 			 consider r0 to be used, and we could end up with a delay
4366 			 slot insn in front of SCAN that clobbers r0.  */
4367 rtx note
4368 = find_regno_note (last_float_move, REG_UNUSED, 0);
4369
4370 /* If we are not optimizing, then there may not be
4371 a note. */
4372 if (note)
4373 PUT_MODE (note, REG_INC);
4374
4375 *last_float_addr = r0_inc_rtx;
4376 }
4377 last_float_move = scan;
4378 last_float = src;
4379 newsrc = gen_rtx_MEM (mode,
4380 (((TARGET_SH4 && ! TARGET_FMOVD)
4381 || REGNO (dst) == FPUL_REG)
4382 ? r0_inc_rtx
4383 : r0_rtx));
4384 last_float_addr = &XEXP (newsrc, 0);
4385
4386 /* Remove the clobber of r0. */
4387 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4388 gen_rtx_SCRATCH (Pmode));
4389 RTX_UNCHANGING_P (newsrc) = 1;
4390 }
4391 /* This is a mova needing a label. Create it. */
4392 else if (GET_CODE (src) == UNSPEC
4393 && XINT (src, 1) == UNSPEC_MOVA
4394 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4395 {
4396 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4397 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4398 newsrc = gen_rtx_UNSPEC (SImode,
4399 gen_rtvec (1, newsrc),
4400 UNSPEC_MOVA);
4401 }
4402 else
4403 {
4404 lab = add_constant (src, mode, 0);
4405 newsrc = gen_rtx_MEM (mode,
4406 gen_rtx_LABEL_REF (VOIDmode, lab));
4407 RTX_UNCHANGING_P (newsrc) = 1;
4408 }
4409 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4410 INSN_CODE (scan) = -1;
4411 }
4412 }
4413 dump_table (need_aligned_label ? insn : 0, barrier);
4414 insn = barrier;
4415 }
4416 }
4417
4418 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4419 INSN_ADDRESSES_FREE ();
4420 split_branches (first);
4421
4422 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4423 also has an effect on the register that holds the address of the sfunc.
4424 Insert an extra dummy insn in front of each sfunc that pretends to
4425 use this register. */
4426 if (flag_delayed_branch)
4427 {
4428 for (insn = first; insn; insn = NEXT_INSN (insn))
4429 {
4430 rtx reg = sfunc_uses_reg (insn);
4431
4432 if (! reg)
4433 continue;
4434 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4435 }
4436 }
4437 #if 0
4438 /* fpscr is not actually a user variable, but we pretend it is for the
4439 sake of the previous optimization passes, since we want it handled like
4440 one. However, we don't have any debugging information for it, so turn
4441 it into a non-user variable now. */
4442 if (TARGET_SH4)
4443 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4444 #endif
4445 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4446 }
4447
4448 int
4449 get_dest_uid (rtx label, int max_uid)
4450 {
4451 rtx dest = next_real_insn (label);
4452 int dest_uid;
4453 if (! dest)
4454 /* This can happen for an undefined label. */
4455 return 0;
4456 dest_uid = INSN_UID (dest);
4457 /* If this is a newly created branch redirection blocking instruction,
4458 we cannot index the branch_uid or insn_addresses arrays with its
4459 uid. But then, we won't need to, because the actual destination is
4460 the following branch. */
4461 while (dest_uid >= max_uid)
4462 {
4463 dest = NEXT_INSN (dest);
4464 dest_uid = INSN_UID (dest);
4465 }
4466 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4467 return 0;
4468 return dest_uid;
4469 }
4470
4471 /* Split condbranches that are out of range. Also add clobbers for
4472 scratch registers that are needed in far jumps.
4473 We do this before delay slot scheduling, so that it can take our
4474 newly created instructions into account. It also allows us to
4475 find branches with common targets more easily. */
4476
4477 static void
4478 split_branches (rtx first)
4479 {
4480 rtx insn;
4481 struct far_branch **uid_branch, *far_branch_list = 0;
4482 int max_uid = get_max_uid ();
4483
4484 /* Find out which branches are out of range. */
4485 shorten_branches (first);
4486
4487 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4488 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4489
4490 for (insn = first; insn; insn = NEXT_INSN (insn))
4491 if (! INSN_P (insn))
4492 continue;
4493 else if (INSN_DELETED_P (insn))
4494 {
4495 /* Shorten_branches would split this instruction again,
4496 so transform it into a note. */
4497 PUT_CODE (insn, NOTE);
4498 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4499 NOTE_SOURCE_FILE (insn) = 0;
4500 }
4501 else if (GET_CODE (insn) == JUMP_INSN
4502 	     /* Don't mess with ADDR_DIFF_VECs.  */
4503 && (GET_CODE (PATTERN (insn)) == SET
4504 || GET_CODE (PATTERN (insn)) == RETURN))
4505 {
4506 enum attr_type type = get_attr_type (insn);
4507 if (type == TYPE_CBRANCH)
4508 {
4509 rtx next, beyond;
4510
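 	      /* A conditional branch whose length exceeds 4 bytes has been
		 found by shorten_branches to be out of range; redirect it to
		 a near label that in turn branches to the real target.  */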
4511 if (get_attr_length (insn) > 4)
4512 {
4513 rtx src = SET_SRC (PATTERN (insn));
4514 rtx olabel = XEXP (XEXP (src, 1), 0);
4515 int addr = INSN_ADDRESSES (INSN_UID (insn));
4516 rtx label = 0;
4517 int dest_uid = get_dest_uid (olabel, max_uid);
4518 struct far_branch *bp = uid_branch[dest_uid];
4519
4520 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4521 the label if the LABEL_NUSES count drops to zero. There is
4522 always a jump_optimize pass that sets these values, but it
4523 proceeds to delete unreferenced code, and then if not
4524 optimizing, to un-delete the deleted instructions, thus
4525 		 leaving labels with use counts that are too low.  */
4526 if (! optimize)
4527 {
4528 JUMP_LABEL (insn) = olabel;
4529 LABEL_NUSES (olabel)++;
4530 }
4531 if (! bp)
4532 {
4533 bp = (struct far_branch *) alloca (sizeof *bp);
4534 uid_branch[dest_uid] = bp;
4535 bp->prev = far_branch_list;
4536 far_branch_list = bp;
4537 bp->far_label
4538 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4539 LABEL_NUSES (bp->far_label)++;
4540 }
4541 else
4542 {
4543 label = bp->near_label;
4544 if (! label && bp->address - addr >= CONDJUMP_MIN)
4545 {
4546 rtx block = bp->insert_place;
4547
4548 if (GET_CODE (PATTERN (block)) == RETURN)
4549 block = PREV_INSN (block);
4550 else
4551 block = gen_block_redirect (block,
4552 bp->address, 2);
4553 label = emit_label_after (gen_label_rtx (),
4554 PREV_INSN (block));
4555 bp->near_label = label;
4556 }
4557 else if (label && ! NEXT_INSN (label))
4558 {
4559 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4560 bp->insert_place = insn;
4561 else
4562 gen_far_branch (bp);
4563 }
4564 }
4565 if (! label
4566 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4567 {
4568 bp->near_label = label = gen_label_rtx ();
4569 bp->insert_place = insn;
4570 bp->address = addr;
4571 }
4572 if (! redirect_jump (insn, label, 1))
4573 abort ();
4574 }
4575 else
4576 {
4577 /* get_attr_length (insn) == 2 */
4578 /* Check if we have a pattern where reorg wants to redirect
4579 the branch to a label from an unconditional branch that
4580 is too far away. */
4581 /* We can't use JUMP_LABEL here because it might be undefined
4582 when not optimizing. */
4583 /* A syntax error might cause beyond to be NULL_RTX. */
4584 beyond
4585 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4586 0));
4587
4588 if (beyond
4589 && (GET_CODE (beyond) == JUMP_INSN
4590 || ((beyond = next_active_insn (beyond))
4591 && GET_CODE (beyond) == JUMP_INSN))
4592 && GET_CODE (PATTERN (beyond)) == SET
4593 && recog_memoized (beyond) == CODE_FOR_jump_compact
4594 && ((INSN_ADDRESSES
4595 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4596 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4597 > 252 + 258 + 2))
4598 gen_block_redirect (beyond,
4599 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4600 }
4601
4602 next = next_active_insn (insn);
4603
4604 if ((GET_CODE (next) == JUMP_INSN
4605 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4606 && GET_CODE (PATTERN (next)) == SET
4607 && recog_memoized (next) == CODE_FOR_jump_compact
4608 && ((INSN_ADDRESSES
4609 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4610 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4611 > 252 + 258 + 2))
4612 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4613 }
4614 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4615 {
4616 int addr = INSN_ADDRESSES (INSN_UID (insn));
4617 rtx far_label = 0;
4618 int dest_uid = 0;
4619 struct far_branch *bp;
4620
4621 if (type == TYPE_JUMP)
4622 {
4623 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4624 dest_uid = get_dest_uid (far_label, max_uid);
4625 if (! dest_uid)
4626 {
4627 /* Parse errors can lead to labels outside
4628 the insn stream. */
4629 if (! NEXT_INSN (far_label))
4630 continue;
4631
4632 if (! optimize)
4633 {
4634 JUMP_LABEL (insn) = far_label;
4635 LABEL_NUSES (far_label)++;
4636 }
4637 redirect_jump (insn, NULL_RTX, 1);
4638 far_label = 0;
4639 }
4640 }
4641 bp = uid_branch[dest_uid];
4642 if (! bp)
4643 {
4644 bp = (struct far_branch *) alloca (sizeof *bp);
4645 uid_branch[dest_uid] = bp;
4646 bp->prev = far_branch_list;
4647 far_branch_list = bp;
4648 bp->near_label = 0;
4649 bp->far_label = far_label;
4650 if (far_label)
4651 LABEL_NUSES (far_label)++;
4652 }
4653 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4654 if (addr - bp->address <= CONDJUMP_MAX)
4655 emit_label_after (bp->near_label, PREV_INSN (insn));
4656 else
4657 {
4658 gen_far_branch (bp);
4659 bp->near_label = 0;
4660 }
4661 else
4662 bp->near_label = 0;
4663 bp->address = addr;
4664 bp->insert_place = insn;
4665 if (! far_label)
4666 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4667 else
4668 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4669 }
4670 }
4671 /* Generate all pending far branches,
4672 and free our references to the far labels. */
4673 while (far_branch_list)
4674 {
4675 if (far_branch_list->near_label
4676 && ! NEXT_INSN (far_branch_list->near_label))
4677 gen_far_branch (far_branch_list);
4678 if (optimize
4679 && far_branch_list->far_label
4680 && ! --LABEL_NUSES (far_branch_list->far_label))
4681 delete_insn (far_branch_list->far_label);
4682 far_branch_list = far_branch_list->prev;
4683 }
4684
4685 /* Instruction length information is no longer valid due to the new
4686 instructions that have been generated. */
4687 init_insn_lengths ();
4688 }
4689
4690 /* Dump out instruction addresses, which is useful for debugging the
4691 constant pool table stuff.
4692
4693 If relaxing, output the label and pseudo-ops used to link together
4694 calls and the instruction which set the registers. */
4695
4696 /* ??? The addresses printed by this routine for insns are nonsense for
4697    insns which are inside a sequence where none of the inner insns have
4698 variable length. This is because the second pass of shorten_branches
4699 does not bother to update them. */
4700
4701 void
4702 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4703 int noperands ATTRIBUTE_UNUSED)
4704 {
4705 if (TARGET_DUMPISIZE)
4706 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4707
4708 if (TARGET_RELAX)
4709 {
4710 rtx note;
4711
4712 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4713 if (note)
4714 {
4715 rtx pattern;
4716
4717 pattern = PATTERN (insn);
4718 if (GET_CODE (pattern) == PARALLEL)
4719 pattern = XVECEXP (pattern, 0, 0);
4720 if (GET_CODE (pattern) == CALL
4721 || (GET_CODE (pattern) == SET
4722 && (GET_CODE (SET_SRC (pattern)) == CALL
4723 || get_attr_type (insn) == TYPE_SFUNC)))
4724 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4725 CODE_LABEL_NUMBER (XEXP (note, 0)));
4726 else if (GET_CODE (pattern) == SET)
4727 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4728 CODE_LABEL_NUMBER (XEXP (note, 0)));
4729 else
4730 abort ();
4731 }
4732 }
4733 }
4734
4735 /* Dump out any constants accumulated in the final pass. These will
4736 only be labels. */
4737
4738 const char *
4739 output_jump_label_table (void)
4740 {
4741 int i;
4742
4743 if (pool_size)
4744 {
4745 fprintf (asm_out_file, "\t.align 2\n");
4746 for (i = 0; i < pool_size; i++)
4747 {
4748 pool_node *p = &pool_vector[i];
4749
4750 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4751 CODE_LABEL_NUMBER (p->label));
4752 output_asm_insn (".long %O0", &p->value);
4753 }
4754 pool_size = 0;
4755 }
4756
4757 return "";
4758 }
4759 \f
4760 /* A full frame looks like:
4761
4762 arg-5
4763 arg-4
4764 [ if current_function_anonymous_args
4765 arg-3
4766 arg-2
4767 arg-1
4768 arg-0 ]
4769 saved-fp
4770 saved-r10
4771 saved-r11
4772 saved-r12
4773 saved-pr
4774 local-n
4775 ..
4776 local-1
4777 local-0 <- fp points here. */
4778
4779 /* Number of bytes pushed for anonymous args, used to pass information
4780 between expand_prologue and expand_epilogue. */
4781
4782 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4783 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4784 for an epilogue and a negative value means that it's for a sibcall
4785 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4786 all the registers that are about to be restored, and hence dead. */
4787
4788 static void
4789 output_stack_adjust (int size, rtx reg, int epilogue_p,
4790 HARD_REG_SET *live_regs_mask)
4791 {
4792 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4793 if (size)
4794 {
4795 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4796
4797 /* This test is bogus, as output_stack_adjust is used to re-align the
4798 stack. */
4799 #if 0
4800 if (size % align)
4801 abort ();
4802 #endif
4803
4804 if (CONST_OK_FOR_ADD (size))
4805 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4806 /* Try to do it with two partial adjustments; however, we must make
4807 sure that the stack is properly aligned at all times, in case
4808 an interrupt occurs between the two partial adjustments. */
4809 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4810 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4811 {
4812 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4813 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4814 }
4815 else
4816 {
4817 rtx const_reg;
4818 rtx insn;
4819 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4820 int i;
4821
4822 /* If TEMP is invalid, we could temporarily save a general
4823 register to MACL. However, there is currently no need
4824 to handle this case, so just abort when we see it. */
4825 if (epilogue_p < 0
4826 || current_function_interrupt
4827 || ! call_used_regs[temp] || fixed_regs[temp])
4828 temp = -1;
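 	  /* Try to scavenge a call-clobbered register that is not needed for
	     anything else here: not a return value or EH data register in an
	     epilogue, and not an incoming argument register or the static
	     chain in a prologue.  */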
4829 if (temp < 0 && ! current_function_interrupt
4830 && (TARGET_SHMEDIA || epilogue_p >= 0))
4831 {
4832 HARD_REG_SET temps;
4833 COPY_HARD_REG_SET (temps, call_used_reg_set);
4834 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4835 if (epilogue_p > 0)
4836 {
4837 int nreg = 0;
4838 if (current_function_return_rtx)
4839 {
4840 enum machine_mode mode;
4841 mode = GET_MODE (current_function_return_rtx);
4842 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4843 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4844 }
4845 for (i = 0; i < nreg; i++)
4846 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4847 if (current_function_calls_eh_return)
4848 {
4849 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4850 for (i = 0; i <= 3; i++)
4851 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4852 }
4853 }
4854 if (TARGET_SHMEDIA && epilogue_p < 0)
4855 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4856 CLEAR_HARD_REG_BIT (temps, i);
4857 if (epilogue_p <= 0)
4858 {
4859 for (i = FIRST_PARM_REG;
4860 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4861 CLEAR_HARD_REG_BIT (temps, i);
4862 if (cfun->static_chain_decl != NULL)
4863 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4864 }
4865 temp = scavenge_reg (&temps);
4866 }
4867 if (temp < 0 && live_regs_mask)
4868 temp = scavenge_reg (live_regs_mask);
4869 if (temp < 0)
4870 {
4871 	      /* If we reached here, the most likely case is the (sibcall)
4872 		 epilogue for a non-SHmedia target.  Put a special push/pop
4873 		 sequence for such cases as a last resort.  This looks lengthy
4874 		 but should not be a problem because it seems to be very rare.  */
4875 if (! TARGET_SHMEDIA && epilogue_p)
4876 {
4877 rtx adj_reg, tmp_reg, mem;
4878
4879 /* ??? There is still the slight possibility that r4 or r5
4880 have been reserved as fixed registers or assigned as
4881 global registers, and they change during an interrupt.
4882 There are possible ways to handle this:
4883 - If we are adjusting the frame pointer (r14), we can do
4884 with a single temp register and an ordinary push / pop
4885 on the stack.
4886 - Grab any call-used or call-saved registers (i.e. not
4887 fixed or globals) for the temps we need. We might
4888 also grab r14 if we are adjusting the stack pointer.
4889 If we can't find enough available registers, issue
4890 a diagnostic and abort - the user must have reserved
4891 way too many registers.
4892 But since all this is rather unlikely to happen and
4893 would require extra testing, we just abort if r4 / r5
4894 are not available. */
4895 if (fixed_regs[4] || fixed_regs[5]
4896 || global_regs[4] || global_regs[5])
4897 abort ();
4898
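 	      /* Spill r4 and r5: save r4 at [REG], compute the adjusted stack
		 value in r4, store r5 and the original r4 just below it with
		 pre-decrement, switch REG over, and finally restore r4 / r5
		 with post-increment loads, which also leaves REG at its final
		 value.  */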
4899 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4900 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4901 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4902 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4903 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4904 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4905 emit_move_insn (mem, tmp_reg);
4906 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4907 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4908 emit_move_insn (mem, tmp_reg);
4909 emit_move_insn (reg, adj_reg);
4910 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4911 emit_move_insn (adj_reg, mem);
4912 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4913 emit_move_insn (tmp_reg, mem);
4914 return;
4915 }
4916 else
4917 abort ();
4918 }
4919 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4920
4921 /* If SIZE is negative, subtract the positive value.
4922 This sometimes allows a constant pool entry to be shared
4923 between prologue and epilogue code. */
4924 if (size < 0)
4925 {
4926 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4927 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4928 }
4929 else
4930 {
4931 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4932 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4933 }
4934 if (! epilogue_p)
4935 REG_NOTES (insn)
4936 = (gen_rtx_EXPR_LIST
4937 (REG_FRAME_RELATED_EXPR,
4938 gen_rtx_SET (VOIDmode, reg,
4939 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4940 REG_NOTES (insn)));
4941 }
4942 }
4943 }
4944
4945 static rtx
4946 frame_insn (rtx x)
4947 {
4948 x = emit_insn (x);
4949 RTX_FRAME_RELATED_P (x) = 1;
4950 return x;
4951 }
4952
4953 /* Output RTL to push register RN onto the stack. */
4954
4955 static rtx
4956 push (int rn)
4957 {
4958 rtx x;
4959 if (rn == FPUL_REG)
4960 x = gen_push_fpul ();
4961 else if (rn == FPSCR_REG)
4962 x = gen_push_fpscr ();
4963 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4964 && FP_OR_XD_REGISTER_P (rn))
4965 {
4966 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4967 return NULL_RTX;
4968 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4969 }
4970 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4971 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4972 else
4973 x = gen_push (gen_rtx_REG (SImode, rn));
4974
4975 x = frame_insn (x);
4976 REG_NOTES (x)
4977 = gen_rtx_EXPR_LIST (REG_INC,
4978 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4979 return x;
4980 }
4981
4982 /* Output RTL to pop register RN from the stack. */
4983
4984 static void
4985 pop (int rn)
4986 {
4987 rtx x;
4988 if (rn == FPUL_REG)
4989 x = gen_pop_fpul ();
4990 else if (rn == FPSCR_REG)
4991 x = gen_pop_fpscr ();
4992 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4993 && FP_OR_XD_REGISTER_P (rn))
4994 {
4995 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4996 return;
4997 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4998 }
4999 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5000 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5001 else
5002 x = gen_pop (gen_rtx_REG (SImode, rn));
5003
5004 x = emit_insn (x);
5005 REG_NOTES (x)
5006 = gen_rtx_EXPR_LIST (REG_INC,
5007 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5008 }
5009
5010 /* Generate code to push the regs specified in the mask. */
5011
5012 static void
5013 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5014 {
5015 int i;
5016 int skip_fpscr = 0;
5017
5018 /* Push PR last; this gives better latencies after the prologue, and
5019 candidates for the return delay slot when there are no general
5020 registers pushed. */
5021 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5022 {
5023 /* If this is an interrupt handler, and the SZ bit varies,
5024 and we have to push any floating point register, we need
5025 to switch to the correct precision first. */
5026 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5027 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5028 {
5029 HARD_REG_SET unsaved;
5030
5031 push (FPSCR_REG);
5032 COMPL_HARD_REG_SET (unsaved, *mask);
5033 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5034 skip_fpscr = 1;
5035 }
5036 if (i != PR_REG
5037 && (i != FPSCR_REG || ! skip_fpscr)
5038 && TEST_HARD_REG_BIT (*mask, i))
5039 push (i);
5040 }
5041 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5042 push (PR_REG);
5043 }
5044
5045 /* Calculate how much extra space is needed to save all callee-saved
5046 target registers.
5047 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5048
5049 static int
5050 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5051 {
5052 int reg;
5053 int stack_space = 0;
5054 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5055
5056 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5057 if ((! call_used_regs[reg] || interrupt_handler)
5058 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5059 /* Leave space to save this target register on the stack,
5060 in case target register allocation wants to use it. */
5061 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5062 return stack_space;
5063 }
5064
5065 /* Decide whether we should reserve space for callee-save target registers,
5066 in case target register allocation wants to use them. REGS_SAVED is
5067 the space, in bytes, that is already required for register saves.
5068 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5069
5070 static int
5071 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5072 HARD_REG_SET *live_regs_mask)
5073 {
5074 if (optimize_size)
5075 return 0;
5076 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5077 }
5078
5079 /* Decide how much space to reserve for callee-save target registers
5080 in case target register allocation wants to use them.
5081 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5082
5083 static int
5084 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5085 {
5086 if (shmedia_space_reserved_for_target_registers)
5087 return shmedia_target_regs_stack_space (live_regs_mask);
5088 else
5089 return 0;
5090 }
5091
5092 /* Work out the registers which need to be saved, both as a mask and a
5093 count of saved words. Return the count.
5094
5095 If doing a pragma interrupt function, then push all regs used by the
5096 function, and if we call another function (we can tell by looking at PR),
5097 make sure that all the regs it clobbers are safe too. */
5098
5099 static int
5100 calc_live_regs (HARD_REG_SET *live_regs_mask)
5101 {
5102 int reg;
5103 int count;
5104 int interrupt_handler;
5105 int pr_live, has_call;
5106
5107 interrupt_handler = sh_cfun_interrupt_handler_p ();
5108
5109 CLEAR_HARD_REG_SET (*live_regs_mask);
5110 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5111 && regs_ever_live[FPSCR_REG])
5112 target_flags &= ~FPU_SINGLE_BIT;
5113   /* If switching to double mode would avoid a lot of saves, do that.  */
5114 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5115 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5116 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5117 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
5118 && ++count > 2)
5119 {
5120 target_flags &= ~FPU_SINGLE_BIT;
5121 break;
5122 }
5123 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5124 knows how to use it. That means the pseudo originally allocated for
5125 the initial value can become the PR_MEDIA_REG hard register, as seen for
5126 execute/20010122-1.c:test9. */
5127 if (TARGET_SHMEDIA)
5128 /* ??? this function is called from initial_elimination_offset, hence we
5129 can't use the result of sh_media_register_for_return here. */
5130 pr_live = sh_pr_n_sets ();
5131 else
5132 {
5133 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5134 pr_live = (pr_initial
5135 ? (GET_CODE (pr_initial) != REG
5136 || REGNO (pr_initial) != (PR_REG))
5137 : regs_ever_live[PR_REG]);
5138       /* For SHcompact, if not optimizing, we end up with a memory reference
5139 using the return address pointer for __builtin_return_address even
5140 though there is no actual need to put the PR register on the stack. */
5141 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5142 }
5143 /* Force PR to be live if the prologue has to call the SHmedia
5144 argument decoder or register saver. */
5145 if (TARGET_SHCOMPACT
5146 && ((current_function_args_info.call_cookie
5147 & ~ CALL_COOKIE_RET_TRAMP (1))
5148 || current_function_has_nonlocal_label))
5149 pr_live = 1;
5150 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
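   /* Walk the hard registers from the top down and decide, for each one,
      whether it must be saved: PR according to PR_LIVE, nearly everything
      that is live for interrupt handlers (other than #pragma trapa ones),
      and otherwise only used registers that are not call-clobbered, plus a
      few special cases such as the EH return data registers.  */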
5151 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
5152 {
5153 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5154 ? pr_live
5155 : (interrupt_handler && ! pragma_trapa)
5156 ? (/* Need to save all the regs ever live. */
5157 (regs_ever_live[reg]
5158 || (call_used_regs[reg]
5159 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
5160 && has_call)
5161 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5162 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5163 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5164 && reg != RETURN_ADDRESS_POINTER_REGNUM
5165 && reg != T_REG && reg != GBR_REG
5166 	       /* Push fpscr only on targets which have an FPU.  */
5167 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5168 : (/* Only push those regs which are used and need to be saved. */
5169 (TARGET_SHCOMPACT
5170 && flag_pic
5171 && current_function_args_info.call_cookie
5172 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
5173 || (regs_ever_live[reg] && ! call_used_regs[reg])
5174 || (current_function_calls_eh_return
5175 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5176 || reg == (int) EH_RETURN_DATA_REGNO (1)
5177 || reg == (int) EH_RETURN_DATA_REGNO (2)
5178 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5179 || ((reg == MACL_REG || reg == MACH_REG)
5180 && regs_ever_live[reg]
5181 && sh_cfun_attr_renesas_p ())
5182 ))
5183 {
5184 SET_HARD_REG_BIT (*live_regs_mask, reg);
5185 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5186
5187 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5188 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5189 {
5190 if (FP_REGISTER_P (reg))
5191 {
5192 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5193 {
5194 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5195 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5196 }
5197 }
5198 else if (XD_REGISTER_P (reg))
5199 {
5200 /* Must switch to double mode to access these registers. */
5201 target_flags &= ~FPU_SINGLE_BIT;
5202 }
5203 }
5204 }
5205 }
5206 /* If we have a target register optimization pass after prologue / epilogue
5207 threading, we need to assume all target registers will be live even if
5208 they aren't now. */
5209 if (flag_branch_target_load_optimize2
5210 && TARGET_SAVE_ALL_TARGET_REGS
5211 && shmedia_space_reserved_for_target_registers)
5212 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5213 if ((! call_used_regs[reg] || interrupt_handler)
5214 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5215 {
5216 SET_HARD_REG_BIT (*live_regs_mask, reg);
5217 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5218 }
5219 /* If this is an interrupt handler, we don't have any call-clobbered
5220 registers we can conveniently use for target register save/restore.
5221 Make sure we save at least one general purpose register when we need
5222 to save target registers. */
5223 if (interrupt_handler
5224 && hard_regs_intersect_p (live_regs_mask,
5225 &reg_class_contents[TARGET_REGS])
5226 && ! hard_regs_intersect_p (live_regs_mask,
5227 &reg_class_contents[GENERAL_REGS]))
5228 {
5229 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5230 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5231 }
5232
5233 return count;
5234 }
5235
5236 /* Code to generate prologue and epilogue sequences */
5237
5238 /* PUSHED is the number of bytes that are being pushed on the
5239 stack for register saves. Return the frame size, padded
5240 appropriately so that the stack stays properly aligned. */
5241 static HOST_WIDE_INT
5242 rounded_frame_size (int pushed)
5243 {
5244 HOST_WIDE_INT size = get_frame_size ();
5245 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5246
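   /* Round SIZE + PUSHED up to a multiple of the stack alignment, then
      subtract PUSHED again, so that the frame plus the pushed registers
      together remain aligned.  */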
5247 return ((size + pushed + align - 1) & -align) - pushed;
5248 }
5249
5250 /* Choose a call-clobbered target-branch register that remains
5251 unchanged along the whole function. We set it up as the return
5252 value in the prologue. */
5253 int
5254 sh_media_register_for_return (void)
5255 {
5256 int regno;
5257 int tr0_used;
5258
5259 if (! current_function_is_leaf)
5260 return -1;
5261 if (lookup_attribute ("interrupt_handler",
5262 DECL_ATTRIBUTES (current_function_decl)))
5263 return -1;
5264
5265 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5266
5267 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5268 if (call_used_regs[regno] && ! regs_ever_live[regno])
5269 return regno;
5270
5271 return -1;
5272 }
5273
5274 /* The maximum number of registers we need to save is:
5275 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5276 - 32 floating point registers (for each pair, we save none,
5277 one single precision value, or a double precision value).
5278 - 8 target registers
5279 - add 1 entry for a delimiter. */
5280 #define MAX_SAVED_REGS (62+32+8)
5281
5282 typedef struct save_entry_s
5283 {
5284 unsigned char reg;
5285 unsigned char mode;
5286 short offset;
5287 } save_entry;
5288
5289 #define MAX_TEMPS 4
5290
5291 /* There will be a delimiter entry with VOIDmode both at the start and the
5292 end of a filled in schedule. The end delimiter has the offset of the
5293 save with the smallest (i.e. most negative) offset. */
5294 typedef struct save_schedule_s
5295 {
5296 save_entry entries[MAX_SAVED_REGS + 2];
5297 int temps[MAX_TEMPS+1];
5298 } save_schedule;
5299
5300 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5301 use reverse order. Returns the last entry written to (not counting
5302 the delimiter). OFFSET_BASE is a number to be added to all offset
5303 entries. */
5304
5305 static save_entry *
5306 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5307 int offset_base)
5308 {
5309 int align, i;
5310 save_entry *entry = schedule->entries;
5311 int tmpx = 0;
5312 int offset;
5313
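   /* Collect up to MAX_TEMPS call-clobbered general registers that carry no
      incoming values (arguments, the static chain, the return address, EH
      data); they can be used as temporaries while emitting the saves.  */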
5314 if (! current_function_interrupt)
5315 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5316 if (call_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5317 && ! FUNCTION_ARG_REGNO_P (i)
5318 && i != FIRST_RET_REG
5319 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5320 && ! (current_function_calls_eh_return
5321 && (i == EH_RETURN_STACKADJ_REGNO
5322 || ((unsigned) i <= EH_RETURN_DATA_REGNO (0)
5323 && (unsigned) i >= EH_RETURN_DATA_REGNO (3)))))
5324 schedule->temps[tmpx++] = i;
5325 entry->reg = -1;
5326 entry->mode = VOIDmode;
5327 entry->offset = offset_base;
5328 entry++;
5329   /* We loop twice: first, we save 8-byte aligned registers at the
5330      higher addresses, which are known to be aligned.  Then, we
5331      proceed to saving 32-bit registers that don't need 8-byte
5332      alignment.
5333      If this is an interrupt function, all registers that need saving
5334      need to be saved in full.  Moreover, we need to postpone saving
5335      target registers till we have saved some general purpose registers
5336      we can then use as scratch registers.  */
5337 offset = offset_base;
5338 for (align = 1; align >= 0; align--)
5339 {
5340 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5341 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5342 {
5343 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5344 int reg = i;
5345
5346 if (current_function_interrupt)
5347 {
5348 if (TARGET_REGISTER_P (i))
5349 continue;
5350 if (GENERAL_REGISTER_P (i))
5351 mode = DImode;
5352 }
5353 if (mode == SFmode && (i % 2) == 1
5354 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5355 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5356 {
5357 mode = DFmode;
5358 i--;
5359 reg--;
5360 }
5361
5362 /* If we're doing the aligned pass and this is not aligned,
5363 or we're doing the unaligned pass and this is aligned,
5364 skip it. */
5365 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5366 != align)
5367 continue;
5368
5369 if (current_function_interrupt
5370 && GENERAL_REGISTER_P (i)
5371 && tmpx < MAX_TEMPS)
5372 schedule->temps[tmpx++] = i;
5373
5374 offset -= GET_MODE_SIZE (mode);
5375 entry->reg = i;
5376 entry->mode = mode;
5377 entry->offset = offset;
5378 entry++;
5379 }
5380 if (align && current_function_interrupt)
5381 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5382 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5383 {
5384 offset -= GET_MODE_SIZE (DImode);
5385 entry->reg = i;
5386 entry->mode = DImode;
5387 entry->offset = offset;
5388 entry++;
5389 }
5390 }
5391 entry->reg = -1;
5392 entry->mode = VOIDmode;
5393 entry->offset = offset;
5394 schedule->temps[tmpx] = -1;
5395 return entry - 1;
5396 }
5397
5398 void
5399 sh_expand_prologue (void)
5400 {
5401 HARD_REG_SET live_regs_mask;
5402 int d, i;
5403 int d_rounding = 0;
5404 int save_flags = target_flags;
5405 int pretend_args;
5406
5407 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5408
5409 /* We have pretend args if we had an object sent partially in registers
5410 and partially on the stack, e.g. a large structure. */
5411 pretend_args = current_function_pretend_args_size;
5412 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5413 && (NPARM_REGS(SImode)
5414 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5415 pretend_args = 0;
5416 output_stack_adjust (-pretend_args
5417 - current_function_args_info.stack_regs * 8,
5418 stack_pointer_rtx, 0, NULL);
5419
5420 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5421 /* We're going to use the PIC register to load the address of the
5422 incoming-argument decoder and/or of the return trampoline from
5423 the GOT, so make sure the PIC register is preserved and
5424 initialized. */
5425 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5426
5427 if (TARGET_SHCOMPACT
5428 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5429 {
5430 int reg;
5431
5432 /* First, make all registers with incoming arguments that will
5433 be pushed onto the stack live, so that register renaming
5434 doesn't overwrite them. */
5435 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5436 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5437 >= NPARM_REGS (SImode) - reg)
5438 for (; reg < NPARM_REGS (SImode); reg++)
5439 emit_insn (gen_shcompact_preserve_incoming_args
5440 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5441 else if (CALL_COOKIE_INT_REG_GET
5442 (current_function_args_info.call_cookie, reg) == 1)
5443 emit_insn (gen_shcompact_preserve_incoming_args
5444 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5445
5446 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5447 stack_pointer_rtx);
5448 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5449 GEN_INT (current_function_args_info.call_cookie));
5450 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5451 gen_rtx_REG (SImode, R0_REG));
5452 }
5453 else if (TARGET_SHMEDIA)
5454 {
5455 int tr = sh_media_register_for_return ();
5456
5457 if (tr >= 0)
5458 {
5459 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5460 gen_rtx_REG (DImode, PR_MEDIA_REG));
5461
5462 /* ??? We should suppress saving pr when we don't need it, but this
5463 is tricky because of builtin_return_address. */
5464
5465 /* If this function only exits with sibcalls, this copy
5466 will be flagged as dead. */
5467 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5468 const0_rtx,
5469 REG_NOTES (insn));
5470 }
5471 }
5472
5473 /* Emit the code for SETUP_VARARGS. */
5474 if (current_function_stdarg)
5475 {
5476 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5477 {
5478 /* Push arg regs as if they'd been provided by the caller on the stack. */
5479 for (i = 0; i < NPARM_REGS(SImode); i++)
5480 {
5481 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5482 rtx insn;
5483
5484 if (i >= (NPARM_REGS(SImode)
5485 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5486 ))
5487 break;
5488 insn = push (rn);
5489 RTX_FRAME_RELATED_P (insn) = 0;
5490 }
5491 }
5492 }
5493
5494 /* If we're supposed to switch stacks at function entry, do so now. */
5495 if (sp_switch)
5496 emit_insn (gen_sp_switch_1 ());
5497
5498 d = calc_live_regs (&live_regs_mask);
5499 /* ??? Maybe we could save some switching if we can move a mode switch
5500 that already happens to be at the function start into the prologue. */
5501 if (target_flags != save_flags && ! current_function_interrupt)
5502 emit_insn (gen_toggle_sz ());
5503
5504 if (TARGET_SH5)
5505 {
5506 int offset_base, offset;
5507 rtx r0 = NULL_RTX;
5508 int offset_in_r0 = -1;
5509 int sp_in_r0 = 0;
5510 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5511 int total_size, save_size;
5512 save_schedule schedule;
5513 save_entry *entry;
5514 int *tmp_pnt;
5515
5516 if (call_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5517 && ! current_function_interrupt)
5518 r0 = gen_rtx_REG (Pmode, R0_REG);
5519
5520 /* D is the actual number of bytes that we need for saving registers;
5521 however, in initial_elimination_offset we have committed to using
5522 an additional TREGS_SPACE bytes. In order to keep both the
5523 addresses of arguments supplied by the caller and the addresses of
5524 local variables valid, we must keep this gap. Place it between the
5525 incoming arguments and the actually saved registers in a bid to
5526 optimize locality of reference. */
5527 total_size = d + tregs_space;
5528 total_size += rounded_frame_size (total_size);
5529 save_size = total_size - rounded_frame_size (d);
5530 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5531 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5532 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5533
5534 /* If adjusting the stack in a single step costs nothing extra, do so.
5535 I.e. either if a single addi is enough, or we need a movi anyway,
5536 and we don't exceed the maximum offset range (the test for the
5537 latter is conservative for simplicity). */
5538 if (TARGET_SHMEDIA
5539 && (CONST_OK_FOR_I10 (-total_size)
5540 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5541 && total_size <= 2044)))
5542 d_rounding = total_size - save_size;
5543
5544 offset_base = d + d_rounding;
5545
5546 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5547 0, NULL);
5548
5549 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5550 tmp_pnt = schedule.temps;
5551 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5552 {
5553 enum machine_mode mode = entry->mode;
5554 int reg = entry->reg;
5555 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5556
5557 offset = entry->offset;
5558
5559 reg_rtx = gen_rtx_REG (mode, reg);
5560
5561 mem_rtx = gen_rtx_MEM (mode,
5562 gen_rtx_PLUS (Pmode,
5563 stack_pointer_rtx,
5564 GEN_INT (offset)));
5565
5566 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5567
5568 if (! r0)
5569 abort ();
5570 mem_rtx = NULL_RTX;
5571
5572 try_pre_dec:
5573 do
5574 if (HAVE_PRE_DECREMENT
5575 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5576 || mem_rtx == NULL_RTX
5577 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5578 {
5579 pre_dec = gen_rtx_MEM (mode,
5580 gen_rtx_PRE_DEC (Pmode, r0));
5581
5582 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5583 pre_dec_ok);
5584
5585 pre_dec = NULL_RTX;
5586
5587 break;
5588
5589 pre_dec_ok:
5590 mem_rtx = NULL_RTX;
5591 offset += GET_MODE_SIZE (mode);
5592 }
5593 while (0);
5594
5595 if (mem_rtx != NULL_RTX)
5596 goto addr_ok;
5597
5598 if (offset_in_r0 == -1)
5599 {
5600 emit_move_insn (r0, GEN_INT (offset));
5601 offset_in_r0 = offset;
5602 }
5603 else if (offset != offset_in_r0)
5604 {
5605 emit_move_insn (r0,
5606 gen_rtx_PLUS
5607 (Pmode, r0,
5608 GEN_INT (offset - offset_in_r0)));
5609 offset_in_r0 += offset - offset_in_r0;
5610 }
5611
5612 if (pre_dec != NULL_RTX)
5613 {
5614 if (! sp_in_r0)
5615 {
5616 emit_move_insn (r0,
5617 gen_rtx_PLUS
5618 (Pmode, r0, stack_pointer_rtx));
5619 sp_in_r0 = 1;
5620 }
5621
5622 offset -= GET_MODE_SIZE (mode);
5623 offset_in_r0 -= GET_MODE_SIZE (mode);
5624
5625 mem_rtx = pre_dec;
5626 }
5627 else if (sp_in_r0)
5628 mem_rtx = gen_rtx_MEM (mode, r0);
5629 else
5630 mem_rtx = gen_rtx_MEM (mode,
5631 gen_rtx_PLUS (Pmode,
5632 stack_pointer_rtx,
5633 r0));
5634
5635 /* We must not use an r0-based address for target-branch
5636 registers or for special registers without pre-dec
5637 memory addresses, since we store their values in r0
5638 first. */
5639 if (TARGET_REGISTER_P (reg)
5640 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5641 && mem_rtx != pre_dec))
5642 abort ();
5643
5644 addr_ok:
5645 if (TARGET_REGISTER_P (reg)
5646 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5647 && mem_rtx != pre_dec))
5648 {
5649 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5650
5651 emit_move_insn (tmp_reg, reg_rtx);
5652
5653 if (REGNO (tmp_reg) == R0_REG)
5654 {
5655 offset_in_r0 = -1;
5656 sp_in_r0 = 0;
5657 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5658 abort ();
5659 }
5660
5661 if (*++tmp_pnt <= 0)
5662 tmp_pnt = schedule.temps;
5663
5664 reg_rtx = tmp_reg;
5665 }
5666 {
5667 rtx insn;
5668
5669 /* Mark as interesting for the dwarf cfi generator. */
5670 insn = emit_move_insn (mem_rtx, reg_rtx);
5671 RTX_FRAME_RELATED_P (insn) = 1;
5672
5673 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5674 {
5675 rtx reg_rtx = gen_rtx_REG (mode, reg);
5676 rtx set, note_rtx;
5677 rtx mem_rtx = gen_rtx_MEM (mode,
5678 gen_rtx_PLUS (Pmode,
5679 stack_pointer_rtx,
5680 GEN_INT (offset)));
5681
5682 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5683 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5684 REG_NOTES (insn));
5685 REG_NOTES (insn) = note_rtx;
5686 }
5687 }
5688 }
5689
5690 if (entry->offset != d_rounding)
5691 abort ();
5692 }
5693 else
5694 push_regs (&live_regs_mask, current_function_interrupt);
5695
5696 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5697 {
5698 rtx insn = get_last_insn ();
5699 rtx last = emit_insn (gen_GOTaddr2picreg ());
5700
5701 /* Mark these insns as possibly dead. Sometimes, flow2 may
5702 delete all uses of the PIC register. In this case, let it
5703 delete the initialization too. */
5704 do
5705 {
5706 insn = NEXT_INSN (insn);
5707
5708 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5709 const0_rtx,
5710 REG_NOTES (insn));
5711 }
5712 while (insn != last);
5713 }
5714
5715 if (SHMEDIA_REGS_STACK_ADJUST ())
5716 {
5717 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5718 function_symbol (TARGET_FPU_ANY
5719 ? "__GCC_push_shmedia_regs"
5720 : "__GCC_push_shmedia_regs_nofpu"));
5721 /* This must NOT go through the PLT, otherwise mach and macl
5722 may be clobbered. */
5723 emit_insn (gen_shmedia_save_restore_regs_compact
5724 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5725 }
5726
5727 if (target_flags != save_flags && ! current_function_interrupt)
5728 {
5729 rtx insn = emit_insn (gen_toggle_sz ());
5730
5731 /* If we're lucky, a mode switch in the function body will
5732 overwrite fpscr, turning this insn dead. Tell flow this
5733 insn is ok to delete. */
5734 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5735 const0_rtx,
5736 REG_NOTES (insn));
5737 }
5738
5739 target_flags = save_flags;
5740
5741 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5742 stack_pointer_rtx, 0, NULL);
5743
5744 if (frame_pointer_needed)
5745 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5746
5747 if (TARGET_SHCOMPACT
5748 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5749 {
5750 /* This must NOT go through the PLT, otherwise mach and macl
5751 may be clobbered. */
5752 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5753 function_symbol ("__GCC_shcompact_incoming_args"));
5754 emit_insn (gen_shcompact_incoming_args ());
5755 }
5756 }
5757
5758 void
5759 sh_expand_epilogue (bool sibcall_p)
5760 {
5761 HARD_REG_SET live_regs_mask;
5762 int d, i;
5763 int d_rounding = 0;
5764
5765 int save_flags = target_flags;
5766 int frame_size, save_size;
5767 int fpscr_deferred = 0;
5768 int e = sibcall_p ? -1 : 1;
5769
5770 d = calc_live_regs (&live_regs_mask);
5771
5772 save_size = d;
5773 frame_size = rounded_frame_size (d);
5774
5775 if (TARGET_SH5)
5776 {
5777 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5778 int total_size;
5779 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5780 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5781 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5782
5783 total_size = d + tregs_space;
5784 total_size += rounded_frame_size (total_size);
5785 save_size = total_size - frame_size;
5786
5787 /* If adjusting the stack in a single step costs nothing extra, do so.
5788 I.e. either if a single addi is enough, or we need a movi anyway,
5789 and we don't exceed the maximum offset range (the test for the
5790 latter is conservative for simplicity). */
5791 if (TARGET_SHMEDIA
5792 && ! frame_pointer_needed
5793 && (CONST_OK_FOR_I10 (total_size)
5794 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5795 && total_size <= 2044)))
5796 d_rounding = frame_size;
5797
5798 frame_size -= d_rounding;
5799 }
5800
5801 if (frame_pointer_needed)
5802 {
5803 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5804
5805 /* We must avoid moving the stack pointer adjustment past code
5806 which reads from the local frame, else an interrupt could
5807 occur after the SP adjustment and clobber data in the local
5808 frame. */
5809 emit_insn (gen_blockage ());
5810 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5811 }
5812 else if (frame_size)
5813 {
5814 /* We must avoid moving the stack pointer adjustment past code
5815 which reads from the local frame, else an interrupt could
5816 occur after the SP adjustment and clobber data in the local
5817 frame. */
5818 emit_insn (gen_blockage ());
5819 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5820 }
5821
5822 if (SHMEDIA_REGS_STACK_ADJUST ())
5823 {
5824 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5825 function_symbol (TARGET_FPU_ANY
5826 ? "__GCC_pop_shmedia_regs"
5827 : "__GCC_pop_shmedia_regs_nofpu"));
5828 /* This must NOT go through the PLT, otherwise mach and macl
5829 may be clobbered. */
5830 emit_insn (gen_shmedia_save_restore_regs_compact
5831 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5832 }
5833
5834 /* Pop all the registers. */
5835
5836 if (target_flags != save_flags && ! current_function_interrupt)
5837 emit_insn (gen_toggle_sz ());
5838 if (TARGET_SH5)
5839 {
5840 int offset_base, offset;
5841 int offset_in_r0 = -1;
5842 int sp_in_r0 = 0;
5843 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5844 save_schedule schedule;
5845 save_entry *entry;
5846 int *tmp_pnt;
5847
5848 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5849 offset_base = -entry[1].offset + d_rounding;
5850 tmp_pnt = schedule.temps;
5851 for (; entry->mode != VOIDmode; entry--)
5852 {
5853 enum machine_mode mode = entry->mode;
5854 int reg = entry->reg;
5855 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5856
5857 offset = offset_base + entry->offset;
5858 reg_rtx = gen_rtx_REG (mode, reg);
5859
5860 mem_rtx = gen_rtx_MEM (mode,
5861 gen_rtx_PLUS (Pmode,
5862 stack_pointer_rtx,
5863 GEN_INT (offset)));
5864
5865 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5866
5867 mem_rtx = NULL_RTX;
5868
5869 try_post_inc:
5870 do
5871 if (HAVE_POST_INCREMENT
5872 && (offset == offset_in_r0
5873 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5874 && mem_rtx == NULL_RTX)
5875 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5876 {
5877 post_inc = gen_rtx_MEM (mode,
5878 gen_rtx_POST_INC (Pmode, r0));
5879
5880 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5881 post_inc_ok);
5882
5883 post_inc = NULL_RTX;
5884
5885 break;
5886
5887 post_inc_ok:
5888 mem_rtx = NULL_RTX;
5889 }
5890 while (0);
5891
5892 if (mem_rtx != NULL_RTX)
5893 goto addr_ok;
5894
5895 if (offset_in_r0 == -1)
5896 {
5897 emit_move_insn (r0, GEN_INT (offset));
5898 offset_in_r0 = offset;
5899 }
5900 else if (offset != offset_in_r0)
5901 {
5902 emit_move_insn (r0,
5903 gen_rtx_PLUS
5904 (Pmode, r0,
5905 GEN_INT (offset - offset_in_r0)));
5906 offset_in_r0 += offset - offset_in_r0;
5907 }
5908
5909 if (post_inc != NULL_RTX)
5910 {
5911 if (! sp_in_r0)
5912 {
5913 emit_move_insn (r0,
5914 gen_rtx_PLUS
5915 (Pmode, r0, stack_pointer_rtx));
5916 sp_in_r0 = 1;
5917 }
5918
5919 mem_rtx = post_inc;
5920
5921 offset_in_r0 += GET_MODE_SIZE (mode);
5922 }
5923 else if (sp_in_r0)
5924 mem_rtx = gen_rtx_MEM (mode, r0);
5925 else
5926 mem_rtx = gen_rtx_MEM (mode,
5927 gen_rtx_PLUS (Pmode,
5928 stack_pointer_rtx,
5929 r0));
5930
5931 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5932 && mem_rtx != post_inc)
5933 abort ();
5934
5935 addr_ok:
5936 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5937 && mem_rtx != post_inc)
5938 {
5939 insn = emit_move_insn (r0, mem_rtx);
5940 mem_rtx = r0;
5941 }
5942 else if (TARGET_REGISTER_P (reg))
5943 {
5944 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5945
5946 /* Give the scheduler a bit of freedom by using up to
5947 MAX_TEMPS registers in a round-robin fashion. */
5948 insn = emit_move_insn (tmp_reg, mem_rtx);
5949 mem_rtx = tmp_reg;
5950 if (*++tmp_pnt < 0)
5951 tmp_pnt = schedule.temps;
5952 }
5953
5954 insn = emit_move_insn (reg_rtx, mem_rtx);
5955 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5956 /* This is dead, unless we return with a sibcall. */
5957 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5958 const0_rtx,
5959 REG_NOTES (insn));
5960 }
5961
5962 if (entry->offset + offset_base != d + d_rounding)
5963 abort ();
5964 }
5965 else /* ! TARGET_SH5 */
5966 {
5967 save_size = 0;
5968 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5969 pop (PR_REG);
5970 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5971 {
5972 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5973
5974 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5975 && hard_regs_intersect_p (&live_regs_mask,
5976 &reg_class_contents[DF_REGS]))
5977 fpscr_deferred = 1;
5978 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5979 pop (j);
5980 if (j == FIRST_FP_REG && fpscr_deferred)
5981 pop (FPSCR_REG);
5982
5983 }
5984 }
5985 if (target_flags != save_flags && ! current_function_interrupt)
5986 emit_insn (gen_toggle_sz ());
5987 target_flags = save_flags;
5988
5989 output_stack_adjust (current_function_pretend_args_size
5990 + save_size + d_rounding
5991 + current_function_args_info.stack_regs * 8,
5992 stack_pointer_rtx, e, NULL);
5993
5994 if (current_function_calls_eh_return)
5995 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5996 EH_RETURN_STACKADJ_RTX));
5997
5998 /* Switch back to the normal stack if necessary. */
5999 if (sp_switch)
6000 emit_insn (gen_sp_switch_2 ());
6001
6002 /* Tell flow the insn that pops PR isn't dead. */
6003 /* PR_REG will never be live in SHmedia mode, and we don't need to
6004 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6005 by the return pattern. */
6006 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6007 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6008 }
6009
6010 static int sh_need_epilogue_known = 0;
6011
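/* Return nonzero if the current function needs an epilogue.  The answer
   is computed once, by expanding the epilogue into a scratch sequence and
   checking whether it is empty, and is cached in sh_need_epilogue_known.  */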
6012 int
6013 sh_need_epilogue (void)
6014 {
6015 if (! sh_need_epilogue_known)
6016 {
6017 rtx epilogue;
6018
6019 start_sequence ();
6020 sh_expand_epilogue (0);
6021 epilogue = get_insns ();
6022 end_sequence ();
6023 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6024 }
6025 return sh_need_epilogue_known > 0;
6026 }
6027
6028 /* Emit code to change the current function's return address to RA.
6029 TEMP is available as a scratch register, if needed. */
6030
6031 void
6032 sh_set_return_address (rtx ra, rtx tmp)
6033 {
6034 HARD_REG_SET live_regs_mask;
6035 int d;
6036 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6037 int pr_offset;
6038
6039 d = calc_live_regs (&live_regs_mask);
6040
6041 /* If pr_reg isn't live, we can set it (or the register given in
6042 sh_media_register_for_return) directly. */
6043 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6044 {
6045 rtx rr;
6046
6047 if (TARGET_SHMEDIA)
6048 {
6049 int rr_regno = sh_media_register_for_return ();
6050
6051 if (rr_regno < 0)
6052 rr_regno = pr_reg;
6053
6054 rr = gen_rtx_REG (DImode, rr_regno);
6055 }
6056 else
6057 rr = gen_rtx_REG (SImode, pr_reg);
6058
6059 emit_insn (GEN_MOV (rr, ra));
6060 /* Tell flow the register for return isn't dead. */
6061 emit_insn (gen_rtx_USE (VOIDmode, rr));
6062 return;
6063 }
6064
6065 if (TARGET_SH5)
6066 {
6067 int offset;
6068 save_schedule schedule;
6069 save_entry *entry;
6070
6071 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6072 offset = entry[1].offset;
6073 for (; entry->mode != VOIDmode; entry--)
6074 if (entry->reg == pr_reg)
6075 goto found;
6076
6077 /* We can't find the pr register. */
6078 abort ();
6079
6080 found:
6081 offset = entry->offset - offset;
6082 pr_offset = (rounded_frame_size (d) + offset
6083 + SHMEDIA_REGS_STACK_ADJUST ());
6084 }
6085 else
6086 pr_offset = rounded_frame_size (d);
6087
6088 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6089 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6090
6091 tmp = gen_rtx_MEM (Pmode, tmp);
6092 emit_insn (GEN_MOV (tmp, ra));
6093 }
6094
6095 /* Clear variables at function end. */
6096
6097 static void
6098 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6099 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6100 {
6101 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6102 sh_need_epilogue_known = 0;
6103 sp_switch = NULL_RTX;
6104 }
6105
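/* Worker for __builtin_saveregs: make the anonymous integer and
   floating-point argument registers addressable in a register-save
   buffer and return the address of that buffer.  Used by sh_va_start
   below.  */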
6106 static rtx
6107 sh_builtin_saveregs (void)
6108 {
6109 /* First unnamed integer register. */
6110 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6111 /* Number of integer registers we need to save. */
6112 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6113 /* First unnamed SFmode float reg */
6114 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6115 /* Number of SFmode float regs to save. */
6116 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6117 rtx regbuf, fpregs;
6118 int bufsize, regno;
6119 HOST_WIDE_INT alias_set;
6120
6121 if (TARGET_SH5)
6122 {
6123 if (n_intregs)
6124 {
6125 int pushregs = n_intregs;
6126
6127 while (pushregs < NPARM_REGS (SImode) - 1
6128 && (CALL_COOKIE_INT_REG_GET
6129 (current_function_args_info.call_cookie,
6130 NPARM_REGS (SImode) - pushregs)
6131 == 1))
6132 {
6133 current_function_args_info.call_cookie
6134 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6135 - pushregs, 1);
6136 pushregs++;
6137 }
6138
6139 if (pushregs == NPARM_REGS (SImode))
6140 current_function_args_info.call_cookie
6141 |= (CALL_COOKIE_INT_REG (0, 1)
6142 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6143 else
6144 current_function_args_info.call_cookie
6145 |= CALL_COOKIE_STACKSEQ (pushregs);
6146
6147 current_function_pretend_args_size += 8 * n_intregs;
6148 }
6149 if (TARGET_SHCOMPACT)
6150 return const0_rtx;
6151 }
6152
6153 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6154 {
6155 error ("__builtin_saveregs not supported by this subtarget");
6156 return const0_rtx;
6157 }
6158
6159 if (TARGET_SHMEDIA)
6160 n_floatregs = 0;
6161
6162 /* Allocate block of memory for the regs. */
6163 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6164 Or can assign_stack_local accept a 0 SIZE argument? */
6165 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6166
6167 if (TARGET_SHMEDIA)
6168 regbuf = gen_rtx_MEM (BLKmode,
6169 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6170 else if (n_floatregs & 1)
6171 {
6172 rtx addr;
6173
6174 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6175 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6176 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6177 regbuf = change_address (regbuf, BLKmode, addr);
6178 }
6179 else
6180 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6181 alias_set = get_varargs_alias_set ();
6182 set_mem_alias_set (regbuf, alias_set);
6183
6184 /* Save int args.
6185 This is optimized to only save the regs that are necessary. Explicitly
6186 named args need not be saved. */
6187 if (n_intregs > 0)
6188 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6189 adjust_address (regbuf, BLKmode,
6190 n_floatregs * UNITS_PER_WORD),
6191 n_intregs);
6192
6193 if (TARGET_SHMEDIA)
6194 /* Return the address of the regbuf. */
6195 return XEXP (regbuf, 0);
6196
6197 /* Save float args.
6198 This is optimized to only save the regs that are necessary. Explicitly
6199 named args need not be saved.
6200 We explicitly build a pointer to the buffer because it halves the insn
6201 count when not optimizing (otherwise the pointer is built for each reg
6202 saved).
6203 We emit the moves in reverse order so that we can use predecrement. */
6204
6205 fpregs = gen_reg_rtx (Pmode);
6206 emit_move_insn (fpregs, XEXP (regbuf, 0));
6207 emit_insn (gen_addsi3 (fpregs, fpregs,
6208 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6209 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6210 {
6211 rtx mem;
6212 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6213 {
6214 emit_insn (gen_addsi3 (fpregs, fpregs,
6215 GEN_INT (-2 * UNITS_PER_WORD)));
6216 mem = gen_rtx_MEM (DFmode, fpregs);
6217 set_mem_alias_set (mem, alias_set);
6218 emit_move_insn (mem,
6219 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6220 }
6221 regno = first_floatreg;
6222 if (regno & 1)
6223 {
6224 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6225 mem = gen_rtx_MEM (SFmode, fpregs);
6226 set_mem_alias_set (mem, alias_set);
6227 emit_move_insn (mem,
6228 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6229 - (TARGET_LITTLE_ENDIAN != 0)));
6230 }
6231 }
6232 else
6233 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6234 {
6235 rtx mem;
6236
6237 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6238 mem = gen_rtx_MEM (SFmode, fpregs);
6239 set_mem_alias_set (mem, alias_set);
6240 emit_move_insn (mem,
6241 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6242 }
6243
6244 /* Return the address of the regbuf. */
6245 return XEXP (regbuf, 0);
6246 }
6247
6248 /* Define the `__builtin_va_list' type for the ABI. */
6249
6250 static tree
6251 sh_build_builtin_va_list (void)
6252 {
6253 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6254 tree record;
6255
6256 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6257 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6258 return ptr_type_node;
6259
6260 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6261
6262 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6263 ptr_type_node);
6264 f_next_o_limit = build_decl (FIELD_DECL,
6265 get_identifier ("__va_next_o_limit"),
6266 ptr_type_node);
6267 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6268 ptr_type_node);
6269 f_next_fp_limit = build_decl (FIELD_DECL,
6270 get_identifier ("__va_next_fp_limit"),
6271 ptr_type_node);
6272 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6273 ptr_type_node);
6274
6275 DECL_FIELD_CONTEXT (f_next_o) = record;
6276 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6277 DECL_FIELD_CONTEXT (f_next_fp) = record;
6278 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6279 DECL_FIELD_CONTEXT (f_next_stack) = record;
6280
6281 TYPE_FIELDS (record) = f_next_o;
6282 TREE_CHAIN (f_next_o) = f_next_o_limit;
6283 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6284 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6285 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6286
6287 layout_type (record);
6288
6289 return record;
6290 }
6291
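/* For reference, on the non-SH5 targets handled above, the record built
   by sh_build_builtin_va_list corresponds roughly to the following C
   structure (an illustrative sketch only; the real type is built in tree
   form above):

     struct __va_list_sketch
     {
       void *__va_next_o;         next slot in the saved integer arg regs
       void *__va_next_o_limit;   end of the integer register save area
       void *__va_next_fp;        next slot in the saved FP arg regs
       void *__va_next_fp_limit;  end of the FP register save area
       void *__va_next_stack;     next argument passed on the stack
     };

   sh_va_start below initializes these fields from the buffer returned
   by __builtin_saveregs.  */
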
6292 /* Implement `va_start' for varargs and stdarg. */
6293
6294 void
6295 sh_va_start (tree valist, rtx nextarg)
6296 {
6297 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6298 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6299 tree t, u;
6300 int nfp, nint;
6301
6302 if (TARGET_SH5)
6303 {
6304 expand_builtin_saveregs ();
6305 std_expand_builtin_va_start (valist, nextarg);
6306 return;
6307 }
6308
6309 if ((! TARGET_SH2E && ! TARGET_SH4)
6310 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6311 {
6312 std_expand_builtin_va_start (valist, nextarg);
6313 return;
6314 }
6315
6316 f_next_o = TYPE_FIELDS (va_list_type_node);
6317 f_next_o_limit = TREE_CHAIN (f_next_o);
6318 f_next_fp = TREE_CHAIN (f_next_o_limit);
6319 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6320 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6321
6322 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6323 NULL_TREE);
6324 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6325 valist, f_next_o_limit, NULL_TREE);
6326 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6327 NULL_TREE);
6328 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6329 valist, f_next_fp_limit, NULL_TREE);
6330 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6331 valist, f_next_stack, NULL_TREE);
6332
6333 /* Call __builtin_saveregs. */
6334 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6335 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6336 TREE_SIDE_EFFECTS (t) = 1;
6337 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6338
6339 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6340 if (nfp < 8)
6341 nfp = 8 - nfp;
6342 else
6343 nfp = 0;
6344 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6345 build_int_2 (UNITS_PER_WORD * nfp, 0)));
6346 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6347 TREE_SIDE_EFFECTS (t) = 1;
6348 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6349
6350 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6351 TREE_SIDE_EFFECTS (t) = 1;
6352 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6353
6354 nint = current_function_args_info.arg_count[SH_ARG_INT];
6355 if (nint < 4)
6356 nint = 4 - nint;
6357 else
6358 nint = 0;
6359 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6360 build_int_2 (UNITS_PER_WORD * nint, 0)));
6361 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6362 TREE_SIDE_EFFECTS (t) = 1;
6363 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6364
6365 u = make_tree (ptr_type_node, nextarg);
6366 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6367 TREE_SIDE_EFFECTS (t) = 1;
6368 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6369 }
6370
6371 /* Implement `va_arg'. */
6372
6373 static tree
6374 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6375 tree *post_p ATTRIBUTE_UNUSED)
6376 {
6377 HOST_WIDE_INT size, rsize;
6378 tree tmp, pptr_type_node;
6379 tree addr, lab_over, result = NULL;
6380 int pass_by_ref = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6381
6382 if (pass_by_ref)
6383 type = build_pointer_type (type);
6384
6385 size = int_size_in_bytes (type);
6386 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6387 pptr_type_node = build_pointer_type (ptr_type_node);
6388
6389 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6390 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6391 {
6392 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6393 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6394 int pass_as_float;
6395 tree lab_false;
6396
6397 f_next_o = TYPE_FIELDS (va_list_type_node);
6398 f_next_o_limit = TREE_CHAIN (f_next_o);
6399 f_next_fp = TREE_CHAIN (f_next_o_limit);
6400 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6401 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6402
6403 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6404 NULL_TREE);
6405 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6406 valist, f_next_o_limit, NULL_TREE);
6407 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6408 valist, f_next_fp, NULL_TREE);
6409 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6410 valist, f_next_fp_limit, NULL_TREE);
6411 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6412 valist, f_next_stack, NULL_TREE);
6413
6414 /* Structures with a single member with a distinct mode are passed
6415 like their member. This is relevant if the latter has a REAL_TYPE
6416 or COMPLEX_TYPE type. */
6417 if (TREE_CODE (type) == RECORD_TYPE
6418 && TYPE_FIELDS (type)
6419 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6420 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6421 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6422 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6423 type = TREE_TYPE (TYPE_FIELDS (type));
6424
6425 if (TARGET_SH4)
6426 {
6427 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6428 || (TREE_CODE (type) == COMPLEX_TYPE
6429 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6430 && size <= 16));
6431 }
6432 else
6433 {
6434 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6435 }
6436
6437 addr = create_tmp_var (pptr_type_node, NULL);
6438 lab_false = create_artificial_label ();
6439 lab_over = create_artificial_label ();
6440
6441 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6442
6443 if (pass_as_float)
6444 {
6445 int first_floatreg
6446 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6447 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6448
6449 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6450 tmp = build (COND_EXPR, void_type_node, tmp,
6451 build (GOTO_EXPR, void_type_node, lab_false),
6452 NULL);
6453 gimplify_and_add (tmp, pre_p);
6454
6455 if (TYPE_ALIGN (type) > BITS_PER_WORD
6456 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6457 && (n_floatregs & 1)))
6458 {
6459 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6460 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6461 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6462 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6463 gimplify_and_add (tmp, pre_p);
6464 }
6465
6466 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6467 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6468 gimplify_and_add (tmp, pre_p);
6469
6470 #ifdef FUNCTION_ARG_SCmode_WART
6471 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6472 {
6473 tree subtype = TREE_TYPE (type);
6474 tree real, imag;
6475
6476 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6477 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6478
6479 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6480 real = get_initialized_tmp_var (real, pre_p, NULL);
6481
6482 result = build (COMPLEX_EXPR, type, real, imag);
6483 result = get_initialized_tmp_var (result, pre_p, NULL);
6484 }
6485 #endif /* FUNCTION_ARG_SCmode_WART */
6486
6487 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6488 gimplify_and_add (tmp, pre_p);
6489
6490 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6491 gimplify_and_add (tmp, pre_p);
6492
6493 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6494 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6495 gimplify_and_add (tmp, pre_p);
6496 }
6497 else
6498 {
6499 tmp = fold_convert (ptr_type_node, size_int (rsize));
6500 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6501 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6502 tmp = build (COND_EXPR, void_type_node, tmp,
6503 build (GOTO_EXPR, void_type_node, lab_false),
6504 NULL);
6505 gimplify_and_add (tmp, pre_p);
6506
6507 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6508 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6509 gimplify_and_add (tmp, pre_p);
6510
6511 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6512 gimplify_and_add (tmp, pre_p);
6513
6514 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6515 gimplify_and_add (tmp, pre_p);
6516
6517 if (size > 4 && ! TARGET_SH4)
6518 {
6519 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6520 gimplify_and_add (tmp, pre_p);
6521 }
6522
6523 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6524 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6525 gimplify_and_add (tmp, pre_p);
6526 }
6527
6528 if (!result)
6529 {
6530 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6531 gimplify_and_add (tmp, pre_p);
6532 }
6533 }
6534
6535 /* ??? In va-sh.h, there had been code to make values larger than
6536 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6537
6538 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6539 if (result)
6540 {
6541 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6542 gimplify_and_add (tmp, pre_p);
6543
6544 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6545 gimplify_and_add (tmp, pre_p);
6546 }
6547 else
6548 result = tmp;
6549
6550 if (pass_by_ref)
6551 result = build_fold_indirect_ref (result);
6552
6553 return result;
6554 }
6555
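/* Return true if arguments narrower than int should be promoted to int
   when a prototype is in scope; the Hitachi/Renesas calling conventions
   do not perform this promotion.  */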
6556 bool
6557 sh_promote_prototypes (tree type)
6558 {
6559 if (TARGET_HITACHI)
6560 return 0;
6561 if (! type)
6562 return 1;
6563 return ! sh_attr_renesas_p (type);
6564 }
6565
6566 /* Whether an argument must be passed by reference. On SHcompact, we
6567 pretend that arguments wider than 32 bits that would have been passed
6568 in registers are passed by reference, so that an SHmedia trampoline
6569 loads them into the full 64-bit registers. */
6570
6571 static int
6572 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6573 tree type, bool named)
6574 {
6575 unsigned HOST_WIDE_INT size;
6576
6577 if (type)
6578 size = int_size_in_bytes (type);
6579 else
6580 size = GET_MODE_SIZE (mode);
6581
6582 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6583 && (!named
6584 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6585 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6586 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6587 && size > 4
6588 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6589 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6590 return size;
6591 else
6592 return 0;
6593 }
6594
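/* Decide whether an argument of the given MODE and TYPE is passed by
   reference: anything that must be passed on the stack is, and on
   SHcompact so are the wide register arguments computed by
   shcompact_byref above.  */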
6595 static bool
6596 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6597 tree type, bool named)
6598 {
6599 if (targetm.calls.must_pass_in_stack (mode, type))
6600 return true;
6601
6602 if (TARGET_SHCOMPACT)
6603 {
6604 cum->byref = shcompact_byref (cum, mode, type, named);
6605 return cum->byref != 0;
6606 }
6607
6608 return false;
6609 }
6610
6611 /* Define where to put the arguments to a function.
6612 Value is zero to push the argument on the stack,
6613 or a hard register in which to store the argument.
6614
6615 MODE is the argument's machine mode.
6616 TYPE is the data type of the argument (as a tree).
6617 This is null for libcalls where that information may
6618 not be available.
6619 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6620 the preceding args and about the function being called.
6621 NAMED is nonzero if this argument is a named parameter
6622 (otherwise it is an extra parameter matching an ellipsis).
6623
6624 On SH the first args are normally in registers
6625 and the rest are pushed. Any arg that starts within the first
6626 NPARM_REGS words is at least partially passed in a register unless
6627 its data type forbids. */
6628
6629
6630 rtx
6631 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6632 tree type, int named)
6633 {
6634 if (! TARGET_SH5 && mode == VOIDmode)
6635 return GEN_INT (ca->renesas_abi ? 1 : 0);
6636
6637 if (! TARGET_SH5
6638 && PASS_IN_REG_P (*ca, mode, type)
6639 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6640 {
6641 int regno;
6642
6643 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6644 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6645 {
6646 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6647 gen_rtx_REG (SFmode,
6648 BASE_ARG_REG (mode)
6649 + (ROUND_REG (*ca, mode) ^ 1)),
6650 const0_rtx);
6651 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6652 gen_rtx_REG (SFmode,
6653 BASE_ARG_REG (mode)
6654 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6655 GEN_INT (4));
6656 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6657 }
6658
6659 /* If the alignment of a DF value causes an SF register to be
6660 skipped, we will use that skipped register for the next SF
6661 value. */
6662 if ((TARGET_HITACHI || ca->renesas_abi)
6663 && ca->free_single_fp_reg
6664 && mode == SFmode)
6665 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6666
6667 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6668 ^ (mode == SFmode && TARGET_SH4
6669 && TARGET_LITTLE_ENDIAN != 0
6670 && ! TARGET_HITACHI && ! ca->renesas_abi);
6671 return gen_rtx_REG (mode, regno);
6672
6673 }
6674
6675 if (TARGET_SH5)
6676 {
6677 if (mode == VOIDmode && TARGET_SHCOMPACT)
6678 return GEN_INT (ca->call_cookie);
6679
6680 /* The following test assumes unnamed arguments are promoted to
6681 DFmode. */
6682 if (mode == SFmode && ca->free_single_fp_reg)
6683 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6684
6685 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6686 && (named || ! ca->prototype_p)
6687 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6688 {
6689 if (! ca->prototype_p && TARGET_SHMEDIA)
6690 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6691
6692 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6693 FIRST_FP_PARM_REG
6694 + ca->arg_count[(int) SH_ARG_FLOAT]);
6695 }
6696
6697 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6698 && (! TARGET_SHCOMPACT
6699 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6700 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6701 type, named))))
6702 {
6703 return gen_rtx_REG (mode, (FIRST_PARM_REG
6704 + ca->arg_count[(int) SH_ARG_INT]));
6705 }
6706
6707 return 0;
6708 }
6709
6710 return 0;
6711 }
6712
6713 /* Update the data in CUM to advance over an argument
6714 of mode MODE and data type TYPE.
6715 (TYPE is null for libcalls where that information may not be
6716 available.) */
6717
6718 void
6719 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6720 tree type, int named)
6721 {
6722 if (ca->force_mem)
6723 ca->force_mem = 0;
6724 else if (TARGET_SH5)
6725 {
6726 tree type2 = (ca->byref && type
6727 ? TREE_TYPE (type)
6728 : type);
6729 enum machine_mode mode2 = (ca->byref && type
6730 ? TYPE_MODE (type2)
6731 : mode);
6732 int dwords = ((ca->byref
6733 ? ca->byref
6734 : mode2 == BLKmode
6735 ? int_size_in_bytes (type2)
6736 : GET_MODE_SIZE (mode2)) + 7) / 8;
6737 int numregs = MIN (dwords, NPARM_REGS (SImode)
6738 - ca->arg_count[(int) SH_ARG_INT]);
6739
6740 if (numregs)
6741 {
6742 ca->arg_count[(int) SH_ARG_INT] += numregs;
6743 if (TARGET_SHCOMPACT
6744 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6745 {
6746 ca->call_cookie
6747 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6748 - numregs, 1);
6749 /* N.B. We want this also for outgoing. */
6750 ca->stack_regs += numregs;
6751 }
6752 else if (ca->byref)
6753 {
6754 if (! ca->outgoing)
6755 ca->stack_regs += numregs;
6756 ca->byref_regs += numregs;
6757 ca->byref = 0;
6758 do
6759 ca->call_cookie
6760 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6761 - numregs, 2);
6762 while (--numregs);
6763 ca->call_cookie
6764 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6765 - 1, 1);
6766 }
6767 else if (dwords > numregs)
6768 {
6769 int pushregs = numregs;
6770
6771 if (TARGET_SHCOMPACT)
6772 ca->stack_regs += numregs;
6773 while (pushregs < NPARM_REGS (SImode) - 1
6774 && (CALL_COOKIE_INT_REG_GET
6775 (ca->call_cookie,
6776 NPARM_REGS (SImode) - pushregs)
6777 == 1))
6778 {
6779 ca->call_cookie
6780 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6781 - pushregs, 1);
6782 pushregs++;
6783 }
6784 if (numregs == NPARM_REGS (SImode))
6785 ca->call_cookie
6786 |= CALL_COOKIE_INT_REG (0, 1)
6787 | CALL_COOKIE_STACKSEQ (numregs - 1);
6788 else
6789 ca->call_cookie
6790 |= CALL_COOKIE_STACKSEQ (numregs);
6791 }
6792 }
6793 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6794 && (named || ! ca->prototype_p))
6795 {
6796 if (mode2 == SFmode && ca->free_single_fp_reg)
6797 ca->free_single_fp_reg = 0;
6798 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6799 < NPARM_REGS (SFmode))
6800 {
6801 int numfpregs
6802 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6803 NPARM_REGS (SFmode)
6804 - ca->arg_count[(int) SH_ARG_FLOAT]);
6805
6806 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6807
6808 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6809 {
6810 if (ca->outgoing && numregs > 0)
6811 do
6812 {
6813 ca->call_cookie
6814 |= (CALL_COOKIE_INT_REG
6815 (ca->arg_count[(int) SH_ARG_INT]
6816 - numregs + ((numfpregs - 2) / 2),
6817 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6818 - numfpregs) / 2));
6819 }
6820 while (numfpregs -= 2);
6821 }
6822 else if (mode2 == SFmode && (named)
6823 && (ca->arg_count[(int) SH_ARG_FLOAT]
6824 < NPARM_REGS (SFmode)))
6825 ca->free_single_fp_reg
6826 = FIRST_FP_PARM_REG - numfpregs
6827 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6828 }
6829 }
6830 return;
6831 }
6832
6833 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6834 {
6835 /* Note that we've used the skipped register. */
6836 if (mode == SFmode && ca->free_single_fp_reg)
6837 {
6838 ca->free_single_fp_reg = 0;
6839 return;
6840 }
6841 /* When we have a DF after an SF, there's an SF register that gets
6842 skipped in order to align the DF value. We note this skipped
6843 register, because the next SF value will use it, and not the
6844 SF that follows the DF. */
6845 if (mode == DFmode
6846 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6847 {
6848 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6849 + BASE_ARG_REG (mode));
6850 }
6851 }
6852
6853 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
6854 || PASS_IN_REG_P (*ca, mode, type))
6855 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6856 = (ROUND_REG (*ca, mode)
6857 + (mode == BLKmode
6858 ? ROUND_ADVANCE (int_size_in_bytes (type))
6859 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6860 }
6861
6862 /* The Renesas calling convention doesn't quite fit into this scheme since
6863 the structure value address is passed like an invisible argument, but
6864 one that is always passed in memory. */
6865 static rtx
6866 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6867 {
6868 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6869 return 0;
6870 return gen_rtx_REG (Pmode, 2);
6871 }
6872
6873 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6874
6875 static bool
6876 sh_return_in_memory (tree type, tree fndecl)
6877 {
6878 if (TARGET_SH5)
6879 {
6880 if (TYPE_MODE (type) == BLKmode)
6881 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6882 else
6883 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6884 }
6885 else
6886 {
6887 return (TYPE_MODE (type) == BLKmode
6888 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6889 && TREE_CODE (type) == RECORD_TYPE));
6890 }
6891 }
6892
6893 /* We actually emit the code in sh_expand_prologue. We used to use
6894 a static variable to flag that we need to emit this code, but that
6895 doesn't work when inlining, when functions are deferred and then
6896 emitted later. Fortunately, we already have two flags that are part
6897 of struct function that tell if a function uses varargs or stdarg. */
6898 static void
6899 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6900 enum machine_mode mode,
6901 tree type,
6902 int *pretend_arg_size,
6903 int second_time ATTRIBUTE_UNUSED)
6904 {
6905 if (! current_function_stdarg)
6906 abort ();
6907 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6908 {
6909 int named_parm_regs, anon_parm_regs;
6910
6911 named_parm_regs = (ROUND_REG (*ca, mode)
6912 + (mode == BLKmode
6913 ? ROUND_ADVANCE (int_size_in_bytes (type))
6914 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6915 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6916 if (anon_parm_regs > 0)
6917 *pretend_arg_size = anon_parm_regs * 4;
6918 }
6919 }
6920
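/* Return true when the distinction between named and unnamed arguments
   affects how they are passed; this is the case for the SH5 ABI (see the
   use of NAMED in sh_function_arg above).  */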
6921 static bool
6922 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6923 {
6924 return TARGET_SH5;
6925 }
6926
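/* Return true if unnamed outgoing arguments may be passed as if they
   were named; not so for the Hitachi/Renesas conventions or for SH5.  */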
6927 static bool
6928 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6929 {
6930 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6931 }
6932
6933
6934 /* Define the offset between two registers, one to be eliminated, and
6935 the other its replacement, at the start of a routine. */
6936
6937 int
6938 initial_elimination_offset (int from, int to)
6939 {
6940 int regs_saved;
6941 int regs_saved_rounding = 0;
6942 int total_saved_regs_space;
6943 int total_auto_space;
6944 int save_flags = target_flags;
6945 int copy_flags;
6946 HARD_REG_SET live_regs_mask;
6947
6948 shmedia_space_reserved_for_target_registers = false;
6949 regs_saved = calc_live_regs (&live_regs_mask);
6950 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6951
6952 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6953 {
6954 shmedia_space_reserved_for_target_registers = true;
6955 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
6956 }
6957
6958 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6959 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6960 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6961
6962 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
6963 copy_flags = target_flags;
6964 target_flags = save_flags;
6965
6966 total_saved_regs_space = regs_saved + regs_saved_rounding;
6967
6968 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6969 return total_saved_regs_space + total_auto_space
6970 + current_function_args_info.byref_regs * 8;
6971
6972 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6973 return total_saved_regs_space + total_auto_space
6974 + current_function_args_info.byref_regs * 8;
6975
6976 /* Initial gap between fp and sp is 0. */
6977 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6978 return 0;
6979
6980 if (from == RETURN_ADDRESS_POINTER_REGNUM
6981 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
6982 {
6983 if (TARGET_SH5)
6984 {
6985 int n = total_saved_regs_space;
6986 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6987 save_schedule schedule;
6988 save_entry *entry;
6989
6990 n += total_auto_space;
6991
6992 /* If it wasn't saved, there's not much we can do. */
6993 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6994 return n;
6995
6996 target_flags = copy_flags;
6997
6998 sh5_schedule_saves (&live_regs_mask, &schedule, n);
6999 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7000 if (entry->reg == pr_reg)
7001 {
7002 target_flags = save_flags;
7003 return entry->offset;
7004 }
7005 abort ();
7006 }
7007 else
7008 return total_auto_space;
7009 }
7010
7011 abort ();
7012 }
7013 \f
7014 /* Handle machine specific pragmas to be semi-compatible with the
7015 Renesas compiler. */
7016
7017 void
7018 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7019 {
7020 pragma_interrupt = 1;
7021 }
7022
7023 void
7024 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7025 {
7026 pragma_interrupt = pragma_trapa = 1;
7027 }
7028
7029 void
7030 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7031 {
7032 pragma_nosave_low_regs = 1;
7033 }
7034
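/* A typical (illustrative) use of these pragmas in user code is

     #pragma interrupt
     void isr (void);

   where the pragma applies to the function that follows it; see
   sh_insert_attributes below.  */
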
7035 /* Generate an 'interrupt_handler' attribute for decls. */
7036
7037 static void
7038 sh_insert_attributes (tree node, tree *attributes)
7039 {
7040 if (! pragma_interrupt
7041 || TREE_CODE (node) != FUNCTION_DECL)
7042 return;
7043
7044 /* We are only interested in declarations. */
7045 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
7046 return;
7047
7048 /* Add an 'interrupt_handler' attribute. */
7049 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7050
7051 return;
7052 }
7053
7054 /* Supported attributes:
7055
7056 interrupt_handler -- specifies this function is an interrupt handler.
7057
7058 sp_switch -- specifies an alternate stack for an interrupt handler
7059 to run on.
7060
7061 trap_exit -- use a trapa to exit an interrupt function instead of
7062 an rte instruction.
7063
7064 renesas -- use Renesas calling/layout conventions (functions and
7065 structures).
7066
7067 */
7068
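/* An illustrative example of how these attributes are written in user
   code (the identifiers here are made up):

     void handler (void)
       __attribute__ ((interrupt_handler,
                       sp_switch ("alt_stack"),
                       trap_exit (11)));

   where "alt_stack" names a variable holding the address of the
   alternate stack and 11 is the trapa number used on function exit.  */
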
7069 const struct attribute_spec sh_attribute_table[] =
7070 {
7071 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7072 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7073 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7074 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7075 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7076 #ifdef SYMBIAN
7077 /* Symbian support adds two new attributes:
7078 dllexport - for exporting a function/variable that will live in a dll
7079 dllimport - for importing a function/variable from a dll
7080
7081 Microsoft allows multiple declspecs in one __declspec, separating
7082 them with spaces. We do NOT support this. Instead, use __declspec
7083 multiple times. */
7084 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7085 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7086 #endif
7087 { NULL, 0, 0, false, false, false, NULL }
7088 };
7089
7090 /* Handle an "interrupt_handler" attribute; arguments as in
7091 struct attribute_spec.handler. */
7092 static tree
7093 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7094 tree args ATTRIBUTE_UNUSED,
7095 int flags ATTRIBUTE_UNUSED,
7096 bool *no_add_attrs)
7097 {
7098 if (TREE_CODE (*node) != FUNCTION_DECL)
7099 {
7100 warning ("`%s' attribute only applies to functions",
7101 IDENTIFIER_POINTER (name));
7102 *no_add_attrs = true;
7103 }
7104 else if (TARGET_SHCOMPACT)
7105 {
7106 error ("attribute interrupt_handler is not compatible with -m5-compact");
7107 *no_add_attrs = true;
7108 }
7109
7110 return NULL_TREE;
7111 }
7112
7113 /* Handle an "sp_switch" attribute; arguments as in
7114 struct attribute_spec.handler. */
7115 static tree
7116 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7117 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7118 {
7119 if (TREE_CODE (*node) != FUNCTION_DECL)
7120 {
7121 warning ("`%s' attribute only applies to functions",
7122 IDENTIFIER_POINTER (name));
7123 *no_add_attrs = true;
7124 }
7125 else if (!pragma_interrupt)
7126 {
7127 /* The sp_switch attribute only has meaning for interrupt functions. */
7128 warning ("`%s' attribute only applies to interrupt functions",
7129 IDENTIFIER_POINTER (name));
7130 *no_add_attrs = true;
7131 }
7132 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7133 {
7134 /* The argument must be a constant string. */
7135 warning ("`%s' attribute argument not a string constant",
7136 IDENTIFIER_POINTER (name));
7137 *no_add_attrs = true;
7138 }
7139 else
7140 {
7141 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
7142 TREE_STRING_POINTER (TREE_VALUE (args)));
7143 }
7144
7145 return NULL_TREE;
7146 }
7147
7148 /* Handle a "trap_exit" attribute; arguments as in
7149 struct attribute_spec.handler. */
7150 static tree
7151 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7152 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7153 {
7154 if (TREE_CODE (*node) != FUNCTION_DECL)
7155 {
7156 warning ("`%s' attribute only applies to functions",
7157 IDENTIFIER_POINTER (name));
7158 *no_add_attrs = true;
7159 }
7160 else if (!pragma_interrupt)
7161 {
7162 /* The trap_exit attribute only has meaning for interrupt functions. */
7163 warning ("`%s' attribute only applies to interrupt functions",
7164 IDENTIFIER_POINTER (name));
7165 *no_add_attrs = true;
7166 }
7167 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7168 {
7169 /* The argument must be a constant integer. */
7170 warning ("`%s' attribute argument not an integer constant",
7171 IDENTIFIER_POINTER (name));
7172 *no_add_attrs = true;
7173 }
7174 else
7175 {
7176 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7177 }
7178
7179 return NULL_TREE;
7180 }
7181
7182 static tree
7183 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7184 tree name ATTRIBUTE_UNUSED,
7185 tree args ATTRIBUTE_UNUSED,
7186 int flags ATTRIBUTE_UNUSED,
7187 bool *no_add_attrs ATTRIBUTE_UNUSED)
7188 {
7189 return NULL_TREE;
7190 }
7191
7192 /* True if __attribute__((renesas)) or -mrenesas. */
7193 int
7194 sh_attr_renesas_p (tree td)
7195 {
7196 if (TARGET_HITACHI)
7197 return 1;
7198 if (td == 0)
7199 return 0;
7200 if (DECL_P (td))
7201 td = TREE_TYPE (td);
7202 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7203 != NULL_TREE);
7204 }
7205
7206 /* True if __attribute__((renesas)) or -mrenesas, for the current
7207 function. */
7208 int
7209 sh_cfun_attr_renesas_p (void)
7210 {
7211 return sh_attr_renesas_p (current_function_decl);
7212 }
7213
7214 int
7215 sh_cfun_interrupt_handler_p (void)
7216 {
7217 return (lookup_attribute ("interrupt_handler",
7218 DECL_ATTRIBUTES (current_function_decl))
7219 != NULL_TREE);
7220 }
7221
7222 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7223 static const struct
7224 {
7225 const char *const name;
7226 const int value;
7227 const char *const description;
7228 }
7229 sh_target_switches[] = TARGET_SWITCHES;
7230 #define target_switches sh_target_switches
7231
7232 /* Like default_pch_valid_p, but take flag_mask into account. */
7233 const char *
7234 sh_pch_valid_p (const void *data_p, size_t len)
7235 {
7236 const char *data = (const char *)data_p;
7237 const char *flag_that_differs = NULL;
7238 size_t i;
7239 int old_flags;
7240 int flag_mask
7241 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7242 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7243
7244 /* -fpic and -fpie also usually make a PCH invalid. */
7245 if (data[0] != flag_pic)
7246 return _("created and used with different settings of -fpic");
7247 if (data[1] != flag_pie)
7248 return _("created and used with different settings of -fpie");
7249 data += 2;
7250
7251 /* Check target_flags. */
7252 memcpy (&old_flags, data, sizeof (target_flags));
7253 if (((old_flags ^ target_flags) & flag_mask) != 0)
7254 {
7255 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7256 {
7257 int bits;
7258
7259 bits = target_switches[i].value;
7260 if (bits < 0)
7261 bits = -bits;
7262 bits &= flag_mask;
7263 if ((target_flags & bits) != (old_flags & bits))
7264 {
7265 flag_that_differs = target_switches[i].name;
7266 goto make_message;
7267 }
7268 }
7269 abort ();
7270 }
7271 data += sizeof (target_flags);
7272 len -= sizeof (target_flags);
7273
7274 /* Check string options. */
7275 #ifdef TARGET_OPTIONS
7276 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7277 {
7278 const char *str = *target_options[i].variable;
7279 size_t l;
7280 if (! str)
7281 str = "";
7282 l = strlen (str) + 1;
7283 if (len < l || memcmp (data, str, l) != 0)
7284 {
7285 flag_that_differs = target_options[i].prefix;
7286 goto make_message;
7287 }
7288 data += l;
7289 len -= l;
7290 }
7291 #endif
7292
7293 return NULL;
7294
7295 make_message:
7296 {
7297 char *r;
7298 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7299 flag_that_differs);
7300 if (r == NULL)
7301 return _("out of memory");
7302 return r;
7303 }
7304 }
7305 \f
7306 /* Predicates used by the templates. */
7307
7308 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7309 Used only in general_movsrc_operand. */
7310
7311 int
7312 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7313 {
7314 switch (REGNO (op))
7315 {
7316 case PR_REG:
7317 case MACL_REG:
7318 case MACH_REG:
7319 return 1;
7320 }
7321 return 0;
7322 }
7323
7324 /* Returns 1 if OP can be source of a simple move operation.
7325 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7326 invalid, as are subregs of system registers. */
7327
7328 int
7329 general_movsrc_operand (rtx op, enum machine_mode mode)
7330 {
7331 if (GET_CODE (op) == MEM)
7332 {
7333 rtx inside = XEXP (op, 0);
7334 if (GET_CODE (inside) == CONST)
7335 inside = XEXP (inside, 0);
7336
7337 if (GET_CODE (inside) == LABEL_REF)
7338 return 1;
7339
7340 if (GET_CODE (inside) == PLUS
7341 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7342 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7343 return 1;
7344
7345 /* Only post inc allowed. */
7346 if (GET_CODE (inside) == PRE_DEC)
7347 return 0;
7348 }
7349
7350 if ((mode == QImode || mode == HImode)
7351 && (GET_CODE (op) == SUBREG
7352 && GET_CODE (XEXP (op, 0)) == REG
7353 && system_reg_operand (XEXP (op, 0), mode)))
7354 return 0;
7355
7356 return general_operand (op, mode);
7357 }
7358
7359 /* Returns 1 if OP can be a destination of a move.
7360 Same as general_operand, but no preinc allowed. */
7361
7362 int
7363 general_movdst_operand (rtx op, enum machine_mode mode)
7364 {
7365 /* Only pre dec allowed. */
7366 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7367 return 0;
7368
7369 return general_operand (op, mode);
7370 }
7371
7372 /* Returns 1 if OP is a normal arithmetic register. */
7373
7374 int
7375 arith_reg_operand (rtx op, enum machine_mode mode)
7376 {
7377 if (register_operand (op, mode))
7378 {
7379 int regno;
7380
7381 if (GET_CODE (op) == REG)
7382 regno = REGNO (op);
7383 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7384 regno = REGNO (SUBREG_REG (op));
7385 else
7386 return 1;
7387
7388 return (regno != T_REG && regno != PR_REG
7389 && ! TARGET_REGISTER_P (regno)
7390 && (regno != FPUL_REG || TARGET_SH4)
7391 && regno != MACH_REG && regno != MACL_REG);
7392 }
7393 return 0;
7394 }
7395
7396 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7397 because this would lead to missing sign extensions when truncating from
7398 DImode to SImode. */
7399 int
7400 arith_reg_dest (rtx op, enum machine_mode mode)
7401 {
7402 if (mode == DImode && GET_CODE (op) == SUBREG
7403 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7404 return 0;
7405 return arith_reg_operand (op, mode);
7406 }
7407
7408 int
7409 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7410 {
7411 enum machine_mode op_mode = GET_MODE (op);
7412
7413 if (GET_MODE_CLASS (op_mode) != MODE_INT
7414 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7415 return 0;
7416 if (! reload_completed)
7417 return 0;
7418 return true_regnum (op) <= LAST_GENERAL_REG;
7419 }
7420
7421 int
7422 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7423 {
7424 if (register_operand (op, mode))
7425 {
7426 int regno;
7427
7428 if (GET_CODE (op) == REG)
7429 regno = REGNO (op);
7430 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7431 regno = REGNO (SUBREG_REG (op));
7432 else
7433 return 1;
7434
7435 return (regno >= FIRST_PSEUDO_REGISTER
7436 || FP_REGISTER_P (regno));
7437 }
7438 return 0;
7439 }
7440
7441 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7442
7443 int
7444 arith_operand (rtx op, enum machine_mode mode)
7445 {
7446 if (arith_reg_operand (op, mode))
7447 return 1;
7448
7449 if (TARGET_SHMEDIA)
7450 {
7451 /* FIXME: We should be checking whether the CONST_INT fits in a
7452 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7453 attempting to transform a sequence of two 64-bit sets of the
7454 same register from literal constants into a set and an add,
7455 when the difference is too wide for an add. */
7456 if (GET_CODE (op) == CONST_INT
7457 || EXTRA_CONSTRAINT_C16 (op))
7458 return 1;
7459 else
7460 return 0;
7461 }
7462 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7463 return 1;
7464
7465 return 0;
7466 }
7467
7468 /* Returns 1 if OP is a valid source operand for a compare insn. */
7469
7470 int
7471 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7472 {
7473 if (arith_reg_operand (op, mode))
7474 return 1;
7475
7476 if (EXTRA_CONSTRAINT_Z (op))
7477 return 1;
7478
7479 return 0;
7480 }
7481
7482 /* Return 1 if OP is a valid source operand for an SHmedia operation
7483 that takes either a register or a 6-bit immediate. */
7484
7485 int
7486 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7487 {
7488 return (arith_reg_operand (op, mode)
7489 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7490 }
7491
7492 /* Returns 1 if OP is a valid source operand for a logical operation. */
7493
7494 int
7495 logical_operand (rtx op, enum machine_mode mode)
7496 {
7497 if (arith_reg_operand (op, mode))
7498 return 1;
7499
7500 if (TARGET_SHMEDIA)
7501 {
7502 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7503 return 1;
7504 else
7505 return 0;
7506 }
7507 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7508 return 1;
7509
7510 return 0;
7511 }
7512
7513 int
7514 and_operand (rtx op, enum machine_mode mode)
7515 {
7516 if (logical_operand (op, mode))
7517 return 1;
7518
7519 /* Check mshflo.l / mshflhi.l opportunities. */
7520 if (TARGET_SHMEDIA
7521 && mode == DImode
7522 && GET_CODE (op) == CONST_INT
7523 && CONST_OK_FOR_J16 (INTVAL (op)))
7524 return 1;
7525
7526 return 0;
7527 }
7528
7529 /* Nonzero if OP is a floating point value with value 0.0. */
7530
7531 int
7532 fp_zero_operand (rtx op)
7533 {
7534 REAL_VALUE_TYPE r;
7535
7536 if (GET_MODE (op) != SFmode)
7537 return 0;
7538
7539 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7540 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7541 }
7542
7543 /* Nonzero if OP is a floating point value with value 1.0. */
7544
7545 int
7546 fp_one_operand (rtx op)
7547 {
7548 REAL_VALUE_TYPE r;
7549
7550 if (GET_MODE (op) != SFmode)
7551 return 0;
7552
7553 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7554 return REAL_VALUES_EQUAL (r, dconst1);
7555 }
7556
7557 /* For -m4 and -m4-single-only, mode switching is used. If we are
7558 compiling without -mfmovd, movsf_ie isn't taken into account for
7559 mode switching. We could check in machine_dependent_reorg for
7560 cases where we know we are in single precision mode, but there is
7561 interface to find that out during reload, so we must avoid
7562 choosing an fldi alternative during reload and thus failing to
7563 allocate a scratch register for the constant loading. */
7564 int
7565 fldi_ok (void)
7566 {
7567 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7568 }
7569
7570 int
7571 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7572 {
7573 enum rtx_code code = GET_CODE (op);
7574 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7575 }
7576
7577 int
7578 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7579 {
7580 return (GET_CODE (op) == REG
7581 && (REGNO (op) == FPSCR_REG
7582 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7583 && !(reload_in_progress || reload_completed)))
7584 && GET_MODE (op) == PSImode);
7585 }
7586
7587 int
7588 fpul_operand (rtx op, enum machine_mode mode)
7589 {
7590 if (TARGET_SHMEDIA)
7591 return fp_arith_reg_operand (op, mode);
7592
7593 return (GET_CODE (op) == REG
7594 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7595 && GET_MODE (op) == mode);
7596 }
7597
7598 int
7599 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7600 {
7601 return (GET_CODE (op) == SYMBOL_REF);
7602 }
7603
7604 /* Return the TLS type for TLS symbols, 0 otherwise. */
7605 int
7606 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7607 {
7608 if (GET_CODE (op) != SYMBOL_REF)
7609 return 0;
7610 return SYMBOL_REF_TLS_MODEL (op);
7611 }
7612
7613 int
7614 commutative_float_operator (rtx op, enum machine_mode mode)
7615 {
7616 if (GET_MODE (op) != mode)
7617 return 0;
7618 switch (GET_CODE (op))
7619 {
7620 case PLUS:
7621 case MULT:
7622 return 1;
7623 default:
7624 break;
7625 }
7626 return 0;
7627 }
7628
7629 int
7630 noncommutative_float_operator (rtx op, enum machine_mode mode)
7631 {
7632 if (GET_MODE (op) != mode)
7633 return 0;
7634 switch (GET_CODE (op))
7635 {
7636 case MINUS:
7637 case DIV:
7638 return 1;
7639 default:
7640 break;
7641 }
7642 return 0;
7643 }
7644
7645 int
7646 unary_float_operator (rtx op, enum machine_mode mode)
7647 {
7648 if (GET_MODE (op) != mode)
7649 return 0;
7650 switch (GET_CODE (op))
7651 {
7652 case ABS:
7653 case NEG:
7654 case SQRT:
7655 return 1;
7656 default:
7657 break;
7658 }
7659 return 0;
7660 }
7661
7662 int
7663 binary_float_operator (rtx op, enum machine_mode mode)
7664 {
7665 if (GET_MODE (op) != mode)
7666 return 0;
7667 switch (GET_CODE (op))
7668 {
7669 case PLUS:
7670 case MINUS:
7671 case MULT:
7672 case DIV:
7673 return 1;
7674 default:
7675 break;
7676 }
7677 return 0;
7678 }
7679
7680 int
7681 binary_logical_operator (rtx op, enum machine_mode mode)
7682 {
7683 if (GET_MODE (op) != mode)
7684 return 0;
7685 switch (GET_CODE (op))
7686 {
7687 case IOR:
7688 case AND:
7689 case XOR:
7690 return 1;
7691 default:
7692 break;
7693 }
7694 return 0;
7695 }
7696
7697 int
7698 equality_comparison_operator (rtx op, enum machine_mode mode)
7699 {
7700 return ((mode == VOIDmode || GET_MODE (op) == mode)
7701 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7702 }
7703
7704 int
7705 greater_comparison_operator (rtx op, enum machine_mode mode)
7706 {
7707 if (mode != VOIDmode && GET_MODE (op) != mode)
7708 return 0;
7709 switch (GET_CODE (op))
7710 {
7711 case GT:
7712 case GE:
7713 case GTU:
7714 case GEU:
7715 return 1;
7716 default:
7717 return 0;
7718 }
7719 }
7720
7721 int
7722 less_comparison_operator (rtx op, enum machine_mode mode)
7723 {
7724 if (mode != VOIDmode && GET_MODE (op) != mode)
7725 return 0;
7726 switch (GET_CODE (op))
7727 {
7728 case LT:
7729 case LE:
7730 case LTU:
7731 case LEU:
7732 return 1;
7733 default:
7734 return 0;
7735 }
7736 }
7737
7738 /* Accept pseudos and branch target registers. */
7739 int
7740 target_reg_operand (rtx op, enum machine_mode mode)
7741 {
7742 if (mode != DImode
7743 || GET_MODE (op) != DImode)
7744 return 0;
7745
7746 if (GET_CODE (op) == SUBREG)
7747 op = XEXP (op, 0);
7748
7749 if (GET_CODE (op) != REG)
7750 return 0;
7751
7752 /* We must protect ourselves from matching pseudos that are virtual
7753 registers, because they will eventually be replaced with hardware
7754 registers that aren't branch-target registers. */
7755 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7756 || TARGET_REGISTER_P (REGNO (op)))
7757 return 1;
7758
7759 return 0;
7760 }
7761
7762 /* Same as target_reg_operand, except that label_refs and symbol_refs
7763 are accepted before reload. */
7764 int
7765 target_operand (rtx op, enum machine_mode mode)
7766 {
7767 if (mode != DImode)
7768 return 0;
7769
7770 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7771 && EXTRA_CONSTRAINT_Csy (op))
7772 return ! reload_completed;
7773
7774 return target_reg_operand (op, mode);
7775 }
7776
7777 int
7778 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7779 {
7780 HOST_WIDE_INT i;
7781
7782 if (GET_CODE (op) != CONST_INT)
7783 return 0;
7784 i = INTVAL (op);
7785 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7786 }
7787
7788 int
7789 extend_reg_operand (rtx op, enum machine_mode mode)
7790 {
7791 return (GET_CODE (op) == TRUNCATE
7792 ? arith_operand
7793 : arith_reg_operand) (op, mode);
7794 }
7795
7796 int
7797 trunc_hi_operand (rtx op, enum machine_mode mode)
7798 {
7799 enum machine_mode op_mode = GET_MODE (op);
7800
7801 if (op_mode != SImode && op_mode != DImode
7802 && op_mode != V4HImode && op_mode != V2SImode)
7803 return 0;
7804 return extend_reg_operand (op, mode);
7805 }
7806
7807 int
7808 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7809 {
7810 return (GET_CODE (op) == TRUNCATE
7811 ? arith_operand
7812 : arith_reg_or_0_operand) (op, mode);
7813 }
7814
7815 int
7816 general_extend_operand (rtx op, enum machine_mode mode)
7817 {
7818 return (GET_CODE (op) == TRUNCATE
7819 ? arith_operand
7820 : nonimmediate_operand) (op, mode);
7821 }
7822
7823 int
7824 inqhi_operand (rtx op, enum machine_mode mode)
7825 {
7826 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7827 return 0;
7828 op = XEXP (op, 0);
7829 /* Can't use true_regnum here because copy_cost wants to know about
7830 SECONDARY_INPUT_RELOAD_CLASS. */
7831 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7832 }
7833
7834 int
7835 sh_rep_vec (rtx v, enum machine_mode mode)
7836 {
7837 int i;
7838 rtx x, y;
7839
7840 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7841 || (GET_MODE (v) != mode && mode != VOIDmode))
7842 return 0;
7843 i = XVECLEN (v, 0) - 2;
7844 x = XVECEXP (v, 0, i + 1);
7845 if (GET_MODE_UNIT_SIZE (mode) == 1)
7846 {
7847 y = XVECEXP (v, 0, i);
7848 for (i -= 2; i >= 0; i -= 2)
7849 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7850 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7851 return 0;
7852 }
7853 else
7854 for (; i >= 0; i--)
7855 if (XVECEXP (v, 0, i) != x)
7856 return 0;
7857 return 1;
7858 }
7859
7860 /* Determine if V is a constant vector matching MODE with only one element
7861 that is not a sign extension. Two byte-sized elements count as one. */
7862 int
7863 sh_1el_vec (rtx v, enum machine_mode mode)
7864 {
7865 int unit_size;
7866 int i, last, least, sign_ix;
7867 rtx sign;
7868
7869 if (GET_CODE (v) != CONST_VECTOR
7870 || (GET_MODE (v) != mode && mode != VOIDmode))
7871 return 0;
7872 /* Determine the numbers of the last and the least significant elements. */
7873 last = XVECLEN (v, 0) - 1;
7874 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7875 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7876 return 0;
7877 sign_ix = least;
7878 if (GET_MODE_UNIT_SIZE (mode) == 1)
7879 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7880 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7881 return 0;
7882 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7883 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7884 ? constm1_rtx : const0_rtx);
7885 i = XVECLEN (v, 0) - 1;
7886 do
7887 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7888 return 0;
7889 while (--i);
7890 return 1;
7891 }
7892
7893 int
7894 sh_const_vec (rtx v, enum machine_mode mode)
7895 {
7896 int i;
7897
7898 if (GET_CODE (v) != CONST_VECTOR
7899 || (GET_MODE (v) != mode && mode != VOIDmode))
7900 return 0;
7901 i = XVECLEN (v, 0) - 1;
7902 for (; i >= 0; i--)
7903 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7904 return 0;
7905 return 1;
7906 }
7907 \f
7908 /* Return the destination address of a branch. */
7909
7910 static int
7911 branch_dest (rtx branch)
7912 {
7913 rtx dest = SET_SRC (PATTERN (branch));
7914 int dest_uid;
7915
7916 if (GET_CODE (dest) == IF_THEN_ELSE)
7917 dest = XEXP (dest, 1);
7918 dest = XEXP (dest, 0);
7919 dest_uid = INSN_UID (dest);
7920 return INSN_ADDRESSES (dest_uid);
7921 }
7922 \f
7923 /* Return nonzero if REG is not used after INSN.
7924 We assume REG is a reload reg, and therefore does
7925 not live past labels. It may live past calls or jumps though. */
7926 int
7927 reg_unused_after (rtx reg, rtx insn)
7928 {
7929 enum rtx_code code;
7930 rtx set;
7931
7932 /* If the reg is set by this instruction, then it is safe for our
7933 case. Disregard the case where this is a store to memory, since
7934 we are checking a register used in the store address. */
7935 set = single_set (insn);
7936 if (set && GET_CODE (SET_DEST (set)) != MEM
7937 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7938 return 1;
7939
7940 while ((insn = NEXT_INSN (insn)))
7941 {
7942 rtx set;
7943 if (!INSN_P (insn))
7944 continue;
7945
7946 code = GET_CODE (insn);
7947
7948 #if 0
7949 /* If this is a label that existed before reload, then the register
7950 is dead here. However, if this is a label added by reorg, then
7951 the register may still be live here. We can't tell the difference,
7952 so we just ignore labels completely. */
7953 if (code == CODE_LABEL)
7954 return 1;
7955 /* else */
7956 #endif
7957
7958 if (code == JUMP_INSN)
7959 return 0;
7960
7961 /* If this is a sequence, we must handle them all at once.
7962 We could have for instance a call that sets the target register,
7963 and an insn in a delay slot that uses the register. In this case,
7964 we must return 0. */
7965 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7966 {
7967 int i;
7968 int retval = 0;
7969
7970 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7971 {
7972 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7973 rtx set = single_set (this_insn);
7974
7975 if (GET_CODE (this_insn) == CALL_INSN)
7976 code = CALL_INSN;
7977 else if (GET_CODE (this_insn) == JUMP_INSN)
7978 {
7979 if (INSN_ANNULLED_BRANCH_P (this_insn))
7980 return 0;
7981 code = JUMP_INSN;
7982 }
7983
7984 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7985 return 0;
7986 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7987 {
7988 if (GET_CODE (SET_DEST (set)) != MEM)
7989 retval = 1;
7990 else
7991 return 0;
7992 }
7993 if (set == 0
7994 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7995 return 0;
7996 }
7997 if (retval == 1)
7998 return 1;
7999 else if (code == JUMP_INSN)
8000 return 0;
8001 }
8002
8003 set = single_set (insn);
8004 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8005 return 0;
8006 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8007 return GET_CODE (SET_DEST (set)) != MEM;
8008 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8009 return 0;
8010
8011 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
8012 return 1;
8013 }
8014 return 1;
8015 }
8016 \f
8017 #include "ggc.h"
8018
8019 static GTY(()) rtx fpscr_rtx;
8020 rtx
8021 get_fpscr_rtx (void)
8022 {
8023 if (! fpscr_rtx)
8024 {
8025 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8026 REG_USERVAR_P (fpscr_rtx) = 1;
8027 mark_user_reg (fpscr_rtx);
8028 }
8029 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8030 mark_user_reg (fpscr_rtx);
8031 return fpscr_rtx;
8032 }
8033
8034 void
8035 emit_sf_insn (rtx pat)
8036 {
8037 emit_insn (pat);
8038 }
8039
8040 void
8041 emit_df_insn (rtx pat)
8042 {
8043 emit_insn (pat);
8044 }
8045
8046 void
8047 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8048 {
8049 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8050 }
8051
8052 void
8053 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8054 {
8055 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8056 get_fpscr_rtx ()));
8057 }
8058
8059 void
8060 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8061 {
8062 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8063 }
8064
8065 void
8066 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8067 {
8068 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8069 get_fpscr_rtx ()));
8070 }
8071 \f
8072 /* ??? gcc does flow analysis strictly after common subexpression
8073 elimination. As a result, common subexpression elimination fails
8074 when there are some intervening statements setting the same register.
8075 If we did nothing about this, this would hurt the precision switching
8076 for SH4 badly. There is some cse after reload, but it is unable to
8077 undo the extra register pressure from the unused instructions, and
8078 it cannot remove auto-increment loads.
8079
8080 A C code example that shows this flow/cse weakness for (at least) SH
8081 and sparc (as of gcc ss-970706) is this:
8082
8083 double
8084 f(double a)
8085 {
8086 double d;
8087 d = 0.1;
8088 a += d;
8089 d = 1.1;
8090 d = 0.1;
8091 a *= d;
8092 return a;
8093 }
8094
8095 So we add another pass before common subexpression elimination, to
8096 remove assignments that are dead due to a following assignment in the
8097 same basic block. */
8098
8099 static void
8100 mark_use (rtx x, rtx *reg_set_block)
8101 {
8102 enum rtx_code code;
8103
8104 if (! x)
8105 return;
8106 code = GET_CODE (x);
8107 switch (code)
8108 {
8109 case REG:
8110 {
8111 int regno = REGNO (x);
8112 int nregs = (regno < FIRST_PSEUDO_REGISTER
8113 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8114 : 1);
8115 do
8116 {
8117 reg_set_block[regno + nregs - 1] = 0;
8118 }
8119 while (--nregs);
8120 break;
8121 }
8122 case SET:
8123 {
8124 rtx dest = SET_DEST (x);
8125
8126 if (GET_CODE (dest) == SUBREG)
8127 dest = SUBREG_REG (dest);
8128 if (GET_CODE (dest) != REG)
8129 mark_use (dest, reg_set_block);
8130 mark_use (SET_SRC (x), reg_set_block);
8131 break;
8132 }
8133 case CLOBBER:
8134 break;
8135 default:
8136 {
8137 const char *fmt = GET_RTX_FORMAT (code);
8138 int i, j;
8139 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8140 {
8141 if (fmt[i] == 'e')
8142 mark_use (XEXP (x, i), reg_set_block);
8143 else if (fmt[i] == 'E')
8144 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8145 mark_use (XVECEXP (x, i, j), reg_set_block);
8146 }
8147 break;
8148 }
8149 }
8150 }
8151 \f
8152 static rtx get_free_reg (HARD_REG_SET);
8153
8154 /* This function returns a register to use for loading the address from
8155 which to load the fpscr. Currently it always returns r1 or r7, but when we are
8156 able to use pseudo registers after combine, or have a better mechanism
8157 for choosing a register, it should be done here. */
8158 /* REGS_LIVE is the liveness information for the point for which we
8159 need this allocation. In some bare-bones exit blocks, r1 is live at the
8160 start. We can even have all of r0..r3 being live:
8161 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8162 The INSN before which new insns are placed will clobber the register
8163 we return. If a basic block consists only of setting the return value
8164 register to a pseudo and using that register, the return value is not
8165 live before or after this block, yet we'll insert our insns right in
8166 the middle. */
8167
8168 static rtx
8169 get_free_reg (HARD_REG_SET regs_live)
8170 {
8171 if (! TEST_HARD_REG_BIT (regs_live, 1))
8172 return gen_rtx_REG (Pmode, 1);
8173
8174 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8175 there shouldn't be anything but a jump before the function end. */
8176 if (! TEST_HARD_REG_BIT (regs_live, 7))
8177 return gen_rtx_REG (Pmode, 7);
8178
8179 abort ();
8180 }
8181
8182 /* This function will set the fpscr from memory.
8183 MODE is the mode we are setting it to. */
8184 void
8185 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8186 {
8187 enum attr_fp_mode fp_mode = mode;
8188 rtx addr_reg = get_free_reg (regs_live);
8189
8190 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8191 emit_insn (gen_fpu_switch1 (addr_reg));
8192 else
8193 emit_insn (gen_fpu_switch0 (addr_reg));
8194 }
8195
8196 /* Is the given character a logical line separator for the assembler? */
8197 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8198 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8199 #endif
8200
8201 int
8202 sh_insn_length_adjustment (rtx insn)
8203 {
8204 /* Instructions with unfilled delay slots take up an extra two bytes for
8205 the nop in the delay slot. */
8206 if (((GET_CODE (insn) == INSN
8207 && GET_CODE (PATTERN (insn)) != USE
8208 && GET_CODE (PATTERN (insn)) != CLOBBER)
8209 || GET_CODE (insn) == CALL_INSN
8210 || (GET_CODE (insn) == JUMP_INSN
8211 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8212 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8213 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8214 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8215 return 2;
8216
8217 /* SH2e has a bug that prevents the use of annulled branches, so if
8218 the delay slot is not filled, we'll have to put a NOP in it. */
8219 if (sh_cpu == CPU_SH2E
8220 && GET_CODE (insn) == JUMP_INSN
8221 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8222 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8223 && get_attr_type (insn) == TYPE_CBRANCH
8224 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8225 return 2;
8226
8227 /* sh-dsp parallel processing insns take four bytes instead of two. */
8228
8229 if (GET_CODE (insn) == INSN)
8230 {
8231 int sum = 0;
8232 rtx body = PATTERN (insn);
8233 const char *template;
8234 char c;
8235 int maybe_label = 1;
8236
8237 if (GET_CODE (body) == ASM_INPUT)
8238 template = XSTR (body, 0);
8239 else if (asm_noperands (body) >= 0)
8240 template
8241 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8242 else
8243 return 0;
8244 do
8245 {
8246 int ppi_adjust = 0;
8247
8248 do
8249 c = *template++;
8250 while (c == ' ' || c == '\t');
8251 /* all sh-dsp parallel-processing insns start with p.
8252 The only non-ppi sh insn starting with p is pref.
8253 The only ppi starting with pr is prnd. */
8254 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8255 ppi_adjust = 2;
8256 /* The repeat pseudo-insn expands to three insns, a total of
8257 six bytes in size. */
8258 else if ((c == 'r' || c == 'R')
8259 && ! strncasecmp ("epeat", template, 5))
8260 ppi_adjust = 4;
8261 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8262 {
8263 /* If this is a label, it is obviously not a ppi insn. */
8264 if (c == ':' && maybe_label)
8265 {
8266 ppi_adjust = 0;
8267 break;
8268 }
8269 else if (c == '\'' || c == '"')
8270 maybe_label = 0;
8271 c = *template++;
8272 }
8273 sum += ppi_adjust;
8274 maybe_label = c != ':';
8275 }
8276 while (c);
8277 return sum;
8278 }
8279 return 0;
8280 }
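
/* A rough illustration of the asm scanning above (assumed SH-DSP asm text,
   not taken from any real program):

     asm ("padd   x0,y0,a0");   adds 2 bytes (parallel-processing insn)
     asm ("repeat L0,L1,#4");   adds 4 bytes (repeat pseudo-insn)
     asm ("pref   @r4");        adds nothing (starts with 'p' but is not a ppi)

   so branch shortening sees a realistic length for such inline asm.  */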
8281 \f
8282 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8283 isn't protected by a PIC unspec. */
8284 int
8285 nonpic_symbol_mentioned_p (rtx x)
8286 {
8287 register const char *fmt;
8288 register int i;
8289
8290 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8291 || GET_CODE (x) == PC)
8292 return 1;
8293
8294 /* We don't want to look into the possible MEM location of a
8295 CONST_DOUBLE, since we're not going to use it, in general. */
8296 if (GET_CODE (x) == CONST_DOUBLE)
8297 return 0;
8298
8299 if (GET_CODE (x) == UNSPEC
8300 && (XINT (x, 1) == UNSPEC_PIC
8301 || XINT (x, 1) == UNSPEC_GOT
8302 || XINT (x, 1) == UNSPEC_GOTOFF
8303 || XINT (x, 1) == UNSPEC_GOTPLT
8304 || XINT (x, 1) == UNSPEC_GOTTPOFF
8305 || XINT (x, 1) == UNSPEC_DTPOFF
8306 || XINT (x, 1) == UNSPEC_PLT))
8307 return 0;
8308
8309 fmt = GET_RTX_FORMAT (GET_CODE (x));
8310 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8311 {
8312 if (fmt[i] == 'E')
8313 {
8314 register int j;
8315
8316 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8317 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8318 return 1;
8319 }
8320 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8321 return 1;
8322 }
8323
8324 return 0;
8325 }
8326
8327 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8328 @GOTOFF in `reg'. */
8329 rtx
8330 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8331 rtx reg)
8332 {
8333 if (tls_symbolic_operand (orig, Pmode))
8334 return orig;
8335
8336 if (GET_CODE (orig) == LABEL_REF
8337 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8338 {
8339 if (reg == 0)
8340 reg = gen_reg_rtx (Pmode);
8341
8342 emit_insn (gen_symGOTOFF2reg (reg, orig));
8343 return reg;
8344 }
8345 else if (GET_CODE (orig) == SYMBOL_REF)
8346 {
8347 if (reg == 0)
8348 reg = gen_reg_rtx (Pmode);
8349
8350 emit_insn (gen_symGOT2reg (reg, orig));
8351 return reg;
8352 }
8353 return orig;
8354 }
8355
8356 /* Mark the use of a constant in the literal table. If the constant
8357 has multiple labels, make it unique. */
8358 static rtx
8359 mark_constant_pool_use (rtx x)
8360 {
8361 rtx insn, lab, pattern;
8362
8363 if (x == NULL)
8364 return x;
8365
8366 switch (GET_CODE (x))
8367 {
8368 case LABEL_REF:
8369 x = XEXP (x, 0);
8370 case CODE_LABEL:
8371 break;
8372 default:
8373 return x;
8374 }
8375
8376 /* Get the first label in the list of labels for the same constant
8377 and delete the other labels in the list. */
8378 lab = x;
8379 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8380 {
8381 if (GET_CODE (insn) != CODE_LABEL
8382 || LABEL_REFS (insn) != NEXT_INSN (insn))
8383 break;
8384 lab = insn;
8385 }
8386
8387 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8388 INSN_DELETED_P (insn) = 1;
8389
8390 /* Mark constants in a window. */
8391 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8392 {
8393 if (GET_CODE (insn) != INSN)
8394 continue;
8395
8396 pattern = PATTERN (insn);
8397 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8398 continue;
8399
8400 switch (XINT (pattern, 1))
8401 {
8402 case UNSPECV_CONST2:
8403 case UNSPECV_CONST4:
8404 case UNSPECV_CONST8:
8405 XVECEXP (pattern, 0, 1) = const1_rtx;
8406 break;
8407 case UNSPECV_WINDOW_END:
8408 if (XVECEXP (pattern, 0, 0) == x)
8409 return lab;
8410 break;
8411 case UNSPECV_CONST_END:
8412 return lab;
8413 default:
8414 break;
8415 }
8416 }
8417
8418 return lab;
8419 }
8420 \f
8421 /* Return true if it's possible to redirect BRANCH1 to the destination
8422 of an unconditional jump BRANCH2. We only want to do this if the
8423 resulting branch will have a short displacement. */
8424 int
8425 sh_can_redirect_branch (rtx branch1, rtx branch2)
8426 {
8427 if (flag_expensive_optimizations && simplejump_p (branch2))
8428 {
8429 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8430 rtx insn;
8431 int distance;
8432
8433 for (distance = 0, insn = NEXT_INSN (branch1);
8434 insn && distance < 256;
8435 insn = PREV_INSN (insn))
8436 {
8437 if (insn == dest)
8438 return 1;
8439 else
8440 distance += get_attr_length (insn);
8441 }
8442 for (distance = 0, insn = NEXT_INSN (branch1);
8443 insn && distance < 256;
8444 insn = NEXT_INSN (insn))
8445 {
8446 if (insn == dest)
8447 return 1;
8448 else
8449 distance += get_attr_length (insn);
8450 }
8451 }
8452 return 0;
8453 }
8454
8455 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8456 int
8457 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8458 unsigned int new_reg)
8459 {
8460 /* Interrupt functions can only use registers that have already been
8461 saved by the prologue, even if they would normally be
8462 call-clobbered. */
8463
8464 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8465 return 0;
8466
8467 return 1;
8468 }
8469
8470 /* Function to update the integer COST
8471 based on the relationship between INSN that is dependent on
8472 DEP_INSN through the dependence LINK. The default is to make no
8473 adjustment to COST. This can be used for example to specify to
8474 the scheduler that an output- or anti-dependence does not incur
8475 the same cost as a data-dependence. The return value should be
8476 the new value for COST. */
8477 static int
8478 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8479 {
8480 rtx reg, use_pat;
8481
8482 if (TARGET_SHMEDIA)
8483 {
8484 /* On SHmedia, if the dependence is an anti-dependence or
8485 output-dependence, there is no cost. */
8486 if (REG_NOTE_KIND (link) != 0)
8487 cost = 0;
8488
8489 if (get_attr_is_mac_media (insn)
8490 && get_attr_is_mac_media (dep_insn))
8491 cost = 1;
8492 }
8493 else if (REG_NOTE_KIND (link) == 0)
8494 {
8495 enum attr_type dep_type, type;
8496
8497 if (recog_memoized (insn) < 0
8498 || recog_memoized (dep_insn) < 0)
8499 return cost;
8500
8501 dep_type = get_attr_type (dep_insn);
8502 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8503 cost--;
8504 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8505 && (type = get_attr_type (insn)) != TYPE_CALL
8506 && type != TYPE_SFUNC)
8507 cost--;
8508
8509 /* The only input for a call that is timing-critical is the
8510 function's address. */
8511 if (GET_CODE (insn) == CALL_INSN)
8512 {
8513 rtx call = PATTERN (insn);
8514
8515 if (GET_CODE (call) == PARALLEL)
8516 call = XVECEXP (call, 0, 0);
8517 if (GET_CODE (call) == SET)
8518 call = SET_SRC (call);
8519 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8520 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8521 cost = 0;
8522 }
8523 /* Likewise, the most timing-critical input for an sfunc call
8524 is the function address. However, sfuncs typically start
8525 using their arguments pretty quickly.
8526 Assume a four cycle delay before they are needed. */
8527 /* All sfunc calls are parallels with at least four components.
8528 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8529 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8530 && XVECLEN (PATTERN (insn), 0) >= 4
8531 && (reg = sfunc_uses_reg (insn)))
8532 {
8533 if (! reg_set_p (reg, dep_insn))
8534 cost -= 4;
8535 }
8536 /* When the preceding instruction loads the shift amount of
8537 the following SHAD/SHLD, the latency of the load is increased
8538 by 1 cycle. */
8539 else if (TARGET_SH4
8540 && get_attr_type (insn) == TYPE_DYN_SHIFT
8541 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8542 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8543 XEXP (SET_SRC (single_set (insn)),
8544 1)))
8545 cost++;
8546 /* When an LS group instruction with a latency of less than
8547 3 cycles is followed by a double-precision floating-point
8548 instruction, FIPR, or FTRV, the latency of the first
8549 instruction is increased to 3 cycles. */
8550 else if (cost < 3
8551 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8552 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8553 cost = 3;
8554 /* The lsw register of a double-precision computation is ready one
8555 cycle earlier. */
8556 else if (reload_completed
8557 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8558 && (use_pat = single_set (insn))
8559 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8560 SET_SRC (use_pat)))
8561 cost -= 1;
8562
8563 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8564 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8565 cost -= 1;
8566 }
8567 /* An anti-dependence penalty of two applies if the first insn is a double
8568 precision fadd / fsub / fmul. */
8569 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8570 && recog_memoized (dep_insn) >= 0
8571 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8572 /* A lot of alleged anti-flow dependences are fake,
8573 so check this one is real. */
8574 && flow_dependent_p (dep_insn, insn))
8575 cost = 2;
8576
8577
8578 return cost;
8579 }
8580
8581 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8582 if DEP_INSN is anti-flow dependent on INSN. */
8583 static int
8584 flow_dependent_p (rtx insn, rtx dep_insn)
8585 {
8586 rtx tmp = PATTERN (insn);
8587
8588 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8589 return tmp == NULL_RTX;
8590 }
8591
8592 /* A helper function for flow_dependent_p called through note_stores. */
8593 static void
8594 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8595 {
8596 rtx * pinsn = (rtx *) data;
8597
8598 if (*pinsn && reg_referenced_p (x, *pinsn))
8599 *pinsn = NULL_RTX;
8600 }
8601
8602 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8603 'special function' patterns (type sfunc) that clobber pr, but that
8604 do not look like function calls to leaf_function_p. Hence we must
8605 do this extra check. */
8606 int
8607 sh_pr_n_sets (void)
8608 {
8609 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8610 }
8611
8612 /* This function returns "2" to indicate dual issue for the SH4
8613 processor. To be used by the DFA pipeline description. */
8614 static int
8615 sh_issue_rate (void)
8616 {
8617 if (TARGET_SUPERSCALAR)
8618 return 2;
8619 else
8620 return 1;
8621 }
8622
8623 /* Functions for ready queue reordering for sched1. */
8624
8625 /* Get weight for mode for a set x. */
8626 static short
8627 find_set_regmode_weight (rtx x, enum machine_mode mode)
8628 {
8629 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8630 return 1;
8631 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8632 {
8633 if (GET_CODE (SET_DEST (x)) == REG)
8634 {
8635 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8636 return 1;
8637 else
8638 return 0;
8639 }
8640 return 1;
8641 }
8642 return 0;
8643 }
8644
8645 /* Get regmode weight for insn. */
8646 static short
8647 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8648 {
8649 short reg_weight = 0;
8650 rtx x;
8651
8652 /* Increment weight for each register born here. */
8653 x = PATTERN (insn);
8654 reg_weight += find_set_regmode_weight (x, mode);
8655 if (GET_CODE (x) == PARALLEL)
8656 {
8657 int j;
8658 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8659 {
8660 x = XVECEXP (PATTERN (insn), 0, j);
8661 reg_weight += find_set_regmode_weight (x, mode);
8662 }
8663 }
8664 /* Decrement weight for each register that dies here. */
8665 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8666 {
8667 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8668 {
8669 rtx note = XEXP (x, 0);
8670 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8671 reg_weight--;
8672 }
8673 }
8674 return reg_weight;
8675 }
8676
8677 /* Calculate regmode weights for all insns of a basic block. */
8678 static void
8679 find_regmode_weight (int b, enum machine_mode mode)
8680 {
8681 rtx insn, next_tail, head, tail;
8682
8683 get_block_head_tail (b, &head, &tail);
8684 next_tail = NEXT_INSN (tail);
8685
8686 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8687 {
8688 /* Handle register life information. */
8689 if (!INSN_P (insn))
8690 continue;
8691
8692 if (mode == SFmode)
8693 INSN_REGMODE_WEIGHT (insn, mode) =
8694 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8695 else if (mode == SImode)
8696 INSN_REGMODE_WEIGHT (insn, mode) =
8697 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8698 }
8699 }
8700
8701 /* Comparison function for ready queue sorting. */
8702 static int
8703 rank_for_reorder (const void *x, const void *y)
8704 {
8705 rtx tmp = *(const rtx *) y;
8706 rtx tmp2 = *(const rtx *) x;
8707
8708 /* The insn in a schedule group should be issued first. */
8709 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8710 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8711
8712 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8713 minimizes instruction movement, thus minimizing sched's effect on
8714 register pressure. */
8715 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8716 }
8717
8718 /* Resort the array A in which only element at index N may be out of order. */
8719 static void
8720 swap_reorder (rtx *a, int n)
8721 {
8722 rtx insn = a[n - 1];
8723 int i = n - 2;
8724
8725 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8726 {
8727 a[i + 1] = a[i];
8728 i -= 1;
8729 }
8730 a[i + 1] = insn;
8731 }
8732
8733 #define SCHED_REORDER(READY, N_READY) \
8734 do \
8735 { \
8736 if ((N_READY) == 2) \
8737 swap_reorder (READY, N_READY); \
8738 else if ((N_READY) > 2) \
8739 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8740 } \
8741 while (0)
8742
8743 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8744 macro. */
8745 static void
8746 ready_reorder (rtx *ready, int nready)
8747 {
8748 SCHED_REORDER (ready, nready);
8749 }
8750
8751 /* Calculate regmode weights for all insns of all basic blocks. */
8752 static void
8753 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8754 int verbose ATTRIBUTE_UNUSED,
8755 int old_max_uid)
8756 {
8757 basic_block b;
8758
8759 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8760 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8761
8762 FOR_EACH_BB_REVERSE (b)
8763 {
8764 find_regmode_weight (b->index, SImode);
8765 find_regmode_weight (b->index, SFmode);
8766 }
8767
8768 CURR_REGMODE_PRESSURE (SImode) = 0;
8769 CURR_REGMODE_PRESSURE (SFmode) = 0;
8770
8771 }
8772
8773 /* Cleanup. */
8774 static void
8775 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8776 int verbose ATTRIBUTE_UNUSED)
8777 {
8778 if (regmode_weight[0])
8779 {
8780 free (regmode_weight[0]);
8781 regmode_weight[0] = NULL;
8782 }
8783 if (regmode_weight[1])
8784 {
8785 free (regmode_weight[1]);
8786 regmode_weight[1] = NULL;
8787 }
8788 }
8789
8790 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8791 keep count of register pressures on SImode and SFmode. */
8792 static int
8793 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8794 int sched_verbose ATTRIBUTE_UNUSED,
8795 rtx insn,
8796 int can_issue_more)
8797 {
8798 if (GET_CODE (PATTERN (insn)) != USE
8799 && GET_CODE (PATTERN (insn)) != CLOBBER)
8800 cached_can_issue_more = can_issue_more - 1;
8801 else
8802 cached_can_issue_more = can_issue_more;
8803
8804 if (reload_completed)
8805 return cached_can_issue_more;
8806
8807 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8808 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8809
8810 return cached_can_issue_more;
8811 }
8812
8813 static void
8814 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8815 int verbose ATTRIBUTE_UNUSED,
8816 int veclen ATTRIBUTE_UNUSED)
8817 {
8818 CURR_REGMODE_PRESSURE (SImode) = 0;
8819 CURR_REGMODE_PRESSURE (SFmode) = 0;
8820 }
8821
8822 /* Some magic numbers. */
8823 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8824 functions that already have high pressure on r0. */
8825 #define R0_MAX_LIFE_REGIONS 2
8826 #define R0_MAX_LIVE_LENGTH 12
8827 /* Register Pressure thresholds for SImode and SFmode registers. */
8828 #define SIMODE_MAX_WEIGHT 5
8829 #define SFMODE_MAX_WEIGHT 10
8830
8831 /* Return true if the pressure is high for MODE. */
8832 static short
8833 high_pressure (enum machine_mode mode)
8834 {
8835 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8836 functions that already have high pressure on r0. */
8837 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8838 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8839 return 1;
8840
8841 if (mode == SFmode)
8842 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8843 else
8844 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8845 }
8846
8847 /* Reorder ready queue if register pressure is high. */
8848 static int
8849 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8850 int sched_verbose ATTRIBUTE_UNUSED,
8851 rtx *ready,
8852 int *n_readyp,
8853 int clock_var ATTRIBUTE_UNUSED)
8854 {
8855 if (reload_completed)
8856 return sh_issue_rate ();
8857
8858 if (high_pressure (SFmode) || high_pressure (SImode))
8859 {
8860 ready_reorder (ready, *n_readyp);
8861 }
8862
8863 return sh_issue_rate ();
8864 }
8865
8866 /* Skip cycles if the current register pressure is high. */
8867 static int
8868 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8869 int sched_verbose ATTRIBUTE_UNUSED,
8870 rtx *ready ATTRIBUTE_UNUSED,
8871 int *n_readyp ATTRIBUTE_UNUSED,
8872 int clock_var ATTRIBUTE_UNUSED)
8873 {
8874 if (reload_completed)
8875 return cached_can_issue_more;
8876
8877 if (high_pressure (SFmode) || high_pressure (SImode))
8878 skip_cycles = 1;
8879
8880 return cached_can_issue_more;
8881 }
8882
8883 /* Skip cycles without sorting the ready queue. This will move insns from
8884 Q->R. If this is the last cycle we are skipping, allow sorting of the
8885 ready queue by sh_reorder. */
8886
8887 /* Generally, skipping this many cycles is sufficient for all insns to move
8888 from Q -> R. */
8889 #define MAX_SKIPS 8
8890
8891 static int
8892 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8893 int sched_verbose ATTRIBUTE_UNUSED,
8894 rtx insn ATTRIBUTE_UNUSED,
8895 int last_clock_var,
8896 int clock_var,
8897 int *sort_p)
8898 {
8899 if (reload_completed)
8900 return 0;
8901
8902 if (skip_cycles)
8903 {
8904 if ((clock_var - last_clock_var) < MAX_SKIPS)
8905 {
8906 *sort_p = 0;
8907 return 1;
8908 }
8909 /* If this is the last cycle we are skipping, allow reordering of R. */
8910 if ((clock_var - last_clock_var) == MAX_SKIPS)
8911 {
8912 *sort_p = 1;
8913 return 1;
8914 }
8915 }
8916
8917 skip_cycles = 0;
8918
8919 return 0;
8920 }
8921
8922 /* SHmedia requires registers for branches, so we can't generate new
8923 branches past reload. */
8924 static bool
8925 sh_cannot_modify_jumps_p (void)
8926 {
8927 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8928 }
8929
8930 static int
8931 sh_target_reg_class (void)
8932 {
8933 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8934 }
8935
8936 static bool
8937 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8938 {
8939 return (shmedia_space_reserved_for_target_registers
8940 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8941 }
8942
8943 static bool
8944 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8945 {
8946 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8947 }
8948 \f
8949 /*
8950 On the SH1..SH4, the trampoline looks like
8951 2 0002 D202 mov.l l2,r2
8952 1 0000 D301 mov.l l1,r3
8953 3 0004 422B jmp @r2
8954 4 0006 0009 nop
8955 5 0008 00000000 l1: .long area
8956 6 000c 00000000 l2: .long function
8957
8958 SH5 (compact) uses r1 instead of r3 for the static chain. */
8959
8960
8961 /* Emit RTL insns to initialize the variable parts of a trampoline.
8962 FNADDR is an RTX for the address of the function's pure code.
8963 CXT is an RTX for the static chain value for the function. */
8964
8965 void
8966 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8967 {
8968 if (TARGET_SHMEDIA64)
8969 {
8970 rtx tramp_templ;
8971 int fixed_len;
8972
8973 rtx movi1 = GEN_INT (0xcc000010);
8974 rtx shori1 = GEN_INT (0xc8000010);
8975 rtx src, dst;
8976
8977 /* The following trampoline works within a +- 128 KB range for cxt:
8978 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8979 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8980 gettr tr1,r1; blink tr0,r63 */
8981 /* Address rounding makes it hard to compute the exact bounds of the
8982 offset for this trampoline, but we have a rather generous offset
8983 range, so frame_offset should do fine as an upper bound. */
8984 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8985 {
8986 /* ??? could optimize this trampoline initialization
8987 by writing DImode words with two insns each. */
8988 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8989 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8990 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8991 insn = gen_rtx_AND (DImode, insn, mask);
8992 /* Or in ptb/u .,tr1 pattern */
8993 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8994 insn = force_operand (insn, NULL_RTX);
8995 insn = gen_lowpart (SImode, insn);
8996 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
8997 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8998 insn = gen_rtx_AND (DImode, insn, mask);
8999 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9000 insn = gen_lowpart (SImode, insn);
9001 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9002 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9003 insn = gen_rtx_AND (DImode, insn, mask);
9004 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9005 insn = gen_lowpart (SImode, insn);
9006 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9007 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9008 insn = gen_rtx_AND (DImode, insn, mask);
9009 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9010 insn = gen_lowpart (SImode, insn);
9011 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9012 insn);
9013 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9014 insn = gen_rtx_AND (DImode, insn, mask);
9015 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9016 insn = gen_lowpart (SImode, insn);
9017 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9018 insn);
9019 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9020 GEN_INT (0x6bf10600));
9021 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9022 GEN_INT (0x4415fc10));
9023 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9024 GEN_INT (0x4401fff0));
9025 emit_insn (gen_ic_invalidate_line (tramp));
9026 return;
9027 }
9028 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9029 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9030
9031 tramp_templ = gen_datalabel_ref (tramp_templ);
9032 dst = gen_rtx_MEM (BLKmode, tramp);
9033 src = gen_rtx_MEM (BLKmode, tramp_templ);
9034 set_mem_align (dst, 256);
9035 set_mem_align (src, 64);
9036 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9037
9038 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9039 fnaddr);
9040 emit_move_insn (gen_rtx_MEM (Pmode,
9041 plus_constant (tramp,
9042 fixed_len
9043 + GET_MODE_SIZE (Pmode))),
9044 cxt);
9045 emit_insn (gen_ic_invalidate_line (tramp));
9046 return;
9047 }
9048 else if (TARGET_SHMEDIA)
9049 {
9050 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9051 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9052 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9053 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9054 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9055 rotated 10 right, with the higher 16 bits of every 32 selected. */
9056 rtx movishori
9057 = force_reg (V2HImode, (simplify_gen_subreg
9058 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9059 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9060 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9061
9062 tramp = force_reg (Pmode, tramp);
9063 fnaddr = force_reg (SImode, fnaddr);
9064 cxt = force_reg (SImode, cxt);
9065 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9066 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9067 movishori));
9068 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9069 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9070 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9071 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9072 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9073 gen_rtx_SUBREG (V2HImode, cxt, 0),
9074 movishori));
9075 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9076 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9077 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9078 if (TARGET_LITTLE_ENDIAN)
9079 {
9080 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9081 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9082 }
9083 else
9084 {
9085 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9086 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9087 }
9088 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9089 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9090 emit_insn (gen_ic_invalidate_line (tramp));
9091 return;
9092 }
9093 else if (TARGET_SHCOMPACT)
9094 {
9095 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9096 return;
9097 }
9098 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9099 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9100 SImode));
9101 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9102 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9103 SImode));
9104 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9105 cxt);
9106 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9107 fnaddr);
9108 if (TARGET_HARVARD)
9109 {
9110 if (TARGET_USERMODE)
9111 emit_library_call (function_symbol ("__ic_invalidate"),
9112 0, VOIDmode, 1, tramp, SImode);
9113 else
9114 emit_insn (gen_ic_invalidate_line (tramp));
9115 }
9116 }
9117
9118 /* FIXME: This is overly conservative. A SHcompact function that
9119 receives arguments ``by reference'' will have them stored in its
9120 own stack frame, so it must not pass pointers or references to
9121 these arguments to other functions by means of sibling calls. */
9122 static bool
9123 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9124 {
9125 return (decl
9126 && (! TARGET_SHCOMPACT
9127 || current_function_args_info.stack_regs == 0)
9128 && ! sh_cfun_interrupt_handler_p ());
9129 }
9130 \f
9131 /* Machine specific built-in functions. */
9132
9133 struct builtin_description
9134 {
9135 const enum insn_code icode;
9136 const char *const name;
9137 int signature;
9138 };
9139
9140 /* Describe the number and signedness of arguments; arg[0] == result
9141 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9142 static const char signature_args[][4] =
9143 {
9144 #define SH_BLTIN_V2SI2 0
9145 { 4, 4 },
9146 #define SH_BLTIN_V4HI2 1
9147 { 4, 4 },
9148 #define SH_BLTIN_V2SI3 2
9149 { 4, 4, 4 },
9150 #define SH_BLTIN_V4HI3 3
9151 { 4, 4, 4 },
9152 #define SH_BLTIN_V8QI3 4
9153 { 4, 4, 4 },
9154 #define SH_BLTIN_MAC_HISI 5
9155 { 1, 4, 4, 1 },
9156 #define SH_BLTIN_SH_HI 6
9157 { 4, 4, 1 },
9158 #define SH_BLTIN_SH_SI 7
9159 { 4, 4, 1 },
9160 #define SH_BLTIN_V4HI2V2SI 8
9161 { 4, 4, 4 },
9162 #define SH_BLTIN_V4HI2V8QI 9
9163 { 4, 4, 4 },
9164 #define SH_BLTIN_SISF 10
9165 { 4, 2 },
9166 #define SH_BLTIN_LDUA_L 11
9167 { 2, 8 },
9168 #define SH_BLTIN_LDUA_Q 12
9169 { 1, 8 },
9170 #define SH_BLTIN_STUA_L 13
9171 { 0, 8, 2 },
9172 #define SH_BLTIN_STUA_Q 14
9173 { 0, 8, 1 },
9174 #define SH_BLTIN_UDI 15
9175 { 0, 8, 1 },
9176 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9177 #define SH_BLTIN_2 16
9178 #define SH_BLTIN_SU 16
9179 { 1, 2 },
9180 #define SH_BLTIN_3 17
9181 #define SH_BLTIN_SUS 17
9182 { 2, 2, 1 },
9183 #define SH_BLTIN_PSSV 18
9184 { 0, 8, 2, 2 },
9185 #define SH_BLTIN_XXUU 19
9186 #define SH_BLTIN_UUUU 19
9187 { 1, 1, 1, 1 },
9188 #define SH_BLTIN_PV 20
9189 { 0, 8 },
9190 };
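
/* Decoding example (illustrative only): SH_BLTIN_MAC_HISI is { 1, 4, 4, 1 },
   i.e. the result (arg[0]) is unsigned, the first two operands have
   "don't care" signedness and the last operand is unsigned; shorter rows
   such as SH_BLTIN_SISF's { 4, 2 } leave the remaining slots 0 (no more
   arguments).  */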
9191 /* mcmv: operands considered unsigned. */
9192 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9193 /* mperm: control value considered unsigned int. */
9194 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9195 /* mshards_q: returns signed short. */
9196 /* nsb: takes long long arg, returns unsigned char. */
9197 static const struct builtin_description bdesc[] =
9198 {
9199 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9200 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9201 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9202 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9203 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9204 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9205 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9206 #if 0
9207 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9208 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9209 #endif
9210 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9211 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9212 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9213 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9214 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9215 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9216 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9217 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9218 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9219 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9220 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9221 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9222 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9223 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9224 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9225 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9226 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9227 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9228 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9229 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
9230 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9231 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9232 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9233 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9234 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9235 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9236 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9237 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9238 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9239 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9240 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9241 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9242 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9243 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9244 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9245 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9246 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9247 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9248 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9249 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9250 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9251 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9252 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9253 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9254 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9255 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9256 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9257 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9258 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9259 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9260 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9261 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9262 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9263 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9264 #if 0
9265 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9266 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9267 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9268 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9269 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9270 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9271 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9272 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9273 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9274 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9275 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9276 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9277 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9278 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9279 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9280 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9281 #endif
9282 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9283 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9284 #if 0
9285 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9286 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9287 #endif
9288 };
9289
9290 static void
9291 sh_media_init_builtins (void)
9292 {
9293 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9294 const struct builtin_description *d;
9295
9296 memset (shared, 0, sizeof shared);
9297 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9298 {
9299 tree type, arg_type;
9300 int signature = d->signature;
9301 int i;
9302
9303 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9304 type = shared[signature];
9305 else
9306 {
9307 int has_result = signature_args[signature][0] != 0;
9308
9309 if (signature_args[signature][1] == 8
9310 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9311 continue;
9312 if (! TARGET_FPU_ANY
9313 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9314 continue;
9315 type = void_list_node;
9316 for (i = 3; ; i--)
9317 {
9318 int arg = signature_args[signature][i];
9319 int opno = i - 1 + has_result;
9320
9321 if (arg == 8)
9322 arg_type = ptr_type_node;
9323 else if (arg)
9324 arg_type = ((*lang_hooks.types.type_for_mode)
9325 (insn_data[d->icode].operand[opno].mode,
9326 (arg & 1)));
9327 else if (i)
9328 continue;
9329 else
9330 arg_type = void_type_node;
9331 if (i == 0)
9332 break;
9333 type = tree_cons (NULL_TREE, arg_type, type);
9334 }
9335 type = build_function_type (arg_type, type);
9336 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9337 shared[signature] = type;
9338 }
9339 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9340 NULL, NULL_TREE);
9341 }
9342 }
9343
9344 static void
9345 sh_init_builtins (void)
9346 {
9347 if (TARGET_SHMEDIA)
9348 sh_media_init_builtins ();
9349 }
9350
9351 /* Expand an expression EXP that calls a built-in function,
9352 with result going to TARGET if that's convenient
9353 (and in mode MODE if that's convenient).
9354 SUBTARGET may be used as the target for computing one of EXP's operands.
9355 IGNORE is nonzero if the value is to be ignored. */
9356
9357 static rtx
9358 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9359 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9360 {
9361 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9362 tree arglist = TREE_OPERAND (exp, 1);
9363 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9364 const struct builtin_description *d = &bdesc[fcode];
9365 enum insn_code icode = d->icode;
9366 int signature = d->signature;
9367 enum machine_mode tmode = VOIDmode;
9368 int nop = 0, i;
9369 rtx op[4];
9370 rtx pat;
9371
9372 if (signature_args[signature][0])
9373 {
9374 if (ignore)
9375 return 0;
9376
9377 tmode = insn_data[icode].operand[0].mode;
9378 if (! target
9379 || GET_MODE (target) != tmode
9380 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9381 target = gen_reg_rtx (tmode);
9382 op[nop++] = target;
9383 }
9384 else
9385 target = 0;
9386
9387 for (i = 1; i <= 3; i++, nop++)
9388 {
9389 tree arg;
9390 enum machine_mode opmode, argmode;
9391
9392 if (! signature_args[signature][i])
9393 break;
9394 arg = TREE_VALUE (arglist);
9395 if (arg == error_mark_node)
9396 return const0_rtx;
9397 arglist = TREE_CHAIN (arglist);
9398 opmode = insn_data[icode].operand[nop].mode;
9399 argmode = TYPE_MODE (TREE_TYPE (arg));
9400 if (argmode != opmode)
9401 arg = build1 (NOP_EXPR,
9402 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9403 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9404 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9405 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9406 }
9407
9408 switch (nop)
9409 {
9410 case 1:
9411 pat = (*insn_data[d->icode].genfun) (op[0]);
9412 break;
9413 case 2:
9414 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9415 break;
9416 case 3:
9417 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9418 break;
9419 case 4:
9420 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9421 break;
9422 default:
9423 abort ();
9424 }
9425 if (! pat)
9426 return 0;
9427 emit_insn (pat);
9428 return target;
9429 }
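/* Illustrative sketch only: how one of the SHmedia builtins above might be
   called from C on a SHmedia target; the typedef and function names here are
   hypothetical.

   typedef short v4hi __attribute__ ((vector_size (8)));

   static v4hi
   shuffle_low_halves (v4hi a, v4hi b)
   {
     return __builtin_sh_media_MSHFLO_W (a, b);
   }
*/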
9430
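/* Expand a V2SF unary operation CODE with destination OP0 and operand OP1
   by applying the SFmode operation to each of the two vector lanes in turn
   (the 0/1 constants below select the lanes). */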
9431 void
9432 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9433 {
9434 rtx sel0 = const0_rtx;
9435 rtx sel1 = const1_rtx;
9436 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9437 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9438
9439 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9440 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9441 }
9442
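/* Likewise for a V2SF binary operation CODE on OP1 and OP2 with destination
   OP0, expanded one lane at a time. */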
9443 void
9444 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9445 {
9446 rtx sel0 = const0_rtx;
9447 rtx sel1 = const1_rtx;
9448 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9449 = gen_binary_sf_op;
9450 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9451
9452 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9453 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9454 }
9455
9456 /* Return true if a mode change from FROM to TO is invalid for registers
9457 in class CLASS. */
9458 bool
9459 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9460 enum reg_class class)
9461 {
9462 /* We want to enable the use of SUBREGs as a means to
9463 VEC_SELECT a single element of a vector. */
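/* E.g. (illustrative), a SUBREG such as (subreg:SF (reg:V2SF fr4) 0) should
   remain usable to read a single SFmode lane of a vector register. */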
9464 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9465 return (reg_classes_intersect_p (GENERAL_REGS, class));
9466
9467 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9468 {
9469 if (TARGET_LITTLE_ENDIAN)
9470 {
9471 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9472 return reg_classes_intersect_p (DF_REGS, class);
9473 }
9474 else
9475 {
9476 if (GET_MODE_SIZE (from) < 8)
9477 return reg_classes_intersect_p (DF_HI_REGS, class);
9478 }
9479 }
9480 return 0;
9481 }
9482
9483
9484 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9485 that label is used. */
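/* The GOTOFF addresses handled here look roughly like (illustrative)
   (const (unspec [(label_ref ...)] UNSPEC_GOTOFF)), optionally with the
   unspec appearing as the first operand of a PLUS that adds a constant
   offset. */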
9486
9487 void
9488 sh_mark_label (rtx address, int nuses)
9489 {
9490 if (GOTOFF_P (address))
9491 {
9492 /* Extract the label or symbol. */
9493 address = XEXP (address, 0);
9494 if (GET_CODE (address) == PLUS)
9495 address = XEXP (address, 0);
9496 address = XVECEXP (address, 0, 0);
9497 }
9498 if (GET_CODE (address) == LABEL_REF
9499 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9500 LABEL_NUSES (XEXP (address, 0)) += nuses;
9501 }
9502
9503 /* Compute extra cost of moving data between one register class
9504 and another. */
9505
9506 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9507 uses this information. Hence, the general register <-> floating point
9508 register information here is not used for SFmode. */
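/* Worked example (illustrative): with the formula below, moving a DFmode
   value (8 bytes) between a general register and a floating point register
   costs 12 * ((8 + 7) / 8) = 12, or 8 with TARGET_FMOVD, or 4 on SHMEDIA. */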
9509
9510 int
9511 sh_register_move_cost (enum machine_mode mode,
9512 enum reg_class srcclass, enum reg_class dstclass)
9513 {
9514 if (dstclass == T_REGS || dstclass == PR_REGS)
9515 return 10;
9516
9517 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9518 return 4;
9519
9520 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9521 && REGCLASS_HAS_FP_REG (srcclass)
9522 && REGCLASS_HAS_FP_REG (dstclass))
9523 return 4;
9524
9525 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9526 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9527 return 9;
9528
9529 if ((REGCLASS_HAS_FP_REG (dstclass)
9530 && REGCLASS_HAS_GENERAL_REG (srcclass))
9531 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9532 && REGCLASS_HAS_FP_REG (srcclass)))
9533 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9534 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9535
9536 if ((dstclass == FPUL_REGS
9537 && REGCLASS_HAS_GENERAL_REG (srcclass))
9538 || (srcclass == FPUL_REGS
9539 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9540 return 5;
9541
9542 if ((dstclass == FPUL_REGS
9543 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9544 || (srcclass == FPUL_REGS
9545 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9546 return 7;
9547
9548 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9549 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9550 return 20;
9551
9552 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9553 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9554 return 4;
9555
9556 if (TARGET_SHMEDIA
9557 || (TARGET_FMOVD
9558 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9559 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9560 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9561
9562 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9563 }
9564
9565 /* Like register_operand, but take into account that SHMEDIA can use
9566 the constant zero like a general register. */
9567 int
9568 sh_register_operand (rtx op, enum machine_mode mode)
9569 {
9570 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9571 return 1;
9572 return register_operand (op, mode);
9573 }
9574
9575 int
9576 cmpsi_operand (rtx op, enum machine_mode mode)
9577 {
9578 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9579 && GET_MODE (op) == SImode)
9580 return 1;
9581 return arith_operand (op, mode);
9582 }
9583
9584 static rtx emit_load_ptr (rtx, rtx);
9585
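/* Load the pointer value at ADDR into REG, sign-extending from ptr_mode to
   Pmode when the two differ (e.g. an assumed SHmedia configuration with
   32-bit pointers and 64-bit Pmode). */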
9586 static rtx
9587 emit_load_ptr (rtx reg, rtx addr)
9588 {
9589 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9590
9591 if (Pmode != ptr_mode)
9592 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9593 return emit_move_insn (reg, mem);
9594 }
9595
9596 void
9597 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9598 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9599 tree function)
9600 {
9601 CUMULATIVE_ARGS cum;
9602 int structure_value_byref = 0;
9603 rtx this, this_value, sibcall, insns, funexp;
9604 tree funtype = TREE_TYPE (function);
9605 int simple_add = CONST_OK_FOR_ADD (delta);
9606 int did_load = 0;
9607 rtx scratch0, scratch1, scratch2;
9608
9609 reload_completed = 1;
9610 epilogue_completed = 1;
9611 no_new_pseudos = 1;
9612 current_function_uses_only_leaf_regs = 1;
9613 reset_block_changes ();
9614
9615 emit_note (NOTE_INSN_PROLOGUE_END);
9616
9617 /* Find the "this" pointer. We have such a wide range of ABIs for the
9618 SH that it's best to do this completely machine independently.
9619 "this" is passed as first argument, unless a structure return pointer
9620 comes first, in which case "this" comes second. */
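/* In rough C-like pseudo-code (illustrative only), the thunk emitted below
   behaves as:

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     return FUNCTION (this, ...);   -- emitted as a sibling (tail) call  */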
9621 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9622 #ifndef PCC_STATIC_STRUCT_RETURN
9623 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9624 structure_value_byref = 1;
9625 #endif /* not PCC_STATIC_STRUCT_RETURN */
9626 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9627 {
9628 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9629
9630 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9631 }
9632 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9633
9634 /* For SHcompact, we have only r0 as a scratch register: r1 is the
9635 static chain pointer (even if you can't have nested virtual functions
9636 right now, someone might implement them sometime), and the rest of the
9637 registers are used for argument passing, are callee-saved, or are reserved. */
9638 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9639 if (! TARGET_SH5)
9640 {
9641 scratch1 = gen_rtx_REG (ptr_mode, 1);
9642 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9643 to where struct values are to be returned. */
9644 scratch2 = gen_rtx_REG (Pmode, 3);
9645 }
9646 else if (TARGET_SHMEDIA)
9647 {
9648 scratch1 = gen_rtx_REG (ptr_mode, 21);
9649 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
9650 }
9651
9652 this_value = plus_constant (this, delta);
9653 if (vcall_offset
9654 && (simple_add || scratch0 != scratch1)
9655 && strict_memory_address_p (ptr_mode, this_value))
9656 {
9657 emit_load_ptr (scratch0, this_value);
9658 did_load = 1;
9659 }
9660
9661 if (!delta)
9662 ; /* Do nothing. */
9663 else if (simple_add)
9664 emit_move_insn (this, this_value);
9665 else
9666 {
9667 emit_move_insn (scratch1, GEN_INT (delta));
9668 emit_insn (gen_add2_insn (this, scratch1));
9669 }
9670
9671 if (vcall_offset)
9672 {
9673 rtx offset_addr;
9674
9675 if (!did_load)
9676 emit_load_ptr (scratch0, this);
9677
9678 offset_addr = plus_constant (scratch0, vcall_offset);
9679 if (strict_memory_address_p (ptr_mode, offset_addr))
9680 ; /* Do nothing. */
9681 else if (! TARGET_SH5)
9682 {
9683 /* scratch0 != scratch1, and we have indexed loads. Get a better
9684 schedule by loading the offset into r1 and using an indexed
9685 load - then the load of r1 can issue before the load from
9686 (this + delta) finishes. */
9687 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9688 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9689 }
9690 else if (CONST_OK_FOR_ADD (vcall_offset))
9691 {
9692 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9693 offset_addr = scratch0;
9694 }
9695 else if (scratch0 != scratch1)
9696 {
9697 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9698 emit_insn (gen_add2_insn (scratch0, scratch1));
9699 offset_addr = scratch0;
9700 }
9701 else
9702 abort (); /* FIXME */
9703 emit_load_ptr (scratch0, offset_addr);
9704
9705 if (Pmode != ptr_mode)
9706 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9707 emit_insn (gen_add2_insn (this, scratch0));
9708 }
9709
9710 /* Generate a tail call to the target function. */
9711 if (! TREE_USED (function))
9712 {
9713 assemble_external (function);
9714 TREE_USED (function) = 1;
9715 }
9716 funexp = XEXP (DECL_RTL (function), 0);
9717 emit_move_insn (scratch2, funexp);
9718 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9719 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9720 SIBLING_CALL_P (sibcall) = 1;
9721 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9722 emit_barrier ();
9723
9724 /* Run just enough of rest_of_compilation to do scheduling and get
9725 the insns emitted. Note that use_thunk calls
9726 assemble_start_function and assemble_end_function. */
9727
9728 insn_locators_initialize ();
9729 insns = get_insns ();
9730
9731 if (optimize > 0 && flag_schedule_insns_after_reload)
9732 {
9733 find_basic_blocks (insns, max_reg_num (), dump_file);
9734 life_analysis (dump_file, PROP_FINAL);
9735
9736 split_all_insns (1);
9737
9738 schedule_insns (dump_file);
9739 }
9740
9741 sh_reorg ();
9742
9743 if (optimize > 0 && flag_delayed_branch)
9744 dbr_schedule (insns, dump_file);
9745 shorten_branches (insns);
9746 final_start_function (insns, file, 1);
9747 final (insns, file, 1, 0);
9748 final_end_function ();
9749
9750 if (optimize > 0 && flag_schedule_insns_after_reload)
9751 {
9752 /* Release all memory allocated by flow. */
9753 free_basic_block_vars ();
9754
9755 /* Release all memory held by regsets now. */
9756 regset_release_memory ();
9757 }
9758
9759 reload_completed = 0;
9760 epilogue_completed = 0;
9761 no_new_pseudos = 0;
9762 }
9763
9764 rtx
9765 function_symbol (const char *name)
9766 {
9767 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9768 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9769 return sym;
9770 }
9771
9772 /* Return the number of a general purpose register that is set in S, or -1 if there is none. */
9773 static int
9774 scavenge_reg (HARD_REG_SET *s)
9775 {
9776 int r;
9777 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9778 if (TEST_HARD_REG_BIT (*s, r))
9779 return r;
9780 return -1;
9781 }
9782
9783 rtx
9784 sh_get_pr_initial_val (void)
9785 {
9786 rtx val;
9787
9788 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9789 PR register on SHcompact, because it might be clobbered by the prologue.
9790 We check first if that is known to be the case. */
9791 if (TARGET_SHCOMPACT
9792 && ((current_function_args_info.call_cookie
9793 & ~ CALL_COOKIE_RET_TRAMP (1))
9794 || current_function_has_nonlocal_label))
9795 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9796
9797 /* If we haven't finished rtl generation, there might be a nonlocal label
9798 that we haven't seen yet.
9799 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9800 is set, unless it has been called before for the same register. And even
9801 then, we end up in trouble if we didn't use the register in the same
9802 basic block before. So call get_hard_reg_initial_val now and wrap it
9803 in an unspec if we might need to replace it. */
9804 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9805 combine can put the pseudo returned by get_hard_reg_initial_val into
9806 instructions that need a general purpose register, which will fail to
9807 be recognized when the pseudo becomes allocated to PR. */
9808 val
9809 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9810 if (TARGET_SH1)
9811 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9812 return val;
9813 }
9814
9815 int
9816 sh_expand_t_scc (enum rtx_code code, rtx target)
9817 {
9818 rtx result = target;
9819 HOST_WIDE_INT val;
9820
9821 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9822 || GET_CODE (sh_compare_op1) != CONST_INT)
9823 return 0;
9824 if (GET_CODE (result) != REG)
9825 result = gen_reg_rtx (SImode);
9826 val = INTVAL (sh_compare_op1);
9827 if ((code == EQ && val == 1) || (code == NE && val == 0))
9828 emit_insn (gen_movt (result));
9829 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9830 {
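/* Compute 1 - T: the CLOBBER below tells the optimizers that RESULT's old
   value is dead, subc with identical operands then leaves 0 - 0 - T = -T
   in RESULT, and adding 1 yields the logical negation of the T bit. */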
9831 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9832 emit_insn (gen_subc (result, result, result));
9833 emit_insn (gen_addsi3 (result, result, const1_rtx));
9834 }
9835 else if (code == EQ || code == NE)
9836 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9837 else
9838 return 0;
9839 if (result != target)
9840 emit_move_insn (target, result);
9841 return 1;
9842 }
9843
9844 /* INSN is an sfunc; return the rtx that describes the address used. */
9845 static rtx
9846 extract_sfunc_addr (rtx insn)
9847 {
9848 rtx pattern, part = NULL_RTX;
9849 int len, i;
9850
9851 pattern = PATTERN (insn);
9852 len = XVECLEN (pattern, 0);
9853 for (i = 0; i < len; i++)
9854 {
9855 part = XVECEXP (pattern, 0, i);
9856 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9857 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9858 return XEXP (part, 0);
9859 }
9860 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9861 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9862 abort ();
9863 }
9864
9865 /* Verify that the register in use_sfunc_addr still agrees with the address
9866 used in the sfunc. This prevents fill_slots_from_thread from changing
9867 use_sfunc_addr.
9868 INSN is the use_sfunc_addr instruction, and REG is the register it
9869 guards. */
9870 int
9871 check_use_sfunc_addr (rtx insn, rtx reg)
9872 {
9873 /* Search for the sfunc. It should really come right after INSN. */
9874 while ((insn = NEXT_INSN (insn)))
9875 {
9876 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9877 break;
9878 if (! INSN_P (insn))
9879 continue;
9880
9881 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9882 insn = XVECEXP (PATTERN (insn), 0, 0);
9883 if (GET_CODE (PATTERN (insn)) != PARALLEL
9884 || get_attr_type (insn) != TYPE_SFUNC)
9885 continue;
9886 return rtx_equal_p (extract_sfunc_addr (insn), reg);
9887 }
9888 abort ();
9889 }
9890
9891 /* Returns 1 if OP is a MEM that can be the source of a simple move operation. */
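/* Illustrative examples of operands accepted: (mem:SI (reg:SI r4)) and
   (mem:SI (post_inc:SI (reg:SI r4))); the register choice is arbitrary. */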
9892
9893 int
9894 unaligned_load_operand (rtx op, enum machine_mode mode)
9895 {
9896 rtx inside;
9897
9898 if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
9899 return 0;
9900
9901 inside = XEXP (op, 0);
9902
9903 if (GET_CODE (inside) == POST_INC)
9904 inside = XEXP (inside, 0);
9905
9906 if (GET_CODE (inside) == REG)
9907 return 1;
9908
9909 return 0;
9910 }
9911
9912 /* This function returns a constant rtx that represents 2**15 / pi in
9913 SFmode. It's used to scale SFmode angles, in radians, to a
9914 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
9915 maps to 0x10000). */
9916
9917 static GTY(()) rtx sh_fsca_sf2int_rtx;
9918
9919 rtx
9920 sh_fsca_sf2int (void)
9921 {
9922 if (! sh_fsca_sf2int_rtx)
9923 {
9924 REAL_VALUE_TYPE rv;
9925
9926 real_from_string (&rv, "10430.378350470453");
9927 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
9928 }
9929
9930 return sh_fsca_sf2int_rtx;
9931 }
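/* Worked example (illustrative): an angle of pi/2 radians multiplied by
   2**15/pi (= 10430.378...) gives 16384 = 0x4000, one quarter of the
   0x10000 full circle. */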
9932
9933 /* This function returns a constant rtx that represents 2**15 / pi in
9934 DFmode. It's used to scale DFmode angles, in radians, to a
9935 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
9936 maps to 0x10000). */
9937
9938 static GTY(()) rtx sh_fsca_df2int_rtx;
9939
9940 rtx
9941 sh_fsca_df2int (void)
9942 {
9943 if (! sh_fsca_df2int_rtx)
9944 {
9945 REAL_VALUE_TYPE rv;
9946
9947 real_from_string (&rv, "10430.378350470453");
9948 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
9949 }
9950
9951 return sh_fsca_df2int_rtx;
9952 }
9953
9954 /* This function returns a constant rtx that represents pi / 2**15 in
9955 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
9956 of a full circle back to an SFmode value (i.e., 0x10000 maps to
9957 2*pi). */
9958
9959 static GTY(()) rtx sh_fsca_int2sf_rtx;
9960
9961 rtx
9962 sh_fsca_int2sf (void)
9963 {
9964 if (! sh_fsca_int2sf_rtx)
9965 {
9966 REAL_VALUE_TYPE rv;
9967
9968 real_from_string (&rv, "9.587379924285257e-5");
9969 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
9970 }
9971
9972 return sh_fsca_int2sf_rtx;
9973 }
9974 #include "gt-sh.h"