1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2016 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sstream>
23
24 #include "config.h"
25 #define INCLUDE_VECTOR
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "gimple.h"
33 #include "cfghooks.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "optabs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "flags.h"
47 #include "explow.h"
48 #include "expr.h"
49 #include "reload.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "dwarf2.h"
53 #include "langhooks.h"
54 #include "cfgrtl.h"
55 #include "intl.h"
56 #include "sched-int.h"
57 #include "gimplify.h"
58 #include "tm-constrs.h"
59 #include "opts.h"
60 #include "tree-pass.h"
61 #include "context.h"
62 #include "builtins.h"
63 #include "rtl-iter.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
68 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
69
70 #define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size)
71 #define GEN_MOV (*(gen_movsi))
72 #define GEN_ADD3 (*(gen_addsi3))
73 #define GEN_SUB3 (*(gen_subsi3))
74
75 /* Used to simplify the logic below. Find the attributes wherever
76 they may be. */
77 #define SH_ATTRIBUTES(decl) \
78 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
79 : DECL_ATTRIBUTES (decl) \
80 ? (DECL_ATTRIBUTES (decl)) \
81 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
82
83 /* Set to true by expand_prologue() when the function is an
84 interrupt handler. */
85 bool current_function_interrupt;
86
87 tree sh_deferred_function_attributes;
88 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
89
90 /* Global variables for machine-dependent things. */
91
92 /* Which cpu are we scheduling for. */
93 enum processor_type sh_cpu;
94
95 /* Definitions used in ready queue reordering for first scheduling pass. */
96
97 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
98 static short *regmode_weight[2];
99
100 /* Total SFmode and SImode weights of scheduled insns. */
101 static int curr_regmode_pressure[2];
102
103 /* Number of r0 life regions. */
104 static int r0_life_regions;
105
106 /* If true, skip cycles for Q -> R movement. */
107 static int skip_cycles = 0;
108
109 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
110 and returned from sh_reorder2. */
111 static short cached_can_issue_more;
112
113 /* Unique number for UNSPEC_BBR pattern. */
114 static unsigned int unspec_bbr_uid = 1;
115
116 /* Provides the class number of the smallest class containing
117 reg number. */
118 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 {
120 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
156 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
157 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
158 GENERAL_REGS, GENERAL_REGS,
159 };
160
161 char sh_register_names[FIRST_PSEUDO_REGISTER] \
162 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163
164 char sh_additional_register_names[ADDREGNAMES_SIZE] \
165 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
166 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167
168 int assembler_dialect;
169
170 static void split_branches (rtx_insn *);
171 static int branch_dest (rtx);
172 static void print_slot (rtx_sequence *);
173 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
174 static void dump_table (rtx_insn *, rtx_insn *);
175 static bool broken_move (rtx_insn *);
176 static bool mova_p (rtx_insn *);
177 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
178 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
179 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
180 static void sh_reorg (void);
181 static void sh_option_override (void);
182 static void sh_override_options_after_change (void);
183 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
184 static rtx_insn* emit_frame_insn (rtx);
185 static rtx push (int);
186 static void pop (int);
187 static void push_regs (HARD_REG_SET* mask, bool interrupt_handler);
188 static int calc_live_regs (HARD_REG_SET *);
189 static HOST_WIDE_INT rounded_frame_size (int);
190 static bool sh_frame_pointer_required (void);
191 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
192 static int sh_mode_needed (int, rtx_insn *);
193 static int sh_mode_after (int, int, rtx_insn *);
194 static int sh_mode_entry (int);
195 static int sh_mode_exit (int);
196 static int sh_mode_priority (int entity, int n);
197
198 static rtx mark_constant_pool_use (rtx);
199 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
200 int, bool *);
201 static tree sh_handle_resbank_handler_attribute (tree *, tree,
202 tree, int, bool *);
203 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
204 tree, int, bool *);
205 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
206 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
207 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
208 static void sh_print_operand (FILE *, rtx, int);
209 static void sh_print_operand_address (FILE *, machine_mode, rtx);
210 static bool sh_print_operand_punct_valid_p (unsigned char code);
211 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
212 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
213 static void sh_insert_attributes (tree, tree *);
214 static const char *sh_check_pch_target_flags (int);
215 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
216 static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
217 static int sh_issue_rate (void);
218 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
219 static short find_set_regmode_weight (rtx, machine_mode);
220 static short find_insn_regmode_weight (rtx, machine_mode);
221 static void find_regmode_weight (basic_block, machine_mode);
222 static int find_r0_life_regions (basic_block);
223 static void sh_md_init_global (FILE *, int, int);
224 static void sh_md_finish_global (FILE *, int);
225 static int rank_for_reorder (const void *, const void *);
226 static void swap_reorder (rtx_insn **, int);
227 static void ready_reorder (rtx_insn **, int);
228 static bool high_pressure (machine_mode);
229 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
230 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
231 static void sh_md_init (FILE *, int, int);
232 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
233
234 static bool sh_function_ok_for_sibcall (tree, tree);
235
236 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
237 static bool sh_ms_bitfield_layout_p (const_tree);
238
239 static void sh_init_builtins (void);
240 static tree sh_builtin_decl (unsigned, bool);
241 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
242 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
243 HOST_WIDE_INT, tree);
244 static void sh_file_start (void);
245 static bool sh_assemble_integer (rtx, unsigned int, int);
246 static bool flow_dependent_p (rtx, rtx);
247 static void flow_dependent_p_1 (rtx, const_rtx, void *);
248 static int shiftcosts (rtx);
249 static int and_xor_ior_costs (rtx, int);
250 static int addsubcosts (rtx);
251 static int multcosts (rtx);
252 static bool unspec_caller_rtx_p (rtx);
253 static bool sh_cannot_copy_insn_p (rtx_insn *);
254 static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
255 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
256 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
257 static int sh_pr_n_sets (void);
258 static rtx sh_allocate_initial_value (rtx);
259 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
260 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
261 machine_mode,
262 struct secondary_reload_info *);
263 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
264 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
265 static rtx sh_delegitimize_address (rtx);
266 static bool sh_cannot_substitute_mem_equiv_p (rtx);
267 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
268 static int scavenge_reg (HARD_REG_SET *s);
269
270 static rtx sh_struct_value_rtx (tree, int);
271 static rtx sh_function_value (const_tree, const_tree, bool);
272 static bool sh_function_value_regno_p (const unsigned int);
273 static rtx sh_libcall_value (machine_mode, const_rtx);
274 static bool sh_return_in_memory (const_tree, const_tree);
275 static rtx sh_builtin_saveregs (void);
276 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
277 tree, int *, int);
278 static bool sh_strict_argument_naming (cumulative_args_t);
279 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
280 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
281 static tree sh_build_builtin_va_list (void);
282 static void sh_va_start (tree, rtx);
283 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
284 static bool sh_promote_prototypes (const_tree);
285 static machine_mode sh_promote_function_mode (const_tree type,
286 machine_mode,
287 int *punsignedp,
288 const_tree funtype,
289 int for_return);
290 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
291 const_tree, bool);
292 static bool sh_callee_copies (cumulative_args_t, machine_mode,
293 const_tree, bool);
294 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
295 tree, bool);
296 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
297 const_tree, bool);
298 static rtx sh_function_arg (cumulative_args_t, machine_mode,
299 const_tree, bool);
300 static int sh_dwarf_calling_convention (const_tree);
301 static void sh_encode_section_info (tree, rtx, int);
302 static bool sh2a_function_vector_p (tree);
303 static void sh_trampoline_init (rtx, tree, rtx);
304 static rtx sh_trampoline_adjust_address (rtx);
305 static void sh_conditional_register_usage (void);
306 static bool sh_legitimate_constant_p (machine_mode, rtx);
307 static int mov_insn_size (machine_mode, bool);
308 static int mov_insn_alignment_mask (machine_mode, bool);
309 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
310 unsigned int,
311 enum by_pieces_operation,
312 bool);
313 static bool sequence_insn_p (rtx_insn *);
314 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
315 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
316 machine_mode, bool);
317 static bool sh_legitimate_combined_insn (rtx_insn* insn);
318
319 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
320
321 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
322 \f
323 static const struct attribute_spec sh_attribute_table[] =
324 {
325 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
326 affects_type_identity } */
327 { "interrupt_handler", 0, 0, true, false, false,
328 sh_handle_interrupt_handler_attribute, false },
329 { "sp_switch", 1, 1, true, false, false,
330 sh_handle_sp_switch_attribute, false },
331 { "trap_exit", 1, 1, true, false, false,
332 sh_handle_trap_exit_attribute, false },
333 { "renesas", 0, 0, false, true, false,
334 sh_handle_renesas_attribute, false },
335 { "trapa_handler", 0, 0, true, false, false,
336 sh_handle_interrupt_handler_attribute, false },
337 { "nosave_low_regs", 0, 0, true, false, false,
338 sh_handle_interrupt_handler_attribute, false },
339 { "resbank", 0, 0, true, false, false,
340 sh_handle_resbank_handler_attribute, false },
341 { "function_vector", 1, 1, true, false, false,
342 sh2a_handle_function_vector_handler_attribute, false },
343 { NULL, 0, 0, false, false, false, NULL, false }
344 };
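
/* Illustrative only, not part of the port proper: a sketch of how the
   attributes in the table above are typically written in user code, based
   on the argument counts given here.  The function and variable names are
   made up.

     extern char *alt_stack;
     void isr1 (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));
     void isr3 (void) __attribute__ ((interrupt_handler, trap_exit (11)));
     void vec4 (void) __attribute__ ((function_vector (4)));  */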
345 \f
346 /* Initialize the GCC target structure. */
347 #undef TARGET_ATTRIBUTE_TABLE
348 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
349
350 /* The next two are used for debug info when compiling with -gdwarf. */
351 #undef TARGET_ASM_UNALIGNED_HI_OP
352 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
353 #undef TARGET_ASM_UNALIGNED_SI_OP
354 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
355
356 #undef TARGET_OPTION_OVERRIDE
357 #define TARGET_OPTION_OVERRIDE sh_option_override
358
359 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
360 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
361 sh_override_options_after_change
362
363 #undef TARGET_PRINT_OPERAND
364 #define TARGET_PRINT_OPERAND sh_print_operand
365 #undef TARGET_PRINT_OPERAND_ADDRESS
366 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
367 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
368 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
369 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
370 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
371
372 #undef TARGET_ASM_FUNCTION_EPILOGUE
373 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
374
375 #undef TARGET_ASM_OUTPUT_MI_THUNK
376 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
377
378 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
379 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
380 hook_bool_const_tree_hwi_hwi_const_tree_true
381
382 #undef TARGET_ASM_FILE_START
383 #define TARGET_ASM_FILE_START sh_file_start
384 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
385 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
386
387 #undef TARGET_ASM_INTEGER
388 #define TARGET_ASM_INTEGER sh_assemble_integer
389
390 #undef TARGET_REGISTER_MOVE_COST
391 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
392
393 #undef TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
395
396 #undef TARGET_SCHED_ADJUST_COST
397 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
398
399 #undef TARGET_SCHED_ISSUE_RATE
400 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
401
402 /* The following hooks have been implemented to re-enable sched1.  With the
403    help of these macros we limit the movement of insns in sched1 in order to
404    reduce the register pressure.  The overall idea is to keep count of the
405    SImode and SFmode regs required by already scheduled insns.  When these
406    counts cross some threshold values, give priority to insns that free
407    registers.  The insn that frees registers is most likely to be the insn
408    with the lowest LUID (original insn order); but such an insn might sit in
409    the stalled queue (Q) instead of the ready queue (R).  To solve this, we
410    skip cycles, up to a maximum of 8, so that such insns may move from Q -> R.
411
412    The hooks are described below:
413
414    TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic scheduler;
415    it is called inside the sched_init function just after the
416    find_insn_reg_weights function call.  It is used to calculate the SImode
417    and SFmode weights of the insns of basic blocks, much like what
418    find_insn_reg_weights does.
419 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
420
421 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
422 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
423 (Q)->(R).
424
425    TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
426    high, reorder the ready queue so that the insn with the lowest LUID will
427    be issued next.
428
429 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
430 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
431
432 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
433 can be returned from TARGET_SCHED_REORDER2.
434
435 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
436
437 #undef TARGET_SCHED_DFA_NEW_CYCLE
438 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
439
440 #undef TARGET_SCHED_INIT_GLOBAL
441 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
442
443 #undef TARGET_SCHED_FINISH_GLOBAL
444 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
445
446 #undef TARGET_SCHED_VARIABLE_ISSUE
447 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
448
449 #undef TARGET_SCHED_REORDER
450 #define TARGET_SCHED_REORDER sh_reorder
451
452 #undef TARGET_SCHED_REORDER2
453 #define TARGET_SCHED_REORDER2 sh_reorder2
454
455 #undef TARGET_SCHED_INIT
456 #define TARGET_SCHED_INIT sh_md_init
457
458 #undef TARGET_DELEGITIMIZE_ADDRESS
459 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
460
461 #undef TARGET_LEGITIMIZE_ADDRESS
462 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
463
464 #undef TARGET_CAN_FOLLOW_JUMP
465 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
466
467 #undef TARGET_MS_BITFIELD_LAYOUT_P
468 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
469
470 #undef TARGET_INIT_BUILTINS
471 #define TARGET_INIT_BUILTINS sh_init_builtins
472 #undef TARGET_BUILTIN_DECL
473 #define TARGET_BUILTIN_DECL sh_builtin_decl
474 #undef TARGET_EXPAND_BUILTIN
475 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
476
477 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
478 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
479
480 #undef TARGET_CANNOT_COPY_INSN_P
481 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
482 #undef TARGET_RTX_COSTS
483 #define TARGET_RTX_COSTS sh_rtx_costs
484 #undef TARGET_ADDRESS_COST
485 #define TARGET_ADDRESS_COST sh_address_cost
486 #undef TARGET_ALLOCATE_INITIAL_VALUE
487 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
488
489 #undef TARGET_MACHINE_DEPENDENT_REORG
490 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
491
492 #undef TARGET_DWARF_REGISTER_SPAN
493 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
494
495 #ifdef HAVE_AS_TLS
496 #undef TARGET_HAVE_TLS
497 #define TARGET_HAVE_TLS true
498 #endif
499
500 #undef TARGET_PROMOTE_PROTOTYPES
501 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
502 #undef TARGET_PROMOTE_FUNCTION_MODE
503 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
504
505 #undef TARGET_FUNCTION_VALUE
506 #define TARGET_FUNCTION_VALUE sh_function_value
507 #undef TARGET_FUNCTION_VALUE_REGNO_P
508 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
509 #undef TARGET_LIBCALL_VALUE
510 #define TARGET_LIBCALL_VALUE sh_libcall_value
511 #undef TARGET_STRUCT_VALUE_RTX
512 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
513 #undef TARGET_RETURN_IN_MEMORY
514 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
515
516 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
517 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
518 #undef TARGET_SETUP_INCOMING_VARARGS
519 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
520 #undef TARGET_STRICT_ARGUMENT_NAMING
521 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
522 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
523 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
524 #undef TARGET_MUST_PASS_IN_STACK
525 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
526 #undef TARGET_PASS_BY_REFERENCE
527 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
528 #undef TARGET_CALLEE_COPIES
529 #define TARGET_CALLEE_COPIES sh_callee_copies
530 #undef TARGET_ARG_PARTIAL_BYTES
531 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
532 #undef TARGET_FUNCTION_ARG
533 #define TARGET_FUNCTION_ARG sh_function_arg
534 #undef TARGET_FUNCTION_ARG_ADVANCE
535 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
536
537 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
538 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
539
540 #undef TARGET_BUILD_BUILTIN_VA_LIST
541 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
542 #undef TARGET_EXPAND_BUILTIN_VA_START
543 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
544 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
545 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
546
547 #undef TARGET_VECTOR_MODE_SUPPORTED_P
548 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
549
550 #undef TARGET_CHECK_PCH_TARGET_FLAGS
551 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
552
553 #undef TARGET_DWARF_CALLING_CONVENTION
554 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
555
556 #undef TARGET_FRAME_POINTER_REQUIRED
557 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
558
559 #undef TARGET_MODE_EMIT
560 #define TARGET_MODE_EMIT sh_emit_mode_set
561
562 #undef TARGET_MODE_NEEDED
563 #define TARGET_MODE_NEEDED sh_mode_needed
564
565 #undef TARGET_MODE_AFTER
566 #define TARGET_MODE_AFTER sh_mode_after
567
568 #undef TARGET_MODE_ENTRY
569 #define TARGET_MODE_ENTRY sh_mode_entry
570
571 #undef TARGET_MODE_EXIT
572 #define TARGET_MODE_EXIT sh_mode_exit
573
574 #undef TARGET_MODE_PRIORITY
575 #define TARGET_MODE_PRIORITY sh_mode_priority
576
577 /* Return regmode weight for insn. */
578 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
579 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
580
581 /* Return current register pressure for regmode. */
582 #define CURR_REGMODE_PRESSURE(MODE)\
583 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
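
/* A minimal sketch (for illustration only; the threshold name below is
   hypothetical) of how the two macros above are meant to be used by the
   scheduling hooks described earlier: once the running pressure crosses a
   threshold, prefer insns that free registers.

     if (CURR_REGMODE_PRESSURE (SImode) > SIMODE_PRESSURE_THRESHOLD)
       ... reorder the ready queue by LUID / skip cycles ...  */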
584
585 #undef TARGET_ENCODE_SECTION_INFO
586 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
587
588 #undef TARGET_LRA_P
589 #define TARGET_LRA_P sh_lra_p
590
591 #undef TARGET_SECONDARY_RELOAD
592 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
593
594 #undef TARGET_PREFERRED_RELOAD_CLASS
595 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
596
597 #undef TARGET_CONDITIONAL_REGISTER_USAGE
598 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
599
600 #undef TARGET_LEGITIMATE_ADDRESS_P
601 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
602
603 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
604 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
605
606 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
607 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
608 sh_legitimize_address_displacement
609
610 #undef TARGET_TRAMPOLINE_INIT
611 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
612 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
613 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
614
615 #undef TARGET_LEGITIMATE_CONSTANT_P
616 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
617
618 #undef TARGET_CANONICALIZE_COMPARISON
619 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
620
621 #undef TARGET_LEGITIMATE_COMBINED_INSN
622 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
623
624 #undef TARGET_FIXED_CONDITION_CODE_REGS
625 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
626
627 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
628 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
629 sh_use_by_pieces_infrastructure_p
630
631 /* Machine-specific symbol_ref flags. */
632 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
633
634 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
635 is used by optabs.c atomic op expansion code as well as in sync.md. */
636 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
637 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
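
/* For illustration (an assumption about how the middle-end consumes the
   value above, not something defined in this file): tas.b sets bit 7 of the
   lock byte instead of writing 1, and the trueval above tells the expanders
   which value means "set", so the boolean result of the built-in comes out
   right and ordinary uses keep working:

     static char lock;
     while (__atomic_test_and_set (&lock, __ATOMIC_ACQUIRE))
       ;  // spin until the byte was previously clear  */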
638
639 #undef TARGET_CANNOT_FORCE_CONST_MEM
640 #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
641
642 struct gcc_target targetm = TARGET_INITIALIZER;
643 \f
644
645 /* Information on the currently selected atomic model.
646 This is initialized in sh_option_override. */
647 static sh_atomic_model selected_atomic_model_;
648
649 const sh_atomic_model&
650 selected_atomic_model (void)
651 {
652 return selected_atomic_model_;
653 }
654
655 static sh_atomic_model
656 parse_validate_atomic_model_option (const char* str)
657 {
658 const char* model_names[sh_atomic_model::num_models];
659 model_names[sh_atomic_model::none] = "none";
660 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
661 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
662 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
663 model_names[sh_atomic_model::soft_imask] = "soft-imask";
664
665 const char* model_cdef_names[sh_atomic_model::num_models];
666 model_cdef_names[sh_atomic_model::none] = "NONE";
667 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
668 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
669 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
670 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
671
672 sh_atomic_model ret;
673 ret.type = sh_atomic_model::none;
674 ret.name = model_names[sh_atomic_model::none];
675 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
676 ret.strict = false;
677 ret.tcb_gbr_offset = -1;
678
679 /* Handle empty string as 'none'. */
680 if (str == NULL || *str == '\0')
681 return ret;
682
683 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
684
685 std::vector<std::string> tokens;
686 for (std::stringstream ss (str); ss.good (); )
687 {
688 tokens.push_back (std::string ());
689 std::getline (ss, tokens.back (), ',');
690 }
691
692 if (tokens.empty ())
693 err_ret ("invalid atomic model option");
694
695 /* The first token must be the atomic model name. */
696 {
697 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
698 if (tokens.front () == model_names[i])
699 {
700 ret.type = (sh_atomic_model::enum_type)i;
701 ret.name = model_names[i];
702 ret.cdef_name = model_cdef_names[i];
703 goto got_mode_name;
704 }
705
706 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
707 got_mode_name:;
708 }
709
710 /* Go through the remaining tokens. */
711 for (size_t i = 1; i < tokens.size (); ++i)
712 {
713 if (tokens[i] == "strict")
714 ret.strict = true;
715 else if (tokens[i].find ("gbr-offset=") == 0)
716 {
717 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
718 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
719 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
720 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
721 "option", offset_str.c_str ());
722 }
723 else
724 err_ret ("unknown parameter \"%s\" in atomic model option",
725 tokens[i].c_str ());
726 }
727
728 /* Check that the selection makes sense. */
729 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
730 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
731 ret.name);
732
733 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
734 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
735
736 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
737 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
738
739 if (ret.type == sh_atomic_model::soft_tcb
740 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
741 || (ret.tcb_gbr_offset & 3) != 0))
742 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
743 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
744 ret.name);
745
746 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
747 err_ret ("cannot use atomic model %s in user mode", ret.name);
748
749 return ret;
750
751 #undef err_ret
752 }
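
/* Examples (illustrative, derived from the parsing and checks above) of
   -matomic-model option strings and the resulting settings:

     "soft-gusa"                      -> type = soft_gusa, strict = false,
                                         tcb_gbr_offset = -1
     "soft-tcb,gbr-offset=16,strict"  -> type = soft_tcb, strict = true,
                                         tcb_gbr_offset = 16

   soft-tcb requires a gbr-offset that is a multiple of 4 in the range
   0-1020, and soft-gusa is accepted only on SH3 and SH4 targets.  */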
753
754 /* Register SH specific RTL passes. */
755 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
756 const char* name);
757 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
758 const char* name);
759 static void
760 register_sh_passes (void)
761 {
762 /* Running the sh_treg_combine pass after ce1 generates better code when
763 comparisons are combined and reg-reg moves are introduced, because
764    reg-reg moves will be eliminated afterwards.  However, there are quite
765    a few cases where combine will be unable to fold comparison-related insns,
766    so for now don't do it.
767 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
768 PASS_POS_INSERT_AFTER, "ce1", 1);
769 */
770
771 /* Run sh_treg_combine pass after combine but before register allocation. */
772 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
773 PASS_POS_INSERT_AFTER, "split1", 1);
774
775 /* Run sh_treg_combine pass after register allocation and basic block
776 reordering as this sometimes creates new opportunities. */
777 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
778 PASS_POS_INSERT_AFTER, "split4", 1);
779
780 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
781 is known after a conditional branch.
782 This must be done after basic blocks and branch conditions have
783 stabilized and won't be changed by further passes. */
784 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
785 PASS_POS_INSERT_BEFORE, "sched2", 1);
786 }
787
788 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
789 various options, and do some machine dependent initialization. */
790 static void
791 sh_option_override (void)
792 {
793 int regno;
794
795 SUBTARGET_OVERRIDE_OPTIONS;
796
797 sh_cpu = PROCESSOR_SH1;
798 assembler_dialect = 0;
799 if (TARGET_SH2)
800 sh_cpu = PROCESSOR_SH2;
801 if (TARGET_SH2E)
802 sh_cpu = PROCESSOR_SH2E;
803 if (TARGET_SH2A)
804 sh_cpu = PROCESSOR_SH2A;
805 if (TARGET_SH3)
806 sh_cpu = PROCESSOR_SH3;
807 if (TARGET_SH3E)
808 sh_cpu = PROCESSOR_SH3E;
809 if (TARGET_SH4)
810 {
811 assembler_dialect = 1;
812 sh_cpu = PROCESSOR_SH4;
813 }
814 if (TARGET_SH4A)
815 {
816 assembler_dialect = 1;
817 sh_cpu = PROCESSOR_SH4A;
818 }
819
820   /* User/privileged mode is supported only on SH3* and SH4*.
821      Disable it for everything else.  */
822 if (!TARGET_SH3 && TARGET_USERMODE)
823 TARGET_USERMODE = false;
824
825 if (! strcmp (sh_div_str, "call-div1"))
826 sh_div_strategy = SH_DIV_CALL_DIV1;
827 else if (! strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY)
828 sh_div_strategy = SH_DIV_CALL_FP;
829 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
830 sh_div_strategy = SH_DIV_CALL_TABLE;
831 else
832 {
833 /* Pick one that makes most sense for the target in general.
834 It is not much good to use different functions depending on -Os,
835 since then we'll end up with two different functions when some of
836 the code is compiled for size, and some for speed. */
837
838 /* SH4 tends to emphasize speed. */
839 if (TARGET_HARD_SH4)
840 sh_div_strategy = SH_DIV_CALL_TABLE;
841 /* These have their own way of doing things. */
842 else if (TARGET_SH2A)
843 sh_div_strategy = SH_DIV_INTRINSIC;
844 /* SH1 .. SH3 cores often go into small-footprint systems, so
845 default to the smallest implementation available. */
846 else
847 sh_div_strategy = SH_DIV_CALL_DIV1;
848 }
849
850 if (sh_divsi3_libfunc[0])
851 ; /* User supplied - leave it alone. */
852 else if (TARGET_DIVIDE_CALL_FP)
853 sh_divsi3_libfunc = "__sdivsi3_i4";
854 else if (TARGET_DIVIDE_CALL_TABLE)
855 sh_divsi3_libfunc = "__sdivsi3_i4i";
856 else
857 sh_divsi3_libfunc = "__sdivsi3";
858
859 if (sh_branch_cost == -1)
860 {
861 /* The SH1 does not have delay slots, hence we get a pipeline stall
862 at every branch. The SH4 is superscalar, so the single delay slot
863 is not sufficient to keep both pipelines filled.
864 	 In any case, set the default branch cost to '2', as it results in
865 	 slightly smaller code overall and also enables some if conversions
866 	 that are required for matching special T bit related insns.  */
867 sh_branch_cost = 2;
868 }
869
870 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
871 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
872 TARGET_ZDCBRANCH = 1;
873
874 /* FDPIC code is a special form of PIC, and the vast majority of code
875 generation constraints that apply to PIC also apply to FDPIC, so we
876 set flag_pic to avoid the need to check TARGET_FDPIC everywhere
877 flag_pic is checked. */
878 if (TARGET_FDPIC && !flag_pic)
879 flag_pic = 2;
880
881 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
882 if (! VALID_REGISTER_P (regno))
883 sh_register_names[regno][0] = '\0';
884
885 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
886 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
887 sh_additional_register_names[regno][0] = '\0';
888
889 if (flag_pic && ! TARGET_PREFERGOT)
890 flag_no_function_cse = 1;
891
892 if (targetm.small_register_classes_for_mode_p (VOIDmode))
893 {
894 /* Never run scheduling before reload, since that can
895 break global alloc, and generates slower code anyway due
896 to the pressure on R0. */
897       /* Enable sched1 for SH4 if the user explicitly requests it.
898 	 When sched1 is enabled, the ready queue will be reordered by
899 	 the target hooks if pressure is high.  We cannot do this for
900 	 PIC, or for SH3 and lower, as they give spill failures for R0.  */
901 if (!TARGET_HARD_SH4 || flag_pic)
902 flag_schedule_insns = 0;
903       /* ??? Current exception handling places basic block boundaries
904 	 after call_insns.  This causes high pressure on R0 and gives
905 	 spill failures for R0 in reload.  See PR 22553 and the thread
906 on gcc-patches
907 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
908 else if (flag_exceptions)
909 {
910 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
911 warning (0, "ignoring -fschedule-insns because of exception "
912 "handling bug");
913 flag_schedule_insns = 0;
914 }
915 else if (flag_schedule_insns
916 && !global_options_set.x_flag_schedule_insns)
917 flag_schedule_insns = 0;
918 }
919
920   /* Unwind info is not correct around the CFG unless either a frame
921      pointer is present or -maccumulate-outgoing-args (M_A_O_A) is set.
922      Fixing this requires rewriting unwind info generation to be aware of
923      the CFG and propagating states around edges.  */
924 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
925 || flag_exceptions || flag_non_call_exceptions)
926 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
927 {
928 warning (0, "unwind tables currently require either a frame pointer "
929 "or -maccumulate-outgoing-args for correctness");
930 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
931 }
932
933 if (flag_unsafe_math_optimizations)
934 {
935 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
936 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
937 TARGET_FSCA = 1;
938
939 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
940 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
941 TARGET_FSRRA = 1;
942 }
943
944   /* Allow the fsrra insn only if -funsafe-math-optimizations and
945      -ffinite-math-only are enabled.  */
946 TARGET_FSRRA = TARGET_FSRRA
947 && flag_unsafe_math_optimizations
948 && flag_finite_math_only;
949
950 /* If the -mieee option was not explicitly set by the user, turn it on
951 unless -ffinite-math-only was specified. See also PR 33135. */
952 if (! global_options_set.x_TARGET_IEEE)
953 TARGET_IEEE = ! flag_finite_math_only;
954
955 if (sh_fixed_range_str)
956 sh_fix_range (sh_fixed_range_str);
957
958 /* This target defaults to strict volatile bitfields. */
959 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
960 flag_strict_volatile_bitfields = 1;
961
962 sh_override_options_after_change ();
963
964 /* Parse atomic model option and make sure it is valid for the current
965 target CPU. */
966 selected_atomic_model_
967 = parse_validate_atomic_model_option (sh_atomic_model_str);
968
969 register_sh_passes ();
970 }
971
972 /* Implement targetm.override_options_after_change. */
973
974 static void
975 sh_override_options_after_change (void)
976 {
977 /* Adjust loop, jump and function alignment values (in bytes), if those
978 were not specified by the user using -falign-loops, -falign-jumps
979 and -falign-functions options.
980 32 bit alignment is better for speed, because instructions can be
981 fetched as a pair from a longword boundary. For size use 16 bit
982 alignment to get more compact code.
983 Aligning all jumps increases the code size, even if it might
984 result in slightly faster code. Thus, it is set to the smallest
985 alignment possible if not specified by the user. */
986 if (align_loops == 0)
987 align_loops = optimize_size ? 2 : 4;
988
989 if (align_jumps == 0)
990 align_jumps = 2;
991 else if (align_jumps < 2)
992 align_jumps = 2;
993
994 if (align_functions == 0)
995 align_functions = optimize_size ? 2 : 4;
996
997 /* The linker relaxation code breaks when a function contains
998 alignments that are larger than that at the start of a
999 compilation unit. */
1000 if (TARGET_RELAX)
1001 {
1002 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1003
1004 /* Also take possible .long constants / mova tables into account. */
1005 if (min_align < 4)
1006 min_align = 4;
1007 if (align_functions < min_align)
1008 align_functions = min_align;
1009 }
1010 }
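
/* Worked example (illustrative) of the defaults chosen above: with -O2 and
   no explicit -falign-* options, align_loops = 4, align_jumps = 2 and
   align_functions = 4; adding -mrelax leaves them unchanged, since
   min_align = 4.  With -Os -mrelax, all three start out as 2, min_align is
   raised to 4, and align_functions is therefore bumped from 2 to 4.  */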
1011 \f
1012 /* Print the operand address in x to the stream. */
1013 static void
1014 sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
1015 {
1016 switch (GET_CODE (x))
1017 {
1018 case REG:
1019 case SUBREG:
1020 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1021 break;
1022
1023 case PLUS:
1024 {
1025 rtx base = XEXP (x, 0);
1026 rtx index = XEXP (x, 1);
1027
1028 switch (GET_CODE (index))
1029 {
1030 case CONST_INT:
1031 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1032 reg_names[true_regnum (base)]);
1033 break;
1034
1035 case REG:
1036 case SUBREG:
1037 {
1038 int base_num = true_regnum (base);
1039 int index_num = true_regnum (index);
1040
1041 /* If base or index is R0, make sure that it comes first.
1042 Usually one of them will be R0, but the order might be wrong.
1043 	       If neither base nor index is R0, it's an error and we just
1044 pass it on to the assembler. This avoids silent wrong code
1045 bugs. */
1046 if (base_num == 0 && index_num != 0)
1047 std::swap (base_num, index_num);
1048
1049 fprintf (stream, "@(%s,%s)", reg_names[index_num],
1050 reg_names[base_num]);
1051 break;
1052 }
1053
1054 default:
1055 gcc_unreachable ();
1056 }
1057 }
1058 break;
1059
1060 case PRE_DEC:
1061 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1062 break;
1063
1064 case POST_INC:
1065 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1066 break;
1067
1068 default:
1069 x = mark_constant_pool_use (x);
1070 output_addr_const (stream, x);
1071 break;
1072 }
1073 }
1074
1075 /* Print operand x (an rtx) in assembler syntax to file stream
1076 according to modifier code.
1077
1078 '.' print a .s if insn needs delay slot
1079 ',' print LOCAL_LABEL_PREFIX
1080    '@'  print trap, rte or rts depending upon the function's interrupt attributes
1081 '#' output a nop if there is nothing to put in the delay slot
1082 ''' print likelihood suffix (/u for unlikely).
1083 '>' print branch target if -fverbose-asm
1084 'O' print a constant without the #
1085 'R' print the LSW of a dp value - changes if in little endian
1086 'S' print the MSW of a dp value - changes if in little endian
1087 'T' print the next word of a dp value - same as 'R' in big endian mode.
1088 'M' print .b / .w / .l / .s / .d suffix if operand is a MEM.
1089 'N' print 'r63' if the operand is (const_int 0).
1090 'd' print a V2SF reg as dN instead of fpN.
1091 'm' print a pair `base,offset' or `base,index', for LD and ST.
1092 'U' Likewise for {LD,ST}{HI,LO}.
1093 'V' print the position of a single bit set.
1094 'W' print the position of a single bit cleared.
1095 't' print a memory address which is a register.
1096 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1097 'o' output an operator. */
1098 static void
1099 sh_print_operand (FILE *stream, rtx x, int code)
1100 {
1101 int regno;
1102 machine_mode mode;
1103
1104 switch (code)
1105 {
1106 tree trapa_attr;
1107
1108 case '.':
1109 if (final_sequence
1110 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1111 && get_attr_length (final_sequence->insn (1)))
1112 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1113 break;
1114 case ',':
1115 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1116 break;
1117 case '@':
1118 trapa_attr = lookup_attribute ("trap_exit",
1119 DECL_ATTRIBUTES (current_function_decl));
1120 if (trapa_attr)
1121 fprintf (stream, "trapa #%ld",
1122 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1123 else if (sh_cfun_interrupt_handler_p ())
1124 {
1125 if (sh_cfun_resbank_handler_p ())
1126 fprintf (stream, "resbank\n");
1127 fprintf (stream, "rte");
1128 }
1129 else
1130 fprintf (stream, "rts");
1131 break;
1132 case '#':
1133 /* Output a nop if there's nothing in the delay slot. */
1134 if (dbr_sequence_length () == 0)
1135 fprintf (stream, "\n\tnop");
1136 break;
1137 case '\'':
1138 {
1139 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1140
1141 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1142 fputs ("/u", stream);
1143 break;
1144 }
1145 case '>':
1146 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1147 {
1148 fputs ("\t! target: ", stream);
1149 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1150 }
1151 break;
1152 case 'O':
1153 x = mark_constant_pool_use (x);
1154 output_addr_const (stream, x);
1155 break;
1156     /* N.B.: %R / %S / %T adjust memory addresses by four.
1157        While they can be used to access 64 bit parts of a larger value
1158        held in general purpose registers, that won't work with memory,
1159        nor with fp registers, since the frxx names are used.  */
1160 case 'R':
1161 if (REG_P (x) || GET_CODE (x) == SUBREG)
1162 {
1163 regno = true_regnum (x);
1164 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1165 fputs (reg_names[regno], (stream));
1166 }
1167 else if (MEM_P (x))
1168 {
1169 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1170 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1171 }
1172 else
1173 {
1174 rtx sub = NULL_RTX;
1175
1176 mode = GET_MODE (x);
1177 if (mode == VOIDmode)
1178 mode = DImode;
1179 if (GET_MODE_SIZE (mode) >= 8)
1180 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1181 if (sub)
1182 sh_print_operand (stream, sub, 0);
1183 else
1184 output_operand_lossage ("invalid operand to %%R");
1185 }
1186 break;
1187 case 'S':
1188 if (REG_P (x) || GET_CODE (x) == SUBREG)
1189 {
1190 regno = true_regnum (x);
1191 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1192 fputs (reg_names[regno], (stream));
1193 }
1194 else if (MEM_P (x))
1195 {
1196 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1197 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1198 }
1199 else
1200 {
1201 rtx sub = NULL_RTX;
1202
1203 mode = GET_MODE (x);
1204 if (mode == VOIDmode)
1205 mode = DImode;
1206 if (GET_MODE_SIZE (mode) >= 8)
1207 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1208 if (sub)
1209 sh_print_operand (stream, sub, 0);
1210 else
1211 output_operand_lossage ("invalid operand to %%S");
1212 }
1213 break;
1214 case 'T':
1215 /* Next word of a double. */
1216 switch (GET_CODE (x))
1217 {
1218 case REG:
1219 fputs (reg_names[REGNO (x) + 1], (stream));
1220 break;
1221 case MEM:
1222 {
1223 machine_mode mode = GET_MODE (x);
1224 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1225 && GET_CODE (XEXP (x, 0)) != POST_INC)
1226 x = adjust_address (x, SImode, 4);
1227 sh_print_operand_address (stream, mode, XEXP (x, 0));
1228 }
1229 break;
1230 default:
1231 break;
1232 }
1233 break;
1234
1235 case 't':
1236 gcc_assert (MEM_P (x));
1237 x = XEXP (x, 0);
1238 switch (GET_CODE (x))
1239 {
1240 case REG:
1241 case SUBREG:
1242 sh_print_operand (stream, x, 0);
1243 break;
1244 default:
1245 break;
1246 }
1247 break;
1248
1249 case 'o':
1250 switch (GET_CODE (x))
1251 {
1252 case PLUS: fputs ("add", stream); break;
1253 case MINUS: fputs ("sub", stream); break;
1254 case MULT: fputs ("mul", stream); break;
1255 case DIV: fputs ("div", stream); break;
1256 case EQ: fputs ("eq", stream); break;
1257 case NE: fputs ("ne", stream); break;
1258 case GT: case LT: fputs ("gt", stream); break;
1259 case GE: case LE: fputs ("ge", stream); break;
1260 case GTU: case LTU: fputs ("gtu", stream); break;
1261 case GEU: case LEU: fputs ("geu", stream); break;
1262 default:
1263 break;
1264 }
1265 break;
1266 case 'M':
1267 if (MEM_P (x))
1268 {
1269 switch (GET_MODE (x))
1270 {
1271 case QImode: fputs (".b", stream); break;
1272 case HImode: fputs (".w", stream); break;
1273 case SImode: fputs (".l", stream); break;
1274 case SFmode: fputs (".s", stream); break;
1275 case DFmode: fputs (".d", stream); break;
1276 default: gcc_unreachable ();
1277 }
1278 }
1279 break;
1280
1281 case 'm':
1282 gcc_assert (MEM_P (x));
1283 x = XEXP (x, 0);
1284 /* Fall through. */
1285 case 'U':
1286 switch (GET_CODE (x))
1287 {
1288 case REG:
1289 case SUBREG:
1290 sh_print_operand (stream, x, 0);
1291 fputs (", 0", stream);
1292 break;
1293
1294 case PLUS:
1295 sh_print_operand (stream, XEXP (x, 0), 0);
1296 fputs (", ", stream);
1297 sh_print_operand (stream, XEXP (x, 1), 0);
1298 break;
1299
1300 default:
1301 gcc_unreachable ();
1302 }
1303 break;
1304
1305 case 'V':
1306 {
1307 int num = exact_log2 (INTVAL (x));
1308 gcc_assert (num >= 0);
1309 fprintf (stream, "#%d", num);
1310 }
1311 break;
1312
1313 case 'W':
1314 {
1315 int num = exact_log2 (~INTVAL (x));
1316 gcc_assert (num >= 0);
1317 fprintf (stream, "#%d", num);
1318 }
1319 break;
1320
1321 case 'd':
1322 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1323
1324 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1325 break;
1326
1327 case 'N':
1328 if (x == CONST0_RTX (GET_MODE (x)))
1329 {
1330 fprintf ((stream), "r63");
1331 break;
1332 }
1333 goto default_output;
1334 case 'u':
1335 if (CONST_INT_P (x))
1336 {
1337 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1338 break;
1339 }
1340 /* Fall through. */
1341
1342 default_output:
1343 default:
1344 regno = 0;
1345 mode = GET_MODE (x);
1346
1347 switch (GET_CODE (x))
1348 {
1349 case TRUNCATE:
1350 {
1351 rtx inner = XEXP (x, 0);
1352 int offset = 0;
1353 machine_mode inner_mode;
1354
1355 /* We might see SUBREGs with vector mode registers inside. */
1356 if (GET_CODE (inner) == SUBREG
1357 && (GET_MODE_SIZE (GET_MODE (inner))
1358 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1359 && subreg_lowpart_p (inner))
1360 inner = SUBREG_REG (inner);
1361 if (CONST_INT_P (inner))
1362 {
1363 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1364 goto default_output;
1365 }
1366 inner_mode = GET_MODE (inner);
1367 if (GET_CODE (inner) == SUBREG
1368 && (GET_MODE_SIZE (GET_MODE (inner))
1369 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1370 && REG_P (SUBREG_REG (inner)))
1371 {
1372 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1373 GET_MODE (SUBREG_REG (inner)),
1374 SUBREG_BYTE (inner),
1375 GET_MODE (inner));
1376 inner = SUBREG_REG (inner);
1377 }
1378 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1379 abort ();
1380 /* Floating point register pairs are always big endian;
1381 general purpose registers are 64 bit wide. */
1382 regno = REGNO (inner);
1383 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1384 - HARD_REGNO_NREGS (regno, mode))
1385 + offset;
1386 x = inner;
1387 goto reg;
1388 }
1389 case SIGN_EXTEND:
1390 x = XEXP (x, 0);
1391 goto reg;
1392 case SUBREG:
1393 gcc_assert (SUBREG_BYTE (x) == 0
1394 && REG_P (SUBREG_REG (x)));
1395
1396 x = SUBREG_REG (x);
1397 /* Fall through. */
1398
1399 reg:
1400 case REG:
1401 regno += REGNO (x);
1402 if (FP_REGISTER_P (regno)
1403 && mode == V16SFmode)
1404 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1405 else if (FP_REGISTER_P (REGNO (x))
1406 && mode == V4SFmode)
1407 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1408 else if (REG_P (x)
1409 && mode == V2SFmode)
1410 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1411 else if (FP_REGISTER_P (REGNO (x))
1412 && GET_MODE_SIZE (mode) > 4)
1413 fprintf ((stream), "d%s", reg_names[regno] + 1);
1414 else
1415 fputs (reg_names[regno], (stream));
1416 break;
1417
1418 case MEM:
1419 output_address (GET_MODE (x), XEXP (x, 0));
1420 break;
1421
1422 default:
1423 fputc ('#', stream);
1424 output_addr_const (stream, x);
1425 break;
1426 }
1427 break;
1428 }
1429 }
1430
1431 static bool
1432 sh_print_operand_punct_valid_p (unsigned char code)
1433 {
1434 return (code == '.' || code == '#' || code == '@' || code == ','
1435 || code == '$' || code == '\'' || code == '>');
1436 }
1437
1438 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1439 static bool
1440 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1441 {
1442 if (GET_CODE (x) == UNSPEC)
1443 {
1444 switch (XINT (x, 1))
1445 {
1446 case UNSPEC_PIC:
1447 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1448 output_addr_const (file, XVECEXP (x, 0, 0));
1449 break;
1450 case UNSPEC_GOT:
1451 output_addr_const (file, XVECEXP (x, 0, 0));
1452 fputs ("@GOT", file);
1453 break;
1454 case UNSPEC_GOTOFF:
1455 output_addr_const (file, XVECEXP (x, 0, 0));
1456 fputs ("@GOTOFF", file);
1457 break;
1458 case UNSPEC_PLT:
1459 output_addr_const (file, XVECEXP (x, 0, 0));
1460 fputs ("@PLT", file);
1461 break;
1462 case UNSPEC_GOTPLT:
1463 output_addr_const (file, XVECEXP (x, 0, 0));
1464 fputs ("@GOTPLT", file);
1465 break;
1466 case UNSPEC_PCREL:
1467 output_addr_const (file, XVECEXP (x, 0, 0));
1468 fputs ("@PCREL", file);
1469 break;
1470 case UNSPEC_DTPOFF:
1471 output_addr_const (file, XVECEXP (x, 0, 0));
1472 fputs ("@DTPOFF", file);
1473 break;
1474 case UNSPEC_GOTTPOFF:
1475 output_addr_const (file, XVECEXP (x, 0, 0));
1476 fputs ("@GOTTPOFF", file);
1477 break;
1478 case UNSPEC_TPOFF:
1479 output_addr_const (file, XVECEXP (x, 0, 0));
1480 fputs ("@TPOFF", file);
1481 break;
1482 case UNSPEC_CALLER:
1483 {
1484 char name[32];
1485 /* LPCS stands for Label for PIC Call Site. */
1486 targetm.asm_out.generate_internal_label (name, "LPCS",
1487 INTVAL (XVECEXP (x, 0, 0)));
1488 assemble_name (file, name);
1489 }
1490 break;
1491 case UNSPEC_SYMOFF:
1492 output_addr_const (file, XVECEXP (x, 0, 0));
1493 fputc ('-', file);
1494 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1495 {
1496 fputc ('(', file);
1497 output_addr_const (file, XVECEXP (x, 0, 1));
1498 fputc (')', file);
1499 }
1500 else
1501 output_addr_const (file, XVECEXP (x, 0, 1));
1502 break;
1503 case UNSPEC_PCREL_SYMOFF:
1504 output_addr_const (file, XVECEXP (x, 0, 0));
1505 fputs ("-(", file);
1506 output_addr_const (file, XVECEXP (x, 0, 1));
1507 fputs ("-.)", file);
1508 break;
1509 case UNSPEC_GOTFUNCDESC:
1510 output_addr_const (file, XVECEXP (x, 0, 0));
1511 fputs ("@GOTFUNCDESC", file);
1512 break;
1513 case UNSPEC_GOTOFFFUNCDESC:
1514 output_addr_const (file, XVECEXP (x, 0, 0));
1515 fputs ("@GOTOFFFUNCDESC", file);
1516 break;
1517 default:
1518 return false;
1519 }
1520 return true;
1521 }
1522 else
1523 return false;
1524 }
1525 \f
1526 /* Encode symbol attributes of a SYMBOL_REF into its
1527 SYMBOL_REF_FLAGS. */
1528 static void
1529 sh_encode_section_info (tree decl, rtx rtl, int first)
1530 {
1531 default_encode_section_info (decl, rtl, first);
1532
1533 if (TREE_CODE (decl) == FUNCTION_DECL
1534 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1535 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1536 }
1537
1538 /* Prepare operands for a move define_expand; specifically, one of the
1539 operands must be in a register. */
1540 void
1541 prepare_move_operands (rtx operands[], machine_mode mode)
1542 {
1543 if ((mode == SImode || mode == DImode)
1544 && flag_pic
1545 && ! ((mode == Pmode || mode == ptr_mode)
1546 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1547 {
1548 rtx temp;
1549 if (SYMBOLIC_CONST_P (operands[1]))
1550 {
1551 if (MEM_P (operands[0]))
1552 operands[1] = force_reg (Pmode, operands[1]);
1553 else
1554 {
1555 temp = (!can_create_pseudo_p ()
1556 ? operands[0]
1557 : gen_reg_rtx (Pmode));
1558 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1559 }
1560 }
1561 else if (GET_CODE (operands[1]) == CONST
1562 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1563 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1564 {
1565 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1566 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1567 mode, temp);
1568 operands[1] = expand_binop (mode, add_optab, temp,
1569 XEXP (XEXP (operands[1], 0), 1),
1570 (!can_create_pseudo_p ()
1571 ? temp
1572 : gen_reg_rtx (Pmode)),
1573 0, OPTAB_LIB_WIDEN);
1574 }
1575 }
1576
1577 if (! reload_in_progress && ! reload_completed)
1578 {
1579       /* Copy the source to a register if neither operand is a register.  */
1580 if (! register_operand (operands[0], mode)
1581 && ! register_operand (operands[1], mode))
1582 operands[1] = copy_to_mode_reg (mode, operands[1]);
1583
1584 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1585 {
1586 	  /* This is like change_address_1 (operands[0], mode, 0, 1),
1587 except that we can't use that function because it is static. */
1588 rtx new_rtx = change_address (operands[0], mode, 0);
1589 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1590 operands[0] = new_rtx;
1591 }
1592
1593 /* This case can happen while generating code to move the result
1594 of a library call to the target. Reject `st r0,@(rX,rY)' because
1595 reload will fail to find a spill register for rX, since r0 is already
1596 being used for the source. */
1597 else if (refers_to_regno_p (R0_REG, operands[1])
1598 && MEM_P (operands[0])
1599 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1600 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1601 operands[1] = copy_to_mode_reg (mode, operands[1]);
1602
1603       /* When displacement addressing is used, RA will assign r0 to
1604 	 the pseudo register operand for the QI/HImode load/store.
1605 	 This tends to make a long live range for R0 and might cause
1606 	 anomalous register spills in some cases with LRA.  See PR
1607 	 target/55212.
1608 	 We split such a load/store into two move insns via r0 so as to
1609 	 shorten the R0 live range.  This makes some code worse but wins
1610 	 on average for LRA.
1611 	 Also, when base+index addressing is used and the index term is
1612 	 a subreg, LRA assumes that more hard registers can be available
1613 	 in some situations.  That isn't the case for SH in the problematic
1614 	 case.  We can pre-allocate R0 for that index term to avoid
1615 	 the issue.  See PR target/66591.  */
1616 else if (sh_lra_p ()
1617 && ! TARGET_SH2A
1618 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1619 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1620 {
1621 bool load_p = REG_P (operands[0]);
1622 rtx reg = operands[load_p ? 0 : 1];
1623 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1624
1625 if ((mode == QImode || mode == HImode)
1626 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1627 && GET_CODE (adr) == PLUS
1628 && REG_P (XEXP (adr, 0))
1629 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1630 && CONST_INT_P (XEXP (adr, 1))
1631 && INTVAL (XEXP (adr, 1)) != 0
1632 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1633 {
1634 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1635 emit_move_insn (r0_rtx, operands[1]);
1636 operands[1] = r0_rtx;
1637 }
1638 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1639 && GET_CODE (adr) == PLUS
1640 && REG_P (XEXP (adr, 0))
1641 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1642 && SUBREG_P (XEXP (adr, 1))
1643 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1644 {
1645 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1646 emit_move_insn (r0_rtx, XEXP (adr, 1));
1647 XEXP (adr, 1) = r0_rtx;
1648 }
1649 }
1650 }
1651
1652 if (mode == Pmode || mode == ptr_mode)
1653 {
1654 rtx op0 = operands[0];
1655 rtx op1 = operands[1];
1656 rtx opc;
1657 if (GET_CODE (op1) == CONST
1658 && GET_CODE (XEXP (op1, 0)) == PLUS
1659 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1660 != TLS_MODEL_NONE))
1661 {
1662 opc = XEXP (XEXP (op1, 0), 1);
1663 op1 = XEXP (XEXP (op1, 0), 0);
1664 }
1665 else
1666 opc = NULL_RTX;
1667
1668 enum tls_model tls_kind;
1669
1670 if (! reload_in_progress && ! reload_completed
1671 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1672 {
1673 rtx tga_op1, tga_ret, tmp, tmp2;
1674
1675 if (! flag_pic
1676 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1677 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1678 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1679 {
1680 static int got_labelno;
1681 /* Don't schedule insns for getting GOT address when
1682 the first scheduling pass is enabled, to avoid spill
1683 failures for R0. */
1684 if (flag_schedule_insns)
1685 emit_insn (gen_blockage ());
1686 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1687 emit_use (gen_rtx_REG (SImode, PIC_REG));
1688 if (flag_schedule_insns)
1689 emit_insn (gen_blockage ());
1690 }
1691
1692 switch (tls_kind)
1693 {
1694 case TLS_MODEL_GLOBAL_DYNAMIC:
1695 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1696 if (TARGET_FDPIC)
1697 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1698 sh_get_fdpic_reg_initial_val ());
1699 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1700 tmp = gen_reg_rtx (Pmode);
1701 emit_move_insn (tmp, tga_ret);
1702 op1 = tmp;
1703 break;
1704
1705 case TLS_MODEL_LOCAL_DYNAMIC:
1706 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1707 if (TARGET_FDPIC)
1708 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1709 sh_get_fdpic_reg_initial_val ());
1710 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1711
1712 tmp = gen_reg_rtx (Pmode);
1713 emit_move_insn (tmp, tga_ret);
1714
1715 if (register_operand (op0, Pmode))
1716 tmp2 = op0;
1717 else
1718 tmp2 = gen_reg_rtx (Pmode);
1719
1720 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1721 op1 = tmp2;
1722 break;
1723
1724 case TLS_MODEL_INITIAL_EXEC:
1725 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1726 tmp = gen_sym2GOTTPOFF (op1);
1727 if (TARGET_FDPIC)
1728 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1729 sh_get_fdpic_reg_initial_val ());
1730 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1731 op1 = tga_op1;
1732 break;
1733
1734 case TLS_MODEL_LOCAL_EXEC:
1735 tmp2 = gen_reg_rtx (Pmode);
1736 emit_insn (gen_store_gbr (tmp2));
1737 tmp = gen_reg_rtx (Pmode);
1738 emit_insn (gen_symTPOFF2reg (tmp, op1));
1739
1740 if (register_operand (op0, Pmode))
1741 op1 = op0;
1742 else
1743 op1 = gen_reg_rtx (Pmode);
1744
1745 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1746 break;
1747
1748 default:
1749 gcc_unreachable ();
1750 }
1751 if (opc)
1752 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1753 operands[1] = op1;
1754 }
1755 }
1756
1757 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
1758 {
1759 rtx base, offset;
1760 split_const (operands[1], &base, &offset);
1761
1762 if (GET_CODE (base) == SYMBOL_REF
1763 && !offset_within_block_p (base, INTVAL (offset)))
1764 {
1765 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
1766 emit_move_insn (tmp, base);
1767 if (!arith_operand (offset, mode))
1768 offset = force_reg (mode, offset);
1769 emit_insn (gen_add3_insn (operands[0], tmp, offset));
1770 }
1771 }
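/* Illustration (hypothetical operand): with
   SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P, a source such as 'sym + 1000'
   whose offset falls outside sym's own object is split above into a move
   of 'sym' followed by an explicit add of the offset, rather than being
   emitted as one relocated constant.  */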
1772 }
1773
1774 /* Implement the canonicalize_comparison target hook for the combine
1775 pass. For the target hook this function is invoked via
1776 sh_canonicalize_comparison. This function is also re-used to
1777 canonicalize comparisons in cbranch pattern expanders. */
1778 static void
1779 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1780 machine_mode mode,
1781 bool op0_preserve_value)
1782 {
1783 /* When invoked from within the combine pass the mode is not specified,
1784 so try to get it from one of the operands. */
1785 if (mode == VOIDmode)
1786 mode = GET_MODE (op0);
1787 if (mode == VOIDmode)
1788 mode = GET_MODE (op1);
1789
1790 // We need to have a mode to do something useful here.
1791 if (mode == VOIDmode)
1792 return;
1793
1794 // Currently, we don't deal with floats here.
1795 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1796 return;
1797
1798 // Make sure that the constant operand is the second operand.
1799 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1800 {
1801 if (op0_preserve_value)
1802 return;
1803
1804 std::swap (op0, op1);
1805 cmp = swap_condition (cmp);
1806 }
1807
1808 if (CONST_INT_P (op1))
1809 {
1810 /* Try to adjust the constant operand in such a way that available
1811 comparison insns can be utilized better and the constant can be
1812 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1813 constant pool. */
1814 const HOST_WIDE_INT val = INTVAL (op1);
1815
1816 /* x > -1 --> x >= 0
1817 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1818 x <= -1 --> x < 0
1819 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1820 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1821 {
1822 cmp = cmp == GT ? GE : LT;
1823 op1 = gen_int_mode (val + 1, mode);
1824 }
1825
1826 /* x >= 1 --> x > 0
1827 x >= 0x80 --> x > 0x7F
1828 x < 1 --> x <= 0
1829 x < 0x80 --> x <= 0x7F */
1830 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1831 {
1832 cmp = cmp == GE ? GT : LE;
1833 op1 = gen_int_mode (val - 1, mode);
1834 }
1835
1836 /* unsigned x >= 1 --> x != 0
1837 unsigned x < 1 --> x == 0 */
1838 else if (val == 1 && (cmp == GEU || cmp == LTU))
1839 {
1840 cmp = cmp == GEU ? NE : EQ;
1841 op1 = CONST0_RTX (mode);
1842 }
1843
1844 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1845 unsigned x < 0x80 --> unsigned x <= 0x7F */
1846 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1847 {
1848 cmp = cmp == GEU ? GTU : LEU;
1849 op1 = gen_int_mode (val - 1, mode);
1850 }
1851
1852 /* unsigned x > 0 --> x != 0
1853 unsigned x <= 0 --> x == 0 */
1854 else if (val == 0 && (cmp == GTU || cmp == LEU))
1855 cmp = cmp == GTU ? NE : EQ;
1856
1857 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1858 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1859 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1860 && val == 0x7FFFFFFF)
1861 {
1862 cmp = cmp == GTU ? LT : GE;
1863 op1 = const0_rtx;
1864 }
1865
1866 /* unsigned x >= 0x80000000 --> signed x < 0
1867 unsigned x < 0x80000000 --> signed x >= 0 */
1868 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1869 && (unsigned HOST_WIDE_INT)val
1870 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1871 {
1872 cmp = cmp == GEU ? LT : GE;
1873 op1 = const0_rtx;
1874 }
1875 }
1876 }
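/* Worked example of the adjustments above: for SImode, 'x >= 1' becomes
   'x > 0' and 'unsigned x >= 0x80000000' becomes 'signed x < 0', so in
   both cases no constant pool load is needed for the comparison
   operand.  */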
1877
1878 /* This function implements the canonicalize_comparison target hook.
1879 This wrapper around the internally used sh_canonicalize_comparison
1880 function is needed to do the enum rtx_code <-> int conversion.
1881 Target hooks cannot use enum rtx_code in their definitions. */
1882 static void
1883 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1884 bool op0_preserve_value)
1885 {
1886 enum rtx_code tmp_code = (enum rtx_code)*code;
1887 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1888 VOIDmode, op0_preserve_value);
1889 *code = (int)tmp_code;
1890 }
1891
1892 /* This function implements the legitimate_combined_insn target hook,
1893 which the combine pass uses to early reject combined insns, before
1894 it tries to recog the insn and determine its cost. */
1895 static bool
1896 sh_legitimate_combined_insn (rtx_insn* insn)
1897 {
1898 /* Reject combinations of memory loads and zero extensions, as these
1899 interfere with other combine patterns such as zero extracts and bit
1900 tests. The SH2A movu.{b|w} insns are formed later in the
1901 'sh_optimize_extu_exts' pass after combine/split1. */
1902 rtx p = PATTERN (insn);
1903 if (GET_CODE (p) == SET
1904 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
1905 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
1906 && MEM_P (XEXP (XEXP (p, 1), 0)))
1907 return false;
1908
1909 return true;
1910 }
1911
1912 bool
1913 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1914 {
1915 *p1 = T_REG;
1916 *p2 = INVALID_REGNUM;
1917 return true;
1918 }
1919
1920 /* Try to calculate the branch distance of a conditional branch in bytes.
1921
1922 FIXME: Because of PR 59189 we can't use the CFG here. Instead just
1923 walk from this insn into the next (fall-through) basic block and see if
1924 we hit the label. */
1925 unsigned int
1926 sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist)
1927 {
1928 rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn);
1929
1930 if (dump_file)
1931 {
1932 fprintf (dump_file, "sh_cbranch_distance insn = \n");
1933 print_rtl_single (dump_file, cbranch_insn);
1934 }
1935
1936 unsigned int dist = 0;
1937
1938 for (rtx_insn* i = next_nonnote_insn (cbranch_insn);
1939 i != NULL && dist < max_dist; i = next_nonnote_insn (i))
1940 {
1941 const unsigned int i_len = get_attr_length (i);
1942 dist += i_len;
1943
1944 if (dump_file)
1945 fprintf (dump_file, " insn %d length = %u dist = %u\n",
1946 INSN_UID (i), i_len, dist);
1947
1948 if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i))
1949 {
1950 if (l == cbranch_insn->jump_target ())
1951 {
1952 if (dump_file)
1953 fprintf (dump_file, " cbranch dist = %u\n", dist);
1954 return dist;
1955 }
1956 break;
1957 }
1958 }
1959
1960 if (dump_file)
1961 fprintf (dump_file, " cbranch dist = unknown\n");
1962
1963 return unknown_cbranch_distance;
1964 }
1965
1966 enum rtx_code
1967 prepare_cbranch_operands (rtx *operands, machine_mode mode,
1968 enum rtx_code comparison)
1969 {
1970 gcc_assert (can_create_pseudo_p ());
1971
1972 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1973 comparison = GET_CODE (operands[0]);
1974
1975 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1976 mode, false);
1977
1978 rtx op1 = operands[1];
1979 operands[1] = force_reg (mode, op1);
1980
1981 /* When we are handling DImode comparisons, we want to keep constants so
1982 that we can optimize the component comparisons; however, memory loads
1983 are better issued as a whole so that they can be scheduled well.
1984 SImode equality comparisons allow I08 constants, but only when they
1985 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1986 into a register, that register might as well be r0, and we allow the
1987 constant. If it is already in a register, this is likely to be
1988 allocated to a different hard register, thus we load the constant into
1989 a register unless it is zero. */
1990 if (!REG_P (operands[2])
1991 && (!CONST_INT_P (operands[2])
1992 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1993 && ((comparison != EQ && comparison != NE)
1994 || (REG_P (op1) && REGNO (op1) != R0_REG)
1995 || !satisfies_constraint_I08 (operands[2])))))
1996 operands[2] = force_reg (mode, operands[2]);
1997
1998 return comparison;
1999 }
2000
2001 void
2002 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2003 {
2004 rtx (*branch_expander) (rtx) = gen_branch_true;
2005 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2006 switch (comparison)
2007 {
2008 case NE: case LT: case LE: case LTU: case LEU:
2009 comparison = reverse_condition (comparison);
2010 branch_expander = gen_branch_false;
2011 default: ;
2012 }
2013 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2014 gen_rtx_fmt_ee (comparison, SImode,
2015 operands[1], operands[2])));
2016 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2017 if (probability >= 0)
2018 add_int_reg_note (jump, REG_BR_PROB, probability);
2019 }
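/* For example, a cbranch on 'x < y' is emitted here as 'T = (x >= y)'
   followed by a branch-if-false, since SH only provides compare insns
   for the EQ / GT / GE / GTU / GEU forms.  */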
2020
2021 /* ??? How should we distribute probabilities when more than one branch
2022 is generated. So far we only have some ad-hoc observations:
2023 - If the operands are random, they are likely to differ in both parts.
2024 - If comparing items in a hash chain, the operands are random or equal;
2025 operation should be EQ or NE.
2026 - If items are searched in an ordered tree from the root, we can expect
2027 the highpart to be unequal about half of the time; operation should be
2028 an inequality comparison, operands non-constant, and overall probability
2029 about 50%. Likewise for quicksort.
2030 - Range checks will often be made against constants. Even if we assume for
2031 simplicity an even distribution of the non-constant operand over a
2032 sub-range here, the same probability could be generated with differently
2033 wide sub-ranges - as long as the ratio of the part of the subrange that
2034 is before the threshold to the part that comes after the threshold stays
2035 the same. Thus, we can't really tell anything here;
2036 assuming random distribution is at least simple.
2037 */
2038 bool
2039 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2040 {
2041 enum rtx_code msw_taken, msw_skip, lsw_taken;
2042 rtx_code_label *skip_label = NULL;
2043 rtx op1h, op1l, op2h, op2l;
2044 int num_branches;
2045 int prob, rev_prob;
2046 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2047
2048 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2049 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2050 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2051 op1l = gen_lowpart (SImode, operands[1]);
2052 op2l = gen_lowpart (SImode, operands[2]);
2053 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2054 prob = split_branch_probability;
2055 rev_prob = REG_BR_PROB_BASE - prob;
2056 switch (comparison)
2057 {
2058 case EQ:
2059 msw_skip = NE;
2060 lsw_taken = EQ;
2061 if (prob >= 0)
2062 {
2063 // If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
2064 msw_skip_prob = rev_prob;
2065 if (REG_BR_PROB_BASE <= 65535)
2066 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2067 else
2068 {
2069 lsw_taken_prob
2070 = (prob
2071 ? (REG_BR_PROB_BASE
2072 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2073 / ((gcov_type) prob << 32)))
2074 : 0);
2075 }
2076 }
2077 break;
2078 case NE:
2079 msw_taken = NE;
2080 msw_taken_prob = prob;
2081 lsw_taken = NE;
2082 lsw_taken_prob = 0;
2083 break;
2084 case GTU: case GT:
2085 msw_taken = comparison;
2086 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2087 break;
2088 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2089 msw_skip = swap_condition (msw_taken);
2090 lsw_taken = GTU;
2091 break;
2092 case GEU: case GE:
2093 if (op2l == CONST0_RTX (SImode))
2094 msw_taken = comparison;
2095 else
2096 {
2097 msw_taken = comparison == GE ? GT : GTU;
2098 msw_skip = swap_condition (msw_taken);
2099 lsw_taken = GEU;
2100 }
2101 break;
2102 case LTU: case LT:
2103 msw_taken = comparison;
2104 if (op2l == CONST0_RTX (SImode))
2105 break;
2106 msw_skip = swap_condition (msw_taken);
2107 lsw_taken = LTU;
2108 break;
2109 case LEU: case LE:
2110 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2111 msw_taken = comparison;
2112 else
2113 {
2114 lsw_taken = LEU;
2115 if (comparison == LE)
2116 msw_taken = LT;
2117 else if (op2h != CONST0_RTX (SImode))
2118 msw_taken = LTU;
2119 else
2120 {
2121 msw_skip = swap_condition (LTU);
2122 break;
2123 }
2124 msw_skip = swap_condition (msw_taken);
2125 }
2126 break;
2127 default: return false;
2128 }
2129 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2130 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2131 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2132 if (comparison != EQ && comparison != NE && num_branches > 1)
2133 {
2134 if (!CONSTANT_P (operands[2])
2135 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2136 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2137 {
2138 msw_taken_prob = prob / 2U;
2139 msw_skip_prob
2140 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2141 lsw_taken_prob = prob;
2142 }
2143 else
2144 {
2145 msw_taken_prob = prob;
2146 msw_skip_prob = REG_BR_PROB_BASE;
2147 /* ??? If we have a constant op2h, should we use that when
2148 calculating lsw_taken_prob? */
2149 lsw_taken_prob = prob;
2150 }
2151 }
2152 operands[1] = op1h;
2153 operands[2] = op2h;
2154 operands[4] = NULL_RTX;
2155
2156 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2157 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2158 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2159 {
2160 rtx taken_label = operands[3];
2161
2162 /* Operands were possibly modified, but msw_skip doesn't expect this.
2163 Always use the original ones. */
2164 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2165 {
2166 operands[1] = op1h;
2167 operands[2] = op2h;
2168 }
2169
2170 operands[3] = skip_label = gen_label_rtx ();
2171 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2172 operands[3] = taken_label;
2173 }
2174 operands[1] = op1l;
2175 operands[2] = op2l;
2176 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2177 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2178 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2179 emit_label (skip_label);
2180 return true;
2181 }
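/* Illustration: for a DImode EQ cbranch the code above compares the high
   words first and, if they differ, skips over the rest via a local label
   (msw_skip); otherwise the low word comparison branches to the target
   (lsw_taken).  For NE, both word comparisons branch directly to the
   target.  */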
2182
2183 /* Given an operand, return 1 if the evaluated operand plugged into an
2184 if_then_else will result in a branch_true, 0 if branch_false, or
2185 -1 if neither applies. The truth table goes like this:
2186
2187 op | cmpval | code | result
2188 ---------+--------+---------+--------------------
2189 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2190 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2191 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2192 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2193 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2194 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2195 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2196 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2197 int
2198 sh_eval_treg_value (rtx op)
2199 {
2200 if (t_reg_operand (op, GET_MODE (op)))
2201 return 1;
2202 if (negt_reg_operand (op, GET_MODE (op)))
2203 return 0;
2204
2205 rtx_code code = GET_CODE (op);
2206 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2207 return -1;
2208
2209 int cmpop = code == EQ ? 1 : 0;
2210 int cmpval = INTVAL (XEXP (op, 1));
2211 if (cmpval != 0 && cmpval != 1)
2212 return -1;
2213
2214 int t;
2215 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2216 t = 0;
2217 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2218 t = 1;
2219 else
2220 return -1;
2221
2222 return t ^ (cmpval == cmpop);
2223 }
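/* E.g. an operand of the form (eq (reg T) (const_int 0)) evaluates to 0
   here, i.e. branch_false, matching the first row of the table above.  */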
2224
2225 /* Emit INSN, possibly in a PARALLEL with a USE/CLOBBER of FPSCR bits in case
2226 of floating-point comparisons. */
2227 static void
2228 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2229 {
2230 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2231 && GET_CODE (insn) != PARALLEL)
2232 {
2233 insn = gen_rtx_PARALLEL (VOIDmode,
2234 gen_rtvec (3, insn,
2235 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2236 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2237 }
2238 emit_insn (insn);
2239 }
2240
2241 /* Prepare the operands for an scc instruction; make sure that the
2242 compare has been done and the result is in T_REG. */
2243 void
2244 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2245 {
2246 rtx t_reg = get_t_reg_rtx ();
2247 enum rtx_code oldcode = code;
2248
2249 /* First need a compare insn. */
2250 switch (code)
2251 {
2252 case NE:
2253 /* It isn't possible to handle this case. */
2254 gcc_unreachable ();
2255 case LT:
2256 code = GT;
2257 break;
2258 case LE:
2259 code = GE;
2260 break;
2261 case LTU:
2262 code = GTU;
2263 break;
2264 case LEU:
2265 code = GEU;
2266 break;
2267 default:
2268 break;
2269 }
2270 if (code != oldcode)
2271 std::swap (op0, op1);
2272
2273 machine_mode mode = GET_MODE (op0);
2274 if (mode == VOIDmode)
2275 mode = GET_MODE (op1);
2276
2277 op0 = force_reg (mode, op0);
2278 if ((code != EQ && code != NE
2279 && (op1 != const0_rtx
2280 || code == GTU || code == GEU || code == LTU || code == LEU))
2281 || (mode == DImode && op1 != const0_rtx)
2282 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2283 op1 = force_reg (mode, op1);
2284
2285 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2286 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2287 mode);
2288 }
2289
2290 /* Called from the md file, set up the operands of a compare instruction. */
2291 void
2292 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2293 {
2294 enum rtx_code code = GET_CODE (operands[0]);
2295 enum rtx_code branch_code;
2296 rtx op0 = operands[1];
2297 rtx op1 = operands[2];
2298 rtx insn;
2299 bool need_ccmpeq = false;
2300
2301 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2302 {
2303 op0 = force_reg (mode, op0);
2304 op1 = force_reg (mode, op1);
2305 }
2306 else
2307 {
2308 if (code != EQ || mode == DImode)
2309 {
2310 /* Force args into regs, since we can't use constants here. */
2311 op0 = force_reg (mode, op0);
2312 if (op1 != const0_rtx || code == GTU || code == GEU)
2313 op1 = force_reg (mode, op1);
2314 }
2315 }
2316
2317 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2318 {
2319 if (code == LT
2320 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2321 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2322 {
2323 std::swap (op0, op1);
2324 code = swap_condition (code);
2325 }
2326
2327 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2328 if (code == GE)
2329 {
2330 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2331 need_ccmpeq = true;
2332 code = GT;
2333 }
2334
2335 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2336 to EQ/GT respectively. */
2337 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2338 }
2339
2340 switch (code)
2341 {
2342 case EQ:
2343 case GT:
2344 case GE:
2345 case GTU:
2346 case GEU:
2347 branch_code = code;
2348 break;
2349 case NE:
2350 case LT:
2351 case LE:
2352 case LTU:
2353 case LEU:
2354 branch_code = reverse_condition (code);
2355 break;
2356 default:
2357 gcc_unreachable ();
2358 }
2359
2360 insn = gen_rtx_SET (get_t_reg_rtx (),
2361 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2362
2363 sh_emit_set_t_insn (insn, mode);
2364 if (need_ccmpeq)
2365 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2366
2367 if (branch_code == code)
2368 emit_jump_insn (gen_branch_true (operands[3]));
2369 else
2370 emit_jump_insn (gen_branch_false (operands[3]));
2371 }
2372
2373 void
2374 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2375 {
2376 enum rtx_code code = GET_CODE (operands[1]);
2377 rtx op0 = operands[2];
2378 rtx op1 = operands[3];
2379 rtx_code_label *lab = NULL;
2380 bool invert = false;
2381
2382 op0 = force_reg (mode, op0);
2383 if ((code != EQ && code != NE
2384 && (op1 != const0_rtx
2385 || code == GTU || code == GEU || code == LTU || code == LEU))
2386 || (mode == DImode && op1 != const0_rtx)
2387 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2388 op1 = force_reg (mode, op1);
2389
2390 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2391 {
2392 if (code == LT || code == LE)
2393 {
2394 std::swap (op0, op1);
2395 code = swap_condition (code);
2396 }
2397 if (code == GE)
2398 {
2399 if (TARGET_IEEE)
2400 {
2401 lab = gen_label_rtx ();
2402 sh_emit_scc_to_t (EQ, op0, op1);
2403 emit_jump_insn (gen_branch_true (lab));
2404 code = GT;
2405 }
2406 else
2407 {
2408 code = LT;
2409 invert = true;
2410 }
2411 }
2412 }
2413
2414 if (code == NE)
2415 {
2416 code = EQ;
2417 invert = true;
2418 }
2419
2420 sh_emit_scc_to_t (code, op0, op1);
2421 if (lab)
2422 emit_label (lab);
2423 if (invert)
2424 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2425 else
2426 emit_move_insn (operands[0], get_t_reg_rtx ());
2427 }
2428 \f
2429 /* Functions to output assembly code. */
2430
2431 /* Return a sequence of instructions to perform DI or DF move.
2432
2433 Since the SH cannot move a DI or DF in one instruction, we have
2434 to take care when we see overlapping source and dest registers. */
2435 const char *
2436 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2437 machine_mode mode)
2438 {
2439 rtx dst = operands[0];
2440 rtx src = operands[1];
2441
2442 if (MEM_P (dst)
2443 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2444 return "mov.l %T1,%0" "\n"
2445 " mov.l %1,%0";
2446
2447 if (register_operand (dst, mode)
2448 && register_operand (src, mode))
2449 {
2450 if (REGNO (src) == MACH_REG)
2451 return "sts mach,%S0" "\n"
2452 " sts macl,%R0";
2453
2454 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2455 when mov.d r1,r0 do r1->r0 then r2->r1. */
2456 if (REGNO (src) + 1 == REGNO (dst))
2457 return "mov %T1,%T0" "\n"
2458 " mov %1,%0";
2459 else
2460 return "mov %1,%0" "\n"
2461 " mov %T1,%T0";
2462 }
2463 else if (CONST_INT_P (src))
2464 {
2465 if (INTVAL (src) < 0)
2466 output_asm_insn ("mov #-1,%S0", operands);
2467 else
2468 output_asm_insn ("mov #0,%S0", operands);
2469
2470 return "mov %1,%R0";
2471 }
2472 else if (MEM_P (src))
2473 {
2474 int ptrreg = -1;
2475 int dreg = REGNO (dst);
2476 rtx inside = XEXP (src, 0);
2477
2478 switch (GET_CODE (inside))
2479 {
2480 case REG:
2481 ptrreg = REGNO (inside);
2482 break;
2483
2484 case SUBREG:
2485 ptrreg = subreg_regno (inside);
2486 break;
2487
2488 case PLUS:
2489 ptrreg = REGNO (XEXP (inside, 0));
2490 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2491 an offsettable address. Unfortunately, offsettable addresses use
2492 QImode to check the offset, and a QImode offsettable address
2493 requires r0 for the other operand, which is not currently
2494 supported, so we can't use the 'o' constraint.
2495 Thus we must check for and handle r0+REG addresses here.
2496 We punt for now, since this is likely very rare. */
2497 gcc_assert (!REG_P (XEXP (inside, 1)));
2498 break;
2499
2500 case LABEL_REF:
2501 return "mov.l %1,%0" "\n"
2502 " mov.l %1+4,%T0";
2503 case POST_INC:
2504 return "mov.l %1,%0" "\n"
2505 " mov.l %1,%T0";
2506 default:
2507 gcc_unreachable ();
2508 }
2509
2510 /* Work out the safe way to copy. Copy into the second half first. */
2511 if (dreg == ptrreg)
2512 return "mov.l %T1,%T0" "\n"
2513 " mov.l %1,%0";
2514 }
2515
2516 return "mov.l %1,%0" "\n"
2517 " mov.l %T1,%T0";
2518 }
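/* Example of the overlap handling above: when loading a DImode value from
   memory addressed by the destination's own first register, the word at
   offset 4 is moved first so that the pointer register is still valid for
   the second access.  */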
2519
2520 /* Print an instruction which would have gone into a delay slot after
2521 another instruction, but couldn't because the other instruction expanded
2522 into a sequence where putting the slot insn at the end wouldn't work. */
2523 static void
2524 print_slot (rtx_sequence *seq)
2525 {
2526 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2527
2528 seq->insn (1)->set_deleted ();
2529 }
2530
2531 const char *
2532 output_far_jump (rtx_insn *insn, rtx op)
2533 {
2534 struct { rtx lab, reg, op; } this_jmp;
2535 rtx_code_label *braf_base_lab = NULL;
2536 const char *jump;
2537 int far;
2538 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2539 rtx_insn *prev;
2540
2541 this_jmp.lab = gen_label_rtx ();
2542
2543 if (TARGET_SH2
2544 && offset >= -32764
2545 && offset - get_attr_length (insn) <= 32766
2546 && ! CROSSING_JUMP_P (insn))
2547 {
2548 far = 0;
2549 jump = "mov.w %O0,%1" "\n"
2550 " braf %1";
2551 }
2552 else
2553 {
2554 far = 1;
2555 if (flag_pic)
2556 {
2557 if (TARGET_SH2)
2558 jump = "mov.l %O0,%1" "\n"
2559 " braf %1";
2560 else
2561 jump = "mov.l r0,@-r15" "\n"
2562 " mova %O0,r0" "\n"
2563 " mov.l @r0,%1" "\n"
2564 " add r0,%1" "\n"
2565 " mov.l @r15+,r0" "\n"
2566 " jmp @%1";
2567 }
2568 else
2569 jump = "mov.l %O0,%1" "\n"
2570 " jmp @%1";
2571 }
2572 /* If we have a scratch register available, use it. */
2573 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2574 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2575 {
2576 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2577 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2578 jump = "mov.l r1,@-r15" "\n"
2579 " mova %O0,r0" "\n"
2580 " mov.l @r0,r1" "\n"
2581 " add r1,r0" "\n"
2582 " mov.l @r15+,r1" "\n"
2583 " jmp @%1";
2584 output_asm_insn (jump, &this_jmp.lab);
2585 if (dbr_sequence_length ())
2586 print_slot (final_sequence);
2587 else
2588 output_asm_insn ("nop", 0);
2589 }
2590 else
2591 {
2592 /* Output the delay slot insn first if any. */
2593 if (dbr_sequence_length ())
2594 print_slot (final_sequence);
2595
2596 this_jmp.reg = gen_rtx_REG (SImode, 13);
2597 output_asm_insn ("mov.l r13,@-r15", 0);
2598 output_asm_insn (jump, &this_jmp.lab);
2599 output_asm_insn ("mov.l @r15+,r13", 0);
2600 }
2601 if (far && flag_pic && TARGET_SH2)
2602 {
2603 braf_base_lab = gen_label_rtx ();
2604 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2605 CODE_LABEL_NUMBER (braf_base_lab));
2606 }
2607 if (far)
2608 output_asm_insn (".align 2", 0);
2609 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2610 this_jmp.op = op;
2611 if (far && flag_pic)
2612 {
2613 if (TARGET_SH2)
2614 this_jmp.lab = braf_base_lab;
2615 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2616 }
2617 else
2618 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2619 return "";
2620 }
2621
2622 /* Local label counter, used for constants in the pool and inside
2623 pattern branches. */
2624 static int lf = 100;
2625
2626 /* Output code for ordinary branches. */
2627 const char *
2628 output_branch (int logic, rtx_insn *insn, rtx *operands)
2629 {
2630 switch (get_attr_length (insn))
2631 {
2632 case 6:
2633 /* This can happen if filling the delay slot has caused a forward
2634 branch to exceed its range (we could reverse it, but only
2635 when we know we won't overextend other branches; this should
2636 best be handled by relaxation).
2637 It can also happen when other condbranches hoist delay slot insns
2638 from their destination, thus leading to code size increase.
2639 But the branch will still be in the range -4092..+4098 bytes. */
2640 if (! TARGET_RELAX)
2641 {
2642 int label = lf++;
2643 /* The call to print_slot will clobber the operands. */
2644 rtx op0 = operands[0];
2645
2646 /* If the instruction in the delay slot is annulled (true), then
2647 there is no delay slot where we can put it now. The only safe
2648 place for it is after the label. final will do that by default. */
2649
2650 if (final_sequence
2651 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2652 && get_attr_length (final_sequence->insn (1)))
2653 {
2654 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2655 ASSEMBLER_DIALECT ? "/" : ".", label);
2656 print_slot (final_sequence);
2657 }
2658 else
2659 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2660
2661 output_asm_insn ("bra\t%l0", &op0);
2662 fprintf (asm_out_file, "\tnop\n");
2663 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2664
2665 return "";
2666 }
2667 /* When relaxing, handle this like a short branch. The linker
2668 will fix it up if it still doesn't fit after relaxation. */
2669 case 2:
2670 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2671
2672 /* These are for SH2e, in which we have to account for the
2673 extra nop because of the hardware bug in annulled branches. */
2674 case 8:
2675 if (! TARGET_RELAX)
2676 {
2677 int label = lf++;
2678
2679 gcc_assert (!final_sequence
2680 || !(INSN_ANNULLED_BRANCH_P
2681 (XVECEXP (final_sequence, 0, 0))));
2682 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2683 logic ? "f" : "t",
2684 ASSEMBLER_DIALECT ? "/" : ".", label);
2685 fprintf (asm_out_file, "\tnop\n");
2686 output_asm_insn ("bra\t%l0", operands);
2687 fprintf (asm_out_file, "\tnop\n");
2688 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2689
2690 return "";
2691 }
2692 /* When relaxing, fall through. */
2693 case 4:
2694 {
2695 char buffer[10];
2696
2697 sprintf (buffer, "b%s%ss\t%%l0",
2698 logic ? "t" : "f",
2699 ASSEMBLER_DIALECT ? "/" : ".");
2700 output_asm_insn (buffer, &operands[0]);
2701 return "nop";
2702 }
2703
2704 default:
2705 /* There should be no longer branches now - that would
2706 indicate that something has destroyed the branches set
2707 up in machine_dependent_reorg. */
2708 gcc_unreachable ();
2709 }
2710 }
2711
2712 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2713 fill in operand 9 as a label to the successor insn.
2714 We try to use jump threading where possible.
2715 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2716 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2717 follow jmp and bt, if the address is in range. */
2718 const char *
2719 output_branchy_insn (enum rtx_code code, const char *templ,
2720 rtx_insn *insn, rtx *operands)
2721 {
2722 rtx_insn *next_insn = NEXT_INSN (insn);
2723
2724 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2725 {
2726 rtx src = SET_SRC (PATTERN (next_insn));
2727 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2728 {
2729 /* Following branch not taken */
2730 rtx_code_label *lab = gen_label_rtx ();
2731 emit_label_after (lab, next_insn);
2732 INSN_ADDRESSES_NEW (lab,
2733 INSN_ADDRESSES (INSN_UID (next_insn))
2734 + get_attr_length (next_insn));
2735 operands[9] = lab;
2736 return templ;
2737 }
2738 else
2739 {
2740 int offset = (branch_dest (next_insn)
2741 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2742 if (offset >= -252 && offset <= 258)
2743 {
2744 if (GET_CODE (src) == IF_THEN_ELSE)
2745 /* branch_true */
2746 src = XEXP (src, 1);
2747 operands[9] = src;
2748 return templ;
2749 }
2750 }
2751 }
2752 rtx_code_label *lab = gen_label_rtx ();
2753 emit_label_after (lab, insn);
2754 INSN_ADDRESSES_NEW (lab,
2755 INSN_ADDRESSES (INSN_UID (insn))
2756 + get_attr_length (insn));
2757 operands[9] = lab;
2758 return templ;
2759 }
2760
2761 const char *
2762 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2763 {
2764 return output_branchy_insn (NE, "bt %l9" "\n"
2765 " fcmp/eq %1,%0",
2766 insn, operands);
2767 }
2768 \f
2769 /* Output the start of the assembler file. */
2770 static void
2771 sh_file_start (void)
2772 {
2773 default_file_start ();
2774
2775 if (TARGET_ELF)
2776 /* We need to show the text section with the proper
2777 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2778 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2779 will complain. We can teach GAS specifically about the
2780 default attributes for our choice of text section, but
2781 then we would have to change GAS again if/when we change
2782 the text section name. */
2783 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2784 else
2785 /* Switch to the data section so that the coffsem symbol
2786 isn't in the text section. */
2787 switch_to_section (data_section);
2788
2789 if (TARGET_LITTLE_ENDIAN)
2790 fputs ("\t.little\n", asm_out_file);
2791 }
2792 \f
2793 /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
2794 need to be output as pointers to function descriptors for
2795 FDPIC. */
2796
2797 static bool
2798 sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
2799 {
2800 if (TARGET_FDPIC && size == UNITS_PER_WORD
2801 && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
2802 {
2803 fputs ("\t.long\t", asm_out_file);
2804 output_addr_const (asm_out_file, value);
2805 fputs ("@FUNCDESC\n", asm_out_file);
2806 return true;
2807 }
2808 return default_assemble_integer (value, size, aligned_p);
2809 }
2810 \f
2811 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2812 static bool
2813 unspec_caller_rtx_p (rtx pat)
2814 {
2815 rtx base, offset;
2816 split_const (pat, &base, &offset);
2817
2818 if (GET_CODE (base) == UNSPEC)
2819 {
2820 if (XINT (base, 1) == UNSPEC_CALLER)
2821 return true;
2822 for (int i = 0; i < XVECLEN (base, 0); i++)
2823 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2824 return true;
2825 }
2826 return false;
2827 }
2828
2829 /* Indicate that INSN cannot be duplicated. This is true for an insn
2830 that generates a unique label. */
2831 static bool
2832 sh_cannot_copy_insn_p (rtx_insn *insn)
2833 {
2834 if (!reload_completed || !flag_pic)
2835 return false;
2836
2837 if (!NONJUMP_INSN_P (insn))
2838 return false;
2839 if (asm_noperands (insn) >= 0)
2840 return false;
2841
2842 rtx pat = PATTERN (insn);
2843
2844 if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
2845 return false;
2846
2847 if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
2848 {
2849 rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
2850 if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
2851 return true;
2852 }
2853
2854 if (GET_CODE (pat) != SET)
2855 return false;
2856 pat = SET_SRC (pat);
2857
2858 if (unspec_caller_rtx_p (pat))
2859 return true;
2860
2861 return false;
2862 }
2863 \f
2864 /* Number of instructions used to make an arithmetic right shift by N. */
2865 static const char ashiftrt_insns[] =
2866 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2867
2868 /* Description of a logical left or right shift, when expanded to a sequence
2869 of 1/2/8/16 shifts.
2870 Notice that one bit right shifts clobber the T bit. One bit left shifts
2871 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
2872 enum
2873 {
2874 ASHL_CLOBBERS_T = 1 << 0,
2875 LSHR_CLOBBERS_T = 1 << 1
2876 };
2877
2878 struct ashl_lshr_sequence
2879 {
2880 char insn_count;
2881 signed char amount[6];
2882 char clobbers_t;
2883 };
2884
2885 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2886 {
2887 { 0, { 0 }, 0 }, // 0
2888 { 1, { 1 }, LSHR_CLOBBERS_T },
2889 { 1, { 2 }, 0 },
2890 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2891 { 2, { 2, 2 }, 0 }, // 4
2892 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2893 { 3, { 2, 2, 2 }, 0 },
2894 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2895 { 1, { 8 }, 0 }, // 8
2896 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2897 { 2, { 8, 2 }, 0 },
2898 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2899 { 3, { 8, 2, 2 }, 0 }, // 12
2900 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2901 { 3, { 8, -2, 8 }, 0 },
2902 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2903 { 1, { 16 }, 0 }, // 16
2904 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2905 { 2, { 16, 2 }, 0 },
2906 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2907 { 3, { 16, 2, 2 }, 0 }, // 20
2908 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2909 { 3, { 16, -2, 8 }, 0 },
2910 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2911 { 2, { 16, 8 }, 0 }, // 24
2912 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2913 { 3, { 16, 8, 2 }, 0 },
2914 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2915 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2916 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2917 { 3, { 16, -2, 16 }, 0 },
2918
2919 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2920 For a left shift by 31 a 2 insn and-rotl sequence can be used.
2921 However, the shift-and combiner code needs this entry here to be in
2922 terms of real shift insns. */
2923 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2924 };
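/* For instance, entry 15 above expands a shift by 15 as 'shift by 8, one
   bit in the opposite direction, shift by 8 again'.  For a left shift that
   middle step is a one bit right shift, hence the ASHL_CLOBBERS_T mark;
   for a right shift it is a one bit left shift done with an add, which
   leaves the T bit alone.  */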
2925
2926 /* Individual shift amounts for shift amounts < 16, where up to three of the
2927 highmost bits might be clobbered. This is typically used when combined
2928 with some kind of sign or zero extension. */
2929 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
2930 {
2931 { 0, { 0 }, 0 }, // 0
2932 { 1, { 1 }, LSHR_CLOBBERS_T },
2933 { 1, { 2 }, 0 },
2934 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2935 { 2, { 2, 2 }, 0 }, // 4
2936 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2937 { 2, { 8, -2 }, 0 },
2938 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
2939 { 1, { 8 }, 0 }, // 8
2940 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2941 { 2, { 8, 2 }, 0 },
2942 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2943 { 3, { 8, 2, 2 }, 0 }, // 12
2944 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
2945 { 2, { 16, -2 }, 0 },
2946 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
2947 { 1, { 16 }, 0 }, // 16
2948 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2949 { 2, { 16, 2 }, 0 },
2950 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2951 { 3, { 16, 2, 2 }, 0 }, // 20
2952 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2953 { 3, { 16, -2, 8 }, 0 },
2954 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2955 { 2, { 16, 8 }, 0 }, // 24
2956 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2957 { 3, { 16, 8, 2 }, 0 },
2958 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2959 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2960 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2961 { 3, { 16, -2, 16 }, 0 },
2962 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2963 };
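/* E.g. for a left shift by 6 this table uses { 8, -2 } -- shift left by 8,
   then back right by 2 -- one insn shorter than the { 2, 2, 2 } sequence in
   ashl_lshr_seq, at the price of zeroing the two highmost bits, which is
   acceptable in the sign/zero extension contexts described above.  */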
2964
2965 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
2966 will clobber the T bit. */
2967 bool
2968 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
2969 {
2970 gcc_assert (CONST_INT_P (shift_amount));
2971
2972 const int shift_amount_i = INTVAL (shift_amount) & 31;
2973
2974 /* Special case for shift count of 31: use and-rotl sequence. */
2975 if (shift_amount_i == 31)
2976 return true;
2977
2978 return (ashl_lshr_seq[shift_amount_i].clobbers_t
2979 & ASHL_CLOBBERS_T) != 0;
2980 }
2981
2982 /* Return true if a logical right shift consisting of 1/2/8/16 shift
2983 instructions will clobber the T bit. */
2984 bool
2985 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
2986 {
2987 gcc_assert (CONST_INT_P (shift_amount));
2988
2989 /* For right shifts the constant might be negative. */
2990 const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31;
2991
2992 /* Special case for shift count of 31: use shll-movt sequence. */
2993 if (shift_amount_i == 31)
2994 return true;
2995
2996 return (ashl_lshr_seq[shift_amount_i].clobbers_t
2997 & LSHR_CLOBBERS_T) != 0;
2998 }
2999
3000 /* Return true if it is potentially beneficial to use a dynamic shift
3001 instruction (shad / shar) instead of a combination of 1/2/8/16
3002 shift instructions for the specified shift count.
3003 If dynamic shifts are not available, always return false. */
3004 bool
3005 sh_dynamicalize_shift_p (rtx count)
3006 {
3007 gcc_assert (CONST_INT_P (count));
3008
3009 /* For right shifts the constant might be negative. */
3010 const int shift_amount_i = std::abs (INTVAL (count)) & 31;
3011 int insn_count;
3012
3013 /* For left and right shifts, there are shorter 2 insn sequences for
3014 shift amounts of 31. */
3015 if (shift_amount_i == 31)
3016 insn_count = 2;
3017 else
3018 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3019
3020 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3021 }
3022
3023 /* Assuming we have a value that has been sign-extended by at least one bit,
3024 can we use the ext_ashl_lshr_seq sequences with the last shift turned to an
3025 arithmetic shift to shift it by N without data loss, and quicker than by
3026 other means? */
3027 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3028
3029 /* Return the cost of a shift. */
3030 static inline int
3031 shiftcosts (rtx x)
3032 {
3033 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3034 {
3035 if (GET_MODE (x) == DImode
3036 && CONST_INT_P (XEXP (x, 1))
3037 && INTVAL (XEXP (x, 1)) == 1)
3038 return 2;
3039
3040 /* Everything else is invalid, because there is no pattern for it. */
3041 return -1;
3042 }
3043 /* If shift by a non constant, then this will be expensive. */
3044 if (!CONST_INT_P (XEXP (x, 1)))
3045 return SH_DYNAMIC_SHIFT_COST;
3046
3047 /* Otherwise, return the true cost in instructions. Cope with out of range
3048 shift counts more or less arbitrarily. */
3049 int value = INTVAL (XEXP (x, 1)) & 31;
3050
3051 if (GET_CODE (x) == ASHIFTRT)
3052 {
3053 int cost = ashiftrt_insns[value];
3054 /* If dynamic shifts are available and profitable in this case, then we
3055 put the constant in a reg and use shad. */
3056 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3057 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3058 return cost;
3059 }
3060 else
3061 return ashl_lshr_seq[value].insn_count;
3062 }
3063
3064 /* Return the cost of an AND/XOR/IOR operation. */
3065 static inline int
3066 and_xor_ior_costs (rtx x, int code)
3067 {
3068 /* On SH1-4 we have only max. SImode operations.
3069 Double the cost for modes > SImode. */
3070 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3071
3072 /* A logical operation with two registers is a single cycle
3073 instruction. */
3074 if (!CONST_INT_P (XEXP (x, 1)))
3075 return 1 * cost_scale;
3076
3077 int i = INTVAL (XEXP (x, 1));
3078
3079 /* These constants are single cycle extu.[bw] instructions. */
3080 if ((i == 0xff || i == 0xffff) && code == AND)
3081 return 1 * cost_scale;
3082 /* Constants that can be used in an instruction as an immediate take
3083 a single cycle, but this requires r0, so make it a little more
3084 expensive. */
3085 if (CONST_OK_FOR_K08 (i))
3086 return 2 * cost_scale;
3087 /* Constants that can be loaded with a mov immediate need one more cycle.
3088 This case is probably unnecessary. */
3089 if (CONST_OK_FOR_I08 (i))
3090 return 2 * cost_scale;
3091 /* Any other constant requires an additional 2 cycle pc-relative load.
3092 This case is probably unnecessary. */
3093 return 3 * cost_scale;
3094 }
3095
3096 /* Return the cost of an addition or a subtraction. */
3097 static inline int
3098 addsubcosts (rtx x)
3099 {
3100 if (GET_MODE (x) == SImode)
3101 {
3102 /* The addc or subc patterns will eventually become one or two
3103 instructions. Below are some costs for some of the patterns
3104 which combine would reject because the costs of the individual
3105 insns in the patterns are lower.
3106
3107 FIXME: It would be much easier if we had something like insn cost
3108 attributes and the cost calculation machinery used those attributes
3109 in the first place. This would eliminate redundant recog-like C
3110 code to calculate costs of complex patterns. */
3111 rtx op0 = XEXP (x, 0);
3112 rtx op1 = XEXP (x, 1);
3113
3114 if (GET_CODE (x) == PLUS)
3115 {
3116 if (GET_CODE (op0) == AND
3117 && XEXP (op0, 1) == const1_rtx
3118 && (GET_CODE (op1) == PLUS
3119 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3120 return 1;
3121
3122 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3123 && GET_CODE (op1) == LSHIFTRT
3124 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3125 return 1;
3126 }
3127 /* Let's assume that adding the result of an insn that stores into
3128 the T bit is cheap. */
3129 if (treg_set_expr (op1, SImode))
3130 return 1;
3131 if (treg_set_expr (op0, SImode))
3132 return 1;
3133 }
3134
3135 /* On SH1-4 we have only max. SImode operations.
3136 Double the cost for modes > SImode. */
3137 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3138
3139 /* Adding a register is a single cycle insn. */
3140 if (REG_P (XEXP (x, 1))
3141 || GET_CODE (XEXP (x, 1)) == SUBREG)
3142 return 1 * cost_scale;
3143
3144 /* Likewise for small constants. */
3145 if (CONST_INT_P (XEXP (x, 1))
3146 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3147 return 1 * cost_scale;
3148
3149 /* Any other constant requires a 2 cycle pc-relative load plus an
3150 addition. */
3151 return 3 * cost_scale;
3152 }
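/* For example, an SImode (plus (and x (const_int 1)) (plus a b)) is costed
   as a single insn above because it is expected to end up as one of the
   addc patterns, so that combine does not reject the combination on cost
   grounds.  */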
3153
3154 /* Return the cost of a multiply. */
3155 static inline int
3156 multcosts (rtx x ATTRIBUTE_UNUSED)
3157 {
3158 if (sh_multcost >= 0)
3159 return sh_multcost;
3160
3161 if (TARGET_SH2)
3162 {
3163 /* We have a mul insn, so we can never take more than the mul and the
3164 read of the mac reg, but count more because of the latency and extra
3165 reg usage. */
3166 if (optimize_size)
3167 return 2;
3168 return 3;
3169 }
3170
3171 /* If we're aiming at small code, then just count the number of
3172 insns in a multiply call sequence. */
3173 if (optimize_size)
3174 return 5;
3175
3176 /* Otherwise count all the insns in the routine we'd be calling too. */
3177 return 20;
3178 }
3179
3180 /* Compute a (partial) cost for rtx X. Return true if the complete
3181 cost has been computed, and false if subexpressions should be
3182 scanned. In either case, *TOTAL contains the cost result. */
3183 static bool
3184 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3185 int opno ATTRIBUTE_UNUSED,
3186 int *total, bool speed ATTRIBUTE_UNUSED)
3187 {
3188 int code = GET_CODE (x);
3189
3190 switch (code)
3191 {
3192 /* The lower-subreg pass decides whether to split multi-word regs
3193 into individual regs by looking at the cost for a SET of certain
3194 modes with the following patterns:
3195 (set (reg) (reg))
3196 (set (reg) (const_int 0))
3197 On machines that support vector-move operations a multi-word move
3198 is the same cost as an individual reg move. On SH there is no
3199 vector-move, so we have to provide the correct cost in the number
3200 of move insns to load/store the reg of the mode in question. */
3201 case SET:
3202 if (register_operand (SET_DEST (x), VOIDmode)
3203 && (register_operand (SET_SRC (x), VOIDmode)
3204 || satisfies_constraint_Z (SET_SRC (x))))
3205 {
3206 const machine_mode mode = GET_MODE (SET_DEST (x));
3207 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3208 / mov_insn_size (mode, TARGET_SH2A));
3209 return true;
3210 }
3211 return false;
3212
3213 /* The cost of a mem access is mainly the cost of the address mode. */
3214 case MEM:
3215 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3216 true);
3217 return true;
3218
3219 case IF_THEN_ELSE:
3220 /* This case is required for the if_then_else negc pattern. */
3221 if (treg_set_expr (XEXP (x, 0), SImode))
3222 {
3223 *total = COSTS_N_INSNS (1);
3224 return true;
3225 }
3226 else
3227 return false;
3228
3229 /* Zero extracts of single bits are usually combine patterns for the
3230 tst insns. */
3231 case ZERO_EXTRACT:
3232 if (GET_CODE (XEXP (x, 0)) == XOR
3233 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3234 && XEXP (x, 1) == const1_rtx
3235 && CONST_INT_P (XEXP (x, 2))
3236 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3237 /* Check that the xor constant overlaps with the extracted bit. */
3238 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3239 {
3240 *total = 1; //COSTS_N_INSNS (1);
3241 return true;
3242 }
3243
3244 /* div0s variant. */
3245 if (GET_CODE (XEXP (x, 0)) == XOR
3246 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3247 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3248 {
3249 *total = 1;
3250 return true;
3251 }
3252 return false;
3253
3254 /* The cost of a sign or zero extend depends on whether the source is a
3255 reg or a mem. In case of a mem, take the address into account. */
3256 case SIGN_EXTEND:
3257 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3258 {
3259 *total = COSTS_N_INSNS (1);
3260 return true;
3261 }
3262 if (MEM_P (XEXP (x, 0)))
3263 {
3264 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3265 GET_MODE (XEXP (x, 0)),
3266 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3267 return true;
3268 }
3269 return false;
3270
3271 case ZERO_EXTEND:
3272 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3273 {
3274 *total = COSTS_N_INSNS (1);
3275 return true;
3276 }
3277 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3278 && (GET_MODE (XEXP (x, 0)) == QImode
3279 || GET_MODE (XEXP (x, 0)) == HImode))
3280 {
3281 /* Handle SH2A's movu.b and movu.w insn. */
3282 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3283 GET_MODE (XEXP (x, 0)),
3284 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3285 return true;
3286 }
3287 return false;
3288
3289 /* mems for SFmode and DFmode can be inside a parallel due to
3290 the way the fpscr is handled. */
3291 case PARALLEL:
3292 for (int i = 0; i < XVECLEN (x, 0); i++)
3293 {
3294 rtx xx = XVECEXP (x, 0, i);
3295 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3296 {
3297 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3298 GET_MODE (XEXP (xx, 0)),
3299 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3300 return true;
3301 }
3302 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3303 {
3304 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3305 GET_MODE (XEXP (xx, 1)),
3306 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3307 return true;
3308 }
3309 }
3310
3311 if (sh_1el_vec (x, VOIDmode))
3312 *total = outer_code != SET;
3313 else if (sh_rep_vec (x, VOIDmode))
3314 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3315 + (outer_code != SET));
3316 else
3317 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3318 return true;
3319
3320 case CONST_INT:
3321 if (CONST_OK_FOR_I08 (INTVAL (x)))
3322 *total = 0;
3323 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3324 && CONST_OK_FOR_K08 (INTVAL (x)))
3325 *total = 1;
3326 /* prepare_cmp_insn will force costly constants into registers before
3327 the cbranch[sd]i4 patterns can see them, so preserve potentially
3328 interesting ones not covered by I08 above. */
3329 else if (outer_code == COMPARE
3330 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3331 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3332 || INTVAL (x) == 0x7fffffff
3333 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3334 *total = 1;
3335 else
3336 *total = 8;
3337 return true;
3338
3339 case EQ:
3340 /* An and with a constant compared against zero is
3341 most likely going to be a TST #imm, R0 instruction. */
3342 if (XEXP (x, 1) == const0_rtx
3343 && ((GET_CODE (XEXP (x, 0)) == AND
3344 || (SUBREG_P (XEXP (x, 0))
3345 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))
3346 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT))
3347 {
3348 *total = 1;
3349 return true;
3350 }
3351
3352 else if (XEXP (x, 1) == const0_rtx
3353 && GET_CODE (XEXP (x, 0)) == AND
3354 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3355 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3356 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3357 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3358 {
3359 *total = 1;
3360 return true;
3361 }
3362 else
3363 return false;
3364
3365 case SMIN:
3366 case SMAX:
3367 /* This is most likely a clips.b or clips.w insn that is being made up
3368 by combine. */
3369 if (TARGET_SH2A
3370 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3371 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3372 && REG_P (XEXP (XEXP (x, 0), 0))
3373 && CONST_INT_P (XEXP (x, 1)))
3374 {
3375 *total = COSTS_N_INSNS (1);
3376 return true;
3377 }
3378 else
3379 return false;
3380
3381 case CONST:
3382 case LABEL_REF:
3383 case SYMBOL_REF:
3384 *total = 5;
3385 return true;
3386
3387 case CONST_DOUBLE:
3388 /* prepare_cmp_insn will force costly constants into registers before
3389 the cbranchdi4 pattern can see them, so preserve potentially
3390 interesting ones. */
3391 if (outer_code == COMPARE && GET_MODE (x) == DImode)
3392 *total = 1;
3393 else
3394 *total = 10;
3395 return true;
3396
3397 case CONST_VECTOR:
3398 /* FIXME: This looks broken. Only the last statement has any effect.
3399 Probably this could be folded with the PARALLEL case? */
3400 if (x == CONST0_RTX (GET_MODE (x)))
3401 *total = 0;
3402 else if (sh_1el_vec (x, VOIDmode))
3403 *total = outer_code != SET;
3404 if (sh_rep_vec (x, VOIDmode))
3405 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3406 + (outer_code != SET));
3407 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3408 return true;
3409
3410 case PLUS:
3411 case MINUS:
3412 *total = COSTS_N_INSNS (addsubcosts (x));
3413 return true;
3414
3415 case AND:
3416 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3417 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3418 {
3419 *total = COSTS_N_INSNS (1);
3420 return true;
3421 }
3422 /* Fall through. */
3423
3424 case XOR:
3425 case IOR:
3426 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3427 return true;
3428
3429 case MULT:
3430 *total = COSTS_N_INSNS (multcosts (x));
3431 return true;
3432
3433 case LT:
3434 case GE:
3435 /* div0s sign comparison. */
3436 if (GET_CODE (XEXP (x, 0)) == XOR
3437 && REG_P ((XEXP (XEXP (x, 0), 0)))
3438 && REG_P ((XEXP (XEXP (x, 0), 1)))
3439 && satisfies_constraint_Z (XEXP (x, 1)))
3440 {
3441 *total = COSTS_N_INSNS (1);
3442 return true;
3443 }
3444 else
3445 return false;
3446
3447 case LSHIFTRT:
3448 /* div0s sign comparison. */
3449 if (GET_CODE (XEXP (x, 0)) == XOR
3450 && REG_P ((XEXP (XEXP (x, 0), 0)))
3451 && REG_P ((XEXP (XEXP (x, 0), 1)))
3452 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3453 {
3454 *total = COSTS_N_INSNS (1);
3455 return true;
3456 }
3457 /* Fall through to shiftcosts. */
3458 case ASHIFT:
3459 case ASHIFTRT:
3460 {
3461 int cost = shiftcosts (x);
3462 if (cost < 0)
3463 return false;
3464 *total = COSTS_N_INSNS (cost);
3465 return true;
3466 }
3467
3468 case DIV:
3469 case UDIV:
3470 case MOD:
3471 case UMOD:
3472 *total = COSTS_N_INSNS (20);
3473 return true;
3474
3475 case FLOAT:
3476 case FIX:
3477 *total = 100;
3478 return true;
3479
3480 default:
3481 return false;
3482 }
3483 }
3484
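/* Illustrative examples of the constant costs above (the values follow
   directly from the cases in this function): a CONST_INT such as 5 that
   satisfies CONST_OK_FOR_I08 costs 0; 0x80 or 0x7fffffff used directly in
   a COMPARE costs 1 so that the cbranch patterns still get to see it; an
   arbitrary 32 bit constant like 0x12345678 costs 8; a SYMBOL_REF or
   LABEL_REF costs 5.  */
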
3485 /* Determine the size of the fundamental move insn that will be used
3486 for the specified mode. */
3487 static inline int
3488 mov_insn_size (machine_mode mode, bool consider_sh2a)
3489 {
3490 const int mode_sz = GET_MODE_SIZE (mode);
3491
3492 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3493 || (TARGET_FMOVD && mode == DFmode))
3494 return mode_sz;
3495 else
3496 {
3497 /* The max. available mode for actual move insns is SImode.
3498 Larger accesses will be split into multiple loads/stores. */
3499 const int max_mov_sz = GET_MODE_SIZE (SImode);
3500 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3501 }
3502 }
3503
3504 /* Determine the maximum possible displacement for a move insn for the
3505 specified mode. */
3506 int
3507 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3508 {
3509 /* The 4 byte displacement move insns are the same as the 2 byte
3510 versions but take a 12 bit displacement. All we need to do is to
3511 scale the max. displacement value accordingly. */
3512 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3513
3514 /* SH2A supports FPU move insns with 12 bit displacements.
3515 Other variants do not support any kind of displacement for
3516 FPU move insns. */
3517 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3518 return 0;
3519 else
3520 {
3521 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3522 const int mode_sz = GET_MODE_SIZE (mode);
3523 int r = 15 * mov_insn_sz * disp_scale;
3524
3525 /* If the mov insn will be split into multiple loads/stores, the
3526 maximum possible displacement is a bit smaller. */
3527 if (mode_sz > mov_insn_sz)
3528 r -= mode_sz - mov_insn_sz;
3529 return r;
3530 }
3531 }
3532
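/* For illustration (the numbers follow from the computation above): without
   the SH2A displacement scaling, QImode allows a displacement of up to 15,
   HImode up to 30 and SImode up to 60, while a DImode move that gets split
   into two SImode accesses allows 60 - 4 = 56.  With CONSIDER_SH2A the
   12 bit forms scale this by 4095 / 15, e.g. 15 * 4 * 273 = 16380 for
   SImode.  */
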
3533 /* Determine the alignment mask for a move insn of the
3534 specified mode. */
3535 static inline int
3536 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3537 {
3538 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3539 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3540 }
3541
3542 /* Return the displacement value of a displacement address. */
3543 HOST_WIDE_INT
3544 sh_disp_addr_displacement (rtx x)
3545 {
3546 gcc_assert (satisfies_constraint_Sdd (x));
3547 return INTVAL (XEXP (XEXP (x, 0), 1));
3548 }
3549
3550 /* Compute the cost of an address. */
3551 static int
3552 sh_address_cost (rtx x, machine_mode mode,
3553 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3554 {
3555 /* 'GBR + 0'. Account one more because of R0 restriction. */
3556 if (REG_P (x) && REGNO (x) == GBR_REG)
3557 return 2;
3558
3559 /* Simple reg, post-inc, pre-dec addressing. */
3560 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3561 return 1;
3562
3563 /* 'reg + disp' addressing. */
3564 if (GET_CODE (x) == PLUS
3565 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3566 {
3567 /* 'GBR + disp'. Account one more because of R0 restriction. */
3568 if (REGNO (XEXP (x, 0)) == GBR_REG
3569 && gbr_displacement (XEXP (x, 1), mode))
3570 return 2;
3571
3572 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3573
3574 if (offset == 0)
3575 return 1;
3576
3577 /* The displacement would fit into a 2 byte move insn.
3578 HImode and QImode loads/stores with displacement put pressure on
3579 R0 which will most likely require another reg copy. Thus account
3580 a higher cost for that. */
3581 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3582 return (mode == HImode || mode == QImode) ? 2 : 1;
3583
3584 /* The displacement would fit into a 4 byte move insn (SH2A). */
3585 if (TARGET_SH2A
3586 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3587 return 2;
3588
3589 /* The displacement is probably out of range and will require extra
3590 calculations. */
3591 return 3;
3592 }
3593
3594 /* 'reg + reg' addressing. Account a slightly higher cost because of
3595 increased pressure on R0. */
3596 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1)))
3597 return 3;
3598
3599 /* Not sure what it is - probably expensive. */
3600 return 10;
3601 }
3602
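/* A few examples of the resulting address costs (these follow directly from
   the cases above): a plain @Rn (other than GBR), @Rn+, @-Rn and @(0,Rn)
   cost 1; @(disp,Rn) with a small displacement costs 1 for SImode but 2 for
   QImode/HImode because of the implied R0 use; GBR based addresses cost 2;
   @(R0,Rn) costs 3; anything else is assumed to be expensive and costs
   10.  */
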
3603 /* Code to expand a shift. */
3604 static void
3605 gen_ashift (int type, int n, rtx reg)
3606 {
3607 rtx n_rtx;
3608
3609 /* Negative values here come from the shift_amounts array. */
3610 if (n < 0)
3611 {
3612 if (type == ASHIFT)
3613 type = LSHIFTRT;
3614 else
3615 type = ASHIFT;
3616 n = -n;
3617 }
3618
3619 n_rtx = GEN_INT (n);
3620 gcc_assert (satisfies_constraint_P27 (n_rtx));
3621
3622 switch (type)
3623 {
3624 case ASHIFTRT:
3625 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3626 break;
3627 case LSHIFTRT:
3628 if (n == 1)
3629 emit_insn (gen_shlr (reg, reg));
3630 else
3631 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3632 break;
3633 case ASHIFT:
3634 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3635 break;
3636 default:
3637 gcc_unreachable ();
3638 }
3639 }
3640
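/* Example (a sketch; the actual assembly comes from the machine
   description): gen_ashift (ASHIFT, 2, reg) emits an ashlsi3_k insn, which
   is expected to assemble to a single 'shll2', while gen_ashift (LSHIFTRT,
   1, reg) uses the dedicated 'shlr' pattern.  */
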
3641 /* Code to expand a HImode shift. */
3642 static void
3643 gen_ashift_hi (int type, int n, rtx reg)
3644 {
3645 /* Negative values here come from the shift_amounts array. */
3646 if (n < 0)
3647 {
3648 if (type == ASHIFT)
3649 type = LSHIFTRT;
3650 else
3651 type = ASHIFT;
3652 n = -n;
3653 }
3654
3655 switch (type)
3656 {
3657 case ASHIFTRT:
3658 case LSHIFTRT:
3659 /* We don't have HImode right shift operations because using the
3660 ordinary 32 bit shift instructions for that doesn't generate proper
3661 zero/sign extension.
3662 gen_ashift_hi is only called in contexts where we know that the
3663 sign extension works out correctly. */
3664 {
3665 int offset = 0;
3666 if (GET_CODE (reg) == SUBREG)
3667 {
3668 offset = SUBREG_BYTE (reg);
3669 reg = SUBREG_REG (reg);
3670 }
3671 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3672 break;
3673 }
3674 case ASHIFT:
3675 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3676 break;
3677 }
3678 }
3679
3680 /* Output RTL to split a constant shift into its component SH constant
3681 shift instructions. */
3682 void
3683 gen_shifty_op (int code, rtx *operands)
3684 {
3685 int value = INTVAL (operands[2]);
3686 int max, i;
3687
3688 /* Truncate the shift count in case it is out of bounds. */
3689 value = value & 31;
3690
3691 if (value == 31)
3692 {
3693 if (code == LSHIFTRT)
3694 {
3695 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3696 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3697 return;
3698 }
3699 else if (code == ASHIFT)
3700 {
3701 /* There is a two instruction sequence for 31 bit left shifts,
3702 but it requires r0. */
3703 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3704 {
3705 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3706 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3707 return;
3708 }
3709 }
3710 }
3711 else if (value == 0)
3712 {
3713 /* This can happen even when optimizing, if there were subregs before
3714 reload. Don't output a nop here, as this is never optimized away;
3715 use a no-op move instead. */
3716 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3717 return;
3718 }
3719
3720 max = ashl_lshr_seq[value].insn_count;
3721 for (i = 0; i < max; i++)
3722 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3723 }
3724
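/* For illustration (a sketch, assuming the usual ashl_lshr_seq entries
   built from the 1/2/8/16 bit shift insns): a left shift by 10 decomposes
   into a shift by 8 plus a shift by 2 ('shll8' and 'shll2'), while a
   logical right shift by 31 uses the special 'rotl + movt' sequence
   handled explicitly above.  */
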
3725 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3726 don't matter. */
3727 void
3728 gen_shifty_hi_op (int code, rtx *operands)
3729 {
3730 int value = INTVAL (operands[2]);
3731 int max, i;
3732 void (*gen_fun) (int, int, rtx);
3733
3734 /* This operation is used by and_shl for SImode values with a few
3735 high bits known to be cleared. */
3736 value &= 31;
3737 if (value == 0)
3738 {
3739 emit_insn (gen_nop ());
3740 return;
3741 }
3742
3743 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3744 if (code == ASHIFT)
3745 {
3746 max = ext_ashl_lshr_seq[value].insn_count;
3747 for (i = 0; i < max; i++)
3748 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3749 }
3750 else
3751 /* When shifting right, emit the shifts in reverse order, so that
3752 solitary negative values come first. */
3753 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3754 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3755 }
3756
3757 /* Output RTL for an arithmetic right shift.
3758 ??? Rewrite to use super-optimizer sequences. */
3759 bool
3760 expand_ashiftrt (rtx *operands)
3761 {
3762 rtx wrk;
3763 char func[18];
3764 int value;
3765
3766 if (TARGET_DYNSHIFT)
3767 {
3768 if (!CONST_INT_P (operands[2]))
3769 {
3770 rtx count = copy_to_mode_reg (SImode, operands[2]);
3771 emit_insn (gen_negsi2 (count, count));
3772 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3773 return true;
3774 }
3775 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3776 > 1 + SH_DYNAMIC_SHIFT_COST)
3777 {
3778 rtx count
3779 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3780 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3781 return true;
3782 }
3783 }
3784 if (!CONST_INT_P (operands[2]))
3785 return false;
3786
3787 value = INTVAL (operands[2]) & 31;
3788
3789 if (value == 31)
3790 {
3791 /* If we are called from abs expansion, arrange things so that we
3792 can use a single MT instruction that doesn't clobber the source,
3793 if LICM can hoist out the load of the constant zero. */
3794 if (currently_expanding_to_rtl)
3795 {
3796 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3797 operands[1]));
3798 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3799 return true;
3800 }
3801 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3802 return true;
3803 }
3804 else if (value >= 16 && value <= 19)
3805 {
3806 wrk = gen_reg_rtx (SImode);
3807 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3808 value -= 16;
3809 while (value--)
3810 gen_ashift (ASHIFTRT, 1, wrk);
3811 emit_move_insn (operands[0], wrk);
3812 return true;
3813 }
3814 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3815 else if (value <= 5)
3816 {
3817 wrk = gen_reg_rtx (SImode);
3818 emit_move_insn (wrk, operands[1]);
3819 while (value--)
3820 gen_ashift (ASHIFTRT, 1, wrk);
3821 emit_move_insn (operands[0], wrk);
3822 return true;
3823 }
3824
3825 wrk = gen_reg_rtx (Pmode);
3826
3827 /* Load the value into an arg reg and call a helper. */
3828 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3829 sprintf (func, "__ashiftrt_r4_%d", value);
3830 rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
3831 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
3832 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3833 return true;
3834 }
3835
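/* Example of the fallback path above (the helper name follows from the
   sprintf call): for a constant shift count of 20 on a target without
   dynamic shifts, the value is loaded into r4 and the library routine
   '__ashiftrt_r4_20' is called via the ashrsi3_n pattern, with the result
   coming back in r4.  */
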
3836 /* Try to find a good way to implement the combiner pattern
3837 [(set (match_operand:SI 0 "register_operand" "r")
3838 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3839 (match_operand:SI 2 "const_int_operand" "n"))
3840 (match_operand:SI 3 "const_int_operand" "n"))) .
3841 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3842 return 0 for simple right / left or left/right shift combination.
3843 return 1 for a combination of shifts with zero_extend.
3844 return 2 for a combination of shifts with an AND that needs r0.
3845 return 3 for a combination of shifts with an AND that needs an extra
3846 scratch register, when the three highmost bits of the AND mask are clear.
3847 return 4 for a combination of shifts with an AND that needs an extra
3848 scratch register, when any of the three highmost bits of the AND mask
3849 is set.
3850 If ATTRP is set, store an initial right shift width in ATTRP[0],
3851 and the instruction length in ATTRP[1] . These values are not valid
3852 when returning 0.
3853 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3854 shift_amounts for the last shift value that is to be used before the
3855 sign extend. */
3856 int
3857 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3858 {
3859 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3860 int left = INTVAL (left_rtx), right;
3861 int best = 0;
3862 int cost, best_cost = 10000;
3863 int best_right = 0, best_len = 0;
3864 int i;
3865 int can_ext;
3866
3867 if (left < 0 || left > 31)
3868 return 0;
3869 if (CONST_INT_P (mask_rtx))
3870 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3871 else
3872 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3873 /* Can this be expressed as a right shift / left shift pair? */
3874 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3875 right = exact_log2 (lsb);
3876 mask2 = ~(mask + lsb - 1);
3877 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3878 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
3879 if (! mask2)
3880 best_cost = ashl_lshr_seq[right].insn_count
3881 + ashl_lshr_seq[right + left].insn_count;
3882 /* mask has no trailing zeroes <==> ! right */
3883 else if (! right && mask2 == ~(lsb2 - 1))
3884 {
3885 int late_right = exact_log2 (lsb2);
3886 best_cost = ashl_lshr_seq[left + late_right].insn_count
3887 + ashl_lshr_seq[late_right].insn_count;
3888 }
3889 /* Try to use zero extend. */
3890 if (mask2 == ~(lsb2 - 1))
3891 {
3892 int width, first;
3893
3894 for (width = 8; width <= 16; width += 8)
3895 {
3896 /* Can we zero-extend right away? */
3897 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3898 {
3899 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3900 + ext_ashl_lshr_seq[left + right].insn_count;
3901 if (cost < best_cost)
3902 {
3903 best = 1;
3904 best_cost = cost;
3905 best_right = right;
3906 best_len = cost;
3907 if (attrp)
3908 attrp[2] = -1;
3909 }
3910 continue;
3911 }
3912 /* ??? Could try to put zero extend into initial right shift,
3913 or even shift a bit left before the right shift. */
3914 /* Determine value of first part of left shift, to get to the
3915 zero extend cut-off point. */
3916 first = width - exact_log2 (lsb2) + right;
3917 if (first >= 0 && right + left - first >= 0)
3918 {
3919 cost = ext_ashl_lshr_seq[right].insn_count
3920 + ext_ashl_lshr_seq[first].insn_count + 1
3921 + ext_ashl_lshr_seq[right + left - first].insn_count;
3922
3923 if (cost < best_cost)
3924 {
3925 best = 1;
3926 best_cost = cost;
3927 best_right = right;
3928 best_len = cost;
3929 if (attrp)
3930 attrp[2] = first;
3931 }
3932 }
3933 }
3934 }
3935 /* Try to use r0 AND pattern */
3936 for (i = 0; i <= 2; i++)
3937 {
3938 if (i > right)
3939 break;
3940 if (! CONST_OK_FOR_K08 (mask >> i))
3941 continue;
3942 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
3943 if (cost < best_cost)
3944 {
3945 best = 2;
3946 best_cost = cost;
3947 best_right = i;
3948 best_len = cost - 1;
3949 }
3950 }
3951 /* Try to use a scratch register to hold the AND operand. */
3952 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3953 for (i = 0; i <= 2; i++)
3954 {
3955 if (i > right)
3956 break;
3957 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3958 + (can_ext
3959 ? ext_ashl_lshr_seq
3960 : ashl_lshr_seq)[left + i].insn_count;
3961 if (cost < best_cost)
3962 {
3963 best = 4 - can_ext;
3964 best_cost = cost;
3965 best_right = i;
3966 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3967 }
3968 }
3969
3970 if (attrp)
3971 {
3972 attrp[0] = best_right;
3973 attrp[1] = best_len;
3974 }
3975 return best;
3976 }
3977
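/* Worked example (a sketch; the exact costs depend on the shift tables):
   for LEFT = 2 and a mask of 0x3fc the mask shifted right by LEFT is 0xff,
   so the zero-extend case applies and shl_and_kind typically returns 1,
   i.e. (x << 2) & 0x3fc gets implemented as a byte zero-extension followed
   by a shift, roughly 'extu.b rm,rn; shll2 rn'.  */
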
3978 /* This is used in length attributes of the unnamed instructions
3979 corresponding to shl_and_kind return values of 1 and 2. */
3980 int
3981 shl_and_length (rtx insn)
3982 {
3983 rtx set_src, left_rtx, mask_rtx;
3984 int attributes[3];
3985
3986 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3987 left_rtx = XEXP (XEXP (set_src, 0), 1);
3988 mask_rtx = XEXP (set_src, 1);
3989 shl_and_kind (left_rtx, mask_rtx, attributes);
3990 return attributes[1];
3991 }
3992
3993 /* This is used in length attribute of the and_shl_scratch instruction. */
3994 int
3995 shl_and_scr_length (rtx insn)
3996 {
3997 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3998 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
3999 rtx op = XEXP (set_src, 0);
4000 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4001 op = XEXP (XEXP (op, 0), 0);
4002 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4003 }
4004
4005 /* Generate rtl for instructions for which shl_and_kind advised a particular
4006 method of generating them, i.e. returned a nonzero kind. */
4007 bool
4008 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4009 {
4010 int attributes[3];
4011 unsigned HOST_WIDE_INT mask;
4012 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4013 int right, total_shift;
4014 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4015
4016 right = attributes[0];
4017 total_shift = INTVAL (left_rtx) + right;
4018 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4019 switch (kind)
4020 {
4021 default:
4022 return true;
4023 case 1:
4024 {
4025 int first = attributes[2];
4026 rtx operands[3];
4027
4028 if (first < 0)
4029 {
4030 emit_insn ((mask << right) <= 0xff
4031 ? gen_zero_extendqisi2 (dest,
4032 gen_lowpart (QImode, source))
4033 : gen_zero_extendhisi2 (dest,
4034 gen_lowpart (HImode, source)));
4035 source = dest;
4036 }
4037 if (source != dest)
4038 emit_insn (gen_movsi (dest, source));
4039 operands[0] = dest;
4040 if (right)
4041 {
4042 operands[2] = GEN_INT (right);
4043 gen_shifty_hi_op (LSHIFTRT, operands);
4044 }
4045 if (first > 0)
4046 {
4047 operands[2] = GEN_INT (first);
4048 gen_shifty_hi_op (ASHIFT, operands);
4049 total_shift -= first;
4050 mask <<= first;
4051 }
4052 if (first >= 0)
4053 emit_insn (mask <= 0xff
4054 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4055 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4056 if (total_shift > 0)
4057 {
4058 operands[2] = GEN_INT (total_shift);
4059 gen_shifty_hi_op (ASHIFT, operands);
4060 }
4061 break;
4062 }
4063 case 4:
4064 shift_gen_fun = gen_shifty_op;
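	  /* Fall through to case 3.  */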
4065 case 3:
4066 /* If the topmost bit that matters is set, set the topmost bits
4067 that don't matter. This way, we might be able to get a shorter
4068 signed constant. */
4069 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4070 mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift));
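	/* Fall through to the common AND expansion of case 2.  */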
4071 case 2:
4072 /* Don't expand fine-grained when combining, because that will
4073 make the pattern fail. */
4074 if (currently_expanding_to_rtl
4075 || reload_in_progress || reload_completed)
4076 {
4077 rtx operands[3];
4078
4079 /* Cases 3 and 4 should be handled by this split
4080 only while combining */
4081 gcc_assert (kind <= 2);
4082 if (right)
4083 {
4084 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4085 source = dest;
4086 }
4087 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4088 if (total_shift)
4089 {
4090 operands[0] = dest;
4091 operands[1] = dest;
4092 operands[2] = GEN_INT (total_shift);
4093 shift_gen_fun (ASHIFT, operands);
4094 }
4095 break;
4096 }
4097 else
4098 {
4099 int neg = 0;
4100 if (kind != 4 && total_shift < 16)
4101 {
4102 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4103 if (neg > 0)
4104 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4105 else
4106 neg = 0;
4107 }
4108 emit_insn (gen_and_shl_scratch (dest, source,
4109 GEN_INT (right),
4110 GEN_INT (mask),
4111 GEN_INT (total_shift + neg),
4112 GEN_INT (neg)));
4113 emit_insn (gen_movsi (dest, dest));
4114 break;
4115 }
4116 }
4117 return false;
4118 }
4119
4120 /* Try to find a good way to implement the combiner pattern
4121 [(set (match_operand:SI 0 "register_operand" "=r")
4122 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4123 (match_operand:SI 2 "const_int_operand" "n")
4124 (match_operand:SI 3 "const_int_operand" "n")
4125 (const_int 0)))
4126 (clobber (reg:SI T_REG))]
4127 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4128 return 0 for simple left / right shift combination.
4129 return 1 for left shift / 8 bit sign extend / left shift.
4130 return 2 for left shift / 16 bit sign extend / left shift.
4131 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4132 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4133 return 5 for left shift / 16 bit sign extend / right shift
4134 return 6 for < 8 bit sign extend / left shift.
4135 return 7 for < 8 bit sign extend / left shift / single right shift.
4136 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4137 int
4138 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4139 {
4140 int left, size, insize, ext;
4141 int cost = 0, best_cost;
4142 int kind;
4143
4144 left = INTVAL (left_rtx);
4145 size = INTVAL (size_rtx);
4146 insize = size - left;
4147 gcc_assert (insize > 0);
4148 /* Default to left / right shift. */
4149 kind = 0;
4150 best_cost = ashl_lshr_seq[32 - insize].insn_count
4151 + ashl_lshr_seq[32 - size].insn_count;
4152 if (size <= 16)
4153 {
4154 /* 16 bit shift / sign extend / 16 bit shift */
4155 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4156 + ashl_lshr_seq[16 - size].insn_count;
4157 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4158 below, by alternative 3 or something even better. */
4159 if (cost < best_cost)
4160 {
4161 kind = 5;
4162 best_cost = cost;
4163 }
4164 }
4165 /* Try a plain sign extend between two shifts. */
4166 for (ext = 16; ext >= insize; ext -= 8)
4167 {
4168 if (ext <= size)
4169 {
4170 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4171 + ashl_lshr_seq[size - ext].insn_count;
4172 if (cost < best_cost)
4173 {
4174 kind = ext / (unsigned) 8;
4175 best_cost = cost;
4176 }
4177 }
4178 /* Check if we can do a sloppy shift with a final signed shift
4179 restoring the sign. */
4180 if (EXT_SHIFT_SIGNED (size - ext))
4181 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4182 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4183 /* If not, maybe it's still cheaper to do the second shift sloppy,
4184 and do a final sign extend? */
4185 else if (size <= 16)
4186 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4187 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4188 + 1;
4189 else
4190 continue;
4191 if (cost < best_cost)
4192 {
4193 kind = ext / (unsigned) 8 + 2;
4194 best_cost = cost;
4195 }
4196 }
4197 /* Check if we can sign extend in r0 */
4198 if (insize < 8)
4199 {
4200 cost = 3 + ashl_lshr_seq[left].insn_count;
4201 if (cost < best_cost)
4202 {
4203 kind = 6;
4204 best_cost = cost;
4205 }
4206 /* Try the same with a final signed shift. */
4207 if (left < 31)
4208 {
4209 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4210 if (cost < best_cost)
4211 {
4212 kind = 7;
4213 best_cost = cost;
4214 }
4215 }
4216 }
4217 if (TARGET_DYNSHIFT)
4218 {
4219 /* Try to use a dynamic shift. */
4220 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4221 if (cost < best_cost)
4222 {
4223 kind = 0;
4224 best_cost = cost;
4225 }
4226 }
4227 if (costp)
4228 *costp = cost;
4229 return kind;
4230 }
4231
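/* For illustration (a sketch; the kind actually chosen depends on the shift
   cost tables): LEFT = 8 and SIZE = 16 take the low byte of the source,
   place it at bits 8..15 and sign extend from bit 15.  This is typically
   classified as kind 5, i.e. a left shift followed by a 16 bit sign
   extension, roughly 'shll8 rn; exts.w rn,rn', instead of the generic
   shift-up / shift-down sequence of kind 0.  */
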
4232 /* Function to be used in the length attribute of the instructions
4233 implementing this pattern. */
4234 int
4235 shl_sext_length (rtx insn)
4236 {
4237 rtx set_src, left_rtx, size_rtx;
4238 int cost;
4239
4240 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4241 left_rtx = XEXP (XEXP (set_src, 0), 1);
4242 size_rtx = XEXP (set_src, 1);
4243 shl_sext_kind (left_rtx, size_rtx, &cost);
4244 return cost;
4245 }
4246
4247 /* Generate rtl for the combiner pattern handled by shl_sext_kind above. */
4248 bool
4249 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4250 {
4251 int kind;
4252 int left, size, insize, cost;
4253 rtx operands[3];
4254
4255 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4256 left = INTVAL (left_rtx);
4257 size = INTVAL (size_rtx);
4258 insize = size - left;
4259 switch (kind)
4260 {
4261 case 1:
4262 case 2:
4263 case 3:
4264 case 4:
4265 {
4266 int ext = kind & 1 ? 8 : 16;
4267 int shift2 = size - ext;
4268
4269 /* Don't expand fine-grained when combining, because that will
4270 make the pattern fail. */
4271 if (! currently_expanding_to_rtl
4272 && ! reload_in_progress && ! reload_completed)
4273 {
4274 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4275 emit_insn (gen_movsi (dest, source));
4276 break;
4277 }
4278 if (dest != source)
4279 emit_insn (gen_movsi (dest, source));
4280 operands[0] = dest;
4281 if (ext - insize)
4282 {
4283 operands[2] = GEN_INT (ext - insize);
4284 gen_shifty_hi_op (ASHIFT, operands);
4285 }
4286 emit_insn (kind & 1
4287 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4288 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4289 if (kind <= 2)
4290 {
4291 if (shift2)
4292 {
4293 operands[2] = GEN_INT (shift2);
4294 gen_shifty_op (ASHIFT, operands);
4295 }
4296 }
4297 else
4298 {
4299 if (shift2 > 0)
4300 {
4301 if (EXT_SHIFT_SIGNED (shift2))
4302 {
4303 operands[2] = GEN_INT (shift2 + 1);
4304 gen_shifty_op (ASHIFT, operands);
4305 operands[2] = const1_rtx;
4306 gen_shifty_op (ASHIFTRT, operands);
4307 break;
4308 }
4309 operands[2] = GEN_INT (shift2);
4310 gen_shifty_hi_op (ASHIFT, operands);
4311 }
4312 else if (shift2)
4313 {
4314 operands[2] = GEN_INT (-shift2);
4315 gen_shifty_hi_op (LSHIFTRT, operands);
4316 }
4317 emit_insn (size <= 8
4318 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4319 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4320 }
4321 break;
4322 }
4323 case 5:
4324 {
4325 int i = 16 - size;
4326 if (! currently_expanding_to_rtl
4327 && ! reload_in_progress && ! reload_completed)
4328 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4329 else
4330 {
4331 operands[0] = dest;
4332 operands[2] = GEN_INT (16 - insize);
4333 gen_shifty_hi_op (ASHIFT, operands);
4334 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4335 }
4336 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4337 while (--i >= 0)
4338 gen_ashift (ASHIFTRT, 1, dest);
4339 break;
4340 }
4341 case 6:
4342 case 7:
4343 /* Don't expand fine-grained when combining, because that will
4344 make the pattern fail. */
4345 if (! currently_expanding_to_rtl
4346 && ! reload_in_progress && ! reload_completed)
4347 {
4348 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4349 emit_insn (gen_movsi (dest, source));
4350 break;
4351 }
4352 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4353 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4354 emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1))));
4355 operands[0] = dest;
4356 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4357 gen_shifty_op (ASHIFT, operands);
4358 if (kind == 7)
4359 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4360 break;
4361 default:
4362 return true;
4363 }
4364 return false;
4365 }
4366
4367 typedef struct label_ref_list_d
4368 {
4369 rtx_code_label *label;
4370 struct label_ref_list_d *next;
4371 } *label_ref_list_t;
4372
4373 static object_allocator<label_ref_list_d> label_ref_list_d_pool
4374 ("label references list");
4375
4376 /* The SH cannot load a large constant into a register; constants have to
4377 come from a pc relative load. The reference of a pc relative load
4378 instruction must be less than 1k in front of the instruction. This
4379 means that we often have to dump a constant inside a function, and
4380 generate code to branch around it.
4381
4382 It is important to minimize this, since the branches will slow things
4383 down and make things bigger.
4384
4385 Worst case code looks like:
4386
4387 mov.l L1,rn
4388 bra L2
4389 nop
4390 align
4391 L1: .long value
4392 L2:
4393 ..
4394
4395 mov.l L3,rn
4396 bra L4
4397 nop
4398 align
4399 L3: .long value
4400 L4:
4401 ..
4402
4403 We fix this by performing a scan before scheduling, which notices which
4404 instructions need to have their operands fetched from the constant table
4405 and builds the table.
4406
4407 The algorithm is:
4408
4409 scan, find an instruction which needs a pcrel move. Look forward, find the
4410 last barrier which is within MAX_COUNT bytes of the requirement.
4411 If there isn't one, make one. Process all the instructions between
4412 the insn we found and the barrier.
4413
4414 In the above example, we can tell that L3 is within 1k of L1, so
4415 the first move can be shrunk from the 3 insn+constant sequence into
4416 just 1 insn, and the constant moved to L3 to make:
4417
4418 mov.l L1,rn
4419 ..
4420 mov.l L3,rn
4421 bra L4
4422 nop
4423 align
4424 L3:.long value
4425 L4:.long value
4426
4427 Then the second move becomes the target for the shortening process. */
4428
4429 typedef struct
4430 {
4431 rtx value; /* Value in table. */
4432 rtx_code_label *label; /* Label of value. */
4433 label_ref_list_t wend; /* End of window. */
4434 machine_mode mode; /* Mode of value. */
4435
4436 /* True if this constant is accessed as part of a post-increment
4437 sequence. Note that HImode constants are never accessed in this way. */
4438 bool part_of_sequence_p;
4439 } pool_node;
4440
4441 /* The maximum number of constants that can fit into one pool, since
4442 constants in the range 0..510 are at least 2 bytes long, and in the
4443 range from there to 1018 at least 4 bytes. */
4444
4445 #define MAX_POOL_SIZE 372
4446 static pool_node pool_vector[MAX_POOL_SIZE];
4447 static int pool_size;
4448 static rtx_code_label *pool_window_label;
4449 static int pool_window_last;
4450
4451 static int max_labelno_before_reorg;
4452
4453 /* ??? If we need a constant in HImode which is the truncated value of a
4454 constant we need in SImode, we could combine the two entries thus saving
4455 two bytes. Is this common enough to be worth the effort of implementing
4456 it? */
4457
4458 /* ??? This stuff should be done at the same time that we shorten branches.
4459 As it is now, we must assume that all branches are the maximum size, and
4460 this causes us to almost always output constant pools sooner than
4461 necessary. */
4462
4463 /* Add a constant to the pool and return its label. */
4464 static rtx_code_label *
4465 add_constant (rtx x, machine_mode mode, rtx last_value)
4466 {
4467 rtx_code_label *lab, *new_rtx;
4468 label_ref_list_t ref, newref;
4469
4470 /* First see if we've already got it. */
4471 for (int i = 0; i < pool_size; i++)
4472 {
4473 if (x->code == pool_vector[i].value->code
4474 && mode == pool_vector[i].mode)
4475 {
4476 if (x->code == CODE_LABEL)
4477 {
4478 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4479 continue;
4480 }
4481 if (rtx_equal_p (x, pool_vector[i].value))
4482 {
4483 lab = new_rtx = 0;
4484 if (! last_value
4485 || ! i
4486 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4487 {
4488 new_rtx = gen_label_rtx ();
4489 LABEL_REFS (new_rtx) = pool_vector[i].label;
4490 pool_vector[i].label = lab = new_rtx;
4491 }
4492 if (lab && pool_window_label)
4493 {
4494 newref = label_ref_list_d_pool.allocate ();
4495 newref->label = pool_window_label;
4496 ref = pool_vector[pool_window_last].wend;
4497 newref->next = ref;
4498 pool_vector[pool_window_last].wend = newref;
4499 }
4500 if (new_rtx)
4501 pool_window_label = new_rtx;
4502 pool_window_last = i;
4503 return lab;
4504 }
4505 }
4506 }
4507
4508 /* Need a new one. */
4509 pool_vector[pool_size].value = x;
4510 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4511 {
4512 lab = 0;
4513 pool_vector[pool_size - 1].part_of_sequence_p = true;
4514 }
4515 else
4516 lab = gen_label_rtx ();
4517 pool_vector[pool_size].mode = mode;
4518 pool_vector[pool_size].label = lab;
4519 pool_vector[pool_size].wend = NULL;
4520 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4521 if (lab && pool_window_label)
4522 {
4523 newref = label_ref_list_d_pool.allocate ();
4524 newref->label = pool_window_label;
4525 ref = pool_vector[pool_window_last].wend;
4526 newref->next = ref;
4527 pool_vector[pool_window_last].wend = newref;
4528 }
4529 if (lab)
4530 pool_window_label = lab;
4531 pool_window_last = pool_size;
4532 pool_size++;
4533 return lab;
4534 }
4535
4536 /* Output the literal table. START, if nonzero, is the first instruction
4537 this table is needed for, and also indicates that there is at least one
4538 casesi_worker_2 instruction; we have to emit the operand3 labels from
4539 these insns at a 4-byte aligned position. BARRIER is the barrier
4540 after which we are to place the table. */
4541 static void
4542 dump_table (rtx_insn *start, rtx_insn *barrier)
4543 {
4544 rtx_insn *scan = barrier;
4545 bool need_align = true;
4546 rtx lab;
4547 label_ref_list_t ref;
4548 bool have_df = false;
4549
4550 /* Do two passes, first time dump out the HI sized constants. */
4551
4552 for (int i = 0; i < pool_size; i++)
4553 {
4554 pool_node *p = &pool_vector[i];
4555
4556 if (p->mode == HImode)
4557 {
4558 if (need_align)
4559 {
4560 scan = emit_insn_after (gen_align_2 (), scan);
4561 need_align = false;
4562 }
4563 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4564 scan = emit_label_after (lab, scan);
4565 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4566 scan);
4567 for (ref = p->wend; ref; ref = ref->next)
4568 {
4569 lab = ref->label;
4570 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4571 }
4572 }
4573 else if (p->mode == DFmode)
4574 have_df = true;
4575 }
4576
4577 need_align = true;
4578
4579 if (start)
4580 {
4581 scan = emit_insn_after (gen_align_4 (), scan);
4582 need_align = false;
4583 for (; start != barrier; start = NEXT_INSN (start))
4584 if (NONJUMP_INSN_P (start)
4585 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4586 {
4587 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4588 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4589
4590 scan = emit_label_after (lab, scan);
4591 }
4592 }
4593 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4594 {
4595 rtx_insn *align_insn = NULL;
4596
4597 scan = emit_label_after (gen_label_rtx (), scan);
4598 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4599 need_align = false;
4600
4601 for (int i = 0; i < pool_size; i++)
4602 {
4603 pool_node *p = &pool_vector[i];
4604
4605 switch (p->mode)
4606 {
4607 case HImode:
4608 break;
4609 case SImode:
4610 case SFmode:
4611 if (align_insn && !p->part_of_sequence_p)
4612 {
4613 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4614 emit_label_before (lab, align_insn);
4615 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4616 align_insn);
4617 for (ref = p->wend; ref; ref = ref->next)
4618 {
4619 lab = ref->label;
4620 emit_insn_before (gen_consttable_window_end (lab),
4621 align_insn);
4622 }
4623 delete_insn (align_insn);
4624 align_insn = NULL;
4625 continue;
4626 }
4627 else
4628 {
4629 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4630 scan = emit_label_after (lab, scan);
4631 scan = emit_insn_after (gen_consttable_4 (p->value,
4632 const0_rtx), scan);
4633 need_align = ! need_align;
4634 }
4635 break;
4636 case DFmode:
4637 if (need_align)
4638 {
4639 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4640 align_insn = scan;
4641 need_align = false;
4642 }
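	      /* Fall through; DFmode constants share the DImode emission below.  */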
4643 case DImode:
4644 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4645 scan = emit_label_after (lab, scan);
4646 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4647 scan);
4648 break;
4649 default:
4650 gcc_unreachable ();
4651 }
4652
4653 if (p->mode != HImode)
4654 {
4655 for (ref = p->wend; ref; ref = ref->next)
4656 {
4657 lab = ref->label;
4658 scan = emit_insn_after (gen_consttable_window_end (lab),
4659 scan);
4660 }
4661 }
4662 }
4663
4664 pool_size = 0;
4665 }
4666
4667 for (int i = 0; i < pool_size; i++)
4668 {
4669 pool_node *p = &pool_vector[i];
4670
4671 switch (p->mode)
4672 {
4673 case HImode:
4674 break;
4675 case SImode:
4676 case SFmode:
4677 if (need_align)
4678 {
4679 need_align = false;
4680 scan = emit_label_after (gen_label_rtx (), scan);
4681 scan = emit_insn_after (gen_align_4 (), scan);
4682 }
4683 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4684 scan = emit_label_after (lab, scan);
4685 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4686 scan);
4687 break;
4688 case DFmode:
4689 case DImode:
4690 if (need_align)
4691 {
4692 need_align = false;
4693 scan = emit_label_after (gen_label_rtx (), scan);
4694 scan = emit_insn_after (gen_align_4 (), scan);
4695 }
4696 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4697 scan = emit_label_after (lab, scan);
4698 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4699 scan);
4700 break;
4701 default:
4702 gcc_unreachable ();
4703 }
4704
4705 if (p->mode != HImode)
4706 {
4707 for (ref = p->wend; ref; ref = ref->next)
4708 {
4709 lab = ref->label;
4710 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4711 }
4712 }
4713 }
4714
4715 scan = emit_insn_after (gen_consttable_end (), scan);
4716 scan = emit_barrier_after (scan);
4717 pool_size = 0;
4718 pool_window_label = NULL;
4719 pool_window_last = 0;
4720 }
4721
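/* As an illustration of the layout produced by dump_table (a sketch; the
   exact directives come from the consttable_* and align_* patterns): a pool
   holding one HImode and one SImode constant is emitted after the barrier
   roughly as

	align to 2 bytes
   Lhi:	.short	value
	align to 4 bytes
   Lsi:	.long	value

   followed by the consttable_end marker and a new barrier.  */
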
4722 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4723
4724 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4725
4726 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4727 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4728 need to fix it if the input value is CONST_OK_FOR_I08. */
4729 static bool
4730 broken_move (rtx_insn *insn)
4731 {
4732 if (NONJUMP_INSN_P (insn))
4733 {
4734 rtx pat = PATTERN (insn);
4735 if (GET_CODE (pat) == PARALLEL)
4736 pat = XVECEXP (pat, 0, 0);
4737 if (GET_CODE (pat) == SET
4738 /* We can load any 8-bit value if we don't care what the high
4739 order bits end up as. */
4740 && GET_MODE (SET_DEST (pat)) != QImode
4741 && (CONSTANT_P (SET_SRC (pat))
4742 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4743 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4744 /* Match mova_const. */
4745 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4746 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4747 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4748 && ! (TARGET_SH2E
4749 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4750 && (fp_zero_operand (SET_SRC (pat))
4751 || fp_one_operand (SET_SRC (pat)))
4752 /* In general we don't know the current setting of fpscr, so
4753 disable fldi.
4754 There is an exception if this was a register-register move
4755 before reload - and hence it was ascertained that we have
4756 single precision setting - and in a post-reload optimization
4757 we changed this to do a constant load. In that case
4758 we don't have an r0 clobber, hence we must use fldi. */
4759 && (TARGET_FMOVD
4760 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4761 == SCRATCH))
4762 && REG_P (SET_DEST (pat))
4763 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4764 && ! (TARGET_SH2A
4765 && GET_MODE (SET_DEST (pat)) == SImode
4766 && (satisfies_constraint_I20 (SET_SRC (pat))
4767 || satisfies_constraint_I28 (SET_SRC (pat))))
4768 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4769 return true;
4770 }
4771
4772 return false;
4773 }
4774
4775 /* Return true if the specified insn is a mova insn. */
4776 static bool
4777 mova_p (rtx_insn *insn)
4778 {
4779 return (NONJUMP_INSN_P (insn)
4780 && GET_CODE (PATTERN (insn)) == SET
4781 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4782 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4783 /* Don't match mova_const. */
4784 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4785 }
4786
4787 /* Fix up a mova from a switch that went out of range. */
4788 static void
4789 fixup_mova (rtx_insn *mova)
4790 {
4791 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4792 if (! flag_pic)
4793 {
4794 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4795 INSN_CODE (mova) = -1;
4796 }
4797 else
4798 {
4799 rtx_insn *worker = mova;
4800 rtx_code_label *lab = gen_label_rtx ();
4801 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4802
4803 do
4804 {
4805 worker = NEXT_INSN (worker);
4806 gcc_assert (worker
4807 && !LABEL_P (worker)
4808 && !JUMP_P (worker));
4809 } while (NOTE_P (worker)
4810 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4811 wpat = PATTERN (worker);
4812 wpat0 = XVECEXP (wpat, 0, 0);
4813 wpat1 = XVECEXP (wpat, 0, 1);
4814 wsrc = SET_SRC (wpat0);
4815 PATTERN (worker) = (gen_casesi_worker_2
4816 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4817 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4818 XEXP (wpat1, 0)));
4819 INSN_CODE (worker) = -1;
4820 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4821 base = gen_rtx_LABEL_REF (Pmode, lab);
4822 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4823 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4824 INSN_CODE (mova) = -1;
4825 }
4826 }
4827
4828 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4829 *num_mova, and check if the new mova is not nested within the first one.
4830 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4831 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4832 static int
4833 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
4834 {
4835 int n_addr = 0; /* Initialization to shut up spurious warning. */
4836 int f_target, n_target = 0; /* Likewise. */
4837
4838 if (optimize)
4839 {
4840 /* If NEW_MOVA has no address yet, it will be handled later. */
4841 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4842 return -1;
4843
4844 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4845 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4846 if (n_addr > n_target || n_addr + 1022 < n_target)
4847 {
4848 /* Change the mova into a load.
4849 broken_move will then return true for it. */
4850 fixup_mova (new_mova);
4851 return 1;
4852 }
4853 }
4854 if (!(*num_mova)++)
4855 {
4856 *first_mova = new_mova;
4857 return 2;
4858 }
4859 if (!optimize
4860 || ((f_target
4861 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4862 >= n_target))
4863 return -1;
4864
4865 (*num_mova)--;
4866 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4867 > n_target - n_addr)
4868 {
4869 fixup_mova (*first_mova);
4870 return 0;
4871 }
4872 else
4873 {
4874 fixup_mova (new_mova);
4875 return 1;
4876 }
4877 }
4878
4879 /* Find the last barrier from insn FROM which is close enough to hold the
4880 constant pool. If we can't find one, then create one near the end of
4881 the range. */
4882 static rtx_insn *
4883 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
4884 {
4885 int count_si = 0;
4886 int count_hi = 0;
4887 int found_hi = 0;
4888 int found_si = 0;
4889 int hi_align = 2;
4890 int si_align = 2;
4891 int leading_mova = num_mova;
4892 rtx_insn *barrier_before_mova = NULL;
4893 rtx_insn *found_barrier = NULL;
4894 rtx_insn *good_barrier = NULL;
4895 int si_limit;
4896 int hi_limit;
4897 rtx_insn *orig = from;
4898 rtx_insn *last_got = NULL;
4899 rtx_insn *last_symoff = NULL;
4900
4901 /* For HImode: range is 510, add 4 because pc counts from address of
4902 second instruction after this one, subtract 2 for the jump instruction
4903 that we may need to emit before the table, subtract 2 for the instruction
4904 that fills the jump delay slot (in very rare cases, reorg will take an
4905 instruction from after the constant pool or will leave the delay slot
4906 empty). This gives 510.
4907 For SImode: range is 1020, add 4 because pc counts from address of
4908 second instruction after this one, subtract 2 in case pc is 2 byte
4909 aligned, subtract 2 for the jump instruction that we may need to emit
4910 before the table, subtract 2 for the instruction that fills the jump
4911 delay slot. This gives 1018. */
4912
4913 /* The branch will always be shortened now that the reference address for
4914 forward branches is the successor address, thus we need no longer make
4915 adjustments to the [sh]i_limit for -O0. */
4916
4917 si_limit = 1018;
4918 hi_limit = 510;
4919
4920 while (from && count_si < si_limit && count_hi < hi_limit)
4921 {
4922 int inc = get_attr_length (from);
4923 int new_align = 1;
4924
4925 /* If this is a label that existed at the time of the compute_alignments
4926 call, determine the alignment. N.B. When find_barrier recurses for
4927 an out-of-reach mova, we might see labels at the start of previously
4928 inserted constant tables. */
4929 if (LABEL_P (from)
4930 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4931 {
4932 if (optimize)
4933 new_align = 1 << label_to_alignment (from);
4934 else if (BARRIER_P (prev_nonnote_insn (from)))
4935 new_align = 1 << barrier_align (from);
4936 else
4937 new_align = 1;
4938 inc = 0;
4939 }
4940 /* In case we are scanning a constant table because of recursion, check
4941 for explicit alignments. If the table is long, we might be forced
4942 to emit the new table in front of it; the length of the alignment
4943 might be the last straw. */
4944 else if (NONJUMP_INSN_P (from)
4945 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4946 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4947 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4948 /* When we find the end of a constant table, paste the new constant
4949 at the end. That is better than putting it in front because
4950 this way, we don't need extra alignment for adding a 4-byte-aligned
4951 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4952 else if (NONJUMP_INSN_P (from)
4953 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4954 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4955 return from;
4956
4957 if (BARRIER_P (from))
4958 {
4959 rtx_insn *next;
4960
4961 found_barrier = from;
4962
4963 /* If we are at the end of the function, or in front of an alignment
4964 instruction, we need not insert an extra alignment. We prefer
4965 this kind of barrier. */
4966 if (barrier_align (from) > 2)
4967 good_barrier = from;
4968
4969 /* If we are at the end of a hot/cold block, dump the constants
4970 here. */
4971 next = NEXT_INSN (from);
4972 if (next
4973 && NOTE_P (next)
4974 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4975 break;
4976 }
4977
4978 if (broken_move (from))
4979 {
4980 rtx pat, src, dst;
4981 machine_mode mode;
4982
4983 pat = PATTERN (from);
4984 if (GET_CODE (pat) == PARALLEL)
4985 pat = XVECEXP (pat, 0, 0);
4986 src = SET_SRC (pat);
4987 dst = SET_DEST (pat);
4988 mode = GET_MODE (dst);
4989
4990 /* GOT pc-relative setting comes in a pair of
4991 mova .L8,r0
4992 mov.l .L8,r12
4993 instructions. (plus add r0,r12).
4994 Remember if we see one without the other. */
4995 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4996 last_got = last_got ? NULL : from;
4997 else if (PIC_ADDR_P (src))
4998 last_got = last_got ? NULL : from;
4999
5000 /* We must explicitly check the mode, because sometimes the
5001 front end will generate code to load unsigned constants into
5002 HImode targets without properly sign extending them. */
5003 if (mode == HImode
5004 || (mode == SImode && satisfies_constraint_I16 (src)
5005 && REGNO (dst) != FPUL_REG))
5006 {
5007 found_hi += 2;
5008 /* We put the short constants before the long constants, so
5009 we must count the length of short constants in the range
5010 for the long constants. */
5011 /* ??? This isn't optimal, but is easy to do. */
5012 si_limit -= 2;
5013 }
5014 else
5015 {
5016 /* We dump DF/DI constants before SF/SI ones, because
5017 the limit is the same, but the alignment requirements
5018 are higher. We may waste up to 4 additional bytes
5019 for alignment, and the DF/DI constant may have
5020 another SF/SI constant placed before it. */
5021 while (si_align > 2 && found_si + si_align - 2 > count_si)
5022 si_align >>= 1;
5023 if (found_si > count_si)
5024 count_si = found_si;
5025 found_si += GET_MODE_SIZE (mode);
5026 if (num_mova)
5027 si_limit -= GET_MODE_SIZE (mode);
5028 }
5029 }
5030
5031 if (mova_p (from))
5032 {
5033 switch (untangle_mova (&num_mova, &mova, from))
5034 {
5035 case 1:
5036 if (flag_pic)
5037 {
5038 rtx src = SET_SRC (PATTERN (from));
5039 if (GET_CODE (src) == CONST
5040 && GET_CODE (XEXP (src, 0)) == UNSPEC
5041 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5042 last_symoff = from;
5043 }
5044 break;
5045 case 0: return find_barrier (0, 0, mova);
5046 case 2:
5047 {
5048 leading_mova = 0;
5049 barrier_before_mova
5050 = good_barrier ? good_barrier : found_barrier;
5051 }
5052 default: break;
5053 }
5054 if (found_si > count_si)
5055 count_si = found_si;
5056 }
5057 else if (JUMP_TABLE_DATA_P (from)
5058 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5059 {
5060 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5061 || (num_mova
5062 && (prev_nonnote_insn (from)
5063 == XEXP (MOVA_LABELREF (mova), 0))))
5064 num_mova--;
5065 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5066 {
5067 /* We have just passed the barrier in front of the
5068 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5069 the ADDR_DIFF_VEC is accessed as data, just like our pool
5070 constants, this is a good opportunity to accommodate what
5071 we have gathered so far.
5072 If we waited any longer, we could end up at a barrier in
5073 front of code, which gives worse cache usage for separated
5074 instruction / data caches. */
5075 good_barrier = found_barrier;
5076 break;
5077 }
5078 else
5079 {
5080 rtx body = PATTERN (from);
5081 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5082 }
5083 }
5084 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5085 else if (JUMP_P (from)
5086 && ! TARGET_SH2
5087 && ! optimize_size)
5088 new_align = 4;
5089
5090 /* There is a possibility that a bf is transformed into a bf/s by the
5091 delay slot scheduler. */
5092 if (JUMP_P (from)
5093 && get_attr_type (from) == TYPE_CBRANCH
5094 && ! sequence_insn_p (from))
5095 inc += 2;
5096
5097 if (found_si)
5098 {
5099 count_si += inc;
5100 if (new_align > si_align)
5101 {
5102 si_limit -= (count_si - 1) & (new_align - si_align);
5103 si_align = new_align;
5104 }
5105 count_si = (count_si + new_align - 1) & -new_align;
5106 }
5107 if (found_hi)
5108 {
5109 count_hi += inc;
5110 if (new_align > hi_align)
5111 {
5112 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5113 hi_align = new_align;
5114 }
5115 count_hi = (count_hi + new_align - 1) & -new_align;
5116 }
5117 from = NEXT_INSN (from);
5118 }
5119
5120 if (num_mova)
5121 {
5122 if (leading_mova)
5123 {
5124 /* Try as we might, the leading mova is out of range. Change
5125 it into a load (which will become a pcload) and retry. */
5126 fixup_mova (mova);
5127 return find_barrier (0, 0, mova);
5128 }
5129 else
5130 {
5131 /* Insert the constant pool table before the mova instruction,
5132 to prevent the mova label reference from going out of range. */
5133 from = mova;
5134 good_barrier = found_barrier = barrier_before_mova;
5135 }
5136 }
5137
5138 if (found_barrier)
5139 {
5140 if (good_barrier && next_real_insn (found_barrier))
5141 found_barrier = good_barrier;
5142 }
5143 else
5144 {
5145 /* We didn't find a barrier in time to dump our stuff,
5146 so we'll make one. */
5147 rtx_code_label *label = gen_label_rtx ();
5148
5149 /* Don't emit a constant table in the middle of insns for
5150 casesi_worker_2. This is a bit overkill but is enough
5151 because casesi_worker_2 wouldn't appear so frequently. */
5152 if (last_symoff)
5153 from = last_symoff;
5154
5155 /* If we exceeded the range, then we must back up over the last
5156 instruction we looked at. Otherwise, we just need to undo the
5157 NEXT_INSN at the end of the loop. */
5158 if (PREV_INSN (from) != orig
5159 && (count_hi > hi_limit || count_si > si_limit))
5160 from = PREV_INSN (PREV_INSN (from));
5161 else
5162 from = PREV_INSN (from);
5163
5164 /* Don't emit a constant table in the middle of global pointer setting,
5165 since that would move the addressing base GOT into another table.
5166 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5167 in the pool anyway, so just move up the whole constant pool.
5168
5169 However, avoid doing so when the last single GOT mov is the starting
5170 insn itself. Going back past the start insn would create a negative
5171 offset, causing errors. */
5172 if (last_got && last_got != orig)
5173 from = PREV_INSN (last_got);
5174
5175 /* Don't insert the constant pool table at the position which
5176 may be the landing pad. */
5177 if (flag_exceptions
5178 && CALL_P (from)
5179 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5180 from = PREV_INSN (from);
5181
5182 /* Walk back to be just before any jump or label.
5183 Putting it before a label reduces the number of times the branch
5184 around the constant pool table will be hit. Putting it before
5185 a jump makes it more likely that the bra delay slot will be
5186 filled. */
5187 while (NOTE_P (from) || JUMP_P (from)
5188 || LABEL_P (from))
5189 from = PREV_INSN (from);
5190
5191 /* Make sure we do not split between a call and its corresponding
5192 CALL_ARG_LOCATION note. */
5193 if (CALL_P (from))
5194 {
5195 rtx_insn *next = NEXT_INSN (from);
5196 if (next && NOTE_P (next)
5197 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5198 from = next;
5199 }
5200
5201 from = emit_jump_insn_after (gen_jump (label), from);
5202 JUMP_LABEL (from) = label;
5203 LABEL_NUSES (label) = 1;
5204 found_barrier = emit_barrier_after (from);
5205 emit_label_after (label, found_barrier);
5206 }
5207
5208 return found_barrier;
5209 }
5210
5211 /* If the instruction INSN is implemented by a special function, and we can
5212 positively find the register that is used to call the sfunc, and this
5213 register is not used anywhere else in this instruction - except as the
5214 destination of a set, return this register; else, return 0. */
5215 rtx
5216 sfunc_uses_reg (rtx_insn *insn)
5217 {
5218 int i;
5219 rtx pattern, part, reg_part, reg;
5220
5221 if (!NONJUMP_INSN_P (insn))
5222 return NULL_RTX;
5223 pattern = PATTERN (insn);
5224 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5225 return NULL_RTX;
5226
5227 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5228 {
5229 part = XVECEXP (pattern, 0, i);
5230 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5231 reg_part = part;
5232 }
5233 if (! reg_part)
5234 return NULL_RTX;
5235 reg = XEXP (reg_part, 0);
5236 for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5237 {
5238 part = XVECEXP (pattern, 0, i);
5239 if (part == reg_part || GET_CODE (part) == CLOBBER)
5240 continue;
5241 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5242 && REG_P (SET_DEST (part)))
5243 ? SET_SRC (part) : part)))
5244 return NULL_RTX;
5245 }
5246 return reg;
5247 }
5248
5249 /* See if the only way in which INSN uses REG is by calling it, or by
5250 setting it while calling it. Set *SET to a SET rtx if the register
5251 is set by INSN. */
5252 static bool
5253 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5254 {
5255 *set = NULL_RTX;
5256
5257 rtx reg2 = sfunc_uses_reg (insn);
5258 if (reg2 && REGNO (reg2) == REGNO (reg))
5259 {
5260 rtx pattern = single_set (insn);
5261 if (pattern
5262 && REG_P (SET_DEST (pattern))
5263 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5264 *set = pattern;
5265 return false;
5266 }
5267 if (!CALL_P (insn))
5268 {
5269 /* We don't use rtx_equal_p because we don't care if the mode is
5270 different. */
5271 rtx pattern = single_set (insn);
5272 if (pattern
5273 && REG_P (SET_DEST (pattern))
5274 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5275 {
5276 rtx par, part;
5277 int i;
5278
5279 *set = pattern;
5280 par = PATTERN (insn);
5281 if (GET_CODE (par) == PARALLEL)
5282 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5283 {
5284 part = XVECEXP (par, 0, i);
5285 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5286 return true;
5287 }
5288 return reg_mentioned_p (reg, SET_SRC (pattern));
5289 }
5290
5291 return true;
5292 }
5293
5294 rtx pattern = PATTERN (insn);
5295
5296 if (GET_CODE (pattern) == PARALLEL)
5297 {
5298 for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5299 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5300 return true;
5301 pattern = XVECEXP (pattern, 0, 0);
5302 }
5303
5304 if (GET_CODE (pattern) == SET)
5305 {
5306 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5307 {
5308 /* We don't use rtx_equal_p, because we don't care if the
5309 mode is different. */
5310 if (!REG_P (SET_DEST (pattern))
5311 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5312 return true;
5313
5314 *set = pattern;
5315 }
5316
5317 pattern = SET_SRC (pattern);
5318 }
5319
5320 if (GET_CODE (pattern) != CALL
5321 || !MEM_P (XEXP (pattern, 0))
5322 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5323 return true;
5324
5325 return false;
5326 }
5327
5328 /* Given X, a pattern of an insn or a part of it, return a mask of used
5329 general registers. Bits 0..15 mean that the respective registers
5330 are used as inputs in the instruction. Bits 16..31 mean that the
5331 registers 0..15, respectively, are used as outputs, or are clobbered.
5332 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
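/* Illustrative example (not from the original comment; assumes SImode
   occupies a single hard register here): for
   (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   the SET case combines the source with IS_DEST 0 and the destination
   with IS_DEST 16, yielding bits 2 and 3 for the inputs and bit 17 for
   the output, i.e. a mask of 0x2000c.  */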
5333 int
5334 regs_used (rtx x, int is_dest)
5335 {
5336 enum rtx_code code;
5337 const char *fmt;
5338 int used = 0;
5339
5340 if (! x)
5341 return used;
5342 code = GET_CODE (x);
5343 switch (code)
5344 {
5345 case REG:
5346 if (REGNO (x) < 16)
5347 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5348 << (REGNO (x) + is_dest));
5349 return 0;
5350 case SUBREG:
5351 {
5352 rtx y = SUBREG_REG (x);
5353
5354 if (!REG_P (y))
5355 break;
5356 if (REGNO (y) < 16)
5357 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5358 << (REGNO (y) +
5359 subreg_regno_offset (REGNO (y),
5360 GET_MODE (y),
5361 SUBREG_BYTE (x),
5362 GET_MODE (x)) + is_dest));
5363 return 0;
5364 }
5365 case SET:
5366 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5367 case RETURN:
5368 /* If there was a return value, it must have been indicated with USE. */
5369 return 0x00ffff00;
5370 case CLOBBER:
5371 is_dest = 1;
5372 break;
5373 case MEM:
5374 is_dest = 0;
5375 break;
5376 case CALL:
5377 used |= 0x00ff00f0;
5378 break;
5379 default:
5380 break;
5381 }
5382
5383 fmt = GET_RTX_FORMAT (code);
5384
5385 for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5386 {
5387 if (fmt[i] == 'E')
5388 {
5389 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
5390 used |= regs_used (XVECEXP (x, i, j), is_dest);
5391 }
5392 else if (fmt[i] == 'e')
5393 used |= regs_used (XEXP (x, i), is_dest);
5394 }
5395 return used;
5396 }
5397
5398 /* Create an instruction that prevents redirection of a conditional branch
5399 to the destination of the JUMP with address ADDR.
5400 If the branch needs to be implemented as an indirect jump, try to find
5401 a scratch register for it.
5402 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5403 If any preceding insn that doesn't fit into a delay slot is good enough,
5404 pass 1. Pass 2 if a definite blocking insn is needed.
5405 -1 is used internally to avoid deep recursion.
5406 If a blocking instruction is made or recognized, return it. */
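/* Note (added for clarity): the "blocking" insns referred to above are
   the indirect_jump_scratch and block_branch_redirect insns emitted
   below; their presence keeps reorg from redirecting the branch past
   this point.  */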
5407 static rtx_insn *
5408 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5409 {
5410 int dead = 0;
5411 rtx_insn *prev = prev_nonnote_insn (jump);
5412
5413 /* First, check if we already have an instruction that satisfies our need. */
5414 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5415 {
5416 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5417 return prev;
5418 if (GET_CODE (PATTERN (prev)) == USE
5419 || GET_CODE (PATTERN (prev)) == CLOBBER
5420 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5421 prev = jump;
5422 else if ((need_block &= ~1) < 0)
5423 return prev;
5424 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5425 need_block = 0;
5426 }
5427 if (GET_CODE (PATTERN (jump)) == RETURN)
5428 {
5429 if (! need_block)
5430 return prev;
5431 /* Reorg even does nasty things with return insns that cause branches
5432 to go out of range - see find_end_label and callers. */
5433 return emit_insn_before (gen_block_branch_redirect (const0_rtx), jump);
5434 }
5435 /* We can't use JUMP_LABEL here because it might be undefined
5436 when not optimizing. */
5437 rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5438 /* If the branch is out of range, try to find a scratch register for it. */
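/* Note (added for clarity): the unsigned comparison below is a range
   check; biasing the displacement by 4092 and comparing against
   4092 + 4098 asks whether the destination lies outside the interval
   [-4092, +4098] bytes, which appears to correspond to the reach of a
   pc-relative bra (12-bit displacement, scaled by 2, counted from the
   insn address + 4).  */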
5439 if (optimize
5440 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5441 > 4092 + 4098))
5442 {
5443 rtx_insn *scan;
5444 /* Don't look for the stack pointer as a scratch register,
5445 it would cause trouble if an interrupt occurred. */
5446 unsigned attempt = 0x7fff, used;
5447 int jump_left = flag_expensive_optimizations + 1;
5448
5449 /* It is likely that the most recent eligible instruction is wanted for
5450 the delay slot. Therefore, find out which registers it uses, and
5451 try to avoid using them. */
5452
5453 for (scan = jump; (scan = PREV_INSN (scan)); )
5454 {
5455 if (scan->deleted ())
5456 continue;
5457 rtx_code code = GET_CODE (scan);
5458 if (code == CODE_LABEL || code == JUMP_INSN)
5459 break;
5460 if (code == INSN
5461 && GET_CODE (PATTERN (scan)) != USE
5462 && GET_CODE (PATTERN (scan)) != CLOBBER
5463 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5464 {
5465 attempt &= ~regs_used (PATTERN (scan), 0);
5466 break;
5467 }
5468 }
5469 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5470 (scan = NEXT_INSN (scan)); )
5471 {
5472 if (scan->deleted ())
5473 continue;
5474 rtx_code code = GET_CODE (scan);
5475 if (INSN_P (scan))
5476 {
5477 used |= regs_used (PATTERN (scan), 0);
5478 if (code == CALL_INSN)
5479 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5480 dead |= (used >> 16) & ~used;
5481 if (dead & attempt)
5482 {
5483 dead &= attempt;
5484 break;
5485 }
5486 if (code == JUMP_INSN)
5487 {
5488 if (jump_left-- && simplejump_p (scan))
5489 scan = JUMP_LABEL_AS_INSN (scan);
5490 else
5491 break;
5492 }
5493 }
5494 }
5495 /* Mask out the stack pointer again, in case it was
5496 the only 'free' register we have found. */
5497 dead &= 0x7fff;
5498 }
5499 /* If the immediate destination is still in range, check for possible
5500 threading with a jump beyond the delay slot insn.
5501 Don't check if we are called recursively; the jump has been or will be
5502 checked in a different invocation in that case. */
5503
5504 else if (optimize && need_block >= 0)
5505 {
5506 rtx_insn *next = next_active_insn (next_active_insn (dest));
5507 if (next && JUMP_P (next)
5508 && GET_CODE (PATTERN (next)) == SET
5509 && recog_memoized (next) == CODE_FOR_jump_compact)
5510 {
5511 dest = JUMP_LABEL (next);
5512 if (dest
5513 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5514 > 4092 + 4098))
5515 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5516 }
5517 }
5518
5519 if (dead)
5520 {
5521 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5522
5523 /* It would be nice if we could convert the jump into an indirect
5524 jump / far branch right now, thus exposing all constituent
5525 instructions to further optimization. However, reorg uses
5526 simplejump_p to determine if there is an unconditional jump where
5527 it should try to schedule instructions from the target of the
5528 branch; simplejump_p fails for indirect jumps even if they have
5529 a JUMP_LABEL. */
5530 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5531 (reg, GEN_INT (unspec_bbr_uid++)),
5532 jump);
5533 /* ??? We would like this to have the scope of the jump, but that
5534 scope will change when a delay slot insn of an inner scope is added.
5535 Hence, after delay slot scheduling, we'll have to expect
5536 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5537 the jump. */
5538
5539 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5540 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5541 return insn;
5542 }
5543 else if (need_block)
5544 /* We can't use JUMP_LABEL here because it might be undefined
5545 when not optimizing. */
5546 return emit_insn_before (gen_block_branch_redirect
5547 (GEN_INT (unspec_bbr_uid++)),
5548 jump);
5549 return prev;
5550 }
5551
5552 #define CONDJUMP_MIN -252
5553 #define CONDJUMP_MAX 262
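/* Note (added for clarity): these constants bound, in bytes, how far
   away a conditional branch destination may be before split_branches
   has to rewrite the branch; the values appear to follow from the 8-bit
   displacement of SH conditional branches, scaled by 2 and counted from
   the insn address + 4, with a little slack on the positive side.  */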
5554 struct far_branch
5555 {
5556 /* A label (to be placed) in front of the jump
5557 that jumps to our ultimate destination. */
5558 rtx_insn *near_label;
5559 /* Where we are going to insert it if we cannot move the jump any farther,
5560 or the jump itself if we have picked up an existing jump. */
5561 rtx_insn *insert_place;
5562 /* The ultimate destination. */
5563 rtx_insn *far_label;
5564 struct far_branch *prev;
5565 /* If the branch has already been created, its address;
5566 else the address of its first prospective user. */
5567 int address;
5568 };
5569
5570 enum mdep_reorg_phase_e mdep_reorg_phase;
5571
5572 static void
5573 gen_far_branch (struct far_branch *bp)
5574 {
5575 rtx_insn *insn = bp->insert_place;
5576 rtx_jump_insn *jump;
5577 rtx_code_label *label = gen_label_rtx ();
5578
5579 emit_label_after (label, insn);
5580 if (bp->far_label)
5581 {
5582 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5583 LABEL_NUSES (bp->far_label)++;
5584 }
5585 else
5586 jump = emit_jump_insn_after (gen_return (), insn);
5587
5588 /* Emit a barrier so that reorg knows that any following instructions
5589 are not reachable via a fall-through path.
5590 But don't do this when not optimizing, since we wouldn't suppress the
5591 alignment for the barrier then, and could end up with out-of-range
5592 pc-relative loads. */
5593 if (optimize)
5594 emit_barrier_after (jump);
5595 emit_label_after (bp->near_label, insn);
5596
5597 if (bp->far_label)
5598 JUMP_LABEL (jump) = bp->far_label;
5599 else
5600 {
5601 rtx pat = PATTERN (jump);
5602 gcc_assert (ANY_RETURN_P (pat));
5603 JUMP_LABEL (jump) = pat;
5604 }
5605
5606 bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5607 gcc_assert (ok);
5608
5609 /* If we are branching around a jump (rather than a return), prevent
5610 reorg from using an insn from the jump target as the delay slot insn -
5611 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5612 and it could cause branches to go out of range. */
5613 if (bp->far_label)
5614 (emit_insn_after
5615 (gen_stuff_delay_slot
5616 (GEN_INT (unspec_bbr_uid++),
5617 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5618 insn));
5619 /* Prevent reorg from undoing our splits. */
5620 gen_block_redirect (jump, bp->address += 2, 2);
5621 }
5622
5623 /* Fix up ADDR_DIFF_VECs. */
5624 void
5625 fixup_addr_diff_vecs (rtx_insn *first)
5626 {
5627 rtx_insn *insn;
5628
5629 for (insn = first; insn; insn = NEXT_INSN (insn))
5630 {
5631 rtx vec_lab, pat, prevpat, x, braf_label;
5632 rtx_insn *prev;
5633
5634 if (! JUMP_TABLE_DATA_P (insn)
5635 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5636 continue;
5637 pat = PATTERN (insn);
5638 vec_lab = XEXP (XEXP (pat, 0), 0);
5639
5640 /* Search the matching casesi_jump_2. */
5641 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5642 {
5643 if (!JUMP_P (prev))
5644 continue;
5645 prevpat = PATTERN (prev);
5646 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5647 continue;
5648 x = XVECEXP (prevpat, 0, 1);
5649 if (GET_CODE (x) != USE)
5650 continue;
5651 x = XEXP (x, 0);
5652 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5653 break;
5654 }
5655 /* FIXME: This is a bug in the optimizer, but it seems harmless
5656 to just avoid panicking. */
5657 if (!prev)
5658 continue;
5659
5660 /* Emit the reference label of the braf where it belongs, right after
5661 the casesi_jump_2 (i.e. braf). */
5662 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5663 emit_label_after (braf_label, prev);
5664
5665 /* Fix up the ADDR_DIFF_VEC to be relative
5666 to the reference address of the braf. */
5667 XEXP (XEXP (pat, 0), 0) = braf_label;
5668 }
5669 }
5670
5671 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5672 a barrier. Return the base 2 logarithm of the desired alignment. */
5673 int
5674 barrier_align (rtx_insn *barrier_or_label)
5675 {
5676 if (! barrier_or_label)
5677 return 0;
5678
5679 if (LABEL_P (barrier_or_label)
5680 && NEXT_INSN (barrier_or_label)
5681 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5682 return 2;
5683
5684 if (BARRIER_P (barrier_or_label)
5685 && PREV_INSN (barrier_or_label)
5686 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5687 {
5688 rtx pat = PATTERN (PREV_INSN (barrier_or_label));
5689 /* If this is a very small table, we want to keep the alignment after
5690 the table to the minimum for proper code alignment. */
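/* Note (added for clarity): XVECLEN (pat, 1) is the number of entries
   in the ADDR_DIFF_VEC and GET_MODE_SIZE (GET_MODE (pat)) the size of
   each entry, so the product below is the table size in bytes; it is
   compared against a quarter of the cache line, assuming CACHE_LOG is
   the log2 of the cache-line size.  */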
5691 return ((optimize_size
5692 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5693 <= (unsigned) 1 << (CACHE_LOG - 2)))
5694 ? 1 : align_jumps_log);
5695 }
5696
5697 rtx_insn *next = next_active_insn (barrier_or_label);
5698
5699 if (! next)
5700 return 0;
5701
5702 rtx pat = PATTERN (next);
5703
5704 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5705 /* This is a barrier in front of a constant table. */
5706 return 0;
5707
5708 if (optimize_size)
5709 return 0;
5710
5711 if (! TARGET_SH2 || ! optimize)
5712 return align_jumps_log;
5713
5714 /* When fixing up pcloads, a constant table might be inserted just before
5715 the basic block that ends with the barrier. Thus, we can't trust the
5716 instruction lengths before that. */
5717 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5718 {
5719 /* Check if there is an immediately preceding branch to the insn beyond
5720 the barrier. We must weigh the cost of discarding useful information
5721 from the current cache line when executing this branch and there is
5722 an alignment, against that of fetching unneeded insns in front of the
5723 branch target when there is no alignment. */
5724
5725 /* There are two delay_slot cases to consider. One is the simple case
5726 where the preceding branch is to the insn beyond the barrier (simple
5727 delay slot filling), and the other is where the preceding branch has
5728 a delay slot that is a duplicate of the insn after the barrier
5729 (fill_eager_delay_slots) and the branch is to the insn after the insn
5730 after the barrier. */
5731
5732 int slot, credit;
5733 bool jump_to_next = false;
5734
5735 /* Skip to the insn before the JUMP_INSN before the barrier under
5736 investigation. */
5737 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
5738
5739 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5740 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5741 prev = prev_real_insn (prev))
5742 {
5743 jump_to_next = false;
5744 if (GET_CODE (PATTERN (prev)) == USE
5745 || GET_CODE (PATTERN (prev)) == CLOBBER)
5746 continue;
5747 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
5748 {
5749 prev = prev_seq->insn (1);
5750 if (INSN_UID (prev) == INSN_UID (next))
5751 {
5752 /* Delay slot was filled with insn at jump target. */
5753 jump_to_next = true;
5754 continue;
5755 }
5756 }
5757
5758 if (slot
5759 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5760 slot = 0;
5761 credit -= get_attr_length (prev);
5762 }
5763 if (prev && jump_to_label_p (prev))
5764 {
5765 rtx_insn *x;
5766 if (jump_to_next
5767 || next_real_insn (JUMP_LABEL (prev)) == next
5768 /* If relax_delay_slots() decides NEXT was redundant
5769 with some previous instruction, it will have
5770 redirected PREV's jump to the following insn. */
5771 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5772 /* There is no upper bound on redundant instructions
5773 that might have been skipped, but we must not put an
5774 alignment where none had been before. */
5775 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5776 (INSN_P (x)
5777 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5778 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5779 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5780 {
5781 rtx pat = PATTERN (prev);
5782 if (GET_CODE (pat) == PARALLEL)
5783 pat = XVECEXP (pat, 0, 0);
5784 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5785 return 0;
5786 }
5787 }
5788 }
5789
5790 return align_jumps_log;
5791 }
5792
5793 /* If we are inside a phony loop, almost any kind of label can turn up as the
5794 first one in the loop. Aligning a braf label causes incorrect switch
5795 destination addresses; we can detect braf labels because they are
5796 followed by a BARRIER.
5797 Applying loop alignment to small constant or switch tables is a waste
5798 of space, so we suppress this too. */
5799 int
5800 sh_loop_align (rtx_insn *label)
5801 {
5802 rtx_insn *next = label;
5803
5804 if (! optimize || optimize_size)
5805 return 0;
5806
5807 do
5808 next = next_nonnote_insn (next);
5809 while (next && LABEL_P (next));
5810
5811 if (! next
5812 || ! INSN_P (next)
5813 || recog_memoized (next) == CODE_FOR_consttable_2)
5814 return 0;
5815
5816 return align_loops_log;
5817 }
5818
5819 /* Do a final pass over the function, just before delayed branch
5820 scheduling. */
5821 static void
5822 sh_reorg (void)
5823 {
5824 rtx_insn *first, *insn, *mova = NULL;
5825 int num_mova;
5826 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5827 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5828
5829 first = get_insns ();
5830 max_labelno_before_reorg = max_label_num ();
5831
5832 /* We must split call insns before introducing `mova's. If we're
5833 optimizing, they'll have already been split. Otherwise, make
5834 sure we don't split them too late. */
5835 if (! optimize)
5836 split_all_insns_noflow ();
5837
5838 /* If relaxing, generate pseudo-ops to associate function calls with
5839 the symbols they call. It does no harm to not generate these
5840 pseudo-ops. However, when we can generate them, it enables the
5841 linker to potentially relax the jsr to a bsr, and eliminate the
5842 register load and, possibly, the constant pool entry. */
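/* Note (added for clarity): concretely, the loop below tags the insn
   that loads the call target and each call site that uses it with a
   shared label via REG_LABEL_OPERAND notes; final_prescan_insn later
   prints the label definition at the load and a ".uses" pseudo-op at
   the call, which is what lets the linker relax the jsr into a bsr.  */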
5843
5844 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5845 if (TARGET_RELAX)
5846 {
5847 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5848 own purposes. This works because none of the remaining passes
5849 need to look at them.
5850
5851 ??? But it may break in the future. We should use a machine
5852 dependent REG_NOTE, or some other approach entirely. */
5853 for (insn = first; insn; insn = NEXT_INSN (insn))
5854 {
5855 if (INSN_P (insn))
5856 {
5857 rtx note;
5858
5859 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5860 NULL_RTX)) != 0)
5861 remove_note (insn, note);
5862 }
5863 }
5864
5865 for (insn = first; insn; insn = NEXT_INSN (insn))
5866 {
5867 rtx pattern, reg, set, dies;
5868 rtx_code_label *label;
5869 rtx_insn *link, *scan;
5870 int rescan = 0, foundinsn = 0;
5871
5872 if (CALL_P (insn))
5873 {
5874 pattern = PATTERN (insn);
5875
5876 if (GET_CODE (pattern) == PARALLEL)
5877 pattern = XVECEXP (pattern, 0, 0);
5878 if (GET_CODE (pattern) == SET)
5879 pattern = SET_SRC (pattern);
5880
5881 if (GET_CODE (pattern) != CALL
5882 || !MEM_P (XEXP (pattern, 0)))
5883 continue;
5884
5885 reg = XEXP (XEXP (pattern, 0), 0);
5886 }
5887 else
5888 {
5889 reg = sfunc_uses_reg (insn);
5890 if (! reg)
5891 continue;
5892 }
5893
5894 if (!REG_P (reg))
5895 continue;
5896
5897 /* Try scanning backward to find where the register is set. */
5898 link = NULL;
5899 for (scan = PREV_INSN (insn);
5900 scan && !LABEL_P (scan);
5901 scan = PREV_INSN (scan))
5902 {
5903 if (! INSN_P (scan))
5904 continue;
5905
5906 if (! reg_mentioned_p (reg, scan))
5907 continue;
5908
5909 if (noncall_uses_reg (reg, scan, &set))
5910 break;
5911
5912 if (set)
5913 {
5914 link = scan;
5915 break;
5916 }
5917 }
5918
5919 if (! link)
5920 continue;
5921
5922 /* The register is set at LINK. */
5923
5924 /* We can only optimize the function call if the register is
5925 being set to a symbol. In theory, we could sometimes
5926 optimize calls to a constant location, but the assembler
5927 and linker do not support that at present. */
5928 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5929 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5930 continue;
5931
5932 /* Scan forward from LINK to the place where REG dies, and
5933 make sure that the only insns which use REG are
5934 themselves function calls. */
5935
5936 /* ??? This doesn't work for call targets that were allocated
5937 by reload, since there may not be a REG_DEAD note for the
5938 register. */
5939
5940 dies = NULL_RTX;
5941 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5942 {
5943 rtx scanset;
5944
5945 /* Don't try to trace forward past a CODE_LABEL if we haven't
5946 seen INSN yet. Ordinarily, we will only find the setting insn
5947 if it is in the same basic block. However,
5948 cross-jumping can insert code labels in between the load and
5949 the call, and can result in situations where a single call
5950 insn may have two targets depending on where we came from. */
5951
5952 if (LABEL_P (scan) && ! foundinsn)
5953 break;
5954
5955 if (! INSN_P (scan))
5956 continue;
5957
5958 /* Don't try to trace forward past a JUMP. To optimize
5959 safely, we would have to check that all the
5960 instructions at the jump destination did not use REG. */
5961
5962 if (JUMP_P (scan))
5963 break;
5964
5965 if (! reg_mentioned_p (reg, scan))
5966 continue;
5967
5968 if (noncall_uses_reg (reg, scan, &scanset))
5969 break;
5970
5971 if (scan == insn)
5972 foundinsn = 1;
5973
5974 if (scan != insn
5975 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5976 {
5977 /* There is a function call to this register other
5978 than the one we are checking. If we optimize
5979 this call, we need to rescan again below. */
5980 rescan = 1;
5981 }
5982
5983 /* ??? We shouldn't have to worry about SCANSET here.
5984 We should just be able to check for a REG_DEAD note
5985 on a function call. However, the REG_DEAD notes are
5986 apparently not dependable around libcalls; c-torture
5987 execute/920501-2 is a test case. If SCANSET is set,
5988 then this insn sets the register, so it must have
5989 died earlier. Unfortunately, this will only handle
5990 the cases in which the register is, in fact, set in a
5991 later insn. */
5992
5993 /* ??? We shouldn't have to use FOUNDINSN here.
5994 This dates back to when we used LOG_LINKS to find
5995 the most recent insn which sets the register. */
5996
5997 if (foundinsn
5998 && (scanset
5999 || find_reg_note (scan, REG_DEAD, reg)))
6000 {
6001 dies = scan;
6002 break;
6003 }
6004 }
6005
6006 if (! dies)
6007 {
6008 /* Either there was a branch, or some insn used REG
6009 other than as a function call address. */
6010 continue;
6011 }
6012
6013 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6014 on the insn which sets the register, and on each call insn
6015 which uses the register. In final_prescan_insn we look for
6016 the REG_LABEL_OPERAND notes, and output the appropriate label
6017 or pseudo-op. */
6018
6019 label = gen_label_rtx ();
6020 add_reg_note (link, REG_LABEL_OPERAND, label);
6021 add_reg_note (insn, REG_LABEL_OPERAND, label);
6022 if (rescan)
6023 {
6024 scan = link;
6025 do
6026 {
6027 rtx reg2;
6028
6029 scan = NEXT_INSN (scan);
6030 if (scan != insn
6031 && ((CALL_P (scan)
6032 && reg_mentioned_p (reg, scan))
6033 || ((reg2 = sfunc_uses_reg (scan))
6034 && REGNO (reg2) == REGNO (reg))))
6035 add_reg_note (scan, REG_LABEL_OPERAND, label);
6036 }
6037 while (scan != dies);
6038 }
6039 }
6040 }
6041
6042 if (TARGET_SH2)
6043 fixup_addr_diff_vecs (first);
6044
6045 if (optimize)
6046 {
6047 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6048 shorten_branches (first);
6049 }
6050
6051 /* Scan the function looking for move instructions which have to be
6052 changed to pc-relative loads and insert the literal tables. */
6053 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6054 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6055 {
6056 if (mova_p (insn))
6057 {
6058 /* ??? basic block reordering can move a switch table dispatch
6059 below the switch table. Check if that has happened.
6060 We only have the addresses available when optimizing; but then,
6061 this check shouldn't be needed when not optimizing. */
6062 if (!untangle_mova (&num_mova, &mova, insn))
6063 {
6064 insn = mova;
6065 num_mova = 0;
6066 }
6067 }
6068 else if (JUMP_TABLE_DATA_P (insn)
6069 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6070 && num_mova
6071 /* ??? loop invariant motion can also move a mova out of a
6072 loop. Since loop does this code motion anyway, maybe we
6073 should wrap UNSPEC_MOVA into a CONST, so that reload can
6074 move it back. */
6075 && ((num_mova > 1
6076 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6077 || (prev_nonnote_insn (insn)
6078 == XEXP (MOVA_LABELREF (mova), 0))))
6079 {
6080 rtx_insn *scan;
6081 int total;
6082
6083 num_mova--;
6084
6085 /* Some code might have been inserted between the mova and
6086 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6087 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6088 total += get_attr_length (scan);
6089
6090 /* range of mova is 1020, add 4 because pc counts from address of
6091 second instruction after this one, subtract 2 in case pc is 2
6092 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6093 cancels out with alignment effects of the mova itself. */
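/* That is, 1020 + 4 - 2 = 1022, which is the limit tested below.  */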
6094 if (total > 1022)
6095 {
6096 /* Change the mova into a load, and restart scanning
6097 there. broken_move will then return true for mova. */
6098 fixup_mova (mova);
6099 insn = mova;
6100 }
6101 }
6102 if (broken_move (insn)
6103 || (NONJUMP_INSN_P (insn)
6104 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6105 {
6106 rtx_insn *scan;
6107 /* Scan ahead looking for a barrier to stick the constant table
6108 behind. */
6109 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6110 rtx_insn *last_float_move = NULL;
6111 rtx last_float = 0, *last_float_addr = NULL;
6112 int need_aligned_label = 0;
6113
6114 if (num_mova && ! mova_p (mova))
6115 {
6116 /* find_barrier had to change the first mova into a
6117 pcload; thus, we have to start with this new pcload. */
6118 insn = mova;
6119 num_mova = 0;
6120 }
6121 /* Now find all the moves between the points and modify them. */
6122 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6123 {
6124 if (LABEL_P (scan))
6125 last_float = 0;
6126 if (NONJUMP_INSN_P (scan)
6127 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6128 need_aligned_label = 1;
6129 if (broken_move (scan))
6130 {
6131 rtx *patp = &PATTERN (scan), pat = *patp;
6132 rtx src, dst;
6133 rtx lab;
6134 rtx newsrc;
6135 machine_mode mode;
6136
6137 if (GET_CODE (pat) == PARALLEL)
6138 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6139 src = SET_SRC (pat);
6140 dst = SET_DEST (pat);
6141 mode = GET_MODE (dst);
6142
6143 if (mode == SImode && satisfies_constraint_I16 (src)
6144 && REGNO (dst) != FPUL_REG)
6145 {
6146 int offset = 0;
6147
6148 mode = HImode;
6149 while (GET_CODE (dst) == SUBREG)
6150 {
6151 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6152 GET_MODE (SUBREG_REG (dst)),
6153 SUBREG_BYTE (dst),
6154 GET_MODE (dst));
6155 dst = SUBREG_REG (dst);
6156 }
6157 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6158 }
6159 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6160 {
6161 /* This must be an insn that clobbers r0. */
6162 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6163 XVECLEN (PATTERN (scan), 0)
6164 - 1);
6165 rtx clobber = *clobberp;
6166
6167 gcc_assert (GET_CODE (clobber) == CLOBBER
6168 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6169
6170 if (last_float
6171 && reg_set_between_p (r0_rtx, last_float_move, scan))
6172 last_float = 0;
6173 lab = add_constant (src, mode, last_float);
6174 if (lab)
6175 emit_insn_before (gen_mova (lab), scan);
6176 else
6177 {
6178 /* There will be a REG_UNUSED note for r0 on
6179 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6180 otherwise reorg:mark_target_live_regs will not
6181 consider r0 to be used, and we would end up with a delay
6182 slot insn in front of SCAN that clobbers r0. */
6183 rtx note
6184 = find_regno_note (last_float_move, REG_UNUSED, 0);
6185
6186 /* If we are not optimizing, then there may not be
6187 a note. */
6188 if (note)
6189 PUT_REG_NOTE_KIND (note, REG_INC);
6190
6191 *last_float_addr = r0_inc_rtx;
6192 }
6193 last_float_move = scan;
6194 last_float = src;
6195 newsrc = gen_const_mem (mode,
6196 (((TARGET_SH4 && ! TARGET_FMOVD)
6197 || REGNO (dst) == FPUL_REG)
6198 ? r0_inc_rtx
6199 : r0_rtx));
6200 last_float_addr = &XEXP (newsrc, 0);
6201
6202 /* Remove the clobber of r0. */
6203 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6204 gen_rtx_SCRATCH (Pmode));
6205 }
6206 /* This is a mova needing a label. Create it. */
6207 else if (GET_CODE (src) == UNSPEC
6208 && XINT (src, 1) == UNSPEC_MOVA
6209 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6210 {
6211 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6212 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6213 newsrc = gen_rtx_UNSPEC (SImode,
6214 gen_rtvec (1, newsrc),
6215 UNSPEC_MOVA);
6216 }
6217 else if (GET_CODE (src) == UNSPEC_VOLATILE
6218 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6219 {
6220 newsrc = XVECEXP (src, 0, 0);
6221 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6222 INSN_CODE (scan) = -1;
6223 continue;
6224 }
6225 else
6226 {
6227 lab = add_constant (src, mode, 0);
6228 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6229 newsrc = gen_const_mem (mode, newsrc);
6230 }
6231 *patp = gen_rtx_SET (dst, newsrc);
6232 INSN_CODE (scan) = -1;
6233 }
6234 }
6235 dump_table (need_aligned_label ? insn : 0, barrier);
6236 insn = barrier;
6237 }
6238 }
6239 label_ref_list_d_pool.release ();
6240 for (insn = first; insn; insn = NEXT_INSN (insn))
6241 PUT_MODE (insn, VOIDmode);
6242
6243 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6244 INSN_ADDRESSES_FREE ();
6245 split_branches (first);
6246
6247 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6248 also has an effect on the register that holds the address of the sfunc.
6249 Insert an extra dummy insn in front of each sfunc that pretends to
6250 use this register. */
6251 if (flag_delayed_branch)
6252 {
6253 for (insn = first; insn; insn = NEXT_INSN (insn))
6254 {
6255 rtx reg = sfunc_uses_reg (insn);
6256
6257 if (! reg)
6258 continue;
6259 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6260 }
6261 }
6262 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6263 }
6264
6265 /* Return the UID of the insn that follows the specified label. */
6266 int
6267 get_dest_uid (rtx label, int max_uid)
6268 {
6269 rtx_insn *dest = next_real_insn (label);
6270
6271 if (! dest)
6272 /* This can happen for an undefined label. */
6273 return 0;
6274 int dest_uid = INSN_UID (dest);
6275 /* If this is a newly created branch redirection blocking instruction,
6276 we cannot index the branch_uid or insn_addresses arrays with its
6277 uid. But then, we won't need to, because the actual destination is
6278 the following branch. */
6279 while (dest_uid >= max_uid)
6280 {
6281 dest = NEXT_INSN (dest);
6282 dest_uid = INSN_UID (dest);
6283 }
6284 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6285 return 0;
6286 return dest_uid;
6287 }
6288
6289 /* Split condbranches that are out of range. Also add clobbers for
6290 scratch registers that are needed in far jumps.
6291 We do this before delay slot scheduling, so that it can take our
6292 newly created instructions into account. It also allows us to
6293 find branches with common targets more easily. */
6294 static void
6295 split_branches (rtx_insn *first)
6296 {
6297 rtx_insn *insn;
6298 struct far_branch **uid_branch, *far_branch_list = 0;
6299 int max_uid = get_max_uid ();
6300 int ok;
6301
6302 /* Find out which branches are out of range. */
6303 shorten_branches (first);
6304
6305 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6306 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6307
6308 for (insn = first; insn; insn = NEXT_INSN (insn))
6309 if (! INSN_P (insn))
6310 continue;
6311 else if (insn->deleted ())
6312 {
6313 /* Shorten_branches would split this instruction again,
6314 so transform it into a note. */
6315 SET_INSN_DELETED (insn);
6316 }
6317 else if (JUMP_P (insn))
6318 {
6319 enum attr_type type = get_attr_type (insn);
6320 if (type == TYPE_CBRANCH)
6321 {
6322 rtx_insn *next, *beyond;
6323
6324 if (get_attr_length (insn) > 4)
6325 {
6326 rtx src = SET_SRC (PATTERN (insn));
6327 rtx olabel = XEXP (XEXP (src, 1), 0);
6328 int addr = INSN_ADDRESSES (INSN_UID (insn));
6329 rtx_insn *label = 0;
6330 int dest_uid = get_dest_uid (olabel, max_uid);
6331 struct far_branch *bp = uid_branch[dest_uid];
6332
6333 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6334 the label if the LABEL_NUSES count drops to zero. There is
6335 always a jump_optimize pass that sets these values, but it
6336 proceeds to delete unreferenced code, and then if not
6337 optimizing, to un-delete the deleted instructions, thus
6338 leaving labels with use counts that are too low. */
6339 if (! optimize)
6340 {
6341 JUMP_LABEL (insn) = olabel;
6342 LABEL_NUSES (olabel)++;
6343 }
6344 if (! bp)
6345 {
6346 bp = (struct far_branch *) alloca (sizeof *bp);
6347 uid_branch[dest_uid] = bp;
6348 bp->prev = far_branch_list;
6349 far_branch_list = bp;
6350 bp->far_label = as_a <rtx_insn *> (
6351 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6352 0));
6353 LABEL_NUSES (bp->far_label)++;
6354 }
6355 else
6356 {
6357 label = bp->near_label;
6358 if (! label && bp->address - addr >= CONDJUMP_MIN)
6359 {
6360 rtx_insn *block = bp->insert_place;
6361
6362 if (GET_CODE (PATTERN (block)) == RETURN)
6363 block = PREV_INSN (block);
6364 else
6365 block = gen_block_redirect (block,
6366 bp->address, 2);
6367 label = emit_label_after (gen_label_rtx (),
6368 PREV_INSN (block));
6369 bp->near_label = label;
6370 }
6371 else if (label && ! NEXT_INSN (label))
6372 {
6373 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6374 bp->insert_place = insn;
6375 else
6376 gen_far_branch (bp);
6377 }
6378 }
6379 if (! label
6380 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6381 {
6382 bp->near_label = label = gen_label_rtx ();
6383 bp->insert_place = insn;
6384 bp->address = addr;
6385 }
6386 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6387 gcc_assert (ok);
6388 }
6389 else
6390 {
6391 /* get_attr_length (insn) == 2 */
6392 /* Check if we have a pattern where reorg wants to redirect
6393 the branch to a label from an unconditional branch that
6394 is too far away. */
6395 /* We can't use JUMP_LABEL here because it might be undefined
6396 when not optimizing. */
6397 /* A syntax error might cause beyond to be NULL_RTX. */
6398 beyond
6399 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6400 0));
6401
6402 if (beyond
6403 && (JUMP_P (beyond)
6404 || ((beyond = next_active_insn (beyond))
6405 && JUMP_P (beyond)))
6406 && GET_CODE (PATTERN (beyond)) == SET
6407 && recog_memoized (beyond) == CODE_FOR_jump_compact
6408 && ((INSN_ADDRESSES
6409 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6410 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6411 > 252 + 258 + 2))
6412 gen_block_redirect (beyond,
6413 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6414 }
6415
6416 next = next_active_insn (insn);
6417
6418 if (next
6419 && (JUMP_P (next)
6420 || ((next = next_active_insn (next))
6421 && JUMP_P (next)))
6422 && GET_CODE (PATTERN (next)) == SET
6423 && recog_memoized (next) == CODE_FOR_jump_compact
6424 && ((INSN_ADDRESSES
6425 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6426 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6427 > 252 + 258 + 2))
6428 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6429 }
6430 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6431 {
6432 int addr = INSN_ADDRESSES (INSN_UID (insn));
6433 rtx_insn *far_label = 0;
6434 int dest_uid = 0;
6435 struct far_branch *bp;
6436
6437 if (type == TYPE_JUMP)
6438 {
6439 if (CROSSING_JUMP_P (insn))
6440 {
6441 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6442 insn);
6443 continue;
6444 }
6445
6446 far_label = as_a <rtx_insn *> (
6447 XEXP (SET_SRC (PATTERN (insn)), 0));
6448 dest_uid = get_dest_uid (far_label, max_uid);
6449 if (! dest_uid)
6450 {
6451 /* Parse errors can lead to labels outside
6452 the insn stream. */
6453 if (! NEXT_INSN (far_label))
6454 continue;
6455
6456 if (! optimize)
6457 {
6458 JUMP_LABEL (insn) = far_label;
6459 LABEL_NUSES (far_label)++;
6460 }
6461 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6462 far_label = 0;
6463 }
6464 }
6465 bp = uid_branch[dest_uid];
6466 if (! bp)
6467 {
6468 bp = (struct far_branch *) alloca (sizeof *bp);
6469 uid_branch[dest_uid] = bp;
6470 bp->prev = far_branch_list;
6471 far_branch_list = bp;
6472 bp->near_label = 0;
6473 bp->far_label = far_label;
6474 if (far_label)
6475 LABEL_NUSES (far_label)++;
6476 }
6477 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6478 if (addr - bp->address <= CONDJUMP_MAX)
6479 emit_label_after (bp->near_label, PREV_INSN (insn));
6480 else
6481 {
6482 gen_far_branch (bp);
6483 bp->near_label = 0;
6484 }
6485 else
6486 bp->near_label = 0;
6487 bp->address = addr;
6488 bp->insert_place = insn;
6489 if (! far_label)
6490 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6491 else
6492 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6493 }
6494 }
6495 /* Generate all pending far branches,
6496 and free our references to the far labels. */
6497 while (far_branch_list)
6498 {
6499 if (far_branch_list->near_label
6500 && ! NEXT_INSN (far_branch_list->near_label))
6501 gen_far_branch (far_branch_list);
6502 if (optimize
6503 && far_branch_list->far_label
6504 && ! --LABEL_NUSES (far_branch_list->far_label))
6505 delete_insn (far_branch_list->far_label);
6506 far_branch_list = far_branch_list->prev;
6507 }
6508
6509 /* Instruction length information is no longer valid due to the new
6510 instructions that have been generated. */
6511 init_insn_lengths ();
6512 }
6513
6514 /* Dump out instruction addresses, which is useful for debugging the
6515 constant pool table stuff.
6516
6517 If relaxing, output the label and pseudo-ops used to link together
6518 calls and the instruction which set the registers.
6519
6520 ??? The addresses printed by this routine for insns are nonsense for
6521 insns which are inside of a sequence where none of the inner insns have
6522 variable length. This is because the second pass of shorten_branches
6523 does not bother to update them. */
6524 void
6525 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6526 int noperands ATTRIBUTE_UNUSED)
6527 {
6528 if (TARGET_DUMPISIZE)
6529 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6530
6531 if (TARGET_RELAX)
6532 {
6533 if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
6534 {
6535 rtx pattern = PATTERN (insn);
6536 if (GET_CODE (pattern) == PARALLEL)
6537 pattern = XVECEXP (pattern, 0, 0);
6538 switch (GET_CODE (pattern))
6539 {
6540 case SET:
6541 if (GET_CODE (SET_SRC (pattern)) != CALL
6542 && get_attr_type (insn) != TYPE_SFUNC)
6543 {
6544 targetm.asm_out.internal_label
6545 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6546 break;
6547 }
6548 /* else FALLTHROUGH */
6549 case CALL:
6550 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6551 CODE_LABEL_NUMBER (XEXP (note, 0)));
6552 break;
6553
6554 default:
6555 gcc_unreachable ();
6556 }
6557 }
6558 }
6559 }
6560
6561 /* Dump out any constants accumulated in the final pass. These will
6562 only be labels. */
6563 const char *
6564 output_jump_label_table (void)
6565 {
6566 if (pool_size)
6567 {
6568 fprintf (asm_out_file, "\t.align 2\n");
6569 for (int i = 0; i < pool_size; i++)
6570 {
6571 pool_node *p = &pool_vector[i];
6572
6573 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6574 CODE_LABEL_NUMBER (p->label));
6575 output_asm_insn (".long %O0", &p->value);
6576 }
6577 pool_size = 0;
6578 }
6579
6580 return "";
6581 }
6582 \f
6583 /* A full frame looks like:
6584
6585 arg-5
6586 arg-4
6587 [ if current_function_anonymous_args
6588 arg-3
6589 arg-2
6590 arg-1
6591 arg-0 ]
6592 saved-fp
6593 saved-r10
6594 saved-r11
6595 saved-r12
6596 saved-pr
6597 local-n
6598 ..
6599 local-1
6600 local-0 <- fp points here.
6601
6602 Number of bytes pushed for anonymous args, used to pass information
6603 between expand_prologue and expand_epilogue.
6604
6605 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6606 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6607 for an epilogue and a negative value means that it's for a sibcall
6608 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6609 all the registers that are about to be restored, and hence dead. */
6610 static void
6611 output_stack_adjust (int size, rtx reg, int epilogue_p,
6612 HARD_REG_SET *live_regs_mask, bool frame_p)
6613 {
6614 rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn;
6615 if (size)
6616 {
6617 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6618
6619 /* This test is bogus, as output_stack_adjust is used to re-align the
6620 stack. */
6621 #if 0
6622 gcc_assert (!(size % align));
6623 #endif
6624
6625 if (CONST_OK_FOR_ADD (size))
6626 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6627 /* Try to do it with two partial adjustments; however, we must make
6628 sure that the stack is properly aligned at all times, in case
6629 an interrupt occurs between the two partial adjustments. */
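/* Illustrative example (not from the original comment): size = 192 with
   an 8-byte alignment is emitted as 96 + 96; both halves are small
   enough for CONST_OK_FOR_ADD (assuming its usual signed 8-bit range),
   and the first half is rounded down to a multiple of the alignment so
   the stack stays aligned between the two insns.  */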
6630 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6631 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6632 {
6633 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6634 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6635 }
6636 else
6637 {
6638 rtx const_reg;
6639 rtx insn;
6640 int temp = epilogue_p ? 7 : 1;
6641 int i;
6642
6643 /* If TEMP is invalid, we could temporarily save a general
6644 register to MACL. However, there is currently no need
6645 to handle this case, so just die when we see it. */
6646 if (epilogue_p < 0
6647 || current_function_interrupt
6648 || ! call_really_used_regs[temp] || fixed_regs[temp])
6649 temp = -1;
6650 if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0)
6651 {
6652 HARD_REG_SET temps;
6653 COPY_HARD_REG_SET (temps, call_used_reg_set);
6654 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6655 if (epilogue_p > 0)
6656 {
6657 int nreg = 0;
6658 if (crtl->return_rtx)
6659 {
6660 machine_mode mode;
6661 mode = GET_MODE (crtl->return_rtx);
6662 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6663 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6664 }
6665 for (i = 0; i < nreg; i++)
6666 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6667 if (crtl->calls_eh_return)
6668 {
6669 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6670 for (i = 0; i <= 3; i++)
6671 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6672 }
6673 }
6674 if (epilogue_p <= 0)
6675 {
6676 for (i = FIRST_PARM_REG;
6677 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6678 CLEAR_HARD_REG_BIT (temps, i);
6679 if (cfun->static_chain_decl != NULL)
6680 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6681 }
6682 temp = scavenge_reg (&temps);
6683 }
6684 if (temp < 0 && live_regs_mask)
6685 {
6686 HARD_REG_SET temps;
6687
6688 COPY_HARD_REG_SET (temps, *live_regs_mask);
6689 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6690 temp = scavenge_reg (&temps);
6691 }
6692 if (temp < 0)
6693 {
6694 rtx adj_reg, tmp_reg, mem;
6695
6696 /* If we reached here, the most likely case is the (sibcall)
6697 epilogue. Put a special push/pop sequence for such a case as
6698 the last resort. This looks lengthy but should not be a problem,
6699 because such a case seems to be very rare. */
6700 gcc_assert (epilogue_p);
6701
6702 /* ??? There is still the slight possibility that r4 or
6703 r5 have been reserved as fixed registers or assigned
6704 as global registers, and they change during an
6705 interrupt. There are possible ways to handle this:
6706
6707 - If we are adjusting the frame pointer (r14), we can do
6708 with a single temp register and an ordinary push / pop
6709 on the stack.
6710 - Grab any call-used or call-saved registers (i.e. not
6711 fixed or globals) for the temps we need. We might
6712 also grab r14 if we are adjusting the stack pointer.
6713 If we can't find enough available registers, issue
6714 a diagnostic and die - the user must have reserved
6715 way too many registers.
6716 But since all this is rather unlikely to happen and
6717 would require extra testing, we just die if r4 / r5
6718 are not available. */
6719 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6720 && !global_regs[4] && !global_regs[5]);
6721
6722 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6723 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6724 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6725 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6726 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6727 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6728 emit_move_insn (mem, tmp_reg);
6729 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6730 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6731 emit_move_insn (mem, tmp_reg);
6732 emit_move_insn (reg, adj_reg);
6733 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6734 emit_move_insn (adj_reg, mem);
6735 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6736 emit_move_insn (tmp_reg, mem);
6737 /* Tell flow the insns that pop r4/r5 aren't dead. */
6738 emit_use (tmp_reg);
6739 emit_use (adj_reg);
6740 return;
6741 }
6742 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6743
6744 /* If SIZE is negative, subtract the positive value.
6745 This sometimes allows a constant pool entry to be shared
6746 between prologue and epilogue code. */
6747 if (size < 0)
6748 {
6749 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6750 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6751 }
6752 else
6753 {
6754 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6755 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6756 }
6757 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6758 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
6759 GEN_INT (size))));
6760 }
6761 }
6762 }
6763
6764 /* Emit the specified insn and mark it as frame related. */
6765 static rtx_insn *
6766 emit_frame_insn (rtx x)
6767 {
6768 rtx_insn *insn = emit_insn (x);
6769 RTX_FRAME_RELATED_P (insn) = 1;
6770 return insn;
6771 }
6772
6773 /* Output RTL to push register RN onto the stack. */
6774 static rtx
6775 push (int rn)
6776 {
6777 rtx x;
6778 if (rn == FPUL_REG)
6779 x = gen_push_fpul ();
6780 else if (rn == FPSCR_REG)
6781 x = gen_push_fpscr ();
6782 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6783 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6784 {
6785 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6786 return NULL_RTX;
6787 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6788 }
6789 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6790 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6791 else
6792 x = gen_push (gen_rtx_REG (SImode, rn));
6793
6794 x = emit_frame_insn (x);
6795 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6796 return x;
6797 }
6798
6799 /* Output RTL to pop register RN from the stack. */
6800 static void
6801 pop (int rn)
6802 {
6803 rtx x, sp_reg, reg;
6804 if (rn == FPUL_REG)
6805 x = gen_pop_fpul ();
6806 else if (rn == FPSCR_REG)
6807 x = gen_pop_fpscr ();
6808 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6809 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6810 {
6811 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6812 return;
6813 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6814 }
6815 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6816 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6817 else
6818 x = gen_pop (gen_rtx_REG (SImode, rn));
6819
6820 x = emit_insn (x);
6821
6822 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6823 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6824 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6825 : SET_DEST (PATTERN (x)));
6826 add_reg_note (x, REG_CFA_RESTORE, reg);
6827 add_reg_note (x, REG_CFA_ADJUST_CFA,
6828 gen_rtx_SET (sp_reg,
6829 plus_constant (SImode, sp_reg,
6830 GET_MODE_SIZE (GET_MODE (reg)))));
6831 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6832 RTX_FRAME_RELATED_P (x) = 1;
6833 }
6834
6835 /* Generate code to push the regs specified in the mask. */
6836 static void
6837 push_regs (HARD_REG_SET *mask, bool interrupt_handler)
6838 {
6839 bool skip_fpscr = false;
6840
6841 /* Push PR last; this gives better latencies after the prologue, and
6842 candidates for the return delay slot when there are no general
6843 registers pushed. */
6844 for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6845 i < FIRST_PSEUDO_REGISTER; i++)
6846 {
6847 /* If this is an interrupt handler, and the SZ bit varies,
6848 and we have to push any floating point register, we need
6849 to switch to the correct precision first. */
6850 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6851 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6852 {
6853 HARD_REG_SET unsaved;
6854
6855 push (FPSCR_REG);
6856 COMPL_HARD_REG_SET (unsaved, *mask);
6857 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6858 skip_fpscr = true;
6859 }
6860 if (i != PR_REG
6861 && (i != FPSCR_REG || ! skip_fpscr)
6862 && TEST_HARD_REG_BIT (*mask, i))
6863 {
6864 /* If the ISR has RESBANK attribute assigned, don't push any of
6865 the following registers - R0-R14, MACH, MACL and GBR. */
6866 if (! (sh_cfun_resbank_handler_p ()
6867 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6868 || i == MACH_REG
6869 || i == MACL_REG
6870 || i == GBR_REG)))
6871 push (i);
6872 }
6873 }
6874
6875 /* Push banked registers last to improve delay slot opportunities. */
6876 if (interrupt_handler)
6877 {
6878 bool use_movml = false;
6879
6880 if (TARGET_SH2A)
6881 {
6882 unsigned int count = 0;
6883
6884 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6885 if (TEST_HARD_REG_BIT (*mask, i))
6886 count++;
6887 else
6888 break;
6889
6890 /* Use movml when all banked registers are pushed. */
6891 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6892 use_movml = true;
6893 }
6894
6895 if (sh_cfun_resbank_handler_p ())
6896 ; /* Do nothing. */
6897 else if (use_movml)
6898 {
6899 rtx x, mem, reg, set;
6900 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6901
6902 /* We must avoid scheduling the multiple-store insn together
6903 with other insns. */
6904 emit_insn (gen_blockage ());
6905 x = gen_movml_push_banked (sp_reg);
6906 x = emit_frame_insn (x);
6907 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6908 {
6909 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
6910 reg = gen_rtx_REG (SImode, i);
6911 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6912 }
6913
6914 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
6915 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6916 emit_insn (gen_blockage ());
6917 }
6918 else
6919 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6920 if (TEST_HARD_REG_BIT (*mask, i))
6921 push (i);
6922 }
6923
6924 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6925 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6926 push (PR_REG);
6927 }
6928
6929 /* Work out the registers which need to be saved, both as a mask and a
6930 count of bytes saved. Return the count.
6931
6932 If doing a pragma interrupt function, then push all regs used by the
6933 function, and if we call another function (we can tell by looking at PR),
6934 make sure that all the regs it clobbers are safe too. */
6935 static int
6936 calc_live_regs (HARD_REG_SET *live_regs_mask)
6937 {
6938 unsigned int reg;
6939 tree attrs;
6940 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6941 bool nosave_low_regs;
6942
6943 attrs = DECL_ATTRIBUTES (current_function_decl);
6944 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6945 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6946 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6947 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6948
6949 CLEAR_HARD_REG_SET (*live_regs_mask);
6950 if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler
6951 && df_regs_ever_live_p (FPSCR_REG))
6952 target_flags &= ~MASK_FPU_SINGLE;
6953 /* If switching to double mode lets us avoid a lot of saves, do that. */
6954 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE)
6955 for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6956 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6957 && (! call_really_used_regs[reg]
6958 || interrupt_handler)
6959 && ++count > 2)
6960 {
6961 target_flags &= ~MASK_FPU_SINGLE;
6962 break;
6963 }
6964
6965
6966 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6967 bool pr_live = (pr_initial
6968 ? (!REG_P (pr_initial)
6969 || REGNO (pr_initial) != (PR_REG))
6970 : df_regs_ever_live_p (PR_REG));
6971 /* For SHcompact, if not optimizing, we end up with a memory reference
6972 using the return address pointer for __builtin_return_address even
6973 though there is no actual need to put the PR register on the stack. */
6974 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6975
6976 /* Force PR to be live if the prologue has to call the SHmedia
6977 argument decoder or register saver. */
6978 bool has_call = pr_live;
6979
6980 int count;
6981 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6982 {
6983 if (reg == PR_REG
6984 ? pr_live
6985 : interrupt_handler
6986 ? (/* Need to save all the regs ever live. */
6987 (df_regs_ever_live_p (reg)
6988 || (call_really_used_regs[reg]
6989 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6990 || reg == PIC_OFFSET_TABLE_REGNUM)
6991 && has_call))
6992 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6993 && reg != RETURN_ADDRESS_POINTER_REGNUM
6994 && reg != T_REG && reg != GBR_REG
6995 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
6996 /* Push fpscr only on targets which have an FPU. */
6997 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6998 : (/* Only push those regs which are used and need to be saved. */
6999 (false)
7000 || (df_regs_ever_live_p (reg)
7001 && ((!call_really_used_regs[reg]
7002 && !(reg != PIC_OFFSET_TABLE_REGNUM
7003 && fixed_regs[reg] && call_used_regs[reg]))
7004 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7005 || (crtl->calls_eh_return
7006 && (reg == EH_RETURN_DATA_REGNO (0)
7007 || reg == EH_RETURN_DATA_REGNO (1)
7008 || reg == EH_RETURN_DATA_REGNO (2)
7009 || reg == EH_RETURN_DATA_REGNO (3)))
7010 || ((reg == MACL_REG || reg == MACH_REG)
7011 && df_regs_ever_live_p (reg)
7012 && sh_cfun_attr_renesas_p ())
7013 ))
7014 {
7015 SET_HARD_REG_BIT (*live_regs_mask, reg);
7016 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7017
7018 if (TARGET_FPU_DOUBLE && TARGET_FMOVD
7019 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7020 {
7021 if (FP_REGISTER_P (reg))
7022 {
7023 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7024 {
7025 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7026 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7027 }
7028 }
7029 else if (XD_REGISTER_P (reg))
7030 {
7031 /* Must switch to double mode to access these registers. */
7032 target_flags &= ~MASK_FPU_SINGLE;
7033 }
7034 }
7035 }
7036 if (nosave_low_regs && reg == R8_REG)
7037 break;
7038 }
7039
7040 return count;
7041 }
7042
7043 /* Code to generate prologue and epilogue sequences */
7044
7045 /* PUSHED is the number of bytes that are being pushed on the
7046 stack for register saves. Return the frame size, padded
7047 appropriately so that the stack stays properly aligned. */
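/* Worked example (added; ignoring outgoing args): with
   get_frame_size () == 20, PUSHED == 8 and a 4-byte stack alignment,
   the padded total is (20 + 8 + 3) & -4 == 28, and the function
   returns 28 - 8 == 20.  */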
7048 static HOST_WIDE_INT
7049 rounded_frame_size (int pushed)
7050 {
7051 HOST_WIDE_INT size = get_frame_size ();
7052 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7053
7054 if (ACCUMULATE_OUTGOING_ARGS)
7055 size += crtl->outgoing_args_size;
7056
7057 return ((size + pushed + align - 1) & -align) - pushed;
7058 }
7059
7060 /* Expand code for the function prologue. */
7061 void
7062 sh_expand_prologue (void)
7063 {
7064 int save_flags = target_flags;
7065 tree sp_switch_attr
7066 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7067
7068 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7069
7070 /* We have pretend args if we had an object sent partially in registers
7071 and partially on the stack, e.g. a large structure. */
7072 int pretend_args = crtl->args.pretend_args_size;
7073 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7074 && (NPARM_REGS(SImode)
7075 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7076 pretend_args = 0;
7077
7078 output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true);
7079 int stack_usage = pretend_args;
7080
7081 /* Emit the code for SETUP_VARARGS. */
7082 if (cfun->stdarg)
7083 {
7084 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7085 {
7086 /* Push arg regs as if they'd been provided by the caller on the stack. */
7087 for (int i = 0; i < NPARM_REGS(SImode); i++)
7088 {
7089 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7090
7091 if (i >= (NPARM_REGS(SImode)
7092 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7093 ))
7094 break;
7095 push (rn);
7096 stack_usage += GET_MODE_SIZE (SImode);
7097 }
7098 }
7099 }
7100
7101 /* If we're supposed to switch stacks at function entry, do so now. */
7102 if (sp_switch_attr)
7103 {
7104 rtx lab, newsrc;
7105 /* The argument specifies a variable holding the address of the
7106 stack the interrupt function should switch to/from at entry/exit. */
7107 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7108 const char* s = ggc_strdup (TREE_STRING_POINTER (arg));
7109 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7110
7111 lab = add_constant (sp_switch, SImode, 0);
7112 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7113
7114 emit_insn (gen_sp_switch_1 (newsrc));
7115 }
7116
7117 HARD_REG_SET live_regs_mask;
7118 int d = calc_live_regs (&live_regs_mask);
7119 /* ??? Maybe we could save some switching if we can move a mode switch
7120 that already happens to be at the function start into the prologue. */
7121 if (target_flags != save_flags && ! current_function_interrupt)
7122 emit_insn (gen_toggle_sz ());
7123
7124 push_regs (&live_regs_mask, current_function_interrupt);
7125 stack_usage += d;
7126
7127 if (flag_pic && !TARGET_FDPIC
7128 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7129 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7130
7131 if (target_flags != save_flags && ! current_function_interrupt)
7132 emit_insn (gen_toggle_sz ());
7133
7134 target_flags = save_flags;
7135
7136 output_stack_adjust (-rounded_frame_size (d),
7137 stack_pointer_rtx, 0, NULL, true);
7138 stack_usage += rounded_frame_size (d);
7139
7140 if (frame_pointer_needed)
7141 emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7142
7143 /* If we are profiling, make sure no instructions are scheduled before
7144 the call to mcount. Similarly, if some call instructions get scheduled
7145 before frame related insns, they'll confuse the unwinder because
7146 currently SH has no unwind info for function epilogues. */
7147 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7148 emit_insn (gen_blockage ());
7149
7150 if (flag_stack_usage_info)
7151 current_function_static_stack_size = stack_usage;
7152 }
7153
7154 /* Expand code for the function epilogue. */
7155 void
7156 sh_expand_epilogue (bool sibcall_p)
7157 {
7158 int save_flags = target_flags;
7159 bool fpscr_deferred = false;
7160 int e = sibcall_p ? -1 : 1;
7161
7162 HARD_REG_SET live_regs_mask;
7163 int d = calc_live_regs (&live_regs_mask);
7164
7165 int save_size = d;
7166 int frame_size = rounded_frame_size (d);
7167
7168 if (frame_pointer_needed)
7169 {
7170 /* We must avoid scheduling the epilogue with previous basic blocks.
7171 See PR/18032 and PR/40313. */
7172 emit_insn (gen_blockage ());
7173 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7174 &live_regs_mask, true);
7175
7176 /* We must avoid moving the stack pointer adjustment past code
7177 which reads from the local frame, else an interrupt could
7178 occur after the SP adjustment and clobber data in the local
7179 frame. */
7180 emit_insn (gen_blockage ());
7181 emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7182 }
7183 else if (frame_size)
7184 {
7185 /* We must avoid moving the stack pointer adjustment past code
7186 which reads from the local frame, else an interrupt could
7187 occur after the SP adjustment and clobber data in the local
7188 frame. */
7189 emit_insn (gen_blockage ());
7190 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7191 &live_regs_mask, true);
7192 }
7193
7194 /* Pop all the registers. */
7195
7196 if (target_flags != save_flags && ! current_function_interrupt)
7197 emit_insn (gen_toggle_sz ());
7198
7199 {
7200 int last_reg;
7201
7202 save_size = 0;
7203 /* For an ISR with RESBANK attribute assigned, don't pop the PR
7204 register. */
7205 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7206 && !sh_cfun_resbank_handler_p ())
7207 {
7208 if (!frame_pointer_needed)
7209 emit_insn (gen_blockage ());
7210 pop (PR_REG);
7211 }
7212
7213 /* Banked registers are popped first to avoid being scheduled in the
7214 delay slot. RTE switches banks before the delay slot instruction. */
7215 if (current_function_interrupt)
7216 {
7217 bool use_movml = false;
7218
7219 if (TARGET_SH2A)
7220 {
7221 unsigned int count = 0;
7222
7223 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7224 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7225 count++;
7226 else
7227 break;
7228
7229 /* Use movml when all banked registers are popped. */
7230 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7231 use_movml = true;
7232 }
7233
7234 if (sh_cfun_resbank_handler_p ())
7235 ; /* Do nothing. */
7236 else if (use_movml)
7237 {
7238 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7239
7240 /* We must avoid scheduling the multiple-register load together
7241 with other insns. */
7242 emit_insn (gen_blockage ());
7243 emit_insn (gen_movml_pop_banked (sp_reg));
7244 emit_insn (gen_blockage ());
7245 }
7246 else
7247 for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7248 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7249 pop (i);
7250
7251 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7252 }
7253 else
7254 last_reg = FIRST_PSEUDO_REGISTER;
7255
7256 for (int i = 0; i < last_reg; i++)
7257 {
7258 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7259
7260 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7261 && hard_reg_set_intersect_p (live_regs_mask,
7262 reg_class_contents[DF_REGS]))
7263 fpscr_deferred = true;
7264 /* For an ISR with RESBANK attribute assigned, don't pop
7265 the following registers: R0-R14, MACH, MACL and GBR. */
7266 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7267 && ! (sh_cfun_resbank_handler_p ()
7268 && ((j >= FIRST_GENERAL_REG
7269 && j < LAST_GENERAL_REG)
7270 || j == MACH_REG
7271 || j == MACL_REG
7272 || j == GBR_REG)))
7273 pop (j);
7274
7275 if (j == FIRST_FP_REG && fpscr_deferred)
7276 pop (FPSCR_REG);
7277 }
7278 }
7279 if (target_flags != save_flags && ! current_function_interrupt)
7280 emit_insn (gen_toggle_sz ());
7281 target_flags = save_flags;
7282
7283 output_stack_adjust (crtl->args.pretend_args_size + save_size,
7284 stack_pointer_rtx, e, NULL, true);
7285
7286 if (crtl->calls_eh_return)
7287 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7288 EH_RETURN_STACKADJ_RTX));
7289
7290 /* Switch back to the normal stack if necessary. */
7291 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7292 emit_insn (gen_sp_switch_2 ());
7293
7294 /* Tell flow the insn that pops PR isn't dead. */
7295 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7296 emit_use (gen_rtx_REG (SImode, PR_REG));
7297 }
7298
7299 /* Emit code to change the current function's return address to RA.
7300 TEMP is available as a scratch register, if needed. */
7301 void
7302 sh_set_return_address (rtx ra, rtx tmp)
7303 {
7304 HARD_REG_SET live_regs_mask;
7305 int d = calc_live_regs (&live_regs_mask);
7306
7307 /* If pr_reg isn't live, we can set it directly. */
7308 if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7309 {
7310 rtx rr = gen_rtx_REG (SImode, PR_REG);
7311 emit_insn (GEN_MOV (rr, ra));
7312 /* Tell flow the register for return isn't dead. */
7313 emit_use (rr);
7314 return;
7315 }
7316
7317 int pr_offset = rounded_frame_size (d);
7318
7319 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7320
7321 if (frame_pointer_needed)
7322 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7323 else
7324 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7325
7326 tmp = gen_frame_mem (Pmode, tmp);
7327 emit_insn (GEN_MOV (tmp, ra));
7328 /* Tell flow this store isn't dead. */
7329 emit_use (tmp);
7330 }
7331
7332 /* Clear variables at function end. */
7333 static void
7334 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7335 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7336 {
7337 }
7338
7339 static rtx
7340 sh_builtin_saveregs (void)
7341 {
7342 /* First unnamed integer register. */
7343 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7344 /* Number of integer registers we need to save. */
7345 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7346 /* First unnamed SFmode float reg */
7347 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7348 /* Number of SFmode float regs to save. */
7349 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7350 rtx regbuf, fpregs;
7351 int bufsize, regno;
7352 alias_set_type alias_set;
7353
7354 if (!TARGET_FPU_ANY)
7355 {
7356 error ("__builtin_saveregs not supported by this subtarget");
7357 return const0_rtx;
7358 }
7359
7360 /* Allocate block of memory for the regs. */
7361 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7362 Or can assign_stack_local accept a 0 SIZE argument? */
7363 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7364
7365 if (n_floatregs & 1)
7366 {
7367 rtx addr;
7368
7369 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7370 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7371 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7372 regbuf = change_address (regbuf, BLKmode, addr);
7373 }
7374 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7375 {
7376 rtx addr, mask;
7377
7378 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7379 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
7380 XEXP (regbuf, 0), 4));
7381 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7382 emit_insn (gen_andsi3 (addr, addr, mask));
7383 regbuf = change_address (regbuf, BLKmode, addr);
7384 }
7385 else
7386 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7387 alias_set = get_varargs_alias_set ();
7388 set_mem_alias_set (regbuf, alias_set);
7389
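/* Rough sketch of the regbuf layout assumed by the code below (low to
   high addresses); sizes are in words and depend on the configuration:

       regbuf:  [ float save area : n_floatregs words ]
                [ int save area   : n_intregs words   ]

   Int args are stored starting at offset n_floatregs * UNITS_PER_WORD;
   float args are stored downwards from that same offset. */
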
7390 /* Save int args.
7391 This is optimized to only save the regs that are necessary. Explicitly
7392 named args need not be saved. */
7393 if (n_intregs > 0)
7394 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7395 adjust_address (regbuf, BLKmode,
7396 n_floatregs * UNITS_PER_WORD),
7397 n_intregs);
7398
7399 /* Save float args.
7400 This is optimized to only save the regs that are necessary. Explicitly
7401 named args need not be saved.
7402 We explicitly build a pointer to the buffer because it halves the insn
7403 count when not optimizing (otherwise the pointer is built for each reg
7404 saved).
7405 We emit the moves in reverse order so that we can use predecrement. */
7406
7407 fpregs = copy_to_mode_reg (Pmode,
7408 plus_constant (Pmode, XEXP (regbuf, 0),
7409 n_floatregs * UNITS_PER_WORD));
7410 if (TARGET_FPU_DOUBLE)
7411 {
7412 rtx mem;
7413 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7414 {
7415 emit_insn (gen_addsi3 (fpregs, fpregs,
7416 GEN_INT (-2 * UNITS_PER_WORD)));
7417 mem = change_address (regbuf, DFmode, fpregs);
7418 emit_move_insn (mem,
7419 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7420 }
7421 regno = first_floatreg;
7422 if (regno & 1)
7423 {
7424 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7425 mem = change_address (regbuf, SFmode, fpregs);
7426 emit_move_insn (mem,
7427 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
7428 + regno - SH_REG_MSW_OFFSET));
7429 }
7430 }
7431 else
7432 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7433 {
7434 rtx mem;
7435
7436 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7437 mem = change_address (regbuf, SFmode, fpregs);
7438 emit_move_insn (mem,
7439 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7440 }
7441
7442 /* Return the address of the regbuf. */
7443 return XEXP (regbuf, 0);
7444 }
7445
7446 /* Define the `__builtin_va_list' type for the ABI. */
7447 static tree
7448 sh_build_builtin_va_list (void)
7449 {
7450 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7451 tree record, type_decl;
7452
7453 if ((! TARGET_SH2E && ! TARGET_SH4)
7454 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7455 return ptr_type_node;
7456
7457 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7458 type_decl = build_decl (BUILTINS_LOCATION,
7459 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7460
7461 f_next_o = build_decl (BUILTINS_LOCATION,
7462 FIELD_DECL, get_identifier ("__va_next_o"),
7463 ptr_type_node);
7464 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7465 FIELD_DECL,
7466 get_identifier ("__va_next_o_limit"),
7467 ptr_type_node);
7468 f_next_fp = build_decl (BUILTINS_LOCATION,
7469 FIELD_DECL, get_identifier ("__va_next_fp"),
7470 ptr_type_node);
7471 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7472 FIELD_DECL,
7473 get_identifier ("__va_next_fp_limit"),
7474 ptr_type_node);
7475 f_next_stack = build_decl (BUILTINS_LOCATION,
7476 FIELD_DECL, get_identifier ("__va_next_stack"),
7477 ptr_type_node);
7478
7479 DECL_FIELD_CONTEXT (f_next_o) = record;
7480 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7481 DECL_FIELD_CONTEXT (f_next_fp) = record;
7482 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7483 DECL_FIELD_CONTEXT (f_next_stack) = record;
7484
7485 TYPE_STUB_DECL (record) = type_decl;
7486 TYPE_NAME (record) = type_decl;
7487 TYPE_FIELDS (record) = f_next_o;
7488 DECL_CHAIN (f_next_o) = f_next_o_limit;
7489 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7490 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7491 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7492
7493 layout_type (record);
7494
7495 return record;
7496 }
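
/* For reference, the record built above corresponds roughly to the
   following C sketch (field names taken from the code above):

     struct __va_list_tag
     {
       void *__va_next_o, *__va_next_o_limit;
       void *__va_next_fp, *__va_next_fp_limit;
       void *__va_next_stack;
     }; */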
7497
7498 /* Implement `va_start' for varargs and stdarg. */
7499 static void
7500 sh_va_start (tree valist, rtx nextarg)
7501 {
7502 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7503 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7504 tree t, u;
7505 int nfp, nint;
7506
7507 if ((! TARGET_SH2E && ! TARGET_SH4)
7508 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7509 {
7510 std_expand_builtin_va_start (valist, nextarg);
7511 return;
7512 }
7513
7514 f_next_o = TYPE_FIELDS (va_list_type_node);
7515 f_next_o_limit = DECL_CHAIN (f_next_o);
7516 f_next_fp = DECL_CHAIN (f_next_o_limit);
7517 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7518 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7519
7520 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7521 NULL_TREE);
7522 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7523 valist, f_next_o_limit, NULL_TREE);
7524 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7525 NULL_TREE);
7526 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7527 valist, f_next_fp_limit, NULL_TREE);
7528 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7529 valist, f_next_stack, NULL_TREE);
7530
7531 /* Call __builtin_saveregs. */
7532 u = make_tree (sizetype, expand_builtin_saveregs ());
7533 u = fold_convert (ptr_type_node, u);
7534 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7535 TREE_SIDE_EFFECTS (t) = 1;
7536 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7537
7538 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7539 if (nfp < 8)
7540 nfp = 8 - nfp;
7541 else
7542 nfp = 0;
7543 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
7544 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7545 TREE_SIDE_EFFECTS (t) = 1;
7546 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7547
7548 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7549 TREE_SIDE_EFFECTS (t) = 1;
7550 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7551
7552 nint = crtl->args.info.arg_count[SH_ARG_INT];
7553 if (nint < 4)
7554 nint = 4 - nint;
7555 else
7556 nint = 0;
7557 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
7558 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7559 TREE_SIDE_EFFECTS (t) = 1;
7560 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7561
7562 u = make_tree (ptr_type_node, nextarg);
7563 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7564 TREE_SIDE_EFFECTS (t) = 1;
7565 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7566 }
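
/* Worked sketch of the limits set up above, assuming the usual SH2E/SH4
   configuration of 4 integer and 8 single precision float argument
   registers: with 3 named float args and 2 named int args, next_fp
   points at the register save area, next_fp_limit and next_o lie
   (8 - 3) * UNITS_PER_WORD bytes further on, and next_o_limit another
   (4 - 2) * UNITS_PER_WORD bytes after that. */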
7567
7568 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7569 member, return it. */
7570 static tree
7571 find_sole_member (tree type)
7572 {
7573 tree field, member = NULL_TREE;
7574
7575 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7576 {
7577 if (TREE_CODE (field) != FIELD_DECL)
7578 continue;
7579 if (!DECL_SIZE (field))
7580 return NULL_TREE;
7581 if (integer_zerop (DECL_SIZE (field)))
7582 continue;
7583 if (member)
7584 return NULL_TREE;
7585 member = field;
7586 }
7587 return member;
7588 }
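
/* For example (illustrative only), for
     struct A { double d; };
   the sole member is 'd', while a record with two nonzero-sized fields,
   or with a field of unknown size, yields NULL_TREE. */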
7589
7590 /* Implement `va_arg'. */
7591 static tree
7592 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7593 gimple_seq *post_p ATTRIBUTE_UNUSED)
7594 {
7595 tree tmp;
7596 tree addr, lab_over = NULL, result = NULL;
7597 tree eff_type;
7598
7599 const bool pass_by_ref =
7600 !VOID_TYPE_P (type)
7601 && targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7602
7603 if (pass_by_ref)
7604 type = build_pointer_type (type);
7605
7606 HOST_WIDE_INT size = int_size_in_bytes (type);
7607 HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7608 tree pptr_type_node = build_pointer_type (ptr_type_node);
7609
7610 if ((TARGET_SH2E || TARGET_SH4)
7611 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7612 {
7613 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7614 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7615 tree lab_false;
7616 tree member;
7617
7618 f_next_o = TYPE_FIELDS (va_list_type_node);
7619 f_next_o_limit = DECL_CHAIN (f_next_o);
7620 f_next_fp = DECL_CHAIN (f_next_o_limit);
7621 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7622 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7623
7624 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7625 NULL_TREE);
7626 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7627 valist, f_next_o_limit, NULL_TREE);
7628 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7629 valist, f_next_fp, NULL_TREE);
7630 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7631 valist, f_next_fp_limit, NULL_TREE);
7632 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7633 valist, f_next_stack, NULL_TREE);
7634
7635 /* Structures with a single member with a distinct mode are passed
7636 like their member. This is relevant if the latter has a REAL_TYPE
7637 or COMPLEX_TYPE type. */
7638 eff_type = type;
7639 while (TREE_CODE (eff_type) == RECORD_TYPE
7640 && (member = find_sole_member (eff_type))
7641 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7642 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7643 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7644 {
7645 tree field_type = TREE_TYPE (member);
7646
7647 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7648 eff_type = field_type;
7649 else
7650 {
7651 gcc_assert ((TYPE_ALIGN (eff_type)
7652 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7653 || (TYPE_ALIGN (eff_type)
7654 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7655 break;
7656 }
7657 }
7658
7659 bool pass_as_float;
7660 if (TARGET_FPU_DOUBLE)
7661 {
7662 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7663 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7664 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7665 && size <= 16));
7666 }
7667 else
7668 {
7669 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7670 }
7671
7672 addr = create_tmp_var (pptr_type_node);
7673 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7674 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7675
7676 valist = build_simple_mem_ref (addr);
7677
7678 if (pass_as_float)
7679 {
7680 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
7681 tree cmp;
7682 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7683
7684 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7685 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7686
7687 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7688 tmp = next_fp_limit;
7689 if (size > 4 && !is_double)
7690 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
7691 tmp = build2 (GE_EXPR, boolean_type_node,
7692 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7693 cmp = build3 (COND_EXPR, void_type_node, tmp,
7694 build1 (GOTO_EXPR, void_type_node,
7695 unshare_expr (lab_false)), NULL_TREE);
7696 if (!is_double)
7697 gimplify_and_add (cmp, pre_p);
7698
7699 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7700 || (is_double || size == 16))
7701 {
7702 tmp = fold_convert (sizetype, next_fp_tmp);
7703 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7704 size_int (UNITS_PER_WORD));
7705 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
7706 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7707 }
7708 if (is_double)
7709 gimplify_and_add (cmp, pre_p);
7710
7711 #ifdef FUNCTION_ARG_SCmode_WART
7712 if (TYPE_MODE (eff_type) == SCmode
7713 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7714 {
7715 tree subtype = TREE_TYPE (eff_type);
7716 tree real, imag;
7717
7718 imag
7719 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7720 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7721
7722 real
7723 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7724 real = get_initialized_tmp_var (real, pre_p, NULL);
7725
7726 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7727 if (type != eff_type)
7728 result = build1 (VIEW_CONVERT_EXPR, type, result);
7729 result = get_initialized_tmp_var (result, pre_p, NULL);
7730 }
7731 #endif /* FUNCTION_ARG_SCmode_WART */
7732
7733 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7734 gimplify_and_add (tmp, pre_p);
7735
7736 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7737 gimplify_and_add (tmp, pre_p);
7738
7739 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7740 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7741 gimplify_assign (unshare_expr (next_fp_tmp),
7742 unshare_expr (valist), pre_p);
7743
7744 gimplify_assign (unshare_expr (valist),
7745 unshare_expr (next_fp_tmp), post_p);
7746 valist = next_fp_tmp;
7747 }
7748 else
7749 {
7750 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
7751 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7752 unshare_expr (next_o_limit));
7753 tmp = build3 (COND_EXPR, void_type_node, tmp,
7754 build1 (GOTO_EXPR, void_type_node,
7755 unshare_expr (lab_false)),
7756 NULL_TREE);
7757 gimplify_and_add (tmp, pre_p);
7758
7759 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7760 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7761
7762 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7763 gimplify_and_add (tmp, pre_p);
7764
7765 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7766 gimplify_and_add (tmp, pre_p);
7767
7768 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7769 gimplify_assign (unshare_expr (next_o),
7770 unshare_expr (next_o_limit), pre_p);
7771
7772 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7773 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7774 }
7775
7776 if (!result)
7777 {
7778 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7779 gimplify_and_add (tmp, pre_p);
7780 }
7781 }
7782
7783 /* ??? In va-sh.h, there had been code to make values larger than
7784 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7785
7786 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7787 if (result)
7788 {
7789 gimplify_assign (result, tmp, pre_p);
7790 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7791 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7792 gimplify_and_add (tmp, pre_p);
7793 }
7794 else
7795 result = tmp;
7796
7797 if (pass_by_ref)
7798 result = build_va_arg_indirect_ref (result);
7799
7800 return result;
7801 }
7802
7803 /* 64 bit floating point memory transfers are paired single precision loads
7804 or stores. So DWARF information needs fixing in little endian (unless
7805 PR=SZ=1 in FPSCR). */
7806 rtx
7807 sh_dwarf_register_span (rtx reg)
7808 {
7809 unsigned regno = REGNO (reg);
7810
7811 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7812 return NULL_RTX;
7813
7814 return
7815 gen_rtx_PARALLEL (VOIDmode,
7816 gen_rtvec (2,
7817 gen_rtx_REG (SFmode, regno + 1),
7818 gen_rtx_REG (SFmode, regno)));
7819 }
7820
7821 static machine_mode
7822 sh_promote_function_mode (const_tree type, machine_mode mode,
7823 int *punsignedp, const_tree funtype,
7824 int for_return)
7825 {
7826 if (sh_promote_prototypes (funtype))
7827 return promote_mode (type, mode, punsignedp);
7828 else
7829 return default_promote_function_mode (type, mode, punsignedp, funtype,
7830 for_return);
7831 }
7832
7833 static bool
7834 sh_promote_prototypes (const_tree type)
7835 {
7836 if (TARGET_HITACHI)
7837 return false;
7838 if (! type)
7839 return true;
7840 return ! sh_attr_renesas_p (type);
7841 }
7842
7843 static bool
7844 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7845 const_tree type, bool named ATTRIBUTE_UNUSED)
7846 {
7847 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7848
7849 if (targetm.calls.must_pass_in_stack (mode, type))
7850 return true;
7851
7852 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7853 wants to know about pass-by-reference semantics for incoming
7854 arguments. */
7855 if (! cum)
7856 return false;
7857
7858 return false;
7859 }
7860
7861 static bool
7862 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
7863 const_tree type, bool named ATTRIBUTE_UNUSED)
7864 {
7865 /* ??? How can it possibly be correct to return true only on the
7866 caller side of the equation? Is there someplace else in the
7867 sh backend that's magically producing the copies? */
7868 return (get_cumulative_args (cum)->outgoing
7869 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7870 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7871 }
7872
7873 static sh_arg_class
7874 get_sh_arg_class (machine_mode mode)
7875 {
7876 if (TARGET_FPU_ANY && mode == SFmode)
7877 return SH_ARG_FLOAT;
7878
7879 if (TARGET_FPU_DOUBLE
7880 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7881 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT))
7882 return SH_ARG_FLOAT;
7883
7884 return SH_ARG_INT;
7885 }
7886
7887 /* Round a register number up to a proper boundary for an arg of mode
7888 MODE.
7889 The SH doesn't care about double alignment, so we only
7890 round doubles to even regs when explicitly asked to. */
7891 static int
7892 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
7893 {
7894 /* FIXME: This used to be a macro and has been copy pasted into this
7895 function as is. Make this more readable. */
7896 return
7897 (((TARGET_ALIGN_DOUBLE
7898 || (TARGET_FPU_DOUBLE
7899 && (mode == DFmode || mode == DCmode)
7900 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
7901 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
7902 ? (cum.arg_count[(int) get_sh_arg_class (mode)]
7903 + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1))
7904 : cum.arg_count[(int) get_sh_arg_class (mode)]);
7905 }
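
/* Worked sketch of the rounding above: with TARGET_ALIGN_DOUBLE, a
   DFmode argument and an argument count of 5, the result is
   5 + (5 & 1) == 6, i.e. the value starts at an even register number;
   an even count, or a unit size no larger than a word, is returned
   unchanged. */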
7906
7907 /* Return true if arg of the specified mode should be passed in a register
7908 or false otherwise. */
7909 static bool
7910 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
7911 const_tree type)
7912 {
7913 /* FIXME: This used to be a macro and has been copy pasted into this
7914 function as is. Make this more readable. */
7915 return
7916 ((type == 0
7917 || (! TREE_ADDRESSABLE (type)
7918 && (! (TARGET_HITACHI || cum.renesas_abi)
7919 || ! (AGGREGATE_TYPE_P (type)
7920 || (!TARGET_FPU_ANY
7921 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7922 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
7923 && ! cum.force_mem
7924 && (TARGET_SH2E
7925 ? ((mode) == BLKmode
7926 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
7927 + int_size_in_bytes (type))
7928 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
7929 : ((sh_round_reg (cum, mode)
7930 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
7931 <= NPARM_REGS (mode)))
7932 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
7933 }
7934
7935 static int
7936 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
7937 tree type, bool named ATTRIBUTE_UNUSED)
7938 {
7939 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7940 int words = 0;
7941
7942 if (sh_pass_in_reg_p (*cum, mode, type)
7943 && !TARGET_FPU_DOUBLE
7944 && (sh_round_reg (*cum, mode)
7945 + (mode != BLKmode
7946 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
7947 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
7948 > NPARM_REGS (mode)))
7949 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
7950
7951 return words * UNITS_PER_WORD;
7952 }
7953
7954
7955 /* Define where to put the arguments to a function.
7956 Value is zero to push the argument on the stack,
7957 or a hard register in which to store the argument.
7958
7959 MODE is the argument's machine mode.
7960 TYPE is the data type of the argument (as a tree).
7961 This is null for libcalls where that information may
7962 not be available.
7963 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7964 the preceding args and about the function being called.
7965 NAMED is nonzero if this argument is a named parameter
7966 (otherwise it is an extra parameter matching an ellipsis).
7967
7968 On SH the first args are normally in registers
7969 and the rest are pushed. Any arg that starts within the first
7970 NPARM_REGS words is at least partially passed in a register unless
7971 its data type forbids. */
7972 static rtx
7973 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
7974 const_tree type, bool named)
7975 {
7976 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
7977
7978 if (mode == VOIDmode)
7979 return ca->renesas_abi ? const1_rtx : const0_rtx;
7980
7981 if (sh_pass_in_reg_p (*ca, mode, type)
7982 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7983 {
7984 int regno;
7985
7986 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7987 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
7988 {
7989 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7990 gen_rtx_REG (SFmode,
7991 BASE_ARG_REG (mode)
7992 + (sh_round_reg (*ca, mode) ^ 1)),
7993 const0_rtx);
7994 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7995 gen_rtx_REG (SFmode,
7996 BASE_ARG_REG (mode)
7997 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
7998 GEN_INT (4));
7999 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8000 }
8001
8002 /* If the alignment of a DF value causes an SF register to be
8003 skipped, we will use that skipped register for the next SF
8004 value. */
8005 if ((TARGET_HITACHI || ca->renesas_abi)
8006 && ca->free_single_fp_reg
8007 && mode == SFmode)
8008 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8009
8010 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8011 ^ (mode == SFmode && TARGET_SH4
8012 && TARGET_LITTLE_ENDIAN
8013 && ! TARGET_HITACHI && ! ca->renesas_abi);
8014 return gen_rtx_REG (mode, regno);
8015
8016 }
8017
8018 return NULL_RTX;
8019 }
8020
8021 /* Update the data in CUM to advance over an argument
8022 of mode MODE and data type TYPE.
8023 (TYPE is null for libcalls where that information may not be
8024 available.) */
8025 static void
8026 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
8027 const_tree type, bool named ATTRIBUTE_UNUSED)
8028 {
8029 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8030
8031 if (ca->force_mem)
8032 ca->force_mem = false;
8033
8034 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8035 {
8036 /* Note that we've used the skipped register. */
8037 if (mode == SFmode && ca->free_single_fp_reg)
8038 {
8039 ca->free_single_fp_reg = 0;
8040 return;
8041 }
8042 /* When we have a DF after an SF, there's an SF register that gets
8043 skipped in order to align the DF value. We note this skipped
8044 register, because the next SF value will use it, and not the
8045 SF that follows the DF. */
8046 if (mode == DFmode
8047 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
8048 {
8049 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
8050 + BASE_ARG_REG (mode));
8051 }
8052 }
8053
8054 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8055 || sh_pass_in_reg_p (*ca, mode, type))
8056 (ca->arg_count[(int) get_sh_arg_class (mode)]
8057 = (sh_round_reg (*ca, mode)
8058 + (mode == BLKmode
8059 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8060 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
8061 }
8062
8063 /* The Renesas calling convention doesn't quite fit into this scheme since
8064 the address is passed like an invisible argument, but one that is always
8065 passed in memory. */
8066 static rtx
8067 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8068 {
8069 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8070 return NULL_RTX;
8071 return gen_rtx_REG (Pmode, 2);
8072 }
8073
8074 /* Worker function for TARGET_FUNCTION_VALUE.
8075
8076 For the SH, this is like LIBCALL_VALUE, except that we must change the
8077 mode like PROMOTE_MODE does.
8078 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8079 tested here has to be kept in sync with the one in
8080 explow.c:promote_mode. */
8081 static rtx
8082 sh_function_value (const_tree valtype,
8083 const_tree fn_decl_or_type,
8084 bool outgoing ATTRIBUTE_UNUSED)
8085 {
8086 if (fn_decl_or_type
8087 && !DECL_P (fn_decl_or_type))
8088 fn_decl_or_type = NULL;
8089
8090 return gen_rtx_REG (
8091 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8092 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8093 && (TREE_CODE (valtype) == INTEGER_TYPE
8094 || TREE_CODE (valtype) == ENUMERAL_TYPE
8095 || TREE_CODE (valtype) == BOOLEAN_TYPE
8096 || TREE_CODE (valtype) == REAL_TYPE
8097 || TREE_CODE (valtype) == OFFSET_TYPE))
8098 && sh_promote_prototypes (fn_decl_or_type)
8099 ? SImode : TYPE_MODE (valtype)),
8100 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8101 }
8102
8103 /* Worker function for TARGET_LIBCALL_VALUE. */
8104 static rtx
8105 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8106 {
8107 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8108 }
8109
8110 /* Return true if N is a possible register number of function value. */
8111 static bool
8112 sh_function_value_regno_p (const unsigned int regno)
8113 {
8114 return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG);
8115 }
8116
8117 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8118 static bool
8119 sh_return_in_memory (const_tree type, const_tree fndecl)
8120 {
8121 return TYPE_MODE (type) == BLKmode
8122 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8123 && TREE_CODE (type) == RECORD_TYPE);
8124 }
8125
8126 /* We actually emit the code in sh_expand_prologue. We used to use
8127 a static variable to flag that we need to emit this code, but that
8128 doesn't work when inlining, when functions are deferred and then emitted
8129 later. Fortunately, we already have two flags that are part of struct
8130 function that tell if a function uses varargs or stdarg. */
8131 static void
8132 sh_setup_incoming_varargs (cumulative_args_t ca,
8133 machine_mode mode,
8134 tree type,
8135 int *pretend_arg_size,
8136 int second_time ATTRIBUTE_UNUSED)
8137 {
8138 gcc_assert (cfun->stdarg);
8139 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8140 {
8141 int named_parm_regs, anon_parm_regs;
8142
8143 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
8144 + (mode == BLKmode
8145 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8146 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
8147 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8148 if (anon_parm_regs > 0)
8149 *pretend_arg_size = anon_parm_regs * 4;
8150 }
8151 }
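
/* Worked sketch, assuming the usual four SImode parameter registers
   (r4..r7): a stdarg function with two named word-sized arguments gives
   named_parm_regs == 2 and anon_parm_regs == 2, so 8 bytes of pretend
   arguments are reserved for the two remaining registers. */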
8152
8153 static bool
8154 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8155 {
8156 return false;
8157 }
8158
8159 static bool
8160 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8161 {
8162 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8163
8164 return ! (TARGET_HITACHI || ca->renesas_abi);
8165 }
8166
8167
8168 /* Define the offset between two registers, one to be eliminated, and
8169 the other its replacement, at the start of a routine. */
8170 int
8171 initial_elimination_offset (int from, int to)
8172 {
8173 const int regs_saved_rounding = 0;
8174 int save_flags = target_flags;
8175 HARD_REG_SET live_regs_mask;
8176
8177 int regs_saved = calc_live_regs (&live_regs_mask);
8178
8179 int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8180 target_flags = save_flags;
8181
8182 int total_saved_regs_space = regs_saved + regs_saved_rounding;
8183
8184 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8185 return total_saved_regs_space + total_auto_space;
8186
8187 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8188 return total_saved_regs_space + total_auto_space;
8189
8190 /* Initial gap between fp and sp is 0. */
8191 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8192 return 0;
8193
8194 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8195 return rounded_frame_size (0);
8196
8197 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8198 return rounded_frame_size (0);
8199
8200 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8201 && (to == HARD_FRAME_POINTER_REGNUM
8202 || to == STACK_POINTER_REGNUM));
8203 return total_auto_space;
8204 }
8205
8206 /* Parse the -mfixed-range= option string. */
8207 void
8208 sh_fix_range (const char *const_str)
8209 {
8210 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8211 REG2 are either register names or register numbers. The effect
8212 of this option is to mark the registers in the range from REG1 to
8213 REG2 as ``fixed'' so they won't be used by the compiler. */
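/* For example (illustrative only), -mfixed-range=r4-r7,fr12-fr15 marks
   r4..r7 and fr12..fr15 as fixed and call-used. */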
8214
8215 char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str);
8216
8217 while (1)
8218 {
8219 char* dash = strchr (str, '-');
8220 if (!dash)
8221 {
8222 warning (0, "value of -mfixed-range must have form REG1-REG2");
8223 return;
8224 }
8225 *dash = '\0';
8226 char* comma = strchr (dash + 1, ',');
8227 if (comma)
8228 *comma = '\0';
8229
8230 int first = decode_reg_name (str);
8231 if (first < 0)
8232 {
8233 warning (0, "unknown register name: %s", str);
8234 return;
8235 }
8236
8237 int last = decode_reg_name (dash + 1);
8238 if (last < 0)
8239 {
8240 warning (0, "unknown register name: %s", dash + 1);
8241 return;
8242 }
8243
8244 *dash = '-';
8245
8246 if (first > last)
8247 {
8248 warning (0, "%s-%s is an empty range", str, dash + 1);
8249 return;
8250 }
8251
8252 for (int i = first; i <= last; ++i)
8253 fixed_regs[i] = call_used_regs[i] = 1;
8254
8255 if (!comma)
8256 break;
8257
8258 *comma = ',';
8259 str = comma + 1;
8260 }
8261 }
8262 \f
8263 /* Insert any deferred function attributes from earlier pragmas. */
8264 static void
8265 sh_insert_attributes (tree node, tree *attributes)
8266 {
8267 if (TREE_CODE (node) != FUNCTION_DECL)
8268 return;
8269
8270 /* We are only interested in declarations. */
8271 if (!DECL_P (node))
8272 return;
8273
8274 /* Append the attributes to the deferred attributes. */
8275 *sh_deferred_function_attributes_tail = *attributes;
8276 tree attrs = sh_deferred_function_attributes;
8277 if (!attrs)
8278 return;
8279
8280 /* Some attributes imply or require the interrupt attribute. */
8281 if (!lookup_attribute ("interrupt_handler", attrs)
8282 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8283 {
8284 /* If we have a trapa_handler, but no interrupt_handler attribute,
8285 insert an interrupt_handler attribute. */
8286 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8287 /* We can't use sh_pr_interrupt here because that's not in the
8288 java frontend. */
8289 attrs
8290 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8291 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8292 if the interrupt attribute is missing, we ignore the attribute
8293 and warn. */
8294 else if (lookup_attribute ("sp_switch", attrs)
8295 || lookup_attribute ("trap_exit", attrs)
8296 || lookup_attribute ("nosave_low_regs", attrs)
8297 || lookup_attribute ("resbank", attrs))
8298 {
8299 tree *tail;
8300
8301 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8302 {
8303 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8304 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8305 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8306 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8307 warning (OPT_Wattributes,
8308 "%qE attribute only applies to interrupt functions",
8309 TREE_PURPOSE (attrs));
8310 else
8311 {
8312 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8313 NULL_TREE);
8314 tail = &TREE_CHAIN (*tail);
8315 }
8316 }
8317 attrs = *attributes;
8318 }
8319 }
8320
8321 /* Install the processed list. */
8322 *attributes = attrs;
8323
8324 /* Clear deferred attributes. */
8325 sh_deferred_function_attributes = NULL_TREE;
8326 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8327
8328 return;
8329 }
8330
8331 /*------------------------------------------------------------------------------
8332 Target specific attributes
8333 Supported attributes are:
8334
8335 * interrupt_handler
8336 Specifies this function is an interrupt handler.
8337
8338 * trapa_handler
8339 Like interrupt_handler, but don't save all registers.
8340
8341 * sp_switch
8342 Specifies an alternate stack for an interrupt handler to run on.
8343
8344 * trap_exit
8345 Use a trapa to exit an interrupt function instead of rte.
8346
8347 * nosave_low_regs
8348 Don't save r0..r7 in an interrupt handler function.
8349 This is useful on SH3* and SH4*, which have a separate set of low
8350 regs for user and privileged modes.
8351 This is mainly to be used for non-reentrant interrupt handlers (i.e.
8352 those that run with interrupts disabled and thus can't be
8353 interrupted themselves).
8354
8355 * renesas
8356 Use Renesas calling/layout conventions (functions and structures).
8357
8358 * resbank
8359 In case of an interrupt handler function, use a register bank to
8360 save registers R0-R14, MACH, MACL, GBR and PR.
8361 This is available only on SH2A targets.
8362
8363 * function_vector
8364 Declares a function to be called using the TBR relative addressing
8365 mode. Takes an argument that specifies the slot number in the table
8366 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
8367 */
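
/* A minimal usage sketch (the function and variable names here are
   hypothetical and for illustration only):

     void handler (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       nosave_low_regs));

     void fast_entry (void)
       __attribute__ ((function_vector (18)));
*/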
8368
8369 /* Handle a 'resbank' attribute. */
8370 static tree
8371 sh_handle_resbank_handler_attribute (tree * node, tree name,
8372 tree args ATTRIBUTE_UNUSED,
8373 int flags ATTRIBUTE_UNUSED,
8374 bool * no_add_attrs)
8375 {
8376 if (!TARGET_SH2A)
8377 {
8378 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8379 name);
8380 *no_add_attrs = true;
8381 }
8382 if (TREE_CODE (*node) != FUNCTION_DECL)
8383 {
8384 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8385 name);
8386 *no_add_attrs = true;
8387 }
8388
8389 return NULL_TREE;
8390 }
8391
8392 /* Handle an "interrupt_handler" attribute; arguments as in
8393 struct attribute_spec.handler. */
8394 static tree
8395 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8396 tree args ATTRIBUTE_UNUSED,
8397 int flags ATTRIBUTE_UNUSED,
8398 bool *no_add_attrs)
8399 {
8400 if (TREE_CODE (*node) != FUNCTION_DECL)
8401 {
8402 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8403 name);
8404 *no_add_attrs = true;
8405 }
8406
8407 return NULL_TREE;
8408 }
8409
8410 /* Handle a 'function_vector' attribute; arguments as in
8411 struct attribute_spec.handler. */
8412 static tree
8413 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8414 tree args ATTRIBUTE_UNUSED,
8415 int flags ATTRIBUTE_UNUSED,
8416 bool * no_add_attrs)
8417 {
8418 if (!TARGET_SH2A)
8419 {
8420 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8421 name);
8422 *no_add_attrs = true;
8423 }
8424 else if (TREE_CODE (*node) != FUNCTION_DECL)
8425 {
8426 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8427 name);
8428 *no_add_attrs = true;
8429 }
8430 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8431 {
8432 /* The argument must be a constant integer. */
8433 warning (OPT_Wattributes,
8434 "%qE attribute argument not an integer constant",
8435 name);
8436 *no_add_attrs = true;
8437 }
8438 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8439 {
8440 /* The argument value must be between 0 and 255. */
8441 warning (OPT_Wattributes,
8442 "%qE attribute argument should be between 0 and 255",
8443 name);
8444 *no_add_attrs = true;
8445 }
8446 return NULL_TREE;
8447 }
8448
8449 /* Returns true if the symbol reference X refers to a function that has
8450 been assigned the 'function_vector' attribute. */
8451 bool
8452 sh2a_is_function_vector_call (rtx x)
8453 {
8454 if (GET_CODE (x) == SYMBOL_REF
8455 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8456 {
8457 tree tr = SYMBOL_REF_DECL (x);
8458
8459 if (sh2a_function_vector_p (tr))
8460 return true;
8461 }
8462
8463 return false;
8464 }
8465
8466 /* Returns the function vector number, if the attribute
8467 'function_vector' is assigned, otherwise returns zero. */
8468 int
8469 sh2a_get_function_vector_number (rtx x)
8470 {
8471 if ((GET_CODE (x) == SYMBOL_REF)
8472 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8473 {
8474 tree t = SYMBOL_REF_DECL (x);
8475
8476 if (TREE_CODE (t) != FUNCTION_DECL)
8477 return 0;
8478
8479 for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list))
8480 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8481 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8482
8483 return 0;
8484 }
8485 else
8486 return 0;
8487 }
8488
8489 /* Handle an "sp_switch" attribute; arguments as in
8490 struct attribute_spec.handler. */
8491 static tree
8492 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8493 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8494 {
8495 if (TREE_CODE (*node) != FUNCTION_DECL)
8496 {
8497 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8498 name);
8499 *no_add_attrs = true;
8500 }
8501 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8502 {
8503 /* The argument must be a constant string. */
8504 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8505 name);
8506 *no_add_attrs = true;
8507 }
8508
8509 return NULL_TREE;
8510 }
8511
8512 /* Handle a "trap_exit" attribute; arguments as in
8513 struct attribute_spec.handler. */
8514 static tree
8515 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8516 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8517 {
8518 if (TREE_CODE (*node) != FUNCTION_DECL)
8519 {
8520 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8521 name);
8522 *no_add_attrs = true;
8523 }
8524 /* The argument specifies a trap number to be used in a trapa instruction
8525 at function exit (instead of an rte instruction). */
8526 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8527 {
8528 /* The argument must be a constant integer. */
8529 warning (OPT_Wattributes, "%qE attribute argument not an "
8530 "integer constant", name);
8531 *no_add_attrs = true;
8532 }
8533
8534 return NULL_TREE;
8535 }
8536
8537 static tree
8538 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8539 tree name ATTRIBUTE_UNUSED,
8540 tree args ATTRIBUTE_UNUSED,
8541 int flags ATTRIBUTE_UNUSED,
8542 bool *no_add_attrs ATTRIBUTE_UNUSED)
8543 {
8544 return NULL_TREE;
8545 }
8546
8547 /* True if __attribute__((renesas)) or -mrenesas. */
8548 bool
8549 sh_attr_renesas_p (const_tree td)
8550 {
8551 if (TARGET_HITACHI)
8552 return true;
8553 if (td == NULL_TREE)
8554 return false;
8555 if (DECL_P (td))
8556 td = TREE_TYPE (td);
8557 if (td == error_mark_node)
8558 return false;
8559 return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE;
8560 }
8561
8562 /* True if __attribute__((renesas)) or -mrenesas, for the current
8563 function. */
8564 bool
8565 sh_cfun_attr_renesas_p (void)
8566 {
8567 return sh_attr_renesas_p (current_function_decl);
8568 }
8569
8570 /* Returns true if the current function has the "interrupt_handler"
8571 attribute set. */
8572 bool
8573 sh_cfun_interrupt_handler_p (void)
8574 {
8575 return (lookup_attribute ("interrupt_handler",
8576 DECL_ATTRIBUTES (current_function_decl))
8577 != NULL_TREE);
8578 }
8579
8580 /* Returns true if FUNC has been assigned the attribute
8581 "function_vector". */
8582 bool
8583 sh2a_function_vector_p (tree func)
8584 {
8585 if (TREE_CODE (func) != FUNCTION_DECL)
8586 return false;
8587
8588 for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
8589 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8590 return true;
8591
8592 return false;
8593 }
8594
8595 /* Returns true if the current function has the "resbank" attribute set. */
8596 bool
8597 sh_cfun_resbank_handler_p (void)
8598 {
8599 return ((lookup_attribute ("resbank",
8600 DECL_ATTRIBUTES (current_function_decl))
8601 != NULL_TREE)
8602 && (lookup_attribute ("interrupt_handler",
8603 DECL_ATTRIBUTES (current_function_decl))
8604 != NULL_TREE) && TARGET_SH2A);
8605 }
8606
8607 /* Returns true if the current function has a "trap_exit" attribute set. */
8608 bool
8609 sh_cfun_trap_exit_p (void)
8610 {
8611 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
8612 != NULL_TREE;
8613 }
8614
8615 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8616 static const char *
8617 sh_check_pch_target_flags (int old_flags)
8618 {
8619 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8620 | MASK_SH_E | MASK_HARD_SH4
8621 | MASK_FPU_SINGLE | MASK_SH4))
8622 return _("created and used with different architectures / ABIs");
8623 if ((old_flags ^ target_flags) & MASK_HITACHI)
8624 return _("created and used with different ABIs");
8625 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8626 return _("created and used with different endianness");
8627 return NULL;
8628 }
8629 \f
8630 /* Predicates used by the templates. */
8631
8632 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
8633 Used only in general_movsrc_operand. */
8634 bool
8635 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8636 {
8637 switch (REGNO (op))
8638 {
8639 case PR_REG:
8640 case MACL_REG:
8641 case MACH_REG:
8642 return true;
8643 }
8644 return false;
8645 }
8646
8647 /* Returns true if OP is a floating point value with value 0.0. */
8648 bool
8649 fp_zero_operand (rtx op)
8650 {
8651 if (GET_MODE (op) != SFmode)
8652 return false;
8653
8654 const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op);
8655 return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r);
8656 }
8657
8658 /* Returns true if OP is a floating point value with value 1.0. */
8659 bool
8660 fp_one_operand (rtx op)
8661 {
8662 if (GET_MODE (op) != SFmode)
8663 return false;
8664
8665 return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
8666 }
8667
8668 /* Return the TLS type for TLS symbols. */
8669 enum tls_model
8670 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8671 {
8672 if (GET_CODE (op) != SYMBOL_REF)
8673 return TLS_MODEL_NONE;
8674 return SYMBOL_REF_TLS_MODEL (op);
8675 }
8676 \f
8677 /* Return the destination address of a branch. */
8678 static int
8679 branch_dest (rtx branch)
8680 {
8681 rtx dest = SET_SRC (PATTERN (branch));
8682
8683 if (GET_CODE (dest) == IF_THEN_ELSE)
8684 dest = XEXP (dest, 1);
8685
8686 return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0)));
8687 }
8688 \f
8689 /* Return nonzero if REG is not used after INSN.
8690 We assume REG is a reload reg, and therefore does
8691 not live past labels. It may live past calls or jumps though. */
8692 bool
8693 reg_unused_after (rtx reg, rtx_insn *insn)
8694 {
8695 /* If the reg is set by this instruction, then it is safe for our
8696 case. Disregard the case where this is a store to memory, since
8697 we are checking a register used in the store address. */
8698 rtx set = single_set (insn);
8699 if (set && !MEM_P (SET_DEST (set))
8700 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8701 return true;
8702
8703 while ((insn = NEXT_INSN (insn)))
8704 {
8705 if (!INSN_P (insn))
8706 continue;
8707
8708 rtx_code code = GET_CODE (insn);
8709
8710 #if 0
8711 /* If this is a label that existed before reload, then the register
8712 is dead here. However, if this is a label added by reorg, then
8713 the register may still be live here. We can't tell the difference,
8714 so we just ignore labels completely. */
8715 if (code == CODE_LABEL)
8716 return 1;
8717 /* else */
8718 #endif
8719
8720 if (code == JUMP_INSN)
8721 return false;
8722
8723 /* If this is a sequence, we must handle them all at once.
8724 We could have for instance a call that sets the target register,
8725 and an insn in a delay slot that uses the register. In this case,
8726 we must return 0. */
8727 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8728 {
8729 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
8730 bool retval = false;
8731
8732 for (int i = 0; i < seq->len (); i++)
8733 {
8734 rtx_insn *this_insn = seq->insn (i);
8735 rtx set = single_set (this_insn);
8736
8737 if (CALL_P (this_insn))
8738 code = CALL_INSN;
8739 else if (JUMP_P (this_insn))
8740 {
8741 if (INSN_ANNULLED_BRANCH_P (this_insn))
8742 return false;
8743 code = JUMP_INSN;
8744 }
8745
8746 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8747 return false;
8748 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8749 {
8750 if (!MEM_P (SET_DEST (set)))
8751 retval = true;
8752 else
8753 return false;
8754 }
8755 if (set == NULL_RTX
8756 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8757 return false;
8758 }
8759 if (retval)
8760 return true;
8761 else if (code == JUMP_INSN)
8762 return false;
8763 }
8764
8765 rtx set = single_set (insn);
8766 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8767 return false;
8768 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8769 return !MEM_P (SET_DEST (set));
8770 if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8771 return false;
8772
8773 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8774 return true;
8775 }
8776 return true;
8777 }
8778 \f
8779
8780 static GTY(()) rtx t_reg_rtx;
8781 rtx
8782 get_t_reg_rtx (void)
8783 {
8784 if (! t_reg_rtx)
8785 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
8786 return t_reg_rtx;
8787 }
8788
8789 static GTY(()) tree fpscr_values;
8790
8791 static void
8792 emit_fpu_switch (rtx scratch, int index)
8793 {
8794 if (fpscr_values == NULL)
8795 {
8796 tree t = build_index_type (integer_one_node);
8797 t = build_array_type (integer_type_node, t);
8798 t = build_decl (BUILTINS_LOCATION,
8799 VAR_DECL, get_identifier ("__fpscr_values"), t);
8800 DECL_ARTIFICIAL (t) = 1;
8801 DECL_IGNORED_P (t) = 1;
8802 DECL_EXTERNAL (t) = 1;
8803 TREE_STATIC (t) = 1;
8804 TREE_PUBLIC (t) = 1;
8805 TREE_USED (t) = 1;
8806
8807 fpscr_values = t;
8808 }
8809
8810 rtx src = DECL_RTL (fpscr_values);
8811 if (!can_create_pseudo_p ())
8812 {
8813 emit_move_insn (scratch, XEXP (src, 0));
8814 if (index != 0)
8815 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8816 src = adjust_automodify_address (src, SImode, scratch, index * 4);
8817 }
8818 else
8819 src = adjust_address (src, SImode, index * 4);
8820
8821 emit_insn (gen_lds_fpscr (src));
8822 }
8823 \f
8824 static rtx get_free_reg (HARD_REG_SET);
8825
8826 /* This function returns a register to use for loading the address that
8827 the fpscr is loaded from. Currently it always returns r1 or r7, but when we are
8828 able to use pseudo registers after combine, or have a better mechanism
8829 for choosing a register, it should be done here. */
8830 /* REGS_LIVE is the liveness information for the point for which we
8831 need this allocation. In some bare-bones exit blocks, r1 is live at the
8832 start. We can even have all of r0..r3 being live:
8833 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8834 The INSN before which new insns are placed will clobber the register
8835 we return. If a basic block consists only of setting the return value
8836 register to a pseudo and using that register, the return value is not
8837 live before or after this block, yet we'll insert our insns right in
8838 the middle. */
8839 static rtx
8840 get_free_reg (HARD_REG_SET regs_live)
8841 {
8842 if (! TEST_HARD_REG_BIT (regs_live, 1))
8843 return gen_rtx_REG (Pmode, 1);
8844
8845 /* Hard reg 1 is live; since this is a small register classes target,
8846 there shouldn't be anything but a jump before the function end. */
8847 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8848 return gen_rtx_REG (Pmode, 7);
8849 }
8850
8851 /* This function will set the fpscr from memory.
8852 MODE is the mode we are setting it to. */
8853 void
8854 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8855 {
8856 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8857 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8858
8859 rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8860 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8861 }
8862
8863 /* Is the given character a logical line separator for the assembler? */
8864 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8865 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8866 #endif
8867
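/* Return true if INSN is contained in a SEQUENCE, i.e. it has been put
   into a delay slot group.  This is detected by looking at the insn
   stream around INSN rather than at INSN itself. */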
8868 static bool
8869 sequence_insn_p (rtx_insn *insn)
8870 {
8871 rtx_insn* prev = PREV_INSN (insn);
8872 if (prev == NULL)
8873 return false;
8874
8875 rtx_insn* next = NEXT_INSN (prev);
8876 if (next == NULL)
8877 return false;
8878
8879 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
8880 }
8881
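/* Return the number of bytes to add to the length of INSN in order to
   account for unfilled delay slots, forced cbranch delay slots and
   sh-dsp parallel processing insns in inline asm. */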
8882 int
8883 sh_insn_length_adjustment (rtx_insn *insn)
8884 {
8885 /* Instructions with unfilled delay slots take up an extra two bytes for
8886 the nop in the delay slot. */
8887 if (((NONJUMP_INSN_P (insn)
8888 && GET_CODE (PATTERN (insn)) != USE
8889 && GET_CODE (PATTERN (insn)) != CLOBBER)
8890 || CALL_P (insn) || JUMP_P (insn))
8891 && ! sequence_insn_p (insn)
8892 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8893 return 2;
8894
8895 /* Increase the insn length of a cbranch without a delay slot insn to
8896 force a delay slot which will be stuffed with a nop. */
8897 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
8898 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
8899 && ! sequence_insn_p (insn))
8900 return 2;
8901
8902 /* sh-dsp parallel processing insns take four bytes instead of two. */
8903
8904 if (NONJUMP_INSN_P (insn))
8905 {
8906 int sum = 0;
8907 rtx body = PATTERN (insn);
8908 const char *templ;
8909 char c;
8910 bool maybe_label = true;
8911
8912 if (GET_CODE (body) == ASM_INPUT)
8913 templ = XSTR (body, 0);
8914 else if (asm_noperands (body) >= 0)
8915 templ
8916 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8917 else
8918 return 0;
8919 do
8920 {
8921 int ppi_adjust = 0;
8922
8923 do
8924 c = *templ++;
8925 while (c == ' ' || c == '\t');
8926 /* All sh-dsp parallel-processing insns start with p.
8927 The only non-ppi sh insn starting with p is pref.
8928 The only ppi starting with pr is prnd. */
8929 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8930 ppi_adjust = 2;
8931 /* The repeat pseudo-insn expands to three insns, a total of
8932 six bytes in size. */
8933 else if ((c == 'r' || c == 'R')
8934 && ! strncasecmp ("epeat", templ, 5))
8935 ppi_adjust = 4;
8936 while (c && c != '\n'
8937 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8938 {
8939 /* If this is a label, it is obviously not a ppi insn. */
8940 if (c == ':' && maybe_label)
8941 {
8942 ppi_adjust = 0;
8943 break;
8944 }
8945 else if (c == '\'' || c == '"')
8946 maybe_label = false;
8947 c = *templ++;
8948 }
8949 sum += ppi_adjust;
8950 maybe_label = c != ':';
8951 }
8952 while (c);
8953 return sum;
8954 }
8955 return 0;
8956 }
8957 \f
8958 /* Return TRUE for a valid displacement for the REG+disp addressing
8959 with MODE. */
8960 bool
8961 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
8962 bool allow_zero)
8963 {
8964 if (! CONST_INT_P (op))
8965 return false;
8966
8967 {
8968 const HOST_WIDE_INT offset = INTVAL (op);
8969 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
8970 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
8971
8972 /* If the mode does not support any displacement always return false.
8973 Even though an index of '0' is actually always valid, it will cause
8974 troubles when e.g. a DFmode move is split into two SFmode moves,
8975 where one SFmode move will have index '0' and the other move will
8976 have index '4'. */
8977 if (!allow_zero && max_disp < 1)
8978 return false;
8979
8980 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
8981 }
8982 }
8983
8984 /* Recognize an RTL expression that is a valid memory address for
8985 an instruction.
8986 The MODE argument is the machine mode for the MEM expression
8987 that wants to use this address.
8988 Allow REG
8989 REG+disp
8990 REG+r0
8991 REG++
8992 --REG
8993 GBR
8994 GBR+disp */
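/* For illustration, in SH assembly these correspond to operands such as
   @r4, @(8,r4), @(r0,r4), @r4+, @-r4 and @(8,gbr)  (examples only). */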
8995 static bool
8996 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
8997 {
8998 if (REG_P (x) && REGNO (x) == GBR_REG)
8999 return true;
9000
9001 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9002 return true;
9003 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9004 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9005 return true;
9006 else if (GET_CODE (x) == PLUS)
9007 {
9008 rtx xop0 = XEXP (x, 0);
9009 rtx xop1 = XEXP (x, 1);
9010
9011 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
9012 return gbr_displacement (xop1, mode);
9013
9014 if (GET_MODE_SIZE (mode) <= 8
9015 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9016 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
9017 return true;
9018
9019 if (GET_MODE_SIZE (mode) <= 4
9020 || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode))
9021 {
9022 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9023 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9024 return true;
9025 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9026 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9027 return true;
9028 }
9029 }
9030
9031 return false;
9032 }
9033 \f
9034 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9035 isn't protected by a PIC unspec. */
9036 bool
9037 nonpic_symbol_mentioned_p (rtx x)
9038 {
9039 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9040 || GET_CODE (x) == PC)
9041 return true;
9042
9043 /* We don't want to look into the possible MEM location of a
9044 CONST_DOUBLE, since we're not going to use it, in general. */
9045 if (GET_CODE (x) == CONST_DOUBLE)
9046 return false;
9047
9048 if (GET_CODE (x) == UNSPEC
9049 && (XINT (x, 1) == UNSPEC_PIC
9050 || XINT (x, 1) == UNSPEC_GOT
9051 || XINT (x, 1) == UNSPEC_GOTOFF
9052 || XINT (x, 1) == UNSPEC_GOTPLT
9053 || XINT (x, 1) == UNSPEC_GOTTPOFF
9054 || XINT (x, 1) == UNSPEC_DTPOFF
9055 || XINT (x, 1) == UNSPEC_TPOFF
9056 || XINT (x, 1) == UNSPEC_PLT
9057 || XINT (x, 1) == UNSPEC_PCREL
9058 || XINT (x, 1) == UNSPEC_SYMOFF
9059 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
9060 || XINT (x, 1) == UNSPEC_GOTFUNCDESC
9061 || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
9062 return false;
9063
9064 const char* fmt = GET_RTX_FORMAT (GET_CODE (x));
9065 for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9066 {
9067 if (fmt[i] == 'E')
9068 {
9069 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
9070 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9071 return true;
9072 }
9073 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9074 return true;
9075 }
9076
9077 return false;
9078 }
9079
9080 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9081 @GOTOFF in `reg'. */
9082 rtx
9083 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
9084 {
9085 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9086 return orig;
9087
9088 if (GET_CODE (orig) == LABEL_REF
9089 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9090 {
9091 if (reg == NULL_RTX)
9092 reg = gen_reg_rtx (Pmode);
9093
9094 if (TARGET_FDPIC
9095 && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
9096 {
9097 /* Weak functions may be NULL which doesn't work with
9098 GOTOFFFUNCDESC because the runtime offset is not known. */
9099 if (SYMBOL_REF_WEAK (orig))
9100 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9101 else
9102 emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
9103 }
9104 else if (TARGET_FDPIC
9105 && (GET_CODE (orig) == LABEL_REF
9106 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
9107 && (TREE_READONLY (SYMBOL_REF_DECL (orig))
9108 || SYMBOL_REF_EXTERNAL_P (orig)
9109 || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig))))))
9110 /* In FDPIC, GOTOFF can only be used for writable data. */
9111 emit_insn (gen_symGOT2reg (reg, orig));
9112 else
9113 emit_insn (gen_symGOTOFF2reg (reg, orig));
9114 return reg;
9115 }
9116 else if (GET_CODE (orig) == SYMBOL_REF)
9117 {
9118 if (reg == NULL_RTX)
9119 reg = gen_reg_rtx (Pmode);
9120
9121 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
9122 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9123 else
9124 emit_insn (gen_symGOT2reg (reg, orig));
9125 return reg;
9126 }
9127 return orig;
9128 }
9129
9130 /* Given a (logical) mode size and an offset in bytes, try to find the
9131 appropriate displacement value for a mov insn. On SH the displacements
9132 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
9133 15 bytes in QImode. To compensate for this we create a new base address
9134 by adding an adjustment value to the original address.
9135
9136 If the originally requested offset is greater than 127 we prefer using
9137 values 124..127 over 128..131 to increase opportunities to use the
9138 add #imm, Rn insn.
9139
9140 In some cases it is possible that a requested offset might seem unaligned
9141 or inappropriate for the mode size, like offset = 2 and mode size = 4.
9142 This is compensated by adjusting the base address so that the effective
9143 address of the displacement move insn will be aligned.
9144
9145 This is not the best possible way of rebasing the base address, as it
9146 does not look at other displacement addressings present around it.
9147 In some cases this can create more base address adjustments than would
9148 actually be necessary. */
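/* Illustrative example (SImode, where the mov.l displacement is limited
   to 60 bytes): a requested offset of 68 yields offset_adjust = 64 and
   mov_disp = 4, i.e. the base address is first advanced by 64 (which fits
   the add #imm8 insn) and the remaining displacement of 4 is used in the
   mov.l insn. */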
9149 struct disp_adjust
9150 {
9151 rtx offset_adjust;
9152 rtx mov_disp;
9153 };
9154
9155 static struct disp_adjust
9156 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
9157 {
9158 struct disp_adjust res = { NULL_RTX, NULL_RTX };
9159
9160 /* Do not try to use SH2A's large displacements here, because this would
9161 effectively disable the small displacement insns. */
9162 const int mode_sz = GET_MODE_SIZE (mode);
9163 const int mov_insn_sz = mov_insn_size (mode, false);
9164 const int max_disp = sh_max_mov_insn_displacement (mode, false);
9165 const int max_disp_next = max_disp + mov_insn_sz;
9166 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
9167 HOST_WIDE_INT offset_adjust;
9168
9169 /* In some cases this actually does happen and we must check for it. */
9170 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
9171 return res;
9172
9173 /* Keeps the previous behavior for QImode displacement addressing.
9174 This just decides how the offset is re-based. Removing this special
9175 case will result in slightly bigger code on average, but it's not that
9176 bad actually. */
9177 if (mov_insn_sz == 1)
9178 align_modifier = 0;
9179
9180 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
9181
9182 if (mode_sz + offset - offset_adjust <= max_disp_next)
9183 {
9184 res.offset_adjust = GEN_INT (offset_adjust);
9185 res.mov_disp = GEN_INT (offset - offset_adjust);
9186 }
9187
9188 return res;
9189 }
9190
9191 /* Try to modify an illegitimate address and make it legitimate.
9192 If we find one, return the new, valid address.
9193 Otherwise, return the original address. */
9194 static rtx
9195 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9196 {
9197 if (flag_pic)
9198 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9199
9200 if ((TARGET_FPU_DOUBLE && mode == DFmode)
9201 || (TARGET_SH2E && mode == SFmode))
9202 return x;
9203
9204 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
9205 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
9206 {
9207 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
9208 INTVAL (XEXP (x, 1)));
9209
9210 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9211 {
9212 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9213 adj.offset_adjust, NULL_RTX, 0,
9214 OPTAB_LIB_WIDEN);
9215 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9216 }
9217 }
9218 return x;
9219 }
9220
9221 /* Attempt to replace *p, which is an address that needs reloading, with
9222 a valid memory address for an operand of mode MODE.
9223 Like for sh_legitimize_address, for the SH we try to get a normal form
9224 of the address. That will allow inheritance of the address reloads. */
9225 bool
9226 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9227 int itype)
9228 {
9229 enum reload_type type = (enum reload_type) itype;
9230 const int mode_sz = GET_MODE_SIZE (mode);
9231
9232 if (sh_lra_p ())
9233 return false;
9234
9235 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
9236 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true))
9237 {
9238 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
9239 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
9240
9241 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9242 {
9243 push_reload (*p, NULL_RTX, p, NULL,
9244 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9245 return true;
9246 }
9247
9248 if (TARGET_SH2E && mode == SFmode)
9249 {
9250 *p = copy_rtx (*p);
9251 push_reload (*p, NULL_RTX, p, NULL,
9252 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9253 return true;
9254 }
9255
9256 /* FIXME: Do not allow legitimizing QImode and HImode displacement
9257 moves because then reload has a problem figuring out the constraint
9258 that the move insn target/source reg must be R0.
9259 Or maybe some handling is wrong in sh_secondary_reload for this
9260 to work properly? */
9261 if ((mode_sz == 4 || mode_sz == 8)
9262 && ! (TARGET_SH4 && mode == DFmode)
9263 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9264 {
9265 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
9266 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9267 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9268 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9269 return true;
9270 }
9271 }
9272
9273 /* We must re-recognize what we created before. */
9274 if (GET_CODE (*p) == PLUS
9275 && (mode_sz == 4 || mode_sz == 8)
9276 && GET_CODE (XEXP (*p, 0)) == PLUS
9277 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9278 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9279 && CONST_INT_P (XEXP (*p, 1))
9280 && ! (TARGET_SH2E && mode == SFmode))
9281 {
9282 /* Because this address is so complex, we know it must have
9283 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9284 it is already unshared, and needs no further unsharing. */
9285 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9286 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9287 return true;
9288 }
9289
9290 return false;
9291 }
9292
9293 /* In the name of slightly smaller debug output, and to cater to
9294 general assembler lossage, recognize various UNSPEC sequences
9295 and turn them back into a direct symbol reference. */
9296 static rtx
9297 sh_delegitimize_address (rtx orig_x)
9298 {
9299 orig_x = delegitimize_mem_from_attrs (orig_x);
9300
9301 rtx x = orig_x;
9302 if (MEM_P (x))
9303 x = XEXP (x, 0);
9304 if (GET_CODE (x) == CONST)
9305 {
9306 rtx y = XEXP (x, 0);
9307 if (GET_CODE (y) == UNSPEC)
9308 {
9309 if (XINT (y, 1) == UNSPEC_GOT
9310 || XINT (y, 1) == UNSPEC_GOTOFF
9311 || XINT (y, 1) == UNSPEC_SYMOFF)
9312 return XVECEXP (y, 0, 0);
9313 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9314 {
9315 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9316 {
9317 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9318
9319 if (GET_CODE (symplt) == UNSPEC
9320 && (XINT (symplt, 1) == UNSPEC_PLT
9321 || XINT (symplt, 1) == UNSPEC_PCREL))
9322 return XVECEXP (symplt, 0, 0);
9323 }
9324 }
9325 }
9326 }
9327
9328 return orig_x;
9329 }
9330
9331 /* Mark the use of a constant in the literal table. If the constant
9332 has multiple labels, make it unique. */
9333 static rtx
9334 mark_constant_pool_use (rtx x)
9335 {
9336 if (x == NULL_RTX)
9337 return x;
9338
9339 switch (GET_CODE (x))
9340 {
9341 case LABEL_REF:
9342 x = XEXP (x, 0);
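/* Fall through: X is now the CODE_LABEL itself. */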
9343 case CODE_LABEL:
9344 break;
9345 default:
9346 return x;
9347 }
9348
9349 /* Get the first label in the list of labels for the same constant
9350 and delete the other labels in the list. */
9351 rtx_insn* lab = as_a <rtx_insn*> (x);
9352 for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
9353 {
9354 if (!LABEL_P (insn)
9355 || LABEL_REFS (insn) != NEXT_INSN (insn))
9356 break;
9357 lab = insn;
9358 }
9359
9360 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9361 as_a<rtx_insn *> (insn)->set_deleted ();
9362
9363 /* Mark constants in a window. */
9364 for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn;
9365 insn = NEXT_INSN (insn))
9366 {
9367 if (!NONJUMP_INSN_P (insn))
9368 continue;
9369
9370 rtx pattern = PATTERN (insn);
9371 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9372 continue;
9373
9374 switch (XINT (pattern, 1))
9375 {
9376 case UNSPECV_CONST2:
9377 case UNSPECV_CONST4:
9378 case UNSPECV_CONST8:
9379 XVECEXP (pattern, 0, 1) = const1_rtx;
9380 break;
9381 case UNSPECV_WINDOW_END:
9382 if (XVECEXP (pattern, 0, 0) == x)
9383 return lab;
9384 break;
9385 case UNSPECV_CONST_END:
9386 return lab;
9387 default:
9388 break;
9389 }
9390 }
9391
9392 return lab;
9393 }
9394 \f
9395 /* Return true if it's possible to redirect BRANCH1 to the destination
9396 of an unconditional jump BRANCH2. We only want to do this if the
9397 resulting branch will have a short displacement. */
9398 static bool
9399 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
9400 {
9401 /* Don't follow if BRANCH2 could be a jump crossing between
9402 hot and cold partitions. */
9403 if (flag_reorder_blocks_and_partition
9404 && simplejump_p (branch2)
9405 && CROSSING_JUMP_P (branch2))
9406 return false;
9407
9408 if (flag_expensive_optimizations && simplejump_p (branch2))
9409 {
9410 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9411 rtx_insn *insn;
9412 int distance;
9413
9414 for (distance = 0, insn = NEXT_INSN (branch1);
9415 insn && distance < 256;
9416 insn = PREV_INSN (insn))
9417 {
9418 if (insn == dest)
9419 return true;
9420 else
9421 distance += get_attr_length (insn);
9422 }
9423 for (distance = 0, insn = NEXT_INSN (branch1);
9424 insn && distance < 256;
9425 insn = NEXT_INSN (insn))
9426 {
9427 if (insn == dest)
9428 return true;
9429 else
9430 distance += get_attr_length (insn);
9431 }
9432 }
9433 return false;
9434 }
9435
9436 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9437 bool
9438 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9439 unsigned int new_reg)
9440 {
9441 /* Interrupt functions can only use registers that have already been
9442 saved by the prologue, even if they would normally be
9443 call-clobbered. */
9444 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9445 return false;
9446
9447 return true;
9448 }
9449
9450 /* Function to update the integer COST
9451 based on the relationship between INSN, which is dependent on
9452 DEP_INSN through a dependence of type DEP_TYPE. The default is to make no
9453 adjustment to COST. This can be used for example to specify to
9454 the scheduler that an output- or anti-dependence does not incur
9455 the same cost as a data-dependence. The return value should be
9456 the new value for COST. */
9457 static int
9458 sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
9459 unsigned int)
9460 {
9461 rtx reg, use_pat;
9462
9463 if (dep_type == 0)
9464 {
9465 if (recog_memoized (insn) < 0
9466 || recog_memoized (dep_insn) < 0)
9467 return cost;
9468
9469 rtx dep_set = single_set (dep_insn);
9470
9471 /* The latency that we specify in the scheduling description refers
9472 to the actual output, not to an auto-increment register; for that,
9473 the latency is one. */
9474 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9475 {
9476 rtx set = single_set (insn);
9477
9478 if (set
9479 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9480 && (!MEM_P (SET_DEST (set))
9481 || !reg_mentioned_p (SET_DEST (dep_set),
9482 XEXP (SET_DEST (set), 0))))
9483 cost = 1;
9484 }
9485 /* The only input for a call that is timing-critical is the
9486 function's address. */
9487 if (CALL_P (insn))
9488 {
9489 rtx call = get_call_rtx_from (insn);
9490 if (call
9491 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9492 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9493 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9494 cost -= TARGET_SH4_300 ? 3 : 6;
9495 }
9496 /* Likewise, the most timing-critical input for an sfunc call
9497 is the function address. However, sfuncs typically start
9498 using their arguments pretty quickly.
9499 Assume a four cycle delay for SH4 before they are needed.
9500 Cached ST40-300 calls are quicker, so assume only a one
9501 cycle delay there.
9502 ??? Maybe we should encode the delays till input registers
9503 are needed by sfuncs into the sfunc call insn. */
9504 /* All sfunc calls are parallels with at least four components.
9505 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9506 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9507 && XVECLEN (PATTERN (insn), 0) >= 4
9508 && (reg = sfunc_uses_reg (insn)))
9509 {
9510 if (! reg_set_p (reg, dep_insn))
9511 cost -= TARGET_SH4_300 ? 1 : 4;
9512 }
9513 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9514 {
9515 attr_type dep_type = get_attr_type (dep_insn);
9516 attr_type type;
9517 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9518 cost--;
9519 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9520 && (type = get_attr_type (insn)) != TYPE_CALL
9521 && type != TYPE_SFUNC)
9522 cost--;
9523 /* When the preceding instruction loads the shift amount of
9524 the following SHAD/SHLD, the latency of the load is increased
9525 by 1 cycle. */
9526 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9527 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9528 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9529 XEXP (SET_SRC (single_set (insn)),
9530 1)))
9531 cost++;
9532 /* When an LS group instruction with a latency of less than
9533 3 cycles is followed by a double-precision floating-point
9534 instruction, FIPR, or FTRV, the latency of the first
9535 instruction is increased to 3 cycles. */
9536 else if (cost < 3
9537 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9538 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9539 cost = 3;
9540 /* The lsw register of a double-precision computation is ready one
9541 cycle earlier. */
9542 else if (reload_completed
9543 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9544 && (use_pat = single_set (insn))
9545 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9546 SET_SRC (use_pat)))
9547 cost -= 1;
9548
9549 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9550 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9551 cost -= 1;
9552 }
9553 else if (TARGET_SH4_300)
9554 {
9555 /* Stores need their input register two cycles later. */
9556 attr_type type;
9557 if (dep_set && cost >= 1
9558 && ((type = get_attr_type (insn)) == TYPE_STORE
9559 || type == TYPE_PSTORE
9560 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9561 {
9562 rtx set = single_set (insn);
9563
9564 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9565 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9566 {
9567 cost -= 2;
9568 /* But don't reduce the cost below 1 if the address depends
9569 on a side effect of dep_insn. */
9570 if (cost < 1
9571 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9572 cost = 1;
9573 }
9574 }
9575 }
9576 }
9577 /* An anti-dependence penalty of two applies if the first insn is a double
9578 precision fadd / fsub / fmul. */
9579 else if (!TARGET_SH4_300
9580 && dep_type == REG_DEP_ANTI
9581 && recog_memoized (dep_insn) >= 0
9582 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9583 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9584 /* A lot of alleged anti-flow dependences are fake,
9585 so check this one is real. */
9586 && flow_dependent_p (dep_insn, insn))
9587 cost = 2;
9588
9589 return cost;
9590 }
9591
9592 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9593 if DEP_INSN is anti-flow dependent on INSN. */
9594 static bool
9595 flow_dependent_p (rtx insn, rtx dep_insn)
9596 {
9597 rtx tmp = PATTERN (insn);
9598
9599 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9600 return tmp == NULL_RTX;
9601 }
9602
9603 /* A helper function for flow_dependent_p called through note_stores. */
9604 static void
9605 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9606 {
9607 rtx * pinsn = (rtx *) data;
9608
9609 if (*pinsn && reg_referenced_p (x, *pinsn))
9610 *pinsn = NULL_RTX;
9611 }
9612
9613 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9614 'special function' patterns (type sfunc) that clobber pr, but that
9615 do not look like function calls to leaf_function_p. Hence we must
9616 do this extra check. */
9617 static int
9618 sh_pr_n_sets (void)
9619 {
9620 return DF_REG_DEF_COUNT (PR_REG);
9621 }
9622
9623 /* Return where to allocate pseudo for a given hard register initial
9624 value. */
9625 static rtx
9626 sh_allocate_initial_value (rtx hard_reg)
9627 {
9628 if (REGNO (hard_reg) == PR_REG)
9629 {
9630 if (crtl->is_leaf && ! sh_pr_n_sets ())
9631 return hard_reg;
9632 else
9633 return gen_frame_mem (Pmode, return_address_pointer_rtx);
9634 }
9635
9636 return NULL_RTX;
9637 }
9638
9639 /* This function returns "2" to indicate dual issue for the SH4
9640 processor. To be used by the DFA pipeline description. */
9641 static int
9642 sh_issue_rate (void)
9643 {
9644 if (TARGET_SUPERSCALAR)
9645 return 2;
9646 else
9647 return 1;
9648 }
9649
9650 /* Functions for ready queue reordering for sched1. */
9651
9652 /* Get the regmode weight of a single SET or CLOBBER expression X for MODE. */
9653 static short
9654 find_set_regmode_weight (rtx x, machine_mode mode)
9655 {
9656 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9657 return 1;
9658 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9659 {
9660 if (REG_P (SET_DEST (x)))
9661 {
9662 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9663 return 1;
9664 else
9665 return 0;
9666 }
9667 return 1;
9668 }
9669 return 0;
9670 }
9671
9672 /* Get regmode weight for insn. */
9673 static short
9674 find_insn_regmode_weight (rtx insn, machine_mode mode)
9675 {
9676 /* Increment weight for each register born here. */
9677 rtx x = PATTERN (insn);
9678 short reg_weight = find_set_regmode_weight (x, mode);
9679 if (GET_CODE (x) == PARALLEL)
9680 {
9681 int j;
9682 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9683 {
9684 x = XVECEXP (PATTERN (insn), 0, j);
9685 reg_weight += find_set_regmode_weight (x, mode);
9686 }
9687 }
9688 /* Decrement weight for each register that dies here. */
9689 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9690 {
9691 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9692 {
9693 rtx note = XEXP (x, 0);
9694 if (REG_P (note) && GET_MODE (note) == mode)
9695 reg_weight--;
9696 }
9697 }
9698 return reg_weight;
9699 }
9700
9701 /* Calculate regmode weights for all insns of a basic block. */
9702 static void
9703 find_regmode_weight (basic_block b, machine_mode mode)
9704 {
9705 rtx_insn *insn, *next_tail, *head, *tail;
9706
9707 get_ebb_head_tail (b, b, &head, &tail);
9708 next_tail = NEXT_INSN (tail);
9709
9710 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9711 {
9712 /* Handle register life information. */
9713 if (!INSN_P (insn))
9714 continue;
9715
9716 if (mode == SFmode)
9717 INSN_REGMODE_WEIGHT (insn, mode) =
9718 find_insn_regmode_weight (insn, mode)
9719 + 2 * find_insn_regmode_weight (insn, DFmode);
9720 else if (mode == SImode)
9721 INSN_REGMODE_WEIGHT (insn, mode) =
9722 find_insn_regmode_weight (insn, mode)
9723 + 2 * find_insn_regmode_weight (insn, DImode);
9724 }
9725 }
9726
9727 /* Comparison function for ready queue sorting. */
9728 static int
9729 rank_for_reorder (const void *x, const void *y)
9730 {
9731 rtx_insn *tmp = *(rtx_insn * const *) y;
9732 rtx_insn *tmp2 = *(rtx_insn * const *) x;
9733
9734 /* The insn in a schedule group should be issued first. */
9735 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9736 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9737
9738 /* If insns are equally good, sort by INSN_LUID (original insn order). This
9739 minimizes instruction movement, thus minimizing sched's effect on
9740 register pressure. */
9741 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9742 }
9743
9744 /* Resort the array A in which only the element at index N may be out of order. */
9745 static void
9746 swap_reorder (rtx_insn **a, int n)
9747 {
9748 rtx_insn *insn = a[n - 1];
9749 int i = n - 2;
9750
9751 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9752 {
9753 a[i + 1] = a[i];
9754 i -= 1;
9755 }
9756 a[i + 1] = insn;
9757 }
9758
9759 /* Sort the ready list by ascending priority. */
9760 static void
9761 ready_reorder (rtx_insn **ready, int nready)
9762 {
9763 if (nready == 2)
9764 swap_reorder (ready, nready);
9765 else if (nready > 2)
9766 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
9767 }
9768
9769 /* Count life regions of r0 for a block. */
9770 static int
9771 find_r0_life_regions (basic_block b)
9772 {
9773 bool live;
9774 int set;
9775 int death = 0;
9776
9777 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9778 {
9779 set = 1;
9780 live = true;
9781 }
9782 else
9783 {
9784 set = 0;
9785 live = false;
9786 }
9787
9788 rtx_insn* insn = BB_HEAD (b);
9789 rtx_insn* end = BB_END (b);
9790 rtx r0_reg = gen_rtx_REG (SImode, R0_REG);
9791 while (1)
9792 {
9793 if (INSN_P (insn))
9794 {
9795 if (find_regno_note (insn, REG_DEAD, R0_REG))
9796 {
9797 death++;
9798 live = false;
9799 }
9800
9801 rtx pset;
9802 if (!live
9803 && (pset = single_set (insn))
9804 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9805 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9806 {
9807 set++;
9808 live = true;
9809 }
9810 }
9811 if (insn == end)
9812 break;
9813 insn = NEXT_INSN (insn);
9814 }
9815 return set - death;
9816 }
9817
9818 /* Calculate regmode weights for all insns of all basic blocks. */
9819 static void
9820 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9821 int verbose ATTRIBUTE_UNUSED,
9822 int old_max_uid)
9823 {
9824 basic_block b;
9825
9826 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9827 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9828 r0_life_regions = 0;
9829
9830 FOR_EACH_BB_REVERSE_FN (b, cfun)
9831 {
9832 find_regmode_weight (b, SImode);
9833 find_regmode_weight (b, SFmode);
9834 if (!reload_completed)
9835 r0_life_regions += find_r0_life_regions (b);
9836 }
9837
9838 CURR_REGMODE_PRESSURE (SImode) = 0;
9839 CURR_REGMODE_PRESSURE (SFmode) = 0;
9840 }
9841
9842 /* Cleanup. */
9843 static void
9844 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9845 int verbose ATTRIBUTE_UNUSED)
9846 {
9847 if (regmode_weight[0])
9848 {
9849 free (regmode_weight[0]);
9850 regmode_weight[0] = NULL;
9851 }
9852 if (regmode_weight[1])
9853 {
9854 free (regmode_weight[1]);
9855 regmode_weight[1] = NULL;
9856 }
9857 }
9858
9859 /* Cache can_issue_more so that we can return it from sh_reorder2. Also
9860 keep count of the register pressure for SImode and SFmode. */
9861 static int
9862 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9863 int sched_verbose ATTRIBUTE_UNUSED,
9864 rtx_insn *insn,
9865 int can_issue_more)
9866 {
9867 if (GET_CODE (PATTERN (insn)) != USE
9868 && GET_CODE (PATTERN (insn)) != CLOBBER)
9869 cached_can_issue_more = can_issue_more - 1;
9870 else
9871 cached_can_issue_more = can_issue_more;
9872
9873 if (reload_completed)
9874 return cached_can_issue_more;
9875
9876 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9877 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9878
9879 return cached_can_issue_more;
9880 }
9881
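/* Reset the SImode and SFmode register pressure counters before a new
   block of insns is scheduled. */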
9882 static void
9883 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9884 int verbose ATTRIBUTE_UNUSED,
9885 int veclen ATTRIBUTE_UNUSED)
9886 {
9887 CURR_REGMODE_PRESSURE (SImode) = 0;
9888 CURR_REGMODE_PRESSURE (SFmode) = 0;
9889 }
9890
9891 /* Some magic numbers. */
9892 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9893 functions that already have high pressure on r0. */
9894 #define R0_MAX_LIFE_REGIONS 2
9895 /* Register Pressure thresholds for SImode and SFmode registers. */
9896 #define SIMODE_MAX_WEIGHT 5
9897 #define SFMODE_MAX_WEIGHT 10
9898
9899 /* Return true if the pressure is high for MODE. */
9900 static bool
9901 high_pressure (machine_mode mode)
9902 {
9903 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9904 functions that already have high pressure on r0. */
9905 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9906 return true;
9907
9908 if (mode == SFmode)
9909 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9910 else
9911 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9912 }
9913
9914 /* Reorder ready queue if register pressure is high. */
9915 static int
9916 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9917 int sched_verbose ATTRIBUTE_UNUSED,
9918 rtx_insn **ready,
9919 int *n_readyp,
9920 int clock_var ATTRIBUTE_UNUSED)
9921 {
9922 if (reload_completed)
9923 return sh_issue_rate ();
9924
9925 if (high_pressure (SFmode) || high_pressure (SImode))
9926 {
9927 ready_reorder (ready, *n_readyp);
9928 }
9929
9930 return sh_issue_rate ();
9931 }
9932
9933 /* Skip cycles if the current register pressure is high. */
9934 static int
9935 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9936 int sched_verbose ATTRIBUTE_UNUSED,
9937 rtx_insn **ready ATTRIBUTE_UNUSED,
9938 int *n_readyp ATTRIBUTE_UNUSED,
9939 int clock_var ATTRIBUTE_UNUSED)
9940 {
9941 if (reload_completed)
9942 return cached_can_issue_more;
9943
9944 if (high_pressure(SFmode) || high_pressure (SImode))
9945 skip_cycles = 1;
9946
9947 return cached_can_issue_more;
9948 }
9949
9950 /* Skip cycles without sorting the ready queue. This will move insns from
9951 Q->R. If this is the last cycle we are skipping, allow sorting of the
9952 ready queue by sh_reorder. */
9953
9954 /* Generally, skipping this many cycles is sufficient for all insns to move
9955 from Q -> R. */
9956 #define MAX_SKIPS 8
9957
9958 static int
9959 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9960 int sched_verbose ATTRIBUTE_UNUSED,
9961 rtx_insn *insn ATTRIBUTE_UNUSED,
9962 int last_clock_var,
9963 int clock_var,
9964 int *sort_p)
9965 {
9966 if (reload_completed)
9967 return 0;
9968
9969 if (skip_cycles)
9970 {
9971 if ((clock_var - last_clock_var) < MAX_SKIPS)
9972 {
9973 *sort_p = 0;
9974 return 1;
9975 }
9976 /* If this is the last cycle we are skipping, allow reordering of R. */
9977 if ((clock_var - last_clock_var) == MAX_SKIPS)
9978 {
9979 *sort_p = 1;
9980 return 1;
9981 }
9982 }
9983
9984 skip_cycles = 0;
9985
9986 return 0;
9987 }
9988
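/* Return true if the MS bitfield layout is used for RECORD_TYPE, i.e.
   for the Hitachi / Renesas ABIs. */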
9989 static bool
9990 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
9991 {
9992 return TARGET_HITACHI || sh_attr_renesas_p (record_type);
9993 }
9994 \f
9995 /*
9996 On the SH1..SH4, the trampoline looks like
9997 2 0002 D202 mov.l l2,r2
9998 1 0000 D301 mov.l l1,r3
9999 3 0004 422B jmp @r2
10000 4 0006 0009 nop
10001 5 0008 00000000 l1: .long area
10002 6 000c 00000000 l2: .long function
10003
10004 FDPIC needs a form that includes a function descriptor and
10005 code to load the GOT register:
10006 0 0000 00000000 .long l0
10007 1 0004 00000000 .long gotval
10008 2 0008 D302 l0: mov.l l1,r3
10009 3 000a D203 mov.l l2,r2
10010 4 000c 6122 mov.l @r2,r1
10011 5 000e 5C21 mov.l @(4,r2),r12
10012 6 0010 412B jmp @r1
10013 7 0012 0009 nop
10014 8 0014 00000000 l1: .long area
10015 9 0018 00000000 l2: .long function
10016
10017 SH5 (compact) uses r1 instead of r3 for the static chain. */
10018
10019 /* Emit insns to store a value at memory address + offset. */
10020 static void
10021 sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
10022 {
10023 gcc_assert ((offset & 3) == 0);
10024 emit_move_insn (offset == 0
10025 ? change_address (addr, SImode, NULL_RTX)
10026 : adjust_address (addr, SImode, offset), value);
10027 }
10028
10029 /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */
10030 static void
10031 sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
10032 {
10033 sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN
10034 ? (w0 | (w1 << 16))
10035 : (w1 | (w0 << 16)), SImode));
10036 }
10037
10038 /* Emit RTL insns to initialize the variable parts of a trampoline.
10039 FNADDR is an RTX for the address of the function's pure code.
10040 CXT is an RTX for the static chain value for the function. */
10041 static void
10042 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10043 {
10044 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10045 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10046
10047 if (TARGET_FDPIC)
10048 {
10049 rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
10050
10051 sh_emit_storesi (tramp_mem, 0, a);
10052 sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());
10053
10054 sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203);
10055 sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
10056 sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);
10057
10058 sh_emit_storesi (tramp_mem, 20, cxt);
10059 sh_emit_storesi (tramp_mem, 24, fnaddr);
10060 }
10061 else
10062 {
10063 sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
10064 sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);
10065
10066 sh_emit_storesi (tramp_mem, 8, cxt);
10067 sh_emit_storesi (tramp_mem, 12, fnaddr);
10068 }
10069 if (TARGET_HARD_SH4)
10070 {
10071 if (!TARGET_INLINE_IC_INVALIDATE
10072 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
10073 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10074 FUNCTION_ORDINARY).sym,
10075 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10076 else
10077 emit_insn (gen_ic_invalidate_line (tramp));
10078 }
10079 }
10080
10081 /* The trampoline address needs no adjustment here; the SH5 / SHmedia case, which added 1 to the address, is no longer supported. */
10082 static rtx
10083 sh_trampoline_adjust_address (rtx tramp)
10084 {
10085 return tramp;
10086 }
10087
10088 /* If PIC, we cannot make sibling calls to global functions
10089 because the PLT requires r12 to be live. */
10090 static bool
10091 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10092 {
10093 return (1
10094 && ! sh_cfun_interrupt_handler_p ()
10095 && (! flag_pic || TARGET_FDPIC
10096 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
10097 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10098 }
10099
10100 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
10101 void
10102 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
10103 {
10104 const_tree decl = SYMBOL_REF_DECL (sym);
10105 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
10106
10107 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
10108 emit_insn (gen_sym_label2reg (reg, sym, lab));
10109 else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
10110 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
10111 else
10112 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
10113 }
10114 \f
10115 /* Machine specific built-in functions. */
10116
10117 struct builtin_description
10118 {
10119 bool (* const is_enabled) (void);
10120 const enum insn_code icode;
10121 const char *const name;
10122 int signature;
10123 tree fndecl;
10124 };
10125
10126 /* Predicate for built-in functions that are available on any SH1 or
10127 later target. */
10128 static bool
10129 sh1_builtin_p (void)
10130 {
10131 return TARGET_SH1;
10132 }
10133
10134 /* Describe number and signedness of arguments; arg[0] == result
10135 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10136 /* 9: 64-bit pointer, 10: 32-bit pointer */
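/* For example, SH_BLTIN_UV { 1, 0 } below describes a function that
   returns an unsigned value and takes no arguments, while SH_BLTIN_VU
   { 0, 1 } describes one that returns void and takes a single unsigned
   argument (the signatures of the fpscr built-ins). */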
10137 static const char signature_args[][4] =
10138 {
10139 #define SH_BLTIN_V2SI2 0
10140 { 4, 4 },
10141 #define SH_BLTIN_V4HI2 1
10142 { 4, 4 },
10143 #define SH_BLTIN_V2SI3 2
10144 { 4, 4, 4 },
10145 #define SH_BLTIN_V4HI3 3
10146 { 4, 4, 4 },
10147 #define SH_BLTIN_V8QI3 4
10148 { 4, 4, 4 },
10149 #define SH_BLTIN_MAC_HISI 5
10150 { 1, 4, 4, 1 },
10151 #define SH_BLTIN_SH_HI 6
10152 { 4, 4, 1 },
10153 #define SH_BLTIN_SH_SI 7
10154 { 4, 4, 1 },
10155 #define SH_BLTIN_V4HI2V2SI 8
10156 { 4, 4, 4 },
10157 #define SH_BLTIN_V4HI2V8QI 9
10158 { 4, 4, 4 },
10159 #define SH_BLTIN_SISF 10
10160 { 4, 2 },
10161 #define SH_BLTIN_LDUA_L 11
10162 { 2, 10 },
10163 #define SH_BLTIN_LDUA_Q 12
10164 { 1, 10 },
10165 #define SH_BLTIN_STUA_L 13
10166 { 0, 10, 2 },
10167 #define SH_BLTIN_STUA_Q 14
10168 { 0, 10, 1 },
10169 #define SH_BLTIN_LDUA_L64 15
10170 { 2, 9 },
10171 #define SH_BLTIN_LDUA_Q64 16
10172 { 1, 9 },
10173 #define SH_BLTIN_STUA_L64 17
10174 { 0, 9, 2 },
10175 #define SH_BLTIN_STUA_Q64 18
10176 { 0, 9, 1 },
10177 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10178 #define SH_BLTIN_2 19
10179 #define SH_BLTIN_SU 19
10180 { 1, 2 },
10181 #define SH_BLTIN_3 20
10182 #define SH_BLTIN_SUS 20
10183 { 2, 2, 1 },
10184 #define SH_BLTIN_PSSV 21
10185 { 0, 8, 2, 2 },
10186 #define SH_BLTIN_XXUU 22
10187 #define SH_BLTIN_UUUU 22
10188 { 1, 1, 1, 1 },
10189 #define SH_BLTIN_PV 23
10190 { 0, 8 },
10191 #define SH_BLTIN_VP 24
10192 { 8, 0 },
10193 #define SH_BLTIN_UV 25
10194 { 1, 0 },
10195 #define SH_BLTIN_VU 26
10196 { 0, 1 },
10197 };
10198 /* mcmv: operands considered unsigned. */
10199 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10200 /* mperm: control value considered unsigned int. */
10201 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10202 /* mshards_q: returns signed short. */
10203 /* nsb: takes long long arg, returns unsigned char. */
10204 static struct builtin_description bdesc[] =
10205 {
10206 { sh1_builtin_p,
10207 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
10208 { sh1_builtin_p,
10209 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
10210 };
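/* Illustrative user-level usage of these built-ins (example only, not
   part of the compiler; 'mask' is a hypothetical bit mask):

     unsigned int old_fpscr = __builtin_sh_get_fpscr ();
     __builtin_sh_set_fpscr (old_fpscr & ~mask);

   sh_atomic_assign_expand_fenv below builds trees of a similar shape. */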
10211
10212 static tree sh_builtin_get_fpscr;
10213 static tree sh_builtin_set_fpscr;
10214
10215 static void
10216 sh_init_builtins (void)
10217 {
10218 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10219 memset (shared, 0, sizeof shared);
10220
10221 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
10222 {
10223 builtin_description* d = &bdesc[di];
10224
10225 if (!d->is_enabled ())
10226 continue;
10227
10228 tree type, arg_type = NULL_TREE;
10229 int signature = d->signature;
10230
10231 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10232 type = shared[signature];
10233 else
10234 {
10235 int has_result = signature_args[signature][0] != 0;
10236 tree args[3];
10237
10238 if (! TARGET_FPU_ANY
10239 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10240 continue;
10241 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
10242 args[i] = NULL_TREE;
10243 for (int i = 3; ; i--)
10244 {
10245 int arg = signature_args[signature][i];
10246 int opno = i - 1 + has_result;
10247
10248 if (arg & 8)
10249 arg_type = ptr_type_node;
10250 else if (arg)
10251 arg_type = (*lang_hooks.types.type_for_mode)
10252 (insn_data[d->icode].operand[opno].mode, (arg & 1));
10253 else if (i)
10254 continue;
10255 else
10256 arg_type = void_type_node;
10257 if (i == 0)
10258 break;
10259 args[i-1] = arg_type;
10260 }
10261 type = build_function_type_list (arg_type, args[0], args[1],
10262 args[2], NULL_TREE);
10263 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10264 shared[signature] = type;
10265 }
10266 d->fndecl =
10267 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10268 NULL, NULL_TREE);
10269 /* Record the {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
10270 if (d->icode == CODE_FOR_sts_fpscr)
10271 sh_builtin_get_fpscr = d->fndecl;
10272 else if (d->icode == CODE_FOR_set_fpscr)
10273 sh_builtin_set_fpscr = d->fndecl;
10274 }
10275 }
10276
10277 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
10278
10279 static void
10280 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
10281 {
10282 const unsigned SH_FE_INVALID = 64;
10283 const unsigned SH_FE_DIVBYZERO = 32;
10284 const unsigned SH_FE_OVERFLOW = 16;
10285 const unsigned SH_FE_UNDERFLOW = 8;
10286 const unsigned SH_FE_INEXACT = 4;
10287 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
10288 | SH_FE_DIVBYZERO
10289 | SH_FE_OVERFLOW
10290 | SH_FE_UNDERFLOW
10291 | SH_FE_INEXACT);
10292 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
10293 tree fenv_var, mask, ld_fenv, masked_fenv;
10294 tree new_fenv_var, reload_fenv, restore_fnenv;
10295 tree update_call, atomic_feraiseexcept, hold_fnclex;
10296
10297 if (! TARGET_FPU_ANY)
10298 return;
10299
10300 /* Generate the equivalent of:
10301 unsigned int fenv_var;
10302 fenv_var = __builtin_sh_get_fpscr ();
10303
10304 unsigned int masked_fenv;
10305 masked_fenv = fenv_var & mask;
10306
10307 __builtin_sh_set_fpscr (masked_fenv); */
10308
10309 fenv_var = create_tmp_var_raw (unsigned_type_node);
10310 mask = build_int_cst (unsigned_type_node,
10311 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
10312 | SH_FE_ALL_EXCEPT));
10313 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
10314 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
10315 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
10316 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10317 fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
10318 build2 (COMPOUND_EXPR, void_type_node, masked_fenv,
10319 ld_fenv),
10320 NULL_TREE, NULL_TREE);
10321 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex);
10322
10323 /* Store the value of masked_fenv to clear the exceptions:
10324 __builtin_sh_set_fpscr (masked_fenv); */
10325
10326 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10327
10328 /* Generate the equivalent of:
10329 unsigned int new_fenv_var;
10330 new_fenv_var = __builtin_sh_get_fpscr ();
10331
10332 __builtin_sh_set_fpscr (fenv_var);
10333
10334 __atomic_feraiseexcept (new_fenv_var); */
10335
10336 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
10337 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
10338 build_call_expr (sh_builtin_get_fpscr, 0));
10339 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
10340 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
10341 update_call = build_call_expr (atomic_feraiseexcept, 1,
10342 fold_convert (integer_type_node,
10343 new_fenv_var));
10344 *update = build2 (COMPOUND_EXPR, void_type_node,
10345 build2 (COMPOUND_EXPR, void_type_node,
10346 reload_fenv, restore_fnenv), update_call);
10347 }
10348
10349 /* Implements target hook vector_mode_supported_p. */
10350 bool
10351 sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED)
10352 {
10353 return false;
10354 }
10355
10356 bool
10357 sh_frame_pointer_required (void)
10358 {
10359 /* If needed override this in other tm.h files to cope with various OS
10360 lossage requiring a frame pointer. */
10361 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10362 return true;
10363
10364 if (crtl->profile)
10365 return true;
10366
10367 return false;
10368 }
10369
10370 /* Implements target hook dwarf_calling_convention. Return an enum
10371 of dwarf_calling_convention. */
10372 int
10373 sh_dwarf_calling_convention (const_tree func)
10374 {
10375 if (sh_attr_renesas_p (func))
10376 return DW_CC_GNU_renesas_sh;
10377
10378 return DW_CC_normal;
10379 }
10380
10381 /* Returns the sh builtin decl for CODE. */
10382 static tree
10383 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10384 {
10385 if (code >= ARRAY_SIZE (bdesc))
10386 return error_mark_node;
10387
10388 if (!bdesc[code].is_enabled ())
10389 return error_mark_node;
10390
10391 return bdesc[code].fndecl;
10392 }
10393
10394 /* Expand an expression EXP that calls a built-in function,
10395 with result going to TARGET if that's convenient
10396 (and in mode MODE if that's convenient).
10397 SUBTARGET may be used as the target for computing one of EXP's operands.
10398 IGNORE is nonzero if the value is to be ignored. */
10399 static rtx
10400 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10401 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10402 {
10403 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10404 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10405 const struct builtin_description *d = &bdesc[fcode];
10406 enum insn_code icode = d->icode;
10407 int signature = d->signature;
10408 int nop = 0;
10409 rtx op[4];
10410
10411 if (signature_args[signature][0])
10412 {
10413 if (ignore)
10414 return NULL_RTX;
10415
10416 machine_mode tmode = insn_data[icode].operand[0].mode;
10417 if (! target || GET_MODE (target) != tmode
10418 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10419 target = gen_reg_rtx (tmode);
10420 op[nop++] = target;
10421 }
10422 else
10423 target = NULL_RTX;
10424
10425 for (int i = 1; i <= 3; i++, nop++)
10426 {
10427 if (! signature_args[signature][i])
10428 break;
10429 tree arg = CALL_EXPR_ARG (exp, i - 1);
10430 if (arg == error_mark_node)
10431 return const0_rtx;
10432
10433 machine_mode opmode;
10434 tree optype;
10435 if (signature_args[signature][i] & 8)
10436 {
10437 opmode = ptr_mode;
10438 optype = ptr_type_node;
10439 }
10440 else
10441 {
10442 opmode = insn_data[icode].operand[nop].mode;
10443 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10444 }
10445
10446 machine_mode argmode = TYPE_MODE (TREE_TYPE (arg));
10447 if (argmode != opmode)
10448 arg = build1 (NOP_EXPR, optype, arg);
10449 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10450 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10451 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10452 }
10453
10454 rtx pat = NULL_RTX;
10455
10456 switch (nop)
10457 {
10458 case 1:
10459 pat = (*insn_data[d->icode].genfun) (op[0]);
10460 break;
10461 case 2:
10462 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10463 break;
10464 case 3:
10465 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10466 break;
10467 case 4:
10468 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10469 break;
10470 default:
10471 gcc_unreachable ();
10472 }
10473 if (! pat)
10474 return NULL_RTX;
10475 emit_insn (pat);
10476 return target;
10477 }
10478
10479 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
10480 We can allow any mode in any general register. The special registers
10481 only allow SImode. Don't allow any mode in the PR.
10482
10483 We cannot hold DCmode values in the XD registers because alter_reg
10484 handles subregs of them incorrectly. We could work around this by
10485 spacing the XD registers like the DR registers, but this would require
10486 additional memory in every compilation to hold larger register vectors.
10487 We could hold SFmode / SCmode values in XD registers, but that
10488 would require a tertiary reload when reloading from / to memory,
10489 and a secondary reload to reload from / to general regs; that
10490 seems to be a losing proposition.
10491
10492 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10493 it won't be ferried through GP registers first. */
10494 bool
10495 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10496 {
10497 if (SPECIAL_REGISTER_P (regno))
10498 return mode == SImode;
10499
10500 if (regno == FPUL_REG)
10501 return (mode == SImode || mode == SFmode);
10502
10503 if (FP_REGISTER_P (regno) && mode == SFmode)
10504 return true;
10505
10506 if (mode == V2SFmode)
10507 {
10508 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10509 || GENERAL_REGISTER_P (regno)))
10510 return true;
10511 else
10512 return false;
10513 }
10514
10515 if (mode == V4SFmode)
10516 {
10517 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10518 || GENERAL_REGISTER_P (regno))
10519 return true;
10520 else
10521 return false;
10522 }
10523
10524 if (mode == V16SFmode)
10525 return regno == FIRST_XD_REG;
10526
10527 if (FP_REGISTER_P (regno))
10528 {
10529 if (mode == SFmode
10530 || mode == SImode
10531 || ((TARGET_SH2E) && mode == SCmode)
10532 || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode)
10533 && ((regno - FIRST_FP_REG) & 1) == 0)
10534 || (TARGET_SH4 && mode == TImode
10535 && ((regno - FIRST_FP_REG) & 3) == 0))
10536 return true;
10537 else
10538 return false;
10539 }
10540
10541 if (XD_REGISTER_P (regno))
10542 return mode == DFmode;
10543
10544 if (regno == PR_REG)
10545 return mode == SImode;
10546
10547 if (regno == FPSCR_REG)
10548 return mode == SImode;
10549
10550 return true;
10551 }
10552
10553 /* Specify the modes required to caller save a given hard regno.
10554 choose_hard_reg_mode chooses mode based on HARD_REGNO_MODE_OK
10555 and returns ?Imode for float regs when sh_hard_regno_mode_ok
10556 permits integer modes on them. That makes LRA's split process
10557 unhappy. See PR55212.
10558 */
10559 machine_mode
10560 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
10561 machine_mode mode)
10562 {
10563 if (FP_REGISTER_P (regno)
10564 && (mode == SFmode
10565 || mode == SCmode
10566 || ((mode == DFmode || mode == DCmode)
10567 && ((regno - FIRST_FP_REG) & 1) == 0)))
10568 return mode;
10569
10570 return choose_hard_reg_mode (regno, nregs, false);
10571 }
10572
10573 /* Return true if a mode change from FROM to TO is invalid for registers
10574 in class RCLASS. */
10575 bool
10576 sh_cannot_change_mode_class (machine_mode from, machine_mode to,
10577 enum reg_class rclass)
10578 {
10579 /* We want to enable the use of SUBREGs as a means to
10580 VEC_SELECT a single element of a vector. */
10581
10582 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
10583 This can be problematic when SFmode vector subregs need to be accessed
10584 on the stack with displacement addressing, as it happens with -O0.
10585 Thus we disallow the mode change for -O0. */
10586 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10587 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
10588
10589 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10590 {
10591 if (TARGET_LITTLE_ENDIAN)
10592 {
10593 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10594 return reg_classes_intersect_p (DF_REGS, rclass);
10595 }
10596 else
10597 {
10598 if (GET_MODE_SIZE (from) < 8)
10599 return reg_classes_intersect_p (DF_REGS, rclass);
10600 }
10601 }
10602 return false;
10603 }
10604
10605 /* Return true if registers in machine mode MODE will likely be
10606 allocated to registers in small register classes. */
10607 bool
10608 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
10609 {
10610 return true;
10611 }
10612
10613 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10614 that label is used. */
10615 void
10616 sh_mark_label (rtx address, int nuses)
10617 {
10618 if (GOTOFF_P (address))
10619 {
10620 /* Extract the label or symbol. */
10621 address = XEXP (address, 0);
10622 if (GET_CODE (address) == PLUS)
10623 address = XEXP (address, 0);
10624 address = XVECEXP (address, 0, 0);
10625 }
10626 if (GET_CODE (address) == LABEL_REF
10627 && LABEL_P (XEXP (address, 0)))
10628 LABEL_NUSES (XEXP (address, 0)) += nuses;
10629 }
10630
10631 /* Compute extra cost of moving data between one register class
10632 and another.
10633
10634 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10635 uses this information. Hence, the general register <-> floating point
10636 register information here is not used for SFmode. */
10637 static int
10638 sh_register_move_cost (machine_mode mode,
10639 reg_class_t srcclass, reg_class_t dstclass)
10640 {
10641 if (dstclass == T_REGS || dstclass == PR_REGS)
10642 return 10;
10643
10644 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10645 return 4;
10646
10647 if (mode == SImode && TARGET_FMOVD
10648 && REGCLASS_HAS_FP_REG (srcclass)
10649 && REGCLASS_HAS_FP_REG (dstclass))
10650 return 4;
10651
10652 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10653 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10654
10655 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10656 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10657 return 9;
10658
10659 if ((REGCLASS_HAS_FP_REG (dstclass)
10660 && REGCLASS_HAS_GENERAL_REG (srcclass))
10661 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10662 && REGCLASS_HAS_FP_REG (srcclass)))
10663 {
10664 /* Discourage trying to use fp regs for a pointer. This also
10665 discourages fp regs with SImode because Pmode is an alias
10666 of SImode on this target. See PR target/48596. */
10667 int addend = (mode == Pmode) ? 40 : 0;
10668
10669 return ((TARGET_FMOVD ? 8 : 12) + addend)
10670 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10671 }
10672
10673 if ((dstclass == FPUL_REGS
10674 && REGCLASS_HAS_GENERAL_REG (srcclass))
10675 || (srcclass == FPUL_REGS
10676 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10677 return 5;
10678
10679 if ((dstclass == FPUL_REGS
10680 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10681 || (srcclass == FPUL_REGS
10682 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10683 return 7;
10684
10685 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10686 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10687 return 4;
10688
10689 if (TARGET_FMOVD
10690 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10691 && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10692 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10693
10694 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10695 }
10696
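/* Load a pointer value from memory at ADDR into REG, sign extending it
from ptr_mode to Pmode when the two modes differ.  Used by the thunk
generation code below.  */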
10697 static rtx
10698 emit_load_ptr (rtx reg, rtx addr)
10699 {
10700 rtx mem = gen_const_mem (ptr_mode, addr);
10701
10702 if (Pmode != ptr_mode)
10703 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10704 return emit_move_insn (reg, mem);
10705 }
10706
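/* Implement TARGET_ASM_OUTPUT_MI_THUNK: emit a thunk that adjusts the
incoming 'this' pointer by DELTA and, when VCALL_OFFSET is nonzero, by an
additional offset loaded from the vtable, and then tail calls FUNCTION.  */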
10707 static void
10708 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10709 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10710 tree function)
10711 {
10712 CUMULATIVE_ARGS cum;
10713 int structure_value_byref = 0;
10714 rtx this_rtx, this_value, sibcall, funexp;
10715 rtx_insn *insns;
10716 tree funtype = TREE_TYPE (function);
10717 int simple_add = CONST_OK_FOR_ADD (delta);
10718 int did_load = 0;
10719 rtx scratch0, scratch1, scratch2;
10720
10721 reload_completed = 1;
10722 epilogue_completed = 1;
10723 crtl->uses_only_leaf_regs = 1;
10724
10725 emit_note (NOTE_INSN_PROLOGUE_END);
10726
10727 /* Find the "this" pointer. We have such a wide range of ABIs for the
10728 SH that it's best to do this completely machine independently.
10729 "this" is passed as first argument, unless a structure return pointer
10730 comes first, in which case "this" comes second. */
10731 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10732 #ifndef PCC_STATIC_STRUCT_RETURN
10733 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10734 structure_value_byref = 1;
10735 #endif /* not PCC_STATIC_STRUCT_RETURN */
10736 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10737 {
10738 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10739
10740 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
10741 }
10742 this_rtx
10743 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
10744
10745 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10746 static chain pointer (even if you can't have nested virtual functions
10747 right now, someone might implement them sometime), and the rest of the
10748 registers are used for argument passing, are callee-saved, or reserved. */
10749 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10750 -ffixed-reg has been used. */
10751 if (! call_used_regs[0] || fixed_regs[0])
10752 error ("r0 needs to be available as a call-clobbered register");
10753 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10754
10755 {
10756 if (call_used_regs[1] && ! fixed_regs[1])
10757 scratch1 = gen_rtx_REG (ptr_mode, 1);
10758 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10759 to the location where struct values are to be returned. */
10760 if (call_used_regs[3] && ! fixed_regs[3])
10761 scratch2 = gen_rtx_REG (Pmode, 3);
10762 }
10763
10764 this_value = plus_constant (Pmode, this_rtx, delta);
10765 if (vcall_offset
10766 && (simple_add || scratch0 != scratch1)
10767 && strict_memory_address_p (ptr_mode, this_value))
10768 {
10769 emit_load_ptr (scratch0, this_value);
10770 did_load = 1;
10771 }
10772
10773 if (!delta)
10774 ; /* Do nothing. */
10775 else if (simple_add)
10776 emit_move_insn (this_rtx, this_value);
10777 else
10778 {
10779 emit_move_insn (scratch1, GEN_INT (delta));
10780 emit_insn (gen_add2_insn (this_rtx, scratch1));
10781 }
10782
10783 if (vcall_offset)
10784 {
10785 rtx offset_addr;
10786
10787 if (!did_load)
10788 emit_load_ptr (scratch0, this_rtx);
10789
10790 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
10791 if (strict_memory_address_p (ptr_mode, offset_addr))
10792 ; /* Do nothing. */
10793 else if (scratch0 != scratch1)
10794 {
10795 /* scratch0 != scratch1, and we have indexed loads. Get better
10796 schedule by loading the offset into r1 and using an indexed
10797 load - then the load of r1 can issue before the load from
10798 (this_rtx + delta) finishes. */
10799 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10800 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10801 }
10802 else if (CONST_OK_FOR_ADD (vcall_offset))
10803 {
10804 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10805 offset_addr = scratch0;
10806 }
10807 else if (scratch0 != scratch1)
10808 {
10809 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10810 emit_insn (gen_add2_insn (scratch0, scratch1));
10811 offset_addr = scratch0;
10812 }
10813 else
10814 gcc_unreachable (); /* FIXME */
10815 emit_load_ptr (scratch0, offset_addr);
10816
10817 if (Pmode != ptr_mode)
10818 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10819 emit_insn (gen_add2_insn (this_rtx, scratch0));
10820 }
10821
10822 /* Generate a tail call to the target function. */
10823 if (! TREE_USED (function))
10824 {
10825 assemble_external (function);
10826 TREE_USED (function) = 1;
10827 }
10828 funexp = XEXP (DECL_RTL (function), 0);
10829 /* If the function is overridden, so is the thunk, hence we don't
10830 need GOT addressing even if this is a public symbol. */
10831 #if 0
10832 if (TARGET_SH1 && ! flag_weak)
10833 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10834 else
10835 #endif
10836 if (TARGET_SH2 && flag_pic)
10837 {
10838 if (TARGET_FDPIC)
10839 {
10840 sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
10841 XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
10842 }
10843 else
10844 {
10845 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10846 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10847 }
10848 }
10849 else
10850 {
10851 emit_move_insn (scratch2, funexp);
10852 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10853 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10854 }
10855 sibcall = emit_call_insn (sibcall);
10856 SIBLING_CALL_P (sibcall) = 1;
10857 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10858 emit_barrier ();
10859
10860 /* Run just enough of rest_of_compilation to do scheduling and get
10861 the insns emitted. Note that use_thunk calls
10862 assemble_start_function and assemble_end_function. */
10863
10864 insns = get_insns ();
10865
10866 if (optimize > 0)
10867 {
10868 if (! cfun->cfg)
10869 init_flow (cfun);
10870 split_all_insns_noflow ();
10871 }
10872
10873 sh_reorg ();
10874 shorten_branches (insns);
10875 final_start_function (insns, file, 1);
10876 final (insns, file, 1);
10877 final_end_function ();
10878
10879 reload_completed = 0;
10880 epilogue_completed = 0;
10881 }
10882
10883 /* Return an RTX pair for the address and call site label of a function
10884 NAME of kind KIND, placing the result in TARGET if not NULL. For
10885 SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
10886 (const_int 0) if jsr should be used, or a label_ref if bsrf should
10887 be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
10888 address of the function itself, not a function descriptor, so they
10889 can only be used with functions not using the FDPIC register that
10890 are known to be called directly without a PLT entry. */
10891
10892 function_symbol_result
10893 function_symbol (rtx target, const char *name, sh_function_kind kind)
10894 {
10895 /* If this is not an ordinary function, the name usually comes from a
10896 string literal or an sprintf buffer. Make sure we use the same
10897 string consistently, so that cse will be able to unify address loads. */
10898 if (kind != FUNCTION_ORDINARY)
10899 name = IDENTIFIER_POINTER (get_identifier (name));
10900 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
10901 rtx lab = const0_rtx;
10902 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10903 if (flag_pic)
10904 switch (kind)
10905 {
10906 case FUNCTION_ORDINARY:
10907 break;
10908 case SFUNC_GOT:
10909 {
10910 rtx reg = target ? target : gen_reg_rtx (Pmode);
10911
10912 emit_insn (gen_symGOT2reg (reg, sym));
10913 sym = reg;
10914 break;
10915 }
10916 case SFUNC_STATIC:
10917 {
10918 rtx reg = target ? target : gen_reg_rtx (Pmode);
10919
10920 if (TARGET_FDPIC)
10921 {
10922 /* We use PC-relative calls, since GOTOFF can only refer
10923 to writable data. This works along with sh_sfunc_call. */
10924 lab = PATTERN (gen_call_site ());
10925 emit_insn (gen_sym_label2reg (reg, sym, lab));
10926 }
10927 else
10928 {
10929 /* ??? To allow cse to work, we use GOTOFF relocations.
10930 We could add combiner patterns to transform this into
10931 straight pc-relative calls with sym2PIC / bsrf when
10932 label load and function call are still 1:1 and in the
10933 same basic block during combine. */
10934 emit_insn (gen_symGOTOFF2reg (reg, sym));
10935 }
10936
10937 sym = reg;
10938 break;
10939 }
10940 }
10941 if (target && sym != target)
10942 {
10943 emit_move_insn (target, sym);
10944 return function_symbol_result (target, lab);
10945 }
10946 return function_symbol_result (sym, lab);
10947 }
10948
10949 /* Find the number of the first general purpose register whose bit is
10950 set in S, or -1 if there is none. */
10951 static int
10952 scavenge_reg (HARD_REG_SET *s)
10953 {
10954 for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10955 if (TEST_HARD_REG_BIT (*s, r))
10956 return r;
10957 return -1;
10958 }
10959
10960 rtx
10961 sh_get_pr_initial_val (void)
10962 {
10963 /* If we haven't finished rtl generation, there might be a nonlocal label
10964 that we haven't seen yet.
10965 ??? get_hard_reg_initial_val fails if it is called after register
10966 allocation has started, unless it has been called before for the
10967 same register. And even then, we end up in trouble if we didn't use
10968 the register in the same basic block before. So call
10969 get_hard_reg_initial_val now and wrap it in an unspec if we might
10970 need to replace it. */
10971 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10972 combine can put the pseudo returned by get_hard_reg_initial_val into
10973 instructions that need a general purpose register, which will fail to
10974 be recognized when the pseudo becomes allocated to PR. */
10975 rtx val = get_hard_reg_initial_val (Pmode, PR_REG);
10976 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10977 }
10978
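/* Try to expand an scc insn where operands[1] is an EQ/NE comparison of the
T bit register (operands[2]) against a constant (operands[3]), storing the
result in operands[0] via movt / movnegt or a constant load.  Return true
on success, false if the operands do not have this form.  */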
10979 bool
10980 sh_expand_t_scc (rtx operands[])
10981 {
10982 enum rtx_code code = GET_CODE (operands[1]);
10983 rtx target = operands[0];
10984 rtx op0 = operands[2];
10985 rtx op1 = operands[3];
10986 rtx result = target;
10987
10988 if (!REG_P (op0) || REGNO (op0) != T_REG
10989 || !CONST_INT_P (op1))
10990 return false;
10991 if (!REG_P (result))
10992 result = gen_reg_rtx (SImode);
10993 HOST_WIDE_INT val = INTVAL (op1);
10994 if ((code == EQ && val == 1) || (code == NE && val == 0))
10995 emit_insn (gen_movt (result, get_t_reg_rtx ()));
10996 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10997 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
10998 else if (code == EQ || code == NE)
10999 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11000 else
11001 return false;
11002 if (result != target)
11003 emit_move_insn (target, result);
11004 return true;
11005 }
11006
11007 /* INSN is an sfunc; return the rtx that describes the address used. */
11008 static rtx
11009 extract_sfunc_addr (rtx insn)
11010 {
11011 rtx pattern = PATTERN (insn);
11012 const int len = XVECLEN (pattern, 0);
11013 for (int i = 0; i < len; i++)
11014 {
11015 rtx part = XVECEXP (pattern, 0, i);
11016 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11017 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11018 return XEXP (part, 0);
11019 }
11020 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11021 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11022 }
11023
11024 /* Verify that the register in use_sfunc_addr still agrees with the address
11025 used in the sfunc. This prevents fill_slots_from_thread from changing
11026 use_sfunc_addr.
11027 INSN is the use_sfunc_addr instruction, and REG is the register it
11028 guards. */
11029 bool
11030 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
11031 {
11032 /* Search for the sfunc. It should really come right after INSN. */
11033 while ((insn = NEXT_INSN (insn)))
11034 {
11035 if (LABEL_P (insn) || JUMP_P (insn))
11036 break;
11037 if (! INSN_P (insn))
11038 continue;
11039
11040 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
11041 insn = seq->insn (0);
11042 if (GET_CODE (PATTERN (insn)) != PARALLEL
11043 || get_attr_type (insn) != TYPE_SFUNC)
11044 continue;
11045 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11046 }
11047 gcc_unreachable ();
11048 }
11049
11050 /* This function returns a constant rtx that represents 2**15 / pi in
11051 SFmode. It's used to scale an SFmode angle, in radians, to a fixed-point
11052 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
11053 static GTY(()) rtx sh_fsca_sf2int_rtx;
11054
11055 rtx
11056 sh_fsca_sf2int (void)
11057 {
11058 if (! sh_fsca_sf2int_rtx)
11059 {
11060 REAL_VALUE_TYPE rv;
11061
11062 real_from_string (&rv, "10430.378350470453");
11063 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11064 }
11065
11066 return sh_fsca_sf2int_rtx;
11067 }
11068
11069 /* This function returns a constant rtx that represents pi / 2**15 in
11070 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction of a
11071 full circle back to an SFmode angle in radians, i.e. 0x10000 maps
11072 to 2*pi. */
11073 static GTY(()) rtx sh_fsca_int2sf_rtx;
11074
11075 rtx
11076 sh_fsca_int2sf (void)
11077 {
11078 if (! sh_fsca_int2sf_rtx)
11079 {
11080 REAL_VALUE_TYPE rv;
11081
11082 real_from_string (&rv, "9.587379924285257e-5");
11083 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11084 }
11085
11086 return sh_fsca_int2sf_rtx;
11087 }
11088
11089 /* Initialize the CUMULATIVE_ARGS structure. */
11090 void
11091 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11092 tree fntype,
11093 rtx libname ATTRIBUTE_UNUSED,
11094 tree fndecl,
11095 signed int n_named_args,
11096 machine_mode mode)
11097 {
11098 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11099 pcum->free_single_fp_reg = 0;
11100 pcum->outgoing = n_named_args != -1;
11101
11102 /* FIXME: Should we check TARGET_HITACHI here ??? */
11103 pcum->renesas_abi = sh_attr_renesas_p (fntype);
11104
11105 if (fntype)
11106 {
11107 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11108 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11109 pcum->prototype_p = prototype_p (fntype);
11110 pcum->arg_count [(int) SH_ARG_INT] = false;
11111 }
11112 else
11113 {
11114 pcum->arg_count [(int) SH_ARG_INT] = 0;
11115 pcum->prototype_p = false;
11116 if (mode != VOIDmode)
11117 {
11118 /* If the default ABI is the Renesas ABI then all library
11119 calls must assume that the library will be using the
11120 Renesas ABI. So if the function would return its result
11121 in memory then we must force the address of this memory
11122 block onto the stack. Ideally we would like to call
11123 targetm.calls.return_in_memory() here but we do not have
11124 the TYPE or the FNDECL available so we synthesize the
11125 contents of that function as best we can. */
11126 pcum->force_mem =
11127 (TARGET_DEFAULT & MASK_HITACHI)
11128 && (mode == BLKmode
11129 || (GET_MODE_SIZE (mode) > 4
11130 && !(mode == DFmode
11131 && TARGET_FPU_DOUBLE)));
11132 }
11133 else
11134 pcum->force_mem = false;
11135 }
11136 }
11137
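/* Return an rtx that truncates X to MODE.  If X is a sign or zero
extension, look through it: an inner value that already has MODE is
returned directly, a wider inner value is truncated instead, and a
narrower one is re-extended to MODE when the extension kind is compatible
with NEED_SIGN_EXT.  */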
11138 rtx
11139 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
11140 {
11141 enum rtx_code code = TRUNCATE;
11142
11143 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11144 {
11145 rtx inner = XEXP (x, 0);
11146 machine_mode inner_mode = GET_MODE (inner);
11147
11148 if (inner_mode == mode)
11149 return inner;
11150 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11151 x = inner;
11152 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11153 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11154 {
11155 code = GET_CODE (x);
11156 x = inner;
11157 }
11158 }
11159 return gen_rtx_fmt_e (code, mode, x);
11160 }
11161
11162 /* Load and store depend on the highpart of the address. However,
11163 set_attr_alternative does not give well-defined results before reload,
11164 so we must look at the rtl ourselves to see if any of the feeding
11165 registers is used in a memref.
11166
11167 Return true iff INSN contains a MEM. */
11168 bool
11169 sh_contains_memref_p (rtx insn)
11170 {
11171 subrtx_iterator::array_type array;
11172 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
11173 if (MEM_P (*iter))
11174 return true;
11175 return false;
11176 }
11177
11178 /* Return true iff INSN loads a banked register. */
11179 bool
11180 sh_loads_bankedreg_p (rtx insn)
11181 {
11182 if (GET_CODE (PATTERN (insn)) == SET)
11183 {
11184 rtx op = SET_DEST (PATTERN (insn));
11185 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11186 return true;
11187 }
11188
11189 return false;
11190 }
11191
11192 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
11193 static reg_class_t
11194 sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
11195 {
11196 return rclass;
11197 }
11198
11199 /* Implement TARGET_SECONDARY_RELOAD. */
11200 static reg_class_t
11201 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
11202 machine_mode mode, secondary_reload_info *sri)
11203 {
11204 enum reg_class rclass = (enum reg_class) rclass_i;
11205
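/* GBR based memory accesses can use only R0 as the data register, so
request an R0 secondary reload for them unless the reload already targets
R0.  GBR itself needs no secondary reload.  */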
11206 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
11207 && REG_P (XEXP (XEXP (x, 0), 0))
11208 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
11209 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11210
11211 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
11212 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11213
11214 if (REG_P (x) && REGNO (x) == GBR_REG)
11215 return NO_REGS;
11216
11217 if (in_p)
11218 {
11219 if (REGCLASS_HAS_FP_REG (rclass)
11220 && immediate_operand ((x), mode)
11221 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
11222 switch (mode)
11223 {
11224 case SFmode:
11225 sri->icode = CODE_FOR_reload_insf__frn;
11226 return NO_REGS;
11227 case DFmode:
11228 sri->icode = CODE_FOR_reload_indf__frn;
11229 return NO_REGS;
11230 case SImode:
11231 /* ??? If we knew that we are in the appropriate mode -
11232 single precision - we could use a reload pattern directly. */
11233 return FPUL_REGS;
11234 default:
11235 abort ();
11236 }
11237 if (rclass == FPUL_REGS
11238 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11239 || REGNO (x) == T_REG))
11240 || GET_CODE (x) == PLUS))
11241 return GENERAL_REGS;
11242 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11243 {
11244 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11245 return GENERAL_REGS;
11246 else if (mode == SFmode)
11247 return FP_REGS;
11248 sri->icode = CODE_FOR_reload_insi__i_fpul;
11249 return NO_REGS;
11250 }
11251 if (rclass == FPSCR_REGS
11252 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11253 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11254 return GENERAL_REGS;
11255 } /* end of input-only processing. */
11256
11257 if (((REGCLASS_HAS_FP_REG (rclass)
11258 && (REG_P (x)
11259 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11260 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11261 && TARGET_FMOVD))))
11262 || (REGCLASS_HAS_GENERAL_REG (rclass)
11263 && REG_P (x)
11264 && FP_REGISTER_P (REGNO (x))))
11265 && (mode == SFmode || mode == SImode))
11266 return FPUL_REGS;
11267 if ((rclass == FPUL_REGS
11268 || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode))
11269 && (MEM_P (x)
11270 || (REG_P (x)
11271 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11272 || REGNO (x) == T_REG
11273 || system_reg_operand (x, VOIDmode)))))
11274 {
11275 if (rclass == FPUL_REGS)
11276 return GENERAL_REGS;
11277 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
11278 }
11279
11280 if ((rclass == MAC_REGS || rclass == PR_REGS)
11281 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11282 && rclass != REGNO_REG_CLASS (REGNO (x)))
11283 return GENERAL_REGS;
11284
11285 /* If we get here, fall back to loading the FPUL register through general
11286 registers. This case can happen when the movsi_ie insn is picked
11287 initially to load/store the FPUL register from/to another register,
11288 and then the other register is allocated on the stack. */
11289 if (rclass == FPUL_REGS && true_regnum (x) == -1)
11290 return GENERAL_REGS;
11291
11292 /* Force mov.b / mov.w displacement addressing insn to use R0 as
11293 the other operand.
11294 On SH2A we could also just leave it alone here, which would result in a
11295 4 byte move insn being generated instead. However, for this to work
11296 the insns must have the appropriate alternatives. */
11297 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11298 && satisfies_constraint_Sdd (x)
11299 && sh_disp_addr_displacement (x)
11300 <= sh_max_mov_insn_displacement (mode, false))
11301 return R0_REGS;
11302
11303 /* When reload is trying to address a QImode or HImode subreg on the stack,
11304 force any subreg byte into R0_REGS, as this is going to become a
11305 displacement address.
11306 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
11307 is on the stack, the memref to it might already require a displacement
11308 and that has to be added to the final address. At this point we don't
11309 know the cumulative displacement so we assume the worst case. */
11310 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11311 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
11312 return R0_REGS;
11313
11314 return NO_REGS;
11315 }
11316
11317 /* Return true if SUBST can't safely replace its equivalent during RA. */
11318 static bool
11319 sh_cannot_substitute_mem_equiv_p (rtx)
11320 {
11321 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
11322 uses R0 and may cause spill failure when R0 is already used.
11323 We have to return true for that case at least.
11324 Moreover, SH puts strong pressure on R0 and does not have enough hard
11325 registers to make the equiv substitution win in size or speed on
11326 average working sets. The pseudos produced to hold the equiv values
11327 can't get good hard registers in the bad cases and end up as memory
11328 save/restore insns, which make the code worse. */
11329 return true;
11330 }
11331
11332 /* Return true if *DISP can be legitimized; if so, split it into a valid mov displacement (*DISP) and an offset adjustment (*OFFS). */
11333 static bool
11334 sh_legitimize_address_displacement (rtx *disp, rtx *offs,
11335 machine_mode mode)
11336 {
11337 if ((TARGET_FPU_DOUBLE && mode == DFmode)
11338 || (TARGET_SH2E && mode == SFmode))
11339 return false;
11340
11341 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
11342 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
11343 {
11344 *disp = adj.mov_disp;
11345 *offs = adj.offset_adjust;
11346 return true;
11347 }
11348
11349 return false;
11350 }
11351
11352 /* Return true if the movsf insn should be split with an additional
11353 register. */
11354 bool
11355 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
11356 {
11357 /* op0 == op1 */
11358 if (rtx_equal_p (op0, op1))
11359 return true;
11360 /* fy, FQ, reg */
11361 if (GET_CODE (op1) == CONST_DOUBLE
11362 && ! satisfies_constraint_G (op1)
11363 && ! satisfies_constraint_H (op1)
11364 && REG_P (op0)
11365 && REG_P (op2))
11366 return true;
11367 /* f, r, y */
11368 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
11369 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
11370 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11371 return true;
11372 /* r, f, y */
11373 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
11374 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
11375 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11376 return true;
11377
11378 return false;
11379 }
11380
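/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */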
11381 static void
11382 sh_conditional_register_usage (void)
11383 {
11384 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
11385 if (! VALID_REGISTER_P (regno))
11386 fixed_regs[regno] = call_used_regs[regno] = 1;
11387 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
11388 if (flag_pic)
11389 {
11390 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11391 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11392 }
11393 if (TARGET_FDPIC)
11394 {
11395 fixed_regs[PIC_REG] = 1;
11396 call_used_regs[PIC_REG] = 1;
11397 call_really_used_regs[PIC_REG] = 1;
11398 }
11399 /* Renesas saves and restores mac registers on call. */
11400 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
11401 {
11402 call_really_used_regs[MACH_REG] = 0;
11403 call_really_used_regs[MACL_REG] = 0;
11404 }
11405
11406 for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
11407 if (! fixed_regs[regno] && call_really_used_regs[regno])
11408 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
11409
11410 call_really_used_regs[FPSCR_MODES_REG] = 0;
11411 call_really_used_regs[FPSCR_STAT_REG] = 0;
11412 }
11413
11414 /* Implement TARGET_LEGITIMATE_CONSTANT_P
11415
11416 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
11417 static bool
11418 sh_legitimate_constant_p (machine_mode mode, rtx x)
11419 {
11420 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
11421 {
11422 rtx base, offset;
11423 split_const (x, &base, &offset);
11424
11425 if (GET_CODE (base) == SYMBOL_REF
11426 && !offset_within_block_p (base, INTVAL (offset)))
11427 return false;
11428 }
11429
11430 if (TARGET_FDPIC
11431 && (SYMBOLIC_CONST_P (x)
11432 || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
11433 && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
11434 return false;
11435
11436 return GET_CODE (x) != CONST_DOUBLE
11437 || mode == DFmode || mode == SFmode
11438 || mode == DImode || GET_MODE (x) == VOIDmode;
11439 }
11440
11441 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11442
11443 static void
11444 sh_init_sync_libfuncs (void)
11445 {
11446 init_sync_libfuncs (UNITS_PER_WORD);
11447 }
11448
11449 /* Return true if it is appropriate to emit `ret' instructions in the
11450 body of a function. */
11451 bool
11452 sh_can_use_simple_return_p (void)
11453 {
11454 if (! reload_completed || frame_pointer_needed)
11455 return false;
11456
11457 /* Moving the prologue around doesn't reduce the size. */
11458 if (optimize_function_for_size_p (cfun))
11459 return false;
11460
11461 /* Finally, allow a frame of at most 4 bytes, i.e. room for the pr save. */
11462 HARD_REG_SET live_regs_mask;
11463 int d = calc_live_regs (&live_regs_mask);
11464
11465 if (rounded_frame_size (d) > 4)
11466 return false;
11467
11468 return true;
11469 }
11470
11471 /*------------------------------------------------------------------------------
11472 Address mode optimization support code
11473 */
11474
11475 typedef HOST_WIDE_INT disp_t;
11476 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
11477 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
11478 static const disp_t INVALID_DISP = MAX_DISP;
11479
11480 /* A memory reference which is described by a base register and a
11481 displacement. */
11482 class base_reg_disp
11483 {
11484 public:
11485 base_reg_disp (rtx br, disp_t d);
11486
11487 bool is_reg (void) const;
11488 bool is_disp (void) const;
11489 rtx reg (void) const;
11490 disp_t disp (void) const;
11491
11492 private:
11493 rtx reg_;
11494 disp_t disp_;
11495 };
11496
11497 inline
11498 base_reg_disp::base_reg_disp (rtx br, disp_t d)
11499 : reg_ (br), disp_ (d)
11500 {
11501 }
11502
11503 inline bool
11504 base_reg_disp::is_reg (void) const
11505 {
11506 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
11507 }
11508
11509 inline bool
11510 base_reg_disp::is_disp (void) const
11511 {
11512 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
11513 }
11514
11515 inline rtx
11516 base_reg_disp::reg (void) const
11517 {
11518 return reg_;
11519 }
11520
11521 inline disp_t
11522 base_reg_disp::disp (void) const
11523 {
11524 return disp_;
11525 }
11526
11527 /* Find the base register and calculate the displacement for a given
11528 address rtx 'x'. */
11529 static base_reg_disp
11530 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
11531 rtx base_reg = NULL)
11532 {
11533 if (REG_P (x))
11534 {
11535 if (REGNO (x) == GBR_REG)
11536 return base_reg_disp (x, disp);
11537
11538 /* We've reached a hard-reg. This is probably the point where
11539 function args are copied to pseudos. Do not go any further and
11540 stick to the pseudo. If the original mem addr was in a hard reg
11541 from the beginning, it will become the base reg. */
11542 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
11543 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
11544
11545 /* Find the def of the reg and trace it. If there is more than one
11546 def and they are not all the same, assume it's not safe to proceed. */
11547 rtx_insn* last_i = NULL;
11548 rtx last_set = NULL;
11549 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
11550 d = DF_REF_NEXT_REG (d))
11551 {
11552 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
11553
11554 /* Accept multiple defs, as long as they are equal. */
11555 if (last_set == NULL || rtx_equal_p (last_set, set))
11556 {
11557 last_i = DF_REF_INSN (d);
11558 last_set = set;
11559 }
11560 else
11561 {
11562 last_i = NULL;
11563 last_set = NULL;
11564 break;
11565 }
11566 }
11567
11568 if (last_set != NULL && last_i != NULL)
11569 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
11570 XEXP (last_set, 0));
11571
11572 /* If we get here, no previous insn was found that sets the reg.
11573 The input reg is already the base reg. */
11574 return base_reg_disp (x, disp);
11575 }
11576
11577 else if (GET_CODE (x) == PLUS)
11578 {
11579 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
11580 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
11581
11582 /* Either left or right val must be a reg.
11583 We don't handle the case of 'reg + reg' here. */
11584 if (left_val.is_reg () && right_val.is_disp ())
11585 return base_reg_disp (left_val.reg (), left_val.disp ()
11586 + right_val.disp () + disp);
11587 else if (right_val.is_reg () && left_val.is_disp ())
11588 return base_reg_disp (right_val.reg (), right_val.disp ()
11589 + left_val.disp () + disp);
11590 else
11591 return base_reg_disp (base_reg, disp);
11592 }
11593
11594 else if (CONST_INT_P (x))
11595 return base_reg_disp (NULL, disp + INTVAL (x));
11596
11597 /* Didn't find anything useful. */
11598 return base_reg_disp (base_reg, disp);
11599 }
11600
11601 /* Given an insn and a memory operand, try to find an equivalent GBR
11602 based memory address and return the corresponding new memory address.
11603 Return NULL_RTX if not found. */
11604 rtx
11605 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
11606 {
11607 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
11608 return NULL_RTX;
11609
11610 /* Leave post/pre inc/dec or any other side effect addresses alone. */
11611 if (side_effects_p (XEXP (mem, 0)))
11612 return NULL_RTX;
11613
11614 /* When not optimizing there might be no dataflow available. */
11615 if (df == NULL)
11616 return NULL_RTX;
11617
11618 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
11619
11620 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
11621 {
11622 /* If GBR is marked as call clobbered we bail out if we see a call.
11623 FIXME: Actually should check if this mem refers to the gbr value
11624 before or after the call. If there is a store_gbr preceeding this
11625 mem, it's safe to use GBR for this mem.
11626
11627 If GBR is not marked as call clobbered, but there is some other
11628 def than a call, it's probably a load_gbr upon which we also
11629 bail out to be on the safe side.
11630 FIXME: Should check if we have a use-after-def case, such as
11631 the call case above. */
11632 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
11633 d = DF_REF_NEXT_REG (d))
11634 {
11635 if (CALL_P (DF_REF_INSN (d)))
11636 {
11637 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
11638 return NULL_RTX;
11639 else
11640 continue;
11641 }
11642 else
11643 return NULL_RTX;
11644 }
11645
11646 rtx disp = GEN_INT (gbr_disp.disp ());
11647 if (gbr_displacement (disp, GET_MODE (mem)))
11648 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
11649 }
11650
11651 return NULL_RTX;
11652 }
11653
11654 /*------------------------------------------------------------------------------
11655 Manual insn combine support code.
11656 */
11657
11658 /* Return true if the specified insn contains any UNSPECs or
11659 UNSPEC_VOLATILEs. */
11660 static bool
11661 sh_unspec_insn_p (rtx x)
11662 {
11663 subrtx_iterator::array_type array;
11664 FOR_EACH_SUBRTX (i, array, x, ALL)
11665 if (*i != NULL
11666 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
11667 return true;
11668
11669 return false;
11670 }
11671
11672 /* Return true if the register operands of the specified insn are modified
11673 between the specified from and to insns (exclusive of those two). */
11674 bool
11675 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
11676 const rtx_insn* from,
11677 const rtx_insn* to)
11678 {
11679 /* FIXME: Return true for multiple sets for now. */
11680 rtx s = single_set (operands_insn);
11681 if (s == NULL_RTX)
11682 return true;
11683
11684 subrtx_iterator::array_type array;
11685 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
11686 if (*i != NULL &&
11687 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
11688 return true;
11689
11690 return false;
11691 }
11692
11693 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
11694 negates the T bit and stores the result in the T bit. */
11695 bool
11696 sh_is_nott_insn (const rtx_insn* i)
11697 {
11698 return i != NULL && GET_CODE (PATTERN (i)) == SET
11699 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
11700 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
11701 }
11702
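/* Given an insn, check whether it's a 'movt' kind of insn, i.e. an insn
that copies the T bit into a general register, and return the destination
register rtx, or null.  */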
11703 rtx
11704 sh_movt_set_dest (const rtx_insn* i)
11705 {
11706 if (i == NULL)
11707 return NULL;
11708
11709 const_rtx p = PATTERN (i);
11710 return GET_CODE (p) == SET
11711 && arith_reg_dest (XEXP (p, 0), SImode)
11712 && t_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
11713 }
11714
11715 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
11716 that stores the negated T bit in a register, and return the destination
11717 register rtx, or null. */
11718 rtx
11719 sh_movrt_set_dest (const rtx_insn* i)
11720 {
11721 if (i == NULL)
11722 return NULL;
11723
11724 const_rtx p = PATTERN (i);
11725
11726 /* The negc movrt replacement is inside a parallel. */
11727 if (GET_CODE (p) == PARALLEL)
11728 p = XVECEXP (p, 0, 0);
11729
11730 return GET_CODE (p) == SET
11731 && arith_reg_dest (XEXP (p, 0), SImode)
11732 && negt_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
11733 }
11734
11735 /* Given an insn and a reg number, tell whether the reg dies or is unused
11736 after the insn. */
11737 bool
11738 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
11739 {
11740 return find_regno_note (i, REG_DEAD, regno) != NULL
11741 || find_regno_note (i, REG_UNUSED, regno) != NULL;
11742 }
11743
11744 /* Given an insn and a reg number, remove reg dead or reg unused notes to
11745 mark it as being used after the insn. */
11746 void
11747 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
11748 {
11749 if (rtx n = find_regno_note (i, REG_DEAD, regno))
11750 remove_note (i, n);
11751 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
11752 remove_note (i, n);
11753 }
11754
11755 /* Given an insn check if it contains any post/pre inc/dec mem operands and
11756 add the REG_INC notes accordingly.
11757 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
11758 FIXME: This function is currently used by peephole2 patterns because
11759 the peephole2 pass does not preserve REG_INC notes. If the notes
11760 are dropped the following passes will do wrong things. */
11761 rtx_insn*
11762 sh_check_add_incdec_notes (rtx_insn* i)
11763 {
11764 struct for_each_inc_dec_clb
11765 {
11766 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
11767 rtx dest, rtx src ATTRIBUTE_UNUSED,
11768 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
11769 {
11770 gcc_assert (REG_P (dest));
11771
11772 rtx_insn* i = (rtx_insn*)arg;
11773 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
11774 add_reg_note (i, REG_INC, dest);
11775
11776 return 0;
11777 }
11778 };
11779
11780 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
11781 return i;
11782 }
11783
11784 /* Given a move insn destination and a source, make sure that the move source
11785 operand is not a post-inc mem load with the same address reg as the
11786 destination. Returns the modified source operand with the post-inc removed
11787 if necessary. */
11788 rtx
11789 sh_remove_overlapping_post_inc (rtx dst, rtx src)
11790 {
11791 if (!MEM_P (src))
11792 return src;
11793
11794 rtx addr = XEXP (src, 0);
11795
11796 if (GET_CODE (addr) == POST_INC
11797 && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
11798 return replace_equiv_address (src, XEXP (addr, 0));
11799
11800 gcc_assert (GET_CODE (addr) != POST_MODIFY);
11801 return src;
11802 }
11803
11804 /* Emit a move insn that is safe to be used in peephole patterns. */
11805 rtx_insn*
11806 sh_peephole_emit_move_insn (rtx dst, rtx src)
11807 {
11808 return sh_check_add_incdec_notes (
11809 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
11810 }
11811
11812 /* Given an op rtx and an insn, try to find out whether the result of the
11813 specified op consists only of logical operations on T bit stores. */
11814 bool
11815 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
11816 {
11817 if (!logical_operator (op, SImode))
11818 return false;
11819
11820 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
11821 int op_is_t_count = 0;
11822
11823 for (int i = 0; i < 2; ++i)
11824 {
11825 if (t_reg_operand (ops[i], VOIDmode)
11826 || negt_reg_operand (ops[i], VOIDmode))
11827 op_is_t_count++;
11828
11829 else
11830 {
11831 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
11832 prev_nonnote_insn_bb);
11833 if (op_set.set_src == NULL_RTX)
11834 continue;
11835
11836 if (t_reg_operand (op_set.set_src, VOIDmode)
11837 || negt_reg_operand (op_set.set_src, VOIDmode)
11838 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
11839 op_is_t_count++;
11840 }
11841 }
11842
11843 return op_is_t_count == 2;
11844 }
11845
11846 /* Given the operand that is extended in a sign/zero extend insn, and the
11847 insn, try to figure out whether the sign/zero extension can be replaced
11848 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
11849 NULL_RTX otherwise. */
11850 rtx
11851 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
11852 {
11853 if (REG_P (extended_op))
11854 ; /* Do nothing - already a reg. */
11855 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
11856 extended_op = SUBREG_REG (extended_op);
11857 else
11858 return NULL_RTX;
11859
11860 /* Reg moves must be of the same mode. */
11861 if (GET_MODE (extended_op) != SImode)
11862 return NULL_RTX;
11863
11864 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
11865 if (s.set_src == NULL_RTX)
11866 return NULL_RTX;
11867
11868 if (t_reg_operand (s.set_src, VOIDmode)
11869 || negt_reg_operand (s.set_src, VOIDmode))
11870 return extended_op;
11871
11872 /* If the zero extended reg was formed by a logical operation, check the
11873 operands of the logical operation. If both originated from T bit
11874 stores the zero extension can be eliminated. */
11875 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
11876 return extended_op;
11877
11878 return NULL_RTX;
11879 }
11880
11881 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
11882 figure out whether it should be converted into a movt-xor sequence in
11883 the movrt_negc splitter.
11884 Returns true if insns have been modified and the splitter has succeeded. */
11885 bool
11886 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
11887 {
11888 /* In cases such as
11889 tst r4,r4
11890 mov #-1,r1
11891 negc r1,r1
11892 tst r4,r4
11893 we can replace the T bit clobbering negc with a movt-xor sequence and
11894 eliminate the redundant comparison.
11895 Because the xor insn depends on register allocation results, allow this
11896 only before reload. */
11897 if (!can_create_pseudo_p ())
11898 return false;
11899
11900 set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
11901 prev_nonnote_insn_bb);
11902 set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
11903 next_nonnote_insn_bb);
11904
11905 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
11906 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
11907 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11908 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
11909 t_before_negc.insn,
11910 t_after_negc.insn)
11911 && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11912 && !sh_unspec_insn_p (t_after_negc.insn)
11913 && !volatile_insn_p (PATTERN (t_after_negc.insn))
11914 && !side_effects_p (PATTERN (t_after_negc.insn))
11915 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
11916 {
11917 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
11918 set_insn_deleted (t_after_negc.insn);
11919 return true;
11920 }
11921 else
11922 return false;
11923 }
11924
11925 /* Given a reg and the current insn, see if the value of the reg originated
11926 from a sign or zero extension and return the discovered information. */
11927 sh_extending_set_of_reg
11928 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
11929 {
11930 if (reg == NULL)
11931 return sh_extending_set_of_reg (curr_insn);
11932
11933 if (SUBREG_P (reg))
11934 reg = SUBREG_REG (reg);
11935
11936 if (!REG_P (reg))
11937 return sh_extending_set_of_reg (curr_insn);
11938
11939 /* FIXME: Also search the predecessor basic blocks. It seems that checking
11940 only the adjacent predecessor blocks would cover most of the cases.
11941 Also try to look through the first extension that we hit. There are some
11942 cases where a zero_extend is followed by an (implicit) sign_extend, and it
11943 fails to see the sign_extend. */
11944 sh_extending_set_of_reg result =
11945 sh_find_set_of_reg (reg, curr_insn, prev_nonnote_insn_bb, true);
11946
11947 if (result.set_src != NULL)
11948 {
11949 if (GET_CODE (result.set_src) == SIGN_EXTEND
11950 || GET_CODE (result.set_src) == ZERO_EXTEND)
11951 {
11952 if (dump_file)
11953 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
11954 "explicitly sign/zero extended in insn %d\n",
11955 REGNO (reg), INSN_UID (result.insn));
11956 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
11957 result.ext_code = GET_CODE (result.set_src);
11958 }
11959 else if (MEM_P (result.set_src)
11960 && (GET_MODE (result.set_src) == QImode
11961 || GET_MODE (result.set_src) == HImode)
11962 && !sh_unspec_insn_p (result.insn))
11963 {
11964 /* On SH QIHImode memory loads always sign extend. However, in
11965 some cases where it seems that the higher bits are not
11966 interesting, the loads will not be expanded as sign extending
11967 insns, but as QIHImode loads into QIHImode regs. We report that
11968 the reg has been sign extended by the mem load. When it is used
11969 as such, we must convert the mem load into a sign extending insn,
11970 see also sh_extending_set_of_reg::use_as_extended_reg. */
11971 if (dump_file)
11972 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
11973 "implicitly sign extended in insn %d\n",
11974 REGNO (reg), INSN_UID (result.insn));
11975 result.from_mode = GET_MODE (result.set_src);
11976 result.ext_code = SIGN_EXTEND;
11977 }
11978 }
11979
11980 return result;
11981 }
11982
11983 /* Given a reg that is known to be sign or zero extended at some insn,
11984 take the appropriate measures so that the extended value can be used as
11985 a reg at the specified insn and return the resulting reg rtx. */
11986 rtx
11987 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
11988 {
11989 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
11990 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
11991 gcc_assert (from_mode == QImode || from_mode == HImode);
11992
11993 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
11994 {
11995 if (dump_file)
11996 fprintf (dump_file,
11997 "use_as_extended_reg: converting non-extending mem load in "
11998 "insn %d into sign-extending load\n", INSN_UID (insn));
11999
12000 rtx r = gen_reg_rtx (SImode);
12001 rtx_insn* i0;
12002 if (from_mode == QImode)
12003 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
12004 else if (from_mode == HImode)
12005 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
12006 else
12007 gcc_unreachable ();
12008
12009 emit_insn_after (
12010 gen_move_insn (XEXP (set_rtx, 0),
12011 gen_lowpart (GET_MODE (set_src), r)), i0);
12012 set_insn_deleted (insn);
12013 return r;
12014 }
12015 else
12016 {
12017 rtx extension_dst = XEXP (set_rtx, 0);
12018 if (GET_MODE (extension_dst) != SImode)
12019 extension_dst = simplify_gen_subreg (SImode, extension_dst,
12020 GET_MODE (extension_dst), 0);
12021 if (modified_between_p (extension_dst, insn, use_at_insn))
12022 {
12023 if (dump_file)
12024 fprintf (dump_file,
12025 "use_as_extended_reg: dest reg %d of extending insn %d is "
12026 "modified, inserting a reg-reg copy\n",
12027 REGNO (extension_dst), INSN_UID (insn));
12028
12029 rtx r = gen_reg_rtx (SImode);
12030 emit_insn_after (gen_move_insn (r, extension_dst), insn);
12031 return r;
12032 }
12033 else
12034 {
12035 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
12036 return extension_dst;
12037 }
12038 }
12039 }
12040
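/* Return true if the extended value can also be used in its unextended
QImode/HImode form, i.e. the operand of the extension is a plain
arithmetic register.  */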
12041 bool
12042 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
12043 {
12044 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
12045 && (from_mode == QImode || from_mode == HImode)
12046 && set_src != NULL)
12047 return arith_reg_operand (XEXP (set_src, 0), from_mode);
12048 else
12049 return false;
12050 }
12051
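/* Return the unextended QImode/HImode source of the extension as an SImode
subreg for use at USE_AT_INSN, copying it into a new pseudo right after
the extending insn if the original reg is modified in between.  */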
12052 rtx
12053 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
12054 {
12055 gcc_assert (can_use_as_unextended_reg ());
12056
12057 rtx r = XEXP (set_src, 0);
12058 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
12059
12060 if (modified_between_p (r, insn, use_at_insn))
12061 {
12062 rtx r1 = gen_reg_rtx (SImode);
12063 emit_insn_after (gen_move_insn (r1, r0), insn);
12064 return r1;
12065 }
12066 else
12067 {
12068 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
12069 ? REGNO (SUBREG_REG (r))
12070 : REGNO (r));
12071 return r0;
12072 }
12073 }
12074
12075 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
12076 perform the necessary checks on the operands and split it accordingly. */
12077 void
12078 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
12079 int subreg_offset, rtx operands[])
12080 {
12081 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
12082
12083 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
12084 curr_insn);
12085 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
12086 curr_insn);
12087
12088 /* If one of the operands is known to be zero extended, that's already
12089 sufficient to mask out the unwanted high bits. */
12090 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
12091 {
12092 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12093 operands[1]));
12094 return;
12095 }
12096 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
12097 {
12098 emit_insn (gen_tstsi_t (operands[0],
12099 eop1.use_as_extended_reg (curr_insn)));
12100 return;
12101 }
12102
12103 /* None of the operands seem to be zero extended.
12104 If both are sign extended it's OK, too. */
12105 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
12106 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
12107 {
12108 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12109 eop1.use_as_extended_reg (curr_insn)));
12110 return;
12111 }
12112
12113 /* Otherwise we have to insert a zero extension on one of the operands to
12114 mask out the unwanted high bits.
12115 Prefer the operand that has no known extension. */
12116 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
12117 std::swap (operands[0], operands[1]);
12118
12119 rtx tmp0 = gen_reg_rtx (SImode);
12120 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
12121 GET_MODE (operands[0]), subreg_offset);
12122 emit_insn (subreg_mode == QImode
12123 ? gen_zero_extendqisi2 (tmp0, tmp1)
12124 : gen_zero_extendhisi2 (tmp0, tmp1));
12125 emit_insn (gen_tstsi_t (tmp0, operands[1]));
12126 }
12127
12128 /* A helper class to increment/decrement a counter variable each time a
12129 function is entered/left. */
12130 class scope_counter
12131 {
12132 public:
12133 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
12134
12135 ~scope_counter (void)
12136 {
12137 --m_counter;
12138 gcc_assert (m_counter >= 0);
12139 }
12140
12141 int count (void) const { return m_counter; }
12142
12143 private:
12144 int& m_counter;
12145 };
12146
12147 /* Given an rtx x, determine whether the expression can be used to create
12148 an insn that calculates x and stores the result in the T bit.
12149 This is used by the 'treg_set_expr' predicate to construct insns sequences
12150 where T bit results are fed into other insns, such as addc, subc, negc
12151 insns.
12152
12153 FIXME: The patterns that expand 'treg_set_expr' operands tend to
12154 distinguish between 'positive' and 'negative' forms. For now this has to
12155 be done in the preparation code. We could also introduce
12156 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
12157 two different patterns for the 'positive' and 'negative' forms. However,
12158 the total amount of lines of code seems to be about the same and the
12159 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
12160 recog function would need to look inside the expression by temporarily
12161 splitting it. */
12162 static int sh_recog_treg_set_expr_reent_count = 0;
12163
12164 bool
12165 sh_recog_treg_set_expr (rtx op, machine_mode mode)
12166 {
12167 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
12168
12169 /* Limit the recursion count to avoid nested expressions which we can't
12170 resolve to a single treg set insn. */
12171 if (recursion.count () > 1)
12172 return false;
12173
12174 /* Early accept known possible operands before doing recog. */
12175 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)
12176 || negt_reg_operand (op, mode))
12177 return true;
12178
12179 /* Early reject impossible operands before doing recog.
12180 There are some (set ((t) (subreg ...))) patterns, but we must be careful
12181 not to allow any invalid reg-reg or mem-reg moves, or else other passes
12182 such as lower-subreg will bail out. Some insns such as SH4A movua are
12183 done with UNSPEC, so must reject those, too, or else it would result
12184 in an invalid reg -> treg move. */
12185 if (CONST_INT_P (op) || register_operand (op, mode)
12186 || memory_operand (op, mode) || sh_unspec_insn_p (op))
12187 return false;
12188
12189 if (!can_create_pseudo_p ())
12190 return false;
12191
12192 /* expand_debug_locations may call this to compute rtx costs at
12193 a very early stage. In that case, don't make new insns here to
12194 avoid codegen differences with -g. */
12195 if (currently_expanding_to_rtl)
12196 return false;
12197
12198 /* We are going to invoke recog in a re-entrant way and thus
12199 have to capture its current state and restore it afterwards. */
12200 recog_data_d prev_recog_data = recog_data;
12201
12202 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
12203 SET_PREV_INSN (i) = NULL;
12204 SET_NEXT_INSN (i) = NULL;
12205
12206 /* If the comparison op doesn't have a result mode, set it to SImode. */
12207 machine_mode prev_op_mode = GET_MODE (op);
12208 if (COMPARISON_P (op) && prev_op_mode == VOIDmode)
12209 PUT_MODE (op, SImode);
12210
12211 int result = recog (PATTERN (i), i, 0);
12212
12213 /* It seems there is no insn like that. Create a negated version and
12214 try again. If we hit a negated form, we'll allow that and append a
12215 nott sequence when splitting out the insns. Insns that do the split
12216 can then remove the trailing nott if they know how to deal with it. */
12217 if (result < 0 && COMPARISON_P (op))
12218 {
12219 machine_mode cmp_mode = GET_MODE (XEXP (op, 0));
12220 if (cmp_mode == VOIDmode)
12221 cmp_mode = GET_MODE (XEXP (op, 1));
12222
12223 rtx_code prev_code = GET_CODE (op);
12224 PUT_CODE (op, reverse_condition (GET_CODE (op)));
12225 result = recog (PATTERN (i), i, 0);
12226 PUT_CODE (op, prev_code);
12227 }
12228
12229 PUT_MODE (op, prev_op_mode);
12230 recog_data = prev_recog_data;
12231 return result >= 0;
12232 }
12233
12234 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
12235 This can be used as a condition for insn/split patterns to allow certain
12236 T bit setting patterns only to be matched as sub expressions of other
12237 patterns. */
12238 bool
12239 sh_in_recog_treg_set_expr (void)
12240 {
12241 return sh_recog_treg_set_expr_reent_count > 0;
12242 }
12243
12244 /* Given an rtx x, which is assumed to be some expression that has been
12245 matched by the 'treg_set_expr' predicate before, split and emit the
12246 insns that are necessary to calculate the expression and store the result
12247 in the T bit.
12248 The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
12249 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
12250 'delete_insn' which then causes the DF parts to bail out, because we
12251 currently are inside another gen_split* function and would invoke
12252 'try_split' in a reentrant way. */
12253 static std::pair<rtx_insn*, rtx_insn*>
12254 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
12255 {
12256 if (dump_file)
12257 {
12258 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
12259 print_rtl_single (dump_file, i);
12260 fprintf (dump_file, "\n");
12261 }
12262
12263 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
12264
12265 if (seq == NULL)
12266 return std::make_pair (i, i);
12267
12268 /* Avoid infinite splitter loops if any insn of the result matches
12269 the original pattern. */
12270 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
12271 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
12272 return std::make_pair (i, i);
12273
12274 unshare_all_rtl_in_chain (seq);
12275
12276 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
12277 a linked list, replace the single insn with the new insns. */
12278 rtx_insn* seqlast = seq;
12279 while (NEXT_INSN (seqlast) != NULL)
12280 seqlast = NEXT_INSN (seqlast);
12281
12282 if (rtx_insn* iprev = PREV_INSN (i))
12283 SET_NEXT_INSN (iprev) = seq;
12284 if (rtx_insn* inext = NEXT_INSN (i))
12285 SET_PREV_INSN (inext) = seqlast;
12286
12287 SET_PREV_INSN (seq) = PREV_INSN (i);
12288 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
12289
12290 SET_PREV_INSN (i) = NULL;
12291 SET_NEXT_INSN (i) = NULL;
12292
12293 /* Recursively split all insns. */
12294 for (i = seq; ; i = NEXT_INSN (i))
12295 {
12296 std::pair<rtx_insn*, rtx_insn*> ii =
12297 sh_try_split_insn_simple (i, curr_insn, n + 1);
12298 if (i == seq)
12299 seq = ii.first;
12300 if (i == seqlast)
12301 {
12302 seqlast = ii.second;
12303 break;
12304 }
12305 i = ii.second;
12306 }
12307
12308 return std::make_pair (seq, seqlast);
12309 }
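
/* For example (only a sketch of the general shape): if the initial
   (set (reg T) (expr)) insn splits into two insns A and B, and A in turn
   splits into A1 and A2, the recursion above returns the pair (A1, B)
   with the chain A1 -> A2 -> B linked in place of the original insn.  */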
12310
12311 sh_treg_insns
12312 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
12313 {
12314 if (t_reg_operand (x, VOIDmode))
12315 return sh_treg_insns ();
12316
12317 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
12318
12319 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
12320 SET_PREV_INSN (i) = NULL;
12321 SET_NEXT_INSN (i) = NULL;
12322
12323 if (dump_file)
12324 {
12325 fprintf (dump_file, "split_treg_set_expr insn:\n");
12326 print_rtl (dump_file, i);
12327 fprintf (dump_file, "\n");
12328 }
12329
12330 /* If the insn is not found, we will try a negated form and append
12331 a nott. */
12332 bool append_nott = false;
12333
12334 /* We are going to invoke recog/split_insns in a re-entrant way and thus
12335 have to capture its current state and restore it afterwards. */
12336 recog_data_d prev_recog_data = recog_data;
12337
12338 if (negt_reg_operand (x, GET_MODE (x)))
12339 {
12340 /* This is a normal movt followed by a nott. It will be converted
12341 into a movrt after initial expansion. */
12342 XEXP (PATTERN (i), 1) = get_t_reg_rtx ();
12343 append_nott = true;
12344 }
12345 else
12346 {
12347 /* If the comparison op doesn't have a mode set, set it to SImode. */
12348 if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode)
12349 PUT_MODE (x, SImode);
12350
12351 int insn_code = recog (PATTERN (i), i, 0);
12352
12353 if (insn_code < 0 && COMPARISON_P (x))
12354 {
12355 machine_mode cmp_mode = GET_MODE (XEXP (x, 0));
12356 if (cmp_mode == VOIDmode)
12357 cmp_mode = GET_MODE (XEXP (x, 1));
12358
12359 PUT_CODE (x, reverse_condition (GET_CODE (x)));
12360 insn_code = recog (PATTERN (i), i, 0);
12361 append_nott = true;
12362 }
12363
12364 gcc_assert (insn_code >= 0);
12365 }
12366
12367 /* Try to recursively split the insn. Some insns might refuse to split
12368 any further while we are in the treg_set_expr splitting phase. They
12369 will be emitted as part of the outer insn and then split again. */
12370 std::pair<rtx_insn*, rtx_insn*> insnlist =
12371 sh_try_split_insn_simple (i, curr_insn);
12372
12373 /* Restore recog state. */
12374 recog_data = prev_recog_data;
12375
12376 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
12377 ? insnlist.second
12378 : NULL;
12379 if (dump_file)
12380 {
12381 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
12382 print_rtl (dump_file, insnlist.first);
12383 fprintf (dump_file, "\n");
12384
12385 if (nott_insn != NULL)
12386 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
12387 }
12388
12389 emit_insn (insnlist.first);
12390
12391 if (nott_insn != NULL && append_nott)
12392 {
12393 if (dump_file)
12394 fprintf (dump_file, "removing trailing nott\n");
12395 remove_insn (nott_insn);
12396 nott_insn = NULL;
12397 append_nott = false;
12398 }
12399
12400 if (append_nott)
12401 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
12402
12403 rtx_insn* first_insn = get_insns ();
12404
12405 if (dump_file)
12406 {
12407 fprintf (dump_file, "resulting insns:\n");
12408 print_rtl (dump_file, first_insn);
12409 fprintf (dump_file, "\n");
12410 }
12411
12412 return sh_treg_insns (first_insn, nott_insn);
12413 }
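
/* A hypothetical example of the negated path above: if the expression is
   (ne:SI (reg:SI 123) (reg:SI 124)) and only the eq form has a matching
   T bit setting pattern, the condition is reversed to eq, the eq insn is
   emitted and a trailing nott is appended so that T still receives the
   original ne result.  The returned sh_treg_insns carries that nott insn,
   which lets callers that can consume an inverted T bit directly remove
   it again.  */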
12414
12415 /*------------------------------------------------------------------------------
12416 Mode switching support code.
12417 */
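
/* These hooks feed GCC's mode switching pass.  Roughly speaking, the
   tracked entity is the FPSCR precision setting (plus the transfer size
   bit when -mfmovd is in effect), the modes are FP_MODE_SINGLE,
   FP_MODE_DOUBLE and FP_MODE_NONE, and the pass invokes the emit hook
   below wherever the mode required by the next FP insn differs from the
   mode known to be live at that point.  */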
12418
12419 static void
12420 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
12421 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
12422 {
12423 if ((TARGET_SH4A_FP || TARGET_SH4_300)
12424 && prev_mode != FP_MODE_NONE && prev_mode != mode)
12425 {
12426 emit_insn (gen_toggle_pr ());
12427 if (TARGET_FMOVD)
12428 emit_insn (gen_toggle_sz ());
12429 }
12430 else if (mode != FP_MODE_NONE)
12431 {
12432 rtx tmp = gen_reg_rtx (SImode);
12433 emit_insn (gen_sts_fpscr (tmp));
12434 rtx i = NULL;
12435
12436 const unsigned HOST_WIDE_INT fpbits =
12437 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
12438
12439 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
12440 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12441 else if (mode == FP_MODE_SINGLE)
12442 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
12443 else if (mode == FP_MODE_DOUBLE)
12444 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12445 else
12446 gcc_unreachable ();
12447
12448 emit_insn (i);
12449 emit_insn (gen_lds_fpscr (tmp));
12450 }
12451 }
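
/* As a sketch of what the function above emits on a non-SH4A / non-SH4-300
   target when switching between single and double precision with -mfmovd
   (so both the PR and SZ bits are flipped), assuming rX and rY stand for
   the scratch and constant registers picked at expand time:

     sts   fpscr, rX
     xor   rY, rX      ! rY = FPSCR_PR | FPSCR_SZ
     lds   rX, fpscr

   On SH4A / SH4-300 the cheaper toggle_pr / toggle_sz insns (fpchg and
   fschg) are emitted instead.  */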
12452
12453 static int
12454 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
12455 {
12456 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
12457 }
12458
12459 static int
12460 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
12461 {
12462 if (TARGET_HITACHI && recog_memoized (insn) >= 0
12463 && get_attr_fp_set (insn) != FP_SET_NONE)
12464 return (int) get_attr_fp_set (insn);
12465 else
12466 return mode;
12467 }
12468
12469 static int
12470 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
12471 {
12472 return NORMAL_MODE (entity);
12473 }
12474
12475 static int
12476 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
12477 {
12478 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
12479 }
12480
12481 static int
12482 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
12483 {
12484 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
12485 }
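
/* Read literally, the expression above gives priority 0 to the precision
   matching the default FPU mode (single when TARGET_FPU_SINGLE is set,
   double otherwise) and priority 1 to the opposite precision.  */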
12486
12487 /*------------------------------------------------------------------------------
12488 Misc
12489 */
12490
12491 /* Return true if we use LRA instead of the reload pass. */
12492 bool
12493 sh_lra_p (void)
12494 {
12495 return sh_lra_flag;
12496 }
12497
12498 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
12499
12500 static bool
12501 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
12502 unsigned int align,
12503 enum by_pieces_operation op,
12504 bool speed_p)
12505 {
12506 switch (op)
12507 {
12508 case MOVE_BY_PIECES:
12509 return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op)
12510 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12511 case STORE_BY_PIECES:
12512 case SET_BY_PIECES:
12513 return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op)
12514 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12515 default:
12516 return default_use_by_pieces_infrastructure_p (size, align,
12517 op, speed_p);
12518 }
12519 }
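
/* A worked reading of the thresholds above: when optimizing for speed with
   a destination that is at least 32-bit aligned, a move/store/set is done
   by pieces if it takes at most 15 piecewise insns; otherwise (unaligned
   or optimizing for size) only a single-insn copy qualifies and anything
   larger is left to the generic block move / libcall expansion.  */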
12520
12521 bool
12522 sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
12523 rtx x ATTRIBUTE_UNUSED)
12524 {
12525 return TARGET_FDPIC;
12526 }
12527
12528 /* Emit insns to load the function address from FUNCDESC (an FDPIC
12529 function descriptor) into r1 and the GOT address into r12,
12530 returning an rtx for r1. */
12531
12532 rtx
12533 sh_load_function_descriptor (rtx funcdesc)
12534 {
12535 rtx r1 = gen_rtx_REG (Pmode, R1_REG);
12536 rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
12537 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
12538 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
12539
12540 emit_move_insn (r1, fnaddr);
12541 /* The ABI requires the entry point address to be loaded first, so
12542 prevent the load from being moved after that of the GOT
12543 address. */
12544 emit_insn (gen_blockage ());
12545 emit_move_insn (pic_reg, gotaddr);
12546 return r1;
12547 }
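
/* For illustration, an FDPIC function descriptor is a pair of words: the
   function entry point at offset 0 and the callee's GOT value at offset 4.
   A call through the descriptor therefore ends up looking roughly like

     mov.l  @rD, r1       ! entry point
     mov.l  @(4,rD), r12  ! callee's GOT pointer
     jsr    @r1

   where rD stands for whatever register holds the descriptor address
   (the name is used only in this comment).  */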
12548
12549 /* Return an rtx holding the initial value of the FDPIC register (the
12550 FDPIC pointer passed in from the caller). */
12551
12552 rtx
12553 sh_get_fdpic_reg_initial_val (void)
12554 {
12555 return get_hard_reg_initial_val (Pmode, PIC_REG);
12556 }
12557
12558 #include "gt-sh.h"