hppa: Install __sync libfuncs for linux.
gcc/config/pa/pa.c
/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "target-def.h"
#include "langhooks.h"
#include "df.h"
#include "opts.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, bool);
static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (enum machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, enum machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, enum machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, enum machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, enum machine_mode,
                            const_tree, bool);
static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        enum machine_mode,
                                        secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
                                                   enum machine_mode, int *,
                                                   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static enum machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (enum machine_mode, rtx);
static bool pa_legitimate_constant_p (enum machine_mode, rtx);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
\f
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
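
/* Illustrative usage of fix_range (a sketch, not part of the build;
   the register names below are only an example of the syntax).  */
#if 0
static void
fix_range_example (void)
{
  /* Equivalent to passing -mfixed-range=%fr4-%fr31 on the command
     line: every register in the range becomes fixed and call-used.
     If the range covers all floating point registers, the final loop
     above also sets MASK_DISABLE_FPREGS.  Multiple ranges may be
     separated by commas, e.g. "%fr4-%fr15,%fr20-%fr31".  */
  fix_range ("%fr4-%fr31");
}
#endif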

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  VEC(cl_deferred_option,heap) *vec
    = (VEC(cl_deferred_option,heap) *) pa_deferred_options;

  FOR_EACH_VEC_ELT (cl_deferred_option, vec, i, opt)
    {
      switch (opt->opt_index)
        {
        case OPT_mfixed_range_:
          fix_range (opt->arg);
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (targetm_common.except_unwind_info (&global_options) == UI_DWARF2
          && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
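
/* Example user-level usage of the TFmode builtins registered above
   when HPUX_LONG_DOUBLE_LIBRARY is true (an illustrative sketch, not
   compiled here).  */
#if 0
__float128
example_clamp_magnitude (__float128 x, __float128 y)
{
  /* __builtin_fabsq expands to a call to _U_Qfabs, and __builtin_infq
     materializes a TFmode infinity from the constant pool.  */
  if (__builtin_fabsq (x) == __builtin_infq ())
    return __builtin_copysignq (y, x);
  return x;
}
#endif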

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   enum machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}
\f
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
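
/* Worked examples for the constant predicates above (an illustrative
   sketch, not compiled; the constants are hand-picked).  */
#if 0
static void
constant_predicate_examples (void)
{
  /* ldo: any 14-bit signed immediate can be moved directly.  */
  gcc_assert (pa_cint_ok_for_move (8191));

  /* ldil: low 11 bits clear and sign preserved on widening.  */
  gcc_assert (pa_ldil_cint_p (0x12345800));
  gcc_assert (!pa_ldil_cint_p (0x12345801));    /* bit 0 set */

  /* zdepi: 0x1f0 is a sign-extended 5-bit field (11111) shifted left
     by four and surrounded by zeros; 0x2f0 (1011110000) is not.  */
  gcc_assert (pa_zdepi_cint_p (0x1f0));
  gcc_assert (!pa_zdepi_cint_p (0x2f0));

  /* depi/extru AND masks: 0...01...1 and the sign-extended form of
     1...10...0 are accepted; two separate runs of ones are not.  */
  gcc_assert (pa_and_mask_p (0x3ff));
  gcc_assert (pa_and_mask_p ((unsigned HOST_WIDE_INT) -1024));
  gcc_assert (!pa_and_mask_p (0x00ff00ff));
}
#endif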
\f
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
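
/* Illustrative sketch (comment only): for a PIC data reference to a
   symbol X, the SYMBOL_REF arm above emits roughly

     (set (reg tmp) (plus (reg %r19) (high (symbol_ref X))))
     (set (reg dst) (mem (lo_sum (reg tmp)
                                 (unspec [(symbol_ref X)]
                                         UNSPEC_DLTIND14R))))

   that is, a DLT slot load relative to the PIC register %r19, which
   typically assembles to something like

     addil LT'X,%r19
     ldw RT'X(%r1),%dst

   Function labels get one extra dereference to fetch the function
   descriptor address from the plabel.  The register names here are
   examples only.  */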

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
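
/* Summary of the sequences generated above (illustrative):

     global-dynamic:  tmp = tgd_load (addr);  ret = __tls_get_addr (tmp)
     local-dynamic:   t2  = __tls_get_addr (module base)
                      ret = t2 + tld_offset_load (addr)
     initial-exec:    ret = tp + tie_load (addr)
     local-exec:      ret = tp + tle_load (addr)

   where tp is the thread pointer fetched by gen_tp_load, and the
   *_pic insn variants are used when generating PIC code.  */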

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                             gen_rtx_MULT (Pmode,
                                                           reg2,
                                                           GEN_INT (val)),
                                             reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then pa_emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_MULT (Pmode,
                                            XEXP (XEXP (XEXP (x, 0), 0), 0),
                                            XEXP (XEXP (XEXP (x, 0), 0), 1)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));

          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_MULT (Pmode, reg1,
                                                  XEXP (XEXP (idx, 0), 1)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          int val = INTVAL (XEXP (XEXP (idx, 0), 1));
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                                 gen_rtx_MULT (Pmode,
                                                               reg2,
                                                               GEN_INT (val)),
                                                 reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_MULT (Pmode, reg1,
                                                    XEXP (XEXP (idx, 0), 1)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && GET_CODE (XEXP (x, 0)) == MULT
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
              && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              int val = INTVAL (XEXP (XEXP (x, 0), 1));
              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode,
                                gen_rtx_PLUS (Pmode,
                                              gen_rtx_MULT (Pmode,
                                                            reg2,
                                                            GEN_INT (val)),
                                              reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
                   && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_MULT (Pmode, regx2,
                                                       XEXP (XEXP (x, 0), 1)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}
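
/* Worked example (illustrative) of the round-up/round-down transform
   documented before hppa_legitimize_address: for an integer-mode
   reference to X + 0x5432 the mask is 0x3fff, and
   0x5432 & 0x3fff = 0x1432, which is below halfway (0x2000), so we
   round down to Y = 0x4000.  Z = X + 0x4000 is computed into a
   register once, and the reference becomes memory (Z + 0x1432), whose
   displacement fits in 14 bits; nearby references can then share the
   same Z through CSE.  */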

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}
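
/* Illustrative checks of the cost table above (a sketch, not
   compiled): a copy between a general register and an FP register
   goes through memory and costs 16, while a copy out of SAR is
   effectively prohibited.  */
#if 0
static void
move_cost_examples (void)
{
  gcc_assert (hppa_register_move_cost (SImode, GENERAL_REGS, FP_REGS) == 16);
  gcc_assert (hppa_register_move_cost (SImode, SHIFT_REGS, GENERAL_REGS)
              == 0x100);
}
#endif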

/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
                int *total, bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = COSTS_N_INSNS (8);
      else
        *total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else
        *total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
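
/* For example (illustrative): an integer MULT is costed at
   COSTS_N_INSNS (8) on PA 1.1 when the FP unit is available, since
   the multiply can be done with xmpyu in the FP coprocessor, but at
   COSTS_N_INSNS (20) when it must be expanded as a millicode call;
   floating point multiplies cost COSTS_N_INSNS (3).  */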

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return pa_tls_referenced_p (x);
}
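
/* For instance (illustrative): given "__thread int t;", the address
   of t may not be placed in the constant pool, because it is not a
   link-time constant with respect to the thread pointer;
   pa_tls_referenced_p catches the SYMBOL_REF for t even when it is
   wrapped inside a CONST expression.  */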

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
        the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
        the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
          != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
          != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

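  /* The next several arms handle secondary reloads.  As an
     illustrative example (not from the sources): reloading %fr4 from
     (mem (plus (reg %r3) (const_int 8192))) cannot use a REG+D
     address, since 8192 fits in neither a 5-bit nor a 14-bit
     displacement, so the displacement is first built in SCRATCH_REG
     and the MEM is rewritten to use the scratch register as its
     address.  */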
  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
           && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
                                 XEXP (operand1, 0)))
          || ((GET_CODE (operand1) == SUBREG
               && GET_CODE (XEXP (operand1, 0)) == MEM
               && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                      ? SFmode : DFmode),
                                     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
        operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand1, 0), 0),
                                          scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
                              replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
           && fp_reg_operand (operand1, mode)
           && ((GET_CODE (operand0) == MEM
                && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                       ? SFmode : DFmode),
                                      XEXP (operand0, 0)))
               || ((GET_CODE (operand0) == SUBREG)
                   && GET_CODE (XEXP (operand0, 0)) == MEM
                   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                          ? SFmode : DFmode),
                                         XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
        operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
                                                                       0)),
                                                       Pmode,
                                                       XEXP (XEXP (operand0, 0),
                                                             0),
                                                       scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
                              replace_equiv_address (operand0, scratch_reg),
                              operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change TARGET_PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.  Fix this for 2.5.  */
  else if (scratch_reg
           && CONSTANT_P (operand1)
           && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
         memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      pa_emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
                              replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory or a constant.  */
  else if (scratch_reg
           && GET_CODE (operand0) == REG
           && REGNO (operand0) < FIRST_PSEUDO_REGISTER
           && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
           && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
    {
      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (GET_CODE (operand1) == MEM
          && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
        {
          /* We are reloading the address into the scratch register, so we
             want to make sure the scratch register is a full register.  */
          scratch_reg = force_mode (word_mode, scratch_reg);

          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
                                                                       0)),
                                                       Pmode,
                                                       XEXP (XEXP (operand1, 0),
                                                             0),
                                                       scratch_reg));

          /* Now we are going to load the scratch register from memory,
             we want to load it in the same width as the original MEM,
             which must be the same as the width of the ultimate destination,
             OPERAND0.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg,
                          replace_equiv_address (operand1, scratch_reg));
        }
      else
        {
          /* We want to load the scratch register using the same mode as
             the ultimate destination.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg, operand1);
        }

      /* And emit the insn to set the ultimate destination.  We know that
         the scratch register has the same mode as the destination at this
         point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      /* Legitimize TLS symbol references.  This happens for references
         that aren't a legitimate constant.  */
      if (PA_SYMBOL_REF_TLS_P (operand1))
        operand1 = legitimize_tls_address (operand1);

      if (register_operand (operand1, mode)
          || (GET_CODE (operand1) == CONST_INT
              && pa_cint_ok_for_move (INTVAL (operand1)))
          || (operand1 == CONST0_RTX (mode))
          || (GET_CODE (operand1) == HIGH
              && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
          /* Only `general_operands' can come here, so MEM is ok.  */
          || GET_CODE (operand1) == MEM)
        {
          /* Various sets are created during RTL generation which don't
             have the REG_POINTER flag correctly set.  After the CSE pass,
             instruction recognition can fail if we don't consistently
             set this flag when performing register copies.  This should
             also improve the opportunities for creating insns that use
             unscaled indexing.  */
          if (REG_P (operand0) && REG_P (operand1))
            {
              if (REG_POINTER (operand1)
                  && !REG_POINTER (operand0)
                  && !HARD_REGISTER_P (operand0))
                copy_reg_pointer (operand0, operand1);
            }

          /* When MEMs are broken out, the REG_POINTER flag doesn't
             get set.  In some cases, we can set the REG_POINTER flag
             from the declaration for the MEM.  */
          if (REG_P (operand0)
              && GET_CODE (operand1) == MEM
              && !REG_POINTER (operand0))
            {
              tree decl = MEM_EXPR (operand1);

              /* Set the register pointer flag and register alignment
                 if the declaration for this memory reference is a
                 pointer type.  */
              if (decl)
                {
                  tree type;

                  /* If this is a COMPONENT_REF, use the FIELD_DECL from
                     tree operand 1.  */
                  if (TREE_CODE (decl) == COMPONENT_REF)
1828 decl = TREE_OPERAND (decl, 1);
1829
1830 type = TREE_TYPE (decl);
1831 type = strip_array_types (type);
1832
1833 if (POINTER_TYPE_P (type))
1834 {
1835 int align;
1836
1837 type = TREE_TYPE (type);
1838 /* Using TYPE_ALIGN_OK is rather conservative as
1839 only the Ada frontend actually sets it. */
1840 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1841 : BITS_PER_UNIT);
1842 mark_reg_pointer (operand0, align);
1843 }
1844 }
1845 }
1846
1847 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1848 return 1;
1849 }
1850 }
1851 else if (GET_CODE (operand0) == MEM)
1852 {
1853 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1854 && !(reload_in_progress || reload_completed))
1855 {
1856 rtx temp = gen_reg_rtx (DFmode);
1857
1858 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1859 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1860 return 1;
1861 }
1862 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1863 {
1864 /* Run this case quickly. */
1865 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1866 return 1;
1867 }
1868 if (! (reload_in_progress || reload_completed))
1869 {
1870 operands[0] = validize_mem (operand0);
1871 operands[1] = operand1 = force_reg (mode, operand1);
1872 }
1873 }
1874
1875 /* Simplify the source if we need to.
1876 Note we do have to handle function labels here, even though we do
1877 not consider them legitimate constants. Loop optimizations can
1878 call the emit_move_xxx routines with one as a source. */
1879 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1880 || function_label_operand (operand1, VOIDmode)
1881 || (GET_CODE (operand1) == HIGH
1882 && symbolic_operand (XEXP (operand1, 0), mode)))
1883 {
1884 int ishighonly = 0;
1885
1886 if (GET_CODE (operand1) == HIGH)
1887 {
1888 ishighonly = 1;
1889 operand1 = XEXP (operand1, 0);
1890 }
1891 if (symbolic_operand (operand1, mode))
1892 {
1893 /* Argh. The assembler and linker can't handle arithmetic
1894 involving plabels.
1895
1896 So we force the plabel into memory, load operand0 from
1897 the memory location, then add in the constant part. */
1898 if ((GET_CODE (operand1) == CONST
1899 && GET_CODE (XEXP (operand1, 0)) == PLUS
1900 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1901 VOIDmode))
1902 || function_label_operand (operand1, VOIDmode))
1903 {
1904 rtx temp, const_part;
1905
1906 /* Figure out what (if any) scratch register to use. */
1907 if (reload_in_progress || reload_completed)
1908 {
1909 scratch_reg = scratch_reg ? scratch_reg : operand0;
1910 /* SCRATCH_REG will hold an address and maybe the actual
1911 data. We want it in WORD_MODE regardless of what mode it
1912 was originally given to us. */
1913 scratch_reg = force_mode (word_mode, scratch_reg);
1914 }
1915 else if (flag_pic)
1916 scratch_reg = gen_reg_rtx (Pmode);
1917
1918 if (GET_CODE (operand1) == CONST)
1919 {
1920 /* Save away the constant part of the expression. */
1921 const_part = XEXP (XEXP (operand1, 0), 1);
1922 gcc_assert (GET_CODE (const_part) == CONST_INT);
1923
1924 /* Force the function label into memory. */
1925 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1926 }
1927 else
1928 {
1929 /* No constant part. */
1930 const_part = NULL_RTX;
1931
1932 /* Force the function label into memory. */
1933 temp = force_const_mem (mode, operand1);
1934 }
1935
1936
1937 /* Get the address of the memory location. PIC-ify it if
1938 necessary. */
1939 temp = XEXP (temp, 0);
1940 if (flag_pic)
1941 temp = legitimize_pic_address (temp, mode, scratch_reg);
1942
1943 /* Put the address of the memory location into our destination
1944 register. */
1945 operands[1] = temp;
1946 pa_emit_move_sequence (operands, mode, scratch_reg);
1947
1948 /* Now load from the memory location into our destination
1949 register. */
1950 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1951 pa_emit_move_sequence (operands, mode, scratch_reg);
1952
1953 /* And add back in the constant part. */
1954 if (const_part != NULL_RTX)
1955 expand_inc (operand0, const_part);
1956
1957 return 1;
1958 }
1959
1960 if (flag_pic)
1961 {
1962 rtx temp;
1963
1964 if (reload_in_progress || reload_completed)
1965 {
1966 temp = scratch_reg ? scratch_reg : operand0;
1967 /* TEMP will hold an address and maybe the actual
1968 data. We want it in WORD_MODE regardless of what mode it
1969 was originally given to us. */
1970 temp = force_mode (word_mode, temp);
1971 }
1972 else
1973 temp = gen_reg_rtx (Pmode);
1974
1975 /* (const (plus (symbol) (const_int))) must be forced to
1976 memory during/after reload if the const_int will not fit
1977 in 14 bits. */
1978 if (GET_CODE (operand1) == CONST
1979 && GET_CODE (XEXP (operand1, 0)) == PLUS
1980 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1981 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1982 && (reload_completed || reload_in_progress)
1983 && flag_pic)
1984 {
1985 rtx const_mem = force_const_mem (mode, operand1);
1986 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1987 mode, temp);
1988 operands[1] = replace_equiv_address (const_mem, operands[1]);
1989 pa_emit_move_sequence (operands, mode, temp);
1990 }
1991 else
1992 {
1993 operands[1] = legitimize_pic_address (operand1, mode, temp);
1994 if (REG_P (operand0) && REG_P (operands[1]))
1995 copy_reg_pointer (operand0, operands[1]);
1996 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1997 }
1998 }
1999 /* On the HPPA, references to data space are supposed to use dp,
2000 register 27, but showing it in the RTL inhibits various cse
2001 and loop optimizations. */
2002 else
2003 {
2004 rtx temp, set;
2005
2006 if (reload_in_progress || reload_completed)
2007 {
2008 temp = scratch_reg ? scratch_reg : operand0;
2009 /* TEMP will hold an address and maybe the actual
2010 data. We want it in WORD_MODE regardless of what mode it
2011 was originally given to us. */
2012 temp = force_mode (word_mode, temp);
2013 }
2014 else
2015 temp = gen_reg_rtx (mode);
2016
2017 /* Loading a SYMBOL_REF into a register makes that register
2018 safe to be used as the base in an indexed address.
2019
2020 Don't mark hard registers though. That loses. */
2021 if (GET_CODE (operand0) == REG
2022 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2023 mark_reg_pointer (operand0, BITS_PER_UNIT);
2024 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2025 mark_reg_pointer (temp, BITS_PER_UNIT);
2026
2027 if (ishighonly)
2028 set = gen_rtx_SET (mode, operand0, temp);
2029 else
2030 set = gen_rtx_SET (VOIDmode,
2031 operand0,
2032 gen_rtx_LO_SUM (mode, temp, operand1));
2033
2034 emit_insn (gen_rtx_SET (VOIDmode,
2035 temp,
2036 gen_rtx_HIGH (mode, operand1)));
2037 emit_insn (set);
2038
2039 }
2040 return 1;
2041 }
2042 else if (pa_tls_referenced_p (operand1))
2043 {
2044 rtx tmp = operand1;
2045 rtx addend = NULL;
2046
2047 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2048 {
2049 addend = XEXP (XEXP (tmp, 0), 1);
2050 tmp = XEXP (XEXP (tmp, 0), 0);
2051 }
2052
2053 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2054 tmp = legitimize_tls_address (tmp);
2055 if (addend)
2056 {
2057 tmp = gen_rtx_PLUS (mode, tmp, addend);
2058 tmp = force_operand (tmp, operands[0]);
2059 }
2060 operands[1] = tmp;
2061 }
2062 else if (GET_CODE (operand1) != CONST_INT
2063 || !pa_cint_ok_for_move (INTVAL (operand1)))
2064 {
2065 rtx insn, temp;
2066 rtx op1 = operand1;
2067 HOST_WIDE_INT value = 0;
2068 HOST_WIDE_INT insv = 0;
2069 int insert = 0;
2070
2071 if (GET_CODE (operand1) == CONST_INT)
2072 value = INTVAL (operand1);
2073
2074 if (TARGET_64BIT
2075 && GET_CODE (operand1) == CONST_INT
2076 && HOST_BITS_PER_WIDE_INT > 32
2077 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2078 {
2079 HOST_WIDE_INT nval;
2080
2081 /* Extract the low order 32 bits of the value and sign extend.
2082 If the new value is the same as the original value, we can
2083 use the original value as-is. If the new value is
2084 different, we use it and insert the most significant 32 bits
2085 of the original value into the final result. */
2086 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2087 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2088 if (value != nval)
2089 {
2090 #if HOST_BITS_PER_WIDE_INT > 32
2091 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2092 #endif
2093 insert = 1;
2094 value = nval;
2095 operand1 = GEN_INT (nval);
2096 }
2097 }
2098
2099 if (reload_in_progress || reload_completed)
2100 temp = scratch_reg ? scratch_reg : operand0;
2101 else
2102 temp = gen_reg_rtx (mode);
2103
2104 /* We don't directly split DImode constants on 32-bit targets
2105 because PLUS uses an 11-bit immediate and the insn sequence
2106 generated is not as efficient as the one using HIGH/LO_SUM. */
2107 if (GET_CODE (operand1) == CONST_INT
2108 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2109 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2110 && !insert)
2111 {
2112 /* Directly break constant into high and low parts. This
2113 provides better optimization opportunities because various
2114 passes recognize constants split with PLUS but not LO_SUM.
2115 We use a 14-bit signed low part except when the addition
2116 of 0x4000 to the high part might change the sign of the
2117 high part. */
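/* Worked example (illustrative values): for value == 0x12343fff,
   low = 0x3fff >= 0x2000 and high = 0x12340000, so high is bumped
   to 0x12344000 and the final low part becomes value - high = -1,
   which fits in 14 bits; we then emit temp = 0x12344000 followed
   by the PLUS of temp and -1. */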
2118 HOST_WIDE_INT low = value & 0x3fff;
2119 HOST_WIDE_INT high = value & ~ 0x3fff;
2120
2121 if (low >= 0x2000)
2122 {
2123 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2124 high += 0x2000;
2125 else
2126 high += 0x4000;
2127 }
2128
2129 low = value - high;
2130
2131 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2132 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2133 }
2134 else
2135 {
2136 emit_insn (gen_rtx_SET (VOIDmode, temp,
2137 gen_rtx_HIGH (mode, operand1)));
2138 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2139 }
2140
2141 insn = emit_move_insn (operands[0], operands[1]);
2142
2143 /* Now insert the most significant 32 bits of the value
2144 into the register. When we don't have a second register
2145 available, it could take up to nine instructions to load
2146 a 64-bit integer constant. Prior to reload, we force
2147 constants that would take more than three instructions
2148 to load to the constant pool. During and after reload,
2149 we have to handle all possible values. */
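/* Illustrative example: for the 64-bit constant 0x123456789abcdef0,
   the low half sign-extends to 0xffffffff9abcdef0 and is loaded
   first; INSV is then 0x12345678, outside the three-depdi range,
   so with a second register available it is built with HIGH/LO_SUM
   and deposited into the upper 32 bits. */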
2150 if (insert)
2151 {
2152 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2153 register and the value to be inserted is outside the
2154 range that can be loaded with three depdi instructions. */
2155 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2156 {
2157 operand1 = GEN_INT (insv);
2158
2159 emit_insn (gen_rtx_SET (VOIDmode, temp,
2160 gen_rtx_HIGH (mode, operand1)));
2161 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2162 emit_insn (gen_insv (operand0, GEN_INT (32),
2163 const0_rtx, temp));
2164 }
2165 else
2166 {
2167 int len = 5, pos = 27;
2168
2169 /* Insert the bits using the depdi instruction. */
2170 while (pos >= 0)
2171 {
2172 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2173 HOST_WIDE_INT sign = v5 < 0;
2174
2175 /* Left extend the insertion. */
2176 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2177 while (pos > 0 && (insv & 1) == sign)
2178 {
2179 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2180 len += 1;
2181 pos -= 1;
2182 }
2183
2184 emit_insn (gen_insv (operand0, GEN_INT (len),
2185 GEN_INT (pos), GEN_INT (v5)));
2186
2187 len = pos > 0 && pos < 5 ? pos : 5;
2188 pos -= len;
2189 }
2190 }
2191 }
2192
2193 set_unique_reg_note (insn, REG_EQUAL, op1);
2194
2195 return 1;
2196 }
2197 }
2198 /* Now have insn-emit do whatever it normally does. */
2199 return 0;
2200 }
2201
2202 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2203 it will need a link/runtime reloc). */
2204
2205 int
2206 pa_reloc_needed (tree exp)
2207 {
2208 int reloc = 0;
2209
2210 switch (TREE_CODE (exp))
2211 {
2212 case ADDR_EXPR:
2213 return 1;
2214
2215 case POINTER_PLUS_EXPR:
2216 case PLUS_EXPR:
2217 case MINUS_EXPR:
2218 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2219 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2220 break;
2221
2222 CASE_CONVERT:
2223 case NON_LVALUE_EXPR:
2224 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2225 break;
2226
2227 case CONSTRUCTOR:
2228 {
2229 tree value;
2230 unsigned HOST_WIDE_INT ix;
2231
2232 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2233 if (value)
2234 reloc |= pa_reloc_needed (value);
2235 }
2236 break;
2237
2238 case ERROR_MARK:
2239 break;
2240
2241 default:
2242 break;
2243 }
2244 return reloc;
2245 }
2246
2247 \f
2248 /* Return the best assembler insn template
2249 for moving operands[1] into operands[0] as a fullword. */
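/* For instance (illustrative constants), a CONST_INT of 0x1234 fits
   in 14 bits and yields "ldi", a constant with its low 11 bits clear
   yields a single "ldil", and a general 32-bit constant such as
   0x12345678 falls through to the two-insn "ldil"/"ldo" sequence. */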
2250 const char *
2251 pa_singlemove_string (rtx *operands)
2252 {
2253 HOST_WIDE_INT intval;
2254
2255 if (GET_CODE (operands[0]) == MEM)
2256 return "stw %r1,%0";
2257 if (GET_CODE (operands[1]) == MEM)
2258 return "ldw %1,%0";
2259 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2260 {
2261 long i;
2262 REAL_VALUE_TYPE d;
2263
2264 gcc_assert (GET_MODE (operands[1]) == SFmode);
2265
2266 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2267 bit pattern. */
2268 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2269 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2270
2271 operands[1] = GEN_INT (i);
2272 /* Fall through to CONST_INT case. */
2273 }
2274 if (GET_CODE (operands[1]) == CONST_INT)
2275 {
2276 intval = INTVAL (operands[1]);
2277
2278 if (VAL_14_BITS_P (intval))
2279 return "ldi %1,%0";
2280 else if ((intval & 0x7ff) == 0)
2281 return "ldil L'%1,%0";
2282 else if (pa_zdepi_cint_p (intval))
2283 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2284 else
2285 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2286 }
2287 return "copy %1,%0";
2288 }
2289 \f
2290
2291 /* Compute position (in OP[1]) and width (in OP[2])
2292 useful for copying IMM to a register using the zdepi
2293 instructions. Store the immediate value to insert in OP[0]. */
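/* Illustrative example: IMM == 0x3c00 has its least significant set
   bit at index 10 and reduces to 0xf, giving OP[0] = 0xf,
   OP[1] = 31 - 10 = 21 and OP[2] = 4; depositing that 4-bit field
   with zdepi reconstructs 0x3c00. */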
2294 static void
2295 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2296 {
2297 int lsb, len;
2298
2299 /* Find the least significant set bit in IMM. */
2300 for (lsb = 0; lsb < 32; lsb++)
2301 {
2302 if ((imm & 1) != 0)
2303 break;
2304 imm >>= 1;
2305 }
2306
2307 /* Choose variants based on *sign* of the 5-bit field. */
2308 if ((imm & 0x10) == 0)
2309 len = (lsb <= 28) ? 4 : 32 - lsb;
2310 else
2311 {
2312 /* Find the width of the bitstring in IMM. */
2313 for (len = 5; len < 32 - lsb; len++)
2314 {
2315 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2316 break;
2317 }
2318
2319 /* Sign extend IMM as a 5-bit value. */
2320 imm = (imm & 0xf) - 0x10;
2321 }
2322
2323 op[0] = imm;
2324 op[1] = 31 - lsb;
2325 op[2] = len;
2326 }
2327
2328 /* Compute position (in OP[1]) and width (in OP[2])
2329 useful for copying IMM to a register using the depdi,z
2330 instructions. Store the immediate value to insert in OP[0]. */
2331
2332 static void
2333 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2334 {
2335 int lsb, len, maxlen;
2336
2337 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2338
2339 /* Find the least significant set bit in IMM. */
2340 for (lsb = 0; lsb < maxlen; lsb++)
2341 {
2342 if ((imm & 1) != 0)
2343 break;
2344 imm >>= 1;
2345 }
2346
2347 /* Choose variants based on *sign* of the 5-bit field. */
2348 if ((imm & 0x10) == 0)
2349 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2350 else
2351 {
2352 /* Find the width of the bitstring in IMM. */
2353 for (len = 5; len < maxlen - lsb; len++)
2354 {
2355 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2356 break;
2357 }
2358
2359 /* Extend length if host is narrow and IMM is negative. */
2360 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2361 len += 32;
2362
2363 /* Sign extend IMM as a 5-bit value. */
2364 imm = (imm & 0xf) - 0x10;
2365 }
2366
2367 op[0] = imm;
2368 op[1] = 63 - lsb;
2369 op[2] = len;
2370 }
2371
2372 /* Output assembler code to perform a doubleword move insn
2373 with operands OPERANDS. */
2374
2375 const char *
2376 pa_output_move_double (rtx *operands)
2377 {
2378 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2379 rtx latehalf[2];
2380 rtx addreg0 = 0, addreg1 = 0;
2381
2382 /* First classify both operands. */
2383
2384 if (REG_P (operands[0]))
2385 optype0 = REGOP;
2386 else if (offsettable_memref_p (operands[0]))
2387 optype0 = OFFSOP;
2388 else if (GET_CODE (operands[0]) == MEM)
2389 optype0 = MEMOP;
2390 else
2391 optype0 = RNDOP;
2392
2393 if (REG_P (operands[1]))
2394 optype1 = REGOP;
2395 else if (CONSTANT_P (operands[1]))
2396 optype1 = CNSTOP;
2397 else if (offsettable_memref_p (operands[1]))
2398 optype1 = OFFSOP;
2399 else if (GET_CODE (operands[1]) == MEM)
2400 optype1 = MEMOP;
2401 else
2402 optype1 = RNDOP;
2403
2404 /* Check for the cases that the operand constraints are not
2405 supposed to allow. */
2406 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2407
2408 /* Handle copies between general and floating registers. */
2409
2410 if (optype0 == REGOP && optype1 == REGOP
2411 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2412 {
2413 if (FP_REG_P (operands[0]))
2414 {
2415 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2416 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2417 return "{fldds|fldd} -16(%%sp),%0";
2418 }
2419 else
2420 {
2421 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2422 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2423 return "{ldws|ldw} -12(%%sp),%R0";
2424 }
2425 }
2426
2427 /* Handle auto decrementing and incrementing loads and stores
2428 specifically, since the structure of the function doesn't work
2429 for them without major modification. Do it better when we teach
2430 this port about the general inc/dec addressing of the PA.
2431 (This was written by tege. Chide him if it doesn't work.) */
2432
2433 if (optype0 == MEMOP)
2434 {
2435 /* We have to output the address syntax ourselves, since print_operand
2436 doesn't deal with the addresses we want to use. Fix this later. */
2437
2438 rtx addr = XEXP (operands[0], 0);
2439 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2440 {
2441 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2442
2443 operands[0] = XEXP (addr, 0);
2444 gcc_assert (GET_CODE (operands[1]) == REG
2445 && GET_CODE (operands[0]) == REG);
2446
2447 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2448
2449 /* No overlap between high target register and address
2450 register. (We do this in a non-obvious way to
2451 save a register file writeback) */
2452 if (GET_CODE (addr) == POST_INC)
2453 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2454 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2455 }
2456 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2457 {
2458 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2459
2460 operands[0] = XEXP (addr, 0);
2461 gcc_assert (GET_CODE (operands[1]) == REG
2462 && GET_CODE (operands[0]) == REG);
2463
2464 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2465 /* No overlap between high target register and address
2466 register. (We do this in a non-obvious way to save a
2467 register file writeback) */
2468 if (GET_CODE (addr) == PRE_INC)
2469 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2470 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2471 }
2472 }
2473 if (optype1 == MEMOP)
2474 {
2475 /* We have to output the address syntax ourselves, since print_operand
2476 doesn't deal with the addresses we want to use. Fix this later. */
2477
2478 rtx addr = XEXP (operands[1], 0);
2479 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2480 {
2481 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2482
2483 operands[1] = XEXP (addr, 0);
2484 gcc_assert (GET_CODE (operands[0]) == REG
2485 && GET_CODE (operands[1]) == REG);
2486
2487 if (!reg_overlap_mentioned_p (high_reg, addr))
2488 {
2489 /* No overlap between high target register and address
2490 register. (We do this in a non-obvious way to
2491 save a register file writeback) */
2492 if (GET_CODE (addr) == POST_INC)
2493 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2494 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2495 }
2496 else
2497 {
2498 /* This is an undefined situation. We should load into the
2499 address register *and* update that register. Probably
2500 we don't need to handle this at all. */
2501 if (GET_CODE (addr) == POST_INC)
2502 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2503 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2504 }
2505 }
2506 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2507 {
2508 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2509
2510 operands[1] = XEXP (addr, 0);
2511 gcc_assert (GET_CODE (operands[0]) == REG
2512 && GET_CODE (operands[1]) == REG);
2513
2514 if (!reg_overlap_mentioned_p (high_reg, addr))
2515 {
2516 /* No overlap between high target register and address
2517 register. (We do this in a non-obvious way to
2518 save a register file writeback) */
2519 if (GET_CODE (addr) == PRE_INC)
2520 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2521 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2522 }
2523 else
2524 {
2525 /* This is an undefined situation. We should load into the
2526 address register *and* update that register. Probably
2527 we don't need to handle this at all. */
2528 if (GET_CODE (addr) == PRE_INC)
2529 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2530 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2531 }
2532 }
2533 else if (GET_CODE (addr) == PLUS
2534 && GET_CODE (XEXP (addr, 0)) == MULT)
2535 {
2536 rtx xoperands[4];
2537 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2538
2539 if (!reg_overlap_mentioned_p (high_reg, addr))
2540 {
2541 xoperands[0] = high_reg;
2542 xoperands[1] = XEXP (addr, 1);
2543 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2544 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2545 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2546 xoperands);
2547 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2548 }
2549 else
2550 {
2551 xoperands[0] = high_reg;
2552 xoperands[1] = XEXP (addr, 1);
2553 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2554 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2555 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2556 xoperands);
2557 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2558 }
2559 }
2560 }
2561
2562 /* If an operand is an unoffsettable memory ref, find a register
2563 we can increment temporarily to make it refer to the second word. */
2564
2565 if (optype0 == MEMOP)
2566 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2567
2568 if (optype1 == MEMOP)
2569 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2570
2571 /* Ok, we can do one word at a time.
2572 Normally we do the low-numbered word first, though sometimes we must do the high-numbered word first (see below).
2573
2574 In either case, set up in LATEHALF the operands to use
2575 for the high-numbered word and in some cases alter the
2576 operands in OPERANDS to be suitable for the low-numbered word. */
2577
2578 if (optype0 == REGOP)
2579 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2580 else if (optype0 == OFFSOP)
2581 latehalf[0] = adjust_address (operands[0], SImode, 4);
2582 else
2583 latehalf[0] = operands[0];
2584
2585 if (optype1 == REGOP)
2586 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2587 else if (optype1 == OFFSOP)
2588 latehalf[1] = adjust_address (operands[1], SImode, 4);
2589 else if (optype1 == CNSTOP)
2590 split_double (operands[1], &operands[1], &latehalf[1]);
2591 else
2592 latehalf[1] = operands[1];
2593
2594 /* If the first move would clobber the source of the second one,
2595 do them in the other order.
2596
2597 This can happen in two cases:
2598
2599 mem -> register where the first half of the destination register
2600 is the same register used in the memory's address. Reload
2601 can create such insns.
2602
2603 mem in this case will be either register indirect or register
2604 indirect plus a valid offset.
2605
2606 register -> register move where REGNO(dst) == REGNO(src + 1)
2607 someone (Tim/Tege?) claimed this can happen for parameter loads.
2608
2609 Handle mem -> register case first. */
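/* For example (illustrative RTL), reload can create
   (set (reg:DI 4) (mem:DI (plus (reg 4) (const_int 8))));
   loading the low word first would clobber %r4 while it is still
   needed to address the high word, so the high word is moved first. */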
2610 if (optype0 == REGOP
2611 && (optype1 == MEMOP || optype1 == OFFSOP)
2612 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2613 operands[1], 0))
2614 {
2615 /* Do the late half first. */
2616 if (addreg1)
2617 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2618 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2619
2620 /* Then clobber. */
2621 if (addreg1)
2622 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2623 return pa_singlemove_string (operands);
2624 }
2625
2626 /* Now handle register -> register case. */
2627 if (optype0 == REGOP && optype1 == REGOP
2628 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2629 {
2630 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2631 return pa_singlemove_string (operands);
2632 }
2633
2634 /* Normal case: do the two words, low-numbered first. */
2635
2636 output_asm_insn (pa_singlemove_string (operands), operands);
2637
2638 /* Make any unoffsettable addresses point at high-numbered word. */
2639 if (addreg0)
2640 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2641 if (addreg1)
2642 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2643
2644 /* Do that word. */
2645 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2646
2647 /* Undo the adds we just did. */
2648 if (addreg0)
2649 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2650 if (addreg1)
2651 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2652
2653 return "";
2654 }
2655 \f
2656 const char *
2657 pa_output_fp_move_double (rtx *operands)
2658 {
2659 if (FP_REG_P (operands[0]))
2660 {
2661 if (FP_REG_P (operands[1])
2662 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2663 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2664 else
2665 output_asm_insn ("fldd%F1 %1,%0", operands);
2666 }
2667 else if (FP_REG_P (operands[1]))
2668 {
2669 output_asm_insn ("fstd%F0 %1,%0", operands);
2670 }
2671 else
2672 {
2673 rtx xoperands[2];
2674
2675 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2676
2677 /* This is a pain. You have to be prepared to deal with an
2678 arbitrary address here including pre/post increment/decrement.
2679
2680 So we avoid this in the MD. */
2681 gcc_assert (GET_CODE (operands[0]) == REG);
2682
2683 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2684 xoperands[0] = operands[0];
2685 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2686 }
2687 return "";
2688 }
2689 \f
2690 /* Return a REG that occurs in ADDR with coefficient 1.
2691 ADDR can be effectively incremented by incrementing REG. */
2692
2693 static rtx
2694 find_addr_reg (rtx addr)
2695 {
2696 while (GET_CODE (addr) == PLUS)
2697 {
2698 if (GET_CODE (XEXP (addr, 0)) == REG)
2699 addr = XEXP (addr, 0);
2700 else if (GET_CODE (XEXP (addr, 1)) == REG)
2701 addr = XEXP (addr, 1);
2702 else if (CONSTANT_P (XEXP (addr, 0)))
2703 addr = XEXP (addr, 1);
2704 else if (CONSTANT_P (XEXP (addr, 1)))
2705 addr = XEXP (addr, 0);
2706 else
2707 gcc_unreachable ();
2708 }
2709 gcc_assert (GET_CODE (addr) == REG);
2710 return addr;
2711 }
2712
2713 /* Emit code to perform a block move.
2714
2715 OPERANDS[0] is the destination pointer as a REG, clobbered.
2716 OPERANDS[1] is the source pointer as a REG, clobbered.
2717 OPERANDS[2] is a register for temporary storage.
2718 OPERANDS[3] is a register for temporary storage.
2719 OPERANDS[4] is the size as a CONST_INT.
2720 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2721 OPERANDS[6] is another temporary register. */
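/* Illustrative sizing: with align == 4 and n_bytes == 22, the loop
   counter starts at 14 and the loop copies 8 bytes per iteration;
   the 6-byte residual is finished with one more word copy plus a
   2-byte {stbys|stby},e store. */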
2722
2723 const char *
2724 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2725 {
2726 int align = INTVAL (operands[5]);
2727 unsigned long n_bytes = INTVAL (operands[4]);
2728
2729 /* We can't move more than a word at a time because the PA
2730 has no integer move insns longer than a word. (Could use fp mem ops?) */
2731 if (align > (TARGET_64BIT ? 8 : 4))
2732 align = (TARGET_64BIT ? 8 : 4);
2733
2734 /* Note that we know each loop below will execute at least twice
2735 (else we would have open-coded the copy). */
2736 switch (align)
2737 {
2738 case 8:
2739 /* Pre-adjust the loop counter. */
2740 operands[4] = GEN_INT (n_bytes - 16);
2741 output_asm_insn ("ldi %4,%2", operands);
2742
2743 /* Copying loop. */
2744 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2745 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2746 output_asm_insn ("std,ma %3,8(%0)", operands);
2747 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2748 output_asm_insn ("std,ma %6,8(%0)", operands);
2749
2750 /* Handle the residual. There could be up to 15 bytes of
2751 residual to copy! */
2752 if (n_bytes % 16 != 0)
2753 {
2754 operands[4] = GEN_INT (n_bytes % 8);
2755 if (n_bytes % 16 >= 8)
2756 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2757 if (n_bytes % 8 != 0)
2758 output_asm_insn ("ldd 0(%1),%6", operands);
2759 if (n_bytes % 16 >= 8)
2760 output_asm_insn ("std,ma %3,8(%0)", operands);
2761 if (n_bytes % 8 != 0)
2762 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2763 }
2764 return "";
2765
2766 case 4:
2767 /* Pre-adjust the loop counter. */
2768 operands[4] = GEN_INT (n_bytes - 8);
2769 output_asm_insn ("ldi %4,%2", operands);
2770
2771 /* Copying loop. */
2772 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2773 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2774 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2775 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2776 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2777
2778 /* Handle the residual. There could be up to 7 bytes of
2779 residual to copy! */
2780 if (n_bytes % 8 != 0)
2781 {
2782 operands[4] = GEN_INT (n_bytes % 4);
2783 if (n_bytes % 8 >= 4)
2784 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2785 if (n_bytes % 4 != 0)
2786 output_asm_insn ("ldw 0(%1),%6", operands);
2787 if (n_bytes % 8 >= 4)
2788 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2789 if (n_bytes % 4 != 0)
2790 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2791 }
2792 return "";
2793
2794 case 2:
2795 /* Pre-adjust the loop counter. */
2796 operands[4] = GEN_INT (n_bytes - 4);
2797 output_asm_insn ("ldi %4,%2", operands);
2798
2799 /* Copying loop. */
2800 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2801 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2802 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2803 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2804 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2805
2806 /* Handle the residual. */
2807 if (n_bytes % 4 != 0)
2808 {
2809 if (n_bytes % 4 >= 2)
2810 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2811 if (n_bytes % 2 != 0)
2812 output_asm_insn ("ldb 0(%1),%6", operands);
2813 if (n_bytes % 4 >= 2)
2814 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2815 if (n_bytes % 2 != 0)
2816 output_asm_insn ("stb %6,0(%0)", operands);
2817 }
2818 return "";
2819
2820 case 1:
2821 /* Pre-adjust the loop counter. */
2822 operands[4] = GEN_INT (n_bytes - 2);
2823 output_asm_insn ("ldi %4,%2", operands);
2824
2825 /* Copying loop. */
2826 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2827 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2828 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2829 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2830 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2831
2832 /* Handle the residual. */
2833 if (n_bytes % 2 != 0)
2834 {
2835 output_asm_insn ("ldb 0(%1),%3", operands);
2836 output_asm_insn ("stb %3,0(%0)", operands);
2837 }
2838 return "";
2839
2840 default:
2841 gcc_unreachable ();
2842 }
2843 }
2844
2845 /* Count the number of insns necessary to handle this block move.
2846
2847 Basic structure is the same as pa_output_block_move, except that we
2848 count insns rather than emit them. */
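/* Illustrative count: align == 4 and n_bytes == 22 gives the 6-insn
   copying loop plus 2 insns for the residual word and 2 for the
   residual bytes, i.e. 10 insns or 40 bytes. */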
2849
2850 static int
2851 compute_movmem_length (rtx insn)
2852 {
2853 rtx pat = PATTERN (insn);
2854 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2855 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2856 unsigned int n_insns = 0;
2857
2858 /* We can't move more than a word at a time because the PA
2859 has no integer move insns longer than a word. (Could use fp mem ops?) */
2860 if (align > (TARGET_64BIT ? 8 : 4))
2861 align = (TARGET_64BIT ? 8 : 4);
2862
2863 /* The basic copying loop. */
2864 n_insns = 6;
2865
2866 /* Residuals. */
2867 if (n_bytes % (2 * align) != 0)
2868 {
2869 if ((n_bytes % (2 * align)) >= align)
2870 n_insns += 2;
2871
2872 if ((n_bytes % align) != 0)
2873 n_insns += 2;
2874 }
2875
2876 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2877 return n_insns * 4;
2878 }
2879
2880 /* Emit code to perform a block clear.
2881
2882 OPERANDS[0] is the destination pointer as a REG, clobbered.
2883 OPERANDS[1] is a register for temporary storage.
2884 OPERANDS[2] is the size as a CONST_INT.
2885 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2886
2887 const char *
2888 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2889 {
2890 int align = INTVAL (operands[3]);
2891 unsigned long n_bytes = INTVAL (operands[2]);
2892
2893 /* We can't clear more than a word at a time because the PA
2894 has no integer move insns longer than a word. */
2895 if (align > (TARGET_64BIT ? 8 : 4))
2896 align = (TARGET_64BIT ? 8 : 4);
2897
2898 /* Note that we know each loop below will execute at least twice
2899 (else we would have open-coded the copy). */
2900 switch (align)
2901 {
2902 case 8:
2903 /* Pre-adjust the loop counter. */
2904 operands[2] = GEN_INT (n_bytes - 16);
2905 output_asm_insn ("ldi %2,%1", operands);
2906
2907 /* Loop. */
2908 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2909 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2910 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2911
2912 /* Handle the residual. There could be up to 15 bytes of
2913 residual to clear! */
2914 if (n_bytes % 16 != 0)
2915 {
2916 operands[2] = GEN_INT (n_bytes % 8);
2917 if (n_bytes % 16 >= 8)
2918 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2919 if (n_bytes % 8 != 0)
2920 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2921 }
2922 return "";
2923
2924 case 4:
2925 /* Pre-adjust the loop counter. */
2926 operands[2] = GEN_INT (n_bytes - 8);
2927 output_asm_insn ("ldi %2,%1", operands);
2928
2929 /* Loop. */
2930 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2931 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2932 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2933
2934 /* Handle the residual. There could be up to 7 bytes of
2935 residual to clear! */
2936 if (n_bytes % 8 != 0)
2937 {
2938 operands[2] = GEN_INT (n_bytes % 4);
2939 if (n_bytes % 8 >= 4)
2940 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2941 if (n_bytes % 4 != 0)
2942 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2943 }
2944 return "";
2945
2946 case 2:
2947 /* Pre-adjust the loop counter. */
2948 operands[2] = GEN_INT (n_bytes - 4);
2949 output_asm_insn ("ldi %2,%1", operands);
2950
2951 /* Loop. */
2952 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2953 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2954 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2955
2956 /* Handle the residual. */
2957 if (n_bytes % 4 != 0)
2958 {
2959 if (n_bytes % 4 >= 2)
2960 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2961 if (n_bytes % 2 != 0)
2962 output_asm_insn ("stb %%r0,0(%0)", operands);
2963 }
2964 return "";
2965
2966 case 1:
2967 /* Pre-adjust the loop counter. */
2968 operands[2] = GEN_INT (n_bytes - 2);
2969 output_asm_insn ("ldi %2,%1", operands);
2970
2971 /* Loop. */
2972 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2973 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2974 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2975
2976 /* Handle the residual. */
2977 if (n_bytes % 2 != 0)
2978 output_asm_insn ("stb %%r0,0(%0)", operands);
2979
2980 return "";
2981
2982 default:
2983 gcc_unreachable ();
2984 }
2985 }
2986
2987 /* Count the number of insns necessary to handle this block clear.
2988
2989 Basic structure is the same as pa_output_block_clear, except that we
2990 count insns rather than emit them. */
2991
2992 static int
2993 compute_clrmem_length (rtx insn)
2994 {
2995 rtx pat = PATTERN (insn);
2996 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2997 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2998 unsigned int n_insns = 0;
2999
3000 /* We can't clear more than a word at a time because the PA
3001 has no integer move insns longer than a word. */
3002 if (align > (TARGET_64BIT ? 8 : 4))
3003 align = (TARGET_64BIT ? 8 : 4);
3004
3005 /* The basic loop. */
3006 n_insns = 4;
3007
3008 /* Residuals. */
3009 if (n_bytes % (2 * align) != 0)
3010 {
3011 if ((n_bytes % (2 * align)) >= align)
3012 n_insns++;
3013
3014 if ((n_bytes % align) != 0)
3015 n_insns++;
3016 }
3017
3018 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3019 return n_insns * 4;
3020 }
3021 \f
3022
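/* Return a string to perform a bitwise-and of operands[1] with
   operands[2], storing the result in operands[0]. For example
   (illustrative mask), 0xffffff00 gives ls0 = 0 and ls1 = 8, so the
   low eight bits are cleared with "{depi|depwi} 0,31,8,%0" instead
   of materializing the mask in a register. */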
3023 const char *
3024 pa_output_and (rtx *operands)
3025 {
3026 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3027 {
3028 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3029 int ls0, ls1, ms0, p, len;
3030
3031 for (ls0 = 0; ls0 < 32; ls0++)
3032 if ((mask & (1 << ls0)) == 0)
3033 break;
3034
3035 for (ls1 = ls0; ls1 < 32; ls1++)
3036 if ((mask & (1 << ls1)) != 0)
3037 break;
3038
3039 for (ms0 = ls1; ms0 < 32; ms0++)
3040 if ((mask & (1 << ms0)) == 0)
3041 break;
3042
3043 gcc_assert (ms0 == 32);
3044
3045 if (ls1 == 32)
3046 {
3047 len = ls0;
3048
3049 gcc_assert (len);
3050
3051 operands[2] = GEN_INT (len);
3052 return "{extru|extrw,u} %1,31,%2,%0";
3053 }
3054 else
3055 {
3056 /* We could use this `depi' for the case above as well, but `depi'
3057 requires one more register file access than an `extru'. */
3058
3059 p = 31 - ls0;
3060 len = ls1 - ls0;
3061
3062 operands[2] = GEN_INT (p);
3063 operands[3] = GEN_INT (len);
3064 return "{depi|depwi} 0,%2,%3,%0";
3065 }
3066 }
3067 else
3068 return "and %1,%2,%0";
3069 }
3070
3071 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3072 storing the result in operands[0]. */
3073 const char *
3074 pa_output_64bit_and (rtx *operands)
3075 {
3076 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3077 {
3078 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3079 int ls0, ls1, ms0, p, len;
3080
3081 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3082 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3083 break;
3084
3085 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3086 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3087 break;
3088
3089 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3090 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3091 break;
3092
3093 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3094
3095 if (ls1 == HOST_BITS_PER_WIDE_INT)
3096 {
3097 len = ls0;
3098
3099 gcc_assert (len);
3100
3101 operands[2] = GEN_INT (len);
3102 return "extrd,u %1,63,%2,%0";
3103 }
3104 else
3105 {
3106 /* We could use this `depdi' for the case above as well, but `depdi'
3107 requires one more register file access than an `extrd,u'. */
3108
3109 p = 63 - ls0;
3110 len = ls1 - ls0;
3111
3112 operands[2] = GEN_INT (p);
3113 operands[3] = GEN_INT (len);
3114 return "depdi 0,%2,%3,%0";
3115 }
3116 }
3117 else
3118 return "and %1,%2,%0";
3119 }
3120
3121 const char *
3122 pa_output_ior (rtx *operands)
3123 {
3124 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3125 int bs0, bs1, p, len;
3126
3127 if (INTVAL (operands[2]) == 0)
3128 return "copy %1,%0";
3129
3130 for (bs0 = 0; bs0 < 32; bs0++)
3131 if ((mask & (1 << bs0)) != 0)
3132 break;
3133
3134 for (bs1 = bs0; bs1 < 32; bs1++)
3135 if ((mask & (1 << bs1)) == 0)
3136 break;
3137
3138 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3139
3140 p = 31 - bs0;
3141 len = bs1 - bs0;
3142
3143 operands[2] = GEN_INT (p);
3144 operands[3] = GEN_INT (len);
3145 return "{depi|depwi} -1,%2,%3,%0";
3146 }
3147
3148 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3149 storing the result in operands[0]. */
3150 const char *
3151 pa_output_64bit_ior (rtx *operands)
3152 {
3153 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3154 int bs0, bs1, p, len;
3155
3156 if (INTVAL (operands[2]) == 0)
3157 return "copy %1,%0";
3158
3159 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3160 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3161 break;
3162
3163 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3164 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3165 break;
3166
3167 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3168 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3169
3170 p = 63 - bs0;
3171 len = bs1 - bs0;
3172
3173 operands[2] = GEN_INT (p);
3174 operands[3] = GEN_INT (len);
3175 return "depdi -1,%2,%3,%0";
3176 }
3177 \f
3178 /* Target hook for assembling integer objects. This code handles
3179 aligned SI and DI integers specially since function references
3180 must be preceded by P%. */
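/* For example, an aligned word-sized reference to a function foo
   (an illustrative symbol) is emitted as ".word P%foo" rather than
   ".word foo", so it resolves to a function pointer (plabel). */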
3181
3182 static bool
3183 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3184 {
3185 if (size == UNITS_PER_WORD
3186 && aligned_p
3187 && function_label_operand (x, VOIDmode))
3188 {
3189 fputs (size == 8 ? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3190 output_addr_const (asm_out_file, x);
3191 fputc ('\n', asm_out_file);
3192 return true;
3193 }
3194 return default_assemble_integer (x, size, aligned_p);
3195 }
3196 \f
3197 /* Output an ascii string. */
3198 void
3199 pa_output_ascii (FILE *file, const char *p, int size)
3200 {
3201 int i;
3202 int chars_output;
3203 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3204
3205 /* The HP assembler can only take strings of 256 characters at one
3206 time. This is a limitation on input line length, *not* the
3207 length of the string. Sigh. Even worse, it seems that the
3208 restriction is in number of input characters (see \xnn &
3209 \whatever). So we have to do this very carefully. */
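/* The flush threshold of 243 used below keeps us within that limit:
   a 4-byte chunk expands to at most 16 output characters, and
   243 characters plus the ".STRING" prefix and closing quote still
   fit on one 256-character input line. */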
3210
3211 fputs ("\t.STRING \"", file);
3212
3213 chars_output = 0;
3214 for (i = 0; i < size; i += 4)
3215 {
3216 int co = 0;
3217 int io = 0;
3218 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3219 {
3220 register unsigned int c = (unsigned char) p[i + io];
3221
3222 if (c == '\"' || c == '\\')
3223 partial_output[co++] = '\\';
3224 if (c >= ' ' && c < 0177)
3225 partial_output[co++] = c;
3226 else
3227 {
3228 unsigned int hexd;
3229 partial_output[co++] = '\\';
3230 partial_output[co++] = 'x';
3231 hexd = c / 16 - 0 + '0';
3232 if (hexd > '9')
3233 hexd -= '9' - 'a' + 1;
3234 partial_output[co++] = hexd;
3235 hexd = c % 16 - 0 + '0';
3236 if (hexd > '9')
3237 hexd -= '9' - 'a' + 1;
3238 partial_output[co++] = hexd;
3239 }
3240 }
3241 if (chars_output + co > 243)
3242 {
3243 fputs ("\"\n\t.STRING \"", file);
3244 chars_output = 0;
3245 }
3246 fwrite (partial_output, 1, (size_t) co, file);
3247 chars_output += co;
3248 co = 0;
3249 }
3250 fputs ("\"\n", file);
3251 }
3252
3253 /* Try to rewrite floating point comparisons & branches to avoid
3254 useless add,tr insns.
3255
3256 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3257 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3258 first attempt to remove useless add,tr insns. It is zero
3259 for the second pass as reorg sometimes leaves bogus REG_DEAD
3260 notes lying around.
3261
3262 When CHECK_NOTES is zero we can only eliminate add,tr insns
3263 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3264 instructions. */
3265 static void
3266 remove_useless_addtr_insns (int check_notes)
3267 {
3268 rtx insn;
3269 static int pass = 0;
3270
3271 /* This is fairly cheap, so always run it when optimizing. */
3272 if (optimize > 0)
3273 {
3274 int fcmp_count = 0;
3275 int fbranch_count = 0;
3276
3277 /* Walk all the insns in this function looking for fcmp & fbranch
3278 instructions. Keep track of how many of each we find. */
3279 for (insn = get_insns (); insn; insn = next_insn (insn))
3280 {
3281 rtx tmp;
3282
3283 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3284 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3285 continue;
3286
3287 tmp = PATTERN (insn);
3288
3289 /* It must be a set. */
3290 if (GET_CODE (tmp) != SET)
3291 continue;
3292
3293 /* If the destination is CCFP, then we've found an fcmp insn. */
3294 tmp = SET_DEST (tmp);
3295 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3296 {
3297 fcmp_count++;
3298 continue;
3299 }
3300
3301 tmp = PATTERN (insn);
3302 /* If this is an fbranch instruction, bump the fbranch counter. */
3303 if (GET_CODE (tmp) == SET
3304 && SET_DEST (tmp) == pc_rtx
3305 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3306 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3307 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3308 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3309 {
3310 fbranch_count++;
3311 continue;
3312 }
3313 }
3314
3315
3316 /* Find all floating point compare + branch insns. If possible,
3317 reverse the comparison & the branch to avoid add,tr insns. */
3318 for (insn = get_insns (); insn; insn = next_insn (insn))
3319 {
3320 rtx tmp, next;
3321
3322 /* Ignore anything that isn't an INSN. */
3323 if (GET_CODE (insn) != INSN)
3324 continue;
3325
3326 tmp = PATTERN (insn);
3327
3328 /* It must be a set. */
3329 if (GET_CODE (tmp) != SET)
3330 continue;
3331
3332 /* The destination must be CCFP, which is register zero. */
3333 tmp = SET_DEST (tmp);
3334 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3335 continue;
3336
3337 /* INSN should be a set of CCFP.
3338
3339 See if the result of this insn is used in a reversed FP
3340 conditional branch. If so, reverse our condition and
3341 the branch. Doing so avoids useless add,tr insns. */
3342 next = next_insn (insn);
3343 while (next)
3344 {
3345 /* Jumps, calls and labels stop our search. */
3346 if (GET_CODE (next) == JUMP_INSN
3347 || GET_CODE (next) == CALL_INSN
3348 || GET_CODE (next) == CODE_LABEL)
3349 break;
3350
3351 /* As does another fcmp insn. */
3352 if (GET_CODE (next) == INSN
3353 && GET_CODE (PATTERN (next)) == SET
3354 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3355 && REGNO (SET_DEST (PATTERN (next))) == 0)
3356 break;
3357
3358 next = next_insn (next);
3359 }
3360
3361 /* Is NEXT a branch? */
3362 if (next
3363 && GET_CODE (next) == JUMP_INSN)
3364 {
3365 rtx pattern = PATTERN (next);
3366
3367 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3368 and CCFP dies, then reverse our conditional and the branch
3369 to avoid the add,tr. */
3370 if (GET_CODE (pattern) == SET
3371 && SET_DEST (pattern) == pc_rtx
3372 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3373 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3374 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3375 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3376 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3377 && (fcmp_count == fbranch_count
3378 || (check_notes
3379 && find_regno_note (next, REG_DEAD, 0))))
3380 {
3381 /* Reverse the branch. */
3382 tmp = XEXP (SET_SRC (pattern), 1);
3383 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3384 XEXP (SET_SRC (pattern), 2) = tmp;
3385 INSN_CODE (next) = -1;
3386
3387 /* Reverse our condition. */
3388 tmp = PATTERN (insn);
3389 PUT_CODE (XEXP (tmp, 1),
3390 (reverse_condition_maybe_unordered
3391 (GET_CODE (XEXP (tmp, 1)))));
3392 }
3393 }
3394 }
3395 }
3396
3397 pass = !pass;
3398
3399 }
3400 \f
3401 /* You may have trouble believing this, but this is the 32 bit HP-PA
3402 stack layout. Wow.
3403
3404 Offset Contents
3405
3406 Variable arguments (optional; any number may be allocated)
3407
3408 SP-(4*(N+9)) arg word N
3409 : :
3410 SP-56 arg word 5
3411 SP-52 arg word 4
3412
3413 Fixed arguments (must be allocated; may remain unused)
3414
3415 SP-48 arg word 3
3416 SP-44 arg word 2
3417 SP-40 arg word 1
3418 SP-36 arg word 0
3419
3420 Frame Marker
3421
3422 SP-32 External Data Pointer (DP)
3423 SP-28 External sr4
3424 SP-24 External/stub RP (RP')
3425 SP-20 Current RP
3426 SP-16 Static Link
3427 SP-12 Clean up
3428 SP-8 Calling Stub RP (RP'')
3429 SP-4 Previous SP
3430
3431 Top of Frame
3432
3433 SP-0 Stack Pointer (points to next available address)
3434
3435 */
3436
3437 /* This function saves registers as follows. Registers marked with ' are
3438 this function's registers (as opposed to the previous function's).
3439 If a frame_pointer isn't needed, r4 is saved as a general register;
3440 the space for the frame pointer is still allocated, though, to keep
3441 things simple.
3442
3443
3444 Top of Frame
3445
3446 SP (FP') Previous FP
3447 SP + 4 Alignment filler (sigh)
3448 SP + 8 Space for locals reserved here.
3449 .
3450 .
3451 .
3452 SP + n All call saved registers used.
3453 .
3454 .
3455 .
3456 SP + o All call saved fp registers used.
3457 .
3458 .
3459 .
3460 SP + p (SP') points to next available address.
3461
3462 */
3463
3464 /* Global variables set by output_function_prologue(). */
3465 /* Size of frame. Need to know this to emit return insns from
3466 leaf procedures. */
3467 static HOST_WIDE_INT actual_fsize, local_fsize;
3468 static int save_fregs;
3469
3470 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3471 Handle case where DISP > 8k by using the add_high_const patterns.
3472
3473 Note that in the DISP > 8k case, we will leave the high part of the
3474 address in %r1. There is code in pa_expand_{prologue,epilogue} that knows this. */
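/* Illustrative sequence: when DISP does not fit in 14 bits on a
   32-bit target, the RTL below corresponds to something like
   "addil L'disp,%base" followed by "stw %src,R'disp(%r1)". */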
3475
3476 static void
3477 store_reg (int reg, HOST_WIDE_INT disp, int base)
3478 {
3479 rtx insn, dest, src, basereg;
3480
3481 src = gen_rtx_REG (word_mode, reg);
3482 basereg = gen_rtx_REG (Pmode, base);
3483 if (VAL_14_BITS_P (disp))
3484 {
3485 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3486 insn = emit_move_insn (dest, src);
3487 }
3488 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3489 {
3490 rtx delta = GEN_INT (disp);
3491 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3492
3493 emit_move_insn (tmpreg, delta);
3494 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3495 if (DO_FRAME_NOTES)
3496 {
3497 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3498 gen_rtx_SET (VOIDmode, tmpreg,
3499 gen_rtx_PLUS (Pmode, basereg, delta)));
3500 RTX_FRAME_RELATED_P (insn) = 1;
3501 }
3502 dest = gen_rtx_MEM (word_mode, tmpreg);
3503 insn = emit_move_insn (dest, src);
3504 }
3505 else
3506 {
3507 rtx delta = GEN_INT (disp);
3508 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3509 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3510
3511 emit_move_insn (tmpreg, high);
3512 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3513 insn = emit_move_insn (dest, src);
3514 if (DO_FRAME_NOTES)
3515 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3516 gen_rtx_SET (VOIDmode,
3517 gen_rtx_MEM (word_mode,
3518 gen_rtx_PLUS (word_mode,
3519 basereg,
3520 delta)),
3521 src));
3522 }
3523
3524 if (DO_FRAME_NOTES)
3525 RTX_FRAME_RELATED_P (insn) = 1;
3526 }
3527
3528 /* Emit RTL to store REG at the memory location specified by BASE and then
3529 add MOD to BASE. MOD must be <= 8k. */
3530
3531 static void
3532 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3533 {
3534 rtx insn, basereg, srcreg, delta;
3535
3536 gcc_assert (VAL_14_BITS_P (mod));
3537
3538 basereg = gen_rtx_REG (Pmode, base);
3539 srcreg = gen_rtx_REG (word_mode, reg);
3540 delta = GEN_INT (mod);
3541
3542 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3543 if (DO_FRAME_NOTES)
3544 {
3545 RTX_FRAME_RELATED_P (insn) = 1;
3546
3547 /* RTX_FRAME_RELATED_P must be set on each frame related set
3548 in a parallel with more than one element. */
3549 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3550 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3551 }
3552 }
3553
3554 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3555 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3556 whether to add a frame note or not.
3557
3558 In the DISP > 8k case, we leave the high part of the address in %r1.
3559 There is code in pa_expand_{prologue,epilogue} that knows about this. */
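/* Illustrative sequences: the VAL_14_BITS_P case corresponds to a
   single "ldo disp(%base),%reg"; larger displacements use the same
   %r1 convention as store_reg above. */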
3560
3561 static void
3562 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3563 {
3564 rtx insn;
3565
3566 if (VAL_14_BITS_P (disp))
3567 {
3568 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3569 plus_constant (gen_rtx_REG (Pmode, base), disp));
3570 }
3571 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3572 {
3573 rtx basereg = gen_rtx_REG (Pmode, base);
3574 rtx delta = GEN_INT (disp);
3575 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3576
3577 emit_move_insn (tmpreg, delta);
3578 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3579 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3580 if (DO_FRAME_NOTES)
3581 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3582 gen_rtx_SET (VOIDmode, tmpreg,
3583 gen_rtx_PLUS (Pmode, basereg, delta)));
3584 }
3585 else
3586 {
3587 rtx basereg = gen_rtx_REG (Pmode, base);
3588 rtx delta = GEN_INT (disp);
3589 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3590
3591 emit_move_insn (tmpreg,
3592 gen_rtx_PLUS (Pmode, basereg,
3593 gen_rtx_HIGH (Pmode, delta)));
3594 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3595 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3596 }
3597
3598 if (DO_FRAME_NOTES && note)
3599 RTX_FRAME_RELATED_P (insn) = 1;
3600 }
3601
3602 HOST_WIDE_INT
3603 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3604 {
3605 int freg_saved = 0;
3606 int i, j;
3607
3608 /* The code in pa_expand_prologue and pa_expand_epilogue must
3609 be consistent with the rounding and size calculation done here.
3610 Change them at the same time. */
3611
3612 /* We do our own stack alignment. First, round the size of the
3613 stack locals up to a word boundary. */
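/* E.g., with 4-byte words a 5-byte locals area rounds up to 8:
   (5 + 4 - 1) & ~(4 - 1) == 8 (a worked illustration).  */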
3614 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3615
3616 /* Space for previous frame pointer + filler. If any frame is
3617 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3618 waste some space here for the sake of HP compatibility. The
3619 first slot is only used when the frame pointer is needed. */
3620 if (size || frame_pointer_needed)
3621 size += STARTING_FRAME_OFFSET;
3622
3623 /* If the current function calls __builtin_eh_return, then we need
3624 to allocate stack space for registers that will hold data for
3625 the exception handler. */
3626 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3627 {
3628 unsigned int i;
3629
3630 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3631 continue;
3632 size += i * UNITS_PER_WORD;
3633 }
3634
3635 /* Account for space used by the callee general register saves. */
3636 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3637 if (df_regs_ever_live_p (i))
3638 size += UNITS_PER_WORD;
3639
3640 /* Account for space used by the callee floating point register saves. */
3641 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3642 if (df_regs_ever_live_p (i)
3643 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3644 {
3645 freg_saved = 1;
3646
3647 /* We always save both halves of the FP register, so always
3648 increment the frame size by 8 bytes. */
3649 size += 8;
3650 }
3651
3652 /* If any of the floating registers are saved, account for the
3653 alignment needed for the floating point register save block. */
3654 if (freg_saved)
3655 {
3656 size = (size + 7) & ~7;
3657 if (fregs_live)
3658 *fregs_live = 1;
3659 }
3660
3661 /* The various ABIs include space for the outgoing parameters in the
3662 size of the current function's stack frame. We don't need to align
3663 for the outgoing arguments as their alignment is set by the final
3664 rounding for the frame as a whole. */
3665 size += crtl->outgoing_args_size;
3666
3667 /* Allocate space for the fixed frame marker. This space must be
3668 allocated for any function that makes calls or allocates
3669 stack space. */
3670 if (!current_function_is_leaf || size)
3671 size += TARGET_64BIT ? 48 : 32;
3672
3673 /* Finally, round to the preferred stack boundary. */
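/* E.g., with a 64-byte preferred stack boundary a 100-byte frame
   rounds up to 128: (100 + 63) & ~63 == 128 (a worked illustration;
   the actual boundary comes from PREFERRED_STACK_BOUNDARY).  */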
3674 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3675 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3676 }
3677
3678 /* Generate the assembly code for function entry. FILE is a stdio
3679 stream to output the code to. SIZE is an int: how many units of
3680 temporary storage to allocate.
3681
3682 Refer to the array `regs_ever_live' to determine which registers to
3683 save; `regs_ever_live[I]' is nonzero if register number I is ever
3684 used in the function. This function is responsible for knowing
3685 which registers should not be saved even if used. */
3686
3687 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3688 of memory. If any fpu reg is used in the function, we allocate
3689 such a block here, at the bottom of the frame, just in case it's needed.
3690
3691 If this function is a leaf procedure, then we may choose not
3692 to do a "save" insn. The decision about whether or not
3693 to do this is made in regclass.c. */
3694
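/* The directives emitted below look roughly like this for a non-leaf
   function that saves %r2, uses a frame pointer, and saves %r3 and %r4
   (an illustrative sketch, not literal compiler output):

	foo:
		.PROC
		.CALLINFO FRAME=128,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=4
		.ENTRY  */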
3695 static void
3696 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3697 {
3698 /* The function's label and associated .PROC must never be
3699 separated and must be output *after* any profiling declarations
3700 to avoid changing spaces/subspaces within a procedure. */
3701 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3702 fputs ("\t.PROC\n", file);
3703
3704 /* pa_expand_prologue does the dirty work now. We just need
3705 to output the assembler directives which denote the start
3706 of a function. */
3707 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3708 if (current_function_is_leaf)
3709 fputs (",NO_CALLS", file);
3710 else
3711 fputs (",CALLS", file);
3712 if (rp_saved)
3713 fputs (",SAVE_RP", file);
3714
3715 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3716 at the beginning of the frame and that it is used as the frame
3717 pointer for the frame. We do this because our current frame
3718 layout doesn't conform to that specified in the HP runtime
3719 documentation and we need a way to indicate to programs such as
3720 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3721 isn't used by HP compilers but is supported by the assembler.
3722 However, SAVE_SP is supposed to indicate that the previous stack
3723 pointer has been saved in the frame marker. */
3724 if (frame_pointer_needed)
3725 fputs (",SAVE_SP", file);
3726
3727 /* Pass on information about the number of callee register saves
3728 performed in the prologue.
3729
3730 The compiler is supposed to pass the highest register number
3731 saved, the assembler then has to adjust that number before
3732 entering it into the unwind descriptor (to account for any
3733 caller saved registers with lower register numbers than the
3734 first callee saved register). */
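/* E.g., if %r3 through %r5 are saved (gr_saved == 3), the directive
   below reads ENTRY_GR=5, the highest saved register number.  */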
3735 if (gr_saved)
3736 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3737
3738 if (fr_saved)
3739 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3740
3741 fputs ("\n\t.ENTRY\n", file);
3742
3743 remove_useless_addtr_insns (0);
3744 }
3745
3746 void
3747 pa_expand_prologue (void)
3748 {
3749 int merge_sp_adjust_with_store = 0;
3750 HOST_WIDE_INT size = get_frame_size ();
3751 HOST_WIDE_INT offset;
3752 int i;
3753 rtx insn, tmpreg;
3754
3755 gr_saved = 0;
3756 fr_saved = 0;
3757 save_fregs = 0;
3758
3759 /* Compute total size for frame pointer, filler, locals and rounding to
3760 the next word boundary. Similar code appears in pa_compute_frame_size
3761 and must be changed in tandem with this code. */
3762 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3763 if (local_fsize || frame_pointer_needed)
3764 local_fsize += STARTING_FRAME_OFFSET;
3765
3766 actual_fsize = pa_compute_frame_size (size, &save_fregs);
3767 if (flag_stack_usage_info)
3768 current_function_static_stack_size = actual_fsize;
3769
3770 /* Compute a few things we will use often. */
3771 tmpreg = gen_rtx_REG (word_mode, 1);
3772
3773 /* Save RP first. The calling conventions manual states RP will
3774 always be stored into the caller's frame at sp - 20 or sp - 16
3775 depending on which ABI is in use. */
3776 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3777 {
3778 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3779 rp_saved = true;
3780 }
3781 else
3782 rp_saved = false;
3783
3784 /* Allocate the local frame and set up the frame pointer if needed. */
3785 if (actual_fsize != 0)
3786 {
3787 if (frame_pointer_needed)
3788 {
3789 /* Copy the old frame pointer temporarily into %r1. Set up the
3790 new stack pointer, then store away the saved old frame pointer
3791 into the stack at sp and at the same time update the stack
3792 pointer by actual_fsize bytes. Two versions: the first
3793 handles small (< 8k) frames, the second handles large (>= 8k)
3794 frames. */
3795 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3796 if (DO_FRAME_NOTES)
3797 RTX_FRAME_RELATED_P (insn) = 1;
3798
3799 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3800 if (DO_FRAME_NOTES)
3801 RTX_FRAME_RELATED_P (insn) = 1;
3802
3803 if (VAL_14_BITS_P (actual_fsize))
3804 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3805 else
3806 {
3807 /* It is incorrect to store the saved frame pointer at *sp,
3808 then increment sp (writes beyond the current stack boundary).
3809
3810 So instead use stwm to store at *sp and post-increment the
3811 stack pointer as an atomic operation. Then increment sp to
3812 finish allocating the new frame. */
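/* E.g., for actual_fsize == 20000 the stwm below advances sp by
   adjust1 == 8192 - 64 == 8128 and the remaining adjust2 == 11872
   is added afterwards (a worked illustration).  */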
3813 HOST_WIDE_INT adjust1 = 8192 - 64;
3814 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3815
3816 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3817 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3818 adjust2, 1);
3819 }
3820
3821 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3822 we need to store the previous stack pointer (frame pointer)
3823 into the frame marker on targets that use the HP unwind
3824 library. This allows the HP unwind library to be used to
3825 unwind GCC frames. However, we are not fully compatible
3826 with the HP library because our frame layout differs from
3827 that specified in the HP runtime specification.
3828
3829 We don't want a frame note on this instruction as the frame
3830 marker moves during dynamic stack allocation.
3831
3832 This instruction also serves as a blockage to prevent
3833 register spills from being scheduled before the stack
3834 pointer is raised. This is necessary as we store
3835 registers using the frame pointer as a base register,
3836 and the frame pointer is set before sp is raised. */
3837 if (TARGET_HPUX_UNWIND_LIBRARY)
3838 {
3839 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3840 GEN_INT (TARGET_64BIT ? -8 : -4));
3841
3842 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3843 hard_frame_pointer_rtx);
3844 }
3845 else
3846 emit_insn (gen_blockage ());
3847 }
3848 /* No frame pointer needed. */
3849 else
3850 {
3851 /* In some cases we can perform the first callee register save
3852 and allocate the stack frame at the same time. If so, just
3853 make a note of it and defer allocating the frame until saving
3854 the callee registers. */
3855 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3856 merge_sp_adjust_with_store = 1;
3857 /* Cannot optimize. Adjust the stack frame by actual_fsize
3858 bytes. */
3859 else
3860 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3861 actual_fsize, 1);
3862 }
3863 }
3864
3865 /* Normal register save.
3866
3867 Do not save the frame pointer in the frame_pointer_needed case. It
3868 was done earlier. */
3869 if (frame_pointer_needed)
3870 {
3871 offset = local_fsize;
3872
3873 /* Saving the EH return data registers in the frame is the simplest
3874 way to get the frame unwind information emitted. We put them
3875 just before the general registers. */
3876 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3877 {
3878 unsigned int i, regno;
3879
3880 for (i = 0; ; ++i)
3881 {
3882 regno = EH_RETURN_DATA_REGNO (i);
3883 if (regno == INVALID_REGNUM)
3884 break;
3885
3886 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
3887 offset += UNITS_PER_WORD;
3888 }
3889 }
3890
3891 for (i = 18; i >= 4; i--)
3892 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3893 {
3894 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
3895 offset += UNITS_PER_WORD;
3896 gr_saved++;
3897 }
3898 /* Account for %r3 which is saved in a special place. */
3899 gr_saved++;
3900 }
3901 /* No frame pointer needed. */
3902 else
3903 {
3904 offset = local_fsize - actual_fsize;
3905
3906 /* Saving the EH return data registers in the frame is the simplest
3907 way to get the frame unwind information emitted. */
3908 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3909 {
3910 unsigned int i, regno;
3911
3912 for (i = 0; ; ++i)
3913 {
3914 regno = EH_RETURN_DATA_REGNO (i);
3915 if (regno == INVALID_REGNUM)
3916 break;
3917
3918 /* If merge_sp_adjust_with_store is nonzero, then we can
3919 optimize the first save. */
3920 if (merge_sp_adjust_with_store)
3921 {
3922 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3923 merge_sp_adjust_with_store = 0;
3924 }
3925 else
3926 store_reg (regno, offset, STACK_POINTER_REGNUM);
3927 offset += UNITS_PER_WORD;
3928 }
3929 }
3930
3931 for (i = 18; i >= 3; i--)
3932 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3933 {
3934 /* If merge_sp_adjust_with_store is nonzero, then we can
3935 optimize the first GR save. */
3936 if (merge_sp_adjust_with_store)
3937 {
3938 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3939 merge_sp_adjust_with_store = 0;
3940 }
3941 else
3942 store_reg (i, offset, STACK_POINTER_REGNUM);
3943 offset += UNITS_PER_WORD;
3944 gr_saved++;
3945 }
3946
3947 /* If we wanted to merge the SP adjustment with a GR save, but we never
3948 did any GR saves, then just emit the adjustment here. */
3949 if (merge_sp_adjust_with_store)
3950 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3951 actual_fsize, 1);
3952 }
3953
3954 /* The hppa calling conventions say that %r19, the pic offset
3955 register, is saved at sp - 32 (in this function's frame)
3956 when generating PIC code. FIXME: What is the correct thing
3957 to do for functions which make no calls and allocate no
3958 frame? Do we need to allocate a frame, or can we just omit
3959 the save? For now we'll just omit the save.
3960
3961 We don't want a note on this insn as the frame marker can
3962 move if there is a dynamic stack allocation. */
3963 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3964 {
3965 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3966
3967 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3968
3969 }
3970
3971 /* Align pointer properly (doubleword boundary). */
3972 offset = (offset + 7) & ~7;
3973
3974 /* Floating point register store. */
3975 if (save_fregs)
3976 {
3977 rtx base;
3978
3979 /* First get the frame or stack pointer to the start of the FP register
3980 save area. */
3981 if (frame_pointer_needed)
3982 {
3983 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
3984 base = hard_frame_pointer_rtx;
3985 }
3986 else
3987 {
3988 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3989 base = stack_pointer_rtx;
3990 }
3991
3992 /* Now actually save the FP registers. */
3993 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3994 {
3995 if (df_regs_ever_live_p (i)
3996 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3997 {
3998 rtx addr, insn, reg;
3999 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4000 reg = gen_rtx_REG (DFmode, i);
4001 insn = emit_move_insn (addr, reg);
4002 if (DO_FRAME_NOTES)
4003 {
4004 RTX_FRAME_RELATED_P (insn) = 1;
4005 if (TARGET_64BIT)
4006 {
4007 rtx mem = gen_rtx_MEM (DFmode,
4008 plus_constant (base, offset));
4009 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4010 gen_rtx_SET (VOIDmode, mem, reg));
4011 }
4012 else
4013 {
4014 rtx meml = gen_rtx_MEM (SFmode,
4015 plus_constant (base, offset));
4016 rtx memr = gen_rtx_MEM (SFmode,
4017 plus_constant (base, offset + 4));
4018 rtx regl = gen_rtx_REG (SFmode, i);
4019 rtx regr = gen_rtx_REG (SFmode, i + 1);
4020 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4021 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4022 rtvec vec;
4023
4024 RTX_FRAME_RELATED_P (setl) = 1;
4025 RTX_FRAME_RELATED_P (setr) = 1;
4026 vec = gen_rtvec (2, setl, setr);
4027 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4028 gen_rtx_SEQUENCE (VOIDmode, vec));
4029 }
4030 }
4031 offset += GET_MODE_SIZE (DFmode);
4032 fr_saved++;
4033 }
4034 }
4035 }
4036 }
4037
4038 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4039 Handle case where DISP > 8k by using the add_high_const patterns. */
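/* The DISP > 8k case mirrors the sketch given before store_reg,
   with a load (e.g. ldw) in place of the store.  */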
4040
4041 static void
4042 load_reg (int reg, HOST_WIDE_INT disp, int base)
4043 {
4044 rtx dest = gen_rtx_REG (word_mode, reg);
4045 rtx basereg = gen_rtx_REG (Pmode, base);
4046 rtx src;
4047
4048 if (VAL_14_BITS_P (disp))
4049 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
4050 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4051 {
4052 rtx delta = GEN_INT (disp);
4053 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4054
4055 emit_move_insn (tmpreg, delta);
4056 if (TARGET_DISABLE_INDEXING)
4057 {
4058 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4059 src = gen_rtx_MEM (word_mode, tmpreg);
4060 }
4061 else
4062 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4063 }
4064 else
4065 {
4066 rtx delta = GEN_INT (disp);
4067 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4068 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4069
4070 emit_move_insn (tmpreg, high);
4071 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4072 }
4073
4074 emit_move_insn (dest, src);
4075 }
4076
4077 /* Update the total code bytes output to the text section. */
4078
4079 static void
4080 update_total_code_bytes (unsigned int nbytes)
4081 {
4082 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4083 && !IN_NAMED_SECTION_P (cfun->decl))
4084 {
4085 unsigned int old_total = total_code_bytes;
4086
4087 total_code_bytes += nbytes;
4088
4089 /* Be prepared to handle overflows. */
4090 if (old_total > total_code_bytes)
4091 total_code_bytes = UINT_MAX;
4092 }
4093 }
4094
4095 /* This function generates the assembly code for function exit.
4096 Args are as for output_function_prologue ().
4097
4098 The function epilogue should not depend on the current stack
4099 pointer! It should use the frame pointer only. This is mandatory
4100 because of alloca; we also take advantage of it to omit stack
4101 adjustments before returning. */
4102
4103 static void
4104 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4105 {
4106 rtx insn = get_last_insn ();
4107
4108 last_address = 0;
4109
4110 /* pa_expand_epilogue does the dirty work now. We just need
4111 to output the assembler directives which denote the end
4112 of a function.
4113
4114 To make debuggers happy, emit a nop if the epilogue was completely
4115 eliminated due to a volatile call as the last insn in the
4116 current function. That way the return address (in %r2) will
4117 always point to a valid instruction in the current function. */
4118
4119 /* Get the last real insn. */
4120 if (GET_CODE (insn) == NOTE)
4121 insn = prev_real_insn (insn);
4122
4123 /* If it is a sequence, then look inside. */
4124 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4125 insn = XVECEXP (PATTERN (insn), 0, 0);
4126
4127 /* If insn is a CALL_INSN, then it must be a call to a volatile
4128 function (otherwise there would be epilogue insns). */
4129 if (insn && GET_CODE (insn) == CALL_INSN)
4130 {
4131 fputs ("\tnop\n", file);
4132 last_address += 4;
4133 }
4134
4135 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4136
4137 if (TARGET_SOM && TARGET_GAS)
4138 {
4139 /* We're done with this subspace except possibly for some additional
4140 debug information. Forget that we are in this subspace to ensure
4141 that the next function is output in its own subspace. */
4142 in_section = NULL;
4143 cfun->machine->in_nsubspa = 2;
4144 }
4145
4146 if (INSN_ADDRESSES_SET_P ())
4147 {
4148 insn = get_last_nonnote_insn ();
4149 last_address += INSN_ADDRESSES (INSN_UID (insn));
4150 if (INSN_P (insn))
4151 last_address += insn_default_length (insn);
4152 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4153 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4154 }
4155 else
4156 last_address = UINT_MAX;
4157
4158 /* Finally, update the total number of code bytes output so far. */
4159 update_total_code_bytes (last_address);
4160 }
4161
4162 void
4163 pa_expand_epilogue (void)
4164 {
4165 rtx tmpreg;
4166 HOST_WIDE_INT offset;
4167 HOST_WIDE_INT ret_off = 0;
4168 int i;
4169 int merge_sp_adjust_with_load = 0;
4170
4171 /* We will use this often. */
4172 tmpreg = gen_rtx_REG (word_mode, 1);
4173
4174 /* Try to restore RP early to avoid load/use interlocks when
4175 RP gets used in the return (bv) instruction. This appears to still
4176 be necessary even when we schedule the prologue and epilogue. */
4177 if (rp_saved)
4178 {
4179 ret_off = TARGET_64BIT ? -16 : -20;
4180 if (frame_pointer_needed)
4181 {
4182 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4183 ret_off = 0;
4184 }
4185 else
4186 {
4187 /* No frame pointer, and stack is smaller than 8k. */
4188 if (VAL_14_BITS_P (ret_off - actual_fsize))
4189 {
4190 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4191 ret_off = 0;
4192 }
4193 }
4194 }
4195
4196 /* General register restores. */
4197 if (frame_pointer_needed)
4198 {
4199 offset = local_fsize;
4200
4201 /* If the current function calls __builtin_eh_return, then we need
4202 to restore the saved EH data registers. */
4203 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4204 {
4205 unsigned int i, regno;
4206
4207 for (i = 0; ; ++i)
4208 {
4209 regno = EH_RETURN_DATA_REGNO (i);
4210 if (regno == INVALID_REGNUM)
4211 break;
4212
4213 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4214 offset += UNITS_PER_WORD;
4215 }
4216 }
4217
4218 for (i = 18; i >= 4; i--)
4219 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4220 {
4221 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4222 offset += UNITS_PER_WORD;
4223 }
4224 }
4225 else
4226 {
4227 offset = local_fsize - actual_fsize;
4228
4229 /* If the current function calls __builtin_eh_return, then we need
4230 to restore the saved EH data registers. */
4231 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4232 {
4233 unsigned int i, regno;
4234
4235 for (i = 0; ; ++i)
4236 {
4237 regno = EH_RETURN_DATA_REGNO (i);
4238 if (regno == INVALID_REGNUM)
4239 break;
4240
4241 /* Only for the first load.
4242 merge_sp_adjust_with_load holds the register load
4243 with which we will merge the sp adjustment. */
4244 if (merge_sp_adjust_with_load == 0
4245 && local_fsize == 0
4246 && VAL_14_BITS_P (-actual_fsize))
4247 merge_sp_adjust_with_load = regno;
4248 else
4249 load_reg (regno, offset, STACK_POINTER_REGNUM);
4250 offset += UNITS_PER_WORD;
4251 }
4252 }
4253
4254 for (i = 18; i >= 3; i--)
4255 {
4256 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4257 {
4258 /* Only for the first load.
4259 merge_sp_adjust_with_load holds the register load
4260 with which we will merge the sp adjustment. */
4261 if (merge_sp_adjust_with_load == 0
4262 && local_fsize == 0
4263 && VAL_14_BITS_P (-actual_fsize))
4264 merge_sp_adjust_with_load = i;
4265 else
4266 load_reg (i, offset, STACK_POINTER_REGNUM);
4267 offset += UNITS_PER_WORD;
4268 }
4269 }
4270 }
4271
4272 /* Align pointer properly (doubleword boundary). */
4273 offset = (offset + 7) & ~7;
4274
4275 /* FP register restores. */
4276 if (save_fregs)
4277 {
4278 /* Adjust the register to index off of. */
4279 if (frame_pointer_needed)
4280 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4281 else
4282 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4283
4284 /* Actually do the restores now. */
4285 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4286 if (df_regs_ever_live_p (i)
4287 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4288 {
4289 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4290 rtx dest = gen_rtx_REG (DFmode, i);
4291 emit_move_insn (dest, src);
4292 }
4293 }
4294
4295 /* Emit a blockage insn here to keep these insns from being moved to
4296 an earlier spot in the epilogue, or into the main instruction stream.
4297
4298 This is necessary as we must not cut the stack back before all the
4299 restores are finished. */
4300 emit_insn (gen_blockage ());
4301
4302 /* Reset stack pointer (and possibly frame pointer). The stack
4303 pointer is initially set to fp + 64 to avoid a race condition. */
4304 if (frame_pointer_needed)
4305 {
4306 rtx delta = GEN_INT (-64);
4307
4308 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4309 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4310 stack_pointer_rtx, delta));
4311 }
4312 /* If we were deferring a callee register restore, do it now. */
4313 else if (merge_sp_adjust_with_load)
4314 {
4315 rtx delta = GEN_INT (-actual_fsize);
4316 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4317
4318 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4319 }
4320 else if (actual_fsize != 0)
4321 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4322 - actual_fsize, 0);
4323
4324 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4325 frame greater than 8k), do so now. */
4326 if (ret_off != 0)
4327 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4328
4329 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4330 {
4331 rtx sa = EH_RETURN_STACKADJ_RTX;
4332
4333 emit_insn (gen_blockage ());
4334 emit_insn (TARGET_64BIT
4335 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4336 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4337 }
4338 }
4339
4340 bool
4341 pa_can_use_return_insn (void)
4342 {
4343 if (!reload_completed)
4344 return false;
4345
4346 if (frame_pointer_needed)
4347 return false;
4348
4349 if (df_regs_ever_live_p (2))
4350 return false;
4351
4352 if (crtl->profile)
4353 return false;
4354
4355 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4356 }
4357
4358 rtx
4359 hppa_pic_save_rtx (void)
4360 {
4361 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4362 }
4363
4364 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4365 #define NO_DEFERRED_PROFILE_COUNTERS 0
4366 #endif
4367
4368
4369 /* Vector of funcdef numbers. */
4370 static VEC(int,heap) *funcdef_nos;
4371
4372 /* Output deferred profile counters. */
4373 static void
4374 output_deferred_profile_counters (void)
4375 {
4376 unsigned int i;
4377 int align, n;
4378
4379 if (VEC_empty (int, funcdef_nos))
4380 return;
4381
4382 switch_to_section (data_section);
4383 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4384 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4385
4386 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4387 {
4388 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4389 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4390 }
4391
4392 VEC_free (int, heap, funcdef_nos);
4393 }
4394
4395 void
4396 hppa_profile_hook (int label_no)
4397 {
4398 /* We use SImode for the address of the function in both 32 and
4399 64-bit code to avoid having to provide DImode versions of the
4400 lcla2 and load_offset_label_address insn patterns. */
4401 rtx reg = gen_reg_rtx (SImode);
4402 rtx label_rtx = gen_label_rtx ();
4403 rtx begin_label_rtx, call_insn;
4404 char begin_label_name[16];
4405
4406 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4407 label_no);
4408 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4409
4410 if (TARGET_64BIT)
4411 emit_move_insn (arg_pointer_rtx,
4412 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4413 GEN_INT (64)));
4414
4415 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4416
4417 /* The address of the function is loaded into %r25 with an instruction-
4418 relative sequence that avoids the use of relocations. The sequence
4419 is split so that the load_offset_label_address instruction can
4420 occupy the delay slot of the call to _mcount. */
4421 if (TARGET_PA_20)
4422 emit_insn (gen_lcla2 (reg, label_rtx));
4423 else
4424 emit_insn (gen_lcla1 (reg, label_rtx));
4425
4426 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4427 reg, begin_label_rtx, label_rtx));
4428
4429 #if !NO_DEFERRED_PROFILE_COUNTERS
4430 {
4431 rtx count_label_rtx, addr, r24;
4432 char count_label_name[16];
4433
4434 VEC_safe_push (int, heap, funcdef_nos, label_no);
4435 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4436 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4437
4438 addr = force_reg (Pmode, count_label_rtx);
4439 r24 = gen_rtx_REG (Pmode, 24);
4440 emit_move_insn (r24, addr);
4441
4442 call_insn =
4443 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4444 gen_rtx_SYMBOL_REF (Pmode,
4445 "_mcount")),
4446 GEN_INT (TARGET_64BIT ? 24 : 12)));
4447
4448 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4449 }
4450 #else
4451
4452 call_insn =
4453 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4454 gen_rtx_SYMBOL_REF (Pmode,
4455 "_mcount")),
4456 GEN_INT (TARGET_64BIT ? 16 : 8)));
4457
4458 #endif
4459
4460 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4461 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4462
4463 /* Indicate the _mcount call cannot throw, nor will it execute a
4464 non-local goto. */
4465 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4466 }
4467
4468 /* Fetch the return address for the frame COUNT steps up from
4469 the current frame, after the prologue. FRAMEADDR is the
4470 frame pointer of the COUNT frame.
4471
4472 We want to ignore any export stub remnants here. To handle this,
4473 we examine the code at the return address, and if it is an export
4474 stub, we return a memory rtx for the stub return address stored
4475 at frame-24.
4476
4477 The value returned is used in two different ways:
4478
4479 1. To find a function's caller.
4480
4481 2. To change the return address for a function.
4482
4483 This function handles most instances of case 1; however, it will
4484 fail if there are two levels of stubs to execute on the return
4485 path. The only way I believe that can happen is if the return value
4486 needs a parameter relocation, which never happens for C code.
4487
4488 This function handles most instances of case 2; however, it will
4489 fail if we did not originally have stub code on the return path
4490 but will need stub code on the new return path. This can happen if
4491 the caller & callee are both in the main program, but the new
4492 return location is in a shared library. */
4493
4494 rtx
4495 pa_return_addr_rtx (int count, rtx frameaddr)
4496 {
4497 rtx label;
4498 rtx rp;
4499 rtx saved_rp;
4500 rtx ins;
4501
4502 /* Instruction stream at the normal return address for the export stub:
4503
4504 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4505 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4506 0x00011820 | stub+16: mtsp r1,sr0
4507 0xe0400002 | stub+20: be,n 0(sr0,rp)
4508
4509 0xe0400002 must be specified as -532676606 so that it won't be
4510 rejected as an invalid immediate operand on 64-bit hosts. */
4511
4512 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4513 int i;
4514
4515 if (count != 0)
4516 return NULL_RTX;
4517
4518 rp = get_hard_reg_initial_val (Pmode, 2);
4519
4520 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4521 return rp;
4522
4523 /* If there is no export stub then just use the value saved from
4524 the return pointer register. */
4525
4526 saved_rp = gen_reg_rtx (Pmode);
4527 emit_move_insn (saved_rp, rp);
4528
4529 /* Get pointer to the instruction stream. We have to mask out the
4530 privilege level from the two low order bits of the return address
4531 pointer here so that ins will point to the start of the first
4532 instruction that would have been executed if we returned. */
4533 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4534 label = gen_label_rtx ();
4535
4536 /* Check the instruction stream at the normal return address for the
4537 export stub. If it is an export stub, then our return address is
4538 really in -24[frameaddr]. */
4539
4540 for (i = 0; i < 3; i++)
4541 {
4542 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4543 rtx op1 = GEN_INT (insns[i]);
4544 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4545 }
4546
4547 /* Here we know that our return address points to an export
4548 stub. We don't want to return the address of the export stub,
4549 but rather the return address of the export stub. That return
4550 address is stored at -24[frameaddr]. */
4551
4552 emit_move_insn (saved_rp,
4553 gen_rtx_MEM (Pmode,
4554 memory_address (Pmode,
4555 plus_constant (frameaddr,
4556 -24))));
4557
4558 emit_label (label);
4559
4560 return saved_rp;
4561 }
4562
4563 void
4564 pa_emit_bcond_fp (rtx operands[])
4565 {
4566 enum rtx_code code = GET_CODE (operands[0]);
4567 rtx operand0 = operands[1];
4568 rtx operand1 = operands[2];
4569 rtx label = operands[3];
4570
4571 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4572 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4573
4574 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4575 gen_rtx_IF_THEN_ELSE (VOIDmode,
4576 gen_rtx_fmt_ee (NE,
4577 VOIDmode,
4578 gen_rtx_REG (CCFPmode, 0),
4579 const0_rtx),
4580 gen_rtx_LABEL_REF (VOIDmode, label),
4581 pc_rtx)));
4582
4583 }
4584
4585 /* Adjust the cost of a scheduling dependency. Return the new cost of
4586 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4587
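/* For example (illustrative): if an fpload overwrites a register that
   a preceding FP arithmetic op with a default latency of three cycles
   still reads, the anti-dependence cost below becomes two (latency - 1).  */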
4588 static int
4589 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4590 {
4591 enum attr_type attr_type;
4592
4593 /* Don't adjust costs for a pa8000 chip; also do not adjust any
4594 true dependencies, as they are described with bypasses now. */
4595 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4596 return cost;
4597
4598 if (! recog_memoized (insn))
4599 return 0;
4600
4601 attr_type = get_attr_type (insn);
4602
4603 switch (REG_NOTE_KIND (link))
4604 {
4605 case REG_DEP_ANTI:
4606 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4607 cycles later. */
4608
4609 if (attr_type == TYPE_FPLOAD)
4610 {
4611 rtx pat = PATTERN (insn);
4612 rtx dep_pat = PATTERN (dep_insn);
4613 if (GET_CODE (pat) == PARALLEL)
4614 {
4615 /* This happens for the fldXs,mb patterns. */
4616 pat = XVECEXP (pat, 0, 0);
4617 }
4618 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4619 /* If this happens, we have to extend this to schedule
4620 optimally. Return 0 for now. */
4621 return 0;
4622
4623 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4624 {
4625 if (! recog_memoized (dep_insn))
4626 return 0;
4627 switch (get_attr_type (dep_insn))
4628 {
4629 case TYPE_FPALU:
4630 case TYPE_FPMULSGL:
4631 case TYPE_FPMULDBL:
4632 case TYPE_FPDIVSGL:
4633 case TYPE_FPDIVDBL:
4634 case TYPE_FPSQRTSGL:
4635 case TYPE_FPSQRTDBL:
4636 /* A fpload can't be issued until one cycle before a
4637 preceding arithmetic operation has finished if
4638 the target of the fpload is any of the sources
4639 (or destination) of the arithmetic operation. */
4640 return insn_default_latency (dep_insn) - 1;
4641
4642 default:
4643 return 0;
4644 }
4645 }
4646 }
4647 else if (attr_type == TYPE_FPALU)
4648 {
4649 rtx pat = PATTERN (insn);
4650 rtx dep_pat = PATTERN (dep_insn);
4651 if (GET_CODE (pat) == PARALLEL)
4652 {
4653 /* This happens for the fldXs,mb patterns. */
4654 pat = XVECEXP (pat, 0, 0);
4655 }
4656 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4657 /* If this happens, we have to extend this to schedule
4658 optimally. Return 0 for now. */
4659 return 0;
4660
4661 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4662 {
4663 if (! recog_memoized (dep_insn))
4664 return 0;
4665 switch (get_attr_type (dep_insn))
4666 {
4667 case TYPE_FPDIVSGL:
4668 case TYPE_FPDIVDBL:
4669 case TYPE_FPSQRTSGL:
4670 case TYPE_FPSQRTDBL:
4671 /* An ALU flop can't be issued until two cycles before a
4672 preceding divide or sqrt operation has finished if
4673 the target of the ALU flop is any of the sources
4674 (or destination) of the divide or sqrt operation. */
4675 return insn_default_latency (dep_insn) - 2;
4676
4677 default:
4678 return 0;
4679 }
4680 }
4681 }
4682
4683 /* For other anti dependencies, the cost is 0. */
4684 return 0;
4685
4686 case REG_DEP_OUTPUT:
4687 /* Output dependency; DEP_INSN writes a register that INSN writes some
4688 cycles later. */
4689 if (attr_type == TYPE_FPLOAD)
4690 {
4691 rtx pat = PATTERN (insn);
4692 rtx dep_pat = PATTERN (dep_insn);
4693 if (GET_CODE (pat) == PARALLEL)
4694 {
4695 /* This happens for the fldXs,mb patterns. */
4696 pat = XVECEXP (pat, 0, 0);
4697 }
4698 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4699 /* If this happens, we have to extend this to schedule
4700 optimally. Return 0 for now. */
4701 return 0;
4702
4703 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4704 {
4705 if (! recog_memoized (dep_insn))
4706 return 0;
4707 switch (get_attr_type (dep_insn))
4708 {
4709 case TYPE_FPALU:
4710 case TYPE_FPMULSGL:
4711 case TYPE_FPMULDBL:
4712 case TYPE_FPDIVSGL:
4713 case TYPE_FPDIVDBL:
4714 case TYPE_FPSQRTSGL:
4715 case TYPE_FPSQRTDBL:
4716 /* A fpload can't be issued until one cycle before a
4717 preceding arithmetic operation has finished if
4718 the target of the fpload is the destination of the
4719 arithmetic operation.
4720
4721 Exception: For PA7100LC, PA7200 and PA7300, the cost
4722 is 3 cycles, unless they bundle together. We also
4723 pay the penalty if the second insn is a fpload. */
4724 return insn_default_latency (dep_insn) - 1;
4725
4726 default:
4727 return 0;
4728 }
4729 }
4730 }
4731 else if (attr_type == TYPE_FPALU)
4732 {
4733 rtx pat = PATTERN (insn);
4734 rtx dep_pat = PATTERN (dep_insn);
4735 if (GET_CODE (pat) == PARALLEL)
4736 {
4737 /* This happens for the fldXs,mb patterns. */
4738 pat = XVECEXP (pat, 0, 0);
4739 }
4740 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4741 /* If this happens, we have to extend this to schedule
4742 optimally. Return 0 for now. */
4743 return 0;
4744
4745 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4746 {
4747 if (! recog_memoized (dep_insn))
4748 return 0;
4749 switch (get_attr_type (dep_insn))
4750 {
4751 case TYPE_FPDIVSGL:
4752 case TYPE_FPDIVDBL:
4753 case TYPE_FPSQRTSGL:
4754 case TYPE_FPSQRTDBL:
4755 /* An ALU flop can't be issued until two cycles before a
4756 preceding divide or sqrt operation has finished if
4757 the target of the ALU flop is also the target of
4758 the divide or sqrt operation. */
4759 return insn_default_latency (dep_insn) - 2;
4760
4761 default:
4762 return 0;
4763 }
4764 }
4765 }
4766
4767 /* For other output dependencies, the cost is 0. */
4768 return 0;
4769
4770 default:
4771 gcc_unreachable ();
4772 }
4773 }
4774
4775 /* Adjust scheduling priorities. We use this to try and keep addil
4776 and the next use of %r1 close together. */
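/* E.g., an insn whose source is a LO_SUM of a writable symbolic
   operand has its priority divided by 8 (priority >>= 3), which tends
   to keep it close to the addil that left the high part in %r1.  */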
4777 static int
4778 pa_adjust_priority (rtx insn, int priority)
4779 {
4780 rtx set = single_set (insn);
4781 rtx src, dest;
4782 if (set)
4783 {
4784 src = SET_SRC (set);
4785 dest = SET_DEST (set);
4786 if (GET_CODE (src) == LO_SUM
4787 && symbolic_operand (XEXP (src, 1), VOIDmode)
4788 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4789 priority >>= 3;
4790
4791 else if (GET_CODE (src) == MEM
4792 && GET_CODE (XEXP (src, 0)) == LO_SUM
4793 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4794 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4795 priority >>= 1;
4796
4797 else if (GET_CODE (dest) == MEM
4798 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4799 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4800 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4801 priority >>= 3;
4802 }
4803 return priority;
4804 }
4805
4806 /* The 700 can only issue a single insn at a time.
4807 The 7XXX processors can issue two insns at a time.
4808 The 8000 can issue 4 insns at a time. */
4809 static int
4810 pa_issue_rate (void)
4811 {
4812 switch (pa_cpu)
4813 {
4814 case PROCESSOR_700: return 1;
4815 case PROCESSOR_7100: return 2;
4816 case PROCESSOR_7100LC: return 2;
4817 case PROCESSOR_7200: return 2;
4818 case PROCESSOR_7300: return 2;
4819 case PROCESSOR_8000: return 4;
4820
4821 default:
4822 gcc_unreachable ();
4823 }
4824 }
4825
4826
4827
4828 /* Return any length adjustment needed by INSN which already has its length
4829 computed as LENGTH. Return zero if no adjustment is necessary.
4830
4831 For the PA: function calls, millicode calls, and backwards short
4832 conditional branches with unfilled delay slots need an adjustment of
4833 one instruction (4 bytes) to account for the NOP which will be inserted into the instruction stream.
4834
4835 Also compute the length of an inline block move here as it is too
4836 complicated to express as a length attribute in pa.md. */
4837 int
4838 pa_adjust_insn_length (rtx insn, int length)
4839 {
4840 rtx pat = PATTERN (insn);
4841
4842 /* Jumps inside switch tables which have unfilled delay slots need
4843 adjustment. */
4844 if (GET_CODE (insn) == JUMP_INSN
4845 && GET_CODE (pat) == PARALLEL
4846 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4847 return 4;
4848 /* Millicode insn with an unfilled delay slot. */
4849 else if (GET_CODE (insn) == INSN
4850 && GET_CODE (pat) != SEQUENCE
4851 && GET_CODE (pat) != USE
4852 && GET_CODE (pat) != CLOBBER
4853 && get_attr_type (insn) == TYPE_MILLI)
4854 return 4;
4855 /* Block move pattern. */
4856 else if (GET_CODE (insn) == INSN
4857 && GET_CODE (pat) == PARALLEL
4858 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4859 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4860 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4861 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4862 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4863 return compute_movmem_length (insn) - 4;
4864 /* Block clear pattern. */
4865 else if (GET_CODE (insn) == INSN
4866 && GET_CODE (pat) == PARALLEL
4867 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4868 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4869 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4870 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4871 return compute_clrmem_length (insn) - 4;
4872 /* Conditional branch with an unfilled delay slot. */
4873 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4874 {
4875 /* Adjust a short backwards conditional with an unfilled delay slot. */
4876 if (GET_CODE (pat) == SET
4877 && length == 4
4878 && JUMP_LABEL (insn) != NULL_RTX
4879 && ! forward_branch_p (insn))
4880 return 4;
4881 else if (GET_CODE (pat) == PARALLEL
4882 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4883 && length == 4)
4884 return 4;
4885 /* Adjust dbra insn with short backwards conditional branch with
4886 unfilled delay slot -- only for case where counter is in a
4887 general register. */
4888 else if (GET_CODE (pat) == PARALLEL
4889 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4890 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4891 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4892 && length == 4
4893 && ! forward_branch_p (insn))
4894 return 4;
4895 else
4896 return 0;
4897 }
4898 return 0;
4899 }
4900
4901 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
4902
4903 static bool
4904 pa_print_operand_punct_valid_p (unsigned char code)
4905 {
4906 if (code == '@'
4907 || code == '#'
4908 || code == '*'
4909 || code == '^')
4910 return true;
4911
4912 return false;
4913 }
4914
4915 /* Print operand X (an rtx) in assembler syntax to file FILE.
4916 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4917 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4918
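/* A few worked examples of the CONST_INT cases below (illustrative):
   for (const_int 3), %k prints -4 (the one's complement), %L prints
   29 (32 - (3 & 31)), and %P prints 28 (31 - (3 & 31)).  */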
4919 void
4920 pa_print_operand (FILE *file, rtx x, int code)
4921 {
4922 switch (code)
4923 {
4924 case '#':
4925 /* Output a 'nop' if there's nothing for the delay slot. */
4926 if (dbr_sequence_length () == 0)
4927 fputs ("\n\tnop", file);
4928 return;
4929 case '*':
4930 /* Output a nullification completer if there's nothing for the
4931 delay slot or nullification is requested. */
4932 if (dbr_sequence_length () == 0
4933 || (final_sequence
4934 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4935 fputs (",n", file);
4936 return;
4937 case 'R':
4938 /* Print out the second register name of a register pair.
4939 I.e., R (6) => 7. */
4940 fputs (reg_names[REGNO (x) + 1], file);
4941 return;
4942 case 'r':
4943 /* A register or zero. */
4944 if (x == const0_rtx
4945 || (x == CONST0_RTX (DFmode))
4946 || (x == CONST0_RTX (SFmode)))
4947 {
4948 fputs ("%r0", file);
4949 return;
4950 }
4951 else
4952 break;
4953 case 'f':
4954 /* A register or zero (floating point). */
4955 if (x == const0_rtx
4956 || (x == CONST0_RTX (DFmode))
4957 || (x == CONST0_RTX (SFmode)))
4958 {
4959 fputs ("%fr0", file);
4960 return;
4961 }
4962 else
4963 break;
4964 case 'A':
4965 {
4966 rtx xoperands[2];
4967
4968 xoperands[0] = XEXP (XEXP (x, 0), 0);
4969 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4970 pa_output_global_address (file, xoperands[1], 0);
4971 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4972 return;
4973 }
4974
4975 case 'C': /* Plain (C)ondition */
4976 case 'X':
4977 switch (GET_CODE (x))
4978 {
4979 case EQ:
4980 fputs ("=", file); break;
4981 case NE:
4982 fputs ("<>", file); break;
4983 case GT:
4984 fputs (">", file); break;
4985 case GE:
4986 fputs (">=", file); break;
4987 case GEU:
4988 fputs (">>=", file); break;
4989 case GTU:
4990 fputs (">>", file); break;
4991 case LT:
4992 fputs ("<", file); break;
4993 case LE:
4994 fputs ("<=", file); break;
4995 case LEU:
4996 fputs ("<<=", file); break;
4997 case LTU:
4998 fputs ("<<", file); break;
4999 default:
5000 gcc_unreachable ();
5001 }
5002 return;
5003 case 'N': /* Condition, (N)egated */
5004 switch (GET_CODE (x))
5005 {
5006 case EQ:
5007 fputs ("<>", file); break;
5008 case NE:
5009 fputs ("=", file); break;
5010 case GT:
5011 fputs ("<=", file); break;
5012 case GE:
5013 fputs ("<", file); break;
5014 case GEU:
5015 fputs ("<<", file); break;
5016 case GTU:
5017 fputs ("<<=", file); break;
5018 case LT:
5019 fputs (">=", file); break;
5020 case LE:
5021 fputs (">", file); break;
5022 case LEU:
5023 fputs (">>", file); break;
5024 case LTU:
5025 fputs (">>=", file); break;
5026 default:
5027 gcc_unreachable ();
5028 }
5029 return;
5030 /* For floating point comparisons. Note that the output
5031 predicates are the complement of the desired condition. The
5032 conditions for GT, GE, LT, LE and LTGT cause an invalid
5033 operation exception if the result is unordered and this
5034 exception is enabled in the floating-point status register. */
5035 case 'Y':
5036 switch (GET_CODE (x))
5037 {
5038 case EQ:
5039 fputs ("!=", file); break;
5040 case NE:
5041 fputs ("=", file); break;
5042 case GT:
5043 fputs ("!>", file); break;
5044 case GE:
5045 fputs ("!>=", file); break;
5046 case LT:
5047 fputs ("!<", file); break;
5048 case LE:
5049 fputs ("!<=", file); break;
5050 case LTGT:
5051 fputs ("!<>", file); break;
5052 case UNLE:
5053 fputs ("!?<=", file); break;
5054 case UNLT:
5055 fputs ("!?<", file); break;
5056 case UNGE:
5057 fputs ("!?>=", file); break;
5058 case UNGT:
5059 fputs ("!?>", file); break;
5060 case UNEQ:
5061 fputs ("!?=", file); break;
5062 case UNORDERED:
5063 fputs ("!?", file); break;
5064 case ORDERED:
5065 fputs ("?", file); break;
5066 default:
5067 gcc_unreachable ();
5068 }
5069 return;
5070 case 'S': /* Condition, operands are (S)wapped. */
5071 switch (GET_CODE (x))
5072 {
5073 case EQ:
5074 fputs ("=", file); break;
5075 case NE:
5076 fputs ("<>", file); break;
5077 case GT:
5078 fputs ("<", file); break;
5079 case GE:
5080 fputs ("<=", file); break;
5081 case GEU:
5082 fputs ("<<=", file); break;
5083 case GTU:
5084 fputs ("<<", file); break;
5085 case LT:
5086 fputs (">", file); break;
5087 case LE:
5088 fputs (">=", file); break;
5089 case LEU:
5090 fputs (">>=", file); break;
5091 case LTU:
5092 fputs (">>", file); break;
5093 default:
5094 gcc_unreachable ();
5095 }
5096 return;
5097 case 'B': /* Condition, (B)oth swapped and negate. */
5098 switch (GET_CODE (x))
5099 {
5100 case EQ:
5101 fputs ("<>", file); break;
5102 case NE:
5103 fputs ("=", file); break;
5104 case GT:
5105 fputs (">=", file); break;
5106 case GE:
5107 fputs (">", file); break;
5108 case GEU:
5109 fputs (">>", file); break;
5110 case GTU:
5111 fputs (">>=", file); break;
5112 case LT:
5113 fputs ("<=", file); break;
5114 case LE:
5115 fputs ("<", file); break;
5116 case LEU:
5117 fputs ("<<", file); break;
5118 case LTU:
5119 fputs ("<<=", file); break;
5120 default:
5121 gcc_unreachable ();
5122 }
5123 return;
5124 case 'k':
5125 gcc_assert (GET_CODE (x) == CONST_INT);
5126 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5127 return;
5128 case 'Q':
5129 gcc_assert (GET_CODE (x) == CONST_INT);
5130 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5131 return;
5132 case 'L':
5133 gcc_assert (GET_CODE (x) == CONST_INT);
5134 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5135 return;
5136 case 'O':
5137 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5138 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5139 return;
5140 case 'p':
5141 gcc_assert (GET_CODE (x) == CONST_INT);
5142 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5143 return;
5144 case 'P':
5145 gcc_assert (GET_CODE (x) == CONST_INT);
5146 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5147 return;
5148 case 'I':
5149 if (GET_CODE (x) == CONST_INT)
5150 fputs ("i", file);
5151 return;
5152 case 'M':
5153 case 'F':
5154 switch (GET_CODE (XEXP (x, 0)))
5155 {
5156 case PRE_DEC:
5157 case PRE_INC:
5158 if (ASSEMBLER_DIALECT == 0)
5159 fputs ("s,mb", file);
5160 else
5161 fputs (",mb", file);
5162 break;
5163 case POST_DEC:
5164 case POST_INC:
5165 if (ASSEMBLER_DIALECT == 0)
5166 fputs ("s,ma", file);
5167 else
5168 fputs (",ma", file);
5169 break;
5170 case PLUS:
5171 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5172 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5173 {
5174 if (ASSEMBLER_DIALECT == 0)
5175 fputs ("x", file);
5176 }
5177 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5178 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5179 {
5180 if (ASSEMBLER_DIALECT == 0)
5181 fputs ("x,s", file);
5182 else
5183 fputs (",s", file);
5184 }
5185 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5186 fputs ("s", file);
5187 break;
5188 default:
5189 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5190 fputs ("s", file);
5191 break;
5192 }
5193 return;
5194 case 'G':
5195 pa_output_global_address (file, x, 0);
5196 return;
5197 case 'H':
5198 pa_output_global_address (file, x, 1);
5199 return;
5200 case 0: /* Don't do anything special */
5201 break;
5202 case 'Z':
5203 {
5204 unsigned op[3];
5205 compute_zdepwi_operands (INTVAL (x), op);
5206 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5207 return;
5208 }
5209 case 'z':
5210 {
5211 unsigned op[3];
5212 compute_zdepdi_operands (INTVAL (x), op);
5213 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5214 return;
5215 }
5216 case 'c':
5217 /* We can get here from a .vtable_inherit due to our
5218 CONSTANT_ADDRESS_P rejecting perfectly good constant
5219 addresses. */
5220 break;
5221 default:
5222 gcc_unreachable ();
5223 }
5224 if (GET_CODE (x) == REG)
5225 {
5226 fputs (reg_names [REGNO (x)], file);
5227 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5228 {
5229 fputs ("R", file);
5230 return;
5231 }
5232 if (FP_REG_P (x)
5233 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5234 && (REGNO (x) & 1) == 0)
5235 fputs ("L", file);
5236 }
5237 else if (GET_CODE (x) == MEM)
5238 {
5239 int size = GET_MODE_SIZE (GET_MODE (x));
5240 rtx base = NULL_RTX;
5241 switch (GET_CODE (XEXP (x, 0)))
5242 {
5243 case PRE_DEC:
5244 case POST_DEC:
5245 base = XEXP (XEXP (x, 0), 0);
5246 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5247 break;
5248 case PRE_INC:
5249 case POST_INC:
5250 base = XEXP (XEXP (x, 0), 0);
5251 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5252 break;
5253 case PLUS:
5254 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5255 fprintf (file, "%s(%s)",
5256 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5257 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5258 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5259 fprintf (file, "%s(%s)",
5260 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5261 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5262 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5263 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5264 {
5265 /* Because the REG_POINTER flag can get lost during reload,
5266 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5267 index and base registers in the combined move patterns. */
5268 rtx base = XEXP (XEXP (x, 0), 1);
5269 rtx index = XEXP (XEXP (x, 0), 0);
5270
5271 fprintf (file, "%s(%s)",
5272 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5273 }
5274 else
5275 output_address (XEXP (x, 0));
5276 break;
5277 default:
5278 output_address (XEXP (x, 0));
5279 break;
5280 }
5281 }
5282 else
5283 output_addr_const (file, x);
5284 }
5285
5286 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
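/* E.g., a non-PIC reference to a writable symbol foo at offset 8 is
   printed as "foo-$global$+8" (illustrative; foo is a hypothetical
   symbol name).  */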
5287
5288 void
5289 pa_output_global_address (FILE *file, rtx x, int round_constant)
5290 {
5291
5292 /* Imagine (high (const (plus ...))). */
5293 if (GET_CODE (x) == HIGH)
5294 x = XEXP (x, 0);
5295
5296 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5297 output_addr_const (file, x);
5298 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5299 {
5300 output_addr_const (file, x);
5301 fputs ("-$global$", file);
5302 }
5303 else if (GET_CODE (x) == CONST)
5304 {
5305 const char *sep = "";
5306 int offset = 0; /* assembler wants -$global$ at end */
5307 rtx base = NULL_RTX;
5308
5309 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5310 {
5311 case SYMBOL_REF:
5312 base = XEXP (XEXP (x, 0), 0);
5313 output_addr_const (file, base);
5314 break;
5315 case CONST_INT:
5316 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5317 break;
5318 default:
5319 gcc_unreachable ();
5320 }
5321
5322 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5323 {
5324 case SYMBOL_REF:
5325 base = XEXP (XEXP (x, 0), 1);
5326 output_addr_const (file, base);
5327 break;
5328 case CONST_INT:
5329 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5330 break;
5331 default:
5332 gcc_unreachable ();
5333 }
5334
5335 /* How bogus. The compiler is apparently responsible for
5336 rounding the constant if it uses an LR field selector.
5337
5338 The linker and/or assembler seem a better place since
5339 they have to do this kind of thing already.
5340
5341 If we fail to do this, HP's optimizing linker may eliminate
5342 an addil, but not update the ldw/stw/ldo instruction that
5343 uses the result of the addil. */
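/* E.g., an offset of 0x1234 rounds to 0x2000:
   (0x1234 + 0x1000) & ~0x1fff == 0x2000 (a worked illustration).  */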
5344 if (round_constant)
5345 offset = ((offset + 0x1000) & ~0x1fff);
5346
5347 switch (GET_CODE (XEXP (x, 0)))
5348 {
5349 case PLUS:
5350 if (offset < 0)
5351 {
5352 offset = -offset;
5353 sep = "-";
5354 }
5355 else
5356 sep = "+";
5357 break;
5358
5359 case MINUS:
5360 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5361 sep = "-";
5362 break;
5363
5364 default:
5365 gcc_unreachable ();
5366 }
5367
5368 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5369 fputs ("-$global$", file);
5370 if (offset)
5371 fprintf (file, "%s%d", sep, offset);
5372 }
5373 else
5374 output_addr_const (file, x);
5375 }
5376
5377 /* Output boilerplate text to appear at the beginning of the file.
5378 There are several possible versions. */
5379 #define aputs(x) fputs(x, asm_out_file)
5380 static inline void
5381 pa_file_start_level (void)
5382 {
5383 if (TARGET_64BIT)
5384 aputs ("\t.LEVEL 2.0w\n");
5385 else if (TARGET_PA_20)
5386 aputs ("\t.LEVEL 2.0\n");
5387 else if (TARGET_PA_11)
5388 aputs ("\t.LEVEL 1.1\n");
5389 else
5390 aputs ("\t.LEVEL 1.0\n");
5391 }
5392
5393 static inline void
5394 pa_file_start_space (int sortspace)
5395 {
5396 aputs ("\t.SPACE $PRIVATE$");
5397 if (sortspace)
5398 aputs (",SORT=16");
5399 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5400 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5401 "\n\t.SPACE $TEXT$");
5402 if (sortspace)
5403 aputs (",SORT=8");
5404 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5405 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5406 }
5407
5408 static inline void
5409 pa_file_start_file (int want_version)
5410 {
5411 if (write_symbols != NO_DEBUG)
5412 {
5413 output_file_directive (asm_out_file, main_input_filename);
5414 if (want_version)
5415 aputs ("\t.version\t\"01.01\"\n");
5416 }
5417 }
5418
5419 static inline void
5420 pa_file_start_mcount (const char *aswhat)
5421 {
5422 if (profile_flag)
5423 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5424 }
5425
5426 static void
5427 pa_elf_file_start (void)
5428 {
5429 pa_file_start_level ();
5430 pa_file_start_mcount ("ENTRY");
5431 pa_file_start_file (0);
5432 }
5433
5434 static void
5435 pa_som_file_start (void)
5436 {
5437 pa_file_start_level ();
5438 pa_file_start_space (0);
5439 aputs ("\t.IMPORT $global$,DATA\n"
5440 "\t.IMPORT $$dyncall,MILLICODE\n");
5441 pa_file_start_mcount ("CODE");
5442 pa_file_start_file (0);
5443 }
5444
5445 static void
5446 pa_linux_file_start (void)
5447 {
5448 pa_file_start_file (1);
5449 pa_file_start_level ();
5450 pa_file_start_mcount ("CODE");
5451 }
5452
5453 static void
5454 pa_hpux64_gas_file_start (void)
5455 {
5456 pa_file_start_level ();
5457 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5458 if (profile_flag)
5459 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5460 #endif
5461 pa_file_start_file (1);
5462 }
5463
5464 static void
5465 pa_hpux64_hpas_file_start (void)
5466 {
5467 pa_file_start_level ();
5468 pa_file_start_space (1);
5469 pa_file_start_mcount ("CODE");
5470 pa_file_start_file (0);
5471 }
5472 #undef aputs
5473
5474 /* Search the deferred plabel list for SYMBOL and return its internal
5475 label. If an entry for SYMBOL is not found, a new entry is created. */
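/* A plabel ("procedure label") is a data word (a doubleword in the
   64-bit runtime) holding a function address for use in indirect
   calls.  As a rough sketch, output_deferred_plabels below emits
   each entry as something like

	L$0012:
		.word foo

   where the exact label spelling and directive depend on the target
   assembler; the names above are illustrative only. */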
5476
5477 rtx
5478 pa_get_deferred_plabel (rtx symbol)
5479 {
5480 const char *fname = XSTR (symbol, 0);
5481 size_t i;
5482
5483 /* See if we have already put this function on the list of deferred
5484 plabels. This list is generally small, so a linear search is not
5485 too ugly. If it proves too slow, replace it with something faster. */
5486 for (i = 0; i < n_deferred_plabels; i++)
5487 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5488 break;
5489
5490 /* If the deferred plabel list is empty, or this entry was not found
5491 on the list, create a new entry on the list. */
5492 if (deferred_plabels == NULL || i == n_deferred_plabels)
5493 {
5494 tree id;
5495
5496 if (deferred_plabels == 0)
5497 deferred_plabels = ggc_alloc_deferred_plabel ();
5498 else
5499 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5500 deferred_plabels,
5501 n_deferred_plabels + 1);
5502
5503 i = n_deferred_plabels++;
5504 deferred_plabels[i].internal_label = gen_label_rtx ();
5505 deferred_plabels[i].symbol = symbol;
5506
5507 /* Gross. We have just implicitly taken the address of this
5508 function. Mark it in the same manner as assemble_name. */
5509 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5510 if (id)
5511 mark_referenced (id);
5512 }
5513
5514 return deferred_plabels[i].internal_label;
5515 }
5516
5517 static void
5518 output_deferred_plabels (void)
5519 {
5520 size_t i;
5521
5522 /* If we have some deferred plabels, then we need to switch into the
5523 data or readonly data section, and align it to a 4 byte (32-bit)
5524 or 8 byte (64-bit) boundary before outputting the deferred plabels. */
5525 if (n_deferred_plabels)
5526 {
5527 switch_to_section (flag_pic ? data_section : readonly_data_section);
5528 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5529 }
5530
5531 /* Now output the deferred plabels. */
5532 for (i = 0; i < n_deferred_plabels; i++)
5533 {
5534 targetm.asm_out.internal_label (asm_out_file, "L",
5535 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5536 assemble_integer (deferred_plabels[i].symbol,
5537 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5538 }
5539 }
5540
5541 /* Initialize optabs to point to emulation routines. */
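/* For example, once set_optab_libfunc points add_optab at "_U_Qfadd"
   below, a TFmode (long double) addition is expanded as a libcall to
   _U_Qfadd instead of inline code. */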
5542
5543 static void
5544 pa_init_libfuncs (void)
5545 {
5546 if (HPUX_LONG_DOUBLE_LIBRARY)
5547 {
5548 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5549 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5550 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5551 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5552 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5553 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5554 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5555 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5556 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5557
5558 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5559 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5560 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5561 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5562 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5563 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5564 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5565
5566 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5567 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5568 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5569 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5570
5571 set_conv_libfunc (sfix_optab, SImode, TFmode,
5572 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5573 : "_U_Qfcnvfxt_quad_to_sgl");
5574 set_conv_libfunc (sfix_optab, DImode, TFmode,
5575 "_U_Qfcnvfxt_quad_to_dbl");
5576 set_conv_libfunc (ufix_optab, SImode, TFmode,
5577 "_U_Qfcnvfxt_quad_to_usgl");
5578 set_conv_libfunc (ufix_optab, DImode, TFmode,
5579 "_U_Qfcnvfxt_quad_to_udbl");
5580
5581 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5582 "_U_Qfcnvxf_sgl_to_quad");
5583 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5584 "_U_Qfcnvxf_dbl_to_quad");
5585 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5586 "_U_Qfcnvxf_usgl_to_quad");
5587 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5588 "_U_Qfcnvxf_udbl_to_quad");
5589 }
5590
5591 if (TARGET_SYNC_LIBCALL)
5592 init_sync_libfuncs (UNITS_PER_WORD);
5593 }
5594
5595 /* HP's millicode routines mean something special to the assembler.
5596 Keep track of which ones we have used. */
5597
5598 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5599 static void import_milli (enum millicodes);
5600 static char imported[(int) end1000];
5601 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5602 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5603 #define MILLI_START 10
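/* MILLI_START is the offset of the "...." placeholder in
   import_string.  import_milli overwrites those four characters with
   a millicode name, so import_milli (mulI) outputs
   ".IMPORT $$mulI,MILLICODE". */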
5604
5605 static void
5606 import_milli (enum millicodes code)
5607 {
5608 char str[sizeof (import_string)];
5609
5610 if (!imported[(int) code])
5611 {
5612 imported[(int) code] = 1;
5613 strcpy (str, import_string);
5614 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5615 output_asm_insn (str, 0);
5616 }
5617 }
5618
5619 /* The register constraints have put the operands and return value in
5620 the proper registers. */
5621
5622 const char *
5623 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5624 {
5625 import_milli (mulI);
5626 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5627 }
5628
5629 /* Emit the rtl for doing a division by a constant. */
5630
5631 /* Do magic division millicodes exist for this value? */
5632 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5633
5634 /* We'll use an array to keep track of the magic millicodes and
5635 whether or not we've used them already. [n][0] is signed, [n][1] is
5636 unsigned. */
5637
5638 static int div_milli[16][2];
5639
5640 int
5641 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5642 {
5643 if (GET_CODE (operands[2]) == CONST_INT
5644 && INTVAL (operands[2]) > 0
5645 && INTVAL (operands[2]) < 16
5646 && pa_magic_milli[INTVAL (operands[2])])
5647 {
5648 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5649
5650 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5651 emit
5652 (gen_rtx_PARALLEL
5653 (VOIDmode,
5654 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5655 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5656 SImode,
5657 gen_rtx_REG (SImode, 26),
5658 operands[2])),
5659 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5660 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5661 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5662 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5663 gen_rtx_CLOBBER (VOIDmode, ret))));
5664 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5665 return 1;
5666 }
5667 return 0;
5668 }
5669
5670 const char *
5671 pa_output_div_insn (rtx *operands, int unsignedp, rtx insn)
5672 {
5673 int divisor;
5674
5675 /* If the divisor is a constant, try to use one of the special
5676 opcodes. */
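/* For example, an unsigned division by 10 (with the dividend placed
   in %r26 by pa_emit_hpdiv_const above) imports and calls the
   $$divU_10 millicode routine, which returns the quotient in %r29. */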
5677 if (GET_CODE (operands[0]) == CONST_INT)
5678 {
5679 static char buf[100];
5680 divisor = INTVAL (operands[0]);
5681 if (!div_milli[divisor][unsignedp])
5682 {
5683 div_milli[divisor][unsignedp] = 1;
5684 if (unsignedp)
5685 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5686 else
5687 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5688 }
5689 if (unsignedp)
5690 {
5691 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5692 INTVAL (operands[0]));
5693 return pa_output_millicode_call (insn,
5694 gen_rtx_SYMBOL_REF (SImode, buf));
5695 }
5696 else
5697 {
5698 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5699 INTVAL (operands[0]));
5700 return pa_output_millicode_call (insn,
5701 gen_rtx_SYMBOL_REF (SImode, buf));
5702 }
5703 }
5704 /* Divisor isn't a special constant. */
5705 else
5706 {
5707 if (unsignedp)
5708 {
5709 import_milli (divU);
5710 return pa_output_millicode_call (insn,
5711 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5712 }
5713 else
5714 {
5715 import_milli (divI);
5716 return pa_output_millicode_call (insn,
5717 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5718 }
5719 }
5720 }
5721
5722 /* Output a $$rem millicode to do mod. */
5723
5724 const char *
5725 pa_output_mod_insn (int unsignedp, rtx insn)
5726 {
5727 if (unsignedp)
5728 {
5729 import_milli (remU);
5730 return pa_output_millicode_call (insn,
5731 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5732 }
5733 else
5734 {
5735 import_milli (remI);
5736 return pa_output_millicode_call (insn,
5737 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5738 }
5739 }
5740
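/* Output a .CALL argument relocation descriptor for CALL_INSN.  As an
   illustration, assuming the 32-bit convention where the first two
   integer arguments arrive in %r26 and %r25, a call passing two ints
   emits roughly

	.CALL ARGW0=GR,ARGW1=GR

   telling the linker which argument words are in general registers
   and which are in floating-point registers. */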
5741 void
5742 pa_output_arg_descriptor (rtx call_insn)
5743 {
5744 const char *arg_regs[4];
5745 enum machine_mode arg_mode;
5746 rtx link;
5747 int i, output_flag = 0;
5748 int regno;
5749
5750 /* We neither need nor want argument location descriptors for the
5751 64-bit runtime environment or the ELF32 environment. */
5752 if (TARGET_64BIT || TARGET_ELF32)
5753 return;
5754
5755 for (i = 0; i < 4; i++)
5756 arg_regs[i] = 0;
5757
5758 /* Specify explicitly that no argument relocations should take place
5759 if using the portable runtime calling conventions. */
5760 if (TARGET_PORTABLE_RUNTIME)
5761 {
5762 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5763 asm_out_file);
5764 return;
5765 }
5766
5767 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5768 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5769 link; link = XEXP (link, 1))
5770 {
5771 rtx use = XEXP (link, 0);
5772
5773 if (! (GET_CODE (use) == USE
5774 && GET_CODE (XEXP (use, 0)) == REG
5775 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5776 continue;
5777
5778 arg_mode = GET_MODE (XEXP (use, 0));
5779 regno = REGNO (XEXP (use, 0));
5780 if (regno >= 23 && regno <= 26)
5781 {
5782 arg_regs[26 - regno] = "GR";
5783 if (arg_mode == DImode)
5784 arg_regs[25 - regno] = "GR";
5785 }
5786 else if (regno >= 32 && regno <= 39)
5787 {
5788 if (arg_mode == SFmode)
5789 arg_regs[(regno - 32) / 2] = "FR";
5790 else
5791 {
5792 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5793 arg_regs[(regno - 34) / 2] = "FR";
5794 arg_regs[(regno - 34) / 2 + 1] = "FU";
5795 #else
5796 arg_regs[(regno - 34) / 2] = "FU";
5797 arg_regs[(regno - 34) / 2 + 1] = "FR";
5798 #endif
5799 }
5800 }
5801 }
5802 fputs ("\t.CALL ", asm_out_file);
5803 for (i = 0; i < 4; i++)
5804 {
5805 if (arg_regs[i])
5806 {
5807 if (output_flag++)
5808 fputc (',', asm_out_file);
5809 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5810 }
5811 }
5812 fputc ('\n', asm_out_file);
5813 }
5814 \f
5815 /* Inform reload about cases where moving X with a mode MODE to a register in
5816 RCLASS requires an extra scratch or immediate register. Return the class
5817 needed for the immediate register. */
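/* For instance (the RTL here is only an illustration), reloading
   (mem:SI (plus (reg) (const_int 8192))) into a floating-point
   register reaches the integer-mode MEM case below and returns
   GENERAL_REGS, so the value moves through a general register rather
   than risking an out-of-range REG+D floating-point access. */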
5818
5819 static reg_class_t
5820 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5821 enum machine_mode mode, secondary_reload_info *sri)
5822 {
5823 int regno;
5824 enum reg_class rclass = (enum reg_class) rclass_i;
5825
5826 /* Handle the easy stuff first. */
5827 if (rclass == R1_REGS)
5828 return NO_REGS;
5829
5830 if (REG_P (x))
5831 {
5832 regno = REGNO (x);
5833 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5834 return NO_REGS;
5835 }
5836 else
5837 regno = -1;
5838
5839 /* If we have something like (mem (mem (...)), we can safely assume the
5840 inner MEM will end up in a general register after reloading, so there's
5841 no need for a secondary reload. */
5842 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5843 return NO_REGS;
5844
5845 /* Trying to load a constant into a FP register during PIC code
5846 generation requires %r1 as a scratch register. */
5847 if (flag_pic
5848 && (mode == SImode || mode == DImode)
5849 && FP_REG_CLASS_P (rclass)
5850 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5851 {
5852 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5853 : CODE_FOR_reload_indi_r1);
5854 return NO_REGS;
5855 }
5856
5857 /* Secondary reloads of symbolic operands require %r1 as a scratch
5858 register when we're generating PIC code and when the operand isn't
5859 readonly. */
5860 if (pa_symbolic_expression_p (x))
5861 {
5862 if (GET_CODE (x) == HIGH)
5863 x = XEXP (x, 0);
5864
5865 if (flag_pic || !read_only_operand (x, VOIDmode))
5866 {
5867 gcc_assert (mode == SImode || mode == DImode);
5868 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5869 : CODE_FOR_reload_indi_r1);
5870 return NO_REGS;
5871 }
5872 }
5873
5874 /* Profiling showed the PA port spends about 1.3% of its compilation
5875 time in true_regnum from calls inside pa_secondary_reload_class. */
5876 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5877 regno = true_regnum (x);
5878
5879 /* In order to allow 14-bit displacements in integer loads and stores,
5880 we need to prevent reload from generating out of range integer mode
5881 loads and stores to the floating point registers. Previously, we
5882 used to call for a secondary reload and have pa_emit_move_sequence()
5883 fix the instruction sequence. However, reload occasionally wouldn't
5884 generate the reload and we would end up with an invalid REG+D memory
5885 address. So, now we use an intermediate general register for most
5886 memory loads and stores. */
5887 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5888 && GET_MODE_CLASS (mode) == MODE_INT
5889 && FP_REG_CLASS_P (rclass))
5890 {
5891 /* Reload passes (mem:SI (reg/f:DI 30 %r30)) when it wants to check
5892 the secondary reload needed for a pseudo. It never passes a
5893 REG+D address. */
5894 if (GET_CODE (x) == MEM)
5895 {
5896 x = XEXP (x, 0);
5897
5898 /* We don't need an intermediate for indexed and LO_SUM DLT
5899 memory addresses. When INT14_OK_STRICT is true, it might
5900 appear that we could directly allow register indirect
5901 memory addresses. However, this doesn't work because we
5902 don't support SUBREGs in floating-point register copies
5903 and reload doesn't tell us when it's going to use a SUBREG. */
5904 if (IS_INDEX_ADDR_P (x)
5905 || IS_LO_SUM_DLT_ADDR_P (x))
5906 return NO_REGS;
5907
5908 /* Otherwise, we need an intermediate general register. */
5909 return GENERAL_REGS;
5910 }
5911
5912 /* Request a secondary reload with a general scratch register
5913 for everything else. ??? Could symbolic operands be handled
5914 directly when generating non-pic PA 2.0 code? */
5915 sri->icode = (in_p
5916 ? direct_optab_handler (reload_in_optab, mode)
5917 : direct_optab_handler (reload_out_optab, mode));
5918 return NO_REGS;
5919 }
5920
5921 /* A SAR<->FP register copy requires an intermediate general register
5922 and secondary memory. We need a secondary reload with a general
5923 scratch register for spills. */
5924 if (rclass == SHIFT_REGS)
5925 {
5926 /* Handle spill. */
5927 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
5928 {
5929 sri->icode = (in_p
5930 ? direct_optab_handler (reload_in_optab, mode)
5931 : direct_optab_handler (reload_out_optab, mode));
5932 return NO_REGS;
5933 }
5934
5935 /* Handle FP copy. */
5936 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
5937 return GENERAL_REGS;
5938 }
5939
5940 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5941 && REGNO_REG_CLASS (regno) == SHIFT_REGS
5942 && FP_REG_CLASS_P (rclass))
5943 return GENERAL_REGS;
5944
5945 return NO_REGS;
5946 }
5947
5948 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5949 is only marked as live on entry by df-scan when it is a fixed
5950 register. It isn't a fixed register in the 64-bit runtime,
5951 so we need to mark it here. */
5952
5953 static void
5954 pa_extra_live_on_entry (bitmap regs)
5955 {
5956 if (TARGET_64BIT)
5957 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5958 }
5959
5960 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5961 to prevent it from being deleted. */
5962
5963 rtx
5964 pa_eh_return_handler_rtx (void)
5965 {
5966 rtx tmp;
5967
5968 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
5969 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5970 tmp = gen_rtx_MEM (word_mode, tmp);
5971 tmp->volatil = 1;
5972 return tmp;
5973 }
5974
5975 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5976 by invisible reference. As a GCC extension, we also pass anything
5977 with a zero or variable size by reference.
5978
5979 The 64-bit runtime does not describe passing any types by invisible
5980 reference. The internals of GCC can't currently handle passing
5981 empty structures and zero or variable length arrays when they are
5982 not passed entirely on the stack or by reference. Thus, as a GCC
5983 extension, we pass these types by reference. The HP compiler doesn't
5984 support these types, so hopefully there shouldn't be any compatibility
5985 issues. This may have to be revisited when HP releases a C99 compiler
5986 or updates the ABI. */
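/* So, for example, a 12-byte struct is passed by invisible reference
   in the 32-bit runtime since its size exceeds 8 bytes, but by value
   in the 64-bit runtime, while a zero-sized struct is passed by
   reference in both. */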
5987
5988 static bool
5989 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
5990 enum machine_mode mode, const_tree type,
5991 bool named ATTRIBUTE_UNUSED)
5992 {
5993 HOST_WIDE_INT size;
5994
5995 if (type)
5996 size = int_size_in_bytes (type);
5997 else
5998 size = GET_MODE_SIZE (mode);
5999
6000 if (TARGET_64BIT)
6001 return size <= 0;
6002 else
6003 return size <= 0 || size > 8;
6004 }
6005
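/* Return the direction in which to pad an argument of the given MODE
   and TYPE.  For example, in the 32-bit runtime a 3-byte BLKmode
   aggregate pads downward (right justified in its argument word),
   while a 4-byte aggregate occupies a multiple of PARM_BOUNDARY bits
   and needs no justification. */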
6006 enum direction
6007 pa_function_arg_padding (enum machine_mode mode, const_tree type)
6008 {
6009 if (mode == BLKmode
6010 || (TARGET_64BIT
6011 && type
6012 && (AGGREGATE_TYPE_P (type)
6013 || TREE_CODE (type) == COMPLEX_TYPE
6014 || TREE_CODE (type) == VECTOR_TYPE)))
6015 {
6016 /* Return none if justification is not required. */
6017 if (type
6018 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6019 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6020 return none;
6021
6022 /* The directions set here are ignored when a BLKmode argument larger
6023 than a word is placed in a register. Different code is used for
6024 the stack and registers. This makes it difficult to have a
6025 consistent data representation for both the stack and registers.
6026 For both runtimes, the justification and padding for arguments on
6027 the stack and in registers should be identical. */
6028 if (TARGET_64BIT)
6029 /* The 64-bit runtime specifies left justification for aggregates. */
6030 return upward;
6031 else
6032 /* The 32-bit runtime architecture specifies right justification.
6033 When the argument is passed on the stack, the argument is padded
6034 with garbage on the left. The HP compiler pads with zeros. */
6035 return downward;
6036 }
6037
6038 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6039 return downward;
6040 else
6041 return none;
6042 }
6043
6044 \f
6045 /* Do what is necessary for `va_start'. We look at the current function
6046 to determine if stdargs or varargs is used and fill in an initial
6047 va_list. A pointer to this constructor is returned. */
6048
6049 static rtx
6050 hppa_builtin_saveregs (void)
6051 {
6052 rtx offset, dest;
6053 tree fntype = TREE_TYPE (current_function_decl);
6054 int argadj = ((!stdarg_p (fntype))
6055 ? UNITS_PER_WORD : 0);
6056
6057 if (argadj)
6058 offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
6059 else
6060 offset = crtl->args.arg_offset_rtx;
6061
6062 if (TARGET_64BIT)
6063 {
6064 int i, off;
6065
6066 /* Adjust for varargs/stdarg differences. */
6067 if (argadj)
6068 offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
6069 else
6070 offset = crtl->args.arg_offset_rtx;
6071
6072 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6073 from the incoming arg pointer and growing to larger addresses. */
6074 for (i = 26, off = -64; i >= 19; i--, off += 8)
6075 emit_move_insn (gen_rtx_MEM (word_mode,
6076 plus_constant (arg_pointer_rtx, off)),
6077 gen_rtx_REG (word_mode, i));
6078
6079 /* The incoming args pointer points just beyond the flushback area;
6080 normally this is not a serious concern. However, when we are doing
6081 varargs/stdargs we want to make the arg pointer point to the start
6082 of the incoming argument area. */
6083 emit_move_insn (virtual_incoming_args_rtx,
6084 plus_constant (arg_pointer_rtx, -64));
6085
6086 /* Now return a pointer to the first anonymous argument. */
6087 return copy_to_reg (expand_binop (Pmode, add_optab,
6088 virtual_incoming_args_rtx,
6089 offset, 0, 0, OPTAB_LIB_WIDEN));
6090 }
6091
6092 /* Store general registers on the stack. */
6093 dest = gen_rtx_MEM (BLKmode,
6094 plus_constant (crtl->args.internal_arg_pointer,
6095 -16));
6096 set_mem_alias_set (dest, get_varargs_alias_set ());
6097 set_mem_align (dest, BITS_PER_WORD);
6098 move_block_from_reg (23, dest, 4);
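/* Assuming the standard 32-bit convention, the call above stores
   %r23, %r24, %r25 and %r26 at internal_arg_pointer-16, -12, -8 and
   -4 respectively, recreating the register argument words in memory
   so that va_arg can walk them. */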
6099
6100 /* move_block_from_reg will emit code to store the argument registers
6101 individually as scalar stores.
6102
6103 However, other insns may later load from the same addresses for
6104 a structure load (passing a struct to a varargs routine).
6105
6106 The alias code assumes that such aliasing can never happen, so we
6107 have to keep memory referencing insns from moving up beyond the
6108 last argument register store. So we emit a blockage insn here. */
6109 emit_insn (gen_blockage ());
6110
6111 return copy_to_reg (expand_binop (Pmode, add_optab,
6112 crtl->args.internal_arg_pointer,
6113 offset, 0, 0, OPTAB_LIB_WIDEN));
6114 }
6115
6116 static void
6117 hppa_va_start (tree valist, rtx nextarg)
6118 {
6119 nextarg = expand_builtin_saveregs ();
6120 std_expand_builtin_va_start (valist, nextarg);
6121 }
6122
6123 static tree
6124 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6125 gimple_seq *post_p)
6126 {
6127 if (TARGET_64BIT)
6128 {
6129 /* Args grow upward. We can use the generic routines. */
6130 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6131 }
6132 else /* !TARGET_64BIT */
6133 {
6134 tree ptr = build_pointer_type (type);
6135 tree valist_type;
6136 tree t, u;
6137 unsigned int size, ofs;
6138 bool indirect;
6139
6140 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6141 if (indirect)
6142 {
6143 type = ptr;
6144 ptr = build_pointer_type (type);
6145 }
6146 size = int_size_in_bytes (type);
6147 valist_type = TREE_TYPE (valist);
6148
6149 /* Args grow down. Not handled by generic routines. */
6150
6151 u = fold_convert (sizetype, size_in_bytes (type));
6152 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6153 t = fold_build_pointer_plus (valist, u);
6154
6155 /* Align to 4 or 8 byte boundary depending on argument size. */
6156
6157 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6158 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6159 t = fold_convert (valist_type, t);
6160
6161 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6162
6163 ofs = (8 - size) % 4;
6164 if (ofs != 0)
6165 t = fold_build_pointer_plus_hwi (t, ofs);
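/* Worked example: for a 2-byte short, the code above moves valist
   down by 2, aligns down to a 4-byte boundary, then adds
   ofs = (8 - 2) % 4 = 2 so the short is read right justified from
   its 4-byte argument slot. */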
6166
6167 t = fold_convert (ptr, t);
6168 t = build_va_arg_indirect_ref (t);
6169
6170 if (indirect)
6171 t = build_va_arg_indirect_ref (t);
6172
6173 return t;
6174 }
6175 }
6176
6177 /* True if MODE is valid for the target. By "valid", we mean able to
6178 be manipulated in non-trivial ways. In particular, this means all
6179 the arithmetic is supported.
6180
6181 Currently, TImode is not valid as the HP 64-bit runtime documentation
6182 doesn't document the alignment and calling conventions for this type.
6183 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6184 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6185
6186 static bool
6187 pa_scalar_mode_supported_p (enum machine_mode mode)
6188 {
6189 int precision = GET_MODE_PRECISION (mode);
6190
6191 switch (GET_MODE_CLASS (mode))
6192 {
6193 case MODE_PARTIAL_INT:
6194 case MODE_INT:
6195 if (precision == CHAR_TYPE_SIZE)
6196 return true;
6197 if (precision == SHORT_TYPE_SIZE)
6198 return true;
6199 if (precision == INT_TYPE_SIZE)
6200 return true;
6201 if (precision == LONG_TYPE_SIZE)
6202 return true;
6203 if (precision == LONG_LONG_TYPE_SIZE)
6204 return true;
6205 return false;
6206
6207 case MODE_FLOAT:
6208 if (precision == FLOAT_TYPE_SIZE)
6209 return true;
6210 if (precision == DOUBLE_TYPE_SIZE)
6211 return true;
6212 if (precision == LONG_DOUBLE_TYPE_SIZE)
6213 return true;
6214 return false;
6215
6216 case MODE_DECIMAL_FLOAT:
6217 return false;
6218
6219 default:
6220 gcc_unreachable ();
6221 }
6222 }
6223
6224 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6225 it branches into the delay slot. Otherwise, return FALSE. */
6226
6227 static bool
6228 branch_to_delay_slot_p (rtx insn)
6229 {
6230 rtx jump_insn;
6231
6232 if (dbr_sequence_length ())
6233 return FALSE;
6234
6235 jump_insn = next_active_insn (JUMP_LABEL (insn));
6236 while (insn)
6237 {
6238 insn = next_active_insn (insn);
6239 if (jump_insn == insn)
6240 return TRUE;
6241
6242 /* We can't rely on the length of asms. So, we return FALSE when
6243 the branch is followed by an asm. */
6244 if (!insn
6245 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6246 || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6247 || get_attr_length (insn) > 0)
6248 break;
6249 }
6250
6251 return FALSE;
6252 }
6253
6254 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6255
6256 This occurs when INSN has an unfilled delay slot and is followed
6257 by an asm. Disaster can occur if the asm is empty and the jump
6258 branches into the delay slot. So, we add a nop in the delay slot
6259 when this occurs. */
6260
6261 static bool
6262 branch_needs_nop_p (rtx insn)
6263 {
6264 rtx jump_insn;
6265
6266 if (dbr_sequence_length ())
6267 return FALSE;
6268
6269 jump_insn = next_active_insn (JUMP_LABEL (insn));
6270 while (insn)
6271 {
6272 insn = next_active_insn (insn);
6273 if (!insn || jump_insn == insn)
6274 return TRUE;
6275
6276 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6277 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6278 && get_attr_length (insn) > 0)
6279 break;
6280 }
6281
6282 return FALSE;
6283 }
6284
6285 /* Return TRUE if INSN, a forward jump insn, can use nullification
6286 to skip the following instruction. This avoids an extra cycle due
6287 to a mis-predicted branch when we fall through. */
6288
6289 static bool
6290 use_skip_p (rtx insn)
6291 {
6292 rtx jump_insn = next_active_insn (JUMP_LABEL (insn));
6293
6294 while (insn)
6295 {
6296 insn = next_active_insn (insn);
6297
6298 /* We can't rely on the length of asms, so we can't skip asms. */
6299 if (!insn
6300 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6301 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6302 break;
6303 if (get_attr_length (insn) == 4
6304 && jump_insn == next_active_insn (insn))
6305 return TRUE;
6306 if (get_attr_length (insn) > 0)
6307 break;
6308 }
6309
6310 return FALSE;
6311 }
6312
6313 /* This routine handles all the normal conditional branch sequences we
6314 might need to generate. It handles compare immediate vs compare
6315 register, nullification of delay slots, varying length branches,
6316 negated branches, and all combinations of the above. It returns the
6317 output appropriate to emit the branch corresponding to all given
6318 parameters. */
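/* As a rough illustration, the short (length 4) taken form built
   below is "{com%I2b,|cmp%I2b,}%S3 %2,%r1,%0", where the braces pick
   the PA 1.x or PA 2.0 mnemonic and %S3 prints the (suitably
   adjusted) condition; final output then looks something like
   "cmpb,= %r5,%r4,L$0012", with registers and label illustrative
   only. */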
6319
6320 const char *
6321 pa_output_cbranch (rtx *operands, int negated, rtx insn)
6322 {
6323 static char buf[100];
6324 bool useskip;
6325 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6326 int length = get_attr_length (insn);
6327 int xdelay;
6328
6329 /* A conditional branch to the following instruction (e.g. the delay slot)
6330 is asking for a disaster. This can happen when not optimizing and
6331 when jump optimization fails.
6332
6333 While it is usually safe to emit nothing, this can fail if the
6334 preceding instruction is a nullified branch with an empty delay
6335 slot and the same branch target as this branch. We could check
6336 for this but jump optimization should eliminate nop jumps. It
6337 is always safe to emit a nop. */
6338 if (branch_to_delay_slot_p (insn))
6339 return "nop";
6340
6341 /* The doubleword form of the cmpib instruction doesn't have the LEU
6342 and GTU conditions while the cmpb instruction does. Since we accept
6343 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6344 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6345 operands[2] = gen_rtx_REG (DImode, 0);
6346 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6347 operands[1] = gen_rtx_REG (DImode, 0);
6348
6349 /* If this is a long branch with its delay slot unfilled, set `nullify'
6350 as it can nullify the delay slot and save a nop. */
6351 if (length == 8 && dbr_sequence_length () == 0)
6352 nullify = 1;
6353
6354 /* If this is a short forward conditional branch which did not get
6355 its delay slot filled, the delay slot can still be nullified. */
6356 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6357 nullify = forward_branch_p (insn);
6358
6359 /* A forward branch over a single nullified insn can be done with a
6360 comclr instruction. This avoids a single cycle penalty due to a
6361 mis-predicted branch if we fall through (branch not taken). */
6362 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6363
6364 switch (length)
6365 {
6366 /* All short conditional branches except backwards with an unfilled
6367 delay slot. */
6368 case 4:
6369 if (useskip)
6370 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6371 else
6372 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6373 if (GET_MODE (operands[1]) == DImode)
6374 strcat (buf, "*");
6375 if (negated)
6376 strcat (buf, "%B3");
6377 else
6378 strcat (buf, "%S3");
6379 if (useskip)
6380 strcat (buf, " %2,%r1,%%r0");
6381 else if (nullify)
6382 {
6383 if (branch_needs_nop_p (insn))
6384 strcat (buf, ",n %2,%r1,%0%#");
6385 else
6386 strcat (buf, ",n %2,%r1,%0");
6387 }
6388 else
6389 strcat (buf, " %2,%r1,%0");
6390 break;
6391
6392 /* All long conditionals. Note a short backward branch with an
6393 unfilled delay slot is treated just like a long backward branch
6394 with an unfilled delay slot. */
6395 case 8:
6396 /* Handle weird backwards branch with a filled delay slot
6397 which is nullified. */
6398 if (dbr_sequence_length () != 0
6399 && ! forward_branch_p (insn)
6400 && nullify)
6401 {
6402 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6403 if (GET_MODE (operands[1]) == DImode)
6404 strcat (buf, "*");
6405 if (negated)
6406 strcat (buf, "%S3");
6407 else
6408 strcat (buf, "%B3");
6409 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6410 }
6411 /* Handle short backwards branch with an unfilled delay slot.
6412 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6413 taken and untaken branches. */
6414 else if (dbr_sequence_length () == 0
6415 && ! forward_branch_p (insn)
6416 && INSN_ADDRESSES_SET_P ()
6417 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6418 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6419 {
6420 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6421 if (GET_MODE (operands[1]) == DImode)
6422 strcat (buf, "*");
6423 if (negated)
6424 strcat (buf, "%B3 %2,%r1,%0%#");
6425 else
6426 strcat (buf, "%S3 %2,%r1,%0%#");
6427 }
6428 else
6429 {
6430 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6431 if (GET_MODE (operands[1]) == DImode)
6432 strcat (buf, "*");
6433 if (negated)
6434 strcat (buf, "%S3");
6435 else
6436 strcat (buf, "%B3");
6437 if (nullify)
6438 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6439 else
6440 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6441 }
6442 break;
6443
6444 default:
6445 /* The reversed conditional branch must branch over one additional
6446 instruction if the delay slot is filled and needs to be extracted
6447 by pa_output_lbranch. If the delay slot is empty or this is a
6448 nullified forward branch, the instruction after the reversed
6449 conditional branch must be nullified. */
6450 if (dbr_sequence_length () == 0
6451 || (nullify && forward_branch_p (insn)))
6452 {
6453 nullify = 1;
6454 xdelay = 0;
6455 operands[4] = GEN_INT (length);
6456 }
6457 else
6458 {
6459 xdelay = 1;
6460 operands[4] = GEN_INT (length + 4);
6461 }
6462
6463 /* Create a reversed conditional branch which branches around
6464 the following insns. */
6465 if (GET_MODE (operands[1]) != DImode)
6466 {
6467 if (nullify)
6468 {
6469 if (negated)
6470 strcpy (buf,
6471 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6472 else
6473 strcpy (buf,
6474 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6475 }
6476 else
6477 {
6478 if (negated)
6479 strcpy (buf,
6480 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6481 else
6482 strcpy (buf,
6483 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6484 }
6485 }
6486 else
6487 {
6488 if (nullify)
6489 {
6490 if (negated)
6491 strcpy (buf,
6492 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6493 else
6494 strcpy (buf,
6495 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6496 }
6497 else
6498 {
6499 if (negated)
6500 strcpy (buf,
6501 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6502 else
6503 strcpy (buf,
6504 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6505 }
6506 }
6507
6508 output_asm_insn (buf, operands);
6509 return pa_output_lbranch (operands[0], insn, xdelay);
6510 }
6511 return buf;
6512 }
6513
6514 /* This routine handles output of long unconditional branches that
6515 exceed the maximum range of a simple branch instruction. Since
6516 we don't have a register available for the branch, we save register
6517 %r1 in the frame marker, load the branch destination DEST into %r1,
6518 execute the branch, and restore %r1 in the delay slot of the branch.
6519
6520 Since long branches may have an insn in the delay slot and the
6521 delay slot is used to restore %r1, we in general need to extract
6522 this insn and execute it before the branch. However, to facilitate
6523 use of this function by conditional branches, we also provide an
6524 option to not extract the delay insn so that it will be emitted
6525 after the long branch. So, if there is an insn in the delay slot,
6526 it is extracted if XDELAY is nonzero.
6527
6528 The lengths of the various long-branch sequences are 20, 16 and 24
6529 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
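/* For example, in the 32-bit non-PIC case with a frame, the code
   below emits roughly

	stw %r1,-12(%r30)
	ldil L'target,%r1
	be R'target(%sr4,%r1)
	ldw -12(%r30),%r1

   where "target" stands for the actual destination and the final
   load fills the delay slot of the be. */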
6530
6531 const char *
6532 pa_output_lbranch (rtx dest, rtx insn, int xdelay)
6533 {
6534 rtx xoperands[2];
6535
6536 xoperands[0] = dest;
6537
6538 /* First, free up the delay slot. */
6539 if (xdelay && dbr_sequence_length () != 0)
6540 {
6541 /* We can't handle a jump in the delay slot. */
6542 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6543
6544 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6545 optimize, 0, NULL);
6546
6547 /* Now delete the delay insn. */
6548 SET_INSN_DELETED (NEXT_INSN (insn));
6549 }
6550
6551 /* Output an insn to save %r1. The runtime documentation doesn't
6552 specify whether the "Clean Up" slot in the caller's frame can
6553 be clobbered by the callee. It isn't copied by HP's builtin
6554 alloca, so this suggests that it can be clobbered if necessary.
6555 The "Static Link" location is copied by HP builtin alloca, so
6556 we avoid using it. Using the cleanup slot might be a problem
6557 if we have to interoperate with languages that pass cleanup
6558 information. However, it should be possible to handle these
6559 situations with GCC's asm feature.
6560
6561 The "Current RP" slot is reserved for the called procedure, so
6562 we try to use it when we don't have a frame of our own. It's
6563 rather unlikely that we won't have a frame when we need to emit
6564 a very long branch.
6565
6566 Really the way to go long term is a register scavenger; go to
6567 the target of the jump and find a register which we can use
6568 as a scratch to hold the value in %r1. Then, we wouldn't have
6569 to free up the delay slot or clobber a slot that may be needed
6570 for other purposes. */
6571 if (TARGET_64BIT)
6572 {
6573 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6574 /* Use the return pointer slot in the frame marker. */
6575 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6576 else
6577 /* Use the slot at -40 in the frame marker since HP builtin
6578 alloca doesn't copy it. */
6579 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6580 }
6581 else
6582 {
6583 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6584 /* Use the return pointer slot in the frame marker. */
6585 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6586 else
6587 /* Use the "Clean Up" slot in the frame marker. In GCC,
6588 the only other use of this location is for copying a
6589 floating point double argument from a floating-point
6590 register to two general registers. The copy is done
6591 as an "atomic" operation when outputting a call, so it
6592 won't interfere with our using the location here. */
6593 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6594 }
6595
6596 if (TARGET_PORTABLE_RUNTIME)
6597 {
6598 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6599 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6600 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6601 }
6602 else if (flag_pic)
6603 {
6604 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6605 if (TARGET_SOM || !TARGET_GAS)
6606 {
6607 xoperands[1] = gen_label_rtx ();
6608 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6609 targetm.asm_out.internal_label (asm_out_file, "L",
6610 CODE_LABEL_NUMBER (xoperands[1]));
6611 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6612 }
6613 else
6614 {
6615 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6616 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6617 }
6618 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6619 }
6620 else
6621 /* Now output a very long branch to the original target. */
6622 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6623
6624 /* Now restore the value of %r1 in the delay slot. */
6625 if (TARGET_64BIT)
6626 {
6627 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6628 return "ldd -16(%%r30),%%r1";
6629 else
6630 return "ldd -40(%%r30),%%r1";
6631 }
6632 else
6633 {
6634 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6635 return "ldw -20(%%r30),%%r1";
6636 else
6637 return "ldw -12(%%r30),%%r1";
6638 }
6639 }
6640
6641 /* This routine handles all the branch-on-bit conditional branch sequences we
6642 might need to generate. It handles nullification of delay slots,
6643 varying length branches, negated branches and all combinations of the
6644 above. It returns the appropriate output template to emit the branch. */
6645
6646 const char *
6647 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6648 {
6649 static char buf[100];
6650 bool useskip;
6651 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6652 int length = get_attr_length (insn);
6653 int xdelay;
6654
6655 /* A conditional branch to the following instruction (e.g. the delay slot) is
6656 asking for a disaster. I do not think this can happen as this pattern
6657 is only used when optimizing; jump optimization should eliminate the
6658 jump. But be prepared just in case. */
6659
6660 if (branch_to_delay_slot_p (insn))
6661 return "nop";
6662
6663 /* If this is a long branch with its delay slot unfilled, set `nullify'
6664 as it can nullify the delay slot and save a nop. */
6665 if (length == 8 && dbr_sequence_length () == 0)
6666 nullify = 1;
6667
6668 /* If this is a short forward conditional branch which did not get
6669 its delay slot filled, the delay slot can still be nullified. */
6670 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6671 nullify = forward_branch_p (insn);
6672
6673 /* A forward branch over a single nullified insn can be done with an
6674 extrs instruction. This avoids a single cycle penalty due to a
6675 mis-predicted branch if we fall through (branch not taken). */
6676 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6677
6678 switch (length)
6679 {
6680
6681 /* All short conditional branches except backwards with an unfilled
6682 delay slot. */
6683 case 4:
6684 if (useskip)
6685 strcpy (buf, "{extrs,|extrw,s,}");
6686 else
6687 strcpy (buf, "bb,");
6688 if (useskip && GET_MODE (operands[0]) == DImode)
6689 strcpy (buf, "extrd,s,*");
6690 else if (GET_MODE (operands[0]) == DImode)
6691 strcpy (buf, "bb,*");
6692 if ((which == 0 && negated)
6693 || (which == 1 && ! negated))
6694 strcat (buf, ">=");
6695 else
6696 strcat (buf, "<");
6697 if (useskip)
6698 strcat (buf, " %0,%1,1,%%r0");
6699 else if (nullify && negated)
6700 {
6701 if (branch_needs_nop_p (insn))
6702 strcat (buf, ",n %0,%1,%3%#");
6703 else
6704 strcat (buf, ",n %0,%1,%3");
6705 }
6706 else if (nullify && ! negated)
6707 {
6708 if (branch_needs_nop_p (insn))
6709 strcat (buf, ",n %0,%1,%2%#");
6710 else
6711 strcat (buf, ",n %0,%1,%2");
6712 }
6713 else if (! nullify && negated)
6714 strcat (buf, " %0,%1,%3");
6715 else if (! nullify && ! negated)
6716 strcat (buf, " %0,%1,%2");
6717 break;
6718
6719 /* All long conditionals. Note a short backward branch with an
6720 unfilled delay slot is treated just like a long backward branch
6721 with an unfilled delay slot. */
6722 case 8:
6723 /* Handle weird backwards branch with a filled delay slot
6724 which is nullified. */
6725 if (dbr_sequence_length () != 0
6726 && ! forward_branch_p (insn)
6727 && nullify)
6728 {
6729 strcpy (buf, "bb,");
6730 if (GET_MODE (operands[0]) == DImode)
6731 strcat (buf, "*");
6732 if ((which == 0 && negated)
6733 || (which == 1 && ! negated))
6734 strcat (buf, "<");
6735 else
6736 strcat (buf, ">=");
6737 if (negated)
6738 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6739 else
6740 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6741 }
6742 /* Handle short backwards branch with an unfilled delay slot.
6743 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6744 taken and untaken branches. */
6745 else if (dbr_sequence_length () == 0
6746 && ! forward_branch_p (insn)
6747 && INSN_ADDRESSES_SET_P ()
6748 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6749 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6750 {
6751 strcpy (buf, "bb,");
6752 if (GET_MODE (operands[0]) == DImode)
6753 strcat (buf, "*");
6754 if ((which == 0 && negated)
6755 || (which == 1 && ! negated))
6756 strcat (buf, ">=");
6757 else
6758 strcat (buf, "<");
6759 if (negated)
6760 strcat (buf, " %0,%1,%3%#");
6761 else
6762 strcat (buf, " %0,%1,%2%#");
6763 }
6764 else
6765 {
6766 if (GET_MODE (operands[0]) == DImode)
6767 strcpy (buf, "extrd,s,*");
6768 else
6769 strcpy (buf, "{extrs,|extrw,s,}");
6770 if ((which == 0 && negated)
6771 || (which == 1 && ! negated))
6772 strcat (buf, "<");
6773 else
6774 strcat (buf, ">=");
6775 if (nullify && negated)
6776 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6777 else if (nullify && ! negated)
6778 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6779 else if (negated)
6780 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6781 else
6782 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6783 }
6784 break;
6785
6786 default:
6787 /* The reversed conditional branch must branch over one additional
6788 instruction if the delay slot is filled and needs to be extracted
6789 by pa_output_lbranch. If the delay slot is empty or this is a
6790 nullified forward branch, the instruction after the reversed
6791 conditional branch must be nullified. */
6792 if (dbr_sequence_length () == 0
6793 || (nullify && forward_branch_p (insn)))
6794 {
6795 nullify = 1;
6796 xdelay = 0;
6797 operands[4] = GEN_INT (length);
6798 }
6799 else
6800 {
6801 xdelay = 1;
6802 operands[4] = GEN_INT (length + 4);
6803 }
6804
6805 if (GET_MODE (operands[0]) == DImode)
6806 strcpy (buf, "bb,*");
6807 else
6808 strcpy (buf, "bb,");
6809 if ((which == 0 && negated)
6810 || (which == 1 && !negated))
6811 strcat (buf, "<");
6812 else
6813 strcat (buf, ">=");
6814 if (nullify)
6815 strcat (buf, ",n %0,%1,.+%4");
6816 else
6817 strcat (buf, " %0,%1,.+%4");
6818 output_asm_insn (buf, operands);
6819 return pa_output_lbranch (negated ? operands[3] : operands[2],
6820 insn, xdelay);
6821 }
6822 return buf;
6823 }
6824
6825 /* This routine handles all the branch-on-variable-bit conditional branch
6826 sequences we might need to generate. It handles nullification of delay
6827 slots, varying length branches, negated branches and all combinations
6828 of the above. It returns the appropriate output template to emit the
6829 branch. */
6830
6831 const char *
6832 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn,
6833 int which)
6834 {
6835 static char buf[100];
6836 bool useskip;
6837 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6838 int length = get_attr_length (insn);
6839 int xdelay;
6840
6841 /* A conditional branch to the following instruction (e.g. the delay slot) is
6842 asking for a disaster. I do not think this can happen as this pattern
6843 is only used when optimizing; jump optimization should eliminate the
6844 jump. But be prepared just in case. */
6845
6846 if (branch_to_delay_slot_p (insn))
6847 return "nop";
6848
6849 /* If this is a long branch with its delay slot unfilled, set `nullify'
6850 as it can nullify the delay slot and save a nop. */
6851 if (length == 8 && dbr_sequence_length () == 0)
6852 nullify = 1;
6853
6854 /* If this is a short forward conditional branch which did not get
6855 its delay slot filled, the delay slot can still be nullified. */
6856 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6857 nullify = forward_branch_p (insn);
6858
6859 /* A forward branch over a single nullified insn can be done with an
6860 extrs instruction. This avoids a single cycle penalty due to a
6861 mis-predicted branch if we fall through (branch not taken). */
6862 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6863
6864 switch (length)
6865 {
6866
6867 /* All short conditional branches except backwards with an unfilled
6868 delay slot. */
6869 case 4:
6870 if (useskip)
6871 strcpy (buf, "{vextrs,|extrw,s,}");
6872 else
6873 strcpy (buf, "{bvb,|bb,}");
6874 if (useskip && GET_MODE (operands[0]) == DImode)
6875 strcpy (buf, "extrd,s,*");
6876 else if (GET_MODE (operands[0]) == DImode)
6877 strcpy (buf, "bb,*");
6878 if ((which == 0 && negated)
6879 || (which == 1 && ! negated))
6880 strcat (buf, ">=");
6881 else
6882 strcat (buf, "<");
6883 if (useskip)
6884 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6885 else if (nullify && negated)
6886 {
6887 if (branch_needs_nop_p (insn))
6888 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6889 else
6890 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6891 }
6892 else if (nullify && ! negated)
6893 {
6894 if (branch_needs_nop_p (insn))
6895 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6896 else
6897 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6898 }
6899 else if (! nullify && negated)
6900 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6901 else if (! nullify && ! negated)
6902 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6903 break;
6904
6905 /* All long conditionals. Note a short backward branch with an
6906 unfilled delay slot is treated just like a long backward branch
6907 with an unfilled delay slot. */
6908 case 8:
6909 /* Handle weird backwards branch with a filled delay slot
6910 which is nullified. */
6911 if (dbr_sequence_length () != 0
6912 && ! forward_branch_p (insn)
6913 && nullify)
6914 {
6915 strcpy (buf, "{bvb,|bb,}");
6916 if (GET_MODE (operands[0]) == DImode)
6917 strcat (buf, "*");
6918 if ((which == 0 && negated)
6919 || (which == 1 && ! negated))
6920 strcat (buf, "<");
6921 else
6922 strcat (buf, ">=");
6923 if (negated)
6924 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6925 else
6926 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6927 }
6928 /* Handle short backwards branch with an unfilled delay slot.
6929 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6930 taken and untaken branches. */
6931 else if (dbr_sequence_length () == 0
6932 && ! forward_branch_p (insn)
6933 && INSN_ADDRESSES_SET_P ()
6934 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6935 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6936 {
6937 strcpy (buf, "{bvb,|bb,}");
6938 if (GET_MODE (operands[0]) == DImode)
6939 strcat (buf, "*");
6940 if ((which == 0 && negated)
6941 || (which == 1 && ! negated))
6942 strcat (buf, ">=");
6943 else
6944 strcat (buf, "<");
6945 if (negated)
6946 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6947 else
6948 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6949 }
6950 else
6951 {
6952 strcpy (buf, "{vextrs,|extrw,s,}");
6953 if (GET_MODE (operands[0]) == DImode)
6954 strcpy (buf, "extrd,s,*");
6955 if ((which == 0 && negated)
6956 || (which == 1 && ! negated))
6957 strcat (buf, "<");
6958 else
6959 strcat (buf, ">=");
6960 if (nullify && negated)
6961 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6962 else if (nullify && ! negated)
6963 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6964 else if (negated)
6965 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6966 else
6967 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6968 }
6969 break;
6970
6971 default:
6972 /* The reversed conditional branch must branch over one additional
6973 instruction if the delay slot is filled and needs to be extracted
6974 by pa_output_lbranch. If the delay slot is empty or this is a
6975 nullified forward branch, the instruction after the reversed
6976 conditional branch must be nullified. */
6977 if (dbr_sequence_length () == 0
6978 || (nullify && forward_branch_p (insn)))
6979 {
6980 nullify = 1;
6981 xdelay = 0;
6982 operands[4] = GEN_INT (length);
6983 }
6984 else
6985 {
6986 xdelay = 1;
6987 operands[4] = GEN_INT (length + 4);
6988 }
6989
6990 if (GET_MODE (operands[0]) == DImode)
6991 strcpy (buf, "bb,*");
6992 else
6993 strcpy (buf, "{bvb,|bb,}");
6994 if ((which == 0 && negated)
6995 || (which == 1 && !negated))
6996 strcat (buf, "<");
6997 else
6998 strcat (buf, ">=");
6999 if (nullify)
7000 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7001 else
7002 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7003 output_asm_insn (buf, operands);
7004 return pa_output_lbranch (negated ? operands[3] : operands[2],
7005 insn, xdelay);
7006 }
7007 return buf;
7008 }
7009
7010 /* Return the output template for emitting a dbra type insn.
7011
7012 Note it may perform some output operations on its own before
7013 returning the final output string. */
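/* For instance, in the common register alternative with a short
   branch, the template returned below is "addib,%C2 %1,%0,%3": a
   single add-immediate-and-branch that adds the step to the counter
   and branches on the condition. */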
7014 const char *
7015 pa_output_dbra (rtx *operands, rtx insn, int which_alternative)
7016 {
7017 int length = get_attr_length (insn);
7018
7019 /* A conditional branch to the following instruction (e.g. the delay slot) is
7020 asking for a disaster. Be prepared! */
7021
7022 if (branch_to_delay_slot_p (insn))
7023 {
7024 if (which_alternative == 0)
7025 return "ldo %1(%0),%0";
7026 else if (which_alternative == 1)
7027 {
7028 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7029 output_asm_insn ("ldw -16(%%r30),%4", operands);
7030 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7031 return "{fldws|fldw} -16(%%r30),%0";
7032 }
7033 else
7034 {
7035 output_asm_insn ("ldw %0,%4", operands);
7036 return "ldo %1(%4),%4\n\tstw %4,%0";
7037 }
7038 }
7039
7040 if (which_alternative == 0)
7041 {
7042 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7043 int xdelay;
7044
7045 /* If this is a long branch with its delay slot unfilled, set `nullify'
7046 as it can nullify the delay slot and save a nop. */
7047 if (length == 8 && dbr_sequence_length () == 0)
7048 nullify = 1;
7049
7050 /* If this is a short forward conditional branch which did not get
7051 its delay slot filled, the delay slot can still be nullified. */
7052 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7053 nullify = forward_branch_p (insn);
7054
7055 switch (length)
7056 {
7057 case 4:
7058 if (nullify)
7059 {
7060 if (branch_needs_nop_p (insn))
7061 return "addib,%C2,n %1,%0,%3%#";
7062 else
7063 return "addib,%C2,n %1,%0,%3";
7064 }
7065 else
7066 return "addib,%C2 %1,%0,%3";
7067
7068 case 8:
7069 /* Handle weird backwards branch with a filled delay slot
7070 which is nullified. */
7071 if (dbr_sequence_length () != 0
7072 && ! forward_branch_p (insn)
7073 && nullify)
7074 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7075 /* Handle short backwards branch with an unfilled delay slot.
7076 Using an addb;nop rather than addi;bl saves 1 cycle for both
7077 taken and untaken branches. */
7078 else if (dbr_sequence_length () == 0
7079 && ! forward_branch_p (insn)
7080 && INSN_ADDRESSES_SET_P ()
7081 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7082 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7083 return "addib,%C2 %1,%0,%3%#";
7084
7085 /* Handle normal cases. */
7086 if (nullify)
7087 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7088 else
7089 return "addi,%N2 %1,%0,%0\n\tb %3";
7090
7091 default:
7092 /* The reversed conditional branch must branch over one additional
7093 instruction if the delay slot is filled and needs to be extracted
7094 by pa_output_lbranch. If the delay slot is empty or this is a
7095 nullified forward branch, the instruction after the reversed
7096 conditional branch must be nullified. */
7097 if (dbr_sequence_length () == 0
7098 || (nullify && forward_branch_p (insn)))
7099 {
7100 nullify = 1;
7101 xdelay = 0;
7102 operands[4] = GEN_INT (length);
7103 }
7104 else
7105 {
7106 xdelay = 1;
7107 operands[4] = GEN_INT (length + 4);
7108 }
7109
7110 if (nullify)
7111 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7112 else
7113 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7114
7115 return pa_output_lbranch (operands[3], insn, xdelay);
7116 }
7117
7118 }
7119 /* Deal with gross reload from FP register case. */
7120 else if (which_alternative == 1)
7121 {
7122 /* Move loop counter from FP register to MEM then into a GR,
7123 increment the GR, store the GR into MEM, and finally reload
7124 the FP register from MEM from within the branch's delay slot. */
7125 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7126 operands);
7127 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7128 if (length == 24)
7129 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7130 else if (length == 28)
7131 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7132 else
7133 {
7134 operands[5] = GEN_INT (length - 16);
7135 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7136 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7137 return pa_output_lbranch (operands[3], insn, 0);
7138 }
7139 }
7140 /* Deal with gross reload from memory case. */
7141 else
7142 {
7143 /* Reload loop counter from memory, the store back to memory
7144 happens in the branch's delay slot. */
7145 output_asm_insn ("ldw %0,%4", operands);
7146 if (length == 12)
7147 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7148 else if (length == 16)
7149 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7150 else
7151 {
7152 operands[5] = GEN_INT (length - 4);
7153 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7154 return pa_output_lbranch (operands[3], insn, 0);
7155 }
7156 }
7157 }
7158
7159 /* Return the output template for emitting a movb type insn.
7160
7161 Note it may perform some output operations on its own before
7162 returning the final output string. */
7163 const char *
7164 pa_output_movb (rtx *operands, rtx insn, int which_alternative,
7165 int reverse_comparison)
7166 {
7167 int length = get_attr_length (insn);
7168
7169 /* A conditional branch to the following instruction (e.g. the delay slot) is
7170 asking for a disaster. Be prepared! */
7171
7172 if (branch_to_delay_slot_p (insn))
7173 {
7174 if (which_alternative == 0)
7175 return "copy %1,%0";
7176 else if (which_alternative == 1)
7177 {
7178 output_asm_insn ("stw %1,-16(%%r30)", operands);
7179 return "{fldws|fldw} -16(%%r30),%0";
7180 }
7181 else if (which_alternative == 2)
7182 return "stw %1,%0";
7183 else
7184 return "mtsar %r1";
7185 }
7186
7187 /* Support the second variant. */
7188 if (reverse_comparison)
7189 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7190
7191 if (which_alternative == 0)
7192 {
7193 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7194 int xdelay;
7195
7196 /* If this is a long branch with its delay slot unfilled, set `nullify'
7197 as it can nullify the delay slot and save a nop. */
7198 if (length == 8 && dbr_sequence_length () == 0)
7199 nullify = 1;
7200
7201 /* If this is a short forward conditional branch which did not get
7202 its delay slot filled, the delay slot can still be nullified. */
7203 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7204 nullify = forward_branch_p (insn);
7205
7206 switch (length)
7207 {
7208 case 4:
7209 if (nullify)
7210 {
7211 if (branch_needs_nop_p (insn))
7212 return "movb,%C2,n %1,%0,%3%#";
7213 else
7214 return "movb,%C2,n %1,%0,%3";
7215 }
7216 else
7217 return "movb,%C2 %1,%0,%3";
7218
7219 case 8:
7220 /* Handle weird backwards branch with a filled delay slot
7221 which is nullified. */
7222 if (dbr_sequence_length () != 0
7223 && ! forward_branch_p (insn)
7224 && nullify)
7225 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7226
7227 /* Handle short backwards branch with an unfilled delay slot.
7228 Using a movb;nop rather than or;bl saves 1 cycle for both
7229 taken and untaken branches. */
7230 else if (dbr_sequence_length () == 0
7231 && ! forward_branch_p (insn)
7232 && INSN_ADDRESSES_SET_P ()
7233 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7234 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7235 return "movb,%C2 %1,%0,%3%#";
7236 /* Handle normal cases. */
7237 if (nullify)
7238 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7239 else
7240 return "or,%N2 %1,%%r0,%0\n\tb %3";
7241
7242 default:
7243 /* The reversed conditional branch must branch over one additional
7244 instruction if the delay slot is filled and needs to be extracted
7245 by pa_output_lbranch. If the delay slot is empty or this is a
7246 nullified forward branch, the instruction after the reversed
7247 conditional branch must be nullified. */
7248 if (dbr_sequence_length () == 0
7249 || (nullify && forward_branch_p (insn)))
7250 {
7251 nullify = 1;
7252 xdelay = 0;
7253 operands[4] = GEN_INT (length);
7254 }
7255 else
7256 {
7257 xdelay = 1;
7258 operands[4] = GEN_INT (length + 4);
7259 }
7260
7261 if (nullify)
7262 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7263 else
7264 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7265
7266 return pa_output_lbranch (operands[3], insn, xdelay);
7267 }
7268 }
7269 /* Deal with gross reload for FP destination register case. */
7270 else if (which_alternative == 1)
7271 {
7272 /* Move source register to MEM, perform the branch test, then
7273 finally load the FP register from MEM from within the branch's
7274 delay slot. */
7275 output_asm_insn ("stw %1,-16(%%r30)", operands);
7276 if (length == 12)
7277 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7278 else if (length == 16)
7279 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7280 else
7281 {
7282 operands[4] = GEN_INT (length - 4);
7283 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7284 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7285 return pa_output_lbranch (operands[3], insn, 0);
7286 }
7287 }
7288 /* Deal with gross reload from memory case. */
7289 else if (which_alternative == 2)
7290 {
7291 /* Reload loop counter from memory, the store back to memory
7292 happens in the branch's delay slot. */
7293 if (length == 8)
7294 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7295 else if (length == 12)
7296 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7297 else
7298 {
7299 operands[4] = GEN_INT (length);
7300 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7301 operands);
7302 return pa_output_lbranch (operands[3], insn, 0);
7303 }
7304 }
7305 /* Handle SAR as a destination. */
7306 else
7307 {
7308 if (length == 8)
7309 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7310 else if (length == 12)
7311 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7312 else
7313 {
7314 operands[4] = GEN_INT (length);
7315 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7316 operands);
7317 return pa_output_lbranch (operands[3], insn, 0);
7318 }
7319 }
7320 }
7321
7322 /* Copy any FP arguments in INSN into integer registers. */
7323 static void
7324 copy_fp_args (rtx insn)
7325 {
7326 rtx link;
7327 rtx xoperands[2];
7328
7329 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7330 {
7331 int arg_mode, regno;
7332 rtx use = XEXP (link, 0);
7333
7334 if (! (GET_CODE (use) == USE
7335 && GET_CODE (XEXP (use, 0)) == REG
7336 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7337 continue;
7338
7339 arg_mode = GET_MODE (XEXP (use, 0));
7340 regno = REGNO (XEXP (use, 0));
7341
7342 /* Is it a floating point register? */
7343 if (regno >= 32 && regno <= 39)
7344 {
7345 /* Copy the FP register into an integer register via memory. */
7346 if (arg_mode == SFmode)
7347 {
7348 xoperands[0] = XEXP (use, 0);
7349 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7350 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7351 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7352 }
7353 else
7354 {
7355 xoperands[0] = XEXP (use, 0);
7356 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7357 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7358 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7359 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7360 }
7361 }
7362 }
7363 }
7364
7365 /* Compute length of the FP argument copy sequence for INSN. */
7366 static int
7367 length_fp_args (rtx insn)
7368 {
7369 int length = 0;
7370 rtx link;
7371
7372 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7373 {
7374 int arg_mode, regno;
7375 rtx use = XEXP (link, 0);
7376
7377 if (! (GET_CODE (use) == USE
7378 && GET_CODE (XEXP (use, 0)) == REG
7379 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7380 continue;
7381
7382 arg_mode = GET_MODE (XEXP (use, 0));
7383 regno = REGNO (XEXP (use, 0));
7384
7385 /* Is it a floating point register? */
7386 if (regno >= 32 && regno <= 39)
7387 {
7388 if (arg_mode == SFmode)
7389 length += 8;
7390 else
7391 length += 12;
7392 }
7393 }
7394
7395 return length;
7396 }
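
/* Illustrative arithmetic (follows directly from copy_fp_args above):
   an SFmode argument takes an fstw/ldw pair (2 insns, 8 bytes) and a
   DFmode argument an fstd plus two ldw insns (3 insns, 12 bytes).  A
   call passing one SFmode and one DFmode FP argument therefore adds
   8 + 12 = 20 bytes to the length estimate.  */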
7397
7398 /* Return the attribute length for the millicode call instruction INSN.
7399 The length must match the code generated by pa_output_millicode_call.
7400 We include the delay slot in the returned length as it is better to
7401 overestimate the length than to underestimate it. */
7402
7403 int
7404 pa_attr_length_millicode_call (rtx insn)
7405 {
7406 unsigned long distance = -1;
7407 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7408
7409 if (INSN_ADDRESSES_SET_P ())
7410 {
7411 distance = (total + insn_current_reference_address (insn));
7412 if (distance < total)
7413 distance = -1;
7414 }
7415
7416 if (TARGET_64BIT)
7417 {
7418 if (!TARGET_LONG_CALLS && distance < 7600000)
7419 return 8;
7420
7421 return 20;
7422 }
7423 else if (TARGET_PORTABLE_RUNTIME)
7424 return 24;
7425 else
7426 {
7427 if (!TARGET_LONG_CALLS && distance < 240000)
7428 return 8;
7429
7430 if (TARGET_LONG_ABS_CALL && !flag_pic)
7431 return 12;
7432
7433 return 24;
7434 }
7435 }
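
/* Worked example (illustrative): on a 32-bit target with
   TARGET_LONG_ABS_CALL and without -fpic, an out-of-range millicode
   call returns 12 above; pa_output_millicode_call then emits
   ldil/ble plus the delay slot nop -- three insns, 12 bytes.  */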
7436
7437 /* INSN is a function call. It may have an unconditional jump
7438 in its delay slot.
7439
7440 CALL_DEST is the routine we are calling. */
7441
7442 const char *
7443 pa_output_millicode_call (rtx insn, rtx call_dest)
7444 {
7445 int attr_length = get_attr_length (insn);
7446 int seq_length = dbr_sequence_length ();
7447 int distance;
7448 rtx seq_insn;
7449 rtx xoperands[3];
7450
7451 xoperands[0] = call_dest;
7452 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7453
7454 /* Handle the common case where we are sure that the branch will
7455 reach the beginning of the $CODE$ subspace. The within reach
7456 form of the $$sh_func_adrs call has a length of 28. Because
7457 it has an attribute type of multi, it never has a nonzero
7458 sequence length. The length of the $$sh_func_adrs call is the same
7459 as that of certain out-of-reach PIC calls to other routines. */
7460 if (!TARGET_LONG_CALLS
7461 && ((seq_length == 0
7462 && (attr_length == 12
7463 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7464 || (seq_length != 0 && attr_length == 8)))
7465 {
7466 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7467 }
7468 else
7469 {
7470 if (TARGET_64BIT)
7471 {
7472 /* It might seem that one insn could be saved by accessing
7473 the millicode function using the linkage table. However,
7474 this doesn't work in shared libraries and other dynamically
7475 loaded objects. Using a pc-relative sequence also avoids
7476 problems related to the implicit use of the gp register. */
7477 output_asm_insn ("b,l .+8,%%r1", xoperands);
7478
7479 if (TARGET_GAS)
7480 {
7481 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7482 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7483 }
7484 else
7485 {
7486 xoperands[1] = gen_label_rtx ();
7487 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7488 targetm.asm_out.internal_label (asm_out_file, "L",
7489 CODE_LABEL_NUMBER (xoperands[1]));
7490 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7491 }
7492
7493 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7494 }
7495 else if (TARGET_PORTABLE_RUNTIME)
7496 {
7497 /* Pure portable runtime doesn't allow be/ble; we also don't
7498 have PIC support in the assembler/linker, so this sequence
7499 is needed. */
7500
7501 /* Get the address of our target into %r1. */
7502 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7503 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7504
7505 /* Get our return address into %r31. */
7506 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7507 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7508
7509 /* Jump to our target address in %r1. */
7510 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7511 }
7512 else if (!flag_pic)
7513 {
7514 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7515 if (TARGET_PA_20)
7516 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7517 else
7518 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7519 }
7520 else
7521 {
7522 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7523 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7524
7525 if (TARGET_SOM || !TARGET_GAS)
7526 {
7527 /* The HP assembler can generate relocations for the
7528 difference of two symbols. GAS can do this for a
7529 millicode symbol but not an arbitrary external
7530 symbol when generating SOM output. */
7531 xoperands[1] = gen_label_rtx ();
7532 targetm.asm_out.internal_label (asm_out_file, "L",
7533 CODE_LABEL_NUMBER (xoperands[1]));
7534 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7535 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7536 }
7537 else
7538 {
7539 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7540 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7541 xoperands);
7542 }
7543
7544 /* Jump to our target address in %r1. */
7545 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7546 }
7547 }
7548
7549 if (seq_length == 0)
7550 output_asm_insn ("nop", xoperands);
7551
7552 /* We are done if there isn't a jump in the delay slot. */
7553 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7554 return "";
7555
7556 /* This call has an unconditional jump in its delay slot. */
7557 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7558
7559 /* See if the return address can be adjusted. Use the containing
7560 sequence insn's address. */
7561 if (INSN_ADDRESSES_SET_P ())
7562 {
7563 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7564 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7565 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7566
7567 if (VAL_14_BITS_P (distance))
7568 {
7569 xoperands[1] = gen_label_rtx ();
7570 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7571 targetm.asm_out.internal_label (asm_out_file, "L",
7572 CODE_LABEL_NUMBER (xoperands[1]));
7573 }
7574 else
7575 /* ??? This branch may not reach its target. */
7576 output_asm_insn ("nop\n\tb,n %0", xoperands);
7577 }
7578 else
7579 /* ??? This branch may not reach its target. */
7580 output_asm_insn ("nop\n\tb,n %0", xoperands);
7581
7582 /* Delete the jump. */
7583 SET_INSN_DELETED (NEXT_INSN (insn));
7584
7585 return "";
7586 }
7587
7588 /* Return the attribute length of the call instruction INSN. The SIBCALL
7589 flag indicates whether INSN is a regular call or a sibling call. The
7590 length returned must be at least as long as the code actually generated by
7591 pa_output_call. Since branch shortening is done before delay branch
7592 sequencing, there is no way to determine whether or not the delay
7593 slot will be filled during branch shortening. Even when the delay
7594 slot is filled, we may have to add a nop if the delay slot contains
7595 a branch that can't reach its target. Thus, we always have to include
7596 the delay slot in the length estimate. This used to be done in
7597 pa_adjust_insn_length but we do it here now as some sequences always
7598 fill the delay slot and we can save four bytes in the estimate for
7599 these sequences. */
7600
7601 int
7602 pa_attr_length_call (rtx insn, int sibcall)
7603 {
7604 int local_call;
7605 rtx call, call_dest;
7606 tree call_decl;
7607 int length = 0;
7608 rtx pat = PATTERN (insn);
7609 unsigned long distance = -1;
7610
7611 gcc_assert (GET_CODE (insn) == CALL_INSN);
7612
7613 if (INSN_ADDRESSES_SET_P ())
7614 {
7615 unsigned long total;
7616
7617 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7618 distance = (total + insn_current_reference_address (insn));
7619 if (distance < total)
7620 distance = -1;
7621 }
7622
7623 gcc_assert (GET_CODE (pat) == PARALLEL);
7624
7625 /* Get the call rtx. */
7626 call = XVECEXP (pat, 0, 0);
7627 if (GET_CODE (call) == SET)
7628 call = SET_SRC (call);
7629
7630 gcc_assert (GET_CODE (call) == CALL);
7631
7632 /* Determine if this is a local call. */
7633 call_dest = XEXP (XEXP (call, 0), 0);
7634 call_decl = SYMBOL_REF_DECL (call_dest);
7635 local_call = call_decl && targetm.binds_local_p (call_decl);
7636
7637 /* pc-relative branch. */
7638 if (!TARGET_LONG_CALLS
7639 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7640 || distance < 240000))
7641 length += 8;
7642
7643 /* 64-bit plabel sequence. */
7644 else if (TARGET_64BIT && !local_call)
7645 length += sibcall ? 28 : 24;
7646
7647 /* non-pic long absolute branch sequence. */
7648 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7649 length += 12;
7650
7651 /* long pc-relative branch sequence. */
7652 else if (TARGET_LONG_PIC_SDIFF_CALL
7653 || (TARGET_GAS && !TARGET_SOM
7654 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7655 {
7656 length += 20;
7657
7658 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7659 length += 8;
7660 }
7661
7662 /* 32-bit plabel sequence. */
7663 else
7664 {
7665 length += 32;
7666
7667 if (TARGET_SOM)
7668 length += length_fp_args (insn);
7669
7670 if (flag_pic)
7671 length += 4;
7672
7673 if (!TARGET_PA_20)
7674 {
7675 if (!sibcall)
7676 length += 8;
7677
7678 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7679 length += 8;
7680 }
7681 }
7682
7683 return length;
7684 }
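
/* Worked example (illustrative): for a 32-bit SOM target with -fpic,
   !TARGET_PA_20, a far non-local, non-sibling call, no sdiff call
   support, and no FP register arguments, the 32-bit plabel case above
   gives 32 + 0 (FP args) + 4 (PIC) + 8 (!sibcall) + 8 (space
   registers) = 52 bytes.  */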
7685
7686 /* INSN is a function call. It may have an unconditional jump
7687 in its delay slot.
7688
7689 CALL_DEST is the routine we are calling. */
7690
7691 const char *
7692 pa_output_call (rtx insn, rtx call_dest, int sibcall)
7693 {
7694 int delay_insn_deleted = 0;
7695 int delay_slot_filled = 0;
7696 int seq_length = dbr_sequence_length ();
7697 tree call_decl = SYMBOL_REF_DECL (call_dest);
7698 int local_call = call_decl && targetm.binds_local_p (call_decl);
7699 rtx xoperands[2];
7700
7701 xoperands[0] = call_dest;
7702
7703 /* Handle the common case where we're sure that the branch will reach
7704 the beginning of the "$CODE$" subspace. This is the beginning of
7705 the current function if we are in a named section. */
7706 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7707 {
7708 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7709 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7710 }
7711 else
7712 {
7713 if (TARGET_64BIT && !local_call)
7714 {
7715 /* ??? As far as I can tell, the HP linker doesn't support the
7716 long pc-relative sequence described in the 64-bit runtime
7717 architecture. So, we use a slightly longer indirect call. */
7718 xoperands[0] = pa_get_deferred_plabel (call_dest);
7719 xoperands[1] = gen_label_rtx ();
7720
7721 /* If this isn't a sibcall, we put the load of %r27 into the
7722 delay slot. We can't do this in a sibcall as we don't
7723 have a second call-clobbered scratch register available. */
7724 if (seq_length != 0
7725 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7726 && !sibcall)
7727 {
7728 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7729 optimize, 0, NULL);
7730
7731 /* Now delete the delay insn. */
7732 SET_INSN_DELETED (NEXT_INSN (insn));
7733 delay_insn_deleted = 1;
7734 }
7735
7736 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7737 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7738 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7739
7740 if (sibcall)
7741 {
7742 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7743 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7744 output_asm_insn ("bve (%%r1)", xoperands);
7745 }
7746 else
7747 {
7748 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7749 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7750 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7751 delay_slot_filled = 1;
7752 }
7753 }
7754 else
7755 {
7756 int indirect_call = 0;
7757
7758 /* Emit a long call. There are several different sequences
7759 of increasing length and complexity. In most cases,
7760 they don't allow an instruction in the delay slot. */
7761 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7762 && !TARGET_LONG_PIC_SDIFF_CALL
7763 && !(TARGET_GAS && !TARGET_SOM
7764 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7765 && !TARGET_64BIT)
7766 indirect_call = 1;
7767
7768 if (seq_length != 0
7769 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7770 && !sibcall
7771 && (!TARGET_PA_20
7772 || indirect_call
7773 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7774 {
7775 /* A non-jump insn in the delay slot. By definition we can
7776 emit this insn before the call (and in fact before argument
7777 relocation). */
7778 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7779 NULL);
7780
7781 /* Now delete the delay insn. */
7782 SET_INSN_DELETED (NEXT_INSN (insn));
7783 delay_insn_deleted = 1;
7784 }
7785
7786 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7787 {
7788 /* This is the best sequence for making long calls in
7789 non-pic code. Unfortunately, GNU ld doesn't provide
7790 the stub needed for external calls, and GAS's support
7791 for this with the SOM linker is buggy. It is safe
7792 to use this for local calls. */
7793 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7794 if (sibcall)
7795 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7796 else
7797 {
7798 if (TARGET_PA_20)
7799 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7800 xoperands);
7801 else
7802 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7803
7804 output_asm_insn ("copy %%r31,%%r2", xoperands);
7805 delay_slot_filled = 1;
7806 }
7807 }
7808 else
7809 {
7810 if (TARGET_LONG_PIC_SDIFF_CALL)
7811 {
7812 /* The HP assembler and linker can handle relocations
7813 for the difference of two symbols. The HP assembler
7814 recognizes the sequence as a pc-relative call and
7815 the linker provides stubs when needed. */
7816 xoperands[1] = gen_label_rtx ();
7817 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7818 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7819 targetm.asm_out.internal_label (asm_out_file, "L",
7820 CODE_LABEL_NUMBER (xoperands[1]));
7821 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7822 }
7823 else if (TARGET_GAS && !TARGET_SOM
7824 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7825 {
7826 /* GAS currently can't generate the relocations that
7827 are needed for the SOM linker under HP-UX using this
7828 sequence. The GNU linker doesn't generate the stubs
7829 that are needed for external calls on TARGET_ELF32
7830 with this sequence. For now, we have to use a
7831 longer plabel sequence when using GAS. */
7832 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7833 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7834 xoperands);
7835 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7836 xoperands);
7837 }
7838 else
7839 {
7840 /* Emit a long plabel-based call sequence. This is
7841 essentially an inline implementation of $$dyncall.
7842 We don't actually try to call $$dyncall as this is
7843 as difficult as calling the function itself. */
7844 xoperands[0] = pa_get_deferred_plabel (call_dest);
7845 xoperands[1] = gen_label_rtx ();
7846
7847 /* Since the call is indirect, FP arguments in registers
7848 need to be copied to the general registers. Then, the
7849 argument relocation stub will copy them back. */
7850 if (TARGET_SOM)
7851 copy_fp_args (insn);
7852
7853 if (flag_pic)
7854 {
7855 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7856 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7857 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7858 }
7859 else
7860 {
7861 output_asm_insn ("addil LR'%0-$global$,%%r27",
7862 xoperands);
7863 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7864 xoperands);
7865 }
7866
7867 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7868 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7869 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7870 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7871
7872 if (!sibcall && !TARGET_PA_20)
7873 {
7874 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7875 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7876 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7877 else
7878 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7879 }
7880 }
7881
7882 if (TARGET_PA_20)
7883 {
7884 if (sibcall)
7885 output_asm_insn ("bve (%%r1)", xoperands);
7886 else
7887 {
7888 if (indirect_call)
7889 {
7890 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7891 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7892 delay_slot_filled = 1;
7893 }
7894 else
7895 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7896 }
7897 }
7898 else
7899 {
7900 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7901 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7902 xoperands);
7903
7904 if (sibcall)
7905 {
7906 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7907 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7908 else
7909 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7910 }
7911 else
7912 {
7913 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7914 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7915 else
7916 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7917
7918 if (indirect_call)
7919 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7920 else
7921 output_asm_insn ("copy %%r31,%%r2", xoperands);
7922 delay_slot_filled = 1;
7923 }
7924 }
7925 }
7926 }
7927 }
7928
7929 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7930 output_asm_insn ("nop", xoperands);
7931
7932 /* We are done if there isn't a jump in the delay slot. */
7933 if (seq_length == 0
7934 || delay_insn_deleted
7935 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7936 return "";
7937
7938 /* A sibcall should never have a branch in the delay slot. */
7939 gcc_assert (!sibcall);
7940
7941 /* This call has an unconditional jump in its delay slot. */
7942 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7943
7944 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7945 {
7946 /* See if the return address can be adjusted. Use the containing
7947 sequence insn's address. This would break the regular call/return
7948 relationship assumed by the table-based eh unwinder, so only do that
7949 if the call is not possibly throwing. */
7950 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7951 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7952 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7953
7954 if (VAL_14_BITS_P (distance)
7955 && !(can_throw_internal (insn) || can_throw_external (insn)))
7956 {
7957 xoperands[1] = gen_label_rtx ();
7958 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7959 targetm.asm_out.internal_label (asm_out_file, "L",
7960 CODE_LABEL_NUMBER (xoperands[1]));
7961 }
7962 else
7963 output_asm_insn ("nop\n\tb,n %0", xoperands);
7964 }
7965 else
7966 output_asm_insn ("b,n %0", xoperands);
7967
7968 /* Delete the jump. */
7969 SET_INSN_DELETED (NEXT_INSN (insn));
7970
7971 return "";
7972 }
7973
7974 /* Return the attribute length of the indirect call instruction INSN.
7975 The length must match the code generated by pa_output_indirect_call.
7976 The returned length includes the delay slot. Currently, the delay
7977 slot of an indirect call sequence is not exposed and it is used by
7978 the sequence itself. */
7979
7980 int
7981 pa_attr_length_indirect_call (rtx insn)
7982 {
7983 unsigned long distance = -1;
7984 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7985
7986 if (INSN_ADDRESSES_SET_P ())
7987 {
7988 distance = (total + insn_current_reference_address (insn));
7989 if (distance < total)
7990 distance = -1;
7991 }
7992
7993 if (TARGET_64BIT)
7994 return 12;
7995
7996 if (TARGET_FAST_INDIRECT_CALLS
7997 || (!TARGET_PORTABLE_RUNTIME
7998 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7999 || distance < 240000)))
8000 return 8;
8001
8002 if (flag_pic)
8003 return 24;
8004
8005 if (TARGET_PORTABLE_RUNTIME)
8006 return 20;
8007
8008 /* Out of reach, can use ble. */
8009 return 12;
8010 }
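
/* These return values track the sequences emitted by
   pa_output_indirect_call below: 8 for the short bl/b,l to $$dyncall,
   12 for the 64-bit sequence and the non-PIC ldil/ble sequence, 20 for
   the portable runtime sequence, and 24 for the long PIC sequence.  */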
8011
8012 const char *
8013 pa_output_indirect_call (rtx insn, rtx call_dest)
8014 {
8015 rtx xoperands[1];
8016
8017 if (TARGET_64BIT)
8018 {
8019 xoperands[0] = call_dest;
8020 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8021 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8022 return "";
8023 }
8024
8025 /* First the special case for kernels, level 0 systems, etc. */
8026 if (TARGET_FAST_INDIRECT_CALLS)
8027 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8028
8029 /* Now the normal case -- we can reach $$dyncall directly or
8030 we're sure that we can get there via a long-branch stub.
8031
8032 No need to check target flags as the length uniquely identifies
8033 the remaining cases. */
8034 if (pa_attr_length_indirect_call (insn) == 8)
8035 {
8036 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8037 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8038 variant of the B,L instruction can't be used on the SOM target. */
8039 if (TARGET_PA_20 && !TARGET_SOM)
8040 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8041 else
8042 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8043 }
8044
8045 /* Long millicode call, but we are not generating PIC or portable runtime
8046 code. */
8047 if (pa_attr_length_indirect_call (insn) == 12)
8048 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8049
8050 /* Long millicode call for portable runtime. */
8051 if (pa_attr_length_indirect_call (insn) == 20)
8052 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
8053
8054 /* We need a long PIC call to $$dyncall. */
8055 xoperands[0] = NULL_RTX;
8056 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8057 if (TARGET_SOM || !TARGET_GAS)
8058 {
8059 xoperands[0] = gen_label_rtx ();
8060 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
8061 targetm.asm_out.internal_label (asm_out_file, "L",
8062 CODE_LABEL_NUMBER (xoperands[0]));
8063 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8064 }
8065 else
8066 {
8067 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
8068 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8069 xoperands);
8070 }
8071 output_asm_insn ("blr %%r0,%%r2", xoperands);
8072 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
8073 return "";
8074 }
8075
8076 /* In HPUX 8.0's shared library scheme, special relocations are needed
8077 for function labels if they might be passed to a function
8078 in a shared library (because shared libraries don't live in code
8079 space), and special magic is needed to construct their address. */
8080
8081 void
8082 pa_encode_label (rtx sym)
8083 {
8084 const char *str = XSTR (sym, 0);
8085 int len = strlen (str) + 1;
8086 char *newstr, *p;
8087
8088 p = newstr = XALLOCAVEC (char, len + 1);
8089 *p++ = '@';
8090 strcpy (p, str);
8091
8092 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8093 }
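
/* For example, a function symbol "foo" becomes "@foo"; the prefix is
   removed again by pa_strip_name_encoding below whenever the plain
   assembler name is wanted.  */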
8094
8095 static void
8096 pa_encode_section_info (tree decl, rtx rtl, int first)
8097 {
8098 int old_referenced = 0;
8099
8100 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8101 old_referenced
8102 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8103
8104 default_encode_section_info (decl, rtl, first);
8105
8106 if (first && TEXT_SPACE_P (decl))
8107 {
8108 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8109 if (TREE_CODE (decl) == FUNCTION_DECL)
8110 pa_encode_label (XEXP (rtl, 0));
8111 }
8112 else if (old_referenced)
8113 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8114 }
8115
8116 /* This is roughly the inverse of pa_encode_section_info. */
8117
8118 static const char *
8119 pa_strip_name_encoding (const char *str)
8120 {
8121 str += (*str == '@');
8122 str += (*str == '*');
8123 return str;
8124 }
8125
8126 /* Returns 1 if OP is a function label involved in a simple addition
8127 with a constant. Used to keep certain patterns from matching
8128 during instruction combination. */
8129 int
8130 pa_is_function_label_plus_const (rtx op)
8131 {
8132 /* Strip off any CONST. */
8133 if (GET_CODE (op) == CONST)
8134 op = XEXP (op, 0);
8135
8136 return (GET_CODE (op) == PLUS
8137 && function_label_operand (XEXP (op, 0), VOIDmode)
8138 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8139 }
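
/* Illustrative RTL that this matches:
     (const (plus (symbol_ref "@foo") (const_int 4)))
   i.e. a function label plus a constant offset, which some md patterns
   use this predicate to reject during combine.  */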
8140
8141 /* Output assembly code for a thunk to FUNCTION. */
8142
8143 static void
8144 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8145 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8146 tree function)
8147 {
8148 static unsigned int current_thunk_number;
8149 int val_14 = VAL_14_BITS_P (delta);
8150 unsigned int old_last_address = last_address, nbytes = 0;
8151 char label[16];
8152 rtx xoperands[4];
8153
8154 xoperands[0] = XEXP (DECL_RTL (function), 0);
8155 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8156 xoperands[2] = GEN_INT (delta);
8157
8158 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8159 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8160
8161 /* Output the thunk. We know that the function is in the same
8162 translation unit (i.e., the same space) as the thunk, and that
8163 thunks are output after their method. Thus, we don't need an
8164 external branch to reach the function. With SOM and GAS,
8165 functions and thunks are effectively in different sections.
8166 Thus, we can always use an IA-relative branch and the linker
8167 will add a long branch stub if necessary.
8168
8169 However, we have to be careful when generating PIC code on the
8170 SOM port to ensure that the sequence does not transfer to an
8171 import stub for the target function as this could clobber the
8172 return value saved at SP-24. This would also apply to the
8173 32-bit linux port if the multi-space model is implemented. */
8174 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8175 && !(flag_pic && TREE_PUBLIC (function))
8176 && (TARGET_GAS || last_address < 262132))
8177 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8178 && ((targetm_common.have_named_sections
8179 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8180 /* The GNU 64-bit linker has rather poor stub management.
8181 So, we use a long branch from thunks that aren't in
8182 the same section as the target function. */
8183 && ((!TARGET_64BIT
8184 && (DECL_SECTION_NAME (thunk_fndecl)
8185 != DECL_SECTION_NAME (function)))
8186 || ((DECL_SECTION_NAME (thunk_fndecl)
8187 == DECL_SECTION_NAME (function))
8188 && last_address < 262132)))
8189 || (targetm_common.have_named_sections
8190 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8191 && DECL_SECTION_NAME (function) == NULL
8192 && last_address < 262132)
8193 || (!targetm_common.have_named_sections
8194 && last_address < 262132))))
8195 {
8196 if (!val_14)
8197 output_asm_insn ("addil L'%2,%%r26", xoperands);
8198
8199 output_asm_insn ("b %0", xoperands);
8200
8201 if (val_14)
8202 {
8203 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8204 nbytes += 8;
8205 }
8206 else
8207 {
8208 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8209 nbytes += 12;
8210 }
8211 }
8212 else if (TARGET_64BIT)
8213 {
8214 /* We only have one call-clobbered scratch register, so we can't
8215 make use of the delay slot if delta doesn't fit in 14 bits. */
8216 if (!val_14)
8217 {
8218 output_asm_insn ("addil L'%2,%%r26", xoperands);
8219 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8220 }
8221
8222 output_asm_insn ("b,l .+8,%%r1", xoperands);
8223
8224 if (TARGET_GAS)
8225 {
8226 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8227 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8228 }
8229 else
8230 {
8231 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8232 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8233 }
8234
8235 if (val_14)
8236 {
8237 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8238 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8239 nbytes += 20;
8240 }
8241 else
8242 {
8243 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8244 nbytes += 24;
8245 }
8246 }
8247 else if (TARGET_PORTABLE_RUNTIME)
8248 {
8249 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8250 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8251
8252 if (!val_14)
8253 output_asm_insn ("addil L'%2,%%r26", xoperands);
8254
8255 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8256
8257 if (val_14)
8258 {
8259 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8260 nbytes += 16;
8261 }
8262 else
8263 {
8264 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8265 nbytes += 20;
8266 }
8267 }
8268 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8269 {
8270 /* The function is accessible from outside this module. The only
8271 way to avoid an import stub between the thunk and function is to
8272 call the function directly with an indirect sequence similar to
8273 that used by $$dyncall. This is possible because $$dyncall acts
8274 as the import stub in an indirect call. */
8275 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8276 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8277 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8278 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8279 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8280 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8281 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8282 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8283 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8284
8285 if (!val_14)
8286 {
8287 output_asm_insn ("addil L'%2,%%r26", xoperands);
8288 nbytes += 4;
8289 }
8290
8291 if (TARGET_PA_20)
8292 {
8293 output_asm_insn ("bve (%%r22)", xoperands);
8294 nbytes += 36;
8295 }
8296 else if (TARGET_NO_SPACE_REGS)
8297 {
8298 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8299 nbytes += 36;
8300 }
8301 else
8302 {
8303 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8304 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8305 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8306 nbytes += 44;
8307 }
8308
8309 if (val_14)
8310 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8311 else
8312 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8313 }
8314 else if (flag_pic)
8315 {
8316 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8317
8318 if (TARGET_SOM || !TARGET_GAS)
8319 {
8320 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8321 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8322 }
8323 else
8324 {
8325 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8326 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8327 }
8328
8329 if (!val_14)
8330 output_asm_insn ("addil L'%2,%%r26", xoperands);
8331
8332 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8333
8334 if (val_14)
8335 {
8336 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8337 nbytes += 20;
8338 }
8339 else
8340 {
8341 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8342 nbytes += 24;
8343 }
8344 }
8345 else
8346 {
8347 if (!val_14)
8348 output_asm_insn ("addil L'%2,%%r26", xoperands);
8349
8350 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8351 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8352
8353 if (val_14)
8354 {
8355 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8356 nbytes += 12;
8357 }
8358 else
8359 {
8360 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8361 nbytes += 16;
8362 }
8363 }
8364
8365 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8366
8367 if (TARGET_SOM && TARGET_GAS)
8368 {
8369 /* We're done with this subspace except possibly for some additional
8370 debug information. Forget that we are in this subspace to ensure
8371 that the next function is output in its own subspace. */
8372 in_section = NULL;
8373 cfun->machine->in_nsubspa = 2;
8374 }
8375
8376 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8377 {
8378 switch_to_section (data_section);
8379 output_asm_insn (".align 4", xoperands);
8380 ASM_OUTPUT_LABEL (file, label);
8381 output_asm_insn (".word P'%0", xoperands);
8382 }
8383
8384 current_thunk_number++;
8385 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8386 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8387 last_address += nbytes;
8388 if (old_last_address > last_address)
8389 last_address = UINT_MAX;
8390 update_total_code_bytes (nbytes);
8391 }
8392
8393 /* Only direct calls to static functions are allowed to be sibling (tail)
8394 call optimized.
8395
8396 This restriction is necessary because some linker-generated stubs will
8397 store return pointers into rp in some cases which might clobber a
8398 live value already in rp.
8399
8400 In a sibcall the current function and the target function share stack
8401 space. Thus if the path to the current function and the path to the
8402 target function save a value in rp, they save the value into the
8403 same stack slot, which has undesirable consequences.
8404
8405 Because of the deferred binding nature of shared libraries any function
8406 with external scope could be in a different load module and thus require
8407 rp to be saved when calling that function. So sibcall optimizations
8408 can only be safe for static functions.
8409
8410 Note that GCC never needs return value relocations, so we don't have to
8411 worry about static calls with return value relocations (which require
8412 saving rp).
8413
8414 It is safe to perform a sibcall optimization when the target function
8415 will never return. */
8416 static bool
8417 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8418 {
8419 if (TARGET_PORTABLE_RUNTIME)
8420 return false;
8421
8422 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8423 single subspace mode and the call is not indirect. As far as I know,
8424 there is no operating system support for the multiple subspace mode.
8425 It might be possible to support indirect calls if we didn't use
8426 $$dyncall (see the indirect sequence generated in pa_output_call). */
8427 if (TARGET_ELF32)
8428 return (decl != NULL_TREE);
8429
8430 /* Sibcalls are not ok because the arg pointer register is not a fixed
8431 register. This prevents the sibcall optimization from occurring. In
8432 addition, there are problems with stub placement using GNU ld. This
8433 is because a normal sibcall branch uses a 17-bit relocation while
8434 a regular call branch uses a 22-bit relocation. As a result, more
8435 care needs to be taken in the placement of long-branch stubs. */
8436 if (TARGET_64BIT)
8437 return false;
8438
8439 /* Sibcalls are only ok within a translation unit. */
8440 return (decl && !TREE_PUBLIC (decl));
8441 }
8442
8443 /* ??? Addition is not commutative on the PA due to the weird implicit
8444 space register selection rules for memory addresses. Therefore, we
8445 don't consider a + b == b + a, as this might be inside a MEM. */
8446 static bool
8447 pa_commutative_p (const_rtx x, int outer_code)
8448 {
8449 return (COMMUTATIVE_P (x)
8450 && (TARGET_NO_SPACE_REGS
8451 || (outer_code != UNKNOWN && outer_code != MEM)
8452 || GET_CODE (x) != PLUS));
8453 }
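
/* Sketch of the problem case: in (mem (plus (reg A) (reg B))) the
   hardware picks the space register from the base operand, so swapping
   the operands of the PLUS can change which space is addressed.  The
   test above therefore only lets PLUS commute outside a MEM context or
   when space registers are disabled.  */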
8454
8455 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8456 use in fmpyadd instructions. */
8457 int
8458 pa_fmpyaddoperands (rtx *operands)
8459 {
8460 enum machine_mode mode = GET_MODE (operands[0]);
8461
8462 /* Must be a floating point mode. */
8463 if (mode != SFmode && mode != DFmode)
8464 return 0;
8465
8466 /* All modes must be the same. */
8467 if (! (mode == GET_MODE (operands[1])
8468 && mode == GET_MODE (operands[2])
8469 && mode == GET_MODE (operands[3])
8470 && mode == GET_MODE (operands[4])
8471 && mode == GET_MODE (operands[5])))
8472 return 0;
8473
8474 /* All operands must be registers. */
8475 if (! (GET_CODE (operands[1]) == REG
8476 && GET_CODE (operands[2]) == REG
8477 && GET_CODE (operands[3]) == REG
8478 && GET_CODE (operands[4]) == REG
8479 && GET_CODE (operands[5]) == REG))
8480 return 0;
8481
8482 /* Only 2 real operands to the addition. One of the input operands must
8483 be the same as the output operand. */
8484 if (! rtx_equal_p (operands[3], operands[4])
8485 && ! rtx_equal_p (operands[3], operands[5]))
8486 return 0;
8487
8488 /* Inout operand of add cannot conflict with any operands from multiply. */
8489 if (rtx_equal_p (operands[3], operands[0])
8490 || rtx_equal_p (operands[3], operands[1])
8491 || rtx_equal_p (operands[3], operands[2]))
8492 return 0;
8493
8494 /* multiply cannot feed into addition operands. */
8495 if (rtx_equal_p (operands[4], operands[0])
8496 || rtx_equal_p (operands[5], operands[0]))
8497 return 0;
8498
8499 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8500 if (mode == SFmode
8501 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8502 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8503 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8504 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8505 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8506 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8507 return 0;
8508
8509 /* Passed. Operands are suitable for fmpyadd. */
8510 return 1;
8511 }
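
/* Illustrative operand layout (hypothetical registers): with DFmode
   operands {fr22, fr23, fr24, fr25, fr25, fr26}, i.e. the multiply
   fr22 = fr23 * fr24 and the add fr25 = fr25 + fr26, every check above
   passes: the add's inout operand matches operands[4] and does not
   overlap any multiply operand, and the multiply result feeds neither
   addend.  */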
8512
8513 #if !defined(USE_COLLECT2)
8514 static void
8515 pa_asm_out_constructor (rtx symbol, int priority)
8516 {
8517 if (!function_label_operand (symbol, VOIDmode))
8518 pa_encode_label (symbol);
8519
8520 #ifdef CTORS_SECTION_ASM_OP
8521 default_ctor_section_asm_out_constructor (symbol, priority);
8522 #else
8523 # ifdef TARGET_ASM_NAMED_SECTION
8524 default_named_section_asm_out_constructor (symbol, priority);
8525 # else
8526 default_stabs_asm_out_constructor (symbol, priority);
8527 # endif
8528 #endif
8529 }
8530
8531 static void
8532 pa_asm_out_destructor (rtx symbol, int priority)
8533 {
8534 if (!function_label_operand (symbol, VOIDmode))
8535 pa_encode_label (symbol);
8536
8537 #ifdef DTORS_SECTION_ASM_OP
8538 default_dtor_section_asm_out_destructor (symbol, priority);
8539 #else
8540 # ifdef TARGET_ASM_NAMED_SECTION
8541 default_named_section_asm_out_destructor (symbol, priority);
8542 # else
8543 default_stabs_asm_out_destructor (symbol, priority);
8544 # endif
8545 #endif
8546 }
8547 #endif
8548
8549 /* This function places uninitialized global data in the bss section.
8550 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8551 function on the SOM port to prevent uninitialized global data from
8552 being placed in the data section. */
8553
8554 void
8555 pa_asm_output_aligned_bss (FILE *stream,
8556 const char *name,
8557 unsigned HOST_WIDE_INT size,
8558 unsigned int align)
8559 {
8560 switch_to_section (bss_section);
8561 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8562
8563 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8564 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8565 #endif
8566
8567 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8568 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8569 #endif
8570
8571 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8572 ASM_OUTPUT_LABEL (stream, name);
8573 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8574 }
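
/* Illustrative output (hypothetical object "buf", size 16, align 64):

	.align 8
	(any .type/.size directives the target defines)
	.align 8
   buf:
	.block 16  */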
8575
8576 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8577 that doesn't allow the alignment of global common storage to be directly
8578 specified. The SOM linker aligns common storage based on the rounded
8579 value of the NUM_BYTES parameter in the .comm directive. It's not
8580 possible to use the .align directive as it doesn't affect the alignment
8581 of the label associated with a .comm directive. */
8582
8583 void
8584 pa_asm_output_aligned_common (FILE *stream,
8585 const char *name,
8586 unsigned HOST_WIDE_INT size,
8587 unsigned int align)
8588 {
8589 unsigned int max_common_align;
8590
8591 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8592 if (align > max_common_align)
8593 {
8594 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8595 "for global common data. Using %u",
8596 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8597 align = max_common_align;
8598 }
8599
8600 switch_to_section (bss_section);
8601
8602 assemble_name (stream, name);
8603 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8604 MAX (size, align / BITS_PER_UNIT));
8605 }
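
/* Worked example: a 1 byte common object requesting 8 byte alignment
   is emitted with ".comm 8" -- MAX (size, align / BITS_PER_UNIT)
   rounds the request up so the SOM linker, which aligns common storage
   on the rounded NUM_BYTES value, delivers the wanted alignment.  */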
8606
8607 /* We can't use .comm for local common storage as the SOM linker effectively
8608 treats the symbol as universal and uses the same storage for local symbols
8609 with the same name in different object files. The .block directive
8610 reserves an uninitialized block of storage. However, it's not common
8611 storage. Fortunately, GCC never requests common storage with the same
8612 name in any given translation unit. */
8613
8614 void
8615 pa_asm_output_aligned_local (FILE *stream,
8616 const char *name,
8617 unsigned HOST_WIDE_INT size,
8618 unsigned int align)
8619 {
8620 switch_to_section (bss_section);
8621 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8622
8623 #ifdef LOCAL_ASM_OP
8624 fprintf (stream, "%s", LOCAL_ASM_OP);
8625 assemble_name (stream, name);
8626 fprintf (stream, "\n");
8627 #endif
8628
8629 ASM_OUTPUT_LABEL (stream, name);
8630 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8631 }
8632
8633 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8634 use in fmpysub instructions. */
8635 int
8636 pa_fmpysuboperands (rtx *operands)
8637 {
8638 enum machine_mode mode = GET_MODE (operands[0]);
8639
8640 /* Must be a floating point mode. */
8641 if (mode != SFmode && mode != DFmode)
8642 return 0;
8643
8644 /* All modes must be the same. */
8645 if (! (mode == GET_MODE (operands[1])
8646 && mode == GET_MODE (operands[2])
8647 && mode == GET_MODE (operands[3])
8648 && mode == GET_MODE (operands[4])
8649 && mode == GET_MODE (operands[5])))
8650 return 0;
8651
8652 /* All operands must be registers. */
8653 if (! (GET_CODE (operands[1]) == REG
8654 && GET_CODE (operands[2]) == REG
8655 && GET_CODE (operands[3]) == REG
8656 && GET_CODE (operands[4]) == REG
8657 && GET_CODE (operands[5]) == REG))
8658 return 0;
8659
8660 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8661 operation, so operands[4] must be the same as operands[3]. */
8662 if (! rtx_equal_p (operands[3], operands[4]))
8663 return 0;
8664
8665 /* multiply cannot feed into subtraction. */
8666 if (rtx_equal_p (operands[5], operands[0]))
8667 return 0;
8668
8669 /* Inout operand of sub cannot conflict with any operands from multiply. */
8670 if (rtx_equal_p (operands[3], operands[0])
8671 || rtx_equal_p (operands[3], operands[1])
8672 || rtx_equal_p (operands[3], operands[2]))
8673 return 0;
8674
8675 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8676 if (mode == SFmode
8677 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8678 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8679 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8680 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8681 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8682 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8683 return 0;
8684
8685 /* Passed. Operands are suitable for fmpysub. */
8686 return 1;
8687 }
8688
8689 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8690 constants for shadd instructions. */
8691 int
8692 pa_shadd_constant_p (int val)
8693 {
8694 if (val == 2 || val == 4 || val == 8)
8695 return 1;
8696 else
8697 return 0;
8698 }
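
/* The constants 2, 4 and 8 correspond to the sh1add, sh2add and sh3add
   instructions, which shift their first operand left by 1, 2 or 3 bits
   before adding.  For example, x * 4 + y can be done with a single
   sh2add.  */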
8699
8700 /* Return TRUE if INSN branches forward. */
8701
8702 static bool
8703 forward_branch_p (rtx insn)
8704 {
8705 rtx lab = JUMP_LABEL (insn);
8706
8707 /* The INSN must have a jump label. */
8708 gcc_assert (lab != NULL_RTX);
8709
8710 if (INSN_ADDRESSES_SET_P ())
8711 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8712
8713 while (insn)
8714 {
8715 if (insn == lab)
8716 return true;
8717 else
8718 insn = NEXT_INSN (insn);
8719 }
8720
8721 return false;
8722 }
8723
8724 /* Return 1 if INSN is in the delay slot of a call instruction. */
8725 int
8726 pa_jump_in_call_delay (rtx insn)
8727 {
8728
8729 if (GET_CODE (insn) != JUMP_INSN)
8730 return 0;
8731
8732 if (PREV_INSN (insn)
8733 && PREV_INSN (PREV_INSN (insn))
8734 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8735 {
8736 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8737
8738 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8739 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8740
8741 }
8742 else
8743 return 0;
8744 }
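
/* Background for the test above: when a delay slot is filled, final
   wraps the branch and its delay insn in one SEQUENCE whose element 0
   is the branch (here, the call) and whose element 1 is the delay slot
   occupant; the XVECEXP check asks whether INSN is that occupant.  */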
8745
8746 /* Output an unconditional move and branch insn. */
8747
8748 const char *
8749 pa_output_parallel_movb (rtx *operands, rtx insn)
8750 {
8751 int length = get_attr_length (insn);
8752
8753 /* These are the cases in which we win. */
8754 if (length == 4)
8755 return "mov%I1b,tr %1,%0,%2";
8756
8757 /* None of the following cases win, but they don't lose either. */
8758 if (length == 8)
8759 {
8760 if (dbr_sequence_length () == 0)
8761 {
8762 /* Nothing in the delay slot, fake it by putting the combined
8763 insn (the copy or add) in the delay slot of a bl. */
8764 if (GET_CODE (operands[1]) == CONST_INT)
8765 return "b %2\n\tldi %1,%0";
8766 else
8767 return "b %2\n\tcopy %1,%0";
8768 }
8769 else
8770 {
8771 /* Something in the delay slot, but we've got a long branch. */
8772 if (GET_CODE (operands[1]) == CONST_INT)
8773 return "ldi %1,%0\n\tb %2";
8774 else
8775 return "copy %1,%0\n\tb %2";
8776 }
8777 }
8778
8779 if (GET_CODE (operands[1]) == CONST_INT)
8780 output_asm_insn ("ldi %1,%0", operands);
8781 else
8782 output_asm_insn ("copy %1,%0", operands);
8783 return pa_output_lbranch (operands[2], insn, 1);
8784 }
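
/* Illustrative expansion (hypothetical operands): for a register copy
   paired with a short branch, "mov%I1b,tr %1,%0,%2" might assemble as
   "movb,tr %r4,%r5,L$0040" -- the ,tr completer makes the branch
   always taken, so the copy and branch execute as a single insn.  */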
8785
8786 /* Output an unconditional add and branch insn. */
8787
8788 const char *
8789 pa_output_parallel_addb (rtx *operands, rtx insn)
8790 {
8791 int length = get_attr_length (insn);
8792
8793 /* To make life easy we want operand0 to be the shared input/output
8794 operand and operand1 to be the readonly operand. */
8795 if (operands[0] == operands[1])
8796 operands[1] = operands[2];
8797
8798 /* These are the cases in which we win. */
8799 if (length == 4)
8800 return "add%I1b,tr %1,%0,%3";
8801
8802 /* None of the following cases win, but they don't lose either. */
8803 if (length == 8)
8804 {
8805 if (dbr_sequence_length () == 0)
8806 /* Nothing in the delay slot, fake it by putting the combined
8807 insn (the copy or add) in the delay slot of a bl. */
8808 return "b %3\n\tadd%I1 %1,%0,%0";
8809 else
8810 /* Something in the delay slot, but we've got a long branch. */
8811 return "add%I1 %1,%0,%0\n\tb %3";
8812 }
8813
8814 output_asm_insn ("add%I1 %1,%0,%0", operands);
8815 return pa_output_lbranch (operands[3], insn, 1);
8816 }
8817
8818 /* Return nonzero if INSN (a jump insn) immediately follows a call
8819 to a named function. This is used to avoid filling the delay slot
8820 of the jump since it can usually be eliminated by modifying RP in
8821 the delay slot of the call. */
8822
8823 int
8824 pa_following_call (rtx insn)
8825 {
8826 if (! TARGET_JUMP_IN_DELAY)
8827 return 0;
8828
8829 /* Find the previous real insn, skipping NOTEs. */
8830 insn = PREV_INSN (insn);
8831 while (insn && GET_CODE (insn) == NOTE)
8832 insn = PREV_INSN (insn);
8833
8834 /* Check for CALL_INSNs and millicode calls. */
8835 if (insn
8836 && ((GET_CODE (insn) == CALL_INSN
8837 && get_attr_type (insn) != TYPE_DYNCALL)
8838 || (GET_CODE (insn) == INSN
8839 && GET_CODE (PATTERN (insn)) != SEQUENCE
8840 && GET_CODE (PATTERN (insn)) != USE
8841 && GET_CODE (PATTERN (insn)) != CLOBBER
8842 && get_attr_type (insn) == TYPE_MILLI)))
8843 return 1;
8844
8845 return 0;
8846 }
8847
8848 /* We use this hook to perform a PA-specific optimization which is difficult
8849 to do in earlier passes.
8850
8851 We want the delay slots of branches within jump tables to be filled.
8852 None of the compiler passes at the moment even has the notion that a
8853 PA jump table doesn't contain addresses, but instead contains actual
8854 instructions!
8855
8856 Because we actually jump into the table, the addresses of each entry
8857 must stay constant in relation to the beginning of the table (which
8858 itself must stay constant relative to the instruction to jump into
8859 it). I don't believe we can guarantee earlier passes of the compiler
8860 will adhere to those rules.
8861
8862 So, late in the compilation process we find all the jump tables, and
8863 expand them into real code -- e.g. each entry in the jump table vector
8864 will get an appropriate label followed by a jump to the final target.
8865
8866 Reorg and the final jump pass can then optimize these branches and
8867 fill their delay slots. We end up with smaller, more efficient code.
8868
8869 The jump instructions within the table are special; we must be able
8870 to identify them during assembly output (if the jumps don't get filled
8871 we need to emit a nop rather than nullifying the delay slot). We
8872 identify jumps in switch tables by using insns with the attribute
8873 type TYPE_BTABLE_BRANCH.
8874
8875 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8876 insns. This serves two purposes: first, it prevents jump.c from
8877 noticing that the last N entries in the table jump to the instruction
8878 immediately after the table and deleting the jumps. Second, those
8879 insns mark where we should emit .begin_brtab and .end_brtab directives
8880 when using GAS (allows for better link time optimizations). */
8881
8882 static void
8883 pa_reorg (void)
8884 {
8885 rtx insn;
8886
8887 remove_useless_addtr_insns (1);
8888
8889 if (pa_cpu < PROCESSOR_8000)
8890 pa_combine_instructions ();
8891
8893 /* This is fairly cheap, so always run it if optimizing. */
8894 if (optimize > 0 && !TARGET_BIG_SWITCH)
8895 {
8896 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8897 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8898 {
8899 rtx pattern, tmp, location, label;
8900 unsigned int length, i;
8901
8902 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8903 if (GET_CODE (insn) != JUMP_INSN
8904 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8905 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8906 continue;
8907
8908 /* Emit marker for the beginning of the branch table. */
8909 emit_insn_before (gen_begin_brtab (), insn);
8910
8911 pattern = PATTERN (insn);
8912 location = PREV_INSN (insn);
8913 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8914
8915 for (i = 0; i < length; i++)
8916 {
8917 /* Emit a label before each jump to keep jump.c from
8918 removing this code. */
8919 tmp = gen_label_rtx ();
8920 LABEL_NUSES (tmp) = 1;
8921 emit_label_after (tmp, location);
8922 location = NEXT_INSN (location);
8923
8924 if (GET_CODE (pattern) == ADDR_VEC)
8925 label = XEXP (XVECEXP (pattern, 0, i), 0);
8926 else
8927 label = XEXP (XVECEXP (pattern, 1, i), 0);
8928
8929 tmp = gen_short_jump (label);
8930
8931 /* Emit the jump itself. */
8932 tmp = emit_jump_insn_after (tmp, location);
8933 JUMP_LABEL (tmp) = label;
8934 LABEL_NUSES (label)++;
8935 location = NEXT_INSN (location);
8936
8937 /* Emit a BARRIER after the jump. */
8938 emit_barrier_after (location);
8939 location = NEXT_INSN (location);
8940 }
8941
8942 /* Emit marker for the end of the branch table. */
8943 emit_insn_before (gen_end_brtab (), location);
8944 location = NEXT_INSN (location);
8945 emit_barrier_after (location);
8946
8947 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8948 delete_insn (insn);
8949 }
8950 }
8951 else
8952 {
8953 /* Still need brtab marker insns. FIXME: the presence of these
8954 markers disables output of the branch table to readonly memory,
8955 and any alignment directives that might be needed. Possibly,
8956 the begin_brtab insn should be output before the label for the
8957 table. This doesn't matter at the moment since the tables are
8958 always output in the text section. */
8959 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8960 {
8961 /* Find an ADDR_VEC insn. */
8962 if (GET_CODE (insn) != JUMP_INSN
8963 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8964 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8965 continue;
8966
8967 /* Now generate markers for the beginning and end of the
8968 branch table. */
8969 emit_insn_before (gen_begin_brtab (), insn);
8970 emit_insn_after (gen_end_brtab (), insn);
8971 }
8972 }
8973 }
8974
8975 /* The PA has a number of odd instructions which can perform multiple
8976 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8977 it may be profitable to combine two instructions into one instruction
8978 with two outputs. It's not profitable on PA2.0 machines because the
8979 two outputs would take two slots in the reorder buffers.
8980
8981 This routine finds instructions which can be combined and combines
8982 them. We only support some of the potential combinations, and we
8983 only try common ways to find suitable instructions.
8984
8985 * addb can add two registers or a register and a small integer
8986 and jump to a nearby (+-8k) location. Normally the jump to the
8987 nearby location is conditional on the result of the add, but by
8988 using the "true" condition we can make the jump unconditional.
8989 Thus addb can perform two independent operations in one insn.
8990
8991 * movb is similar to addb in that it can perform a reg->reg
8992 or small immediate->reg copy and jump to a nearby (+-8k) location.
8993
8994 * fmpyadd and fmpysub can perform a FP multiply and either an
8995 FP add or FP sub if the operands of the multiply and add/sub are
8996 independent (there are other minor restrictions). Note both
8997 the fmpy and fadd/fsub can in theory move to better spots according
8998 to data dependencies, but for now we require that the fmpy stay at a
8999 fixed location.
9000
9001 * Many of the memory operations can perform pre & post updates
9002 of index registers. GCC's pre/post increment/decrement addressing
9003 is far too simple to take advantage of all the possibilities. This
9004 pass may not be suitable since those insns may not be independent.
9005
9006 * comclr can compare two integer registers or a register and an immediate, nullify
9007 the following instruction and zero some other register. This
9008 is more difficult to use as it's harder to find an insn which
9009 will generate a comclr than finding something like an unconditional
9010 branch. (conditional moves & long branches create comclr insns).
9011
9012 * Most arithmetic operations can conditionally skip the next
9013 instruction. They can be viewed as "perform this operation
9014 and conditionally jump to this nearby location" (where nearby
9015 is an insn away). These are difficult to use due to the
9016 branch length restrictions. */
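
/* As an illustrative sketch of the addb case (registers, labels and
   the exact completer syntax are invented for the example), the pair

	addi 1,%r26,%r26	; add
	b,n .L5			; unconditional branch

   can become the single add-and-branch insn

	addib,tr 1,%r26,.L5	; add, branch on the always-true condition

   performing both operations in one insn.  */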
9017
9018 static void
9019 pa_combine_instructions (void)
9020 {
9021 rtx anchor, new_rtx;
9022
9023 /* This can get expensive since the basic algorithm is O(n^2)
9024 or worse. Only do it for -O2 or higher
9025 levels of optimization. */
9026 if (optimize < 2)
9027 return;
9028
9029 /* Walk down the list of insns looking for "anchor" insns which
9030 may be combined with "floating" insns. As the name implies,
9031 "anchor" instructions don't move, while "floating" insns may
9032 move around. */
9033 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9034 new_rtx = make_insn_raw (new_rtx);
9035
9036 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9037 {
9038 enum attr_pa_combine_type anchor_attr;
9039 enum attr_pa_combine_type floater_attr;
9040
9041 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9042 Also ignore any special USE insns. */
9043 if ((GET_CODE (anchor) != INSN
9044 && GET_CODE (anchor) != JUMP_INSN
9045 && GET_CODE (anchor) != CALL_INSN)
9046 || GET_CODE (PATTERN (anchor)) == USE
9047 || GET_CODE (PATTERN (anchor)) == CLOBBER
9048 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
9049 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
9050 continue;
9051
9052 anchor_attr = get_attr_pa_combine_type (anchor);
9053 /* See if anchor is an insn suitable for combination. */
9054 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9055 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9056 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9057 && ! forward_branch_p (anchor)))
9058 {
9059 rtx floater;
9060
9061 for (floater = PREV_INSN (anchor);
9062 floater;
9063 floater = PREV_INSN (floater))
9064 {
9065 if (GET_CODE (floater) == NOTE
9066 || (GET_CODE (floater) == INSN
9067 && (GET_CODE (PATTERN (floater)) == USE
9068 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9069 continue;
9070
9071 /* Anything except a regular INSN will stop our search. */
9072 if (GET_CODE (floater) != INSN
9073 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9074 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9075 {
9076 floater = NULL_RTX;
9077 break;
9078 }
9079
9080 /* See if FLOATER is suitable for combination with the
9081 anchor. */
9082 floater_attr = get_attr_pa_combine_type (floater);
9083 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9084 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9085 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9086 && floater_attr == PA_COMBINE_TYPE_FMPY))
9087 {
9088 /* If ANCHOR and FLOATER can be combined, then we're
9089 done with this pass. */
9090 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9091 SET_DEST (PATTERN (floater)),
9092 XEXP (SET_SRC (PATTERN (floater)), 0),
9093 XEXP (SET_SRC (PATTERN (floater)), 1)))
9094 break;
9095 }
9096
9097 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9098 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9099 {
9100 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9101 {
9102 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9103 SET_DEST (PATTERN (floater)),
9104 XEXP (SET_SRC (PATTERN (floater)), 0),
9105 XEXP (SET_SRC (PATTERN (floater)), 1)))
9106 break;
9107 }
9108 else
9109 {
9110 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9111 SET_DEST (PATTERN (floater)),
9112 SET_SRC (PATTERN (floater)),
9113 SET_SRC (PATTERN (floater))))
9114 break;
9115 }
9116 }
9117 }
9118
9119 /* If we didn't find anything on the backwards scan, try forwards. */
9120 if (!floater
9121 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9122 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9123 {
9124 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9125 {
9126 if (GET_CODE (floater) == NOTE
9127 || (GET_CODE (floater) == INSN
9128 && (GET_CODE (PATTERN (floater)) == USE
9129 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9131 continue;
9132
9133 /* Anything except a regular INSN will stop our search. */
9134 if (GET_CODE (floater) != INSN
9135 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9136 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9137 {
9138 floater = NULL_RTX;
9139 break;
9140 }
9141
9142 /* See if FLOATER is suitable for combination with the
9143 anchor. */
9144 floater_attr = get_attr_pa_combine_type (floater);
9145 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9146 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9147 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9148 && floater_attr == PA_COMBINE_TYPE_FMPY))
9149 {
9150 /* If ANCHOR and FLOATER can be combined, then we're
9151 done with this pass. */
9152 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9153 SET_DEST (PATTERN (floater)),
9154 XEXP (SET_SRC (PATTERN (floater)),
9155 0),
9156 XEXP (SET_SRC (PATTERN (floater)),
9157 1)))
9158 break;
9159 }
9160 }
9161 }
9162
9163 /* FLOATER will be nonzero if we found a suitable floating
9164 insn for combination with ANCHOR. */
9165 if (floater
9166 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9167 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9168 {
9169 /* Emit the new instruction and delete the old anchor. */
9170 emit_insn_before (gen_rtx_PARALLEL
9171 (VOIDmode,
9172 gen_rtvec (2, PATTERN (anchor),
9173 PATTERN (floater))),
9174 anchor);
9175
9176 SET_INSN_DELETED (anchor);
9177
9178 /* Emit a special USE insn for FLOATER, then delete
9179 the floating insn. */
9180 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9181 delete_insn (floater);
9182
9183 continue;
9184 }
9185 else if (floater
9186 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9187 {
9188 rtx temp;
9189 /* Emit the new jump instruction and delete the old anchor. */
9190 temp
9191 = emit_jump_insn_before (gen_rtx_PARALLEL
9192 (VOIDmode,
9193 gen_rtvec (2, PATTERN (anchor),
9194 PATTERN (floater))),
9195 anchor);
9196
9197 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9198 SET_INSN_DELETED (anchor);
9199
9200 /* Emit a special USE insn for FLOATER, then delete
9201 the floating insn. */
9202 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9203 delete_insn (floater);
9204 continue;
9205 }
9206 }
9207 }
9208 }
9209
9210 static int
9211 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9212 rtx src1, rtx src2)
9213 {
9214 int insn_code_number;
9215 rtx start, end;
9216
9217 /* Create a PARALLEL with the patterns of ANCHOR and
9218 FLOATER, try to recognize it, then test constraints
9219 for the resulting pattern.
9220
9221 If the pattern doesn't match or the constraints
9222 aren't met keep searching for a suitable floater
9223 insn. */
9224 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9225 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9226 INSN_CODE (new_rtx) = -1;
9227 insn_code_number = recog_memoized (new_rtx);
9228 if (insn_code_number < 0
9229 || (extract_insn (new_rtx), ! constrain_operands (1)))
9230 return 0;
9231
9232 if (reversed)
9233 {
9234 start = anchor;
9235 end = floater;
9236 }
9237 else
9238 {
9239 start = floater;
9240 end = anchor;
9241 }
9242
9243 /* There are up to three operands to consider: one
9244 output and two inputs.
9245
9246 The output must not be used between FLOATER & ANCHOR
9247 exclusive. The inputs must not be set between
9248 FLOATER and ANCHOR exclusive. */
9249
9250 if (reg_used_between_p (dest, start, end))
9251 return 0;
9252
9253 if (reg_set_between_p (src1, start, end))
9254 return 0;
9255
9256 if (reg_set_between_p (src2, start, end))
9257 return 0;
9258
9259 /* If we get here, then everything is good. */
9260 return 1;
9261 }
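
/* For example, with an fmpy anchor and an faddsub floater, the PARALLEL
   tested above has the shape (register numbers invented)

	(parallel [(set (reg:DF 40) (mult:DF (reg:DF 42) (reg:DF 44)))
		   (set (reg:DF 46) (plus:DF (reg:DF 48) (reg:DF 50)))])

   which recog_memoized can only match if an fmpyadd-style pattern in
   pa.md accepts it; the reg_used_between_p/reg_set_between_p checks
   then guarantee the two halves really are independent over the region
   one of them moves across.  */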
9262
9263 /* Return nonzero if references for INSN are delayed.
9264
9265 Millicode insns are actually function calls with some special
9266 constraints on arguments and register usage.
9267
9268 Millicode calls always expect their arguments in the integer argument
9269 registers, and always return their result in %r29 (ret1). They
9270 are expected to clobber their arguments, %r1, %r29, and the return
9271 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9272
9273 This function tells reorg that the references to arguments and
9274 millicode calls do not appear to happen until after the millicode call.
9275 This allows reorg to put insns which set the argument registers into the
9276 delay slot of the millicode call -- thus they act more like traditional
9277 CALL_INSNs.
9278
9279 Note we cannot consider side effects of the insn to be delayed because
9280 the branch and link insn will clobber the return pointer. If we happened
9281 to use the return pointer in the delay slot of the call, then we lose.
9282
9283 get_attr_type will try to recognize the given insn, so make sure to
9284 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9285 in particular. */
9286 int
9287 pa_insn_refs_are_delayed (rtx insn)
9288 {
9289 return ((GET_CODE (insn) == INSN
9290 && GET_CODE (PATTERN (insn)) != SEQUENCE
9291 && GET_CODE (PATTERN (insn)) != USE
9292 && GET_CODE (PATTERN (insn)) != CLOBBER
9293 && get_attr_type (insn) == TYPE_MILLI));
9294 }
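
/* As an illustrative sketch (the millicode routine and registers are
   just an example), this lets reorg transform

	ldi 7,%r25		; set up second argument
	bl $$mulI,%r31		; millicode multiply

   into

	bl $$mulI,%r31		; millicode multiply
	ldi 7,%r25		; argument set up in the delay slot

   since the argument registers are not considered read until after the
   call; the result still comes back in %r29 (ret1).  */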
9295
9296 /* Promote the return value, but not the arguments. */
9297
9298 static enum machine_mode
9299 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9300 enum machine_mode mode,
9301 int *punsignedp ATTRIBUTE_UNUSED,
9302 const_tree fntype ATTRIBUTE_UNUSED,
9303 int for_return)
9304 {
9305 if (for_return == 0)
9306 return mode;
9307 return promote_mode (type, mode, punsignedp);
9308 }
9309
9310 /* On the HP-PA the value is found in register(s) 28(-29), unless
9311 the mode is SF or DF. Then the value is returned in fr4 (32).
9312
9313 This must perform the same promotions as PROMOTE_MODE, else promoting
9314 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9315
9316 Small structures must be returned in a PARALLEL on PA64 in order
9317 to match the HP Compiler ABI. */
9318
9319 static rtx
9320 pa_function_value (const_tree valtype,
9321 const_tree func ATTRIBUTE_UNUSED,
9322 bool outgoing ATTRIBUTE_UNUSED)
9323 {
9324 enum machine_mode valmode;
9325
9326 if (AGGREGATE_TYPE_P (valtype)
9327 || TREE_CODE (valtype) == COMPLEX_TYPE
9328 || TREE_CODE (valtype) == VECTOR_TYPE)
9329 {
9330 if (TARGET_64BIT)
9331 {
9332 /* Aggregates with a size less than or equal to 128 bits are
9333 returned in GR 28(-29). They are left justified. The pad
9334 bits are undefined. Larger aggregates are returned in
9335 memory. */
9336 rtx loc[2];
9337 int i, offset = 0;
9338 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9339
9340 for (i = 0; i < ub; i++)
9341 {
9342 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9343 gen_rtx_REG (DImode, 28 + i),
9344 GEN_INT (offset));
9345 offset += 8;
9346 }
9347
9348 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9349 }
9350 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9351 {
9352 /* Aggregates 5 to 8 bytes in size are returned in general
9353 registers r28-r29 in the same manner as other non
9354 floating-point objects. The data is right-justified and
9355 zero-extended to 64 bits. This is opposite to the normal
9356 justification used on big endian targets and requires
9357 special treatment. */
9358 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9359 gen_rtx_REG (DImode, 28), const0_rtx);
9360 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9361 }
9362 }
9363
9364 if ((INTEGRAL_TYPE_P (valtype)
9365 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9366 || POINTER_TYPE_P (valtype))
9367 valmode = word_mode;
9368 else
9369 valmode = TYPE_MODE (valtype);
9370
9371 if (TREE_CODE (valtype) == REAL_TYPE
9372 && !AGGREGATE_TYPE_P (valtype)
9373 && TYPE_MODE (valtype) != TFmode
9374 && !TARGET_SOFT_FLOAT)
9375 return gen_rtx_REG (valmode, 32);
9376
9377 return gen_rtx_REG (valmode, 28);
9378 }
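
/* Two illustrative examples of the cases above: a 16-byte struct
   returned on TARGET_64BIT yields

	(parallel:BLK [(expr_list (reg:DI 28) (const_int 0))
		       (expr_list (reg:DI 29) (const_int 8))])

   while a double returned with hardware floating point comes back
   simply as (reg:DF 32), i.e. fr4.  */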
9379
9380 /* Implement the TARGET_LIBCALL_VALUE hook. */
9381
9382 static rtx
9383 pa_libcall_value (enum machine_mode mode,
9384 const_rtx fun ATTRIBUTE_UNUSED)
9385 {
9386 if (! TARGET_SOFT_FLOAT
9387 && (mode == SFmode || mode == DFmode))
9388 return gen_rtx_REG (mode, 32);
9389 else
9390 return gen_rtx_REG (mode, 28);
9391 }
9392
9393 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9394
9395 static bool
9396 pa_function_value_regno_p (const unsigned int regno)
9397 {
9398 if (regno == 28
9399 || (! TARGET_SOFT_FLOAT && regno == 32))
9400 return true;
9401
9402 return false;
9403 }
9404
9405 /* Update the data in CUM to advance over an argument
9406 of mode MODE and data type TYPE.
9407 (TYPE is null for libcalls where that information may not be available.) */
9408
9409 static void
9410 pa_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
9411 const_tree type, bool named ATTRIBUTE_UNUSED)
9412 {
9413 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9414 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9415
9416 cum->nargs_prototype--;
9417 cum->words += (arg_size
9418 + ((cum->words & 01)
9419 && type != NULL_TREE
9420 && arg_size > 1));
9421 }
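
/* A worked example of the advance above: with cum->words == 1 (odd) and
   a two-word argument, the padding term is 1, so cum->words becomes 4;
   a one-word argument in the same position has no padding term and
   advances cum->words to 2.  */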
9422
9423 /* Return the location of a parameter that is passed in a register or NULL
9424 if the parameter has any component that is passed in memory.
9425
9426 This is new code and will be pushed into the net sources after
9427 further testing.
9428
9429 ??? We might want to restructure this so that it looks more like other
9430 ports. */
9431 static rtx
9432 pa_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
9433 const_tree type, bool named ATTRIBUTE_UNUSED)
9434 {
9435 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9436 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9437 int alignment = 0;
9438 int arg_size;
9439 int fpr_reg_base;
9440 int gpr_reg_base;
9441 rtx retval;
9442
9443 if (mode == VOIDmode)
9444 return NULL_RTX;
9445
9446 arg_size = FUNCTION_ARG_SIZE (mode, type);
9447
9448 /* If this arg would be passed partially or totally on the stack, then
9449 this routine should return zero. pa_arg_partial_bytes will
9450 handle arguments which are split between regs and stack slots if
9451 the ABI mandates split arguments. */
9452 if (!TARGET_64BIT)
9453 {
9454 /* The 32-bit ABI does not split arguments. */
9455 if (cum->words + arg_size > max_arg_words)
9456 return NULL_RTX;
9457 }
9458 else
9459 {
9460 if (arg_size > 1)
9461 alignment = cum->words & 1;
9462 if (cum->words + alignment >= max_arg_words)
9463 return NULL_RTX;
9464 }
9465
9466 /* The 32bit ABIs and the 64bit ABIs are rather different,
9467 particularly in their handling of FP registers. We might
9468 be able to cleverly share code between them, but I'm not
9469 going to bother in the hope that splitting them up results
9470 in code that is more easily understood. */
9471
9472 if (TARGET_64BIT)
9473 {
9474 /* Advance the base registers to their current locations.
9475
9476 Remember, gprs grow towards smaller register numbers while
9477 fprs grow to higher register numbers. Also remember that
9478 although FP regs are 32-bit addressable, we pretend that
9479 the registers are 64-bits wide. */
9480 gpr_reg_base = 26 - cum->words;
9481 fpr_reg_base = 32 + cum->words;
9482
9483 /* Arguments wider than one word and small aggregates need special
9484 treatment. */
9485 if (arg_size > 1
9486 || mode == BLKmode
9487 || (type && (AGGREGATE_TYPE_P (type)
9488 || TREE_CODE (type) == COMPLEX_TYPE
9489 || TREE_CODE (type) == VECTOR_TYPE)))
9490 {
9491 /* Double-extended precision (80-bit), quad-precision (128-bit)
9492 and aggregates including complex numbers are aligned on
9493 128-bit boundaries. The first eight 64-bit argument slots
9494 are associated one-to-one, with general registers r26
9495 through r19, and also with floating-point registers fr4
9496 through fr11. Arguments larger than one word are always
9497 passed in general registers.
9498
9499 Using a PARALLEL with a word mode register results in left
9500 justified data on a big-endian target. */
9501
9502 rtx loc[8];
9503 int i, offset = 0, ub = arg_size;
9504
9505 /* Align the base register. */
9506 gpr_reg_base -= alignment;
9507
9508 ub = MIN (ub, max_arg_words - cum->words - alignment);
9509 for (i = 0; i < ub; i++)
9510 {
9511 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9512 gen_rtx_REG (DImode, gpr_reg_base),
9513 GEN_INT (offset));
9514 gpr_reg_base -= 1;
9515 offset += 8;
9516 }
9517
9518 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9519 }
9520 }
9521 else
9522 {
9523 /* If the argument is larger than a word, then we know precisely
9524 which registers we must use. */
9525 if (arg_size > 1)
9526 {
9527 if (cum->words)
9528 {
9529 gpr_reg_base = 23;
9530 fpr_reg_base = 38;
9531 }
9532 else
9533 {
9534 gpr_reg_base = 25;
9535 fpr_reg_base = 34;
9536 }
9537
9538 /* Structures 5 to 8 bytes in size are passed in the general
9539 registers in the same manner as other non floating-point
9540 objects. The data is right-justified and zero-extended
9541 to 64 bits. This is opposite to the normal justification
9542 used on big endian targets and requires special treatment.
9543 We now define BLOCK_REG_PADDING to pad these objects.
9544 Aggregates, complex and vector types are passed in the same
9545 manner as structures. */
9546 if (mode == BLKmode
9547 || (type && (AGGREGATE_TYPE_P (type)
9548 || TREE_CODE (type) == COMPLEX_TYPE
9549 || TREE_CODE (type) == VECTOR_TYPE)))
9550 {
9551 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9552 gen_rtx_REG (DImode, gpr_reg_base),
9553 const0_rtx);
9554 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9555 }
9556 }
9557 else
9558 {
9559 /* We have a single word (32 bits). A simple computation
9560 will get us the register #s we need. */
9561 gpr_reg_base = 26 - cum->words;
9562 fpr_reg_base = 32 + 2 * cum->words;
9563 }
9564 }
9565
9566 /* Determine if the argument needs to be passed in both general and
9567 floating point registers. */
9568 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9569 /* If we are doing soft-float with portable runtime, then there
9570 is no need to worry about FP regs. */
9571 && !TARGET_SOFT_FLOAT
9572 /* The parameter must be some kind of scalar float, else we just
9573 pass it in integer registers. */
9574 && GET_MODE_CLASS (mode) == MODE_FLOAT
9575 /* The target function must not have a prototype. */
9576 && cum->nargs_prototype <= 0
9577 /* libcalls do not need to pass items in both FP and general
9578 registers. */
9579 && type != NULL_TREE
9580 /* All this hair applies to "outgoing" args only. This includes
9581 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
9582 && !cum->incoming)
9583 /* Also pass outgoing floating arguments in both registers in indirect
9584 calls with the 32 bit ABI and the HP assembler since there is no
9585 way to specify argument locations in static functions. */
9586 || (!TARGET_64BIT
9587 && !TARGET_GAS
9588 && !cum->incoming
9589 && cum->indirect
9590 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9591 {
9592 retval
9593 = gen_rtx_PARALLEL
9594 (mode,
9595 gen_rtvec (2,
9596 gen_rtx_EXPR_LIST (VOIDmode,
9597 gen_rtx_REG (mode, fpr_reg_base),
9598 const0_rtx),
9599 gen_rtx_EXPR_LIST (VOIDmode,
9600 gen_rtx_REG (mode, gpr_reg_base),
9601 const0_rtx)));
9602 }
9603 else
9604 {
9605 /* See if we should pass this parameter in a general register. */
9606 if (TARGET_SOFT_FLOAT
9607 /* Indirect calls in the normal 32bit ABI require all arguments
9608 to be passed in general registers. */
9609 || (!TARGET_PORTABLE_RUNTIME
9610 && !TARGET_64BIT
9611 && !TARGET_ELF32
9612 && cum->indirect)
9613 /* If the parameter is not a scalar floating-point parameter,
9614 then it belongs in GPRs. */
9615 || GET_MODE_CLASS (mode) != MODE_FLOAT
9616 /* Structure with single SFmode field belongs in GPR. */
9617 || (type && AGGREGATE_TYPE_P (type)))
9618 retval = gen_rtx_REG (mode, gpr_reg_base);
9619 else
9620 retval = gen_rtx_REG (mode, fpr_reg_base);
9621 }
9622 return retval;
9623 }
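
/* An illustrative example of the dual-register case above: the first
   double argument to an unprototyped function under the 32-bit ELF
   runtime (cum->words == 0, arg_size == 2) selects gpr_reg_base == 25
   and fpr_reg_base == 34, so the argument is passed as

	(parallel:DF [(expr_list (reg:DF 34) (const_int 0))
		      (expr_list (reg:DF 25) (const_int 0))])

   and the callee can pick it up from either register file.  */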
9624
9625 /* Arguments larger than one word are double word aligned. */
9626
9627 static unsigned int
9628 pa_function_arg_boundary (enum machine_mode mode, const_tree type)
9629 {
9630 bool singleword = (type
9631 ? (integer_zerop (TYPE_SIZE (type))
9632 || !TREE_CONSTANT (TYPE_SIZE (type))
9633 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9634 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9635
9636 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9637 }
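
/* E.g. in the 32-bit ABI a DImode argument (8 bytes, larger than
   UNITS_PER_WORD) is aligned to MAX_PARM_BOUNDARY, while an int, or a
   struct with zero or variable size, only gets PARM_BOUNDARY.  */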
9638
9639 /* If this arg would be passed totally in registers or totally on the stack,
9640 then this routine should return zero. */
9641
9642 static int
9643 pa_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
9644 tree type, bool named ATTRIBUTE_UNUSED)
9645 {
9646 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9647 unsigned int max_arg_words = 8;
9648 unsigned int offset = 0;
9649
9650 if (!TARGET_64BIT)
9651 return 0;
9652
9653 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9654 offset = 1;
9655
9656 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9657 /* Arg fits fully into registers. */
9658 return 0;
9659 else if (cum->words + offset >= max_arg_words)
9660 /* Arg fully on the stack. */
9661 return 0;
9662 else
9663 /* Arg is split. */
9664 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9665 }
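
/* A worked example: on TARGET_64BIT with cum->words == 6 and a 32-byte
   aggregate (FUNCTION_ARG_SIZE == 4), the offset stays 0; 6 + 4 spills
   past the eight argument words while 6 is still inside them, so the
   argument is split and (8 - 6) * UNITS_PER_WORD == 16 bytes travel in
   registers, with the remaining 16 bytes on the stack.  */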
9666
9667
9668 /* A get_unnamed_section callback for switching to the text section.
9669
9670 This function is only used with SOM. Because we don't support
9671 named subspaces, we can only create a new subspace or switch back
9672 to the default text subspace. */
9673
9674 static void
9675 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9676 {
9677 gcc_assert (TARGET_SOM);
9678 if (TARGET_GAS)
9679 {
9680 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9681 {
9682 /* We only want to emit a .nsubspa directive once at the
9683 start of the function. */
9684 cfun->machine->in_nsubspa = 1;
9685
9686 /* Create a new subspace for the text. This provides
9687 better stub placement and one-only functions. */
9688 if (cfun->decl
9689 && DECL_ONE_ONLY (cfun->decl)
9690 && !DECL_WEAK (cfun->decl))
9691 {
9692 output_section_asm_op ("\t.SPACE $TEXT$\n"
9693 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9694 "ACCESS=44,SORT=24,COMDAT");
9695 return;
9696 }
9697 }
9698 else
9699 {
9700 /* There isn't a current function or the body of the current
9701 function has been completed. So, we are changing to the
9702 text section to output debugging information. Thus, we
9703 need to forget that we are in the text section so that
9704 varasm.c will call us when text_section is selected again. */
9705 gcc_assert (!cfun || !cfun->machine
9706 || cfun->machine->in_nsubspa == 2);
9707 in_section = NULL;
9708 }
9709 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9710 return;
9711 }
9712 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9713 }
9714
9715 /* A get_unnamed_section callback for switching to comdat data
9716 sections. This function is only used with SOM. */
9717
9718 static void
9719 som_output_comdat_data_section_asm_op (const void *data)
9720 {
9721 in_section = NULL;
9722 output_section_asm_op (data);
9723 }
9724
9725 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9726
9727 static void
9728 pa_som_asm_init_sections (void)
9729 {
9730 text_section
9731 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9732
9733 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9734 is not being generated. */
9735 som_readonly_data_section
9736 = get_unnamed_section (0, output_section_asm_op,
9737 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9738
9739 /* When secondary definitions are not supported, SOM makes readonly
9740 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9741 the comdat flag. */
9742 som_one_only_readonly_data_section
9743 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9744 "\t.SPACE $TEXT$\n"
9745 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9746 "ACCESS=0x2c,SORT=16,COMDAT");
9747
9748
9749 /* When secondary definitions are not supported, SOM makes data one-only
9750 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9751 som_one_only_data_section
9752 = get_unnamed_section (SECTION_WRITE,
9753 som_output_comdat_data_section_asm_op,
9754 "\t.SPACE $PRIVATE$\n"
9755 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9756 "ACCESS=31,SORT=24,COMDAT");
9757
9758 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9759 which reference data within the $TEXT$ space (for example constant
9760 strings in the $LIT$ subspace).
9761
9762 The assemblers (GAS and HP as) both have problems with handling
9763 the difference of two symbols which is the other correct way to
9764 reference constant data during PIC code generation.
9765
9766 So, there's no way to reference constant data which is in the
9767 $TEXT$ space during PIC generation. Instead place all constant
9768 data into the $PRIVATE$ subspace (this reduces sharing, but it
9769 works correctly). */
9770 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9771
9772 /* We must not have a reference to an external symbol defined in a
9773 shared library in a readonly section, else the SOM linker will
9774 complain.
9775
9776 So, we force exception information into the data section. */
9777 exception_section = data_section;
9778 }
9779
9780 /* On hpux10, the linker will give an error if we have a reference
9781 in the read-only data section to a symbol defined in a shared
9782 library. Therefore, expressions that might require a reloc cannot
9783 be placed in the read-only data section. */
9784
9785 static section *
9786 pa_select_section (tree exp, int reloc,
9787 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9788 {
9789 if (TREE_CODE (exp) == VAR_DECL
9790 && TREE_READONLY (exp)
9791 && !TREE_THIS_VOLATILE (exp)
9792 && DECL_INITIAL (exp)
9793 && (DECL_INITIAL (exp) == error_mark_node
9794 || TREE_CONSTANT (DECL_INITIAL (exp)))
9795 && !reloc)
9796 {
9797 if (TARGET_SOM
9798 && DECL_ONE_ONLY (exp)
9799 && !DECL_WEAK (exp))
9800 return som_one_only_readonly_data_section;
9801 else
9802 return readonly_data_section;
9803 }
9804 else if (CONSTANT_CLASS_P (exp) && !reloc)
9805 return readonly_data_section;
9806 else if (TARGET_SOM
9807 && TREE_CODE (exp) == VAR_DECL
9808 && DECL_ONE_ONLY (exp)
9809 && !DECL_WEAK (exp))
9810 return som_one_only_data_section;
9811 else
9812 return data_section;
9813 }
9814
9815 static void
9816 pa_globalize_label (FILE *stream, const char *name)
9817 {
9818 /* We only handle DATA objects here, functions are globalized in
9819 ASM_DECLARE_FUNCTION_NAME. */
9820 if (! FUNCTION_NAME_P (name))
9821 {
9822 fputs ("\t.EXPORT ", stream);
9823 assemble_name (stream, name);
9824 fputs (",DATA\n", stream);
9825 }
9826 }
9827
9828 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9829
9830 static rtx
9831 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9832 int incoming ATTRIBUTE_UNUSED)
9833 {
9834 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9835 }
9836
9837 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9838
9839 bool
9840 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9841 {
9842 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9843 PA64 ABI says that objects larger than 128 bits are returned in memory.
9844 Note, int_size_in_bytes can return -1 if the size of the object is
9845 variable or larger than the maximum value that can be expressed as
9846 a HOST_WIDE_INT. It can also return zero for an empty type. The
9847 simplest way to handle variable and empty types is to pass them in
9848 memory. This avoids problems in defining the boundaries of argument
9849 slots, allocating registers, etc. */
9850 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9851 || int_size_in_bytes (type) <= 0);
9852 }
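
/* For example, a 12-byte struct is returned in memory in the 32-bit
   runtime (12 > 8) but in registers on TARGET_64BIT (12 <= 16); a
   variable-sized type (int_size_in_bytes == -1) always goes to
   memory.  */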
9853
9854 /* Structure to hold declaration and name of external symbols that are
9855 emitted by GCC. We generate a vector of these symbols and output them
9856 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9857 This avoids putting out names that are never really used. */
9858
9859 typedef struct GTY(()) extern_symbol
9860 {
9861 tree decl;
9862 const char *name;
9863 } extern_symbol;
9864
9865 /* Define gc'd vector type for extern_symbol. */
9866 DEF_VEC_O(extern_symbol);
9867 DEF_VEC_ALLOC_O(extern_symbol,gc);
9868
9869 /* Vector of extern_symbol objects. */
9870 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9871
9872 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9873 /* Mark DECL (name NAME) as an external reference (assembler output
9874 file FILE). This saves the names to output at the end of the file
9875 if actually referenced. */
9876
9877 void
9878 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9879 {
9880 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9881
9882 gcc_assert (file == asm_out_file);
9883 p->decl = decl;
9884 p->name = name;
9885 }
9886
9887 /* Output text required at the end of an assembler file.
9888 This includes deferred plabels and .import directives for
9889 all external symbols that were actually referenced. */
9890
9891 static void
9892 pa_hpux_file_end (void)
9893 {
9894 unsigned int i;
9895 extern_symbol *p;
9896
9897 if (!NO_DEFERRED_PROFILE_COUNTERS)
9898 output_deferred_profile_counters ();
9899
9900 output_deferred_plabels ();
9901
9902 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9903 {
9904 tree decl = p->decl;
9905
9906 if (!TREE_ASM_WRITTEN (decl)
9907 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9908 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9909 }
9910
9911 VEC_free (extern_symbol, gc, extern_symbols);
9912 }
9913 #endif
9914
9915 /* Return true if a change from mode FROM to mode TO for a register
9916 in register class RCLASS is invalid. */
9917
9918 bool
9919 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9920 enum reg_class rclass)
9921 {
9922 if (from == to)
9923 return false;
9924
9925 /* Reject changes to/from complex and vector modes. */
9926 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9927 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9928 return true;
9929
9930 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9931 return false;
9932
9933 /* There is no way to load QImode or HImode values directly from memory
9934 into the FP registers. SImode loads to the FP registers are not zero extended.
9935 On the 64-bit target, this conflicts with the definition of
9936 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9937 with different sizes in the floating-point registers. */
9938 if (MAYBE_FP_REG_CLASS_P (rclass))
9939 return true;
9940
9941 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9942 in specific sets of registers. Thus, we cannot allow changing
9943 to a larger mode when it's larger than a word. */
9944 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9945 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9946 return true;
9947
9948 return false;
9949 }
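
/* For example, an SFmode to DFmode change is invalid in an FP or
   maybe-FP class since the sizes differ, while SImode to SFmode is
   always fine because the sizes match.  */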
9950
9951 /* Returns TRUE if it is a good idea to tie two pseudo registers
9952 when one has mode MODE1 and one has mode MODE2.
9953 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9954 for any hard reg, then this must be FALSE for correct output.
9955
9956 We should return FALSE for QImode and HImode because these modes
9957 are not ok in the floating-point registers. However, this prevents
9958 tying these modes to SImode and DImode in the general registers.
9959 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9960 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9961 in the floating-point registers. */
9962
9963 bool
9964 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9965 {
9966 /* Don't tie modes in different classes. */
9967 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9968 return false;
9969
9970 return true;
9971 }
9972
9973 \f
9974 /* Length in units of the trampoline instruction code. */
9975
9976 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9977
9978
9979 /* Output assembler code for a block containing the constant parts
9980 of a trampoline, leaving space for the variable parts.
9981
9982 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9983 and then branches to the specified routine.
9984
9985 This code template is copied from the text segment to a stack location
9986 and then patched with pa_trampoline_init to contain valid values,
9987 and then entered as a subroutine.
9988
9989 It is best to keep this as small as possible to avoid having to
9990 flush multiple lines in the cache. */
9991
9992 static void
9993 pa_asm_trampoline_template (FILE *f)
9994 {
9995 if (!TARGET_64BIT)
9996 {
9997 fputs ("\tldw 36(%r22),%r21\n", f);
9998 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
9999 if (ASSEMBLER_DIALECT == 0)
10000 fputs ("\tdepi 0,31,2,%r21\n", f);
10001 else
10002 fputs ("\tdepwi 0,31,2,%r21\n", f);
10003 fputs ("\tldw 4(%r21),%r19\n", f);
10004 fputs ("\tldw 0(%r21),%r21\n", f);
10005 if (TARGET_PA_20)
10006 {
10007 fputs ("\tbve (%r21)\n", f);
10008 fputs ("\tldw 40(%r22),%r29\n", f);
10009 fputs ("\t.word 0\n", f);
10010 fputs ("\t.word 0\n", f);
10011 }
10012 else
10013 {
10014 fputs ("\tldsid (%r21),%r1\n", f);
10015 fputs ("\tmtsp %r1,%sr0\n", f);
10016 fputs ("\tbe 0(%sr0,%r21)\n", f);
10017 fputs ("\tldw 40(%r22),%r29\n", f);
10018 }
10019 fputs ("\t.word 0\n", f);
10020 fputs ("\t.word 0\n", f);
10021 fputs ("\t.word 0\n", f);
10022 fputs ("\t.word 0\n", f);
10023 }
10024 else
10025 {
10026 fputs ("\t.dword 0\n", f);
10027 fputs ("\t.dword 0\n", f);
10028 fputs ("\t.dword 0\n", f);
10029 fputs ("\t.dword 0\n", f);
10030 fputs ("\tmfia %r31\n", f);
10031 fputs ("\tldd 24(%r31),%r1\n", f);
10032 fputs ("\tldd 24(%r1),%r27\n", f);
10033 fputs ("\tldd 16(%r1),%r1\n", f);
10034 fputs ("\tbve (%r1)\n", f);
10035 fputs ("\tldd 32(%r31),%r31\n", f);
10036 fputs ("\t.dword 0 ; fptr\n", f);
10037 fputs ("\t.dword 0 ; static link\n", f);
10038 }
10039 }
10040
10041 /* Emit RTL insns to initialize the variable parts of a trampoline.
10042 FNADDR is an RTX for the address of the function's pure code.
10043 CXT is an RTX for the static chain value for the function.
10044
10045 Move the function address to the trampoline template at offset 36.
10046 Move the static chain value to the trampoline template at offset 40.
10047 Move the trampoline address to the trampoline template at offset 44.
10048 Move r19 to the trampoline template at offset 48. The latter two
10049 words create a plabel for the indirect call to the trampoline.
10050
10051 A similar sequence is used for the 64-bit port but the plabel is
10052 at the beginning of the trampoline.
10053
10054 Finally, the cache entries for the trampoline code are flushed.
10055 This is necessary to ensure that the trampoline instruction sequence
10056 is written to memory prior to any attempts at prefetching the code
10057 sequence. */
10058
10059 static void
10060 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10061 {
10062 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10063 rtx start_addr = gen_reg_rtx (Pmode);
10064 rtx end_addr = gen_reg_rtx (Pmode);
10065 rtx line_length = gen_reg_rtx (Pmode);
10066 rtx r_tramp, tmp;
10067
10068 emit_block_move (m_tramp, assemble_trampoline_template (),
10069 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10070 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10071
10072 if (!TARGET_64BIT)
10073 {
10074 tmp = adjust_address (m_tramp, Pmode, 36);
10075 emit_move_insn (tmp, fnaddr);
10076 tmp = adjust_address (m_tramp, Pmode, 40);
10077 emit_move_insn (tmp, chain_value);
10078
10079 /* Create a fat pointer for the trampoline. */
10080 tmp = adjust_address (m_tramp, Pmode, 44);
10081 emit_move_insn (tmp, r_tramp);
10082 tmp = adjust_address (m_tramp, Pmode, 48);
10083 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10084
10085 /* fdc and fic only use registers for the address to flush,
10086 they do not accept integer displacements. We align the
10087 start and end addresses to the beginning of their respective
10088 cache lines to minimize the number of lines flushed. */
10089 emit_insn (gen_andsi3 (start_addr, r_tramp,
10090 GEN_INT (-MIN_CACHELINE_SIZE)));
10091 tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE - 1));
10092 emit_insn (gen_andsi3 (end_addr, tmp,
10093 GEN_INT (-MIN_CACHELINE_SIZE)));
10094 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10095 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10096 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10097 gen_reg_rtx (Pmode),
10098 gen_reg_rtx (Pmode)));
10099 }
10100 else
10101 {
10102 tmp = adjust_address (m_tramp, Pmode, 56);
10103 emit_move_insn (tmp, fnaddr);
10104 tmp = adjust_address (m_tramp, Pmode, 64);
10105 emit_move_insn (tmp, chain_value);
10106
10107 /* Create a fat pointer for the trampoline. */
10108 tmp = adjust_address (m_tramp, Pmode, 16);
10109 emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
10110 tmp = adjust_address (m_tramp, Pmode, 24);
10111 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10112
10113 /* fdc and fic only use registers for the address to flush,
10114 they do not accept integer displacements. We align the
10115 start and end addresses to the beginning of their respective
10116 cache lines to minimize the number of lines flushed. */
10117 tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
10118 emit_insn (gen_anddi3 (start_addr, tmp,
10119 GEN_INT (-MIN_CACHELINE_SIZE)));
10120 tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
10121 emit_insn (gen_anddi3 (end_addr, tmp,
10122 GEN_INT (-MIN_CACHELINE_SIZE)));
10123 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10124 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10125 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10126 gen_reg_rtx (Pmode),
10127 gen_reg_rtx (Pmode)));
10128 }
10129 }
10130
10131 /* Perform any machine-specific adjustment in the address of the trampoline.
10132 ADDR contains the address that was passed to pa_trampoline_init.
10133 Adjust the trampoline address to point to the plabel at offset 44.
      The constant used below is 46, not 44: the extra 2 sets bit 30 of
      the address, marking it as a plabel pointer so that $$dyncall will
      fetch the actual code address and static chain from the plabel.  */
10134
10135 static rtx
10136 pa_trampoline_adjust_address (rtx addr)
10137 {
10138 if (!TARGET_64BIT)
10139 addr = memory_address (Pmode, plus_constant (addr, 46));
10140 return addr;
10141 }
10142
10143 static rtx
10144 pa_delegitimize_address (rtx orig_x)
10145 {
10146 rtx x = delegitimize_mem_from_attrs (orig_x);
10147
10148 if (GET_CODE (x) == LO_SUM
10149 && GET_CODE (XEXP (x, 1)) == UNSPEC
10150 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10151 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10152 return x;
10153 }
10154 \f
10155 static rtx
10156 pa_internal_arg_pointer (void)
10157 {
10158 /* The argument pointer and the hard frame pointer are the same in
10159 the 32-bit runtime, so we don't need a copy. */
10160 if (TARGET_64BIT)
10161 return copy_to_reg (virtual_incoming_args_rtx);
10162 else
10163 return virtual_incoming_args_rtx;
10164 }
10165
10166 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10167 Frame pointer elimination is automatically handled. */
10168
10169 static bool
10170 pa_can_eliminate (const int from, const int to)
10171 {
10172 /* The argument pointer cannot be eliminated in the 64-bit runtime. */
10173 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10174 return false;
10175
10176 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10177 ? ! frame_pointer_needed
10178 : true);
10179 }
10180
10181 /* Define the offset between two registers, FROM to be eliminated and its
10182 replacement TO, at the start of a routine. */
10183 HOST_WIDE_INT
10184 pa_initial_elimination_offset (int from, int to)
10185 {
10186 HOST_WIDE_INT offset;
10187
10188 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10189 && to == STACK_POINTER_REGNUM)
10190 offset = -pa_compute_frame_size (get_frame_size (), 0);
10191 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10192 offset = 0;
10193 else
10194 gcc_unreachable ();
10195
10196 return offset;
10197 }
10198
10199 static void
10200 pa_conditional_register_usage (void)
10201 {
10202 int i;
10203
10204 if (!TARGET_64BIT && !TARGET_PA_11)
10205 {
10206 for (i = 56; i <= FP_REG_LAST; i++)
10207 fixed_regs[i] = call_used_regs[i] = 1;
10208 for (i = 33; i < 56; i += 2)
10209 fixed_regs[i] = call_used_regs[i] = 1;
10210 }
10211 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10212 {
10213 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10214 fixed_regs[i] = call_used_regs[i] = 1;
10215 }
10216 if (flag_pic)
10217 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10218 }
10219
10220 /* Target hook for c_mode_for_suffix. */
10221
10222 static enum machine_mode
10223 pa_c_mode_for_suffix (char suffix)
10224 {
10225 if (HPUX_LONG_DOUBLE_LIBRARY)
10226 {
10227 if (suffix == 'q')
10228 return TFmode;
10229 }
10230
10231 return VOIDmode;
10232 }
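
/* With this hook, a constant written with the 'q' suffix, say 1.5q, is
   given TFmode (the 128-bit floating-point mode) when an HP-UX long
   double library is in use; otherwise the suffix remains rejected.  */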
10233
10234 /* Target hook for function_section. */
10235
10236 static section *
10237 pa_function_section (tree decl, enum node_frequency freq,
10238 bool startup, bool exit)
10239 {
10240 /* Put functions in text section if target doesn't have named sections. */
10241 if (!targetm_common.have_named_sections)
10242 return text_section;
10243
10244 /* Force nested functions into the same section as the containing
10245 function. */
10246 if (decl
10247 && DECL_SECTION_NAME (decl) == NULL_TREE
10248 && DECL_CONTEXT (decl) != NULL_TREE
10249 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10250 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE)
10251 return function_section (DECL_CONTEXT (decl));
10252
10253 /* Otherwise, use the default function section. */
10254 return default_function_section (decl, freq, startup, exit);
10255 }
10256
10257 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10258
10259 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10260 that need more than three instructions to load prior to reload. This
10261 limit is somewhat arbitrary. It takes three instructions to load a
10262 CONST_INT from memory but two are memory accesses. It may be better
10263 to increase the allowed range for CONST_INTS. We may also be able
10264 to handle CONST_DOUBLES. */
10265
10266 static bool
10267 pa_legitimate_constant_p (enum machine_mode mode, rtx x)
10268 {
10269 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10270 return false;
10271
10272 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10273 return false;
10274
10275 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10276 legitimate constants. */
10277 if (PA_SYMBOL_REF_TLS_P (x))
10278 {
10279 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
10280
10281 if (model == TLS_MODEL_GLOBAL_DYNAMIC || model == TLS_MODEL_LOCAL_DYNAMIC)
10282 return false;
10283 }
10284
10285 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10286 return false;
10287
10288 if (TARGET_64BIT
10289 && HOST_BITS_PER_WIDE_INT > 32
10290 && GET_CODE (x) == CONST_INT
10291 && !reload_in_progress
10292 && !reload_completed
10293 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10294 && !pa_cint_ok_for_move (INTVAL (x)))
10295 return false;
10296
10297 if (function_label_operand (x, mode))
10298 return false;
10299
10300 return true;
10301 }
10302
10303 #include "gt-pa.h"